You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2018/08/06 22:43:37 UTC
[1/2] orc git commit: ORC-203 - Update StringStatistics to trim long
strings to 1024 characters & record they were trimmed
Repository: orc
Updated Branches:
refs/heads/master df20c2139 -> fb815ffdb
ORC-203 - Update StringStatistics to trim long strings to 1024 characters & record they were trimmed
Fixes #292
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/cedd0f91
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/cedd0f91
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/cedd0f91
Branch: refs/heads/master
Commit: cedd0f913ad4d91348bb99e70b07d83854ca2719
Parents: df20c21
Author: Sandeep More <mo...@apache.org>
Authored: Wed Jul 18 09:31:02 2018 -0400
Committer: Owen O'Malley <om...@apache.org>
Committed: Mon Aug 6 14:32:48 2018 -0700
----------------------------------------------------------------------
.../org/apache/orc/StringColumnStatistics.java | 14 +
.../apache/orc/impl/ColumnStatisticsImpl.java | 284 +++++++++++++++++--
.../org/apache/orc/TestColumnStatistics.java | 138 +++++++--
proto/orc_proto.proto | 4 +
4 files changed, 405 insertions(+), 35 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/cedd0f91/java/core/src/java/org/apache/orc/StringColumnStatistics.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/StringColumnStatistics.java b/java/core/src/java/org/apache/orc/StringColumnStatistics.java
index 936b100..93d197a 100644
--- a/java/core/src/java/org/apache/orc/StringColumnStatistics.java
+++ b/java/core/src/java/org/apache/orc/StringColumnStatistics.java
@@ -34,6 +34,20 @@ public interface StringColumnStatistics extends ColumnStatistics {
String getMaximum();
/**
+ * Get the lower bound of the values: the minimum string, truncated to at
+ * most StringStatisticsImpl.MAX_BYTES_RECORDED bytes if it was longer.
+ * @return lower bound
+ */
+ String getLowerBound();
+
+ /**
+ * Get the upper bound of the values: the maximum string, or, if it exceeds
+ * StringStatisticsImpl.MAX_BYTES_RECORDED bytes, its truncated prefix with the last code point incremented.
+ * @return upper bound
+ */
+ String getUpperBound();
+
+ /**
* Get the total length of all strings
* @return the sum (total length)
*/
http://git-wip-us.apache.org/repos/asf/orc/blob/cedd0f91/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
index be05d80..0514839 100644
--- a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
@@ -17,13 +17,9 @@
*/
package org.apache.orc.impl;
-import java.sql.Date;
-import java.sql.Timestamp;
-import java.util.TimeZone;
-
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparator;
@@ -39,6 +35,14 @@ import org.apache.orc.StringColumnStatistics;
import org.apache.orc.TimestampColumnStatistics;
import org.apache.orc.TypeDescription;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.Arrays;
+import java.util.TimeZone;
+
public class ColumnStatisticsImpl implements ColumnStatistics {
@Override
@@ -517,10 +521,14 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
protected static final class StringStatisticsImpl extends ColumnStatisticsImpl
implements StringColumnStatistics {
+ public static final int MAX_BYTES_RECORDED = 1024;
private Text minimum = null;
private Text maximum = null;
private long sum = 0;
+ private boolean isLowerBoundSet = false;
+ private boolean isUpperBoundSet = false;
+
StringStatisticsImpl() {
}
@@ -543,35 +551,73 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
super.reset();
minimum = null;
maximum = null;
+ isLowerBoundSet = false;
+ isUpperBoundSet = false;
sum = 0;
}
@Override
public void updateString(Text value) {
if (minimum == null) {
- maximum = minimum = new Text(value);
+ if(value.getLength() > MAX_BYTES_RECORDED) {
+ minimum = truncateLowerBound(value);
+ maximum = truncateUpperBound(value);
+ isLowerBoundSet = true;
+ isUpperBoundSet = true;
+ } else {
+ maximum = minimum = new Text(value);
+ }
} else if (minimum.compareTo(value) > 0) {
- minimum = new Text(value);
+ if(value.getLength() > MAX_BYTES_RECORDED) {
+ minimum = truncateLowerBound(value);
+ isLowerBoundSet = true;
+ }else {
+ minimum = new Text(value);
+ }
} else if (maximum.compareTo(value) < 0) {
- maximum = new Text(value);
+ if(value.getLength() > MAX_BYTES_RECORDED) {
+ maximum = truncateUpperBound(value);
+ isUpperBoundSet = true;
+ } else {
+ maximum = new Text(value);
+ }
}
sum += value.getLength();
}
+
@Override
public void updateString(byte[] bytes, int offset, int length,
int repetitions) {
+ byte[] input = Arrays.copyOfRange(bytes, offset, offset+(length));
if (minimum == null) {
- maximum = minimum = new Text();
- maximum.set(bytes, offset, length);
+ if(length > MAX_BYTES_RECORDED) {
+ minimum = truncateLowerBound(input);
+ maximum = truncateUpperBound(input);
+ isLowerBoundSet = true;
+ isUpperBoundSet = true;
+ } else {
+ maximum = minimum = new Text();
+ maximum.set(bytes, offset, length);
+ }
} else if (WritableComparator.compareBytes(minimum.getBytes(), 0,
minimum.getLength(), bytes, offset, length) > 0) {
- minimum = new Text();
- minimum.set(bytes, offset, length);
+ if(length > MAX_BYTES_RECORDED) {
+ minimum = truncateLowerBound(input);
+ isLowerBoundSet = true;
+ } else {
+ minimum = new Text();
+ minimum.set(bytes, offset, length);
+ }
} else if (WritableComparator.compareBytes(maximum.getBytes(), 0,
maximum.getLength(), bytes, offset, length) < 0) {
- maximum = new Text();
- maximum.set(bytes, offset, length);
+ if(length > MAX_BYTES_RECORDED) {
+ maximum = truncateUpperBound(input);
+ isUpperBoundSet = true;
+ } else {
+ maximum = new Text();
+ maximum.set(bytes, offset, length);
+ }
}
sum += (long)length * repetitions;
}
@@ -584,16 +630,40 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
if (str.minimum != null) {
maximum = new Text(str.getMaximum());
minimum = new Text(str.getMinimum());
- } else {
+ }
+ /* str.minimum == null when lower bound set */
+ else if (str.isLowerBoundSet) {
+ minimum = new Text(str.getLowerBound());
+ isLowerBoundSet = true;
+
+ /* check for upper bound before setting max */
+ if (str.isUpperBoundSet) {
+ maximum = new Text(str.getUpperBound());
+ isUpperBoundSet = true;
+ } else {
+ maximum = new Text(str.getMaximum());
+ }
+ }
+ else {
/* both are empty */
maximum = minimum = null;
}
} else if (str.minimum != null) {
if (minimum.compareTo(str.minimum) > 0) {
- minimum = new Text(str.getMinimum());
+ if(str.isLowerBoundSet) {
+ minimum = new Text(str.getLowerBound());
+ isLowerBoundSet = true;
+ } else {
+ minimum = new Text(str.getMinimum());
+ }
}
if (maximum.compareTo(str.maximum) < 0) {
- maximum = new Text(str.getMaximum());
+ if(str.isUpperBoundSet) {
+ maximum = new Text(str.getUpperBound());
+ isUpperBoundSet = true;
+ }else {
+ maximum = new Text(str.getMaximum());
+ }
}
}
sum += str.sum;
@@ -621,11 +691,45 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
@Override
public String getMinimum() {
- return minimum == null ? null : minimum.toString();
+ /* if we have lower bound set (in case of truncation)
+ getMinimum will be null */
+ if(isLowerBoundSet) {
+ return null;
+ } else {
+ return minimum == null ? null : minimum.toString();
+ }
}
@Override
public String getMaximum() {
+ /* if the upper bound is set (in case of truncation)
+ getMaximum will be null */
+ if(isUpperBoundSet) {
+ return null;
+ } else {
+ return maximum == null ? null : maximum.toString();
+ }
+ }
+
+ /**
+ * Get the lower bound of the values: the minimum string, truncated to at
+ * most StringStatisticsImpl.MAX_BYTES_RECORDED bytes if it was longer.
+ *
+ * @return lower bound
+ */
+ @Override
+ public String getLowerBound() {
+ return minimum == null ? null : minimum.toString();
+ }
+
+ /**
+ * Get the upper bound of the values: the maximum string, or, if it exceeds
+ * StringStatisticsImpl.MAX_BYTES_RECORDED bytes, its truncated prefix with the last code point incremented.
+ *
+ * @return upper bound
+ */
+ @Override
+ public String getUpperBound() {
return maximum == null ? null : maximum.toString();
}
@@ -683,6 +787,150 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
result = 31 * result + (int) (sum ^ (sum >>> 32));
return result;
}
+
+ /**
+ * A helper function that truncates the {@link Text} input
+ * based on {@link #MAX_BYTES_RECORDED} and increments
+ * the last codepoint by 1.
+ * @param text
+ * @return truncated Text value
+ */
+ private static Text truncateUpperBound(final Text text) {
+
+ if(text.getBytes().length > MAX_BYTES_RECORDED) {
+ return truncateUpperBound(text.getBytes());
+ } else {
+ return text;
+ }
+
+ }
+
+ /**
+ * A helper function that truncates the {@code byte[]} input
+ * based on {@link #MAX_BYTES_RECORDED} and increments
+ * the last codepoint by 1.
+ * @param text
+ * @return truncated Text value
+ */
+ private static Text truncateUpperBound(final byte[] text) {
+ if(text.length > MAX_BYTES_RECORDED) {
+ final Text truncated = truncateLowerBound(text);
+ final byte[] data = truncated.getBytes();
+
+ int lastCharPosition = data.length - 1;
+ int offset = 0;
+
+ /* we don't expect characters more than 5 bytes */
+ for (int i = 0; i < 5; i++) {
+ final byte b = data[lastCharPosition];
+ offset = getCharLength(b);
+
+ /* found beginning of a valid char */
+ if (offset > 0) {
+ final byte[] lastCharBytes = Arrays
+ .copyOfRange(text, lastCharPosition, lastCharPosition + offset);
+ /* last character */
+ final String s = new String(lastCharBytes, Charset.forName("UTF-8"));
+
+ /* increment the codepoint of last character */
+ int codePoint = s.codePointAt(s.length() - 1);
+ codePoint++;
+ final char[] incrementedChars = Character.toChars(codePoint);
+
+ /* convert char array to byte array */
+ final CharBuffer charBuffer = CharBuffer.wrap(incrementedChars);
+ final ByteBuffer byteBuffer = Charset.forName("UTF-8").encode(charBuffer);
+ final byte[] bytes = Arrays.copyOfRange(byteBuffer.array(), byteBuffer.position(),
+ byteBuffer.limit());
+
+ final byte[] result = new byte[lastCharPosition + bytes.length];
+
+ /* copy truncated array minus last char */
+ System.arraycopy(text, 0, result, 0, lastCharPosition);
+ /* copy last char */
+ System.arraycopy(bytes, 0, result, lastCharPosition, bytes.length);
+
+ return new Text(result);
+
+ } /* not found keep looking for a beginning byte */ else {
+ --lastCharPosition;
+ }
+
+ }
+ /* beginning of a valid char not found */
+ throw new IllegalArgumentException(
+ "Could not truncate string, beginning of a valid char not found");
+ } else {
+ return new Text(text);
+ }
+ }
+
+ private static Text truncateLowerBound(final Text text) {
+ if(text.getBytes().length > MAX_BYTES_RECORDED) {
+ return truncateLowerBound(text.getBytes());
+ } else {
+ return text;
+ }
+ }
+
+
+ private static Text truncateLowerBound(final byte[] text) {
+
+ if(text.length > MAX_BYTES_RECORDED) {
+
+ int truncateLen = MAX_BYTES_RECORDED;
+ int offset = 0;
+
+ for(int i=0; i<5; i++) {
+
+ byte b = text[truncateLen];
+ /* check for the beginning of 1,2,3,4,5 bytes long char */
+ offset = getCharLength(b);
+
+ /* found beginning of a valid char */
+ if(offset > 0) {
+ byte[] truncated = Arrays.copyOfRange(text, 0, (truncateLen));
+ return new Text(truncated);
+ } else {
+ /* beginning of a valid char not found decrease the
+ length of array by 1 and loop */
+ --truncateLen;
+ }
+
+ }
+ /* beginning of a valid char not found */
+ throw new IllegalArgumentException("Could not truncate string, beginning of a valid char not found");
+
+ } else {
+ return new Text(text);
+ }
+
+ }
+
+ /**
+ * A helper function that returns the length of the UTF-8 character
+ * IF the given byte is beginning of a valid char.
+ * In case it is a beginning byte, a value greater than 0
+ * is returned (length of character in bytes).
+ * Else 0 is returned
+ * @param b
+ * @return 0 if not beginning of char else length of char in bytes
+ */
+ private static int getCharLength(byte b) {
+ int len = 0;
+ if((b & 0b10000000) == 0b00000000 ) {
+ len = 1;
+ } else if ((b & 0b11100000) == 0b11000000 ) {
+ len = 2;
+ } else if ((b & 0b11110000) == 0b11100000 ) {
+ len = 3;
+ } else if ((b & 0b11111000) == 0b11110000 ) {
+ len = 4;
+ } else if ((b & 0b11111100) == 0b11111000 ) {
+ len = 5;
+ }
+ return len;
+ }
}
protected static final class BinaryStatisticsImpl extends ColumnStatisticsImpl implements
http://git-wip-us.apache.org/repos/asf/orc/blob/cedd0f91/java/core/src/test/org/apache/orc/TestColumnStatistics.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/TestColumnStatistics.java b/java/core/src/test/org/apache/orc/TestColumnStatistics.java
index 2045004..eae53fc 100644
--- a/java/core/src/test/org/apache/orc/TestColumnStatistics.java
+++ b/java/core/src/test/org/apache/orc/TestColumnStatistics.java
@@ -18,27 +18,12 @@
package org.apache.orc;
-import static junit.framework.Assert.assertEquals;
-import static org.junit.Assume.assumeTrue;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.PrintStream;
-import java.sql.Timestamp;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.List;
-import java.util.TimeZone;
-
+import org.apache.commons.lang.RandomStringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.io.Text;
import org.apache.orc.impl.ColumnStatisticsImpl;
import org.junit.Before;
@@ -46,6 +31,16 @@ import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestName;
+import java.io.File;
+import java.sql.Timestamp;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.TimeZone;
+
+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.assertTrue;
+
/**
* Test ColumnStatisticsImpl for ORC.
*/
@@ -122,6 +117,115 @@ public class TestColumnStatistics {
}
@Test
+ public void testUpperAndLowerBounds() throws Exception {
+ final TypeDescription schema = TypeDescription.createString();
+
+ final String test = RandomStringUtils.random(1024+10);
+ final String fragment = "foo"+test;
+ final String fragmentLowerBound = "bar"+test;
+
+
+ final ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
+ final ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
+
+ /* test a scenario for the first max string */
+ stats1.updateString(new Text(test));
+
+ final StringColumnStatistics typed = (StringColumnStatistics) stats1;
+ final StringColumnStatistics typed2 = (StringColumnStatistics) stats2;
+
+ assertTrue("Upperbound cannot be more than 1024 bytes",1024 >= typed.getUpperBound().getBytes().length);
+ assertTrue("Lowerbound cannot be more than 1024 bytes",1024 >= typed.getLowerBound().getBytes().length);
+
+ assertEquals(null, typed.getMinimum());
+ assertEquals(null, typed.getMaximum());
+
+ stats1.reset();
+
+ /* test a scenario for the first max bytes */
+ stats1.updateString(test.getBytes(), 0, test.getBytes().length, 0);
+
+ assertTrue("Lowerbound cannot be more than 1024 bytes", 1024 >= typed.getLowerBound().getBytes().length);
+ assertTrue("Upperbound cannot be more than 1024 bytes", 1024 >= typed.getUpperBound().getBytes().length);
+
+ assertEquals(null, typed.getMinimum());
+ assertEquals(null, typed.getMaximum());
+
+ stats1.reset();
+ /* test upper bound - merging */
+ stats1.updateString(new Text("bob"));
+ stats1.updateString(new Text("david"));
+ stats1.updateString(new Text("charles"));
+
+ stats2.updateString(new Text("anne"));
+ stats2.updateString(new Text(fragment));
+
+ assertEquals("anne", typed2.getMinimum());
+ assertEquals(null, typed2.getMaximum());
+
+ stats1.merge(stats2);
+
+ assertEquals("anne", typed.getMinimum());
+ assertEquals(null, typed.getMaximum());
+
+
+ /* test lower bound - merging */
+ stats1.reset();
+ stats2.reset();
+
+ stats1.updateString(new Text("david"));
+ stats1.updateString(new Text("charles"));
+
+ stats2.updateString(new Text("jane"));
+ stats2.updateString(new Text(fragmentLowerBound));
+
+ stats1.merge(stats2);
+
+ assertEquals(null, typed.getMinimum());
+ assertEquals("jane", typed.getMaximum());
+ }
+
+ @Test
+ public void testUpperBoundCodepointIncrement() {
+ /* test with characters that use more than one byte */
+ final String fragment = "載記応存環敢辞月発併際岩。外現抱疑曲旧持九柏先済索。"
+ + "富扁件戒程少交文相修宮由改価苦。位季供幾日本求知集機所江取号均下犯変第勝。"
+ + "管今文図石職常暮海営感覧果賞挙。難加判郵年太願会周面市害成産。"
+ + "内分載函取片領披見復来車必教。元力理関未法会伊団万球幕点帳幅為都話間。"
+ + "親禁感栗合開注読月島月紀間卒派伏闘。幕経阿刊間都紹知禁追半業。"
+ + "根案協話射格治位相機遇券外野何。話第勝平当降負京複掲書変痛。"
+ + "博年群辺軽妻止和真権暑着要質在破応。"
+ + "नीचे मुक्त बिन्दुओ समस्याओ आंतरकार्यक्षमता सुना प्रति सभीकुछ यायेका दिनांक वातावरण ";
+
+ final String input = fragment
+ + "मुश्किले केन्द्रिय "
+ + "लगती नवंबर प्रमान गयेगया समस्याओ विश्व लिये समजते आपके एकत्रित विकेन्द्रित स्वतंत्र "
+ + "व्याख्यान भेदनक्षमता शीघ्र होभर मुखय करता। दर्शाता वातावरण विस्तरणक्षमता दोषसके प्राप्त समाजो "
+ + "।क तकनीकी दर्शाता कार्यकर्ता बाधा औषधिक समस्याओ समस्याए गोपनीयता प्राण पसंद "
+ + "भीयह नवंबर दोषसके अनुवादक सोफ़तवेर समस्याए क्षमता। कार्य होभर\n";
+
+ final String lowerBound = fragment +
+ "मुश्किले केन्द्रिय लगती नवंबर प्रमान गयेगया समस्याओ विश्व लिये ";
+
+ final String upperbound = fragment +
+ "मुश्किले केन्द्रिय लगती नवंबर प्रमान गयेगया समस्याओ विश्व लिये!";
+
+ final TypeDescription schema = TypeDescription.createString();
+ final ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
+
+ stats1.updateString(input.getBytes(), 0, input.getBytes().length, 1);
+
+ final StringColumnStatistics typed = (StringColumnStatistics) stats1;
+
+ assertEquals(1022, typed.getUpperBound().getBytes().length);
+ assertEquals(1022, typed.getLowerBound().getBytes().length);
+
+ assertEquals(upperbound, typed.getUpperBound());
+ assertEquals(lowerBound, typed.getLowerBound());
+ }
+
+
+ @Test
public void testDateMerge() throws Exception {
TypeDescription schema = TypeDescription.createDate();
http://git-wip-us.apache.org/repos/asf/orc/blob/cedd0f91/proto/orc_proto.proto
----------------------------------------------------------------------
diff --git a/proto/orc_proto.proto b/proto/orc_proto.proto
index f92e531..e54427d 100644
--- a/proto/orc_proto.proto
+++ b/proto/orc_proto.proto
@@ -39,6 +39,10 @@ message StringStatistics {
optional string maximum = 2;
// sum will store the total length of all strings in a stripe
optional sint64 sum = 3;
+ // If the minimum or maximum value was longer than 1024 bytes, store a lower or upper
+ // bound instead of the minimum or maximum values above.
+ optional string lowerBound = 4;
+ optional string upperBound = 5;
}
message BucketStatistics {
[2/2] orc git commit: ORC-392. Download page shouldn't link to
dist.apache.org for keys.
Posted by om...@apache.org.
ORC-392. Download page shouldn't link to dist.apache.org for keys.
Signed-off-by: Owen O'Malley <om...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/fb815ffd
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/fb815ffd
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/fb815ffd
Branch: refs/heads/master
Commit: fb815ffdb7fdec1960dd36d3dbd412c26e99e2da
Parents: cedd0f9
Author: Owen O'Malley <om...@apache.org>
Authored: Mon Aug 6 15:42:58 2018 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Mon Aug 6 15:43:26 2018 -0700
----------------------------------------------------------------------
site/_config.yml | 2 +-
site/develop/index.md | 2 +-
site/develop/make-release.md | 6 +++++-
3 files changed, 7 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/fb815ffd/site/_config.yml
----------------------------------------------------------------------
diff --git a/site/_config.yml b/site/_config.yml
index 0cc3a20..b091925 100644
--- a/site/_config.yml
+++ b/site/_config.yml
@@ -5,7 +5,7 @@ excerpt_separator: ""
repository: https://github.com/apache/orc
jira: https://issues.apache.org/jira/browse
-dist: https://dist.apache.org/repos/dist/release/orc
+dist: https://www.apache.org/dist/orc
dist_mirror: https://www.apache.org/dyn/closer.cgi/orc
tag_url: https://github.com/apache/orc/releases/tag/rel
dist_archive: https://archive.apache.org/dist/orc
http://git-wip-us.apache.org/repos/asf/orc/blob/fb815ffd/site/develop/index.md
----------------------------------------------------------------------
diff --git a/site/develop/index.md b/site/develop/index.md
index be0a80d..01d495a 100644
--- a/site/develop/index.md
+++ b/site/develop/index.md
@@ -186,7 +186,7 @@ Apache expects the projects to manage their current release artifact
distribution using subversion. It should be limited to the latest
release in each of the active release branches.
-The ORC dist directory is managed via
+The ORC dist directory is managed via svn in
[https://dist.apache.org/repos/dist/release/orc](https://dist.apache.org/repos/dist/release/orc).
The release artifacts are pushed to many mirrors. Files in the dist
directory are available forever via the [Apache dist
http://git-wip-us.apache.org/repos/asf/orc/blob/fb815ffd/site/develop/make-release.md
----------------------------------------------------------------------
diff --git a/site/develop/make-release.md b/site/develop/make-release.md
index 04f7756..ff24fa9 100644
--- a/site/develop/make-release.md
+++ b/site/develop/make-release.md
@@ -45,7 +45,11 @@ sftp> put orc-X.Y.Zrc0*
sftp> quit
~~~
-Make sure your GPG key exists [here](https://dist.apache.org/repos/dist/release/orc/KEYS) for others to verify signature in RC
+Make sure your GPG key is present in [Apache
+LDAP](https://id.apache.org) and the ORC [svn dist
+area](https://dist.apache.org/repos/dist/release/orc/KEYS). That will
+be necessary for others to verify the signatures on the release
+candidate.
Click the version to release (X.Y.Z) [here](https://issues.apache.org/jira/projects/ORC?selectedItem=com.atlassian.jira.jira-projects-plugin:release-page)
to get the list of jiras that are fixed in X.Y.Z