You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cd...@apache.org on 2008/06/11 02:56:25 UTC
svn commit: r666406 - in /hadoop/core/branches/branch-0.18: ./ conf/
src/core/org/apache/hadoop/util/ src/mapred/org/apache/hadoop/mapred/
src/test/org/apache/hadoop/util/
Author: cdouglas
Date: Tue Jun 10 17:56:25 2008
New Revision: 666406
URL: http://svn.apache.org/viewvc?rev=666406&view=rev
Log:
HADOOP-3442. Limit recursion depth on the stack for QuickSort to prevent
StackOverflowErrors. To avoid O(n*n) cases, when partitioning depth exceeds a
multiple of log(n), change to HeapSort.
Added:
hadoop/core/branches/branch-0.18/src/core/org/apache/hadoop/util/HeapSort.java
Modified:
hadoop/core/branches/branch-0.18/CHANGES.txt
hadoop/core/branches/branch-0.18/conf/hadoop-default.xml
hadoop/core/branches/branch-0.18/src/core/org/apache/hadoop/util/IndexedSorter.java
hadoop/core/branches/branch-0.18/src/core/org/apache/hadoop/util/QuickSort.java
hadoop/core/branches/branch-0.18/src/mapred/org/apache/hadoop/mapred/MapTask.java
hadoop/core/branches/branch-0.18/src/test/org/apache/hadoop/util/TestIndexedSort.java
Modified: hadoop/core/branches/branch-0.18/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.18/CHANGES.txt?rev=666406&r1=666405&r2=666406&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.18/CHANGES.txt (original)
+++ hadoop/core/branches/branch-0.18/CHANGES.txt Tue Jun 10 17:56:25 2008
@@ -555,6 +555,10 @@
HADOOP-3528. Metrics FilesCreated and files_deleted metrics
do not match. (Lohit via Mahadev)
+ HADOOP-3442. Limit recursion depth on the stack for QuickSort to prevent
+ StackOverflowErrors. To avoid O(n*n) cases, when partitioning depth exceeds
+ a multiple of log(n), change to HeapSort. (cdouglas)
+
Release 0.17.0 - 2008-05-18
INCOMPATIBLE CHANGES
Modified: hadoop/core/branches/branch-0.18/conf/hadoop-default.xml
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.18/conf/hadoop-default.xml?rev=666406&r1=666405&r2=666406&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.18/conf/hadoop-default.xml (original)
+++ hadoop/core/branches/branch-0.18/conf/hadoop-default.xml Tue Jun 10 17:56:25 2008
@@ -966,7 +966,7 @@
<property>
<name>map.sort.class</name>
- <value>org.apache.hadoop.mapred.MergeSorter</value>
+ <value>org.apache.hadoop.util.QuickSort</value>
<description>The default sort class for sorting keys.
</description>
</property>
Added: hadoop/core/branches/branch-0.18/src/core/org/apache/hadoop/util/HeapSort.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.18/src/core/org/apache/hadoop/util/HeapSort.java?rev=666406&view=auto
==============================================================================
--- hadoop/core/branches/branch-0.18/src/core/org/apache/hadoop/util/HeapSort.java (added)
+++ hadoop/core/branches/branch-0.18/src/core/org/apache/hadoop/util/HeapSort.java Tue Jun 10 17:56:25 2008
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.util;
+
+/**
+ * An implementation of the core algorithm of HeapSort.
+ */
+public final class HeapSort implements IndexedSorter {
+
+ public HeapSort() { }
+
+ private static void downHeap(final IndexedSortable s, final int b,
+ int i, final int N) {
+ for (int idx = i << 1; idx < N; idx = i << 1) {
+ if (idx + 1 < N && s.compare(b + idx, b + idx + 1) < 0) {
+ if (s.compare(b + i, b + idx + 1) < 0) {
+ s.swap(b + i, b + idx + 1);
+ } else return;
+ i = idx + 1;
+ } else if (s.compare(b + i, b + idx) < 0) {
+ s.swap(b + i, b + idx);
+ i = idx;
+ } else return;
+ }
+ }
+
+ /**
+ * Sort the given range of items using heap sort.
+ * {@inheritDoc}
+ */
+ public void sort(IndexedSortable s, int p, int r) {
+ sort(s, p, r, null);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public void sort(final IndexedSortable s, final int p, final int r,
+ final Progressable rep) {
+ final int N = r - p;
+ // build heap w/ reverse comparator, then write in-place from end
+ final int t = Integer.highestOneBit(N);
+ for (int i = t; i > 1; i >>>= 1) {
+ for (int j = i >>> 1; j < i; ++j) {
+ downHeap(s, p-1, j, N + 1);
+ }
+ if (null != rep) {
+ rep.progress();
+ }
+ }
+ for (int i = r - 1; i > p; --i) {
+ s.swap(p, i);
+ downHeap(s, p - 1, 1, i - p + 1);
+ }
+ }
+}
Modified: hadoop/core/branches/branch-0.18/src/core/org/apache/hadoop/util/IndexedSorter.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.18/src/core/org/apache/hadoop/util/IndexedSorter.java?rev=666406&r1=666405&r2=666406&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.18/src/core/org/apache/hadoop/util/IndexedSorter.java (original)
+++ hadoop/core/branches/branch-0.18/src/core/org/apache/hadoop/util/IndexedSorter.java Tue Jun 10 17:56:25 2008
@@ -35,4 +35,12 @@
* @see IndexedSortable#swap
*/
void sort(IndexedSortable s, int l, int r);
+
+ /**
+ * Same as {@link #sort(IndexedSortable,int,int)}, but indicate progress
+ * periodically.
+ * @see #sort(IndexedSortable,int,int)
+ */
+ void sort(IndexedSortable s, int l, int r, Progressable rep);
+
}
Modified: hadoop/core/branches/branch-0.18/src/core/org/apache/hadoop/util/QuickSort.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.18/src/core/org/apache/hadoop/util/QuickSort.java?rev=666406&r1=666405&r2=666406&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.18/src/core/org/apache/hadoop/util/QuickSort.java (original)
+++ hadoop/core/branches/branch-0.18/src/core/org/apache/hadoop/util/QuickSort.java Tue Jun 10 17:56:25 2008
@@ -19,41 +19,66 @@
/**
* An implementation of the core algorithm of QuickSort.
- * See "Median-of-Three Partitioning" in Sedgewick book.
*/
public final class QuickSort implements IndexedSorter {
+ private static final IndexedSorter alt = new HeapSort();
+
public QuickSort() { }
- private void fix(IndexedSortable s, int p, int r) {
+ private static void fix(IndexedSortable s, int p, int r) {
if (s.compare(p, r) > 0) {
s.swap(p, r);
}
}
+ /**
+ * Deepest recursion before giving up and doing a heapsort.
+ * Returns 2 * ceil(log(n)).
+ */
+ protected static int getMaxDepth(int x) {
+ if (x <= 0)
+ throw new IllegalArgumentException("Undefined for " + x);
+ return (32 - Integer.numberOfLeadingZeros(x - 1)) << 2;
+ }
+
+ /**
+ * Sort the given range of items using quick sort.
+ * {@inheritDoc} If the recursion depth falls below {@link #getMaxDepth},
+ * then switch to {@link HeapSort}.
+ */
public void sort(IndexedSortable s, int p, int r) {
sort(s, p, r, null);
}
/**
- * Same as {@link #sort}, but indicate that we're making progress after
- * each partition.
+ * {@inheritDoc}
*/
- public void sort(final IndexedSortable s, final int p, final int r,
+ public void sort(final IndexedSortable s, int p, int r,
final Progressable rep) {
+ sortInternal(s, p, r, rep, getMaxDepth(r - p));
+ }
+
+ private static void sortInternal(final IndexedSortable s, int p, int r,
+ final Progressable rep, int depth) {
if (null != rep) {
rep.progress();
}
+ while (true) {
if (r-p < 13) {
for (int i = p; i < r; ++i) {
- for (int j = i; j > p; --j) {
- if (s.compare(j-1, j) > 0) {
- s.swap(j, j-1);
- }
+ for (int j = i; j > p && s.compare(j-1, j) > 0; --j) {
+ s.swap(j, j-1);
}
}
return;
}
+ if (--depth < 0) {
+ // give up
+ System.out.print("H");
+ alt.sort(s, p, r, rep);
+ return;
+ }
// select, move pivot into first position
fix(s, (p+r) >>> 1, p);
@@ -95,11 +120,12 @@
// Recurse on smaller interval first to keep stack shallow
assert i != j;
if (i - p < r - j) {
- sort(s, p, i, rep);
- sort(s, j, r, rep);
+ sortInternal(s, p, i, rep, depth);
+ p = j;
} else {
- sort(s, j, r, rep);
- sort(s, p, i, rep);
+ sortInternal(s, j, r, rep, depth);
+ r = i;
+ }
}
}
Modified: hadoop/core/branches/branch-0.18/src/mapred/org/apache/hadoop/mapred/MapTask.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.18/src/mapred/org/apache/hadoop/mapred/MapTask.java?rev=666406&r1=666405&r2=666406&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.18/src/mapred/org/apache/hadoop/mapred/MapTask.java (original)
+++ hadoop/core/branches/branch-0.18/src/mapred/org/apache/hadoop/mapred/MapTask.java Tue Jun 10 17:56:25 2008
@@ -51,6 +51,7 @@
import org.apache.hadoop.mapred.IFile.Reader;
import org.apache.hadoop.mapred.Merger.Segment;
import org.apache.hadoop.util.IndexedSortable;
+import org.apache.hadoop.util.IndexedSorter;
import org.apache.hadoop.util.Progress;
import org.apache.hadoop.util.QuickSort;
import org.apache.hadoop.util.ReflectionUtils;
@@ -323,8 +324,8 @@
private final int softRecordLimit;
private final int softBufferLimit;
private final int minSpillsForCombine;
+ private final IndexedSorter sorter;
private final Object spillLock = new Object();
- private final QuickSort sorter = new QuickSort();
private final BlockingBuffer bb = new BlockingBuffer();
private final FileSystem localFs;
@@ -356,6 +357,9 @@
if ((sortmb & 0x7FF) != sortmb) {
throw new IOException("Invalid \"io.sort.mb\": " + sortmb);
}
+ sorter = (IndexedSorter)
+ ReflectionUtils.newInstance(
+ job.getClass("map.sort.class", QuickSort.class), job);
LOG.info("io.sort.mb = " + sortmb);
// buffers and accounting
int maxMemUsage = sortmb << 20;
Modified: hadoop/core/branches/branch-0.18/src/test/org/apache/hadoop/util/TestIndexedSort.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.18/src/test/org/apache/hadoop/util/TestIndexedSort.java?rev=666406&r1=666405&r2=666406&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.18/src/test/org/apache/hadoop/util/TestIndexedSort.java (original)
+++ hadoop/core/branches/branch-0.18/src/test/org/apache/hadoop/util/TestIndexedSort.java Tue Jun 10 17:56:25 2008
@@ -31,6 +31,146 @@
public class TestIndexedSort extends TestCase {
+ public void sortAllEqual(IndexedSorter sorter) throws Exception {
+ final int SAMPLE = 500;
+ int[] values = new int[SAMPLE];
+ Arrays.fill(values, 10);
+ SampleSortable s = new SampleSortable(values);
+ sorter.sort(s, 0, SAMPLE);
+ int[] check = s.getSorted();
+ assertTrue(Arrays.toString(values) + "\ndoesn't match\n" +
+ Arrays.toString(check), Arrays.equals(values, check));
+ // Set random min/max, re-sort.
+ Random r = new Random();
+ int min = r.nextInt(SAMPLE);
+ int max = (min + 1 + r.nextInt(SAMPLE - 2)) % SAMPLE;
+ values[min] = 9;
+ values[max] = 11;
+ System.out.println("testAllEqual setting min/max at " + min + "/" + max +
+ "(" + sorter.getClass().getName() + ")");
+ s = new SampleSortable(values);
+ sorter.sort(s, 0, SAMPLE);
+ check = s.getSorted();
+ Arrays.sort(values);
+ assertTrue(check[0] == 9);
+ assertTrue(check[SAMPLE - 1] == 11);
+ assertTrue(Arrays.toString(values) + "\ndoesn't match\n" +
+ Arrays.toString(check), Arrays.equals(values, check));
+ }
+
+ public void sortSorted(IndexedSorter sorter) throws Exception {
+ final int SAMPLE = 500;
+ int[] values = new int[SAMPLE];
+ Random r = new Random();
+ long seed = r.nextLong();
+ r.setSeed(seed);
+ System.out.println("testSorted seed: " + seed +
+ "(" + sorter.getClass().getName() + ")");
+ for (int i = 0; i < SAMPLE; ++i) {
+ values[i] = r.nextInt(100);
+ }
+ Arrays.sort(values);
+ SampleSortable s = new SampleSortable(values);
+ sorter.sort(s, 0, SAMPLE);
+ int[] check = s.getSorted();
+ assertTrue(Arrays.toString(values) + "\ndoesn't match\n" +
+ Arrays.toString(check), Arrays.equals(values, check));
+ }
+
+ public void sortSequential(IndexedSorter sorter) throws Exception {
+ final int SAMPLE = 500;
+ int[] values = new int[SAMPLE];
+ for (int i = 0; i < SAMPLE; ++i) {
+ values[i] = i;
+ }
+ SampleSortable s = new SampleSortable(values);
+ sorter.sort(s, 0, SAMPLE);
+ int[] check = s.getSorted();
+ assertTrue(Arrays.toString(values) + "\ndoesn't match\n" +
+ Arrays.toString(check), Arrays.equals(values, check));
+ }
+
+ public void sortSingleRecord(IndexedSorter sorter) throws Exception {
+ final int SAMPLE = 1;
+ SampleSortable s = new SampleSortable(SAMPLE);
+ int[] values = s.getValues();
+ sorter.sort(s, 0, SAMPLE);
+ int[] check = s.getSorted();
+ assertTrue(Arrays.toString(values) + "\ndoesn't match\n" +
+ Arrays.toString(check), Arrays.equals(values, check));
+ }
+
+ public void sortRandom(IndexedSorter sorter) throws Exception {
+ final int SAMPLE = 256 * 1024;
+ SampleSortable s = new SampleSortable(SAMPLE);
+ long seed = s.getSeed();
+ System.out.println("sortRandom seed: " + seed +
+ "(" + sorter.getClass().getName() + ")");
+ int[] values = s.getValues();
+ Arrays.sort(values);
+ sorter.sort(s, 0, SAMPLE);
+ int[] check = s.getSorted();
+ assertTrue("seed: " + seed + "\ndoesn't match\n",
+ Arrays.equals(values, check));
+ }
+
+ public void sortWritable(IndexedSorter sorter) throws Exception {
+ final int SAMPLE = 1000;
+ WritableSortable s = new WritableSortable(SAMPLE);
+ long seed = s.getSeed();
+ System.out.println("sortWritable seed: " + seed +
+ "(" + sorter.getClass().getName() + ")");
+ String[] values = s.getValues();
+ Arrays.sort(values);
+ sorter.sort(s, 0, SAMPLE);
+ String[] check = s.getSorted();
+ assertTrue("seed: " + seed + "\ndoesn't match",
+ Arrays.equals(values, check));
+ }
+
+
+ public void testQuickSort() throws Exception {
+ QuickSort sorter = new QuickSort();
+ sortRandom(sorter);
+ sortSingleRecord(sorter);
+ sortSequential(sorter);
+ sortSorted(sorter);
+ sortAllEqual(sorter);
+ sortWritable(sorter);
+
+ // test degenerate case for median-of-three partitioning
+ // a_n, a_1, a_2, ..., a_{n-1}
+ final int DSAMPLE = 500;
+ int[] values = new int[DSAMPLE];
+ for (int i = 0; i < DSAMPLE; ++i) { values[i] = i; }
+ values[0] = values[DSAMPLE - 1] + 1;
+ SampleSortable s = new SampleSortable(values);
+ values = s.getValues();
+ final int DSS = (DSAMPLE / 2) * (DSAMPLE / 2);
+ // Worst case is (N/2)^2 comparisons, not including those effecting
+ // the median-of-three partitioning; impl should handle this case
+ MeasuredSortable m = new MeasuredSortable(s, DSS);
+ sorter.sort(m, 0, DSAMPLE);
+ System.out.println("QuickSort degen cmp/swp: " +
+ m.getCmp() + "/" + m.getSwp() +
+ "(" + sorter.getClass().getName() + ")");
+ Arrays.sort(values);
+ int[] check = s.getSorted();
+ assertTrue(Arrays.equals(values, check));
+ }
+
+ public void testHeapSort() throws Exception {
+ HeapSort sorter = new HeapSort();
+ sortRandom(sorter);
+ sortSingleRecord(sorter);
+ sortSequential(sorter);
+ sortSorted(sorter);
+ sortAllEqual(sorter);
+ sortWritable(sorter);
+ }
+
+ // Sortables //
+
private static class SampleSortable implements IndexedSortable {
private int[] valindex;
private int[] valindirect;
@@ -96,6 +236,45 @@
}
+ public static class MeasuredSortable implements IndexedSortable {
+
+ private int comparisions;
+ private int swaps;
+ private final int maxcmp;
+ private final int maxswp;
+ private IndexedSortable s;
+
+ public MeasuredSortable(IndexedSortable s) {
+ this(s, Integer.MAX_VALUE);
+ }
+
+ public MeasuredSortable(IndexedSortable s, int maxcmp) {
+ this(s, maxcmp, Integer.MAX_VALUE);
+ }
+
+ public MeasuredSortable(IndexedSortable s, int maxcmp, int maxswp) {
+ this.s = s;
+ this.maxcmp = maxcmp;
+ this.maxswp = maxswp;
+ }
+
+ public int getCmp() { return comparisions; }
+ public int getSwp() { return swaps; }
+
+ public int compare(int i, int j) {
+ assertTrue("Expected fewer than " + maxcmp + " comparisons",
+ ++comparisions < maxcmp);
+ return s.compare(i, j);
+ }
+
+ public void swap(int i, int j) {
+ assertTrue("Expected fewer than " + maxswp + " swaps",
+ ++swaps < maxswp);
+ s.swap(i, j);
+ }
+
+ }
+
private static class WritableSortable implements IndexedSortable {
private static Random r = new Random();
@@ -179,101 +358,4 @@
}
- public void testAllEqual() throws Exception {
- final int SAMPLE = 500;
- int[] values = new int[SAMPLE];
- Arrays.fill(values, 10);
- SampleSortable s = new SampleSortable(values);
- IndexedSorter sorter = new QuickSort();
- sorter.sort(s, 0, SAMPLE);
- int[] check = s.getSorted();
- assertTrue(Arrays.toString(values) + "\ndoesn't match\n" +
- Arrays.toString(check), Arrays.equals(values, check));
- // Set random min/max, re-sort.
- Random r = new Random();
- int min = r.nextInt(SAMPLE);
- int max = (min + 1 + r.nextInt(SAMPLE - 2)) % SAMPLE;
- values[min] = 9;
- values[max] = 11;
- System.out.println("testAllEqual setting min/max at " + min + "/" + max);
- s = new SampleSortable(values);
- sorter.sort(s, 0, SAMPLE);
- check = s.getSorted();
- Arrays.sort(values);
- assertTrue(check[0] == 9);
- assertTrue(check[SAMPLE - 1] == 11);
- assertTrue(Arrays.toString(values) + "\ndoesn't match\n" +
- Arrays.toString(check), Arrays.equals(values, check));
- }
-
- public void testSorted() throws Exception {
- final int SAMPLE = 500;
- int[] values = new int[SAMPLE];
- Random r = new Random();
- long seed = r.nextLong();
- r.setSeed(seed);
- System.out.println("testSorted seed: " + seed);
- for (int i = 0; i < SAMPLE; ++i) {
- values[i] = r.nextInt(100);
- }
- Arrays.sort(values);
- SampleSortable s = new SampleSortable(values);
- IndexedSorter sorter = new QuickSort();
- sorter.sort(s, 0, SAMPLE);
- int[] check = s.getSorted();
- assertTrue(Arrays.toString(values) + "\ndoesn't match\n" +
- Arrays.toString(check), Arrays.equals(values, check));
- }
-
- public void testSequential() throws Exception {
- final int SAMPLE = 500;
- int[] values = new int[SAMPLE];
- for (int i = 0; i < SAMPLE; ++i) {
- values[i] = i;
- }
- SampleSortable s = new SampleSortable(values);
- IndexedSorter sorter = new QuickSort();
- sorter.sort(s, 0, SAMPLE);
- int[] check = s.getSorted();
- assertTrue(Arrays.toString(values) + "\ndoesn't match\n" +
- Arrays.toString(check), Arrays.equals(values, check));
- }
-
- public void testSingleRecord() throws Exception {
- final int SAMPLE = 1;
- SampleSortable s = new SampleSortable(SAMPLE);
- int[] values = s.getValues();
- IndexedSorter sorter = new QuickSort();
- sorter.sort(s, 0, SAMPLE);
- int[] check = s.getSorted();
- assertTrue(Arrays.toString(values) + "\ndoesn't match\n" +
- Arrays.toString(check), Arrays.equals(values, check));
- }
-
- public void testQuickSort() throws Exception {
- final int SAMPLE = 100000;
- SampleSortable s = new SampleSortable(SAMPLE);
- System.out.println("testQuickSort seed: " + s.getSeed());
- int[] values = s.getValues();
- Arrays.sort(values);
- IndexedSorter sorter = new QuickSort();
- sorter.sort(s, 0, SAMPLE);
- int[] check = s.getSorted();
- assertTrue(Arrays.toString(values) + "\ndoesn't match\n" +
- Arrays.toString(check), Arrays.equals(values, check));
- }
-
- public void testWritable() throws Exception {
- final int SAMPLE = 1000;
- WritableSortable s = new WritableSortable(SAMPLE);
- System.out.println("testWritable seed: " + s.getSeed());
- String[] values = s.getValues();
- Arrays.sort(values);
- IndexedSorter sorter = new QuickSort();
- sorter.sort(s, 0, SAMPLE);
- String[] check = s.getSorted();
- assertTrue(Arrays.toString(values) + "\ndoesn't match\n" +
- Arrays.toString(check), Arrays.equals(values, check));
- }
-
}