You are viewing a plain text version of this content. The canonical link for it is here.
Posted to derby-commits@db.apache.org by mi...@apache.org on 2012/02/17 18:50:34 UTC

svn commit: r1245681 - in /db/derby/code/branches/10.6/java/testing/org/apache/derbyTesting/functionTests: master/st_reclaim_longcol.out tests/storetests/st_reclaim_longcol.java

Author: mikem
Date: Fri Feb 17 17:50:33 2012
New Revision: 1245681

URL: http://svn.apache.org/viewvc?rev=1245681&view=rev
Log:
DERBY-1913 storetests/st_reclaim_longcol.java fails intermittently

backported fix #1243622 from 10.8 branch to 10.6 branch.

The test was counting on being able to control the number of FREE pages,
but the number is very dependent on ability of background thread to run
in a timely manner. Changed the test to check number of allocated pages,
which at least correctly tests that what we think should be background reclaimed
eventually is. Still needs some wait logic which I think will work better
now. I tested against knut's patch to delay the daemon 1 second for every
piece of work and the old test always failed and the new test succeeded. 


Modified:
    db/derby/code/branches/10.6/java/testing/org/apache/derbyTesting/functionTests/master/st_reclaim_longcol.out
    db/derby/code/branches/10.6/java/testing/org/apache/derbyTesting/functionTests/tests/storetests/st_reclaim_longcol.java

Modified: db/derby/code/branches/10.6/java/testing/org/apache/derbyTesting/functionTests/master/st_reclaim_longcol.out
URL: http://svn.apache.org/viewvc/db/derby/code/branches/10.6/java/testing/org/apache/derbyTesting/functionTests/master/st_reclaim_longcol.out?rev=1245681&r1=1245680&r2=1245681&view=diff
==============================================================================
--- db/derby/code/branches/10.6/java/testing/org/apache/derbyTesting/functionTests/master/st_reclaim_longcol.out (original)
+++ db/derby/code/branches/10.6/java/testing/org/apache/derbyTesting/functionTests/master/st_reclaim_longcol.out Fri Feb 17 17:50:33 2012
@@ -1,4 +1,2 @@
 Beginning test: test1:insert/delete of 20 rows with blob(250000)
 Ending test: test1:insert/delete of 20 rows with blob(250000)
-Beginning test: test2:queue of 5 rows with blob(250000), total_work = 500
-Ending test: test2:queue of 5 rows with blob(250000), total_work = 500

Modified: db/derby/code/branches/10.6/java/testing/org/apache/derbyTesting/functionTests/tests/storetests/st_reclaim_longcol.java
URL: http://svn.apache.org/viewvc/db/derby/code/branches/10.6/java/testing/org/apache/derbyTesting/functionTests/tests/storetests/st_reclaim_longcol.java?rev=1245681&r1=1245680&r2=1245681&view=diff
==============================================================================
--- db/derby/code/branches/10.6/java/testing/org/apache/derbyTesting/functionTests/tests/storetests/st_reclaim_longcol.java (original)
+++ db/derby/code/branches/10.6/java/testing/org/apache/derbyTesting/functionTests/tests/storetests/st_reclaim_longcol.java Fri Feb 17 17:50:33 2012
@@ -124,6 +124,15 @@ public class st_reclaim_longcol extends 
         // fit on the first page.  And even then before the fix the rows
         // on the first page are never reclaimed as the 1st one is never
         // deleted.
+        
+        // keep track of what we think should be worst case number
+        // of allocated pages based on total number of rows in the page
+        // after long column is deleted.  Table starts with 1 page, then 
+        // on each iteration the worst case is that the new short row goes
+        // onto a new page.  Start out with 5 for overhead.
+        int worst_case_max_allocated = 5;
+
+      
         for (int iter = 1; iter < num_rows; iter++)
         {
             // insert the long blob
@@ -140,22 +149,20 @@ public class st_reclaim_longcol extends 
             del_stmt.setInt(1, iter);
             del_stmt.executeUpdate();
 
+            // in the worst case each new set of 2 rows will result in
+            // the one undeleted row getting put on a new page.
+            worst_case_max_allocated++;
+
             // commit the xact, post commit should kick in to reclaim the
             // blob space sometime after the commit.
             conn.commit();
 
-            // sleep, just in case on this machine background
-            // post commit is slow.
-            try
-            {
-                Thread.sleep(20);
-            }
-            catch (Exception ex)
-            {
-                // just ignore interupts of sleep.
-            }
+            // after each commit give the background thread a chance to 
+            // reclaim the deleted rows.
+            wait_for_max_allocated(conn, "test1", worst_case_max_allocated);
         }
 
+        // get total pages = allocated pages + free pages
         int[] sp_info = getSpaceInfo(conn, "APP", "LONGCOL", true);
 
         int total_pages = 
@@ -163,55 +170,26 @@ public class st_reclaim_longcol extends 
 
         int total_expected_page_max = 12 + num_rows;
 
-        int MAX_WAIT_FOR_BG_THREAD = 100000;
-        int ms_waited              = 20;
-
         while (total_pages > total_expected_page_max)
         {
-            if (ms_waited < MAX_WAIT_FOR_BG_THREAD)
-            {
-                // The result is dependent on background activity which may
-                // differ from machine to machine.  Loop, sleeping in this
-                // thread to allow background thread to run.
+            // for the above test case we expect the following space:
+            //     page 0
+            //     page 1 
+            //     free space from 1 blob - 9 pages per blob
+            //     allocated page per long/short blob insert.  Each long
+            //         inserts onto a new page to try and fit it entirely
+            //         on a page.  Then the short blob goes to last inserted
+            //         page.  This process repeats.  The previous pages are
+            //         marked "half-filled" and can be used in future for
+            //         short rows that don't fit on the last page inserted.
 
-                try
-                {
-                    ms_waited += 1000;
-                    Thread.sleep(1000);
-                }
-                catch (Exception ex)
-                {
-                    // just ignore interupts of sleep.
-                }
-
-                sp_info = getSpaceInfo(conn, "APP", "LONGCOL", true);
-
-                total_pages = 
-                    sp_info[SPACE_INFO_NUM_ALLOC] + 
-                    sp_info[SPACE_INFO_NUM_FREE];
-            }
-            else
-            {
-                // for the above test case we expect the following space:
-                //     page 0
-                //     page 1 
-                //     free space from 1 blob - 9 pages per blob
-                //     allocated page per long/short blob insert.  Each long
-                //         inserts onto a new page to try and fit it entirely
-                //         on a page.  Then the short blob goes to last inserted
-                //         page.  This process repeats.  The previous pages are
-                //         marked "half-filled" and can be used in future for
-                //         short rows that don't fit on the last page inserted.
-
-                System.out.println(
-                    "Test 1 failed, expected less than " + 
-                    total_expected_page_max + " pages - count is:\n" +
-                    "free pages     : "   + sp_info[SPACE_INFO_NUM_FREE] +
-                    "\nallocated pages: " + sp_info[SPACE_INFO_NUM_ALLOC] +
-                    "\nWaited " + ms_waited + "ms. for background work.");
+            System.out.println(
+                "Test 1 failed, expected less than " + 
+                total_expected_page_max + " pages - count is:\n" +
+                "free pages     : "   + sp_info[SPACE_INFO_NUM_FREE] +
+                "\nallocated pages: " + sp_info[SPACE_INFO_NUM_ALLOC]);
 
-                break;
-            }
+            break;
         }
 
         if (verbose)
@@ -236,13 +214,14 @@ public class st_reclaim_longcol extends 
     }
 
     /**
-     * Test reclaim of sequence of deleted blobs.
+     * DERBY-1913
      * <p>
-     * Simulates a "queue" of work of input "work_size".  Inserts "work_size"
-     * elements, and then subsequently in each transaction inserts a new 
-     * work item and deletes the oldest work item.  Checks that the used
-     * space reaches a steady state, rather than constantly growing.
-     *
+     * test2 is too sensitive to machine speed and background thread
+     * processing.  It would be better suited as a long running stress
+     * test if someone has the inclination.  Disabling this test for
+     * now.  test1 covers the original intent to test that blobs are
+     * immediately marked for post commit on individual delete, rather
+     * than waiting for all rows on a page to be deleted.
      **/
     public void test2(
     Connection  conn, 
@@ -284,14 +263,12 @@ public class st_reclaim_longcol extends 
             ins_stmt.setBytes(2, long_byteVal);
             ins_stmt.executeUpdate();
 
-            // commit the xact, post commit should kick in to reclaim the
-            // blob space sometime after the commit.
         }
         conn.commit();
 
 
         // for each subsequent work item, queue it to the end and delete
-        // the oldes existing work item.
+        // the oldest existing work item.
         for (int iter = work_size; iter < total_work; iter++)
         {
             // insert the long blob
@@ -299,30 +276,16 @@ public class st_reclaim_longcol extends 
             ins_stmt.setBytes(2, long_byteVal);
             ins_stmt.executeUpdate();
 
-
             // delete the long blob
-            del_stmt.setInt(1, iter - work_size - 1);
+            del_stmt.setInt(1, iter - work_size);
             del_stmt.executeUpdate();
 
             // commit the xact, post commit should kick in to reclaim the
             // blob space sometime after the commit.
             conn.commit();
-
-            try
-            {
-                Thread.sleep(20);
-            }
-            catch (Exception ex)
-            {
-                // just ignore interupts of sleep.
-            }
         }
 
 
-        int[] sp_info = getSpaceInfo(conn, "APP", "LONGCOL", true);
-
-        int total_pages = 
-            sp_info[SPACE_INFO_NUM_ALLOC] + sp_info[SPACE_INFO_NUM_FREE];
 
         // Expect at least allocated pages * 10 for each item in work_size, 
         // plus some overhead for 1st page and such.
@@ -335,48 +298,18 @@ public class st_reclaim_longcol extends 
         // per work size, and then a guess at how fast post commit can keep
         // up with free pages.  Run the test with total_work reasonably 
         // bigger than worksize, something like work_size=5 and total_work >100
-        int total_expected_page_max = 30 * work_size; 
-
-        int MAX_WAIT_FOR_BG_THREAD = 100000;
-        int ms_waited              = 20;
-
-        while (total_pages > total_expected_page_max)
-        {
-            if (ms_waited < MAX_WAIT_FOR_BG_THREAD)
-            {
-                // The result is dependent on background activity which may
-                // differ from machine to machine.  Loop, sleeping in this
-                // thread to allow background thread to run.
 
-                try
-                {
-                    ms_waited += 1000;
-                    Thread.sleep(1000);
-                }
-                catch (Exception ex)
-                {
-                    // just ignore interupts of sleep.
-                }
-
-                sp_info = getSpaceInfo(conn, "APP", "LONGCOL", true);
-
-                total_pages = 
-                    sp_info[SPACE_INFO_NUM_ALLOC] + 
-                    sp_info[SPACE_INFO_NUM_FREE];
-            }
-            else
-            {
+        // Wait for background thread to convert all deleted rows to 
+        // free pages.  Total number of free pages is very variable, dependent
+        // on availability of background cpu, so just wait to get under
+        // an expected max of allocated pages.  Expect 10 allocated pages per
+        // item in work size and add 5 pages for misc overhead.
+        wait_for_max_allocated(conn, "test2", (10 * work_size) + 5);
 
-                System.out.println(
-                    "Test 2 failed, expected less than " + 
-                    total_expected_page_max + " pages - count is:\n" +
-                    "free pages     : "   + sp_info[SPACE_INFO_NUM_FREE] +
-                    "\nallocated pages: " + sp_info[SPACE_INFO_NUM_ALLOC] +
-                    "\nWaited " + ms_waited + "ms. for background work.");
+        int[] sp_info = getSpaceInfo(conn, "APP", "LONGCOL", true);
 
-                break;
-            }
-        }
+        int total_pages = 
+            sp_info[SPACE_INFO_NUM_ALLOC] + sp_info[SPACE_INFO_NUM_FREE];
 
         if (verbose)
         {
@@ -392,42 +325,41 @@ public class st_reclaim_longcol extends 
 
         // Run another iteration of the work loop, by now memory should 
         // have gotten to constant.
-        for (int iter = work_size + total_work; iter < (total_work * 2); iter++)
+        for (int iter = total_work; iter < (total_work * 2); iter++)
         {
             // insert the long blob
             ins_stmt.setInt(  1, iter);
             ins_stmt.setBytes(2, long_byteVal);
             ins_stmt.executeUpdate();
 
-
             // delete the long blob
-            del_stmt.setInt(1, iter - work_size - 1);
+            del_stmt.setInt(1, iter - work_size);
             del_stmt.executeUpdate();
 
             // commit the xact, post commit should kick in to reclaim the
             // blob space sometime after the commit.
             conn.commit();
-
-            try
-            {
-                Thread.sleep(100);
-            }
-            catch (Exception ex)
-            {
-                // just ignore interupts of sleep.
-            }
         }
 
+        // Wait for background thread to convert all deleted rows to 
+        // free pages.  Total number of free pages is very variable, dependent
+        // on availability of background cpu, so just wait to get under
+        // an expected max of allocated pages.  Expect 10 allocated pages per
+        // item in work size and add 5 pages for misc overhead.
+        wait_for_max_allocated(conn, "test2_2", (10 * work_size) + 5);
+
 
         int[] second_sp_info = getSpaceInfo(conn, "APP", "LONGCOL", true);
 
         int second_total_pages = 
             sp_info[SPACE_INFO_NUM_ALLOC] + sp_info[SPACE_INFO_NUM_FREE];
 
+        // This could fail due to machine variability, leaving it for now
+        // as I have not seen this failure reported.
         if (total_pages != second_total_pages)
         {
             System.out.println(
-                "Test failed, expected constant memory after second run." +
+                "Test 2 failed, expected constant memory after second run." +
                 "initial total = " + total_pages +
                 "second total = " + second_total_pages);
         }
@@ -452,11 +384,112 @@ public class st_reclaim_longcol extends 
     }
 
 
+    /**
+     * wait for background thread to convert allocated pages to free pages
+     * <p>
+     * Wait until the total number of allocated pages is <= alloc_wait_count.
+     * The expectation is that the test has performed some deletes and 
+     * committed allowing the background task converted empty allocated pages
+     * with only deleted rows into free pages.
+     *
+     * On an machine with some idle processors only a short wait should
+     * be necessary.  But on machines with lots of load, possibly other
+     * tests running just sleeping does not guarantee background thread
+     * an immediate chance to run.  Without this extra wait some nightly's
+     * were seeing failures, see DERBY-1913.
+     **/
+    private void wait_for_max_allocated(
+    Connection  conn,
+    String      test_name,
+    int         alloc_wait_count)
+        throws SQLException 
+    {
+        // an initial 1/10 of second which should work for most environments.
+        try
+        {
+            Thread.sleep(100);
+        }
+        catch (Exception ex)
+        {
+            // just ignore interupts of sleep.
+        }
+
+        // get number of allocated pages
+        int[] sp_info     = getSpaceInfo(conn, "APP", "LONGCOL", true);
+        int   total_alloc = sp_info[SPACE_INFO_NUM_ALLOC];
+        int   save_total_alloc = total_alloc;
+
+        // wait for maximum 100 seconds.
+
+        // wait 10 seconds and give up if it has mad no progress.
+        int max_wait_for_bg_thread = 10000;
+        int ms_waited              = 100;
+
+        while (total_alloc > alloc_wait_count)
+        {
+            if (ms_waited < max_wait_for_bg_thread)
+            {
+                // The result is dependent on background activity which may
+                // differ from machine to machine.  Loop, sleeping in this
+                // thread to allow background thread to run.
+
+                try
+                {
+                    ms_waited += 1000;
+                    Thread.sleep(1000);
+                }
+                catch (Exception ex)
+                {
+                    // just ignore interupts of sleep.
+                }
+
+                // get number of allocated pages
+                sp_info     = getSpaceInfo(conn, "APP", "LONGCOL", true);
+                total_alloc = sp_info[SPACE_INFO_NUM_ALLOC];
+
+            }
+            else if (total_alloc < save_total_alloc)
+            {
+                // background thread did make progress, give it another
+                // 10 seconds.
+                save_total_alloc = total_alloc;
+                max_wait_for_bg_thread += 10000;
+            }
+            else
+            {
+                // for the above test case we expect the following space:
+                //     page 0
+                //     page 1 
+                //     free space from 1 blob - 9 pages per blob
+                //     allocated page per long/short blob insert.  Each long
+                //         inserts onto a new page to try and fit it entirely
+                //         on a page.  Then the short blob goes to last inserted
+                //         page.  This process repeats.  The previous pages are
+                //         marked "half-filled" and can be used in future for
+                //         short rows that don't fit on the last page inserted.
+
+                System.out.println(
+                    "Test " + test_name + 
+                    " failed in wait_for_max_allocated(), expected less than " + 
+                    alloc_wait_count + " allocated pages:\n" +
+                    "free pages     : "   + sp_info[SPACE_INFO_NUM_FREE] +
+                    "\nallocated pages: " + sp_info[SPACE_INFO_NUM_ALLOC] +
+                    "\nWaited " + ms_waited + "ms. for background work.");
+
+                break;
+            }
+        }
+    }
+
+
     public void testList(Connection conn)
         throws SQLException
     {
         test1(conn, 250000, 20);
-        test2(conn, 250000, 5, 500);
+
+        // DERBY-1913 - disabling test2 as it is too sensitive to background
+        // processing.
+        // test2(conn, 250000, 5, 500);
     }
 
     public static void main(String[] argv)