You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@river.apache.org by Peter Firmstone <ji...@zeus.net.au> on 2013/04/01 00:31:11 UTC

test failure repeatability

The following test fails 30 times in a run of 130 tests:

[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	1
[java] 	# 	of 	tests 	passed 	= 	0
[java] 	# 	of 	tests 	passed 	= 	1

	
	
	
	
	
	100




Buildfile: build.xml

qa.run-tests:

james-brown:
   [delete] Deleting directory 
/opt/src/River_Fixed/peterConcurrentPolicy/qa/soul
    [mkdir] Created dir: /opt/src/River_Fixed/peterConcurrentPolicy/qa/soul
    [touch] Creating 
/opt/src/River_Fixed/peterConcurrentPolicy/qa/soul/soul.201303312239034808

run-tests:
     [java]
     [java] -----------------------------------------
     [java] CONFIGURATION FILE:
     [java]
     [java]    
/opt/src/River_Fixed/peterConcurrentPolicy/qa/src/com/sun/jini/test/resources/qaHarness.prop
     [java]
     [java] -----------------------------------------
     [java] SETTING UP THE TEST LIST:
     [java]
     [java]    Adding test: 
com/sun/jini/test/spec/javaspace/conformance/snapshot/SnapshotExpirationNotifyTest.td
     [java]
     [java] -----------------------------------------
     [java] GENERAL HARNESS CONFIGURATION INFORMATION:
     [java]
     [java]    Date started:
     [java]       Sun Mar 31 22:39:37 EST 2013
     [java]    Installation directory of the JSK:
     [java]       
com.sun.jini.jsk.home=/opt/src/River_Fixed/peterConcurrentPolicy
     [java]    Installation directory of the harness:
     [java]       
com.sun.jini.qa.home=/opt/src/River_Fixed/peterConcurrentPolicy/qa
     [java]    Categories being tested:
     [java]       categories=No Categories
     [java] -----------------------------------------
     [java] ENVIRONMENT PROPERTIES:
     [java]
     [java]    JVM information:
     [java]       Java HotSpot(TM) Server VM, 20.5-b03, 32 bit VM mode
     [java]       Sun Microsystems Inc.
     [java]    OS information:
     [java]       SunOS, 5.10, sparc
     [java]
     [java] -----------------------------------------
     [java] STARTING TO RUN THE TESTS
     [java]
     [java]
     [java] Running 
com/sun/jini/test/spec/javaspace/conformance/snapshot/SnapshotExpirationNotifyTest.td
     [java] Time is Sun Mar 31 22:39:38 EST 2013
     [java] Starting test in separate process with command:
     [java] /usr/jdk/jdk1.6.0_30/jre/bin/java 
-Djava.security.manager=org.apache.river.api.security.CombinerSecurityManager 
-Djava.security.policy=file:/opt/src/River_Fixed/peterConcurrentPolicy/qa/harness/policy/defaulttest.policy 
-Djava.rmi.server.codebase=http://bluto:9082/qa1-javaspace-dl.jar -cp 
/opt/src/River_Fixed/peterConcurrentPolicy/qa/lib/jiniharness.jar:/opt/src/River_Fixed/peterConcurrentPolicy/qa/lib/jinitests.jar:/opt/src/River_Fixed/peterConcurrentPolicy/lib/jsk-platform.jar:/opt/src/River_Fixed/peterConcurrentPolicy/lib/jsk-lib.jar:/opt/src/River_Fixed/peterConcurrentPolicy/lib/high-scale-lib.jar:/opt/src/River_Fixed/peterConcurrentPolicy/lib/custard-apple-1.0.2.jar 
-ea -esa -client 
-Djava.ext.dirs=/usr/jdk/jdk1.6.0_30/jre/lib/ext:/usr/jdk/packages/lib/ext:/opt/src/River_Fixed/peterConcurrentPolicy/qa/lib-ext:/opt/src/River_Fixed/peterConcurrentPolicy/lib-ext 
-Dcom.sun.jini.jsk.port=9080 -Dcom.sun.jini.qa.port=9081 
-Dcom.sun.jini.jsk.home=/opt/src/River_Fixed/peterConcurrentPolicy 
-Dcom.sun.jini.qa.home=/opt/src/River_Fixed/peterConcurrentPolicy/qa 
-Dcom.sun.jini.qa.harness.harnessJar=/opt/src/River_Fixed/peterConcurrentPolicy/qa/lib/jiniharness.jar 
-Dcom.sun.jini.qa.harness.testJar=/opt/src/River_Fixed/peterConcurrentPolicy/qa/lib/jinitests.jar 
-Dcom.sun.jini.qa.harness.runjiniserver=true 
-Dcom.sun.jini.qa.harness.runkitserver=true 
-Djava.security.properties=file:/opt/src/River_Fixed/peterConcurrentPolicy/qa/harness/trust/dynamic-policy.properties 
-Dcom.sun.jini.qa.harness.testhosts= 
-Djava.util.logging.config.file=/home/peter/logging.properties 
-Dcom.sun.jini.test.home=/opt/src/River_Fixed/peterConcurrentPolicy/qa 
-Dcom.sun.jini.test.port=9082 
-Dcom.sun.jini.qa.harness.policies=file:/opt/src/River_Fixed/peterConcurrentPolicy/qa/src/com/sun/jini/test/resources/jinitest.policy 
-Djava.ext.dirs=/usr/jdk/jdk1.6.0_30/jre/lib/ext:/usr/jdk/packages/lib/ext:/opt/src/River_Fixed/peterConcurrentPolicy/qa/lib-ext:/opt/src/River_Fixed/peterConcurrentPolicy/lib-ext 
com.sun.jini.qa.harness.MasterTest 
com/sun/jini/test/spec/javaspace/conformance/snapshot/SnapshotExpirationNotifyTest.td
     [java] com.sun.jini.qa.harness.TestException: Not all listeners've 
got expected number of events.
     [java]     at 
com.sun.jini.test.spec.javaspace.conformance.snapshot.SnapshotExpirationNotifyTest.run(SnapshotExpirationNotifyTest.java:370)
     [java]     at 
com.sun.jini.qa.harness.MasterTest.doTest(MasterTest.java:256)
     [java]     at 
com.sun.jini.qa.harness.MasterTest.main(MasterTest.java:144)
     [java]
     [java] TIME: 10:42:54 PM
     [java]
     [java] Test process was destroyed and returned code 1
     [java] 
com/sun/jini/test/spec/javaspace/conformance/snapshot/SnapshotExpirationNotifyTest.td
     [java] Test Failed: Test Failed: 
com.sun.jini.qa.harness.TestException: Not all listeners've got expected 
number of events.
     [java]
     [java]
     [java] -----------------------------------------
     [java]
     [java] SUMMARY =================================
     [java]
     [java] 
com/sun/jini/test/spec/javaspace/conformance/snapshot/SnapshotExpirationNotifyTest.td
     [java] Test Failed: Test Failed: 
com.sun.jini.qa.harness.TestException: Not all listeners've got expected 
number of events.
     [java]
     [java] -----------------------------------------
     [java]
     [java] # of tests started   = 1
     [java] # of tests completed = 1
     [java] # of tests passed    = 0
     [java] # of tests failed    = 1
     [java]
     [java] -----------------------------------------
     [java]
     [java]    Date finished:
     [java]       Sun Mar 31 22:42:59 EST 2013
     [java]    Time elapsed:
     [java]       201 seconds
     [java]
     [java] Java Result: 1

collect-result:

BUILD FAILED
/opt/src/River_Fixed/peterConcurrentPolicy/build.xml:2105: The following 
error occurred while executing this line:
/opt/src/River_Fixed/peterConcurrentPolicy/qa/build.xml:357: condition 
satisfied

Total time: 3 minutes 30 seconds


Re: test failure repeatability

Posted by Peter <ji...@zeus.net.au>.
Good point, the network should be faster, so there's a high likelihood it's a concurrency bug.

----- Original message -----
> So...
>
> On 1 April 2013 21:03, Peter Firmstone <ji...@zeus.net.au> wrote:
>
> > I understand, better the devil you know argument, if someone wants to make
> > a 2.2.1 release, I'm certainly not against it.
> >
> >
> Fair enough.
>
>
> > It won't take me long to track down & fix the concurrency bug, if that's
> > the cause.
> > I tend to agree with Greg's assessment that the tests are contrived.  The
> > tests make a poor attempt at determining whether the leased resource is
> > still available by checking within a time window around lease expiry, it's
> > subject to so many assumptions that I can't see why that determination is
> > even made.
> >
>
> Well they seem contrived but given the size of the lease times we're
> dealing with and the room between them for clean expiry, I'm a little
> concerned. No question, they aren't the best tests, crude for sure, but
> they don't look broke as such. I'll keep looking...
>
>
> > I don't think we can guarantee a lease will expire during a specific time
> > window, most programmers will hopefully renew a lease long before it's due
> > to expire.  Until we make that assessment, I think we'll struggle with the
> > tests.
> >
> >
> I think the point is a lease should expire exactly to the millisecond. What
> you can't guarantee is when an event gets issued against the leased object,
> before or after. Nevertheless with big enough windows before expiry, it
> should work so...
>
>
> > Remember we're now dealing with concurrency over network connections.  The
> > so called failures may just be due to timing.
> >
> > The network is asynchronous.
> >
> >
> Yep, preaching to the converted.
>
>
> > The old code appears to slow down network connections enough to make them
> > less asynchronous, heck even really bad concurrency race conditions and
> > obvious bugs didn't manifest during testing.
> >
> >
> Mmmm, see my gut says that if the network is "faster" these tests should be
> less likely to fail as it's more likely events are delivered in the
> time-frames required, not less.
>
>
> > Example of how old code makes network less asynchronous:
> >
> >    1. Thread runs, opens network connection, security check is performed.
> >    2. Second thread runs, opens network connection, security check is
> >          performed, first thread is synchronized on a DNS check, the second
> >          thread waits.
> >    3. Second thread will complete after first.
> >
> > New code:
> >
> >    1. Thread runs, opens network connection, security check is performed
> >          and returns without contention.
> >    2. Second thread runs opens network connection, security check is
> >          performed and returns without contention.
> >    3. Second thread by chance finishes communication prior to first.
> >
> > If we look at ClassLoader, the story is similar.
> >
> > Regards,
> >
> > Peter.
> >
> > Dan Creswell wrote:
> >
> > > I feel Bryan has a point.
> > >
> > > Putting out a release with just the Levels fix is better than the current
> > > release which won't run at all under various circumstances. i.e. It is a
> > > small step forward and in no way a backward step.
> > >
> > > Such a release mightn't be as much of a step forward as one also
> > > featuring the concurrency work *but* the concurrency work is also seemingly
> > > amplifying the occurrence of preexisting concurrency problems. It can be
> > > said then to perform both better and worse than previous releases.
> > >
> > > In essence, the small fix release is a known quantity, the new one less
> > > so. We maybe ought to give users the choice as to how much unknown'ness
> > > they wish to indulge in.
> > >
> > >
> > >
> > >
> > > On 1 April 2013 13:10, Peter <jini@zeus.net.au <ma...@zeus.net.au>>
> > > wrote:
> > >
> > >        The big problem is there are fewer concurrency bugs in the present
> > >        branch and there are definitely more in the previous release, they
> > >        just haven't presented in test in previous releases, although they
> > >        have in deployment.
> > >
> > >        The issue appears to be with javaspaces / lease / transactions.
> > >
> > >        It's also a possibility that there's just a timing issue with the
> > >        tests.
> > >
> > >        I'd like to fix the tests before releasing, it's unfortunate the
> > >        last fix to jeri seemed to expose these new test failures.  It's a
> > >        little like fixing rust in an old car, the more you remove, the
> > >        more you find.
> > >
> > >        What I really need is more help; I don't work on this full time, so it
> > >        takes longer than a team working on commercial software.
> > >
> > >        Peter.
> > >        ----- Original message -----
> > >        > I am uncomfortable with a release that has known concurrency
> > >        problems.  I
> > >        > am also uncomfortable that the custom Levels serialization change by
> > >        > Oracle [1] has broken the current release for new JVMs.  I would
> > >        like to
> > >        > see a minor release which fixes that serialization problem and a
> > >        candidate
> > >        > release which gives people a chance to discover concurrency
> > >        issues without
> > >        > risking a release that is known to be unstable.
> > >        >
> > >        > I'm happy to review a few of the classes with known concurrency
> > >        problems
> > >        > to see if I can help nail some of these bugs.  Since I do not
> > >        know the
> > >        > river internals, I would only be able to spot concurrency
> > >        problems that
> > >        > exist within a class.  I am not in a good position to comment on
> > >        > concurrency problems that might arise through the interactions among
> > >        > classes.
> > >        >
> > >        > Bryan
> > >        >
> > >        > [1]
> > > https://issues.apache.org/**jira/browse/RIVER-416<https://issues.apache.org/jira/browse/RIVER-416>
> > >        >        > On 4/1/13 6:14 AM, "Peter Firmstone" <jini@zeus.net.au
> > >        <ma...@zeus.net.au>> wrote:
> > >        >
> > >        > > The attachments will be removed from the list, so I've cc'd
> > >        you, anyone
> > >        > > who's interested, let me know I can forward the attachments.
> > > They can
> > >        > > be opened with jvisualvm.
> > >        > >
> > >        > > The profiling isn't perfect, the test runs for about 8.5
> > >        minutes, so
> > >        > > hotspot should have kicked in relatively early in both test runs.
> > >        > >
> > >        > > I guess a significant problem is; the more I remove performance
> > >        > > impedances, like unnecessary DNS calls, the faster
> > >        multithreading and
> > >        > > context switching gets.
> > >        > >
> > >        > > Not only did the old policy providers create contention, but
> > >        it was
> > >        > > slower for single threaded performance (I'll have to run the
> > >        previous
> > >        > > release branch for comparison when I get some time).
> > >        > >
> > >        > > The URIGrant.implies call is now down to .228 ms per
> > >        invocation, down
> > >        > > from 1.68 ms per invocation this week, which was already quite
> > >        good (on
> > >        > > old UltraSparcII hardware), during stress tests this method is
> > >        called
> > >        > > almost 40,000 times.
> > >        > >
> > >        > > In comparison the old policy provider which required a DNS
> > >        call (every
> > >        > > time CodeSource.implies is called, functionality now replaced by
> > >        > > URIGrant.implies), the old policy provider also cached all
> > >        Permission's
> > >        > > in highly contended PermissionCollection's, which during
> > >        network calls
> > >        > > invoked SocketPermission.implies, possibly for every
> > >        SocketPermission in
> > >        > > the PermissionCollection, DNS is also consulted by
> > >        > > SocketPermission.implies, while synchronized, ouch!
> > >        > >
> > >        > > URIGrant.implies is non blocking, that's right zero contention.
> > >        > > SocketPermission.implies DNS calls can be avoided in most cases if
> > >        > > PermissionComparator finds an exact match or wild card.
> > >        > >
> > >        > > This is without the CombinerSecurityManager, which improves
> > >        security
> > >        > > performance by a factor of 10 (SocketPermission is only
> > >        checked once for
> > >        > > each AccessControlContext).
> > >        > >
> > >        > > PreferredClassProvider no longer creates unnecessary DNS calls, and
> > >        > > neither does SecureClassLoader,  URLClassLoader or
> > >        PreferredClassLoader.
> > >        > >
> > >        > > So the good news is the next release will feel much faster,
> > >        the bad news
> > >        > > is that existing concurrency bugs that previously didn't
> > >        appear during
> > >        > > test runs, but were likely to manifest during production, are now
> > >        occurring
> > >        > > during testing.  There's more good news, we fixed a number of
> > >        > > concurrency bugs since the last release too, I guess I have to
> > >        draw the
> > >        > > line somewhere and cut a release.
> > >        > >
> > >        > > DNS calls haven't been completely eliminated as some are still
> > >        > > necessary, but a multitude of unnecessary DNS calls have been
> > >        eliminated.
> > >        > >
> > >        > > Regards,
> > >        > >
> > >        > > Peter.
> > >        > >
> > >        > > Peter Firmstone wrote:
> > >        > > > They've passed more consistently in the past, they're either
> > >        > > > concurrency bugs or network timing related, it would be nice
> > >        to at
> > >        > > > least determine if it's the former or latter.
> > >        > > >
> > >        > > > Cheers,
> > >        > > >
> > >        > > > Peter.
> > >        > > >
> > >        > > > Tom Hobbs wrote:
> > >        > > > > Are these all new failures, i.e. were they working
> > >        before?  Or are they
> > >        > > > > "new" failures in that the test categories have only
> > >        recently been
> > >        > > > > reactivated and the failures discovered?
> > >        > > > >
> > >        > > > > Are they a big enough blocker to stop a release?
> > >        > > > >
> > >        > > > >
> > >        > > > > On Sun, Mar 31, 2013 at 11:31 PM, Peter Firmstone
> > >        <jini@zeus.net.au <ma...@zeus.net.au>>
> > >
> > >        > > > > wrote:
> > >        > > > >
> > >        > > > >
> > >        > > > > > The following test fails 30 times in a run of 130 tests:
> > >        > > > > >
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            0
> > >        > > > > > [java]  #                      of                  tests      passed  =
> > >            1
> > >        > > > > >
> > >        > > > > >
> > >        > > > > >
> > >        > > > > >
> > >        > > > > >
> > >        > > > > >
> > >        > > > > >                              100
> > >        > > > > >
> > >        > > > > >
> > >        > > > > >
> > >        > > > > >
> > >        > > > > > Buildfile: build.xml
> > >        > > > > >
> > >        > > > > > qa.run-tests:
> > >        > > > > >
> > >        > > > > > james-brown:
> > >        > > > > >      [delete] Deleting directory /opt/src/River_Fixed/**
> > >        > > > > > peterConcurrentPolicy/qa/soul
> > >        > > > > >          [mkdir] Created dir: /opt/src/River_Fixed/**
> > >        > > > > > peterConcurrentPolicy/qa/soul
> > >        > > > > >          [touch] Creating
> > >        > > > > > /opt/src/River_Fixed/****peterConcurrentPolicy/qa/soul/***
> > >        > > > > > *soul.201303312239034808
> > >        > > > > >
> > >        > > > > > run-tests:
> > >        > > > > >              [java]
> > >        > > > > >              [java] ------------------------------****-----------
> > >        > > > > >              [java] CONFIGURATION FILE:
> > >        > > > > >              [java]
> > >        > > > > >              [java]                /opt/src/River_Fixed/****
> > > peterConcurrentPolicy/qa/src/****
> > >        > > > > > com/sun/jini/test/resources/****qaHarness.prop
> > >        > > > > >              [java]
> > >        > > > > >              [java] ------------------------------****-----------
> > >        > > > > >              [java] SETTING UP THE TEST LIST:
> > >        > > > > >              [java]
> > >        > > > > >              [java]          Adding test:
> > >        > > > > > com/sun/jini/test/spec/****javaspace/conformance/
> > >        > > > > > **snapshot/****SnapshotExpirationNotifyTest.****td
> > >        > > > > >              [java]
> > >        > > > > >              [java] ------------------------------****-----------
> > >        > > > > >              [java] GENERAL HARNESS CONFIGURATION INFORMATION:
> > >        > > > > >              [java]
> > >        > > > > >              [java]          Date started:
> > >        > > > > >              [java]                      Sun Mar 31 22:39:37 EST 2013
> > >        > > > > >              [java]          Installation directory of the JSK:
> > >        > > > > >              [java]                            com.sun.jini.jsk.home=/opt/****
> > > src/River_Fixed/**
> > >        > > > > > peterConcurrentPolicy
> > >        > > > > >              [java]          Installation directory of the harness:
> > >        > > > > >              [java]                            com.sun.jini.qa.home=/opt/src/*
> > > ***River_Fixed/**
> > >        > > > > > peterConcurrentPolicy/qa
> > >        > > > > >              [java]          Categories being tested:
> > >        > > > > >              [java]                      categories=No Categories
> > >        > > > > >              [java] ------------------------------****-----------
> > >        > > > > >              [java] ENVIRONMENT PROPERTIES:
> > >        > > > > >              [java]
> > >        > > > > >              [java]          JVM information:
> > >        > > > > >              [java]                      Java HotSpot(TM) Server VM,
> > >        20.5-b03, 32 bit VM mode
> > >        > > > > >              [java]                      Sun Microsystems Inc.
> > >        > > > > >              [java]          OS information:
> > >        > > > > >              [java]                      SunOS, 5.10, sparc
> > >        > > > > >              [java]
> > >        > > > > >              [java] ------------------------------****-----------
> > >        > > > > >              [java] STARTING TO RUN THE TESTS
> > >        > > > > >              [java]
> > >        > > > > >              [java]
> > >        > > > > >              [java] Running
> > >        com/sun/jini/test/spec/****javaspace/conformance/**
> > >        > > > > > snapshot/****SnapshotExpirationNotifyTest.****td
> > >        > > > > >              [java] Time is Sun Mar 31 22:39:38 EST 2013
> > >        > > > > >              [java] Starting test in separate process with
> > >        command:
> > >        > > > > >              [java] /usr/jdk/jdk1.6.0_30/jre/bin/****java
> > >        > > > > >
> > >        > > > > >
> > >        -Djava.security.manager=org.****apache.river.api.security.****
> > > CombinerSecur
> > >        > > > > > ityManager
> > >        > > > > >
> > >        > > > > > -Djava.security.policy=file:/****opt/src/River_Fixed/**
> > >        > > > > >
> > >        peterConcurrentPolicy/qa/****harness/policy/defaulttest.****policy
> > >        > > > > >
> > >        > > > > >
> > >        -Djava.rmi.server.codebase=**htt**p://bluto:9082/qa1-**
> > > javaspace-**dl.jar<h
> > >        > > > > > ttp://bluto:9082/qa1-**javaspace-dl.jar>-cp
> > >        > > > > > /opt/src/River_Fixed/
> > >        > > > > >
> > >        > > > > >
> > >        **peterConcurrentPolicy/qa/**lib/**jiniharness.jar:/opt/**
> > > src/**River_Fixed
> > >        > > > > > /**
> > >        > > > > >
> > >        > > > > >
> > >        peterConcurrentPolicy/qa/lib/****jinitests.jar:/opt/src/River_****
> > >        > > > > >
> > >        > > > > >
> > >        Fixed/peterConcurrentPolicy/****lib/jsk-platform.jar:/opt/src/**
> > > **River_Fix
> > >        > > > > > ed/
> > >        > > > > >
> > >        > > > > >
> > >        **peterConcurrentPolicy/lib/**jsk-**lib.jar:/opt/src/River_**Fixed/**
> > >        > > > > >
> > >        > > > > >
> > >        peterConcurrentPolicy/lib/****high-scale-lib.jar:/opt/src/****
> > > River_Fixed/*
> > >        > > > > > *
> > >        > > > > >
> > >        > > > > > peterConcurrentPolicy/lib/****custard-apple-1.0.2.jar -ea
> > >        -esa -client
> > >        > > > > >
> > >        -Djava.ext.dirs=/usr/jdk/jdk1.****6.0_30/jre/lib/ext:/usr/jdk/****
> > >        > > > > >
> > >        > > > > >
> > >        packages/lib/ext:/opt/src/****River_Fixed/****
> > > peterConcurrentPolicy/qa/lib-
> > >        > > > > > **
> > >        > > > > >
> > >        > > > > > ext:/opt/src/River_Fixed/****peterConcurrentPolicy/lib-ext
> > >        > > > > > -Dcom.sun.jini.jsk.port=9080 -Dcom.sun.jini.qa.port=9081
> > >        > > > > >
> > >        -Dcom.sun.jini.jsk.home=/opt/****src/River_Fixed/****
> > > peterConcurrentPolicy
> > >        > > > > >
> > >        > > > > >
> > >        -Dcom.sun.jini.qa.home=/opt/****src/River_Fixed/****
> > > peterConcurrentPolicy/q
> > >        > > > > > a
> > >        > > > > >
> > >        > > > > > -Dcom.sun.jini.qa.harness.****harnessJar=/opt/src/River_**
> > >        > > > > > Fixed/peterConcurrentPolicy/****qa/lib/jiniharness.jar
> > >        > > > > > -Dcom.sun.jini.qa.harness.****
> > > testJar=/opt/src/River_Fixed/****
> > >        > > > > > peterConcurrentPolicy/qa/lib/****jinitests.jar
> > >        > > > > > -Dcom.sun.jini.qa.harness.****runjiniserver=true
> > >        > > > > > -Dcom.sun.jini.qa.harness.****runkitserver=true
> > >        > > > > > -Djava.security.properties=*
> > >        > > > > > *file:/opt/src/River_Fixed/****peterConcurrentPolicy/qa/**
> > >        > > > > > harness/trust/dynamic-policy.****properties
> > >        > > > > > -Dcom.sun.jini.qa.harness.****testhosts=
> > >        > > > > >
> > >        -Djava.util.logging.config.****file=/home/peter/logging.****
> > > properties
> > >        > > > > >
> > >        > > > > >
> > >        -Dcom.sun.jini.test.home=/opt/****src/River_Fixed/****
> > > peterConcurrentPolicy
> > >        > > > > > /qa
> > >        > > > > >
> > >        > > > > > -Dcom.sun.jini.test.port=9082 -Dcom.sun.jini.qa.harness.**
> > >        > > > > >
> > >        policies=file:/opt/src/River_****Fixed/peterConcurrentPolicy/****
> > >        > > > > > qa/src/com/sun/jini/test/****resources/jinitest.policy
> > >        > > > > >
> > >        -Djava.ext.dirs=/usr/jdk/jdk1.****6.0_30/jre/lib/ext:/usr/jdk/****
> > >        > > > > >
> > >        > > > > >
> > >        packages/lib/ext:/opt/src/****River_Fixed/****
> > > peterConcurrentPolicy/qa/lib-
> > >        > > > > > **
> > >        > > > > >
> > >        > > > > > ext:/opt/src/River_Fixed/****peterConcurrentPolicy/lib-ext
> > >        > > > > > com.sun.jini.qa.harness.****MasterTest
> > >        com/sun/jini/test/spec/**
> > >        > > > > >
> > >        javaspace/conformance/****snapshot/****SnapshotExpirationNotifyTest.*
> > > ***td
> > >        > > > > >              [java] com.sun.jini.qa.harness.****TestException:
> > >        Not all
> > >        > > > > > listeners've
> > >        > > > > > got expected number of events.
> > >        > > > > >              [java]              at
> > >        com.sun.jini.test.spec.****javaspace.conformance.**
> > >        > > > > > snapshot.****SnapshotExpirationNotifyTest.****run(**
> > >        > > > > > SnapshotExpirationNotifyTest.****java:370)
> > >        > > > > >              [java]              at
> > >        > > > > > com.sun.jini.qa.harness.****MasterTest.doTest(MasterTest.*
> > >        > > > > > *java:256)
> > >        > > > > >              [java]              at
> > >        > > > > > com.sun.jini.qa.harness.****MasterTest.main(MasterTest.**
> > >        > > > > > java:144)
> > >        > > > > >              [java]
> > >        > > > > >              [java] TIME: 10:42:54 PM
> > >        > > > > >              [java]
> > >        > > > > >              [java] Test process was destroyed and returned code 1
> > >        > > > > >              [java]
> > >        > > > > > com/sun/jini/test/spec/****javaspace/conformance/****
> > > snapshot/**
> > >        > > > > > SnapshotExpirationNotifyTest.****td
> > >        > > > > >              [java] Test Failed: Test Failed:
> > >        > > > > > com.sun.jini.qa.harness.****TestException:
> > >        > > > > > Not all listeners've got expected number of events.
> > >        > > > > >              [java]
> > >        > > > > >              [java]
> > >        > > > > >              [java] ------------------------------****-----------
> > >        > > > > >              [java]
> > >        > > > > >              [java] SUMMARY ==============================****===
> > >        > > > > >              [java]
> > >        > > > > >              [java]
> > >        > > > > > com/sun/jini/test/spec/****javaspace/conformance/****
> > > snapshot/**
> > >        > > > > > SnapshotExpirationNotifyTest.****td
> > >        > > > > >              [java] Test Failed: Test Failed:
> > >        > > > > > com.sun.jini.qa.harness.****TestException:
> > >        > > > > > Not all listeners've got expected number of events.
> > >        > > > > >              [java]
> > >        > > > > >              [java] ------------------------------****-----------
> > >        > > > > >              [java]
> > >        > > > > >              [java] # of tests started      = 1
> > >        > > > > >              [java] # of tests completed = 1
> > >        > > > > >              [java] # of tests passed          = 0
> > >        > > > > >              [java] # of tests failed          = 1
> > >        > > > > >              [java]
> > >        > > > > >              [java] ------------------------------****-----------
> > >        > > > > >              [java]
> > >        > > > > >              [java]          Date finished:
> > >        > > > > >              [java]                      Sun Mar 31 22:42:59 EST 2013
> > >        > > > > >              [java]          Time elapsed:
> > >        > > > > >              [java]                      201 seconds
> > >        > > > > >              [java]
> > >        > > > > >              [java] Java Result: 1
> > >        > > > > >
> > >        > > > > > collect-result:
> > >        > > > > >
> > >        > > > > > BUILD FAILED
> > >        > > > > >
> > >        /opt/src/River_Fixed/****peterConcurrentPolicy/build.****xml:2105:
> > > The
> > >        > > > > > following error occurred while executing this line:
> > >        > > > > >
> > >        /opt/src/River_Fixed/****peterConcurrentPolicy/qa/****build.xml:357:
> > >        > > > > > condition satisfied
> > >        > > > > >
> > >        > > > > > Total time: 3 minutes 30 seconds
> > >        > > > > >
> > >        > > > > >
> > >        > > > > >
> > >        > > > >
> > >        > > > >
> > >        > > >
> > >        > > >
> > >        > >
> > >        >
> > >
> > >
> > >
> >


Re: test failure repeatability

Posted by Dan Creswell <da...@gmail.com>.
So...

On 1 April 2013 21:03, Peter Firmstone <ji...@zeus.net.au> wrote:

> I understand, better the devil you know argument, if someone wants to make
> a 2.2.1 release, I'm certainly not against it.
>
>
Fair enough.


> It won't take me long to track down & fix the concurrency bug, if that's
> the cause.
> I tend to agree with Greg's assessment that the tests are contrived.  The
> tests make a poor attempt at determining whether the leased resource is
> still available by checking within a time window around lease expiry, it's
> subject to so many assumptions that I can't see why that determination is
> even made.
>

Well they seem contrived but given the size of the lease times we're
dealing with and the room between them for clean expiry, I'm a little
concerned. No question, they aren't the best tests, crude for sure, but
they don't look broke as such. I'll keep looking...


> I don't think we can guarantee a lease will expire during a specific time
> window, most programmers will hopefully renew a lease long before it's due
> to expire.  Until we make that assessment, I think we'll struggle with the
> tests.
>
>
I think the point is a lease should expire exactly to the millisecond. What
you can't guarantee is when an event gets issued against the leased object,
before or after. Nevertheless, with big enough windows before expiry, it
should work so...


> Remember we're now dealing with concurrency over network connections.  The
> so called failures may just be due to timing.
>
> The network is asynchronous.
>
>
Yep, preaching to the converted.


> The old code appears to slow down network connections enough to make them
> less asynchronous, heck even really bad concurrency race conditions and
> obvious bugs didn't manifest during testing.
>
>
Mmmm, see my gut says that if the network is "faster" these tests should be
less likely to fail as it's more likely events are delivered in the
time-frames required, not less.


> Example of how old code makes network less asynchronous:
>
>   1. Thread runs, opens network connection, security check is performed.
>   2. Second thread runs, opens network connection, security check is
>      performed, first thread is synchronized on a DNS check, the second
>      thread waits.
>   3. Second thread will complete after first.
>
> New code:
>
>   1. Thread runs, opens network connection, security check is performed
>      and returns without contention.
>   2. Second thread runs, opens network connection, security check is
>      performed and returns without contention.
>   3. Second thread by chance finishes communication prior to first.
>
> If we look at ClassLoader, the story is similar.
>
> Regards,
>
> Peter.
>
> Dan Creswell wrote:
>
>> I feel Bryan has a point.
>>
>> Putting out a release with just the Levels fix is better than the current
>> release which won't run at all under various circumstances. i.e. It is a
>> small step forward and in no way a backward step.
>>
>> Such a release mightn't be as much of a step forward as one also
>> featuring the concurrency work *but* the concurrency work is also seemingly
>> amplifying the occurrence of preexisting concurrency problems. It can be
>> said then to perform both better and worse than previous releases.
>>
>> In essence, the small fix release is a known quantity, the new one less
>> so. We maybe ought to give users the choice as to how much unknown'ness
>> they wish to indulge in.
>>
>>
>>
>>
>> On 1 April 2013 13:10, Peter <jini@zeus.net.au <ma...@zeus.net.au>>
>> wrote:
>>
>>     The big problem is there are fewer concurrency bugs in the present
>>     branch and there are definitely more in the previous release, they
>>     just haven't presented in test in previous releases, although they
>>     have in deployment.
>>
>>     The issue appears to be with javaspaces / lease / transactions.
>>
>>     It's also a possibility that there's just a timing issue with the
>>     tests.
>>
>>     I'd like to fix the tests before releasing, it's unfortunate the
>>     last fix to jeri seemed to expose these new test failures.  It's a
>>     little like fixing rust in an old car, the more you remove, the
>>     more you find.
>>
>>     What I really need is more help; I don't work on this full time, so it
>>     takes longer than a team working on commercial software.
>>
>>     Peter.
>>     ----- Original message -----
>>     > I am uncomfortable with a release that has known concurrency
>>     problems.  I
>>     > am also uncomfortable that the custom Levels serialization change by
>>     > Oracle [1] has broken the current release for new JVMs.  I would
>>     like to
>>     > see a minor release which fixes that serialization problem and a
>>     candidate
>>     > release which gives people a chance to discover concurrency
>>     issues without
>>     > risking a release that is known to be unstable.
>>     >
>>     > I'm happy to review a few of the classes with known concurrency
>>     problems
>>     > to see if I can help nail some of these bugs.  Since I do not
>>     know the
>>     > river internals, I would only be able to spot concurrency
>>     problems that
>>     > exist within a class.  I am not in a good position to comment on
>>     > concurrency problems that might arise through the interactions among
>>     > classes.
>>     >
>>     > Bryan
>>     >
>>     > [1] https://issues.apache.org/**jira/browse/RIVER-416<https://issues.apache.org/jira/browse/RIVER-416>
>>     >
>>     > On 4/1/13 6:14 AM, "Peter Firmstone" <jini@zeus.net.au
>>     <ma...@zeus.net.au>> wrote:
>>     >
>>     > > The attachments will be removed from the list, so I've cc'd
>>     you, anyone
>>     > > who's interested, let me know I can forward the attachments.
>> They can
>>     > > be opened with jvisualvm.
>>     > >
>>     > > The profiling isn't perfect, the test runs for about 8.5
>>     minutes, so
>>     > > hotspot should have kicked in relatively early in both test runs.
>>     > >
>>     > > I guess a significant problem is; the more I remove performance
>>     > > impedances, like unnecessary DNS calls, the faster
>>     multithreading and
>>     > > context switching gets.
>>     > >
>>     > > Not only did the old policy providers create contention, but
>>     it was
>>     > > slower for single threaded performance (I'll have to run the
>>     previous
>>     > > release branch for comparison when I get some time).
>>     > >
>>     > > The URIGrant.implies call is now down to .228 ms per
>>     invocation, down
>>     > > from 1.68 ms per invocation this week, which was already quite
>>     good (on
>>     > > old UltraSparcII hardware), during stress tests this method is
>>     called
>>     > > almost 40,000 times.
>>     > >
>>     > > In comparison the old policy provider which required a DNS
>>     call (every
>>     > > time CodeSource.implies is called, functionality now replaced by
>>     > > URIGrant.implies), the old policy provider also cached all
>>     Permission's
>>     > > in highly contended PermissionCollection's, which during
>>     network calls
>>     > > invoked SocketPermission.implies, possibly for every
>>     SocketPermission in
>>     > > the PermissionCollection, DNS is also consulted by
>>     > > SocketPermission.implies, while synchronized, ouch!
>>     > >
>>     > > URIGrant.implies is non blocking, that's right zero contention.
>>     > > SocketPermission.implies DNS calls can be avoided in most cases if
>>     > > PermissionComparator finds an exact match or wild card.
>>     > >
>>     > > This is without the CombinerSecurityManager, which improves
>>     security
>>     > > performance by a factor of 10 (SocketPermission is only
>>     checked once for
>>     > > each AccessControlContext).
>>     > >
>>     > > PreferredClassProvider no longer creates unnecessary DNS calls, and
>>     > > neither does SecureClassLoader,  URLClassLoader or
>>     PreferredClassLoader.
>>     > >
>>     > > So the good news is the next release will feel much faster,
>>     the bad news
>>     > > is that existing concurrency bugs that previously didn't
>>     appear during
>>     > > test runs, but were likely to manifest during production, are now
>>     occurring
>>     > > during testing.  There's more good news, we fixed a number of
>>     > > concurrency bugs since the last release too, I guess I have to
>>     draw the
>>     > > line somewhere and cut a release.
>>     > >
>>     > > DNS calls haven't been completely eliminated as some are still
>>     > > necessary, but a multitude of unnecessary DNS calls have been
>>     eliminated.
>>     > >
>>     > > Regards,
>>     > >
>>     > > Peter.
>>     > >
>>     > > Peter Firmstone wrote:
>>     > > > They've passed more consistently in the past, they're either
>>     > > > concurrency bugs or network timing related, it would be nice
>>     to at
>>     > > > least determine if it's the former or latter.
>>     > > >
>>     > > > Cheers,
>>     > > >
>>     > > > Peter.
>>     > > >
>>     > > > Tom Hobbs wrote:
>>     > > > > Are these all new failures, i.e. were they working
>>     before?  Or are they
>>     > > > > "new" failures in that the test categories have only
>>     recently been
>>     > > > > reactivated and the failures discovered?
>>     > > > >
>>     > > > > Are they a big enough blocker to stop a release?
>>     > > > >
>>     > > > >
>>     > > > > On Sun, Mar 31, 2013 at 11:31 PM, Peter Firmstone
>>     <jini@zeus.net.au <ma...@zeus.net.au>>
>>
>>     > > > > wrote:
>>     > > > >
>>     > > > >
>>     > > > > > The following test fails 30 times in a run of 130 tests:
>>     > > > > >
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > > [java]  #            of          tests    passed  =
>>       0
>>     > > > > > [java]  #            of          tests    passed  =
>>       1
>>     > > > > >
>>     > > > > >
>>     > > > > >
>>     > > > > >
>>     > > > > >
>>     > > > > >
>>     > > > > >                100
>>     > > > > >
>>     > > > > >
>>     > > > > >
>>     > > > > >
>>     > > > > > Buildfile: build.xml
>>     > > > > >
>>     > > > > > qa.run-tests:
>>     > > > > >
>>     > > > > > james-brown:
>>     > > > > >    [delete] Deleting directory /opt/src/River_Fixed/**
>>     > > > > > peterConcurrentPolicy/qa/soul
>>     > > > > >      [mkdir] Created dir: /opt/src/River_Fixed/**
>>     > > > > > peterConcurrentPolicy/qa/soul
>>     > > > > >      [touch] Creating
>>     > > > > > /opt/src/River_Fixed/****peterConcurrentPolicy/qa/soul/***
>>     > > > > > *soul.201303312239034808
>>     > > > > >
>>     > > > > > run-tests:
>>     > > > > >        [java]
>>     > > > > >        [java] ------------------------------****-----------
>>     > > > > >        [java] CONFIGURATION FILE:
>>     > > > > >        [java]
>>     > > > > >        [java]         /opt/src/River_Fixed/****
>> peterConcurrentPolicy/qa/src/****
>>     > > > > > com/sun/jini/test/resources/****qaHarness.prop
>>     > > > > >        [java]
>>     > > > > >        [java] ------------------------------****-----------
>>     > > > > >        [java] SETTING UP THE TEST LIST:
>>     > > > > >        [java]
>>     > > > > >        [java]      Adding test:
>>     > > > > > com/sun/jini/test/spec/****javaspace/conformance/
>>     > > > > > **snapshot/****SnapshotExpirationNotifyTest.****td
>>     > > > > >        [java]
>>     > > > > >        [java] ------------------------------****-----------
>>     > > > > >        [java] GENERAL HARNESS CONFIGURATION INFORMATION:
>>     > > > > >        [java]
>>     > > > > >        [java]      Date started:
>>     > > > > >        [java]            Sun Mar 31 22:39:37 EST 2013
>>     > > > > >        [java]      Installation directory of the JSK:
>>     > > > > >        [java]               com.sun.jini.jsk.home=/opt/****
>> src/River_Fixed/**
>>     > > > > > peterConcurrentPolicy
>>     > > > > >        [java]      Installation directory of the harness:
>>     > > > > >        [java]               com.sun.jini.qa.home=/opt/src/*
>> ***River_Fixed/**
>>     > > > > > peterConcurrentPolicy/qa
>>     > > > > >        [java]      Categories being tested:
>>     > > > > >        [java]            categories=No Categories
>>     > > > > >        [java] ------------------------------****-----------
>>     > > > > >        [java] ENVIRONMENT PROPERTIES:
>>     > > > > >        [java]
>>     > > > > >        [java]      JVM information:
>>     > > > > >        [java]            Java HotSpot(TM) Server VM,
>>     20.5-b03, 32 bit VM mode
>>     > > > > >        [java]            Sun Microsystems Inc.
>>     > > > > >        [java]      OS information:
>>     > > > > >        [java]            SunOS, 5.10, sparc
>>     > > > > >        [java]
>>     > > > > >        [java] ------------------------------****-----------
>>     > > > > >        [java] STARTING TO RUN THE TESTS
>>     > > > > >        [java]
>>     > > > > >        [java]
>>     > > > > >        [java] Running
>>     com/sun/jini/test/spec/****javaspace/conformance/**
>>     > > > > > snapshot/****SnapshotExpirationNotifyTest.****td
>>     > > > > >        [java] Time is Sun Mar 31 22:39:38 EST 2013
>>     > > > > >        [java] Starting test in separate process with
>>     command:
>>     > > > > >        [java] /usr/jdk/jdk1.6.0_30/jre/bin/****java
>>     > > > > >
>>     > > > > >
>>     -Djava.security.manager=org.****apache.river.api.security.****
>> CombinerSecur
>>     > > > > > ityManager
>>     > > > > >
>>     > > > > > -Djava.security.policy=file:/****opt/src/River_Fixed/**
>>     > > > > >
>>     peterConcurrentPolicy/qa/****harness/policy/defaulttest.****policy
>>     > > > > >
>>     > > > > >
>>     -Djava.rmi.server.codebase=**htt**p://bluto:9082/qa1-**
>> javaspace-**dl.jar<h
>>     > > > > > ttp://bluto:9082/qa1-**javaspace-dl.jar>-cp
>>     > > > > > /opt/src/River_Fixed/
>>     > > > > >
>>     > > > > >
>>     **peterConcurrentPolicy/qa/**lib/**jiniharness.jar:/opt/**
>> src/**River_Fixed
>>     > > > > > /**
>>     > > > > >
>>     > > > > >
>>     peterConcurrentPolicy/qa/lib/****jinitests.jar:/opt/src/River_****
>>     > > > > >
>>     > > > > >
>>     Fixed/peterConcurrentPolicy/****lib/jsk-platform.jar:/opt/src/**
>> **River_Fix
>>     > > > > > ed/
>>     > > > > >
>>     > > > > >
>>     **peterConcurrentPolicy/lib/**jsk-**lib.jar:/opt/src/River_**Fixed/**
>>     > > > > >
>>     > > > > >
>>     peterConcurrentPolicy/lib/****high-scale-lib.jar:/opt/src/****
>> River_Fixed/*
>>     > > > > > *
>>     > > > > >
>>     > > > > > peterConcurrentPolicy/lib/****custard-apple-1.0.2.jar -ea
>>     -esa -client
>>     > > > > >
>>     -Djava.ext.dirs=/usr/jdk/jdk1.****6.0_30/jre/lib/ext:/usr/jdk/****
>>     > > > > >
>>     > > > > >
>>     packages/lib/ext:/opt/src/****River_Fixed/****
>> peterConcurrentPolicy/qa/lib-
>>     > > > > > **
>>     > > > > >
>>     > > > > > ext:/opt/src/River_Fixed/****peterConcurrentPolicy/lib-ext
>>     > > > > > -Dcom.sun.jini.jsk.port=9080 -Dcom.sun.jini.qa.port=9081
>>     > > > > >
>>     -Dcom.sun.jini.jsk.home=/opt/****src/River_Fixed/****
>> peterConcurrentPolicy
>>     > > > > >
>>     > > > > >
>>     -Dcom.sun.jini.qa.home=/opt/****src/River_Fixed/****
>> peterConcurrentPolicy/q
>>     > > > > > a
>>     > > > > >
>>     > > > > > -Dcom.sun.jini.qa.harness.****harnessJar=/opt/src/River_**
>>     > > > > > Fixed/peterConcurrentPolicy/****qa/lib/jiniharness.jar
>>     > > > > > -Dcom.sun.jini.qa.harness.****
>> testJar=/opt/src/River_Fixed/****
>>     > > > > > peterConcurrentPolicy/qa/lib/****jinitests.jar
>>     > > > > > -Dcom.sun.jini.qa.harness.****runjiniserver=true
>>     > > > > > -Dcom.sun.jini.qa.harness.****runkitserver=true
>>     > > > > > -Djava.security.properties=*
>>     > > > > > *file:/opt/src/River_Fixed/****peterConcurrentPolicy/qa/**
>>     > > > > > harness/trust/dynamic-policy.****properties
>>     > > > > > -Dcom.sun.jini.qa.harness.****testhosts=
>>     > > > > >
>>     -Djava.util.logging.config.****file=/home/peter/logging.****
>> properties
>>     > > > > >
>>     > > > > >
>>     -Dcom.sun.jini.test.home=/opt/****src/River_Fixed/****
>> peterConcurrentPolicy
>>     > > > > > /qa
>>     > > > > >
>>     > > > > > -Dcom.sun.jini.test.port=9082 -Dcom.sun.jini.qa.harness.**
>>     > > > > >
>>     policies=file:/opt/src/River_****Fixed/peterConcurrentPolicy/****
>>     > > > > > qa/src/com/sun/jini/test/****resources/jinitest.policy
>>     > > > > >
>>     -Djava.ext.dirs=/usr/jdk/jdk1.****6.0_30/jre/lib/ext:/usr/jdk/****
>>     > > > > >
>>     > > > > >
>>     packages/lib/ext:/opt/src/****River_Fixed/****
>> peterConcurrentPolicy/qa/lib-
>>     > > > > > **
>>     > > > > >
>>     > > > > > ext:/opt/src/River_Fixed/****peterConcurrentPolicy/lib-ext
>>     > > > > > com.sun.jini.qa.harness.****MasterTest
>>     com/sun/jini/test/spec/**
>>     > > > > >
>>     javaspace/conformance/****snapshot/****SnapshotExpirationNotifyTest.*
>> ***td
>>     > > > > >        [java] com.sun.jini.qa.harness.****TestException:
>>     Not all
>>     > > > > > listeners've
>>     > > > > > got expected number of events.
>>     > > > > >        [java]        at
>>     com.sun.jini.test.spec.****javaspace.conformance.**
>>     > > > > > snapshot.****SnapshotExpirationNotifyTest.****run(**
>>     > > > > > SnapshotExpirationNotifyTest.****java:370)
>>     > > > > >        [java]        at
>>     > > > > > com.sun.jini.qa.harness.****MasterTest.doTest(MasterTest.*
>>     > > > > > *java:256)
>>     > > > > >        [java]        at
>>     > > > > > com.sun.jini.qa.harness.****MasterTest.main(MasterTest.**
>>     > > > > > java:144)
>>     > > > > >        [java]
>>     > > > > >        [java] TIME: 10:42:54 PM
>>     > > > > >        [java]
>>     > > > > >        [java] Test process was destroyed and returned code 1
>>     > > > > >        [java]
>>     > > > > > com/sun/jini/test/spec/****javaspace/conformance/****
>> snapshot/**
>>     > > > > > SnapshotExpirationNotifyTest.****td
>>     > > > > >        [java] Test Failed: Test Failed:
>>     > > > > > com.sun.jini.qa.harness.****TestException:
>>     > > > > > Not all listeners've got expected number of events.
>>     > > > > >        [java]
>>     > > > > >        [java]
>>     > > > > >        [java] ------------------------------****-----------
>>     > > > > >        [java]
>>     > > > > >        [java] SUMMARY ==============================****===
>>     > > > > >        [java]
>>     > > > > >        [java]
>>     > > > > > com/sun/jini/test/spec/****javaspace/conformance/****
>> snapshot/**
>>     > > > > > SnapshotExpirationNotifyTest.****td
>>     > > > > >        [java] Test Failed: Test Failed:
>>     > > > > > com.sun.jini.qa.harness.****TestException:
>>     > > > > > Not all listeners've got expected number of events.
>>     > > > > >        [java]
>>     > > > > >        [java] ------------------------------****-----------
>>     > > > > >        [java]
>>     > > > > >        [java] # of tests started    = 1
>>     > > > > >        [java] # of tests completed = 1
>>     > > > > >        [java] # of tests passed      = 0
>>     > > > > >        [java] # of tests failed      = 1
>>     > > > > >        [java]
>>     > > > > >        [java] ------------------------------****-----------
>>     > > > > >        [java]
>>     > > > > >        [java]      Date finished:
>>     > > > > >        [java]            Sun Mar 31 22:42:59 EST 2013
>>     > > > > >        [java]      Time elapsed:
>>     > > > > >        [java]            201 seconds
>>     > > > > >        [java]
>>     > > > > >        [java] Java Result: 1
>>     > > > > >
>>     > > > > > collect-result:
>>     > > > > >
>>     > > > > > BUILD FAILED
>>     > > > > >
>>     /opt/src/River_Fixed/****peterConcurrentPolicy/build.****xml:2105:
>> The
>>     > > > > > following error occurred while executing this line:
>>     > > > > >
>>     /opt/src/River_Fixed/****peterConcurrentPolicy/qa/****build.xml:357:
>>     > > > > > condition satisfied
>>     > > > > >
>>     > > > > > Total time: 3 minutes 30 seconds
>>     > > > > >
>>     > > > > >
>>     > > > > >
>>     > > > >
>>     > > > >
>>     > > >
>>     > > >
>>     > >
>>     >
>>
>>
>>
>

Re: test failure repeatability

Posted by Peter Firmstone <ji...@zeus.net.au>.
I understand the "better the devil you know" argument; if someone wants to 
make a 2.2.1 release, I'm certainly not against it.

It won't take me long to track down & fix the concurrency bug, if that's 
the cause. 

I tend to agree with Greg's assessment that the tests are contrived.  
The tests make a poor attempt at determining whether the leased resource 
is still available by checking within a time window around lease expiry, 
it's subject to so many assumptions that I can't see why that 
determination is even made. 

I don't think we can guarantee a lease will expire during a specific 
time window, most programmers will hopefully renew a lease long before 
it's due to expire.  Until we make that assessment, I think we'll 
struggle with the tests.

Remember we're now dealing with concurrency over network connections.  
The so-called failures may just be due to timing.

The network is asynchronous.

The old code appears to slow down network connections enough to make 
them less asynchronous; heck, even really bad concurrency race conditions 
and obvious bugs didn't manifest during testing.

Example of how old code makes network less asynchronous:

   1. Thread runs, opens network connection, security check is performed.
   2. Second thread runs, opens network connection, security check is
      performed, first thread is synchronized on a DNS check, the second
      thread waits.
   3. Second thread will complete after first.

New code:

   1. Thread runs, opens network connection, security check is performed
      and returns without contention.
   2. Second thread runs, opens network connection, security check is
      performed and returns without contention.
   3. Second thread by chance finishes communication prior to first.

If we look at ClassLoader, the story is similar.

Regards,

Peter.

Dan Creswell wrote:
> I feel Bryan has a point.
>
> Putting out a release with just the Levels fix is better than the 
> current release which won't run at all under various circumstances. 
> i.e. It is a small step forward and in no way a backward step.
>
> Such a release mightn't be as much of a step forward as one also 
> featuring the concurrency work *but* the concurrency work is also 
> seemingly amplifying the occurrence of preexisting concurrency 
> problems. It can be said then to perform both better and worse than 
> previous releases.
>
> In essence, the small fix release is a known quantity, the new one 
> less so. We maybe ought to give users the choice as to how much 
> unknown'ness they wish to indulge in.
>
>
>
>
> On 1 April 2013 13:10, Peter <jini@zeus.net.au 
> <ma...@zeus.net.au>> wrote:
>
>     The big problem is there are fewer concurrency bugs in the present
>     branch and there are definitely more in the previous release, they
>     just haven't presented in test in previous releases, although they
>     have in deployment.
>
>     The issue appears to be with javaspaces / lease / transactions.
>
>     It's also a possibility that there's just a timing issue with the
>     tests.
>
>     I'd like to fix the tests before releasing, it's unfortunate the
>     last fix to jeri seemed to expose these new test failures.  It's a
>     little like fixing rust in an old car, the more you remove, the
>     more you find.
>
>     What I really need is more help; I don't work on this full time, so it
>     takes longer than a team working on commercial software.
>
>     Peter.
>     ----- Original message -----
>     > I am uncomfortable with a release that has known concurrency
>     problems.  I
>     > am also uncomfortable that the custom Levels serialization change by
>     > Oracle [1] has broken the current release for new JVMs.  I would
>     like to
>     > see a minor release which fixes that serialization problem and a
>     candidate
>     > release which gives people a chance to discover concurrent
>     issues without
>     > risking a release that is known to be unstable.
>     >
>     > I'm happy to review a few of the classes with known concurrency
>     problems
>     > to see if I can help nail some of these bugs.  Since I do not
>     know the
>     > river internals, I would only be able to spot concurrency
>     problems that
>     > exist within a class.  I am not in a good position to comment on
>     > concurrency problems that might arise through the interactions among
>     > classes.
>     >
>     > Bryan
>     >
>     > [1] https://issues.apache.org/jira/browse/RIVER-416
>     >
>     > On 4/1/13 6:14 AM, "Peter Firmstone" <jini@zeus.net.au
>     <ma...@zeus.net.au>> wrote:
>     >
>     > > The attachments will be removed from the list, so I've cc'd
>     you, anyone
>     > > who's interested, let me know I can forward the attachments. 
>     They can
>     > > be opened with jvisualvm.
>     > >
>     > > The profiling isn't perfect, the test runs for about 8.5
>     minutes, so
>     > > hotspot should have kicked in relatively early in both test runs.
>     > >
>     > > I guess a significant problem is; the more I remove performance
>     > > impedances, like unnecessary DNS calls, the faster
>     multithreading and
>     > > context switching gets.
>     > >
>     > > Not only did the old policy providers create contention, but
>     it was
>     > > slower for single threaded performance (I'll have to run the
>     previous
>     > > release branch for comparison when I get some time).
>     > >
>     > > The URIGrant.implies call is now down to .228 ms per
>     invocation, down
>     > > from 1.68 ms per invocation this week, which was already quite
>     good (on
>     > > old UltraSparcII hardware), during stress tests this method is
>     called
>     > > almost 40,000 times.
>     > >
>     > > In comparison the old policy provider which required a DNS
>     call (every
>     > > time CodeSource.implies is called, functionality now replaced by
>     > > URIGrant.implies), the old policy provider also cached all
>     Permission's
>     > > in highly contended PermissionCollection's, which during
>     network calls
>     > > invoked SocketPermission.implies, possibly for every
>     SocketPermission in
>     > > the PermissionCollection, DNS is also consulted by
>     > > SocketPermission.implies, while synchronized, ouch!
>     > >
>     > > URIGrant.implies is non blocking, that's right zero contention.
>     > > SocketPermission.implies DNS calls can be avoided in most cases if
>     > > PermissionComparator finds an exact match or wild card.
>     > >
>     > > This is without the CombinerSecurityManager, which improves
>     security
>     > > performance by a factor of 10 (SocketPermission is only
>     checked once for
>     > > each AccessControlContext).
>     > >
>     > > PreferredClassProvider no longer creates unnecessary DNS calls, and
>     > > neither does SecureClassLoader,  URLClassLoader or
>     PreferredClassLoader.
>     > >
>     > > So the good news is the next release will feel much faster,
>     the bad news
>     > > is that existing concurrency bugs that previously didn't
>     appear during
>     > > test runs, but likely to manifest during production are now
>     occurring
>     > > during testing.  There's more good news, we fixed a number of
>     > > concurrency bugs since the last release too, I guess I have to
>     draw the
>     > > line somewhere and cut a release.
>     > >
>     > > DNS calls haven't been completely eliminated as some are still
>     > > necessary, but a multitude of unnecessary DNS calls have been
>     eliminated.
>     > >
>     > > Regards,
>     > >
>     > > Peter.
>     > >
>     > > Peter Firmstone wrote:
>     > > > They've passed more consistently in the past, they're either
>     > > > concurrency bugs or network timing related, it would be nice
>     to at
>     > > > least determine if it's the former or latter.
>     > > >
>     > > > Cheers,
>     > > >
>     > > > Peter.
>     > > >
>     > > > Tom Hobbs wrote:
>     > > > > Are these all new failures, i.e. were they working
>     before?  Or are they
>     > > > > "new" failures in that the test categories have only
>     recently been
>     > > > > reactivated and the failures discovered?
>     > > > >
>     > > > > Are they a big enough blocker to stop a release?
>     > > > >
>     > > > >
>     > > > > On Sun, Mar 31, 2013 at 11:31 PM, Peter Firmstone
>     <jini@zeus.net.au <ma...@zeus.net.au>>
>     > > > > wrote:
>     > > > >
>     > > > >
>     > > > > > The following test fails 30 times in a run of 130 tests:
>     > > > > >
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > > [java]  #            of          tests    passed  =     
>           0
>     > > > > > [java]  #            of          tests    passed  =     
>           1
>     > > > > >
>     > > > > >
>     > > > > >
>     > > > > >
>     > > > > >
>     > > > > >
>     > > > > >                100
>     > > > > >
>     > > > > >
>     > > > > >
>     > > > > >
>     > > > > > Buildfile: build.xml
>     > > > > >
>     > > > > > qa.run-tests:
>     > > > > >
>     > > > > > james-brown:
>     > > > > >    [delete] Deleting directory /opt/src/River_Fixed/**
>     > > > > > peterConcurrentPolicy/qa/soul
>     > > > > >      [mkdir] Created dir: /opt/src/River_Fixed/**
>     > > > > > peterConcurrentPolicy/qa/soul
>     > > > > >      [touch] Creating
>     > > > > > /opt/src/River_Fixed/**peterConcurrentPolicy/qa/soul/*
>     > > > > > *soul.201303312239034808
>     > > > > >
>     > > > > > run-tests:
>     > > > > >        [java]
>     > > > > >        [java] ------------------------------**-----------
>     > > > > >        [java] CONFIGURATION FILE:
>     > > > > >        [java]
>     > > > > >        [java]     
>     /opt/src/River_Fixed/**peterConcurrentPolicy/qa/src/**
>     > > > > > com/sun/jini/test/resources/**qaHarness.prop
>     > > > > >        [java]
>     > > > > >        [java] ------------------------------**-----------
>     > > > > >        [java] SETTING UP THE TEST LIST:
>     > > > > >        [java]
>     > > > > >        [java]      Adding test:
>     > > > > > com/sun/jini/test/spec/**javaspace/conformance/
>     > > > > > **snapshot/**SnapshotExpirationNotifyTest.**td
>     > > > > >        [java]
>     > > > > >        [java] ------------------------------**-----------
>     > > > > >        [java] GENERAL HARNESS CONFIGURATION INFORMATION:
>     > > > > >        [java]
>     > > > > >        [java]      Date started:
>     > > > > >        [java]            Sun Mar 31 22:39:37 EST 2013
>     > > > > >        [java]      Installation directory of the JSK:
>     > > > > >        [java]           
>     com.sun.jini.jsk.home=/opt/**src/River_Fixed/**
>     > > > > > peterConcurrentPolicy
>     > > > > >        [java]      Installation directory of the harness:
>     > > > > >        [java]           
>     com.sun.jini.qa.home=/opt/src/**River_Fixed/**
>     > > > > > peterConcurrentPolicy/qa
>     > > > > >        [java]      Categories being tested:
>     > > > > >        [java]            categories=No Categories
>     > > > > >        [java] ------------------------------**-----------
>     > > > > >        [java] ENVIRONMENT PROPERTIES:
>     > > > > >        [java]
>     > > > > >        [java]      JVM information:
>     > > > > >        [java]            Java HotSpot(TM) Server VM,
>     20.5-b03, 32 bit VM mode
>     > > > > >        [java]            Sun Microsystems Inc.
>     > > > > >        [java]      OS information:
>     > > > > >        [java]            SunOS, 5.10, sparc
>     > > > > >        [java]
>     > > > > >        [java] ------------------------------**-----------
>     > > > > >        [java] STARTING TO RUN THE TESTS
>     > > > > >        [java]
>     > > > > >        [java]
>     > > > > >        [java] Running
>     com/sun/jini/test/spec/**javaspace/conformance/**
>     > > > > > snapshot/**SnapshotExpirationNotifyTest.**td
>     > > > > >        [java] Time is Sun Mar 31 22:39:38 EST 2013
>     > > > > >        [java] Starting test in separate process with
>     command:
>     > > > > >        [java] /usr/jdk/jdk1.6.0_30/jre/bin/**java
>     > > > > >
>     > > > > >
>     -Djava.security.manager=org.**apache.river.api.security.**CombinerSecur
>     > > > > > ityManager
>     > > > > >
>     > > > > > -Djava.security.policy=file:/**opt/src/River_Fixed/**
>     > > > > >
>     peterConcurrentPolicy/qa/**harness/policy/defaulttest.**policy
>     > > > > >
>     > > > > >
>     -Djava.rmi.server.codebase=htt**p://bluto:9082/qa1-javaspace-**dl.jar<h
>     > > > > > ttp://bluto:9082/qa1-javaspace-dl.jar>-cp
>     > > > > > /opt/src/River_Fixed/
>     > > > > >
>     > > > > >
>     **peterConcurrentPolicy/qa/lib/**jiniharness.jar:/opt/src/**River_Fixed
>     > > > > > /**
>     > > > > >
>     > > > > >
>     peterConcurrentPolicy/qa/lib/**jinitests.jar:/opt/src/River_**
>     > > > > >
>     > > > > >
>     Fixed/peterConcurrentPolicy/**lib/jsk-platform.jar:/opt/src/**River_Fix
>     > > > > > ed/
>     > > > > >
>     > > > > >
>     **peterConcurrentPolicy/lib/jsk-**lib.jar:/opt/src/River_Fixed/**
>     > > > > >
>     > > > > >
>     peterConcurrentPolicy/lib/**high-scale-lib.jar:/opt/src/**River_Fixed/*
>     > > > > > *
>     > > > > >
>     > > > > > peterConcurrentPolicy/lib/**custard-apple-1.0.2.jar -ea
>     -esa -client
>     > > > > >
>     -Djava.ext.dirs=/usr/jdk/jdk1.**6.0_30/jre/lib/ext:/usr/jdk/**
>     > > > > >
>     > > > > >
>     packages/lib/ext:/opt/src/**River_Fixed/**peterConcurrentPolicy/qa/lib-
>     > > > > > **
>     > > > > >
>     > > > > > ext:/opt/src/River_Fixed/**peterConcurrentPolicy/lib-ext
>     > > > > > -Dcom.sun.jini.jsk.port=9080 -Dcom.sun.jini.qa.port=9081
>     > > > > >
>     -Dcom.sun.jini.jsk.home=/opt/**src/River_Fixed/**peterConcurrentPolicy
>     > > > > >
>     > > > > >
>     -Dcom.sun.jini.qa.home=/opt/**src/River_Fixed/**peterConcurrentPolicy/q
>     > > > > > a
>     > > > > >
>     > > > > > -Dcom.sun.jini.qa.harness.**harnessJar=/opt/src/River_**
>     > > > > > Fixed/peterConcurrentPolicy/**qa/lib/jiniharness.jar
>     > > > > > -Dcom.sun.jini.qa.harness.**testJar=/opt/src/River_Fixed/**
>     > > > > > peterConcurrentPolicy/qa/lib/**jinitests.jar
>     > > > > > -Dcom.sun.jini.qa.harness.**runjiniserver=true
>     > > > > > -Dcom.sun.jini.qa.harness.**runkitserver=true
>     > > > > > -Djava.security.properties=*
>     > > > > > *file:/opt/src/River_Fixed/**peterConcurrentPolicy/qa/**
>     > > > > > harness/trust/dynamic-policy.**properties
>     > > > > > -Dcom.sun.jini.qa.harness.**testhosts=
>     > > > > >
>     -Djava.util.logging.config.**file=/home/peter/logging.**properties
>     > > > > >
>     > > > > >
>     -Dcom.sun.jini.test.home=/opt/**src/River_Fixed/**peterConcurrentPolicy
>     > > > > > /qa
>     > > > > >
>     > > > > > -Dcom.sun.jini.test.port=9082 -Dcom.sun.jini.qa.harness.**
>     > > > > >
>     policies=file:/opt/src/River_**Fixed/peterConcurrentPolicy/**
>     > > > > > qa/src/com/sun/jini/test/**resources/jinitest.policy
>     > > > > >
>     -Djava.ext.dirs=/usr/jdk/jdk1.**6.0_30/jre/lib/ext:/usr/jdk/**
>     > > > > >
>     > > > > >
>     packages/lib/ext:/opt/src/**River_Fixed/**peterConcurrentPolicy/qa/lib-
>     > > > > > **
>     > > > > >
>     > > > > > ext:/opt/src/River_Fixed/**peterConcurrentPolicy/lib-ext
>     > > > > > com.sun.jini.qa.harness.**MasterTest
>     com/sun/jini/test/spec/**
>     > > > > >
>     javaspace/conformance/**snapshot/**SnapshotExpirationNotifyTest.**td
>     > > > > >        [java] com.sun.jini.qa.harness.**TestException:
>     Not all
>     > > > > > listeners've
>     > > > > > got expected number of events.
>     > > > > >        [java]        at
>     com.sun.jini.test.spec.**javaspace.conformance.**
>     > > > > > snapshot.**SnapshotExpirationNotifyTest.**run(**
>     > > > > > SnapshotExpirationNotifyTest.**java:370)
>     > > > > >        [java]        at
>     > > > > > com.sun.jini.qa.harness.**MasterTest.doTest(MasterTest.*
>     > > > > > *java:256)
>     > > > > >        [java]        at
>     > > > > > com.sun.jini.qa.harness.**MasterTest.main(MasterTest.**
>     > > > > > java:144)
>     > > > > >        [java]
>     > > > > >        [java] TIME: 10:42:54 PM
>     > > > > >        [java]
>     > > > > >        [java] Test process was destroyed and returned code 1
>     > > > > >        [java]
>     > > > > > com/sun/jini/test/spec/**javaspace/conformance/**snapshot/**
>     > > > > > SnapshotExpirationNotifyTest.**td
>     > > > > >        [java] Test Failed: Test Failed:
>     > > > > > com.sun.jini.qa.harness.**TestException:
>     > > > > > Not all listeners've got expected number of events.
>     > > > > >        [java]
>     > > > > >        [java]
>     > > > > >        [java] ------------------------------**-----------
>     > > > > >        [java]
>     > > > > >        [java] SUMMARY ==============================**===
>     > > > > >        [java]
>     > > > > >        [java]
>     > > > > > com/sun/jini/test/spec/**javaspace/conformance/**snapshot/**
>     > > > > > SnapshotExpirationNotifyTest.**td
>     > > > > >        [java] Test Failed: Test Failed:
>     > > > > > com.sun.jini.qa.harness.**TestException:
>     > > > > > Not all listeners've got expected number of events.
>     > > > > >        [java]
>     > > > > >        [java] ------------------------------**-----------
>     > > > > >        [java]
>     > > > > >        [java] # of tests started    = 1
>     > > > > >        [java] # of tests completed = 1
>     > > > > >        [java] # of tests passed      = 0
>     > > > > >        [java] # of tests failed      = 1
>     > > > > >        [java]
>     > > > > >        [java] ------------------------------**-----------
>     > > > > >        [java]
>     > > > > >        [java]      Date finished:
>     > > > > >        [java]            Sun Mar 31 22:42:59 EST 2013
>     > > > > >        [java]      Time elapsed:
>     > > > > >        [java]            201 seconds
>     > > > > >        [java]
>     > > > > >        [java] Java Result: 1
>     > > > > >
>     > > > > > collect-result:
>     > > > > >
>     > > > > > BUILD FAILED
>     > > > > >
>     /opt/src/River_Fixed/**peterConcurrentPolicy/build.**xml:2105: The
>     > > > > > following error occurred while executing this line:
>     > > > > >
>     /opt/src/River_Fixed/**peterConcurrentPolicy/qa/**build.xml:357:
>     > > > > > condition satisfied
>     > > > > >
>     > > > > > Total time: 3 minutes 30 seconds
>     > > > > >
>     > > > > >
>     > > > > >
>     > > > >
>     > > > >
>     > > >
>     > > >
>     > >
>     >
>
>


Re: test failure repeatability

Posted by Dan Creswell <da...@gmail.com>.
I feel Bryan has a point.

Putting out a release with just the Levels fix is better than the current
release which won't run at all under various circumstances. i.e. It is a
small step forward and in no way a backward step.

Such a release mightn't be as much of a step forward as one also featuring
the concurrency work *but* the concurrency work is also seemingly
amplifying the occurrence of preexisting concurrency problems. It can be
said then to perform both better and worse than previous releases.

In essence, the small fix release is a known quantity, the new one less so.
We maybe ought to give users the choice as to how much unknown'ness they
wish to indulge in.




On 1 April 2013 13:10, Peter <ji...@zeus.net.au> wrote:

> The big problem is that there are fewer concurrency bugs in the present
> branch and definitely more in the previous release; they just haven't shown
> up in tests of previous releases, although they have in deployment.
>
> The issue appears to be with javaspaces / lease / transactions.
>
> It's also a possibility that there's just a timing issue with the tests.
>
> I'd like to fix the tests before releasing, it's unfortunate the last fix
> to jeri seemed to expose these new test failures.  It's a little like
> fixing rust in an old car, the more you remove, the more you find.
>
> What I really need is more help; I don't work on this full time, so it takes
> longer than a team working on commercial software.
>
> Peter.
> ----- Original message -----
> > I am uncomfortable with a release that has known concurrency problems.  I
> > am also uncomfortable that the custom Levels serialization change by
> > Oracle [1] has broken the current release for new JVMs.  I would like to
> > see a minor release which fixes that serialization problem and a candidate
> > release which gives people a chance to discover concurrency issues without
> > risking a release that is known to be unstable.
> >
> > I'm happy to review a few of the classes with known concurrency problems
> > to see if I can help nail some of these bugs.  Since I do not know the
> > river internals, I would only be able to spot concurrency problems that
> > exist within a class.  I am not in a good position to comment on
> > concurrency problems that might arise through the interactions among
> > classes.
> >
> > Bryan
> >
> > [1] https://issues.apache.org/jira/browse/RIVER-416
> >
> > On 4/1/13 6:14 AM, "Peter Firmstone" <ji...@zeus.net.au> wrote:
> >
> > > The attachments will be removed from the list, so I've cc'd you.  If
> > > anyone else is interested, let me know and I can forward the
> > > attachments.  They can be opened with jvisualvm.
> > >
> > > The profiling isn't perfect, the test runs for about 8.5 minutes, so
> > > hotspot should have kicked in relatively early in both test runs.
> > >
> > > I guess a significant problem is that the more I remove performance
> > > impediments, like unnecessary DNS calls, the faster multithreading and
> > > context switching gets.
> > >
> > > Not only did the old policy providers create contention, but it was
> > > slower for single threaded performance (I'll have to run the previous
> > > release branch for comparison when I get some time).
> > >
> > > The URIGrant.implies call is now down to .228 ms per invocation, down
> > > from 1.68 ms per invocation this week, which was already quite good (on
> > > old UltraSparcII hardware), during stress tests this method is called
> > > almost 40,000 times.
> > >
> > > In comparison, the old policy provider required a DNS call every time
> > > CodeSource.implies was called (functionality now replaced by
> > > URIGrant.implies).  The old policy provider also cached all Permissions
> > > in highly contended PermissionCollections, which during network calls
> > > invoked SocketPermission.implies, possibly for every SocketPermission in
> > > the PermissionCollection.  DNS is also consulted by
> > > SocketPermission.implies, while synchronized, ouch!
> > >
> > > URIGrant.implies is non blocking, that's right zero contention.
> > > SocketPermission.implies DNS calls can be avoided in most cases if
> > > PermissionComparator finds an exact match or wild card.
> > >
> > > This is without the CombinerSecurityManager, which improves security
> > > performance by a factor of 10 (SocketPermission is only checked once for
> > > each AccessControlContext).
> > >
> > > PreferredClassProvider no longer creates unnecessary DNS calls, and
> > > neither does SecureClassLoader, URLClassLoader or PreferredClassLoader.
> > >
> > > So the good news is the next release will feel much faster; the bad news
> > > is that existing concurrency bugs that previously didn't appear during
> > > test runs, but were likely to manifest in production, are now occurring
> > > during testing.  There's more good news: we fixed a number of
> > > concurrency bugs since the last release too.  I guess I have to draw the
> > > line somewhere and cut a release.
> > >
> > > DNS calls haven't been completely eliminated as some are still
> > > necessary, but a multitude of unnecessary DNS calls have been eliminated.
> > >
> > > Regards,
> > >
> > > Peter.
> > >
> > > Peter Firmstone wrote:
> > > > They've passed more consistently in the past, they're either
> > > > concurrency bugs or network timing related, it would be nice to at
> > > > least determine if it's the former or latter.
> > > >
> > > > Cheers,
> > > >
> > > > Peter.
> > > >
> > > > Tom Hobbs wrote:
> > > > > Are these all new failures, i.e. were they working before?  Or are
> they
> > > > > "new" failures in that the test categories have only recently been
> > > > > reactivated and the failures discovered?
> > > > >
> > > > > Are they a big enough blocker to stop a release?
> > > > >
> > > > >
> > > > > On Sun, Mar 31, 2013 at 11:31 PM, Peter Firmstone <
> jini@zeus.net.au>
> > > > > wrote:
> > > > >
> > > > >
> > > > > > The following test fails 30 times in a run of 130 tests:
> > > > > >
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > > [java]  #            of          tests    passed  =            0
> > > > > > [java]  #            of          tests    passed  =            1
> > > > > >
> > > > > >
> > > > > >
> > > > > >
> > > > > >
> > > > > >
> > > > > >                100
> > > > > >
> > > > > >
> > > > > >
> > > > > >
> > > > > > Buildfile: build.xml
> > > > > >
> > > > > > qa.run-tests:
> > > > > >
> > > > > > james-brown:
> > > > > >    [delete] Deleting directory /opt/src/River_Fixed/**
> > > > > > peterConcurrentPolicy/qa/soul
> > > > > >      [mkdir] Created dir: /opt/src/River_Fixed/**
> > > > > > peterConcurrentPolicy/qa/soul
> > > > > >      [touch] Creating
> > > > > > /opt/src/River_Fixed/**peterConcurrentPolicy/qa/soul/*
> > > > > > *soul.201303312239034808
> > > > > >
> > > > > > run-tests:
> > > > > >        [java]
> > > > > >        [java] ------------------------------**-----------
> > > > > >        [java] CONFIGURATION FILE:
> > > > > >        [java]
> > > > > >        [java]
> /opt/src/River_Fixed/**peterConcurrentPolicy/qa/src/**
> > > > > > com/sun/jini/test/resources/**qaHarness.prop
> > > > > >        [java]
> > > > > >        [java] ------------------------------**-----------
> > > > > >        [java] SETTING UP THE TEST LIST:
> > > > > >        [java]
> > > > > >        [java]      Adding test:
> > > > > > com/sun/jini/test/spec/**javaspace/conformance/
> > > > > > **snapshot/**SnapshotExpirationNotifyTest.**td
> > > > > >        [java]
> > > > > >        [java] ------------------------------**-----------
> > > > > >        [java] GENERAL HARNESS CONFIGURATION INFORMATION:
> > > > > >        [java]
> > > > > >        [java]      Date started:
> > > > > >        [java]            Sun Mar 31 22:39:37 EST 2013
> > > > > >        [java]      Installation directory of the JSK:
> > > > > >        [java]
> com.sun.jini.jsk.home=/opt/**src/River_Fixed/**
> > > > > > peterConcurrentPolicy
> > > > > >        [java]      Installation directory of the harness:
> > > > > >        [java]
> com.sun.jini.qa.home=/opt/src/**River_Fixed/**
> > > > > > peterConcurrentPolicy/qa
> > > > > >        [java]      Categories being tested:
> > > > > >        [java]            categories=No Categories
> > > > > >        [java] ------------------------------**-----------
> > > > > >        [java] ENVIRONMENT PROPERTIES:
> > > > > >        [java]
> > > > > >        [java]      JVM information:
> > > > > >        [java]            Java HotSpot(TM) Server VM, 20.5-b03,
> 32 bit VM mode
> > > > > >        [java]            Sun Microsystems Inc.
> > > > > >        [java]      OS information:
> > > > > >        [java]            SunOS, 5.10, sparc
> > > > > >        [java]
> > > > > >        [java] ------------------------------**-----------
> > > > > >        [java] STARTING TO RUN THE TESTS
> > > > > >        [java]
> > > > > >        [java]
> > > > > >        [java] Running
> com/sun/jini/test/spec/**javaspace/conformance/**
> > > > > > snapshot/**SnapshotExpirationNotifyTest.**td
> > > > > >        [java] Time is Sun Mar 31 22:39:38 EST 2013
> > > > > >        [java] Starting test in separate process with command:
> > > > > >        [java] /usr/jdk/jdk1.6.0_30/jre/bin/**java
> > > > > >
> > > > > >
> -Djava.security.manager=org.**apache.river.api.security.**CombinerSecur
> > > > > > ityManager
> > > > > >
> > > > > > -Djava.security.policy=file:/**opt/src/River_Fixed/**
> > > > > > peterConcurrentPolicy/qa/**harness/policy/defaulttest.**policy
> > > > > >
> > > > > >
> -Djava.rmi.server.codebase=htt**p://bluto:9082/qa1-javaspace-**dl.jar<h
> > > > > > ttp://bluto:9082/qa1-javaspace-dl.jar>-cp
> > > > > > /opt/src/River_Fixed/
> > > > > >
> > > > > >
> **peterConcurrentPolicy/qa/lib/**jiniharness.jar:/opt/src/**River_Fixed
> > > > > > /**
> > > > > >
> > > > > > peterConcurrentPolicy/qa/lib/**jinitests.jar:/opt/src/River_**
> > > > > >
> > > > > >
> Fixed/peterConcurrentPolicy/**lib/jsk-platform.jar:/opt/src/**River_Fix
> > > > > > ed/
> > > > > >
> > > > > > **peterConcurrentPolicy/lib/jsk-**lib.jar:/opt/src/River_Fixed/**
> > > > > >
> > > > > >
> peterConcurrentPolicy/lib/**high-scale-lib.jar:/opt/src/**River_Fixed/*
> > > > > > *
> > > > > >
> > > > > > peterConcurrentPolicy/lib/**custard-apple-1.0.2.jar -ea -esa
> -client
> > > > > > -Djava.ext.dirs=/usr/jdk/jdk1.**6.0_30/jre/lib/ext:/usr/jdk/**
> > > > > >
> > > > > >
> packages/lib/ext:/opt/src/**River_Fixed/**peterConcurrentPolicy/qa/lib-
> > > > > > **
> > > > > >
> > > > > > ext:/opt/src/River_Fixed/**peterConcurrentPolicy/lib-ext
> > > > > > -Dcom.sun.jini.jsk.port=9080 -Dcom.sun.jini.qa.port=9081
> > > > > >
> -Dcom.sun.jini.jsk.home=/opt/**src/River_Fixed/**peterConcurrentPolicy
> > > > > >
> > > > > >
> -Dcom.sun.jini.qa.home=/opt/**src/River_Fixed/**peterConcurrentPolicy/q
> > > > > > a
> > > > > >
> > > > > > -Dcom.sun.jini.qa.harness.**harnessJar=/opt/src/River_**
> > > > > > Fixed/peterConcurrentPolicy/**qa/lib/jiniharness.jar
> > > > > > -Dcom.sun.jini.qa.harness.**testJar=/opt/src/River_Fixed/**
> > > > > > peterConcurrentPolicy/qa/lib/**jinitests.jar
> > > > > > -Dcom.sun.jini.qa.harness.**runjiniserver=true
> > > > > > -Dcom.sun.jini.qa.harness.**runkitserver=true
> > > > > > -Djava.security.properties=*
> > > > > > *file:/opt/src/River_Fixed/**peterConcurrentPolicy/qa/**
> > > > > > harness/trust/dynamic-policy.**properties
> > > > > > -Dcom.sun.jini.qa.harness.**testhosts=
> > > > > >
> -Djava.util.logging.config.**file=/home/peter/logging.**properties
> > > > > >
> > > > > >
> -Dcom.sun.jini.test.home=/opt/**src/River_Fixed/**peterConcurrentPolicy
> > > > > > /qa
> > > > > >
> > > > > > -Dcom.sun.jini.test.port=9082 -Dcom.sun.jini.qa.harness.**
> > > > > > policies=file:/opt/src/River_**Fixed/peterConcurrentPolicy/**
> > > > > > qa/src/com/sun/jini/test/**resources/jinitest.policy
> > > > > > -Djava.ext.dirs=/usr/jdk/jdk1.**6.0_30/jre/lib/ext:/usr/jdk/**
> > > > > >
> > > > > >
> packages/lib/ext:/opt/src/**River_Fixed/**peterConcurrentPolicy/qa/lib-
> > > > > > **
> > > > > >
> > > > > > ext:/opt/src/River_Fixed/**peterConcurrentPolicy/lib-ext
> > > > > > com.sun.jini.qa.harness.**MasterTest com/sun/jini/test/spec/**
> > > > > >
> javaspace/conformance/**snapshot/**SnapshotExpirationNotifyTest.**td
> > > > > >        [java] com.sun.jini.qa.harness.**TestException: Not all
> > > > > > listeners've
> > > > > > got expected number of events.
> > > > > >        [java]        at
> com.sun.jini.test.spec.**javaspace.conformance.**
> > > > > > snapshot.**SnapshotExpirationNotifyTest.**run(**
> > > > > > SnapshotExpirationNotifyTest.**java:370)
> > > > > >        [java]        at
> > > > > > com.sun.jini.qa.harness.**MasterTest.doTest(MasterTest.*
> > > > > > *java:256)
> > > > > >        [java]        at
> > > > > > com.sun.jini.qa.harness.**MasterTest.main(MasterTest.**
> > > > > > java:144)
> > > > > >        [java]
> > > > > >        [java] TIME: 10:42:54 PM
> > > > > >        [java]
> > > > > >        [java] Test process was destroyed and returned code 1
> > > > > >        [java]
> > > > > > com/sun/jini/test/spec/**javaspace/conformance/**snapshot/**
> > > > > > SnapshotExpirationNotifyTest.**td
> > > > > >        [java] Test Failed: Test Failed:
> > > > > > com.sun.jini.qa.harness.**TestException:
> > > > > > Not all listeners've got expected number of events.
> > > > > >        [java]
> > > > > >        [java]
> > > > > >        [java] ------------------------------**-----------
> > > > > >        [java]
> > > > > >        [java] SUMMARY ==============================**===
> > > > > >        [java]
> > > > > >        [java]
> > > > > > com/sun/jini/test/spec/**javaspace/conformance/**snapshot/**
> > > > > > SnapshotExpirationNotifyTest.**td
> > > > > >        [java] Test Failed: Test Failed:
> > > > > > com.sun.jini.qa.harness.**TestException:
> > > > > > Not all listeners've got expected number of events.
> > > > > >        [java]
> > > > > >        [java] ------------------------------**-----------
> > > > > >        [java]
> > > > > >        [java] # of tests started    = 1
> > > > > >        [java] # of tests completed = 1
> > > > > >        [java] # of tests passed      = 0
> > > > > >        [java] # of tests failed      = 1
> > > > > >        [java]
> > > > > >        [java] ------------------------------**-----------
> > > > > >        [java]
> > > > > >        [java]      Date finished:
> > > > > >        [java]            Sun Mar 31 22:42:59 EST 2013
> > > > > >        [java]      Time elapsed:
> > > > > >        [java]            201 seconds
> > > > > >        [java]
> > > > > >        [java] Java Result: 1
> > > > > >
> > > > > > collect-result:
> > > > > >
> > > > > > BUILD FAILED
> > > > > > /opt/src/River_Fixed/**peterConcurrentPolicy/build.**xml:2105:
> The
> > > > > > following error occurred while executing this line:
> > > > > > /opt/src/River_Fixed/**peterConcurrentPolicy/qa/**build.xml:357:
> > > > > > condition satisfied
> > > > > >
> > > > > > Total time: 3 minutes 30 seconds
> > > > > >
> > > > > >
> > > > > >
> > > > >
> > > > >
> > > >
> > > >
> > >
> >
>
>

Re: test failure repeatability

Posted by Bryan Thompson <br...@systap.com>.
Peter,

As I said, I am happy to take on a few classes and see if I can find some
problems or discuss possible concurrency failures.  I am pretty good at
recognizing concurrency problems, at least within the range of coding
styles that I am familiar with.  But I do not know the river internals.
This will of necessity limit my ability to recognize problems with respect
to the overall architecture.

Bryan

On 4/1/13 8:10 AM, "Peter" <ji...@zeus.net.au> wrote:

>The big problem is there are fewer concurrency bugs in the present branch
>and there are definitely more in the previous release, they just haven't
>presented in test in previous releases, although they have in deployment.
>
>The issue appears to be with javaspaces / lease / transactions.
>
>It's also a possibility that there's just a timing issue with the tests.
>
>I'd like to fix the tests before releasing, it's unfortunate the last fix
>to jeri seemed to expose these new test failures.  It's a little like
>fixing rust in an old car, the more you remove, the more you find.
>
>What I really need is more help; I don't work on this full time, so it takes
>longer than a team working on commercial software.
>
>Peter.
>----- Original message -----
>> I am uncomfortable with a release that has known concurrency problems.
>>I
>> am also uncomfortable that the custom Levels serialization change by
>> Oracle [1] has broken the current release for new JVMs.  I would like to
>> see a minor release which fixes that serialization problem and a
>>candidate
>> release which gives people a chance to discover concurrency issues
>>without
>> risking a release that is known to be unstable.
>>
>> I'm happy to review a few of the classes with known concurrency problems
>> to see if I can help nail some of these bugs.  Since I do not know the
>> river internals, I would only be able to spot concurrency problems that
>> exist within a class.  I am not in a good position to comment on
>> concurrency problems that might arise through the interactions among
>> classes.
>>
>> Bryan
>>
>> [1] https://issues.apache.org/jira/browse/RIVER-416
>>
>> On 4/1/13 6:14 AM, "Peter Firmstone" <ji...@zeus.net.au> wrote:
>>
>> > The attachments will be removed from the list, so I've cc'd you,
>>anyone
>> > who's interested, let me know I can forward the attachments.  They can
>> > be opened with jvisualvm.
>> >
>> > The profiling isn't perfect, the test runs for about 8.5 minutes, so
>> > hotspot should have kicked in relatively early in both test runs.
>> >
>> > I guess a significant problem is: the more I remove performance
>> > impediments, like unnecessary DNS calls, the faster multithreading and
>> > context switching gets.
>> >
>> > Not only did the old policy providers create contention, but it was
>> > slower for single threaded performance (I'll have to run the previous
>> > release branch for comparison when I get some time).
>> >
>> > The URIGrant.implies call is now down to .228 ms per invocation, down
>> > from 1.68 ms per invocation this week, which was already quite good
>>(on
>> > old UltraSparcII hardware), during stress tests this method is called
>> > almost 40,000 times.
>> >
>> > In comparison the old policy provider which required a DNS call (every
>> > time CodeSource.implies is called, functionality now replaced by
>> > URIGrant.implies), the old policy provider also cached all
>>Permission's
>> > in highly contended PermissionCollection's, which during network calls
>> > invoked SocketPermission.implies, possibly for every SocketPermission
>>in
>> > the PermissionCollection, DNS is also consulted by
>> > SocketPermission.implies, while synchronized, ouch!
>> >
>> > URIGrant.implies is non blocking, that's right zero contention.
>> > SocketPermission.implies DNS calls can be avoided in most cases if
>> > PermissionComparator finds an exact match or wild card.
>> >
>> > This is without the CombinerSecurityManager, which improves security
>> > performance by a factor of 10 (SocketPermission is only checked once
>>for
>> > each AccessControlContext).
>> >
>> > PreferredClassProvider no longer creates unnecessary DNS calls, and
>> > neither does SecureClassLoader,  URLClassLoader or
>>PreferredClassLoader.
>> >
>> > So the good news is the next release will feel much faster, the bad
>>news
>> > is that existing concurrency bugs that previously didn't appear during
>> > test runs, but were likely to manifest in production, are now occurring
>> > during testing.  There's more good news, we fixed a number of
>> > concurrency bugs since the last release too, I guess I have to draw
>>the
>> > line somewhere and cut a release.
>> >
>> > DNS calls haven't been completely eliminated as some are still
>> > necessary, but a multitude of unnecessary DNS calls have been
>>eliminated.
>> >
>> > Regards,
>> >
>> > Peter.
>> >
>> > Peter Firmstone wrote:
>> > > They've passed more consistently in the past, they're either
>> > > concurrency bugs or network timing related, it would be nice to at
>> > > least determine if it's the former or latter.
>> > >
>> > > Cheers,
>> > >
>> > > Peter.
>> > >
>> > > Tom Hobbs wrote:
>> > > > Are these all new failures, i.e. were they working before?  Or
>>are they
>> > > > "new" failures in that the test categories have only recently been
>> > > > reactivated and the failures discovered?
>> > > >
>> > > > Are they a big enough blocker to stop a release?
>> > > >
>> > > >
>> > > > On Sun, Mar 31, 2013 at 11:31 PM, Peter Firmstone
>><ji...@zeus.net.au>
>> > > > wrote:
>> > > >
>> > > >
>> > > > > The following test fails 30 times in a run of 130 tests:
>> > > > >
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > > [java]  #            of          tests    passed  =            0
>> > > > > [java]  #            of          tests    passed  =            1
>> > > > >
>> > > > >
>> > > > >
>> > > > >
>> > > > >
>> > > > >
>> > > > >                100
>> > > > >
>> > > > >
>> > > > >
>> > > > >
>> > > > > Buildfile: build.xml
>> > > > >
>> > > > > qa.run-tests:
>> > > > >
>> > > > > james-brown:
>> > > > >    [delete] Deleting directory /opt/src/River_Fixed/**
>> > > > > peterConcurrentPolicy/qa/soul
>> > > > >      [mkdir] Created dir: /opt/src/River_Fixed/**
>> > > > > peterConcurrentPolicy/qa/soul
>> > > > >      [touch] Creating
>> > > > > /opt/src/River_Fixed/**peterConcurrentPolicy/qa/soul/*
>> > > > > *soul.201303312239034808
>> > > > >
>> > > > > run-tests:
>> > > > >        [java]
>> > > > >        [java] ------------------------------**-----------
>> > > > >        [java] CONFIGURATION FILE:
>> > > > >        [java]
>> > > > >        [java]
>>/opt/src/River_Fixed/**peterConcurrentPolicy/qa/src/**
>> > > > > com/sun/jini/test/resources/**qaHarness.prop
>> > > > >        [java]
>> > > > >        [java] ------------------------------**-----------
>> > > > >        [java] SETTING UP THE TEST LIST:
>> > > > >        [java]
>> > > > >        [java]      Adding test:
>> > > > > com/sun/jini/test/spec/**javaspace/conformance/
>> > > > > **snapshot/**SnapshotExpirationNotifyTest.**td
>> > > > >        [java]
>> > > > >        [java] ------------------------------**-----------
>> > > > >        [java] GENERAL HARNESS CONFIGURATION INFORMATION:
>> > > > >        [java]
>> > > > >        [java]      Date started:
>> > > > >        [java]            Sun Mar 31 22:39:37 EST 2013
>> > > > >        [java]      Installation directory of the JSK:
>> > > > >        [java]
>>com.sun.jini.jsk.home=/opt/**src/River_Fixed/**
>> > > > > peterConcurrentPolicy
>> > > > >        [java]      Installation directory of the harness:
>> > > > >        [java]
>>com.sun.jini.qa.home=/opt/src/**River_Fixed/**
>> > > > > peterConcurrentPolicy/qa
>> > > > >        [java]      Categories being tested:
>> > > > >        [java]            categories=No Categories
>> > > > >        [java] ------------------------------**-----------
>> > > > >        [java] ENVIRONMENT PROPERTIES:
>> > > > >        [java]
>> > > > >        [java]      JVM information:
>> > > > >        [java]            Java HotSpot(TM) Server VM, 20.5-b03,
>>32 bit VM mode
>> > > > >        [java]            Sun Microsystems Inc.
>> > > > >        [java]      OS information:
>> > > > >        [java]            SunOS, 5.10, sparc
>> > > > >        [java]
>> > > > >        [java] ------------------------------**-----------
>> > > > >        [java] STARTING TO RUN THE TESTS
>> > > > >        [java]
>> > > > >        [java]
>> > > > >        [java] Running
>>com/sun/jini/test/spec/**javaspace/conformance/**
>> > > > > snapshot/**SnapshotExpirationNotifyTest.**td
>> > > > >        [java] Time is Sun Mar 31 22:39:38 EST 2013
>> > > > >        [java] Starting test in separate process with command:
>> > > > >        [java] /usr/jdk/jdk1.6.0_30/jre/bin/**java
>> > > > >
>> > > > >
>>-Djava.security.manager=org.**apache.river.api.security.**CombinerSecur
>> > > > > ityManager
>> > > > >
>> > > > > -Djava.security.policy=file:/**opt/src/River_Fixed/**
>> > > > > peterConcurrentPolicy/qa/**harness/policy/defaulttest.**policy
>> > > > >
>> > > > >
>>-Djava.rmi.server.codebase=htt**p://bluto:9082/qa1-javaspace-**dl.jar<h
>> > > > > ttp://bluto:9082/qa1-javaspace-dl.jar>-cp
>> > > > > /opt/src/River_Fixed/
>> > > > >
>> > > > >
>>**peterConcurrentPolicy/qa/lib/**jiniharness.jar:/opt/src/**River_Fixed
>> > > > > /**
>> > > > >
>> > > > > peterConcurrentPolicy/qa/lib/**jinitests.jar:/opt/src/River_**
>> > > > >
>> > > > >
>>Fixed/peterConcurrentPolicy/**lib/jsk-platform.jar:/opt/src/**River_Fix
>> > > > > ed/
>> > > > >
>> > > > >
>>**peterConcurrentPolicy/lib/jsk-**lib.jar:/opt/src/River_Fixed/**
>> > > > >
>> > > > >
>>peterConcurrentPolicy/lib/**high-scale-lib.jar:/opt/src/**River_Fixed/*
>> > > > > *
>> > > > >
>> > > > > peterConcurrentPolicy/lib/**custard-apple-1.0.2.jar -ea -esa
>>-client
>> > > > > -Djava.ext.dirs=/usr/jdk/jdk1.**6.0_30/jre/lib/ext:/usr/jdk/**
>> > > > >
>> > > > >
>>packages/lib/ext:/opt/src/**River_Fixed/**peterConcurrentPolicy/qa/lib-
>> > > > > **
>> > > > >
>> > > > > ext:/opt/src/River_Fixed/**peterConcurrentPolicy/lib-ext
>> > > > > -Dcom.sun.jini.jsk.port=9080 -Dcom.sun.jini.qa.port=9081
>> > > > >
>>-Dcom.sun.jini.jsk.home=/opt/**src/River_Fixed/**peterConcurrentPolicy
>> > > > >
>> > > > >
>>-Dcom.sun.jini.qa.home=/opt/**src/River_Fixed/**peterConcurrentPolicy/q
>> > > > > a
>> > > > >
>> > > > > -Dcom.sun.jini.qa.harness.**harnessJar=/opt/src/River_**
>> > > > > Fixed/peterConcurrentPolicy/**qa/lib/jiniharness.jar
>> > > > > -Dcom.sun.jini.qa.harness.**testJar=/opt/src/River_Fixed/**
>> > > > > peterConcurrentPolicy/qa/lib/**jinitests.jar
>> > > > > -Dcom.sun.jini.qa.harness.**runjiniserver=true
>> > > > > -Dcom.sun.jini.qa.harness.**runkitserver=true
>> > > > > -Djava.security.properties=*
>> > > > > *file:/opt/src/River_Fixed/**peterConcurrentPolicy/qa/**
>> > > > > harness/trust/dynamic-policy.**properties
>> > > > > -Dcom.sun.jini.qa.harness.**testhosts=
>> > > > >
>>-Djava.util.logging.config.**file=/home/peter/logging.**properties
>> > > > >
>> > > > >
>>-Dcom.sun.jini.test.home=/opt/**src/River_Fixed/**peterConcurrentPolicy
>> > > > > /qa
>> > > > >
>> > > > > -Dcom.sun.jini.test.port=9082 -Dcom.sun.jini.qa.harness.**
>> > > > > policies=file:/opt/src/River_**Fixed/peterConcurrentPolicy/**
>> > > > > qa/src/com/sun/jini/test/**resources/jinitest.policy
>> > > > > -Djava.ext.dirs=/usr/jdk/jdk1.**6.0_30/jre/lib/ext:/usr/jdk/**
>> > > > >
>> > > > >
>>packages/lib/ext:/opt/src/**River_Fixed/**peterConcurrentPolicy/qa/lib-
>> > > > > **
>> > > > >
>> > > > > ext:/opt/src/River_Fixed/**peterConcurrentPolicy/lib-ext
>> > > > > com.sun.jini.qa.harness.**MasterTest com/sun/jini/test/spec/**
>> > > > >
>>javaspace/conformance/**snapshot/**SnapshotExpirationNotifyTest.**td
>> > > > >        [java] com.sun.jini.qa.harness.**TestException: Not all
>> > > > > listeners've
>> > > > > got expected number of events.
>> > > > >        [java]        at
>>com.sun.jini.test.spec.**javaspace.conformance.**
>> > > > > snapshot.**SnapshotExpirationNotifyTest.**run(**
>> > > > > SnapshotExpirationNotifyTest.**java:370)
>> > > > >        [java]        at
>> > > > > com.sun.jini.qa.harness.**MasterTest.doTest(MasterTest.*
>> > > > > *java:256)
>> > > > >        [java]        at
>> > > > > com.sun.jini.qa.harness.**MasterTest.main(MasterTest.**
>> > > > > java:144)
>> > > > >        [java]
>> > > > >        [java] TIME: 10:42:54 PM
>> > > > >        [java]
>> > > > >        [java] Test process was destroyed and returned code 1
>> > > > >        [java]
>> > > > > com/sun/jini/test/spec/**javaspace/conformance/**snapshot/**
>> > > > > SnapshotExpirationNotifyTest.**td
>> > > > >        [java] Test Failed: Test Failed:
>> > > > > com.sun.jini.qa.harness.**TestException:
>> > > > > Not all listeners've got expected number of events.
>> > > > >        [java]
>> > > > >        [java]
>> > > > >        [java] ------------------------------**-----------
>> > > > >        [java]
>> > > > >        [java] SUMMARY ==============================**===
>> > > > >        [java]
>> > > > >        [java]
>> > > > > com/sun/jini/test/spec/**javaspace/conformance/**snapshot/**
>> > > > > SnapshotExpirationNotifyTest.**td
>> > > > >        [java] Test Failed: Test Failed:
>> > > > > com.sun.jini.qa.harness.**TestException:
>> > > > > Not all listeners've got expected number of events.
>> > > > >        [java]
>> > > > >        [java] ------------------------------**-----------
>> > > > >        [java]
>> > > > >        [java] # of tests started    = 1
>> > > > >        [java] # of tests completed = 1
>> > > > >        [java] # of tests passed      = 0
>> > > > >        [java] # of tests failed      = 1
>> > > > >        [java]
>> > > > >        [java] ------------------------------**-----------
>> > > > >        [java]
>> > > > >        [java]      Date finished:
>> > > > >        [java]            Sun Mar 31 22:42:59 EST 2013
>> > > > >        [java]      Time elapsed:
>> > > > >        [java]            201 seconds
>> > > > >        [java]
>> > > > >        [java] Java Result: 1
>> > > > >
>> > > > > collect-result:
>> > > > >
>> > > > > BUILD FAILED
>> > > > > /opt/src/River_Fixed/**peterConcurrentPolicy/build.**xml:2105:
>>The
>> > > > > following error occurred while executing this line:
>> > > > > /opt/src/River_Fixed/**peterConcurrentPolicy/qa/**build.xml:357:
>> > > > > condition satisfied
>> > > > >
>> > > > > Total time: 3 minutes 30 seconds
>> > > > >
>> > > > >
>> > > > >
>> > > >
>> > > >
>> > >
>> > >
>> >
>>
>


Re: test failure repeatability

Posted by Peter <ji...@zeus.net.au>.
The big problem is there are fewer concurrency bugs in the present branch and there are definitely more in the previous release, they just haven't presented in test in previous releases, although they have in deployment.

The issue appears to be with javaspaces / lease / transactions.

It's also a possibility that there's just a timing issue with the tests.

I'd like to fix the tests before releasing, it's unfortunate the last fix to jeri seemed to expose these new test failures.  It's a little like fixing rust in an old car, the more you remove, the more you find.

What I really need is more help; I don't work on this full time, so it takes longer than a team working on commercial software.

Peter.
----- Original message -----
> I am uncomfortable with a release that has known concurrency problems.  I
> am also uncomfortable that the custom Levels serialization change by
> Oracle [1] has broken the current release for new JVMs.  I would like to
> see a minor release which fixes that serialization problem and a candidate
> release which gives people a chance to discover concurrency issues without
> risking a release that is known to be unstable.
>
> I'm happy to review a few of the classes with known concurrency problems
> to see if I can help nail some of these bugs.  Since I do not know the
> river internals, I would only be able to spot concurrency problems that
> exist within a class.  I am not in a good position to comment on
> concurrency problems that might arise through the interactions among
> classes.
>
> Bryan
>
> [1] https://issues.apache.org/jira/browse/RIVER-416
>
> On 4/1/13 6:14 AM, "Peter Firmstone" <ji...@zeus.net.au> wrote:
>
> > The attachments will be removed from the list, so I've cc'd you, anyone
> > who's interested, let me know I can forward the attachments.  They can
> > be opened with jvisualvm.
> >
> > The profiling isn't perfect, the test runs for about 8.5 minutes, so
> > hotspot should have kicked in relatively early in both test runs.
> >
> > I guess a significant problem is; the more I remove performance
> > impedances, like unnecessary DNS calls, the faster multithreading and
> > context switching gets.
> >
> > Not only did the old policy providers create contention, but it was
> > slower for single threaded performance (I'll have to run the previous
> > release branch for comparison when I get some time).
> >
> > The URIGrant.implies call is now down to .228 ms per invocation, down
> > from 1.68 ms per invocation this week, which was already quite good (on
> > old UltraSparcII hardware), during stress tests this method is called
> > almost 40,000 times.
> >
> > In comparison the old policy provider which required a DNS call (every
> > time CodeSource.implies is called, functionality now replaced by
> > URIGrant.implies), the old policy provider also cached all Permission's
> > in highly contended PermissionCollection's, which during network calls
> > invoked SocketPermission.implies, possibly for every SocketPermission in
> > the PermissionCollection, DNS is also consulted by
> > SocketPermission.implies, while synchronized, ouch!
> >
> > URIGrant.implies is non blocking, that's right zero contention.
> > SocketPermission.implies DNS calls can be avoided in most cases if
> > PermissionComparator finds an exact match or wild card.
> >
> > This is without the CombinerSecurityManager, which improves security
> > performance by a factor of 10 (SocketPermission is only checked once for
> > each AccessControlContext).
> >
> > PreferredClassProvider no longer creates unnecessary DNS calls, and
> > neither does SecureClassLoader,  URLClassLoader or PreferredClassLoader.
> >
> > So the good news is the next release will feel much faster, the bad news
> > is that existing concurrency bugs that previously didn't appear during
> > test runs, but likely to manifest during production are now occurring
> > during testing.  There's more good news, we fixed a number of
> > concurrency bugs since the last release too, I guess I have to draw the
> > line somewhere and cut a release.
> >
> > DNS calls haven't been completely eliminated as some are still
> > necessary, but a multitude of unnecessary DNS calls have been eliminated.
> >
> > Regards,
> >
> > Peter.
> >
> > Peter Firmstone wrote:
> > > They've passed more consistently in the past, they're either
> > > concurrency bugs or network timing related, it would be nice to at
> > > least determine if it's the former or latter.
> > >
> > > Cheers,
> > >
> > > Peter.
> > >
> > > Tom Hobbs wrote:
> > > > Are these all new failures, i.e. were they working before?  Or are they
> > > > "new" failures in that the test categories have only recently been
> > > > reactivated and the failures discovered?
> > > >
> > > > Are they a big enough blocker to stop a release?
> > > >
> > > >
> > > > On Sun, Mar 31, 2013 at 11:31 PM, Peter Firmstone <ji...@zeus.net.au>
> > > > wrote:
> > > >
> > > >
> > > > > The following test fails 30 times in a run of 130 tests:
> > > > >
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            1
> > > > > [java]  #            of          tests    passed  =            0
> > > > > [java]  #            of          tests    passed  =            1
> > > > >
> > > > >
> > > > >
> > > > >
> > > > >
> > > > >
> > > > >                100
> > > > >
> > > > >
> > > > >
> > > > >
> > > > > Buildfile: build.xml
> > > > >
> > > > > qa.run-tests:
> > > > >
> > > > > james-brown:
> > > > >    [delete] Deleting directory /opt/src/River_Fixed/**
> > > > > peterConcurrentPolicy/qa/soul
> > > > >      [mkdir] Created dir: /opt/src/River_Fixed/**
> > > > > peterConcurrentPolicy/qa/soul
> > > > >      [touch] Creating
> > > > > /opt/src/River_Fixed/**peterConcurrentPolicy/qa/soul/*
> > > > > *soul.201303312239034808
> > > > >
> > > > > run-tests:
> > > > >        [java]
> > > > >        [java] ------------------------------**-----------
> > > > >        [java] CONFIGURATION FILE:
> > > > >        [java]
> > > > >        [java]      /opt/src/River_Fixed/**peterConcurrentPolicy/qa/src/**
> > > > > com/sun/jini/test/resources/**qaHarness.prop
> > > > >        [java]
> > > > >        [java] ------------------------------**-----------
> > > > >        [java] SETTING UP THE TEST LIST:
> > > > >        [java]
> > > > >        [java]      Adding test:
> > > > > com/sun/jini/test/spec/**javaspace/conformance/
> > > > > **snapshot/**SnapshotExpirationNotifyTest.**td
> > > > >        [java]
> > > > >        [java] ------------------------------**-----------
> > > > >        [java] GENERAL HARNESS CONFIGURATION INFORMATION:
> > > > >        [java]
> > > > >        [java]      Date started:
> > > > >        [java]            Sun Mar 31 22:39:37 EST 2013
> > > > >        [java]      Installation directory of the JSK:
> > > > >        [java]            com.sun.jini.jsk.home=/opt/**src/River_Fixed/**
> > > > > peterConcurrentPolicy
> > > > >        [java]      Installation directory of the harness:
> > > > >        [java]            com.sun.jini.qa.home=/opt/src/**River_Fixed/**
> > > > > peterConcurrentPolicy/qa
> > > > >        [java]      Categories being tested:
> > > > >        [java]            categories=No Categories
> > > > >        [java] ------------------------------**-----------
> > > > >        [java] ENVIRONMENT PROPERTIES:
> > > > >        [java]
> > > > >        [java]      JVM information:
> > > > >        [java]            Java HotSpot(TM) Server VM, 20.5-b03, 32 bit VM mode
> > > > >        [java]            Sun Microsystems Inc.
> > > > >        [java]      OS information:
> > > > >        [java]            SunOS, 5.10, sparc
> > > > >        [java]
> > > > >        [java] ------------------------------**-----------
> > > > >        [java] STARTING TO RUN THE TESTS
> > > > >        [java]
> > > > >        [java]
> > > > >        [java] Running com/sun/jini/test/spec/**javaspace/conformance/**
> > > > > snapshot/**SnapshotExpirationNotifyTest.**td
> > > > >        [java] Time is Sun Mar 31 22:39:38 EST 2013
> > > > >        [java] Starting test in separate process with command:
> > > > >        [java] /usr/jdk/jdk1.6.0_30/jre/bin/**java
> > > > >
> > > > > -Djava.security.manager=org.**apache.river.api.security.**CombinerSecur
> > > > > ityManager
> > > > >
> > > > > -Djava.security.policy=file:/**opt/src/River_Fixed/**
> > > > > peterConcurrentPolicy/qa/**harness/policy/defaulttest.**policy
> > > > >
> > > > > -Djava.rmi.server.codebase=htt**p://bluto:9082/qa1-javaspace-**dl.jar<h
> > > > > ttp://bluto:9082/qa1-javaspace-dl.jar>-cp
> > > > > /opt/src/River_Fixed/
> > > > >
> > > > > **peterConcurrentPolicy/qa/lib/**jiniharness.jar:/opt/src/**River_Fixed
> > > > > /**
> > > > >
> > > > > peterConcurrentPolicy/qa/lib/**jinitests.jar:/opt/src/River_**
> > > > >
> > > > > Fixed/peterConcurrentPolicy/**lib/jsk-platform.jar:/opt/src/**River_Fix
> > > > > ed/
> > > > >
> > > > > **peterConcurrentPolicy/lib/jsk-**lib.jar:/opt/src/River_Fixed/**
> > > > >
> > > > > peterConcurrentPolicy/lib/**high-scale-lib.jar:/opt/src/**River_Fixed/*
> > > > > *
> > > > >
> > > > > peterConcurrentPolicy/lib/**custard-apple-1.0.2.jar -ea -esa -client
> > > > > -Djava.ext.dirs=/usr/jdk/jdk1.**6.0_30/jre/lib/ext:/usr/jdk/**
> > > > >
> > > > > packages/lib/ext:/opt/src/**River_Fixed/**peterConcurrentPolicy/qa/lib-
> > > > > **
> > > > >
> > > > > ext:/opt/src/River_Fixed/**peterConcurrentPolicy/lib-ext
> > > > > -Dcom.sun.jini.jsk.port=9080 -Dcom.sun.jini.qa.port=9081
> > > > > -Dcom.sun.jini.jsk.home=/opt/**src/River_Fixed/**peterConcurrentPolicy
> > > > >
> > > > > -Dcom.sun.jini.qa.home=/opt/**src/River_Fixed/**peterConcurrentPolicy/q
> > > > > a
> > > > >
> > > > > -Dcom.sun.jini.qa.harness.**harnessJar=/opt/src/River_**
> > > > > Fixed/peterConcurrentPolicy/**qa/lib/jiniharness.jar
> > > > > -Dcom.sun.jini.qa.harness.**testJar=/opt/src/River_Fixed/**
> > > > > peterConcurrentPolicy/qa/lib/**jinitests.jar
> > > > > -Dcom.sun.jini.qa.harness.**runjiniserver=true
> > > > > -Dcom.sun.jini.qa.harness.**runkitserver=true
> > > > > -Djava.security.properties=*
> > > > > *file:/opt/src/River_Fixed/**peterConcurrentPolicy/qa/**
> > > > > harness/trust/dynamic-policy.**properties
> > > > > -Dcom.sun.jini.qa.harness.**testhosts=
> > > > > -Djava.util.logging.config.**file=/home/peter/logging.**properties
> > > > >
> > > > > -Dcom.sun.jini.test.home=/opt/**src/River_Fixed/**peterConcurrentPolicy
> > > > > /qa
> > > > >
> > > > > -Dcom.sun.jini.test.port=9082 -Dcom.sun.jini.qa.harness.**
> > > > > policies=file:/opt/src/River_**Fixed/peterConcurrentPolicy/**
> > > > > qa/src/com/sun/jini/test/**resources/jinitest.policy
> > > > > -Djava.ext.dirs=/usr/jdk/jdk1.**6.0_30/jre/lib/ext:/usr/jdk/**
> > > > >
> > > > > packages/lib/ext:/opt/src/**River_Fixed/**peterConcurrentPolicy/qa/lib-
> > > > > **
> > > > >
> > > > > ext:/opt/src/River_Fixed/**peterConcurrentPolicy/lib-ext
> > > > > com.sun.jini.qa.harness.**MasterTest com/sun/jini/test/spec/**
> > > > > javaspace/conformance/**snapshot/**SnapshotExpirationNotifyTest.**td
> > > > >        [java] com.sun.jini.qa.harness.**TestException: Not all
> > > > > listeners've
> > > > > got expected number of events.
> > > > >        [java]        at com.sun.jini.test.spec.**javaspace.conformance.**
> > > > > snapshot.**SnapshotExpirationNotifyTest.**run(**
> > > > > SnapshotExpirationNotifyTest.**java:370)
> > > > >        [java]        at
> > > > > com.sun.jini.qa.harness.**MasterTest.doTest(MasterTest.*
> > > > > *java:256)
> > > > >        [java]        at
> > > > > com.sun.jini.qa.harness.**MasterTest.main(MasterTest.**
> > > > > java:144)
> > > > >        [java]
> > > > >        [java] TIME: 10:42:54 PM
> > > > >        [java]
> > > > >        [java] Test process was destroyed and returned code 1
> > > > >        [java]
> > > > > com/sun/jini/test/spec/**javaspace/conformance/**snapshot/**
> > > > > SnapshotExpirationNotifyTest.**td
> > > > >        [java] Test Failed: Test Failed:
> > > > > com.sun.jini.qa.harness.**TestException:
> > > > > Not all listeners've got expected number of events.
> > > > >        [java]
> > > > >        [java]
> > > > >        [java] ------------------------------**-----------
> > > > >        [java]
> > > > >        [java] SUMMARY ==============================**===
> > > > >        [java]
> > > > >        [java]
> > > > > com/sun/jini/test/spec/**javaspace/conformance/**snapshot/**
> > > > > SnapshotExpirationNotifyTest.**td
> > > > >        [java] Test Failed: Test Failed:
> > > > > com.sun.jini.qa.harness.**TestException:
> > > > > Not all listeners've got expected number of events.
> > > > >        [java]
> > > > >        [java] ------------------------------**-----------
> > > > >        [java]
> > > > >        [java] # of tests started    = 1
> > > > >        [java] # of tests completed = 1
> > > > >        [java] # of tests passed      = 0
> > > > >        [java] # of tests failed      = 1
> > > > >        [java]
> > > > >        [java] ------------------------------**-----------
> > > > >        [java]
> > > > >        [java]      Date finished:
> > > > >        [java]            Sun Mar 31 22:42:59 EST 2013
> > > > >        [java]      Time elapsed:
> > > > >        [java]            201 seconds
> > > > >        [java]
> > > > >        [java] Java Result: 1
> > > > >
> > > > > collect-result:
> > > > >
> > > > > BUILD FAILED
> > > > > /opt/src/River_Fixed/**peterConcurrentPolicy/build.**xml:2105: The
> > > > > following error occurred while executing this line:
> > > > > /opt/src/River_Fixed/**peterConcurrentPolicy/qa/**build.xml:357:
> > > > > condition satisfied
> > > > >
> > > > > Total time: 3 minutes 30 seconds
> > > > >
> > > > >
> > > > >
> > > >
> > > >
> > >
> > >
> >
>


Re: test failure repeatability

Posted by Peter Firmstone <ji...@zeus.net.au>.
On 1/04/2013 8:14 PM, Peter Firmstone wrote:
> The attachments will be removed from the list, so I've cc'd you, 
> anyone who's interested, let me know I can forward the attachments.  
> They can be opened with jvisualvm.
>
> The profiling isn't perfect, the test runs for about 8.5 minutes, so 
> hotspot should have kicked in relatively early in both test runs.
>
> I guess a significant problem is; the more I remove performance 
> impedances, like unnecessary DNS calls, the faster multithreading and 
> context switching gets.
>
> Not only did the old policy providers create contention, but it was 
> slower for single threaded performance (I'll have to run the previous 
> release branch for comparison when I get some time).
>
> The URIGrant.implies call is now down to .228 ms per invocation, down 
> from 1.68 ms per invocation this week, which was already quite good 
> (on old UltraSparcII hardware), during stress tests this method is 
> called almost 40,000 times.

On more recent hardware the call takes 0.035 ms per invocation (2.3GHz 
AMD Turion).  Compare that to a DNS call or a contended lock.

>
> In comparison the old policy provider which required a DNS call (every 
> time CodeSource.implies is called, functionality now replaced by 
> URIGrant.implies), the old policy provider also cached all 
> Permission's in highly contended PermissionCollection's, which during 
> network calls invoked SocketPermission.implies, possibly for every 
> SocketPermission in the PermissionCollection, DNS is also consulted by 
> SocketPermission.implies, while synchronized, ouch!
>
> URIGrant.implies is non blocking, that's right zero contention.  
> SocketPermission.implies DNS calls can be avoided in most cases if 
> PermissionComparator finds an exact match or wild card.
>
> This is without the CombinerSecurityManager, which improves security 
> performance by a factor of 10 (SocketPermission is only checked once 
> for each AccessControlContext).
>
> PreferredClassProvider no longer creates unnecessary DNS calls, and 
> neither does SecureClassLoader,  URLClassLoader or PreferredClassLoader.
>
> So the good news is the next release will feel much faster, the bad 
> news is that existing concurrency bugs that previously didn't appear 
> during test runs, but likely to manifest during production are now 
> occurring during testing.  There's more good news, we fixed a number 
> of concurrency bugs since the last release too, I guess I have to draw 
> the line somewhere and cut a release.
>
> DNS calls haven't been completely eliminated as some are still 
> necessary, but a multitude of unnecessary DNS calls have been eliminated.
>
> Regards,
>
> Peter.
>
> Peter Firmstone wrote:
>> They've passed more consistently in the past, they're either 
>> concurrency bugs or network timing related, it would be nice to at 
>> least determine if it's the former or latter.
>>
>> Cheers,
>>
>> Peter.
>>
>> Tom Hobbs wrote:
>>> Are these all new failures, i.e. were they working before?  Or are they
>>> "new" failures in that the test categories have only recently been
>>> reactivated and the failures discovered?
>>>
>>> Are they a big enough blocker to stop a release?
>>>
>>>
>>> On Sun, Mar 31, 2013 at 11:31 PM, Peter Firmstone <ji...@zeus.net.au> 
>>> wrote:
>>>
>>>
>>>> The following test fails 30 times in a run of 130 tests:
>>>>
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>>
>>>>
>>>>
>>>>
>>>>
>>>>
>>>>         100
>>>>
>>>>
>>>>
>>>>
>>>> Buildfile: build.xml
>>>>
>>>> qa.run-tests:
>>>>
>>>> james-brown:
>>>>   [delete] Deleting directory /opt/src/River_Fixed/**
>>>> peterConcurrentPolicy/qa/soul
>>>>    [mkdir] Created dir: /opt/src/River_Fixed/**
>>>> peterConcurrentPolicy/qa/soul
>>>>    [touch] Creating 
>>>> /opt/src/River_Fixed/**peterConcurrentPolicy/qa/soul/*
>>>> *soul.201303312239034808
>>>>
>>>> run-tests:
>>>>     [java]
>>>>     [java] ------------------------------**-----------
>>>>     [java] CONFIGURATION FILE:
>>>>     [java]
>>>>     [java]    /opt/src/River_Fixed/**peterConcurrentPolicy/qa/src/**
>>>> com/sun/jini/test/resources/**qaHarness.prop
>>>>     [java]
>>>>     [java] ------------------------------**-----------
>>>>     [java] SETTING UP THE TEST LIST:
>>>>     [java]
>>>>     [java]    Adding test: 
>>>> com/sun/jini/test/spec/**javaspace/conformance/
>>>> **snapshot/**SnapshotExpirationNotifyTest.**td
>>>>     [java]
>>>>     [java] ------------------------------**-----------
>>>>     [java] GENERAL HARNESS CONFIGURATION INFORMATION:
>>>>     [java]
>>>>     [java]    Date started:
>>>>     [java]       Sun Mar 31 22:39:37 EST 2013
>>>>     [java]    Installation directory of the JSK:
>>>>     [java]       com.sun.jini.jsk.home=/opt/**src/River_Fixed/**
>>>> peterConcurrentPolicy
>>>>     [java]    Installation directory of the harness:
>>>>     [java]       com.sun.jini.qa.home=/opt/src/**River_Fixed/**
>>>> peterConcurrentPolicy/qa
>>>>     [java]    Categories being tested:
>>>>     [java]       categories=No Categories
>>>>     [java] ------------------------------**-----------
>>>>     [java] ENVIRONMENT PROPERTIES:
>>>>     [java]
>>>>     [java]    JVM information:
>>>>     [java]       Java HotSpot(TM) Server VM, 20.5-b03, 32 bit VM mode
>>>>     [java]       Sun Microsystems Inc.
>>>>     [java]    OS information:
>>>>     [java]       SunOS, 5.10, sparc
>>>>     [java]
>>>>     [java] ------------------------------**-----------
>>>>     [java] STARTING TO RUN THE TESTS
>>>>     [java]
>>>>     [java]
>>>>     [java] Running com/sun/jini/test/spec/**javaspace/conformance/**
>>>> snapshot/**SnapshotExpirationNotifyTest.**td
>>>>     [java] Time is Sun Mar 31 22:39:38 EST 2013
>>>>     [java] Starting test in separate process with command:
>>>>     [java] /usr/jdk/jdk1.6.0_30/jre/bin/**java
>>>> -Djava.security.manager=org.**apache.river.api.security.**CombinerSecurityManager 
>>>>
>>>> -Djava.security.policy=file:/**opt/src/River_Fixed/**
>>>> peterConcurrentPolicy/qa/**harness/policy/defaulttest.**policy
>>>> -Djava.rmi.server.codebase=htt**p://bluto:9082/qa1-javaspace-**dl.jar<http://bluto:9082/qa1-javaspace-dl.jar>-cp 
>>>> /opt/src/River_Fixed/
>>>> **peterConcurrentPolicy/qa/lib/**jiniharness.jar:/opt/src/**River_Fixed/** 
>>>>
>>>> peterConcurrentPolicy/qa/lib/**jinitests.jar:/opt/src/River_**
>>>> Fixed/peterConcurrentPolicy/**lib/jsk-platform.jar:/opt/src/**River_Fixed/ 
>>>>
>>>> **peterConcurrentPolicy/lib/jsk-**lib.jar:/opt/src/River_Fixed/**
>>>> peterConcurrentPolicy/lib/**high-scale-lib.jar:/opt/src/**River_Fixed/** 
>>>>
>>>> peterConcurrentPolicy/lib/**custard-apple-1.0.2.jar -ea -esa -client
>>>> -Djava.ext.dirs=/usr/jdk/jdk1.**6.0_30/jre/lib/ext:/usr/jdk/**
>>>> packages/lib/ext:/opt/src/**River_Fixed/**peterConcurrentPolicy/qa/lib-** 
>>>>
>>>> ext:/opt/src/River_Fixed/**peterConcurrentPolicy/lib-ext
>>>> -Dcom.sun.jini.jsk.port=9080 -Dcom.sun.jini.qa.port=9081
>>>> -Dcom.sun.jini.jsk.home=/opt/**src/River_Fixed/**peterConcurrentPolicy
>>>> -Dcom.sun.jini.qa.home=/opt/**src/River_Fixed/**peterConcurrentPolicy/qa 
>>>>
>>>> -Dcom.sun.jini.qa.harness.**harnessJar=/opt/src/River_**
>>>> Fixed/peterConcurrentPolicy/**qa/lib/jiniharness.jar
>>>> -Dcom.sun.jini.qa.harness.**testJar=/opt/src/River_Fixed/**
>>>> peterConcurrentPolicy/qa/lib/**jinitests.jar 
>>>> -Dcom.sun.jini.qa.harness.**runjiniserver=true
>>>> -Dcom.sun.jini.qa.harness.**runkitserver=true 
>>>> -Djava.security.properties=*
>>>> *file:/opt/src/River_Fixed/**peterConcurrentPolicy/qa/**
>>>> harness/trust/dynamic-policy.**properties 
>>>> -Dcom.sun.jini.qa.harness.**testhosts=
>>>> -Djava.util.logging.config.**file=/home/peter/logging.**properties
>>>> -Dcom.sun.jini.test.home=/opt/**src/River_Fixed/**peterConcurrentPolicy/qa 
>>>>
>>>> -Dcom.sun.jini.test.port=9082 -Dcom.sun.jini.qa.harness.**
>>>> policies=file:/opt/src/River_**Fixed/peterConcurrentPolicy/**
>>>> qa/src/com/sun/jini/test/**resources/jinitest.policy
>>>> -Djava.ext.dirs=/usr/jdk/jdk1.**6.0_30/jre/lib/ext:/usr/jdk/**
>>>> packages/lib/ext:/opt/src/**River_Fixed/**peterConcurrentPolicy/qa/lib-** 
>>>>
>>>> ext:/opt/src/River_Fixed/**peterConcurrentPolicy/lib-ext
>>>> com.sun.jini.qa.harness.**MasterTest com/sun/jini/test/spec/**
>>>> javaspace/conformance/**snapshot/**SnapshotExpirationNotifyTest.**td
>>>>     [java] com.sun.jini.qa.harness.**TestException: Not all 
>>>> listeners've
>>>> got expected number of events.
>>>>     [java]     at com.sun.jini.test.spec.**javaspace.conformance.**
>>>> snapshot.**SnapshotExpirationNotifyTest.**run(**
>>>> SnapshotExpirationNotifyTest.**java:370)
>>>>     [java]     at 
>>>> com.sun.jini.qa.harness.**MasterTest.doTest(MasterTest.*
>>>> *java:256)
>>>>     [java]     at 
>>>> com.sun.jini.qa.harness.**MasterTest.main(MasterTest.**
>>>> java:144)
>>>>     [java]
>>>>     [java] TIME: 10:42:54 PM
>>>>     [java]
>>>>     [java] Test process was destroyed and returned code 1
>>>>     [java] 
>>>> com/sun/jini/test/spec/**javaspace/conformance/**snapshot/**
>>>> SnapshotExpirationNotifyTest.**td
>>>>     [java] Test Failed: Test Failed: 
>>>> com.sun.jini.qa.harness.**TestException:
>>>> Not all listeners've got expected number of events.
>>>>     [java]
>>>>     [java]
>>>>     [java] ------------------------------**-----------
>>>>     [java]
>>>>     [java] SUMMARY ==============================**===
>>>>     [java]
>>>>     [java] 
>>>> com/sun/jini/test/spec/**javaspace/conformance/**snapshot/**
>>>> SnapshotExpirationNotifyTest.**td
>>>>     [java] Test Failed: Test Failed: 
>>>> com.sun.jini.qa.harness.**TestException:
>>>> Not all listeners've got expected number of events.
>>>>     [java]
>>>>     [java] ------------------------------**-----------
>>>>     [java]
>>>>     [java] # of tests started   = 1
>>>>     [java] # of tests completed = 1
>>>>     [java] # of tests passed    = 0
>>>>     [java] # of tests failed    = 1
>>>>     [java]
>>>>     [java] ------------------------------**-----------
>>>>     [java]
>>>>     [java]    Date finished:
>>>>     [java]       Sun Mar 31 22:42:59 EST 2013
>>>>     [java]    Time elapsed:
>>>>     [java]       201 seconds
>>>>     [java]
>>>>     [java] Java Result: 1
>>>>
>>>> collect-result:
>>>>
>>>> BUILD FAILED
>>>> /opt/src/River_Fixed/**peterConcurrentPolicy/build.**xml:2105: The
>>>> following error occurred while executing this line:
>>>> /opt/src/River_Fixed/**peterConcurrentPolicy/qa/**build.xml:357:
>>>> condition satisfied
>>>>
>>>> Total time: 3 minutes 30 seconds
>>>>
>>>>
>>>
>>
>>
>


Re: test failure repeatability - TaskManager

Posted by Peter <ji...@zeus.net.au>.
----- Original message -----
> One possibility is that some cases may just be "these tasks must not run
> in parallel" rather than an actual ordering.
>
> Also, I'm not sure all ordering constraints that are needed are
> necessarily implemented. The whole thing feels messy to me.
>

Agreed, we should probably consider each case individually. I also noticed there's a configuration property that allows a TaskManager instance to be injected on a number of occasions, which suggests there might be some sharing.

Peter.

> On 4/3/2013 2:04 PM, Dan Creswell wrote:
> > I'm with you. My first step was going to be reviewing where runAfter is
> > used, how often etc. I'd first like to be convinced that all the ordering
> > constraints are actually required and can't be circumvented/dropped.
> >
> > On 3 April 2013 22:01, Patricia Shanahan <pa...@acm.org> wrote:
> >
> > > I agree with the idea of understanding the use cases before designing the
> > > solution, and with using standard API classes as much as possible. The
> > > table I sent you was intended as a first step towards that.
> > >
> > > I'm not convinced that the right solution is a single TaskManager
> > > successor. Different TaskManager instances may have different use cases,
> > > and separating them may lead to several simpler solutions, each of which as
> > > a narrower set of requirements.
>


Re: test failure repeatability - TaskManager

Posted by Patricia Shanahan <pa...@acm.org>.
One possibility is that some cases may just be "these tasks must not run 
in parallel" rather than an actual ordering.

Also, I'm not sure all ordering constraints that are needed are 
necessarily implemented. The whole thing feels messy to me.

On 4/3/2013 2:04 PM, Dan Creswell wrote:
> I'm with you. My first step was going to be reviewing where runAfter is
> used, how often etc. I'd first like to be convinced that all the ordering
> constraints are actually required and can't be circumvented/dropped.
>
> On 3 April 2013 22:01, Patricia Shanahan <pa...@acm.org> wrote:
>
>> I agree with the idea of understanding the use cases before designing the
>> solution, and with using standard API classes as much as possible. The
>> table I sent you was intended as a first step towards that.
>>
>> I'm not convinced that the right solution is a single TaskManager
>> successor. Different TaskManager instances may have different use cases,
>> and separating them may lead to several simpler solutions, each of which as
>> a narrower set of requirements.


Re: test failure repeatability - TaskManager

Posted by Dan Creswell <da...@gmail.com>.
I'm with you. My first step was going to be reviewing where runAfter is
used, how often etc. I'd first like to be convinced that all the ordering
constraints are actually required and can't be circumvented/dropped.

On 3 April 2013 22:01, Patricia Shanahan <pa...@acm.org> wrote:

> I agree with the idea of understanding the use cases before designing the
> solution, and with using standard API classes as much as possible. The
> table I sent you was intended as a first step towards that.
>
> I'm not convinced that the right solution is a single TaskManager
> successor. Different TaskManager instances may have different use cases,
> and separating them may lead to several simpler solutions, each of which as
> a narrower set of requirements.
>
> Patricia
>
>
>
> On 4/3/2013 1:55 PM, Peter wrote:
>
>> Gut feeling suggests the solution will be executor based, so you're
>> asking good questions, I think we need to understand the use cases
>> better and probably redesign dependant code too.
>>
>> One example of retry, the task will continue attemtping to retry for
>> an entire day.
>>
>> We might need some kind of delay queue, where dependant tasks can
>> signal to following tasks when it's ok to execute.
>>
>> ----- Original message -----
>>
>>> I am not clear on the semantics for runAfter, but maybe this can
>>> be achieved by wrapping a Runnable within another Runnable such
>>> that the 2nd runnable is automatically scheduled after the first
>>> has succeeded? Likewise, it is possible to wrap a Runnable in order
>>> to automatically retry if it throws an exception.
>>>
>>> There are people who are experts at these patterns, but an example
>>> is given (below my signature) for an Executor that wraps an
>>> ExecutorService and queues Runnable instances with limited
>>> parallelism.  It hooks the Runnable in its own run() method.
>>>
>>> If you use a ScheduledExecutorService, you can queue a task to run
>>> with an initial and repeated delay (or at a repeated interval).
>>> The task will be rescheduled *unless* it throws an exception.  This
>>> could be reused to periodically run-try a task after a timeout if
>>> we convert an error thrown in the task into "no error" (hence run
>>> after a fixed delay) and throw out a known exception if there is no
>>> error (to terminate the retry of the task).  A bit of a hack, but
>>> it leverages existing code for re-running a task with a fixed
>>> delay.
>>>
>>> Thanks, Bryan
>>>
>>> package com.bigdata.util.concurrent;
>>>
>>> import java.util.concurrent.BlockingQueue; import
>>> java.util.concurrent.Callable; import
>>> java.util.concurrent.Executor; import
>>> java.util.concurrent.ExecutorService; import
>>> java.util.concurrent.Future; import
>>> java.util.concurrent.FutureTask; import
>>> java.util.concurrent.LinkedBlockingDeque; import
>>> java.util.concurrent.RejectedExecutionException; import
>>> java.util.concurrent.Semaphore;
>>>
>>> import org.apache.log4j.Logger;
>>>
>>> /** * A fly weight helper class that runs tasks either sequentially
>>> or with limited * parallelism against some thread pool. Deadlock
>>> can arise when limited * parallelism is applied if there are
>>> dependencies among the tasks. Limited * parallelism is enforced
>>> using a counting {@link Semaphore}. New tasks can * start iff the
>>> latch is non-zero. The maximum parallelism is the minimum of * the
>>> value specified to the constructor and the potential parallelism
>>> of the * delegate service. * <p> * Note: The pattern for running
>>> tasks on this service is generally to * {@link #execute(Runnable)}
>>> a {@link Runnable} and to make that * {@link Runnable} a {@link
>>> FutureTask} if you want to await the {@link Future} * of a {@link
>>> Runnable} or {@link Callable} or otherwise manage its execution. *
>>> <p> * Note: This class can NOT be trivially wrapped as an {@link
>>> ExecutorService} * since the resulting delegation pattern for
>>> submit() winds up invoking * execute() on the delegate {@link
>>> ExecutorService} rather than on this class. * * @author <a
>>> href="mailto:thompsonbry@users.sourceforge.net">Bryan
>>> Thompson</a>
>>> * @version $Id: LatchedExecutor.java 6749 2012-12-03 14:42:48Z
>>> thompsonbry $ */ public class LatchedExecutor implements Executor
>>> {
>>>
>>> private static final transient Logger log = Logger
>>> .getLogger(LatchedExecutor.class);
>>>
>>> /** * The delegate executor. */ private final Executor executor;
>>>
>>> /** * This is used to limit the concurrency with which tasks
>>> submitted to this * class may execute on the delegate {@link
>>> #executor}. */ private final Semaphore semaphore;
>>>
>>> /** * A thread-safe blocking queue of pending tasks. * * @todo The
>>> capacity of this queue does not of necessity need to be *
>>> unbounded. */ private final BlockingQueue<Runnable> queue = new
>>> LinkedBlockingDeque<Runnable>(/*unbounded*/);
>>>
>>> private final int nparallel;
>>>
>>> /** * Return the maximum parallelism allowed by this {@link
>>> Executor}. */ public int getNParallel() {
>>>
>>> return nparallel;
>>>
>>> }
>>>
>>> public LatchedExecutor(final Executor executor, final int
>>> nparallel) {
>>>
>>> if (executor == null) throw new IllegalArgumentException();
>>>
>>> if (nparallel < 1) throw new IllegalArgumentException();
>>>
>>> this.executor = executor;
>>>
>>> this.nparallel = nparallel;
>>>
>>> this.semaphore = new Semaphore(nparallel);
>>>
>>> }
>>>
>>> public void execute(final Runnable r) { if (!queue.offer(new
>>> Runnable() { /* * Wrap the Runnable in a class that will start the
>>> next Runnable * from the queue when it completes. */ public void
>>> run() { try { r.run(); } finally { scheduleNext(); } } })) { // The
>>> queue is full. throw new RejectedExecutionException(); } if
>>> (semaphore.tryAcquire()) { // We were able to obtain a permit, so
>>> start another task. scheduleNext(); } }
>>>
>>> /** * Schedule the next task if one is available (non-blocking). *
>>> <p> * Pre-condition: The caller has a permit. */ private void
>>> scheduleNext() { while (true) { Runnable next = null; if ((next =
>>> queue.poll()) != null) { try { executor.execute(next); return; }
>>> catch (RejectedExecutionException ex) { // log error and poll the
>>> queue again. log.error(ex, ex); continue; } } else {
>>> semaphore.release(); return; } } }
>>>
>>> }
>>>
>>>
>>>
>>
>>
>

Re: test failure repeatability - TaskManager

Posted by Patricia Shanahan <pa...@acm.org>.
I agree with the idea of understanding the use cases before designing 
the solution, and with using standard API classes as much as possible. 
The table I sent you was intended as a first step towards that.

I'm not convinced that the right solution is a single TaskManager 
successor. Different TaskManager instances may have different use cases, 
and separating them may lead to several simpler solutions, each of which 
has a narrower set of requirements.

Patricia


On 4/3/2013 1:55 PM, Peter wrote:
> Gut feeling suggests the solution will be executor based, so you're
> asking good questions, I think we need to understand the use cases
> better and probably redesign dependant code too.
>
> One example of retry, the task will continue attemtping to retry for
> an entire day.
>
> We might need some kind of delay queue, where dependant tasks can
> signal to following tasks when it's ok to execute.
>
> ----- Original message -----
>> I am not clear on the semantics for runAfter, but maybe this can
>> be achieved by wrapping a Runnable within another Runnable such
>> that the 2nd runnable is automatically scheduled after the first
>> has succeeded? Likewise, it is possible to wrap a Runnable in order
>> to automatically retry if it throws an exception.
>>
>> There are people who are experts at these patterns, but an example
>> is given (below my signature) for an Executor that wraps an
>> ExecutorService and queues Runnable instances with limited
>> parallelism.  It hooks the Runnable in its own run() method.
>>
>> If you use a ScheduledExecutorService, you can queue a task to run
>> with an initial and repeated delay (or at a repeated interval).
>> The task will be rescheduled *unless* it throws an exception.  This
>> could be reused to periodically run-try a task after a timeout if
>> we convert an error thrown in the task into "no error" (hence run
>> after a fixed delay) and throw out a known exception if there is no
>> error (to terminate the retry of the task).  A bit of a hack, but
>> it leverages existing code for re-running a task with a fixed
>> delay.
>>
>> Thanks, Bryan
>>
>> package com.bigdata.util.concurrent;
>>
>> import java.util.concurrent.BlockingQueue; import
>> java.util.concurrent.Callable; import
>> java.util.concurrent.Executor; import
>> java.util.concurrent.ExecutorService; import
>> java.util.concurrent.Future; import
>> java.util.concurrent.FutureTask; import
>> java.util.concurrent.LinkedBlockingDeque; import
>> java.util.concurrent.RejectedExecutionException; import
>> java.util.concurrent.Semaphore;
>>
>> import org.apache.log4j.Logger;
>>
>> /** * A fly weight helper class that runs tasks either sequentially
>> or with limited * parallelism against some thread pool. Deadlock
>> can arise when limited * parallelism is applied if there are
>> dependencies among the tasks. Limited * parallelism is enforced
>> using a counting {@link Semaphore}. New tasks can * start iff the
>> latch is non-zero. The maximum parallelism is the minimum of * the
>> value specified to the constructor and the potential parallelism
>> of the * delegate service. * <p> * Note: The pattern for running
>> tasks on this service is generally to * {@link #execute(Runnable)}
>> a {@link Runnable} and to make that * {@link Runnable} a {@link
>> FutureTask} if you want to await the {@link Future} * of a {@link
>> Runnable} or {@link Callable} or otherwise manage its execution. *
>> <p> * Note: This class can NOT be trivially wrapped as an {@link
>> ExecutorService} * since the resulting delegation pattern for
>> submit() winds up invoking * execute() on the delegate {@link
>> ExecutorService} rather than on this class. * * @author <a
>> href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
>> * @version $Id: LatchedExecutor.java 6749 2012-12-03 14:42:48Z
>> thompsonbry $ */ public class LatchedExecutor implements Executor
>> {
>>
>> private static final transient Logger log = Logger
>> .getLogger(LatchedExecutor.class);
>>
>> /** * The delegate executor. */ private final Executor executor;
>>
>> /** * This is used to limit the concurrency with which tasks
>> submitted to this * class may execute on the delegate {@link
>> #executor}. */ private final Semaphore semaphore;
>>
>> /** * A thread-safe blocking queue of pending tasks. * * @todo The
>> capacity of this queue does not of necessity need to be *
>> unbounded. */ private final BlockingQueue<Runnable> queue = new
>> LinkedBlockingDeque<Runnable>(/*unbounded*/);
>>
>> private final int nparallel;
>>
>> /** * Return the maximum parallelism allowed by this {@link
>> Executor}. */ public int getNParallel() {
>>
>> return nparallel;
>>
>> }
>>
>> public LatchedExecutor(final Executor executor, final int
>> nparallel) {
>>
>> if (executor == null) throw new IllegalArgumentException();
>>
>> if (nparallel < 1) throw new IllegalArgumentException();
>>
>> this.executor = executor;
>>
>> this.nparallel = nparallel;
>>
>> this.semaphore = new Semaphore(nparallel);
>>
>> }
>>
>> public void execute(final Runnable r) { if (!queue.offer(new
>> Runnable() { /* * Wrap the Runnable in a class that will start the
>> next Runnable * from the queue when it completes. */ public void
>> run() { try { r.run(); } finally { scheduleNext(); } } })) { // The
>> queue is full. throw new RejectedExecutionException(); } if
>> (semaphore.tryAcquire()) { // We were able to obtain a permit, so
>> start another task. scheduleNext(); } }
>>
>> /** * Schedule the next task if one is available (non-blocking). *
>> <p> * Pre-condition: The caller has a permit. */ private void
>> scheduleNext() { while (true) { Runnable next = null; if ((next =
>> queue.poll()) != null) { try { executor.execute(next); return; }
>> catch (RejectedExecutionException ex) { // log error and poll the
>> queue again. log.error(ex, ex); continue; } } else {
>> semaphore.release(); return; } } }
>>
>> }
>>
>>
>
>


Re: test failure repeatability - TaskManager

Posted by Peter <ji...@zeus.net.au>.
Gut feeling suggests the solution will be executor based, so you're asking good questions. I think we need to understand the use cases better and probably redesign dependent code too.

One example of retry: the task will continue attempting to retry for an entire day.

We might need some kind of delay queue, where dependent tasks can signal to following tasks when it's OK to execute.

----- Original message -----
> I am not clear on the semantics for runAfter, but maybe this can be
> achieved by wrapping a Runnable within another Runnable such that the 2nd
> runnable is automatically scheduled after the first has succeeded?
> Likewise, it is possible to wrap a Runnable in order to automatically
> retry if it throws an exception.
>
> There are people who are experts at these patterns, but an example is
> given (below my signature) for an Executor that wraps an ExecutorService
> and queues Runnable instances with limited parallelism.  It hooks the
> Runnable in its own run() method.
>
> If you use a ScheduledExecutorService, you can queue a task to run with an
> initial and repeated delay (or at a repeated interval).  The task will be
> rescheduled *unless* it throws an exception.  This could be reused to
> periodically run-try a task after a timeout if we convert an error thrown
> in the task into "no error" (hence run after a fixed delay) and throw out
> a known exception if there is no error (to terminate the retry of the
> task).  A bit of a hack, but it leverages existing code for re-running a
> task with a fixed delay.
>
> Thanks,
> Bryan
>
> package com.bigdata.util.concurrent;
>
> import java.util.concurrent.BlockingQueue;
> import java.util.concurrent.Callable;
> import java.util.concurrent.Executor;
> import java.util.concurrent.ExecutorService;
> import java.util.concurrent.Future;
> import java.util.concurrent.FutureTask;
> import java.util.concurrent.LinkedBlockingDeque;
> import java.util.concurrent.RejectedExecutionException;
> import java.util.concurrent.Semaphore;
>
> import org.apache.log4j.Logger;
>
> /**
>  * A fly weight helper class that runs tasks either sequentially or with
> limited
>  * parallelism against some thread pool. Deadlock can arise when limited
>  * parallelism is applied if there are dependencies among the tasks.
> Limited
>  * parallelism is enforced using a counting {@link Semaphore}. New tasks
> can
>  * start iff the latch is non-zero. The maximum parallelism is the minimum
> of
>  * the value specified to the constructor and the potential parallelism of
> the
>  * delegate service.
>  * <p>
>  * Note: The pattern for running tasks on this service is generally to
>  * {@link #execute(Runnable)} a {@link Runnable} and to make that
>  * {@link Runnable} a {@link FutureTask} if you want to await the {@link
> Future}
>  * of a {@link Runnable} or {@link Callable} or otherwise manage its
> execution.
>  * <p>
>  * Note: This class can NOT be trivially wrapped as an {@link
> ExecutorService}
>  * since the resulting delegation pattern for submit() winds up invoking
>  * execute() on the delegate {@link ExecutorService} rather than on this
> class.
>  *
>  * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan
> Thompson</a>
>  * @version $Id: LatchedExecutor.java 6749 2012-12-03 14:42:48Z
> thompsonbry $
> */
> public class LatchedExecutor implements Executor {
>
>        private static final transient Logger log = Logger
>                        .getLogger(LatchedExecutor.class);
>       
>        /**
>          * The delegate executor.
>          */
>        private final Executor executor;
>       
>        /**
>          * This is used to limit the concurrency with which tasks submitted to
> this
>          * class may execute on the delegate {@link #executor}.
>          */
>        private final Semaphore semaphore;
>       
>        /**
>          * A thread-safe blocking queue of pending tasks.
>          *
>          * @todo The capacity of this queue does not of necessity need to be
>          *            unbounded.
>          */
>        private final BlockingQueue<Runnable> queue = new
> LinkedBlockingDeque<Runnable>(/*unbounded*/);
>
>        private final int nparallel;
>       
>        /**
>          * Return the maximum parallelism allowed by this {@link Executor}.
>          */
>        public int getNParallel() {
>           
>            return nparallel;
>           
>        }
>       
>        public LatchedExecutor(final Executor executor, final int nparallel) {
>
>                if (executor == null)
>                        throw new IllegalArgumentException();
>
>                if (nparallel < 1)
>                        throw new IllegalArgumentException();
>
>                this.executor = executor;
>
>                this.nparallel = nparallel;
>               
>                this.semaphore = new Semaphore(nparallel);
>
>        }
>
>        public void execute(final Runnable r) {
>                if (!queue.offer(new Runnable() {
>                        /*
>                          * Wrap the Runnable in a class that will start the next
> Runnable
>                          * from the queue when it completes.
>                          */
>                        public void run() {
>                                try {
>                                        r.run();
>                                } finally {
>                                        scheduleNext();
>                                }
>                        }
>                })) {
>                        // The queue is full.
>                        throw new RejectedExecutionException();
>                }
>                if (semaphore.tryAcquire()) {
>                        // We were able to obtain a permit, so start another task.
>                        scheduleNext();
>                }
>        }
>
>        /**
>          * Schedule the next task if one is available (non-blocking).
>          * <p>
>          * Pre-condition: The caller has a permit.
>          */
>        private void scheduleNext() {
>                while (true) {
>                        Runnable next = null;
>                        if ((next = queue.poll()) != null) {
>                                try {
>                                        executor.execute(next);
>                                        return;
>                                } catch (RejectedExecutionException ex) {
>                                        // log error and poll the queue again.
>                                        log.error(ex, ex);
>                                        continue;
>                                }
>                        } else {
>                                semaphore.release();
>                                return;
>                        }
>                }
>        }
>
> }
>
>


Re: test failure repeatability - TaskManager

Posted by Patricia Shanahan <pa...@acm.org>.
runAfter is a method in the TaskManager.Task interface, implemented by
each of its tasks:

/**
* Return true if this task must be run after at least one task
* in the given task list with an index less than size (size may be
* less than tasks.size()).  Using List.get will be more efficient
* than List.iterator.
*
* @param tasks the tasks to consider.  A read-only List, with all
* elements instanceof Task.
* @param size elements with index less than size should be considered
*

The notes I sent to Peter were part of an effort on my part to improve
performance. This has O(N^2) tendencies, because whenever a task
finishes the TaskManager has to ask each waiting task whether it still
needs to wait for any older task. I wanted to change it so that
TaskManager would know which task was being waited for. It could then
associate with one task a list of tasks that need to be reconsidered
when it finishes.

I think runAfter has two possible uses, and I'm not sure which cases are 
for which purpose:

1. Mutual exclusion - two tasks should not be running at the same time.
That could be implemented by the younger returning true for a task list
containing the older. In this case the sort of overtaking I described
below does not matter.

2. Order preservation - A task needs a state change to have happened
that will not happen until after an older task has run.

Patricia

On 4/2/2013 2:17 PM, Bryan Thompson wrote:
> I am not clear on the semantics for runAfter, but maybe this can be
> achieved by wrapping a Runnable within another Runnable such that the 2nd
> runnable is automatically scheduled after the first has succeeded?
> Likewise, it is possible to wrap a Runnable in order to automatically
> retry if it throws an exception.
>
> There are people who are experts at these patterns, but an example is
> given (below my signature) for an Executor that wraps an ExecutorService
> and queues Runnable instances with limited parallelism.  It hooks the
> Runnable in its own run() method.
>
> If you use a ScheduledExecutorService, you can queue a task to run with an
> initial and repeated delay (or at a repeated interval).  The task will be
> rescheduled *unless* it throws an exception.  This could be reused to
> periodically run-try a task after a timeout if we convert an error thrown
> in the task into "no error" (hence run after a fixed delay) and throw out
> a known exception if there is no error (to terminate the retry of the
> task).  A bit of a hack, but it leverages existing code for re-running a
> task with a fixed delay.
>
> Thanks,
> Bryan
>
> package com.bigdata.util.concurrent;
>
> import java.util.concurrent.BlockingQueue;
> import java.util.concurrent.Callable;
> import java.util.concurrent.Executor;
> import java.util.concurrent.ExecutorService;
> import java.util.concurrent.Future;
> import java.util.concurrent.FutureTask;
> import java.util.concurrent.LinkedBlockingDeque;
> import java.util.concurrent.RejectedExecutionException;
> import java.util.concurrent.Semaphore;
>
> import org.apache.log4j.Logger;
>
> /**
>   * A fly weight helper class that runs tasks either sequentially or with
> limited
>   * parallelism against some thread pool. Deadlock can arise when limited
>   * parallelism is applied if there are dependencies among the tasks.
> Limited
>   * parallelism is enforced using a counting {@link Semaphore}. New tasks
> can
>   * start iff the latch is non-zero. The maximum parallelism is the minimum
> of
>   * the value specified to the constructor and the potential parallelism of
> the
>   * delegate service.
>   * <p>
>   * Note: The pattern for running tasks on this service is generally to
>   * {@link #execute(Runnable)} a {@link Runnable} and to make that
>   * {@link Runnable} a {@link FutureTask} if you want to await the {@link
> Future}
>   * of a {@link Runnable} or {@link Callable} or otherwise manage its
> execution.
>   * <p>
>   * Note: This class can NOT be trivially wrapped as an {@link
> ExecutorService}
>   * since the resulting delegation pattern for submit() winds up invoking
>   * execute() on the delegate {@link ExecutorService} rather than on this
> class.
>   *
>   * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan
> Thompson</a>
>   * @version $Id: LatchedExecutor.java 6749 2012-12-03 14:42:48Z
> thompsonbry $
> */
> public class LatchedExecutor implements Executor {
>
>      private static final transient Logger log = Logger
>              .getLogger(LatchedExecutor.class);
>
>      /**
>       * The delegate executor.
>       */
>      private final Executor executor;
>
>      /**
>       * This is used to limit the concurrency with which tasks submitted to
> this
>       * class may execute on the delegate {@link #executor}.
>       */
>      private final Semaphore semaphore;
>
>      /**
>       * A thread-safe blocking queue of pending tasks.
>       *
>       * @todo The capacity of this queue does not of necessity need to be
>       *       unbounded.
>       */
>      private final BlockingQueue<Runnable> queue = new
> LinkedBlockingDeque<Runnable>(/*unbounded*/);
>
>      private final int nparallel;
>
>      /**
>       * Return the maximum parallelism allowed by this {@link Executor}.
>       */
>      public int getNParallel() {
>      	
>      	return nparallel;
>      	
>      }
>
>      public LatchedExecutor(final Executor executor, final int nparallel) {
>
>          if (executor == null)
>              throw new IllegalArgumentException();
>
>          if (nparallel < 1)
>              throw new IllegalArgumentException();
>
>          this.executor = executor;
>
>          this.nparallel = nparallel;
>
>          this.semaphore = new Semaphore(nparallel);
>
>      }
>
>      public void execute(final Runnable r) {
>          if (!queue.offer(new Runnable() {
>              /*
>               * Wrap the Runnable in a class that will start the next
> Runnable
>               * from the queue when it completes.
>               */
>              public void run() {
>                  try {
>                      r.run();
>                  } finally {
>                      scheduleNext();
>                  }
>              }
>          })) {
>              // The queue is full.
>              throw new RejectedExecutionException();
>          }
>          if (semaphore.tryAcquire()) {
>              // We were able to obtain a permit, so start another task.
>              scheduleNext();
>          }
>      }
>
>      /**
>       * Schedule the next task if one is available (non-blocking).
>       * <p>
>       * Pre-condition: The caller has a permit.
>       */
>      private void scheduleNext() {
>          while (true) {
>              Runnable next = null;
>              if ((next = queue.poll()) != null) {
>                  try {
>                      executor.execute(next);
>                      return;
>                  } catch (RejectedExecutionException ex) {
>                      // log error and poll the queue again.
>                      log.error(ex, ex);
>                      continue;
>                  }
>              } else {
>                  semaphore.release();
>                  return;
>              }
>          }
>      }
>
> }
>
>


Re: test failure repeatability - TaskManager

Posted by Bryan Thompson <br...@systap.com>.
I am not clear on the semantics for runAfter, but maybe this can be
achieved by wrapping a Runnable within another Runnable such that the 2nd
runnable is automatically scheduled after the first has succeeded?
Likewise, it is possible to wrap a Runnable in order to automatically
retry if it throws an exception.

There are people who are experts at these patterns, but an example is
given (below my signature) for an Executor that wraps an ExecutorService
and queues Runnable instances with limited parallelism.  It hooks the
Runnable in its own run() method.

If you use a ScheduledExecutorService, you can queue a task to run with an
initial and repeated delay (or at a repeated interval).  The task will be
rescheduled *unless* it throws an exception.  This could be reused to
periodically retry a task after a timeout if we convert an error thrown
in the task into "no error" (hence run after a fixed delay) and throw out
a known exception if there is no error (to terminate the retry of the
task).  A bit of a hack, but it leverages existing code for re-running a
task with a fixed delay.

Thanks,
Bryan

package com.bigdata.util.concurrent;

import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.Executor;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.Semaphore;

import org.apache.log4j.Logger;

/**
 * A flyweight helper class that runs tasks either sequentially or with
 * limited parallelism against some thread pool. Deadlock can arise when
 * limited parallelism is applied if there are dependencies among the tasks.
 * Limited parallelism is enforced using a counting {@link Semaphore}. New
 * tasks can start iff the latch is non-zero. The maximum parallelism is the
 * minimum of the value specified to the constructor and the potential
 * parallelism of the delegate service.
 * <p>
 * Note: The pattern for running tasks on this service is generally to
 * {@link #execute(Runnable)} a {@link Runnable} and to make that
 * {@link Runnable} a {@link FutureTask} if you want to await the
 * {@link Future} of a {@link Runnable} or {@link Callable} or otherwise
 * manage its execution.
 * <p>
 * Note: This class can NOT be trivially wrapped as an
 * {@link ExecutorService} since the resulting delegation pattern for
 * submit() winds up invoking execute() on the delegate
 * {@link ExecutorService} rather than on this class.
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan
 *         Thompson</a>
 * @version $Id: LatchedExecutor.java 6749 2012-12-03 14:42:48Z thompsonbry $
 */
public class LatchedExecutor implements Executor {

    // Note: "transient" has no effect on a static field, so it is omitted.
    private static final Logger log = Logger
            .getLogger(LatchedExecutor.class);

    /**
     * The delegate executor.
     */
    private final Executor executor;

    /**
     * This is used to limit the concurrency with which tasks submitted to
     * this class may execute on the delegate {@link #executor}. One permit is
     * held for each task that has been handed to the delegate and has not yet
     * finished.
     */
    private final Semaphore semaphore;

    /**
     * A thread-safe blocking queue of pending tasks.
     * 
     * @todo The capacity of this queue does not of necessity need to be
     *       unbounded.
     */
    private final BlockingQueue<Runnable> queue =
            new LinkedBlockingDeque<Runnable>(/* unbounded */);

    /**
     * The maximum parallelism specified to the constructor.
     */
    private final int nparallel;

    /**
     * Return the maximum parallelism allowed by this {@link Executor}.
     */
    public int getNParallel() {

        return nparallel;

    }

    /**
     * @param executor
     *            The delegate on which the tasks are actually run.
     * @param nparallel
     *            The maximum number of tasks submitted through this class
     *            that may be outstanding on the delegate at any one time.
     * 
     * @throws IllegalArgumentException
     *             if <i>executor</i> is <code>null</code> or if
     *             <i>nparallel</i> is less than ONE (1).
     */
    public LatchedExecutor(final Executor executor, final int nparallel) {

        if (executor == null)
            throw new IllegalArgumentException();

        if (nparallel < 1)
            throw new IllegalArgumentException();

        this.executor = executor;

        this.nparallel = nparallel;

        this.semaphore = new Semaphore(nparallel);

    }

    /**
     * Queue the task and, if a permit is available, hand the task at the
     * head of the queue to the delegate.
     * 
     * @param r
     *            The task.
     * 
     * @throws NullPointerException
     *             if <i>r</i> is <code>null</code>.
     * @throws RejectedExecutionException
     *             if the task could not be added to the internal queue.
     */
    public void execute(final Runnable r) {
        if (r == null) {
            /*
             * Fail fast in the caller's thread (per the Executor contract)
             * rather than later, on a pool thread, inside of the wrapper.
             */
            throw new NullPointerException();
        }
        if (!queue.offer(new Runnable() {
            /*
             * Wrap the Runnable in a class that will start the next Runnable
             * from the queue when it completes.
             */
            public void run() {
                try {
                    r.run();
                } finally {
                    // This task's permit is passed on to the next task (or
                    // released if there is nothing left to run).
                    scheduleNext();
                }
            }
        })) {
            // The queue is full.
            throw new RejectedExecutionException();
        }
        if (semaphore.tryAcquire()) {
            // We were able to obtain a permit, so start another task.
            scheduleNext();
        }
    }

    /**
     * Schedule the next task if one is available (non-blocking).
     * <p>
     * Pre-condition: The caller has a permit. The permit is either handed on
     * to the task that gets scheduled or released before this method returns.
     */
    private void scheduleNext() {
        while (true) {
            final Runnable next = queue.poll();
            if (next != null) {
                try {
                    executor.execute(next);
                    return;
                } catch (RejectedExecutionException ex) {
                    // log error and poll the queue again.
                    log.error(ex, ex);
                    continue;
                }
            }
            // Nothing to run: give the permit back.
            semaphore.release();
            /*
             * Close the window between poll() returning null and the permit
             * being released. A concurrent execute() may have queued a task
             * in that window and then failed its tryAcquire() because we
             * were still holding the permit. Unless we look again, that task
             * would sit in the queue until some later execute() call.
             */
            if (queue.isEmpty() || !semaphore.tryAcquire()) {
                // Either there is no work, or another thread obtained the
                // permit and is now responsible for draining the queue.
                return;
            }
            // We hold a permit again, so loop and poll the queue.
        }
    }

}



Re: test failure repeatability - TaskManager

Posted by Peter Firmstone <ji...@zeus.net.au>.
So there are some fundamental design flaws, incompatible with a 
distributed network environment, that we need to reconsider.

The Notifier used in Outrigger (JavaSpaces) and ProxyRegTask in 
JoinManager, both use retry and runAfter.

Implementations of RetryTask that don't use runAfter are ok.  So in 
other words RetryTask really needs to be an ordinary Runnable which is 
retried until it passes and it really needs to be idempotent.

Other tasks that require ordering probably require a sequence number and 
an executor that can re-request a task with a particular sequence 
number; it could give up waiting after a reasonable amount of time.

Anyone have time to assist investigating a solution?

Regards,

Peter.

On 3/04/2013 1:01 AM, Patricia Shanahan wrote:
> My concern with RetryTask is related to your point about "If a task 
> completes before another task which it's supposed to runAfter but 
> isn't present in the queue; that could explain some issues."
>
> A RetryTask puts itself back on the end of the queue when it needs to 
> retry. Suppose taskA is a RetryTask, taskB is supposed to runAfter 
> taskA, and originally appears on the queue after taskA. The first 
> attempt at taskA fails, and it puts itself back on the TaskManager 
> queue, using the normal add call. Now taskA is after taskB.
>
> Patricia
>
> On 4/2/2013 12:38 AM, Peter Firmstone wrote:
>> The formatting didn't work out, I'll create a Jira issue to discuss.
>>
>> Patricia's done a great job detailing the dependencies and issues with
>> TaskManager's Task implementations.
>>
>> I recall a list discussion from the original Sun developers who had
>> intended to replace TaskManager, the runAfter method has issues.
>>
>> Being so prevalent, it's quite possible that TaskManager is causing
>> issues and it might also explain why as performance improves more issues
>> arise.
>>
>> If a task completes before another task which it's supposed to runAfter
>> but isn't present in the queue; that could explain some issues.
>>
>> I much prefer idempotent code myself.
>>
>> This could take some effort to fix, any volunteers?
>>
>> Dennis are you able to continue with your 2.2.1 branch release?
>>
>> Regards,
>>
>> Peter.
>>
>> On 2/04/2013 5:17 PM, Peter Firmstone wrote:
>>> I've appended Patricia's notes in html so we don't lose the table
>>> formatting, hopefully it will be accepted by the mailer.
>>>
>>> On 2/04/2013 1:38 PM, Patricia Shanahan wrote:
>>>> I've sent Peter some notes that I hope he can make available - I
>>>> don't think I can send attachments to the list.
>>>>
>>>> Rereading my notes has reminded me that I had special concerns with
>>>> RetryTask. Is that still used? If so, I'll explain the problem.
>>>>
>>>>
>>> *TaskManager notes*
>>>
>>>
>>>  Classes That Reference TaskManager
>>>
>>> Class
>>>
>>>
>>>
>>> Package
>>>
>>>
>>>
>>> Notes
>>>
>>> AbortJob
>>>
>>>
>>>
>>> com.sun.jini.mahalo
>>>
>>>
>>>
>>> Subclass of Job. Passed a TaskManager as parameter. Uses
>>> ParticipantTask, no dependencies.
>>>
>>> CommitJob
>>>
>>>
>>>
>>> com.sun.jini.mahalo
>>>
>>>
>>>
>>> Subclass of Job. Passed a TaskManager as parameter. Uses
>>> ParticipantTask, no dependencies.
>>>
>>> EventType
>>>
>>>
>>>
>>> com.sun.jini.norm.event
>>>
>>>
>>>
>>> Task type SendTask, subclass of RetryTask, no dependencies.
>>>
>>> EventTypeGenerator
>>>
>>>
>>>
>>> com.sun.jini.norm.event
>>>
>>>
>>>
>>> Supplies a TaskManager for use by the EventType objects it generates.
>>>
>>> FiddlerImpl
>>>
>>>
>>>
>>> com.sun.jini.fiddler
>>>
>>>
>>>
>>> Extensive use of TaskManager, with many different Task subtypes. No
>>> dependencies.
>>>
>>> Job
>>>
>>>
>>>
>>> com.sun.jini.mahalo
>>>
>>>
>>>
>>> Manage performance of a job as a set of tasks all of which need to be
>>> created by the Job subclass. There is some dubious code in performWork
>>> that silently throws away an exception that would indicate internal
>>> inconsistency.
>>>
>>> JoinManager
>>>
>>>
>>>
>>> net.jini.lookup
>>>
>>>
>>>
>>> Uses ProxyRegTask, which extends RetryTask. Special problem - making
>>> sure a service gets exactly one ID. If the ID has already been
>>> allocated, no dependencies. If not, runAfter any ProxyRegTask with
>>> lower sequence number, ensuring that only the lowest sequence number
>>> ProxyRegTask in the TaskManager can run. Safe if, and only if, tasks
>>> are submitted in sequence number order, and there are no retries.
>>>
>>>
>>> LeaseRenewalManager
>>>
>>>
>>>
>>> net.jini.lease
>>>
>>>
>>>
>>> Uses QueuerTask and RenewTask. No dependencies.
>>>
>>> LookupDiscovery
>>>
>>>
>>>
>>> net.jini.discovery
>>>
>>>
>>>
>>> Uses DecodeAnnouncementTask and UnicastDiscoveryTask. No dependencies.
>>>
>>> LookupLocatorDiscovery
>>>
>>>
>>>
>>> net.jini.discovery
>>>
>>>
>>>
>>> Uses DiscoveryTask. No dependencies.
>>>
>>> MailboxImpl
>>>
>>>
>>>
>>> com.sun.jini.mercury
>>>
>>>
>>>
>>> Uses a NotifyTask, subclass of RetryTask, no dependencies.
>>>
>>> Notifier
>>>
>>>
>>>
>>> com.sun.jini.outrigger
>>>
>>>
>>>
>>> Uses its own NotifyTask, subclass of RetryTask. Dependency based on
>>> EventSender runAfter test. EventSender has two implementations. An
>>> EventRegistrationWatcher.BasicEventSender waits for any
>>> BasicEventSender belonging to the same EventRegistrationWatcher.
>>> VisibilityEventSender has no dependencies.
>>>
>>> ParticipantTask
>>>
>>>
>>>
>>> com.sun.jini.mahalo
>>>
>>>
>>>
>>> No dependencies.
>>>
>>> PrepareAndCommitJob
>>>
>>>
>>>
>>> com.sun.jini.mahalo
>>>
>>>
>>>
>>> Subclass of Job. Passed a TaskManager as parameter. Uses
>>> ParticipantTask, no dependencies.
>>>
>>> PrepareJob
>>>
>>>
>>>
>>> com.sun.jini.mahalo
>>>
>>>
>>>
>>> Subclass of Job. Passed a TaskManager as parameter. Uses
>>> ParticipantTask, no dependencies.
>>>
>>> RegistrarImpl
>>>
>>>
>>>
>>> com.sun.jini.reggie
>>>
>>>
>>>
>>> Uses multiple Task types: AddressTask - no dependencies;
>>> DecodeRequestTask - no dependencies; EventTask - run after EventTask
>>> for same listener, "Keep events going to the same listener ordered";
>>> SocketTask - no dependencies.
>>>
>>> RetryTask
>>>
>>>
>>>
>>> com.sun.jini.thread
>>>
>>>
>>>
>>> Abstract class implementing Task. It provides for automatic retry of
>>> failed attempts, where an attempt is a call to tryOnce.
>>>
>>> ServiceDiscoveryManager
>>>
>>>
>>>
>>> net.jini.lookup
>>>
>>>
>>>
>>> Uses CacheTask - no dependencies; ServiceIdTask - run after
>>> ServiceIdTask with same ServiceId and lower sequence number. Its
>>> subclasses NewOldServiceTask and UnmapProxyTask inherit runAfter.
>>> ServiceIdTask's subclass NotifyEventTask runs after
>>> RegisterListenerTask or LookupTask with same ProxyReg and lower
>>> sequence, and also calls the ServiceId runAfter. Bug ID 6291851.
>>> Comment suggests the writer thought it was necessary to do a sequence
>>> number check to find the queue order: " and if those tasks were queued
>>> prior to this task (have lower sequence numbers)".
>>>
>>>
>>> /** Whenever a ServiceIdTask is created in this cache, it is assigned
>>>
>>> * a unique sequence number to allow such tasks associated with the
>>>
>>> * same ServiceID to be executed in the order in which they were
>>>
>>> * queued in the TaskManager. This field contains the value of
>>>
>>> * the sequence number assigned to the most recently created
>>>
>>> * ServiceIdTask.
>>>
>>> */
>>>
>>> private long taskSeqN = 0;
>>>
>>>
>>> Synchronization window needs fixing. taskSeqN is protected by
>>> serviceIdMap synchronization, but it is released before calling
>>> cacheTaskMgr.add in addProxyReg
>>>
>>>
>>> SettlerTask
>>>
>>>
>>>
>>> com.sun.jini.mahalo
>>>
>>>
>>>
>>> Subclass of RetryTask. No dependencies. Used in TxnManagerImpl.
>>>
>>> TxnManagerImpl
>>>
>>>
>>>
>>> com.sun.jini.mahalo
>>>
>>>
>>>
>>> Uses SettlerTask and ParticipantTask. No dependencies.
>>>
>>> TxnManagerTransaction
>>>
>>>
>>>
>>> com.sun.jini.mahalo
>>>
>>>
>>>
>>> Creates a TaskManager, threadpool, and passes it around to e.g. Job
>>> and AbortJob.
>>>
>>> TxnMonitor
>>>
>>>
>>>
>>> com.sun.jini.outrigger
>>>
>>>
>>>
>>> Uses TxnMonitorTask.
>>>
>>> TxnMonitorTask
>>>
>>>
>>>
>>> com.sun.jini.outrigger
>>>
>>>
>>>
>>> Subclass of RetryTask. No dependencies.
>>>
>>>
>>>  Issues
>>>
>>>
>>>    RetryTask
>>>
>>> RetryTask is a Task implementation whose run method tries a subclass
>>> supplied method with a boolean result. If the method returns false,
>>> indicating failure, the RetryTask's run method schedules another try
>>> in the future, using a WakeupManager supplied to the RetryTask
>>> constructor.
>>>
>>> During the time between a failed attempt and its retry, there does not
>>> seem to be any control to prevent conflicting tasks from entering the
>>> same TaskManager. Some of those tasks would have waited for the task
>>> being retried, if it had been in the TaskManager at their time of
>>> arrival. Delayed retry and dependence on sequence number seem
>>> incompatible. Notifier.NotifyTask and JoinManager.ProxyRegTask both
>>> extend RetryTask and have dependencies. JoinManager.ProxyRegTask uses
>>> a sequence number, but probably does not need to, and should not. The
>>> intent seems to be to run tasks for a given service one-at-a-time
>>> until its ServiceId has been set.
>>>
>>>
>>>    ServiceDiscoveryManager.CacheTask
>>>
>>> Most subclasses inherit a "return false;" runAfter. The exceptions are
>>> ServiceIdTask, its subclasses, and LookupTask. Both have sequence
>>> number dependencies. It is not yet clear whether
>>> ServiceDiscoveryManager is ensuring that tasks enter the TaskManager
>>> in sequence number order. If it does, the code is correct, but wastes
>>> time with a trivially true check. If not, the code is incorrect
>>> relative to the comments, which seem to expect order.
>>>
>>>
>>>
>>>
>>>
>>>
>>
>


Re: test failure repeatability - TaskManager

Posted by Patricia Shanahan <pa...@acm.org>.
My concern with RetryTask is related to your point about "If a task 
completes before another task which it's supposed to runAfter but isn't 
present in the queue; that could explain some issues."

A RetryTask puts itself back on the end of the queue when it needs to 
retry. Suppose taskA is a RetryTask, taskB is supposed to runAfter 
taskA, and originally appears on the queue after taskA. The first 
attempt at taskA fails, and it puts itself back on the TaskManager 
queue, using the normal add call. Now taskA is after taskB.

Patricia

On 4/2/2013 12:38 AM, Peter Firmstone wrote:
> The formatting didn't work out, I'll create a Jira issue to discuss.
>
> Patricia's done a great job detailing the dependencies and issues with
> TaskManager's Task implementations.
>
> I recall a list discussion from the original Sun developers who had
> intended to replace TaskManager, the runAfter method has issues.
>
> Being so prevalent, it's quite possible that TaskManager is causing
> issues and it might also explain why as performance improves more issues
> arise.
>
> If a task completes before another task which it's supposed to runAfter
> but isn't present in the queue; that could explain some issues.
>
> I much prefer idempotent code myself.
>
> This could take some effort to fix, any volunteers?
>
> Dennis are you able to continue with your 2.2.1 branch release?
>
> Regards,
>
> Peter.
>
> On 2/04/2013 5:17 PM, Peter Firmstone wrote:
>> I've appended Patricia's notes in html so we don't lose the table
>> formatting, hopefully it will be accepted by the mailer.
>>
>> On 2/04/2013 1:38 PM, Patricia Shanahan wrote:
>>> I've sent Peter some notes that I hope he can make available - I
>>> don't think I can send attachments to the list.
>>>
>>> Rereading my notes has reminded me that I had special concerns with
>>> RetryTask. Is that still used? If so, I'll explain the problem.
>>>
>>>
>> *TaskManager notes*
>>
>>
>>  Classes That Reference TaskManager
>>
>> Class
>>
>>
>>
>> Package
>>
>>
>>
>> Notes
>>
>> AbortJob
>>
>>
>>
>> com.sun.jini.mahalo
>>
>>
>>
>> Subclass of Job. Passed a TaskManager as parameter. Uses
>> ParticipantTask, no dependencies.
>>
>> CommitJob
>>
>>
>>
>> com.sun.jini.mahalo
>>
>>
>>
>> Subclass of Job. Passed a TaskManager as parameter. Uses
>> ParticipantTask, no dependencies.
>>
>> EventType
>>
>>
>>
>> com.sun.jini.norm.event
>>
>>
>>
>> Task type SendTask, subclass of RetryTask, no dependencies.
>>
>> EventTypeGenerator
>>
>>
>>
>> com.sun.jini.norm.event
>>
>>
>>
>> Supplies a TaskManager for use by the EventType objects it generates.
>>
>> FiddlerImpl
>>
>>
>>
>> com.sun.jini.fiddler
>>
>>
>>
>> Extensive use of TaskManager, with many different Task subtypes. No
>> dependencies.
>>
>> Job
>>
>>
>>
>> com.sun.jini.mahalo
>>
>>
>>
>> Manage performance of a job as a set of tasks all of which need to be
>> created by the Job subclass. There is some dubious code in performWork
>> that silently throws away an exception that would indicate internal
>> inconsistency.
>>
>> JoinManager
>>
>>
>>
>> net.jini.lookup
>>
>>
>>
>> Uses ProxyRegTask, which extends RetryTask. Special problem - making
>> sure a service gets exactly one ID. If the ID has already been
>> allocated, no dependencies. If not, runAfter any ProxyRegTask with
>> lower sequence number, ensuring that only the lowest sequence number
>> ProxyRegTask in the TaskManager can run. Safe if, and only if, tasks
>> are submitted in sequence number order, and there are no retries.
>>
>>
>> LeaseRenewalManager
>>
>>
>>
>> net.jini.lease
>>
>>
>>
>> Uses QueuerTask and RenewTask. No dependencies.
>>
>> LookupDiscovery
>>
>>
>>
>> net.jini.discovery
>>
>>
>>
>> Uses DecodeAnnouncementTask and UnicastDiscoveryTask. No dependencies.
>>
>> LookupLocatorDiscovery
>>
>>
>>
>> net.jini.discovery
>>
>>
>>
>> Uses DiscoveryTask. No dependencies.
>>
>> MailboxImpl
>>
>>
>>
>> com.sun.jini.mercury
>>
>>
>>
>> Uses a NotifyTask, subclass of RetryTask, no dependencies.
>>
>> Notifier
>>
>>
>>
>> com.sun.jini.outrigger
>>
>>
>>
>> Uses its own NotifyTask, subclass of RetryTask. Dependency based on
>> EventSender runAfter test. EventSender has two implementations. An
>> EventRegistrationWatcher.BasicEventSender waits for any
>> BasicEventSender belonging to the same EventRegistrationWatcher.
>> VisibilityEventSender has no dependencies.
>>
>> ParticipantTask
>>
>>
>>
>> com.sun.jini.mahalo
>>
>>
>>
>> No dependencies.
>>
>> PrepareAndCommitJob
>>
>>
>>
>> com.sun.jini.mahalo
>>
>>
>>
>> Subclass of Job. Passed a TaskManager as parameter. Uses
>> ParticipantTask, no dependencies.
>>
>> PrepareJob
>>
>>
>>
>> com.sun.jini.mahalo
>>
>>
>>
>> Subclass of Job. Passed a TaskManager as parameter. Uses
>> ParticipantTask, no dependencies.
>>
>> RegistrarImpl
>>
>>
>>
>> com.sun.jini.reggie
>>
>>
>>
>> Uses multiple Task types: AddressTask - no dependencies;
>> DecodeRequestTask - no dependencies; EventTask - run after EventTask
>> for same listener, "Keep events going to the same listener ordered";
>> SocketTask - no dependencies.
>>
>> RetryTask
>>
>>
>>
>> com.sun.jini.thread
>>
>>
>>
>> Abstract class implementing Task. It provides for automatic retry of
>> failed attempts, where an attempt is a call to tryOnce.
>>
>> ServiceDiscoveryManager
>>
>>
>>
>> net.jini.lookup
>>
>>
>>
>> Uses CacheTask - no dependencies; ServiceIdTask - run after
>> ServiceIdTask with same ServiceId and lower sequence number. Its
>> subclasses NewOldServiceTask and UnmapProxyTask inherit runAfter.
>> ServiceIdTask's subclass NotifyEventTask runs after
>> RegisterListenerTask or LookupTask with same ProxyReg and lower
>> sequence, and also calls the ServiceId runAfter. Bug ID 6291851.
>> Comment suggests the writer thought it was necessary to do a sequence
>> number check to find the queue order: " and if those tasks were queued
>> prior to this task (have lower sequence numbers)".
>>
>>
>> /** Whenever a ServiceIdTask is created in this cache, it is assigned
>>
>> * a unique sequence number to allow such tasks associated with the
>>
>> * same ServiceID to be executed in the order in which they were
>>
>> * queued in the TaskManager. This field contains the value of
>>
>> * the sequence number assigned to the most recently created
>>
>> * ServiceIdTask.
>>
>> */
>>
>> private long taskSeqN = 0;
>>
>>
>> Synchronization window needs fixing. taskSeqN is protected by
>> serviceIdMap synchronization, but it is released before calling
>> cacheTaskMgr.add in addProxyReg
>>
>>
>> SettlerTask
>>
>>
>>
>> com.sun.jini.mahalo
>>
>>
>>
>> Subclass of RetryTask. No dependencies. Used in TxnManagerImpl.
>>
>> TxnManagerImpl
>>
>>
>>
>> com.sun.jini.mahalo
>>
>>
>>
>> Uses SettlerTask and ParticipantTask. No dependencies.
>>
>> TxnManagerTransaction
>>
>>
>>
>> com.sun.jini.mahalo
>>
>>
>>
>> Creates a TaskManager, threadpool, and passes it around to e.g. Job
>> and AbortJob.
>>
>> TxnMonitor
>>
>>
>>
>> com.sun.jini.outrigger
>>
>>
>>
>> Uses TxnMonitorTask.
>>
>> TxnMonitorTask
>>
>>
>>
>> com.sun.jini.outrigger
>>
>>
>>
>> Subclass of RetryTask. No dependencies.
>>
>>
>>  Issues
>>
>>
>>    RetryTask
>>
>> RetryTask is a Task implementation whose run method tries a subclass
>> supplied method with a boolean result. If the method returns false,
>> indicating failure, the RetryTask's run method schedules another try
>> in the future, using a WakeupManager supplied to the RetryTask
>> constructor.
>>
>> During the time between a failed attempt and its retry, there does not
>> seem to be any control to prevent conflicting tasks from entering the
>> same TaskManager. Some of those tasks would have waited for the task
>> being retried, if it had been in the TaskManager at their time of
>> arrival. Delayed retry and dependence on sequence number seem
>> incompatible. Notifier.NotifyTask and JoinManager.ProxyRegTask both
>> extend RetryTask and have dependencies. JoinManager.ProxyRegTask uses
>> a sequence number, but probably does not need to, and should not. The
>> intent seems to be to run tasks for a given service one-at-a-time
>> until its ServiceId has been set.
>>
>>
>>    ServiceDiscoveryManager.CacheTask
>>
>> Most subclasses inherit a "return false;" runAfter. The exceptions are
>> ServiceIdTask, its subclasses, and LookupTask. Both have sequence
>> number dependencies. It is not yet clear whether
>> ServiceDiscoveryManager is ensuring that tasks enter the TaskManager
>> in sequence number order. If it does, the code is correct, but wastes
>> time with a trivially true check. If not, the code is incorrect
>> relative to the comments, which seem to expect order.
>>
>>
>>
>>
>>
>>
>


Re: Next steps after 2.2.1 release

Posted by Dan Creswell <da...@gmail.com>.
Peter,

I shall remind you of your statement elsewhere about behaviour in public.
Dude, I know you're a much better person than the below suggests.

Perhaps you wrote it in anger or frustration or fatigue or some
combination. Nevertheless it doesn't come off well and would point at you
needing to do just as much development of leadership skills as you assert
is required for Greg.

Trust has to be earned just as much as granted. It starts from respect and
quality dialogue.

On 7 April 2013 22:54, Peter <ji...@zeus.net.au> wrote:

> Greg, why have you repeated this message?
>
> I think this is a deliberate attack on the project because you haven't
> been following development in trunk and now you're scared because you see
> changes you don't understand.
>
> I've been following your developments in surrogates, an impressive amount
> of productivity.  Although I think you should consider upgrading
> apache.commons vfs to version 2 before releasing.
>
> Open your mind and ask questions, the code isn't set in stone, you have an
> obligation as project lead to encourage and nurture development, not stifle
> it.
>
> You strike me as someone who's a very good programmer, but still learning
> leadership because you lack faith in others and must do everything
> yourself.  Remember I offered to assist with Surrogates, but you wanted to
> work alone?
>
> You need to let go and give others a go too.
>
> How you handle this matter will be a test for your own personal
> development and an opportunity to grow as a leader.
>
> You also hold the future of this project in your hands, so I hope you find
> strength to let go.
>
> Regards,
>
> Peter.
>
> ----- Original message -----
> >
> > OK, so in my last message I talked about how (speaking only for myself)
> I'm a
> > little nervous about the state of the trunk.
> >
> > So what now?
> >
> > Problems we need to avoid in this discussion:
> > -------------------------------------------------------------
> >
> > - Conflation of source tree structure issues with build tool selection.
> > - Conflation of Maven build, Maven as codebase provider (artifact urls),
> >   and posting artifacts to Maven Central
> > - Wish lists of pet features
> > - Bruised egos and personal criticisms.
> >
> > Issues I see, in no particular order:
> > ----------------------------------------------
> > - We've done changes both to the test framework and the code, and lots
> >   of them.  We should do one or the other, or small amounts of
> >   coevolution, if absolutely necessary.
> > - Really, I'd like to see a completely separate integration test, and
> >   have the TCK tests separated out again.
> > - The source tree is incomprehensible
> > - The tests appear to be awfully sensitive to their environment.  Insofar
> >   as when I run them locally on an untouched source tree, I get 280
> >   failures.
> > - There have been changes to class loading and security subsystems.  These
> >   subsystems are core to Jini, and the changes were made to the existing
> >   source, so there's no way to "opt-out" of the changes.  I'd like to see
> >   radical changes be optional until proven in the field, where possible.
> >   In the case of policy providers and class loaders, that should be easy
> >   to do.
> > - Similarly, it seems there have been some changes to the JERI framework.
> > - There are ".jar" files in our repository.  I'll stipulate that the
> >   licensing has been checked, but it smells bad.
> >
> > Discussion
> > -----------------
> > I guess the biggest thing I'd like to see is stability in the test
> framework.
> > Perhaps it needs refactoring or reorganization, but if so, we need to be
> very
> > careful to separate it from changes to the core functionality.
> >
> > Next, I'd like for it to be easier to comprehend the source tree.  I
> think a
> > good way to do that is to separate out (carefully) the core Jini package
> > (basically the contents of jsk-platform.jar) and the service
> implementations.
> > There's no reason that we have to have one huge
> everything-but-the-kitchen-sink
> > distribution.  That's just a holdover from how Sun structured the JTSK -
> It was
> > literally a "starter kit".  To me it would be fine to have separate
> deliverables
> > for the platform and the services.
> >
> > While we're separating out the services, it might also be a decent time
> to
> > implement Maven-based builds if we think that's a good idea.  I'd start
> with
> > Reggie.  It would also be a good time to get rid of the "com.sun.jini"
> packages.
> >
> > Aside:  I'm personally ambivalent on Maven (which is to say I'm nowhere
> near as
> > negative on it as I once was).  I do agree with Dennis, though, that the
> jars
> > and appropriate poms need to be published to Maven Central.  There's no
> doubt
> > that users will appreciate that.
> >
> > Once we have a stable set of regression tests, then OK, we could think
> about
> > improving performance or using Maven repositories as the codebase server.
> >
> > I realize this won't be popular, but my gut feel is that we need to step
> back to
> > the 2.2 branch and retrace our steps a little, and go through the
> evolution
> > again in a more measured fashion.
> >
> > Proposal
> > ------------
> >
> > 1 - Release version 2.2.1 from the 2.2 branch.
> > 2 - Create a separate source tree for the test framework.  This could
> come from
> > the "qa_refactor" branch, but the goal should be to successfully test
> the 2.2.1
> > release.  Plus it should be a no-brainer to pull it down and run it on a
> local
> > machine. 3 - Release 2.2.2 from the pruned jtsk tree.  Release 1.0.0 of
> the test
> > framework. 4 - Pull out the infrastructure service implementations
> (Reggie,
> > Outrigger, Norm, etc) from the core into separate products.  Release
> 1.0.0 on
> > each of them.  Release 2.2.3 from the pruned jtsk tree. 5 - Adopt a fixed
> > release cycle.  Not sure if it should be quarterly or biennial, or
> whether it
> > should be all products at once or staggered releases.  We'll need to
> discuss. 6
> > - Then we can start making changes if necessary to the individual
> products.  And
> > also try to deal with making it easier for new users to use the
> technology.
> >
> > So there you go.  Opinions?
> >
> > Greg Trasuk.
> >
>
>

Re: Next steps after 2.2.1 release

Posted by Patricia Shanahan <pa...@acm.org>.
On 4/7/2013 5:03 PM, Greg Trasuk wrote:
...
> I'm honestly and truly not passing judgement on the quality of the
> code.  I honestly don't know if it's good or bad.  I have to confess
> that, given that Jini was written as a top-level project at Sun,
> sponsored by Bill Joy, when Sun was at the top of its game, and the Jini
> project team was a "who's-who" of distributed computing pioneers, the
> idea that it's riddled with concurrency bugs surprises me.  But mainly,
> I'm still trying to answer that question - "How do I know if it's good?"
...

I don't know whether it has concurrency bugs, and that is a problem in 
its own right. The theory of why it does not suffer from concurrency 
problems is nowhere near clear.

I have no faith in the infallibility of Sun developers, because I used 
to be one. Some of them were very, very smart, but those were not 
necessarily the ones writing every line of code. The issue is not the 
distributed system design, but details of coding that may be leading to 
local concurrency problems within a program.

I am a little worried that my doubts about RetryTask may lead to over 
focus on that issue. It should be considered as a candidate, but I was 
never able to become certain there was a bug involving it. If I had, I 
would have created an issue for it and fixed it.

Patricia


Re: Next steps after 2.2.1 release

Posted by Peter <ji...@zeus.net.au>.
----- Original message -----
>
> On Sun, 2013-04-07 at 17:54, Peter wrote:
> > Greg, why have you repeated this message?
> >
>
> First time I sent it was from the wrong email address, so it got hung up
> in moderation.  I sent it again from my subscribed address.  I'm
> guessing someone just moderated the original through.
>
>

My apologies, it gave me the impression you were escalating an argument to roll trunk back 3 years.  

Unfortunately the tests can't prove the absence of errors, concurrency problems can lie dormant for years.  The tests passed previously with inadequate synchronization, it's plausible that client code could also have inadequate synchronization and experience issues.

There are a number of Jira issues I need to follow up on; these known issues may be related to the random failures. One in particular explains how unsynchronized access is used to avoid deadlock:

River-145
River-348
River-258
River-140
River-113
River-43
River-37
River-30 (includes patch)

The tests can be run against previous releases to simulate an environment where only the test code has changed.



> Anyway, let's address one or two of your points...
>
> I see you writing inflammatory statements about my leadership skills and
> I think you're  upset because you think I was questioning the quality of
> your work. I understand.  You've put a lot of effort into the codebase.
>
> I feel sorry that you feel that way - it wasn't what I intended.
>
> Apache doesn't recognize any kind of a "project leader" position, and I
> don't pretend to hold any such influence over River.  I'm speaking as a
> committer and PMC member.  I certainly don't think I "hold the future of
> the project in my hands".  If anyone does hold individual control over
> the future of the project, then it doesn't qualify as an Apache project,
> and we need to remedy that.
>
> Really, what I'm trying to do is answer this question for myself - "Can
> I vote +1 on a release based on the trunk?".  There have been a lot of
> changes to the trunk code.  Yes, many that I don't understand.  I've
> done more management than you think.  I don't require that I understand
> everything.  That leads me to ask "How can I be confident about a
> release?"
>
> The best answer I have is to ask "does it pass the regression tests?".
> But that implies another question - "Do I trust the tests?"  And the
> answer to that is "currently, no, because from what I can see there have
> also been changes to the tests".
>
> I'm honestly and truly not passing judgement on the quality of the
> code.  I honestly don't know if it's good or bad.  I have to confess
> that, given that Jini was written as a top-level project at Sun,
> sponsored by Bill Joy, when Sun was at the top of its game, and the Jini
> project team was a "who's-who" of distributed computing pioneers, the
> idea that it's riddled with concurrency bugs surprises me.  But mainly,
> I'm still trying to answer that question - "How do I know if it's good?"
>
> Here's what I'm doing:
>
> - I'm attempting to run the tests from "tags/2.2.0" against the "2.2"
> branch.  When I have confidence in the "2.2" branch, I'll publish the
> results, ask anyone else who's interested to test it, and then call for
> a release on "2.2.1"
> - After that, the developers need to reach consensus about how to move
> forward.
>
> Cheers,
>
> Greg.
>
>
>
> > I think this is a deliberate attack on the project because you haven't
> > been following development in trunk and now you're scared because you
> > see changes you don't understand.
> >
> > I've been following your developments in surrogates, an impressive
> > amount of productivity.  Although I think you should consider
> > upgrading apache.commons vfs to version 2 before releasing.
> >
> > Open your mind and ask questions, the code isn't set in stone, you
> > have an obligation as project lead to encourage and nurture
> > development, not stifle it.
> >
> > You strike me as someone who's a very good programmer, but still
> > learning leadership because you lack faith in others and must do
> > everything yourself.  Remember I offered to assist with Surrogates,
> > but you wanted to work alone?
> >
> > You need to let go and give others a go too.
> >
> > How you handle this matter will be a test for your own personal
> > development and an opportunity to grow as a leader.
> >
> > You also hold the future of this project in your hands, so I hope you
> > find strength to let go.
> >
> > Regards,
> >
> > Peter.
> >
> > ----- Original message -----
> > >
> > > OK, so in my last message I talked about how (speaking only for
> > myself) I'm a
> > > little nervous about the state of the trunk.
> > >
> > > So what now?
> > >
> > > Problems we need to avoid in this discussion:
> > > -------------------------------------------------------------
> > >
> > > - Conflation of source tree structure issues with build tool
> > selection.
> > > - Conflation of Maven build, Maven as codebase provider (artifact
> > urls), and
> > > posting artifacts to Maven Central - Wish lists of pet features
> > > - Bruised egos and personal criticisms.
> > >
> > > Issues I see, in no particular order:
> > > ----------------------------------------------
> > > - We've done changes both to the test framework and the code, and
> > lots of them.
> > > We should do one or the other, or small amounts of coevolution, if
> > absolutely
> > > necessary. - Really, I'd like to see a completely separate
> > integration test, and
> > > have the TCK tests separated out again. - The source tree is
> > incomprehensible -
> > > The tests appear to be awfully sensitive to their environment.
> > Insofar as when
> > > I run them locally on an untouched source tree, I get 280 failures.
> > - There have
> > > been changes to class loading and security subsystems.  These
> > subsystems are
> > > core to Jini, and the changes were made to the existing source, so
> > there's no
> > > way to "opt-out" of the changes.  I'd like to see radical changes be
> > optional
> > > until proven in the field, where possible.  In the case of policy
> > providers and
> > > class loaders, that should be easy to do. - Similarly, it seems
> > there have been
> > > some changes to the JERI framework. - There are ".jar" files in our
> > repository.
> > > I'll stipulate that the licensing has been checked, but it smells
> > bad.
> > >
> > > Discussion
> > > -----------------
> > > I guess the biggest thing I'd like to see is stability in the test
> > framework.
> > > Perhaps it needs refactoring or reorganization, but if so, we need
> > to be very
> > > careful to separate it from changes to the core functionality.
> > >
> > > Next, I'd like for it to be easier to comprehend the source tree.  I
> > think a
> > > good way to do that is to separate out (carefully) the core Jini
> > package
> > > (basically the contents of jsk-platform.jar) and the service
> > implementations.
> > > There's no reason that we have to have one huge
> > everything-but-the-kitchen-sink
> > > distribution.  That's just a holdover from how Sun structured the
> > JTSK - It was
> > > literally a "starter kit".  To me it would be fine to have separate
> > deliverables
> > > for the platform and the services.
> > >
> > > While we're separating out the services, it might also be a decent
> > time to
> > > implement Maven-based builds if we think that's a good idea.  I'd
> > start with
> > > Reggie.  It would also be a good time to get rid of the
> > "com.sun.jini" packages.
> > >
> > > Aside:  I'm personally ambivalent on Maven (which is to say I'm
> > nowhere near as
> > > negative on it as I once was).  I do agree with Dennis, though, that
> > the jars
> > > and appropriate poms need to be published to Maven Central.  There's
> > no doubt
> > > that users will appreciate that.
> > >
> > > Once we have a stable set of regression tests, then OK, we could
> > think about
> > > improving performance or using Maven repositories as the codebase
> > server.
> > >
> > > I realize this won't be popular, but my gut feel is that we need to
> > step back to
> > > the 2.2 branch and retrace our steps a little, and go through the
> > evolution
> > > again in a more measured fashion.
> > >
> > > Proposal
> > > ------------
> > >
> > > 1 - Release version 2.2.1 from the 2.2 branch.
> > > 2 - Create a separate source tree for the test framework.  This
> > could come from
> > > the "qa_refactor" branch, but the goal should be to successfully
> > test the 2.2.1
> > > release.  Plus it should be a no-brainer to pull it down and run it
> > on a local
> > > machine. 3 - Release 2.2.2 from the pruned jtsk tree.  Release 1.0.0
> > of the test
> > > framework. 4 - Pull out the infrastructure service implementations
> > (Reggie,
> > > Outrigger, Norm, etc) from the core into separate products.  Release
> > 1.0.0 on
> > > each of them.  Release 2.2.3 from the pruned jtsk tree. 5 - Adopt a
> > fixed
> > > release cycle.  Not sure if it should be quarterly or biennial, or
> > whether it
> > > should be all products at once or staggered releases.  We'll need to
> > discuss. 6
> > > - Then we can start making changes if necessary to the individual
> > products.  And
> > > also try to deal with making it easier for new users to use the
> > technology.
> > >
> > > So there you go.  Opinions?
> > >
> > > Greg Trasuk.
> > >
> >
>


Re: Next steps after 2.2.1 release

Posted by Tom Hobbs <tv...@googlemail.com>.
I'm not sure where I stand with regards to votes and what-not anymore, but
here's my opinion.  I think it's wise to do a quick release with the
minimum of changes in Right Now.  Particularly if those changes include
the JDK7 fix.  Releasing as many changes as there are without more
consideration just feels too risky.  The greater need right now is to get
the bare minimum release out asap to fix some real-life issues.

That's not a comment on what I think the quality of all the trunk changes
are, it's just my gut feel.

To counter some of Greg's comments, if we trusted the old tests for
previous releases, then digging our way out of the hole is not hugely
difficult.  We just run the old tests against the new code to verify the
new code.  Then we run the new tests against an old release.  There will be
some additional due diligence needed to tie up loose ends and un-grey some
areas, but I think we can then have a good degree of confidence in both the
new tests and new code.

When that's done, I would suggest that it sounds like another release would
be due.  Then the source tree can be straightened out with the right bits
merged to the right branches and the right branches being created for the
right work streams.

I don't think that there is much value in debating the motives people have
had for the code they've written/changed.  As has been said before, we've
all got our own itches to scratch.  If there is a technical reason for
blocking some change then fine, but I don't believe that a lack of
benchmarks detailing some pain point is a reason to throw it out.
 Questioning the Why is often useful because it can aid a discussion and
guide us to what the real What should be, but I'm not sure that it will in
this case - then again, I've been wrong before...  So I think that it is a
good approach to modify the code so it follows the advice given in what
many would consider the "Concurrency Bible" - even if I can't prove that
the previous implementation was flawed in some way.

Dan has mentioned the policy (or lack of) with regards to what gets put
onto trunk and that is something that should really be discussed.  So some
questions;

- Is there a policy?
- Was it the right policy?
- Did we stick to the policy?
- What is a better policy?

Then everything else from Maven, to separate builds, to TaskManager
replacements (or not), to whatever else should just slot into place.

Lastly, I'm glad that the conversation has calmed down.  Thanks to both
Greg for not biting and Peter for responding in kind.  Your reactions to
what could have become a nasty situation speaks volumes to both of your
characters and that's A Good Thing.

Cheers,

Tom


On Mon, Apr 8, 2013 at 2:42 PM, Dan Creswell <da...@gmail.com> wrote:

> > This is an important issue to address.  I know a lot of people here
> >> probably don't participate on the Concurrency-interest mailing list that
> >> has a wide range of discussion about the JLS vs the JMM and what the JIT
> >> compilers actually do to code these days.
> >>
> > ...
> >
> > I used to be a concurrency expert, but have not been following the topic
> > recently. For practical Java coding, I have tended to follow the ideas
> > in Java Concurrency in Practice. Do any of the changes invalidate that
> > approach?
> >
> >
> No, they don't. The JMM hasn't really changed since the work Doug Lea did
> for Java 5 and beyond. What has changed over time is the amount typical
> JITs exploit the opportunities presented by the JMM for aggressive
> instruction re-ordering etc.
>
> If your code "does the right things" it'll be fine. It just potentially
> runs better (it could actually run worse in some cases). If you've
> misunderstood JMM or how it relates to JLS then you may have problems.
>

Re: Next steps after 2.2.1 release

Posted by Dan Creswell <da...@gmail.com>.
> This is an important issue to address.  I know a lot of people here
>> probably don't participate on the Concurrency-interest mailing list that
>> has a wide range of discussion about the JLS vs the JMM and what the JIT
>> compilers actually do to code these days.
>>
> ...
>
> I used to be a concurrency expert, but have not been following the topic
> recently. For practical Java coding, I have tended to follow the ideas
> in Java Concurrency in Practice. Do any of the changes invalidate that
> approach?
>
>
No, they don't. The JMM hasn't really changed since the work Doug Lea did
for Java 5 and beyond. What has changed over time is the amount typical
JITs exploit the opportunities presented by the JMM for aggressive
instruction re-ordering etc.

If your code "does the right things" it'll be fine. It just potentially
runs better (it could actually run worse in some cases). If you've
misunderstood JMM or how it relates to JLS then you may have problems.

Re: Next steps after 2.2.1 release

Posted by Patricia Shanahan <pa...@acm.org>.
On 4/8/2013 6:11 AM, Gregg Wonderly wrote:
> On 4/7/2013 7:03 PM, Greg Trasuk wrote:
>> I'm honestly and truly not passing judgement on the quality of the
>> code. I honestly don't know if it's good or bad. I have to confess
>> that, given that Jini was written as a top-level project at Sun,
>> sponsored by Bill Joy, when Sun was at the top of its game, and the
>> Jini project team was a "who's-who" of distributed computing pioneers,
>> the idea that it's riddled with concurrency bugs surprises me. But
>> mainly, I'm still trying to answer that question - "How do I know if
>> it's good?" Here's what I'm doing: - I'm attempting to run the tests
>> from "tags/2.2.0" against the "2.2" branch. When I have confidence in
>> the "2.2" branch, I'll publish the results, ask anyone else who's
>> interested to test it, and then call for a release on "2.2.1" - After
>> that, the developers need to reach consensus about how to move
>> forward. Cheers, Greg.
>
> This is an important issue to address.  I know a lot of people here
> probably don't participate on the Concurrency-interest mailing list that
> has a wide range of discussion about the JLS vs the JMM and what the JIT
> compilers actually do to code these days.
...

I used to be a concurrency expert, but have not been following the topic
recently. For practical Java coding, I have tended to follow the ideas
in Java Concurrency in Practice. Do any of the changes invalidate that
approach?

Patricia



Re: Next steps after 2.2.1 release

Posted by Patricia Shanahan <pa...@acm.org>.
On 4/11/2013 4:15 AM, Peter Firmstone wrote:
...
> I know some would prefer me to prove something is broken before fixing
> it, providing tests that prove the failure, but this isn't an enterprise
> project and I lack the resources for such things, there's always the
> option of running a 2.2 maintenance branch for those who'd like to wait
> longer before upgrading.
...

I'm still trying to remember/find a place where there seemed to me to be 
a possible race condition.

Back when I was actively working on this, I considered trying to set up 
a test, and all I could think of was to put a moderately long 
Thread.sleep() call in the code where I thought there was a window. I 
was not able to produce a failing test.

Patricia


Re: Next steps after 2.2.1 release

Posted by Peter Firmstone <ji...@zeus.net.au>.
Gregg,

Thanks again for your support.

I refactored LookupDiscovery and tidied up LookupLocatorDiscovery.

If you get some time, I could use a hand with other classes you've 
already fixed.

I'm working on MailboxImpl presently, there's some very dubious code, 
Threads being started from inside their constructors, called from static 
init methods from within MailboxImpl's constructor.

I know some would prefer me to prove something is broken before fixing 
it, providing tests that prove the failure, but this isn't an enterprise 
project and I lack the resources for such things, there's always the 
option of running a 2.2 maintenance branch for those who'd like to wait 
longer before upgrading.

Regards,

Peter.

On 11/04/2013 2:00 AM, Gregg Wonderly wrote:
> I just want to extend this conversation a bit by saying that nearly everything about River is "concurrently accessed".  There are, of course several places, where work is done by one thread, at a time, but new threads are created to do that work, and that means that "visibility" has to be considered.
>
> I won't say that every single field in every class in River needs to be final or volatile, but that should not be considered an extreme.  Specifically, you might see code execute just fine without appropriate concurrency design, and then it will suddenly break when a new optimization appears on the scene, reordering something under the covers and creating an intangible behavior.  Some "visibility bugs" might not ever manifest because of other "happens before" and "cache line sync" activities that happen implicitly based on the "current design" or "thread model".  We can "be happy" with "it ain't broke, so don't fix it", but I don't think that's very productive.
>
> I personally, have been beating on various parts of Jini in my "fork" because of completely unpredictable results in discovery and discovery management.  I've written, rewritten, debugged and stared at that code till I was blue in the face, because my ServiceUI desktop application just doesn't behave like it should.  Some of it is missing lifecycle management that was not in the original services, because System.registerShutdownHook() hasn't been used.  But other parts are these race conditions and thread scheduling overlaps (or underlaps) which keep discovery and notification from happening reliably.   There are lots of different reasons why people might not be "complaining" about this stuff, but I would contend that the fact that there are many examples of people forking and extending Jini, which to me, reflects the fact that there are things that aren't correct, or functional in the wild, and this causes them to jump over the cliff and never look back.
>
> We are at that point today, and Peter's continued slogging through the motions to track down and discover where the issues actually are, is an astronomical effort!  I have been very involved in several different, new work opportunities that have kept me from jumping in to participate in dealing with all of these issues, as I have really wanted to.
>
> Gregg Wonderly
>
> On Apr 8, 2013, at 3:19 PM, Peter<ji...@zeus.net.au>  wrote:
>
>> Thanks Gregg,
>>
>> You've hit the nail on the head, this is exactly the issue I'm having.
>>
>> So I've been fixing safe publication in constructors by making fields final or volatile and ensuring "this" doesn't escape, fixing synchronisation on collections etc during method calls.
>>
>> To fix deadlock, I investigate immutable non blocking data structures with volatile publication, if future state doesn't depend on previous state, if it does a CAS atomic reference can be used instead of volatile.
>>
>> Often I find synchronization is quite acceptable if it is limited in scope; if synchronized or holding a lock while a thread is executing outside your object's scope of control, that's when deadlock is more likely to occur.
>>
>> The policy providers were deadlock prone, which is why they're mostly immutable and non-blocking now; any synchronization or locking is limited.
>>
>> I basically follow Doug Lea's concurrency in practise guidelines.
>>
>> For debugging I follow Cliff Click's recommendations.
>>
>> Unfortunately fixing concurrency bugs means finding a trace of execution, identifying all classes and inspecting the code visually.  Findbugs identifies cases of inadequate synchronization using static analysis.
>>
>> Regards,
>>
>> Peter.
>>
>> ----- Original message -----
>>> On 4/7/2013 7:03 PM, Greg Trasuk wrote:
>>>> I'm honestly and truly not passing judgement on the quality of the code. I
>>>> honestly don't know if it's good or bad. I have to confess that, given that
>>>> Jini was written as a top-level project at Sun, sponsored by Bill Joy, when
>>>> Sun was at the top of its game, and the Jini project team was a "who's-who" of
>>>> distributed computing pioneers, the idea that it's riddled with concurrency
>>>> bugs surprises me. But mainly, I'm still trying to answer that question - "How
>>>> do I know if it's good?" Here's what I'm doing: - I'm attempting to run the
>>>> tests from "tags/2.2.0" against the "2.2" branch. When I have confidence in
>>>> the "2.2" branch, I'll publish the results, ask anyone else who's interested
>>>> to test it, and then call for a release on "2.2.1" - After that, the
>>>> developers need to reach consensus about how to move forward. Cheers, Greg.
>>> This is an important issue to address.  I know a lot of people here probably
>>> don't participate on the Concurrency-interest mailing list that has a wide range
>>> of discussion about the JLS vs the JMM and what the JIT compilers actually do to
>>> code these days.
>>>
>>> The number one issue that you need to understand, is that the optimizer is
>>> working against you more and more these days if you don't have JMM details
>>> exactly right.  Statements are being reordered more and more, including actual
>>> "assignments" which can expose uninitialized data items in "racy" concurrent
>>> code.  The latest example is the  Thread.setName()/Thread.getName() pair.  They
>>> are most likely always to be accessed by "other threads", yet there is no
>>> synchronization on them, including no "visibility" control with volatile even.
>>> What this means, is that if setName() and getName() are being called in a racy
>>> environment, the setName, will assign the array that is created to copy the
>>> characters into, before the arraycopy of the data occurs, potentially exposing
>>> an uninitialized name to getName().
>>>
>>> There are literally hundreds of places in the JDK that still have these kinds of
>>> races going on, and no one at Oracle, based on how people are acting, appears to
>>> be responsible for dealing with it. The Jini code, has many many of the same
>>> issues that just randomly appear in stress cases on "slower" or "faster"
>>> hardware, depending on the issue.
>>>
>>> When you haven't got sharing and visibility covered correctly, the JIT code
>>> rewrites can make execution order play a big part in conflating what you "see"
>>> happening versus what the "code" says, to you, should happen.
>>>
>>> There are some very simple things to get the JIT out of the picture.  One of
>>> these, is to actually open the source up in an IDE and declare every field
>>> final.  If that doesn't work due to 'mutation' of values, change those fields to
>>> 'volatile' so that it will compile again.    Then run your tests and you will now
>>> greatly diminish reordering and visibility issues so that you can just get to
>>> the simple "was it set correctly, before it was read" and "did we provide the
>>> correct atomicity for that update" kinds of questions that will help you
>>> understand things better when code is misbehaving.
>>>
>>> This is the kind of thing that Peter has been working through because the usage
>>> of the code in real life has not continued in the same way that it did when the
>>> code was written, and the JMM in JDK5 has literally broken so much software, all
>>> over the planet, that used to work quite well, because there wasn't a formal
>>> definition of "happens before".    Now that there is, the compiler optimizations
>>> are against you if you don't get it right.  The behaviors you will experience,
>>> because of reorderings that are targeted at all out performance (minimize
>>> traffic in and out of the CPU through memory subsystems), can create completely
>>> unexpected results.  Intra-thread semantics are kept correct, but inter-thread
>>> execution will just seem intangible because stuff will not be happening in the
>>> order the "code" says it should.
>>>
>>> Gregg Wonderly
>>>



Re: Next steps after 2.2.1 release

Posted by Gregg Wonderly <ge...@cox.net>.
I just want to extend this conversation a bit by saying that nearly everything about River is "concurrently accessed".  There are, of course several places, where work is done by one thread, at a time, but new threads are created to do that work, and that means that "visibility" has to be considered.

I won't say that every single field in every class in River needs to be final or volatile, but that should not be considered an extreme.  Specifically, you might see code execute just fine without appropriate concurrency design, and then it will suddenly break when a new optimization appears on the scene, reordering something under the covers and creating an intangible behavior.  Some "visibility bugs" might not ever manifest because of other "happens before" and "cache line sync" activities that happen implicitly based on the "current design" or "thread model".  We can "be happy" with "it ain't broke, so don't fix it", but I don't think that's very productive.

I personally, have been beating on various parts of Jini in my "fork" because of completely unpredictable results in discovery and discovery management.  I've written, rewritten, debugged and stared at that code till I was blue in the face, because my ServiceUI desktop application just doesn't behave like it should.  Some of it is missing lifecycle management that was not in the original services, because System.registerShutdownHook() hasn't been used.  But other parts are these race conditions and thread scheduling overlaps (or underlaps) which keep discovery and notification from happening reliably.   There are lots of different reasons why people might not be "complaining" about this stuff, but I would contend that the many examples of people forking and extending Jini reflect, to me, the fact that there are things that aren't correct, or functional in the wild, and this causes them to jump over the cliff and never look back.

We are at that point today, and Peter's continued slogging through the motions to track down and discover where the issues actually are, is an astronomical effort!  I have been very involved in several different, new work opportunities that have kept me from jumping in to participate in dealing with all of these issues, as I have really wanted to.  

Gregg Wonderly

On Apr 8, 2013, at 3:19 PM, Peter <ji...@zeus.net.au> wrote:

> Thanks Gregg,
> 
> You've hit the nail on the head, this is exactly the issue I'm having.
> 
> So I've been fixing safe publication in constructors by making fields final or volatile and ensuring "this" doesn't escape, fixing synchronisation on collections etc during method calls.
> 
> To fix deadlock, I investigate immutable non-blocking data structures with volatile publication if future state doesn't depend on previous state; if it does, a CAS atomic reference can be used instead of volatile.
> 
> Often I find synchronization is quite acceptable if it is limited in scope; it's when you are synchronized or holding a lock while a thread is executing outside your object's scope of control that deadlock is more likely to occur.
> 
> The policy providers were deadlock prone, which is why they're mostly immutable non blocking now, any synchronization or locking is limited.
> 
> I basically follow Doug Lea's concurrency in practice guidelines.
> 
> For debugging I follow Cliff Click's recommendations.
> 
> Unfortunately fixing concurrency bugs means finding a trace of execution, identifying all classes and inspecting the code visually.  Findbugs identifies cases of inadequate synchronization using static analysis.
> 
> Regards,
> 
> Peter.
> 
> ----- Original message -----
>> On 4/7/2013 7:03 PM, Greg Trasuk wrote:
>>> I'm honestly and truly not passing judgement on the quality of the code. I
>>> honestly don't know if it's good or bad. I have to confess that, given that
>>> Jini was written as a top-level project at Sun, sponsored by Bill Joy, when
>>> Sun was at the top of its game, and the Jini project team was a "who's-who" of
>>> distributed computing pioneers, the idea that it's riddled with concurrency
>>> bugs surprises me. But mainly, I'm still trying to answer that question - "How
>>> do I know if it's good?" Here's what I'm doing: - I'm attempting to run the
>>> tests from "tags/2.2.0" against the "2.2" branch. When I have confidence in
>>> the "2.2" branch, I'll publish the results, ask anyone else who's interested
>>> to test it, and then call for a release on "2.2.1" - After that, the
>>> developers need to reach consensus about how to move forward. Cheers, Greg.
>> 
>> This is an important issue to address.  I know a lot of people here probably
>> don't participate on the Concurrency-interest mailing list that has a wide range
>> of discussion about the JLS vs the JMM and what the JIT compilers actually do to
>> code these days.
>> 
>> The number one issue that you need to understand, is that the optimizer is
>> working against you more and more these days if you don't have JMM details
>> exactly right.  Statements are being reordered more and more, including actual
>> "assignments" which can expose uninitialized data items in "racy" concurrent
>> code.  The latest example is the  Thread.setName()/Thread.getName() pair.  They
>> are most likely always to be accessed by "other threads", yet there is no
>> synchronization on them, including no "visibility" control with volatile even. 
>> What this means, is that if setName() and getName() are being called in a racy
>> environment, the setName, will assign the array that is created to copy the
>> characters into, before the arraycopy of the data occurs, potentially exposing
>> an uninitialized name to getName().
>> 
>> There are literally hundreds of places in the JDK that still have these kinds of
>> races going on, and no one at Oracle, based on how people are acting, appears to
>> be responsible for dealing with it. The Jini code, has many many of the same
>> issues that just randomly appear in stress cases on "slower" or "faster"
>> hardware, depending on the issue.
>> 
>> When you haven't got sharing and visibility covered correctly, the JIT code
>> rewrites can make execution order play a big part in conflating what you "see"
>> happening versus what the "code" says, to you, should happen.
>> 
>> There are some very simple things to get the JIT out of the picture.  One of
>> these, is to actually open the source up in an IDE and declare every field
>> final.  If that doesn't work due to 'mutation' of values, change those fields to
>> 'volatile' so that it will compile again.    Then run your tests and you will now
>> greatly diminish reordering and visibility issues so that you can just get to
>> the simple "was it set correctly, before it was read" and "did we provide the
>> correct atomicity for that update" kinds of questions that will help you
>> understand things better when code is misbehaving.
>> 
>> This is the kind of thing that Peter has been working through because the usage
>> of the code in real life has not continued in the same way that it did when the
>> code was written, and the JMM in JDK5 has literally broken so much software, all
>> over the planet, that used to work quite well, because there wasn't a formal
>> definition of "happens before".    Now that there is, the compiler optimizations
>> are against you if you don't get it right.  The behaviors you will experience,
>> because of reorderings that are targeted at all out performance (minimize
>> traffic in and out of the CPU through memory subsystems), can create completely
>> unexpected results.  Intra-thread semantics are kept correct, but inter-thread
>> execution will just seem intangible because stuff will not be happening in the
>> order the "code" says it should.
>> 
>> Gregg Wonderly
>> 
> 


Re: Next steps after 2.2.1 release

Posted by Peter <ji...@zeus.net.au>.
Thanks Gregg,

You've hit the nail on the head, this is exactly the issue I'm having.

So I've been fixing safe publication in constructors by making fields final or volatile and ensuring "this" doesn't escape, fixing synchronisation on collections etc during method calls.

To fix deadlock, I investigate immutable non-blocking data structures with volatile publication if future state doesn't depend on previous state; if it does, a CAS atomic reference can be used instead of volatile.

Often I find synchronization is quite acceptable if it is limited in scope; it's when you are synchronized or holding a lock while a thread is executing outside your object's scope of control that deadlock is more likely to occur.

The policy providers were deadlock prone, which is why they're mostly immutable non blocking now, any synchronization or locking is limited.

I basically follow Doug Lea's concurrency in practice guidelines.

For debugging I follow Cliff Click's recommendations.

Unfortunately fixing concurrency bugs means finding a trace of execution, identifying all classes and inspecting the code visually.  Findbugs identifies cases of inadequate synchronization using static analysis.

Regards,

Peter.

----- Original message -----
> On 4/7/2013 7:03 PM, Greg Trasuk wrote:
> > I'm honestly and truly not passing judgement on the quality of the code. I
> > honestly don't know if it's good or bad. I have to confess that, given that
> > Jini was written as a top-level project at Sun, sponsored by Bill Joy, when
> > Sun was at the top of its game, and the Jini project team was a "who's-who" of
> > distributed computing pioneers, the idea that it's riddled with concurrency
> > bugs surprises me. But mainly, I'm still trying to answer that question - "How
> > do I know if it's good?" Here's what I'm doing: - I'm attempting to run the
> > tests from "tags/2.2.0" against the "2.2" branch. When I have confidence in
> > the "2.2" branch, I'll publish the results, ask anyone else who's interested
> > to test it, and then call for a release on "2.2.1" - After that, the
> > developers need to reach consensus about how to move forward. Cheers, Greg.
>
> This is an important issue to address.  I know a lot of people here probably
> don't participate on the Concurrency-interest mailing list that has a wide range
> of discussion about the JLS vs the JMM and what the JIT compilers actually do to
> code these days.
>
> The number one issue that you need to understand, is that the optimizer is
> working against you more and more these days if you don't have JMM details
> exactly right.  Statements are being reordered more and more, including actual
> "assignments" which can expose uninitialized data items in "racy" concurrent
> code.  The latest example is the  Thread.setName()/Thread.getName() pair.  They
> are most likely always to be accessed by "other threads", yet there is no
> synchronization on them, including no "visibility" control with volatile even. 
> What this means, is that if setName() and getName() are being called in a racy
> environment, the setName, will assign the array that is created to copy the
> characters into, before the arraycopy of the data occurs, potentially exposing
> an uninitialized name to getName().
>
> There are literally hundreds of places in the JDK that still have these kinds of
> races going on, and no one at Oracle, based on how people are acting, appears to
> be responsible for dealing with it. The Jini code, has many many of the same
> issues that just randomly appear in stress cases on "slower" or "faster"
> hardware, depending on the issue.
>
> When you haven't got sharing and visibility covered correctly, the JIT code
> rewrites can make execution order play a big part in conflating what you "see"
> happening versus what the "code" says, to you, should happen.
>
> There are some very simple things to get the JIT out of the picture.  One of
> these, is to actually open the source up in an IDE and declare every field
> final.  If that doesn't work due to 'mutation' of values, change those fields to
> 'volatile' so that it will compile again.    Then run your tests and you will now
> greatly diminish reordering and visibility issues so that you can just get to
> the simple "was it set correctly, before it was read" and "did we provide the
> correct atomicity for that update" kinds of questions that will help you
> understand things better when code is misbehaving.
>
> This is the kind of thing that Peter has been working through because the usage
> of the code in real life has not continued in the same way that it did when the
> code was written, and the JMM in JDK5 has literally broken so much software, all
> over the planet, that used to work quite well, because there wasn't a formal
> definition of "happens before".    Now that there is, the compiler optimizations
> are against you if you don't get it right.  The behaviors you will experience,
> because of reorderings that are targeted at all out performance (minimize
> traffic in and out of the CPU through memory subsystems), can create completely
> unexpected results.  Intra-thread semantics are kept correct, but inter-thread
> execution will just seem intangible because stuff will not be happening in the
> order the "code" says it should.
>
> Gregg Wonderly
>


Re: Next steps after 2.2.1 release

Posted by Gregg Wonderly <gr...@wonderly.org>.
On 4/7/2013 7:03 PM, Greg Trasuk wrote:
> I'm honestly and truly not passing judgement on the quality of the code. I 
> honestly don't know if it's good or bad. I have to confess that, given that 
> Jini was written as a top-level project at Sun, sponsored by Bill Joy, when 
> Sun was at the top of its game, and the Jini project team was a "who's-who" of 
> distributed computing pioneers, the idea that it's riddled with concurrency 
> bugs surprises me. But mainly, I'm still trying to answer that question - "How 
> do I know if it's good?" Here's what I'm doing: - I'm attempting to run the 
> tests from "tags/2.2.0" against the "2.2" branch. When I have confidence in 
> the "2.2" branch, I'll publish the results, ask anyone else who's interested 
> to test it, and then call for a release on "2.2.1" - After that, the 
> developers need to reach consensus about how to move forward. Cheers, Greg.

This is an important issue to address.  I know a lot of people here probably 
don't participate on the Concurrency-interest mailing list that has a wide range 
of discussion about the JLS vs the JMM and what the JIT compilers actually do to 
code these days.

The number one issue that you need to understand, is that the optimizer is 
working against you more and more these days if you don't have JMM details 
exactly right.  Statements are being reordered more and more, including actual 
"assignments" which can expose uninitialized data items in "racy" concurrent 
code.  The latest example is the  Thread.setName()/Thread.getName() pair.  They 
are most likely always to be accessed by "other threads", yet there is no 
synchronization on them, including no "visibility" control with volatile even.  
What this means, is that if setName() and getName() are being called in a racy 
environment, the setName, will assign the array that is created to copy the 
characters into, before the arraycopy of the data occurs, potentially exposing 
an uninitialized name to getName().

There are literally hundreds of places in the JDK that still have these kinds of 
races going on, and no one at Oracle, based on how people are acting, appears to 
be responsible for dealing with it. The Jini code, has many many of the same 
issues that just randomly appear in stress cases on "slower" or "faster" 
hardware, depending on the issue.

When you haven't got sharing and visibility covered correctly, the JIT code 
rewrites can make execution order play a big part in conflating what you "see" 
happening versus what the "code" says, to you, should happen.

There are some very simple things to get the JIT out of the picture.  One of 
these, is to actually open the source up in an IDE and declare every field 
final.  If that doesn't work due to 'mutation' of values, change those fields to 
'volatile' so that it will compile again.   Then run your tests and you will now 
greatly diminish reordering and visibility issues so that you can just get to 
the simple "was it set correctly, before it was read" and "did we provide the 
correct atomicity for that update" kinds of questions that will help you 
understand things better when code is misbehaving.

This is the kind of thing that Peter has been working through because the usage 
of the code in real life has not continued in the same way that it did when the 
code was written, and the JMM in JDK5 has literally broken so much software, all 
over the planet, that used to work quite well, because there wasn't a formal 
definition of "happens before".   Now that there is, the compiler optimizations 
are against you if you don't get it right.  The behaviors you will experience, 
because of reorderings that are targeted at all out performance (minimize 
traffic in and out of the CPU through memory subsystems), can create completely 
unexpected results.  Intra-thread semantics are kept correct, but inter-thread 
execution will just seem intangible because stuff will not be happening in the 
order the "code" says it should.

Gregg Wonderly


Re: Next steps after 2.2.1 release

Posted by Greg Trasuk <tr...@stratuscom.com>.
On Sun, 2013-04-07 at 17:54, Peter wrote:
> Greg, why have you repeated this message?
> 

First time I sent it was from the wrong email address, so it got hung up
in moderation.  I sent it again from my subscribed address.  I'm
guessing someone just moderated the original through.


Anyway, let's address one or two of your points...

I see you writing inflammatory statements about my leadership skills and
I think you're  upset because you think I was questioning the quality of
your work. I understand.  You've put a lot of effort into the codebase.

I feel sorry that you feel that way - it wasn't what I intended.

Apache doesn't recognize any kind of a "project leader" position, and I
don't pretend to hold any such influence over River.  I'm speaking as a
committer and PMC member.  I certainly don't think I "hold the future of
the project in my hands".  If anyone does hold individual control over
the future of the project, then it doesn't qualify as an Apache project,
and we need to remedy that.

Really, what I'm trying to do is answer this question for myself - "Can
I vote +1 on a release based on the trunk?".  There have been a lot of
changes to the trunk code.  Yes, many that I don't understand.  I've
done more management than you think.  I don't require that I understand
everything.  That leads me to ask "How can I be confident about a
release?"

The best answer I have is to ask "does it pass the regression tests?". 
But that implies another question - "Do I trust the tests?"  And the
answer to that is "currently, no, because from what I can see there have
also been changes to the tests".

I'm honestly and truly not passing judgement on the quality of the
code.  I honestly don't know if it's good or bad.  I have to confess
that, given that Jini was written as a top-level project at Sun,
sponsored by Bill Joy, when Sun was at the top of its game, and the Jini
project team was a "who's-who" of distributed computing pioneers, the
idea that it's riddled with concurrency bugs surprises me.  But mainly,
I'm still trying to answer that question - "How do I know if it's good?"

Here's what I'm doing:

- I'm attempting to run the tests from "tags/2.2.0" against the "2.2"
branch.  When I have confidence in the "2.2" branch, I'll publish the
results, ask anyone else who's interested to test it, and then call for
a release on "2.2.1"
- After that, the developers need to reach consensus about how to move
forward.

Cheers,

Greg.



> I think this is a deliberate attack on the project because you haven't
> been following development in trunk and now you're scared because you
> see changes you don't understand.
> 
> I've been following your developments in surrogates, an impressive
> amount of productivity.  Although I think you should consider
> upgrading apache.commons vfs to version 2 before releasing.
> 
> Open your mind and ask questions, the code isn't set in stone, you
> have an obligation as project lead to encourage and nurture
> development, not stifle it.
> 
> You strike me as someone who's a very good programmer, but still
> learning leadership because you lack faith in others and must do
> everything yourself.  Remember I offered to assist with Surrogates,
> but you wanted to work alone? 
> 
> You need to let go and give others a go too.
> 
> How you handle this matter will be a test for your own personal
> development and an opportunity to grow as a leader. 
> 
> You also hold the future of this project in your hands, so I hope you
> find strength to let go.
> 
> Regards,
> 
> Peter.
> 
> ----- Original message -----
> >
> > OK, so in my last message I talked about how (speaking only for
> myself) I'm a
> > little nervous about the state of the trunk.
> >
> > So what now? 
> >
> > Problems we need to avoid in this discussion:
> > -------------------------------------------------------------
> >
> > - Conflation of source tree structure issues with build tool
> selection.
> > - Conflation of Maven build, Maven as codebase provider (artifact
> urls), and
> > posting artifacts to Maven Central - Wish lists of pet features
> > - Bruised egos and personal criticisms.
> >
> > Issues I see, in no particular order:
> > ----------------------------------------------
> > - We've done changes both to the test framework and the code, and
> lots of them.
> > We should do one or the other, or small amounts of coevolution, if
> absolutely
> > necessary. - Really, I'd like to see a completely separate
> integration test, and
> > have the TCK tests separated out again. - The source tree is
> incomprehensible -
> > The tests appear to be awfully sensitive to their environment. 
> Insofar as when
> > I run them locally on an untouched source tree, I get 280 failures.
> - There have
> > been changes to class loading and security subsystems.  These
> subsystems are
> > core to Jini, and the changes were made to the existing source, so
> there's no
> > way to "opt-out" of the changes.  I'd like to see radical changes be
> optional
> > until proven in the field, where possible.  In the case of policy
> providers and
> > class loaders, that should be easy to do. - Similarly, it seems
> there have been
> > some changes to the JERI framework. - There are ".jar" files in our
> repository.
> > I'll stipulate that the licensing has been checked, but it smells
> bad.
> >
> > Discussion
> > -----------------
> > I guess the biggest thing I'd like to see is stability in the test
> framework.
> > Perhaps it needs refactoring or reorganization, but if so, we need
> to be very
> > careful to separate it from changes to the core functionality.
> >
> > Next, I'd like for it to be easier to comprehend the source tree.  I
> think a
> > good way to do that is to separate out (carefully) the core Jini
> package
> > (basically the contents of jsk-platform.jar) and the service
> implementations.
> > There's no reason that we have to have one huge
> everything-but-the-kitchen-sink
> > distribution.  That's just a holdover from how Sun structured the
> JTSK - It was
> > literally a "starter kit".  To me it would be fine to have separate
> deliverables
> > for the platform and the services.
> >
> > While we're separating out the services, it might also be a decent
> time to
> > implement Maven-based builds if we think that's a good idea.  I'd
> start with
> > Reggie.  It would also be a good time to get rid of the
> "com.sun.jini" packages.
> >
> > Aside:  I'm personally ambivalent on Maven (which is to say I'm
> nowhere near as
> > negative on it as I once was).  I do agree with Dennis, though, that
> the jars
> > and appropriate poms need to be published to Maven Central.  There's
> no doubt
> > that users will appreciate that.
> >
> > Once we have a stable set of regression tests, then OK, we could
> think about
> > improving performance or using Maven repositories as the codebase
> server.
> >
> > I realize this won't be popular, but my gut feel is that we need to
> step back to
> > the 2.2 branch and retrace our steps a little, and go through the
> evolution
> > again in a more measured fashion.
> >
> > Proposal
> > ------------
> >
> > 1 - Release version 2.2.1 from the 2.2 branch.
> > 2 - Create a separate source tree for the test framework.  This
> could come from
> > the "qa_refactor" branch, but the goal should be to successfully
> test the 2.2.1
> > release.  Plus it should be a no-brainer to pull it down and run it
> on a local
> > machine. 3 - Release 2.2.2 from the pruned jtsk tree.  Release 1.0.0
> of the test
> > framework. 4 - Pull out the infrastructure service implementations
> (Reggie,
> > Outrigger, Norm, etc) from the core into separate products.  Release
> 1.0.0 on
> > each of them.  Release 2.2.3 from the pruned jtsk tree. 5 - Adopt a
> fixed
> > release cycle.  Not sure if it should be quarterly or biennial, or
> whether it
> > should be all products at once or staggered releases.  We'll need to
> discuss. 6
> > - Then we can start making changes if necessary to the individual
> products.  And
> > also try to deal with making it easier for new users to use the
> technology.
> >
> > So there you go.  Opinions?
> >
> > Greg Trasuk.
> >
> 


Re: Next steps after 2.2.1 release

Posted by Peter <ji...@zeus.net.au>.
Greg, why have you repeated this message?

I think this is a deliberate attack on the project because you haven't been following development in trunk and now you're scared because you see changes you don't understand.

I've been following your developments in surrogates, an impressive amount of productivity.  Although I think you should consider upgrading apache.commons vfs to version 2 before releasing.

Open your mind and ask questions, the code isn't set in stone, you have an obligation as project lead to encourage and nurture development, not stifle it.

You strike me as someone who's a very good programmer, but still learning leadership because you lack faith in others and must do everything yourself.  Remember I offered to assist with Surrogates, but you wanted to work alone? 

You need to let go and give others a go too.

How you handle this matter will be a test for your own personal development and an opportunity to grow as a leader. 

You also hold the future of this project in your hands, so I hope you find strength to let go.

Regards,

Peter.

----- Original message -----
>
> OK, so in my last message I talked about how (speaking only for myself) I'm a
> little nervous about the state of the trunk.
>
> So what now? 
>
> Problems we need to avoid in this discussion:
> -------------------------------------------------------------
>
> - Conflation of source tree structure issues with build tool selection.
> - Conflation of Maven build, Maven as codebase provider (artifact urls), and
> posting artifacts to Maven Central - Wish lists of pet features
> - Bruised egos and personal criticisms.
>
> Issues I see, in no particular order:
> ----------------------------------------------
> - We've done changes both to the test framework and the code, and lots of them.
> We should do one or the other, or small amounts of coevolution, if absolutely
> necessary. - Really, I'd like to see a completely separate integration test, and
> have the TCK tests separated out again. - The source tree is incomprehensible -
> The tests appear to be awfully sensitive to their environment.  Insofar as when
> I run them locally on an untouched source tree, I get 280 failures. - There have
> been changes to class loading and security subsystems.  These subsystems are
> core to Jini, and the changes were made to the existing source, so there's no
> way to "opt-out" of the changes.  I'd like to see radical changes be optional
> until proven in the field, where possible.  In the case of policy providers and
> class loaders, that should be easy to do. - Similarly, it seems there have been
> some changes to the JERI framework. - There are ".jar" files in our repository.
> I'll stipulate that the licensing has been checked, but it smells bad.
>
> Discussion
> -----------------
> I guess the biggest thing I'd like to see is stability in the test framework.
> Perhaps it needs refactoring or reorganization, but if so, we need to be very
> careful to separate it from changes to the core functionality.
>
> Next, I'd like for it to be easier to comprehend the source tree.  I think a
> good way to do that is to separate out (carefully) the core Jini package
> (basically the contents of jsk-platform.jar) and the service implementations.
> There's no reason that we have to have one huge everything-but-the-kitchen-sink
> distribution.  That's just a holdover from how Sun structured the JTSK - It was
> literally a "starter kit".  To me it would be fine to have separate deliverables
> for the platform and the services.
>
> While we're separating out the services, it might also be a decent time to
> implement Maven-based builds if we think that's a good idea.  I'd start with
> Reggie.  It would also be a good time to get rid of the "com.sun.jini" packages.
>
> Aside:  I'm personally ambivalent on Maven (which is to say I'm nowhere near as
> negative on it as I once was).  I do agree with Dennis, though, that the jars
> and appropriate poms need to be published to Maven Central.  There's no doubt
> that users will appreciate that.
>
> Once we have a stable set of regression tests, then OK, we could think about
> improving performance or using Maven repositories as the codebase server.
>
> I realize this won't be popular, but my gut feel is that we need to step back to
> the 2.2 branch and retrace our steps a little, and go through the evolution
> again in a more measured fashion.
>
> Proposal
> ------------
>
> 1 - Release version 2.2.1 from the 2.2 branch.
> 2 - Create a separate source tree for the test framework.  This could come from
> the "qa_refactor" branch, but the goal should be to successfully test the 2.2.1
> release.  Plus it should be a no-brainer to pull it down and run it on a local
> machine. 3 - Release 2.2.2 from the pruned jtsk tree.  Release 1.0.0 of the test
> framework. 4 - Pull out the infrastructure service implementations (Reggie,
> Outrigger, Norm, etc) from the core into separate products.  Release 1.0.0 on
> each of them.  Release 2.2.3 from the pruned jtsk tree. 5 - Adopt a fixed
> release cycle.  Not sure if it should be quarterly or biennial, or whether it
> should be all products at once or staggered releases.  We'll need to discuss. 6
> - Then we can start making changes if necessary to the individual products.  And
> also try to deal with making it easier for new users to use the technology.
>
> So there you go.  Opinions?
>
> Greg Trasuk.
>


Next steps after 2.2.1 release

Posted by Greg Trasuk <tr...@trasuk.com>.
OK, so in my last message I talked about how (speaking only for myself) I'm a little nervous about the state of the trunk.

So what now?  

Problems we need to avoid in this discussion:
-------------------------------------------------------------

- Conflation of source tree structure issues with build tool selection.
- Conflation of Maven build, Maven as codebase provider (artifact urls), and posting artifacts to Maven Central
- Wish lists of pet features
- Bruised egos and personal criticisms.

Issues I see, in no particular order:
----------------------------------------------
- We've done changes both to the test framework and the code, and lots of them.  We should do one or the other, or small amounts of coevolution, if absolutely necessary.
- Really, I'd like to see a completely separate integration test, and have the TCK tests separated out again.
- The source tree is incomprehensible
- The tests appear to be awfully sensitive to their environment.  Insofar as when I run them locally on an untouched source tree, I get 280 failures.
- There have been changes to class loading and security subsystems.  These subsystems are core to Jini, and the changes were made to the existing source, so there's no way to "opt-out" of the changes.  I'd like to see radical changes be optional until proven in the field, where possible.  In the case of policy providers and class loaders, that should be easy to do.
- Similarly, it seems there have been some changes to the JERI framework.
- There are ".jar" files in our repository.  I'll stipulate that the licensing has been checked, but it smells bad.

Discussion
-----------------
I guess the biggest thing I'd like to see is stability in the test framework.  Perhaps it needs refactoring or reorganization, but if so, we need to be very careful to separate it from changes to the core functionality.

Next, I'd like for it to be easier to comprehend the source tree.  I think a good way to do that is to separate out (carefully) the core Jini package (basically the contents of jsk-platform.jar) and the service implementations.  There's no reason that we have to have one huge everything-but-the-kitchen-sink distribution.  That's just a holdover from how Sun structured the JTSK - It was literally a "starter kit".  To me it would be fine to have separate deliverables for the platform and the services.

While we're separating out the services, it might also be a decent time to implement Maven-based builds if we think that's a good idea.  I'd start with Reggie.  It would also be a good time to get rid of the "com.sun.jini" packages.

Aside:  I'm personally ambivalent on Maven (which is to say I'm nowhere near as negative on it as I once was).  I do agree with Dennis, though, that the jars and appropriate poms need to be published to Maven Central.  There's no doubt that users will appreciate that.

Once we have a stable set of regression tests, then OK, we could think about improving performance or using Maven repositories as the codebase server.

I realize this won't be popular, but my gut feel is that we need to step back to the 2.2 branch and retrace our steps a little, and go through the evolution again in a more measured fashion.

Proposal
------------

1 - Release version 2.2.1 from the 2.2 branch.
2 - Create a separate source tree for the test framework.  This could come from the "qa_refactor" branch, but the goal should be to successfully test the 2.2.1 release.  Plus it should be a no-brainer to pull it down and run it on a local machine.
3 - Release 2.2.2 from the pruned jtsk tree.  Release 1.0.0 of the test framework.
4 - Pull out the infrastructure service implementations (Reggie, Outrigger, Norm, etc) from the core into separate products.  Release 1.0.0 on each of them.  Release 2.2.3 from the pruned jtsk tree.
5 - Adopt a fixed release cycle.  Not sure if it should be quarterly or biennial, or whether it should be all products at once or staggered releases.  We'll need to discuss.
6 - Then we can start making changes if necessary to the individual products.  And also try to deal with making it easier for new users to use the technology.

So there you go.  Opinions?

Greg Trasuk.


Re: Next Release

Posted by Greg Trasuk <tr...@trasuk.com>.
The "2.2" branch is very clean.  It starts from the 2.2.0 release in 2011. Since then, Dennis applied RIVER-417, added poms for listing at Maven Central, and applied the Levels fix.  I've applied RIVER-149, and that's it.

A few days ago, I set out to see what else from the trunk should be rolled in for a "minimal" release.  In particular, I wanted to include the fix for RIVER-149 which I did a while ago, because it fixes a problem with the container work I've been doing separately.  But I also figured we might want to include non-controversial fixes.  

Before then, I did a 'svn diff', but it appears that the vast majority of files have at least cosmetic changes (maybe tabs or something), because I got just about every file in the repository.

See below for the list of changes that svn says have been applied to the trunk since the release.  I started going through all the revisions to see what they were, by doing 'svn diff ../trunk -c XXX' where XXX is the revision number (perhaps there's a non-manual way to do this).   As you can see, I didn't get too far before I started thinking that we'd better do some strategic thinking.  So I just merged in:

 r1211940 - RIVER-149 Fixes
r1140819 - Update build documentation.

Back to the changes in trunk… 

To see what's gone on since 2.2.0, run the following:

	svn log https://svn.apache.org/repos/asf/river/jtsk/trunk -r 1137621:HEAD

I want to be very careful here because I don't want to sound like I'm criticizing anyone.  I know that Peter, especially, has done a lot of work on the code.  

Having said that, as an observation and not a value judgement, I have to say that I'm not confident about the state of the trunk.  There have been too many changes since a release, both to the main code and the test code.  There are radical changes to the security policy provider (for perceived concurrency issues and also for revokable grants, I think).  Much cleaning, deleting, reorganizing.  Many alterations suggested by FindBugs.  Replacement of string concatenation by StringBuilder.  Something about reference collections, which adds a jar file that I can find no information on.  Additions of 'dnsjava name service provider'.  Changes to tests that fail because of a ConcurrentPolicyProvider.  Changes to PreferredClassLoader to supposedly improve concurrency.  More changes to tests.  Adding generics.  Many changes to tests to fix test failures.

A lot of the changes look to me like thrashing on problems.  Now I realize that chasing concurrency bugs can be a long game of "whack-a-mole", but I see a lot of uncertainty and thrashing at solution attempts.  And I don't recall anyone reporting concurrency problems in the field.  And the first set of changes in there is a hell-of-a-long list of "incremental merge of concurrent policy items".

Short answer - for now I think we ought to test and release "2.2.1" from the "2.2" branch.  This branch includes the Level fixes and RIVER-149 fix and not much else.  It fixes the immediate problem that users have reported: the system doesn't run with JDK7.

List of longer-term recommendations to follow.  This message is already in tl;dr territory.

Cheers,

Greg



Changes to the trunk since 2.2.0
=========================
N r1128239 Added rat_reports.sh.  Superseded 
Y r1140819 Updated minimum Java version in build.html
Y r1211940 Fix RIVER-149
N r1213641 River-265 Fix for unlucky caching as requested. River-401 Changed to utilise URI in place of URL in map's and arrays to avoid unnecessary DNS lookups.
r1213675 RIVER-401 Fix null pointer.  Seems to be related to above.
r1222914 Fix exception cast and reset interrupt status.
Y r1224722 Fix JDK7 compile errors (inner class private field accessibility)
Y r1227948 Commit msg says "RIVER-402 Fix null pointer exception."  Looks like JDK7 inner class private field accessibility stuff.
r1231478 Fix RIVER-403.  DGC Leaks threads.
N r1231673 Reverted common.xml to trunk version.  This is superseded by dreedy's commit on 20130403.
N r1231675 Changed common.xml.  As above.
N r1238468 Changed common.xml. As above.
Y r1241254 Propagate cause of interrupt.  Very minor change to assist service developers during debugging.
r1290906 Prepare for merge 2nd try.  What?
r1290925 Incremental merge of concurrent policy items.
r1290926 Incremental merge of concurrent policy items.
r1290929 Continuing merge of concurrent policy items.
r1290940 Continuing incremental merge.
r1290947 Continuing concurrent policy merge
r1290948 Continuing concurrent policy merge
r1290949 Continuing concurrent policy merge
r1290965 Completion of concurrent policy merge (replace trunk).
r1290982 Minor post-merge changes for above.
r1291177 Removed source/target overrides in javac-cmd in build.xml (JDK7?)
r1291182 Up 5 to 6 in javadoc.source property
r1301927 Cleaning, deleting, reorganizing
r1301929 Cleaning, deleting, reorganizing
r1302036 Cleaning, deleting, reorganizing
r1302083 Cleaning, deleting, reorganizing
r1302114 Deleted failing test, it was no longer relevant, tested Delegate Security Manager functionality, which is now disabled.  Reduced the number of Integer, Long, Float, Char etc objects created, by using valueOf instead of new.  Fixed minor bugs found with FindBugs - some string concatenations in loops outstanding
r1302267 Deleted failing test, it was no longer relevant, tested Delegate Security Manager functionality, which is now disabled.  Reduced the number of Integer, Long, Float, Char etc objects created, by using valueOf instead of new.  Fixed minor bugs found with FindBugs - some string concatenations in loops outstanding
r1302364 Reduced the number of Integer, Long, Float, Char, Short, etc objects created, by using valueOf instead of new.  Replaced string concatenation in loops with StringBuilder.  Fixed some bugs reported by FindBugs.
r1303195 Fixed bug in Reggie, when random number returns Integer.MIN_VALUE, then Maths.abs returns a negative number.  Fixed a classpath issue in the qa suite, caused by separating reference collections.
r1309816 Added missing ASF license header.
r1309818 Updated rat version.
r1332577 Add doap.rdf file
r1337773 Add reference-collections to class path in two qa tests.
r1338673 Session class delayed instantiation in jdk1.6 caused SecurityException because proxy ProtectionDomain was on the stack.  This caused some jtreg tests to fail, small fix, the bug doesn't exist in any releases.
r1344606 Added netbeans onebigjar project.
r1344639 forced default compile options in project.properties (onebigjar)
r1344736 added proper label (onebigjar).
r1355351 Refactoring for release, clean up and decrease size of new public api.  Separated RemotePolicy implementation from DynamicPolicyProvider.  Version numbers and documentation still requires update prior to release.
r1355851 Refactoring for release, clean up and review new public api, sanity check and remove unnecessary methods.  Added dnsjava name service provider to handle reverse dns lookup and to provide concurrent dns lookups.  Updated reference-collections, these were updated to avoid calling hashCode during initialisation of Timed references and temporary referrers, this helped reduced SocketPermission.hashCode calls that caused reverse lookups and recursive permission checks that cause stack overflow in the CombinerSecurityManager.  Two tests are failling due to a change to ConcurrentPolicyFile, now only privileged domains are returned by getPermissions(CodeSource) and all other instances are diverted to the java.security.Policy superclass which returns an empty PermissionCollection this is to avoid checking permissions twice.
r1355852 Refactoring for release, clean up and review new public api, sanity check and remove unnecessary methods.  Added dnsjava name service provider to handle reverse dns lookup and to provide concurrent dns lookups.  Updated reference-collections, these were updated to avoid calling hashCode during initialisation of Timed references and temporary referrers, this helped reduced SocketPermission.hashCode calls that caused reverse lookups and recursive permission checks that cause stack overflow in the CombinerSecurityManager.  Two tests are failling due to a change to ConcurrentPolicyFile, now only privileged domains are returned by getPermissions(CodeSource) and all other instances are diverted to the java.security.Policy superclass which returns an empty PermissionCollection this is to avoid checking permissions twice.
r1358131 Fixed failing junit tests caused by change to ConcurrentPolicyFile.getPermissions(CodeSource) method that now only returns Permissions for privileged CodeSource and delegates up to the super class java.security.Policy if the CodeSource is not privileged.
r1358143 seems to be some (deleted reference-collections.jar)
r1358709 Alter tests that fail due to ConcurrentPolicyFile delegating up to java.security.Policy.getPermissions(CodeSource) when CodeSource is found not to have AllPermission.  Only CodeSources that are privileged have Permissions returned that contains AllPermission. This is an optimisation that complies with java.security.Policy.
r1359548 URI spaces in codebase strings caused problems with Windows platforms - fixed. Removed calls to Thread.yield().
r1360043
r1360396
r1361523
r1361645
r1361646
r1361661
r1361671
r1362432
r1362433
r1362435
r1362452
r1362463
r1362797
r1362940
r1363295
r1363313
r1364250
r1364614
r1366641
r1366657
r1366659
r1367884
r1367889
r1369328
r1369509
r1369512
r1369513
r1369533
r1369538
r1369539
r1369541
r1369570
r1369573
r1369578
r1369666
r1369771
r1370679
r1371049
r1371855
r1373770
r1379716
r1379717
r1379720
r1379725
r1379730
r1379873
r1384715
r1384716
r1384728
r1384729
r1384733
r1384734
r1384740
r1384792
r1384812
r1385061
r1385068
r1385070
r1385072
r1385073
r1385083
r1385085
r1388003
r1389755
r1389763
r1389766
r1389804
r1389823
r1392387
r1393401
r1393420
r1393421
r1393422
r1393979
r1394412
r1394431
r1394432
r1394434
r1394435
r1394437
r1394438
r1395185
r1395234
r1395235
r1396151
r1396153
r1396157
r1396166
r1396173
r1396177
r1396180
r1396183
r1396187
r1396193
r1396198
r1396199
r1396240
r1396251
r1396254
r1396303
r1397599
r1397600
r1397613
r1397824
r1398558
r1398721
r1398725
r1398739
r1398740
r1400681
r1400842
r1400937
r1402752
r1402753
r1402754
r1402755
r1402756
r1402757
r1402758
r1402759
r1402772
r1402959
r1402960
r1402961
r1402989
r1403001
r1404526
r1404527
r1404528
r1404531
r1404901
r1404907
r1404911
r1404913
r1406894
r1406926
r1406927
r1407017
r1407431
r1415845
r1444164
r1444556
r1444557
r1449248
r1455692

On 2013-04-06, at 11:22 AM, Dan Creswell <da...@gmail.com> wrote:

> On 6 April 2013 14:44, Dennis Reedy <de...@gmail.com> wrote:
> 
>> 
>> On Apr 6, 2013, at 532AM, Dan Creswell wrote:
>> 
>>> Right so we're into brutal tradeoffs aren't we?
>>> 
>>> It's beginning to smell like none of the available branches are suitable
>>> for doing releases from. So we need a branch that is.
>> 
>> AFAIK we are going to be releasing 2.2.1 from the 2.2 branch. Once
>> everything passes muster (Greg is running tests) we will tag the branch
>> 2.2.1 and release.
>> 
>>> 
>>> i.e. We shouldn't just pick a branch we have, we should get one sorted
>> and
>>> right now.
>>> 
>>> What are our chances of pulling just qa changes out of qa-refactoring?
>> Have
>>> we at least got changesets that don't mix concurrency fixes with anything
>>> other than concurrency related changes to tests?
>> 
>> You are talking 2.3.0 here? I thought qa-trunk was being used for that?
>> 
>> 
> Peter is having some comms trouble looks like so I'll leave it at an open
> question:
> 
> Have we got a shared, agreed view of what unreleased code changes are in
> which branch?
> 
> 
>> Dennis


Re: Next Release

Posted by Peter Firmstone <ji...@zeus.net.au>.
Just to clarify:

Dennis & Greg are using the 2.2.0 branch from last release to fix Levels 
and release 2.2.1

trunk started failing tests after some unrelated changes exposed 
synchronization errors in the qa tests, since then
skunk/qa-refactoring is being used to fix synchronization issues before 
merging back with trunk,
trunk is presently unstable.  After the merge, 2.3.0 is scheduled for 
release.

Having had some time to think, I'd recommend back-porting JERI from 
skunk/qa-refactoring into the 2.2.1 release, as there are some very 
important fixes included for issues seen by downstream users.

Regards,

Peter.


Dan Creswell wrote:
> On 6 April 2013 14:44, Dennis Reedy <de...@gmail.com> wrote:
>
>   
>> On Apr 6, 2013, at 532AM, Dan Creswell wrote:
>>
>>     
>>> Right so we're into brutal tradeoffs aren't we?
>>>
>>> It's beginning to smell like none of the available branches are suitable
>>> for doing releases from. So we need a branch that is.
>>>       
>> AFAIK we are going to be releasing 2.2.1 from the 2.2 branch. Once
>> everything passes muster (Greg is running tests) we will tag the branch
>> 2.2.1 and release.
>>
>>     
>>> i.e. We shouldn't just pick a branch we have, we should get one sorted
>>>       
>> and
>>     
>>> right now.
>>>
>>> What are our chances of pulling just qa changes out of qa-refactoring?
>>>       
>> Have
>>     
>>> we at least got changesets that don't mix concurrency fixes with anything
>>> other than concurrency related changes to tests?
>>>       
>> You are talking 2.3.0 here? I thought qa-trunk was being used for that?
>>
>>
>>     
> Peter is having some comms trouble looks like so I'll leave it at an open
> question:
>
> Have we got a shared, agreed view of what unreleased code changes are in
> which branch?
>   



Re: Next Release

Posted by Dan Creswell <da...@gmail.com>.
On 6 April 2013 14:44, Dennis Reedy <de...@gmail.com> wrote:

>
> On Apr 6, 2013, at 532AM, Dan Creswell wrote:
>
> > Right so we're into brutal tradeoffs aren't we?
> >
> > It's beginning to smell like none of the available branches are suitable
> > for doing releases from. So we need a branch that is.
>
> AFAIK we are going to be releasing 2.2.1 from the 2.2 branch. Once
> everything passes muster (Greg is running tests) we will tag the branch
> 2.2.1 and release.
>
> >
> > i.e. We shouldn't just pick a branch we have, we should get one sorted
> and
> > right now.
> >
> > What are our chances of pulling just qa changes out of qa-refactoring?
> Have
> > we at least got changesets that don't mix concurrency fixes with anything
> > other than concurrency related changes to tests?
>
> You are talking 2.3.0 here? I thought qa-trunk was being used for that?
>
>
Peter is having some comms trouble, it looks like, so I'll leave it as an open
question:

Have we got a shared, agreed view of what unreleased code changes are in
which branch?


> Dennis

Re: Next Release

Posted by Dennis Reedy <de...@gmail.com>.
On Apr 6, 2013, at 532AM, Dan Creswell wrote:

> Right so we're into brutal tradeoffs aren't we?
> 
> It's beginning to smell like none of the available branches are suitable
> for doing releases from. So we need a branch that is.

AFAIK we are going to be releasing 2.2.1 from the 2.2 branch. Once everything passes muster (Greg is running tests) we will tag the branch 2.2.1 and release. 

> 
> i.e. We shouldn't just pick a branch we have, we should get one sorted and
> right now.
> 
> What are our chances of pulling just qa changes out of qa-refactoring? Have
> we at least got changesets that don't mix concurrency fixes with anything
> other than concurrency related changes to tests?

You are talking 2.3.0 here? I thought qa-trunk was being used for that?

Dennis

Re: Next Release

Posted by Dan Creswell <da...@gmail.com>.
Right so we're into brutal tradeoffs aren't we?

It's beginning to smell like none of the available branches are suitable
for doing releases from. So we need a branch that is.

i.e. We shouldn't just pick a branch we have, we should get one sorted and
right now.

What are our chances of pulling just qa changes out of qa-refactoring? Have
we at least got changesets that don't mix concurrency fixes with anything
other than concurrency related changes to tests?

On 3 April 2013 22:10, Peter <ji...@zeus.net.au> wrote:

> Not a good idea, the qa-refactoring branch was created recently to address
> the concurrency bugs in trunk.
>
> ----- Original message -----
> >
> > On Apr 2, 2013, at 750AM, Peter Firmstone wrote:
> >
> > > On 2/04/2013 7:51 PM, Dennis Reedy wrote:
> > > > On Apr 2, 2013, at 338AM, Peter Firmstone wrote:
> > > >
> > > > > The formatting didn't work out, I'll create a Jira issue to
> discuss.
> > > > >
> > > > > Patricia's done a great job detailing the dependencies and issues
> with
> > > > > TaskManager's Task implementations.
> > > > >
> > > > > I recall a list discussion from the original Sun developers who had
> > > > > intended to replace TaskManager, the runAfter method has issues.
> > > > >
> > > > > Being so prevalent, it's quite possible that TaskManager is
> causing issues
> > > > > and it might also explain why as performance improves more issues
> arise.
> > > > >
> > > > > If a task completes before another task which it's supposed to
> runAfter
> > > > > but isn't present in the queue; that could explain some issues.
> > > > >
> > > > > I much prefer idempotent code myself.
> > > > >
> > > > > This could take some effort to fix, any volunteers?
> > > > >
> > > > > Dennis are you able to continue with your 2.2.1 branch release?
> > > > At this point I am unsure what branch to base the 2.2.1 release off
> of.
> > >
> > > The 2.2.0 release, it might benefit from backports of synchronization
> fixes
> > > that improve correctness, but not performance, if some volunteers can
> diff the
> > > qa-refactoring branch and the 2.2.0 branch, there are numerous simple
> > > synchronization fixes.
> >
> > I'd like to suggest we release from qa-trunk. With all the work thats
> been going
> > on here, I dont see back porting it to the 2.2 branch is meaningful. The
> delta
> > is just too much.
> >
>
>

Re: Next Release

Posted by Dan Creswell <da...@gmail.com>.
We created a "qa-refactoring" branch for concurrency work....mmmm....

On 3 April 2013 22:10, Peter <ji...@zeus.net.au> wrote:

> Not a good idea, the qa-refactoring branch was created recently to address
> the concurrency bugs in trunk.
>
> ----- Original message -----
> >
> > On Apr 2, 2013, at 750AM, Peter Firmstone wrote:
> >
> > > On 2/04/2013 7:51 PM, Dennis Reedy wrote:
> > > > On Apr 2, 2013, at 338AM, Peter Firmstone wrote:
> > > >
> > > > > The formatting didn't work out, I'll create a Jira issue to
> discuss.
> > > > >
> > > > > Patricia's done a great job detailing the dependencies and issues
> with
> > > > > TaskManager's Task implementations.
> > > > >
> > > > > I recall a list discussion from the original Sun developers who had
> > > > > intended to replace TaskManager, the runAfter method has issues.
> > > > >
> > > > > Being so prevalent, it's quite possible that TaskManager is
> causing issues
> > > > > and it might also explain why as performance improves more issues
> arise.
> > > > >
> > > > > If a task completes before another task which it's supposed to
> runAfter
> > > > > but isn't present in the queue; that could explain some issues.
> > > > >
> > > > > I much prefer idempotent code myself.
> > > > >
> > > > > This could take some effort to fix, any volunteers?
> > > > >
> > > > > Dennis are you able to continue with your 2.2.1 branch release?
> > > > At this point I am unsure what branch to base the 2.2.1 release off
> of.
> > >
> > > The 2.2.0 release, it might benefit from backports of synchronization
> fixes
> > > that improve correctness, but not performance, if some volunteers can
> diff the
> > > qa-refactoring branch and the 2.2.0 branch, there are numerous simple
> > > synchronization fixes.
> >
> > I'd like to suggest we release from qa-trunk. With all the work thats
> been going
> > on here, I dont see back porting it to the 2.2 branch is meaningful. The
> delta
> > is just too much.
> >
>
>

Re: Next Release

Posted by Peter <ji...@zeus.net.au>.
Not a good idea, the qa-refactoring branch was created recently to address the concurrency bugs in trunk.

----- Original message -----
>
> On Apr 2, 2013, at 750AM, Peter Firmstone wrote:
>
> > On 2/04/2013 7:51 PM, Dennis Reedy wrote:
> > > On Apr 2, 2013, at 338AM, Peter Firmstone wrote:
> > >
> > > > The formatting didn't work out, I'll create a Jira issue to discuss.
> > > >
> > > > Patricia's done a great job detailing the dependencies and issues with
> > > > TaskManager's Task implementations.
> > > >
> > > > I recall a list discussion from the original Sun developers who had
> > > > intended to replace TaskManager, the runAfter method has issues.
> > > >
> > > > Being so prevalent, it's quite possible that TaskManager is causing issues
> > > > and it might also explain why as performance improves more issues arise.
> > > >
> > > > If a task completes before another task which it's supposed to runAfter
> > > > but isn't present in the queue; that could explain some issues.
> > > >
> > > > I much prefer idempotent code myself.
> > > >
> > > > This could take some effort to fix, any volunteers?
> > > >
> > > > Dennis are you able to continue with your 2.2.1 branch release?
> > > At this point I am unsure what branch to base the 2.2.1 release off of.
> >
> > The 2.2.0 release, it might benefit from backports of synchronization fixes
> > that improve correctness, but not performance, if some volunteers can diff the
> > qa-refactoring branch and the 2.2.0 branch, there are numerous simple
> > synchronization fixes.
>
> I'd like to suggest we release from qa-trunk. With all the work thats been going
> on here, I dont see back porting it to the 2.2 branch is meaningful. The delta
> is just too much.
>


Re: Next Release

Posted by Dennis Reedy <de...@gmail.com>.
On Apr 3, 2013, at 120PM, Greg Trasuk wrote:

> 
> On Wed, 2013-04-03 at 12:12, Dennis Reedy wrote:
>> On Apr 3, 2013, at 1115AM, Greg Trasuk wrote:
>> 
>>> 
>>> Did we have a branching policy discussion?  
>> 
>> I was looking here: http://river.apache.org/development-process.html (scroll down to "Branching Policy")
>> 
> Ahh... That makes sense.
> 
>>> I recall we decided not to
>>> do too much in the trunk.  In any case, I think your suggestion works,
>>> barring any other opinions.  
>> 
>> I was going to update the Levels code in the branch. Once we get that branch up to snuff and ready to release I think we tag it 2.2.1
>> 
> OK, go ahead and update the Levels code.  I'll run a diff and see if
> there's anything else that should be ported now.

Ok, all set. I also pushed roll_release.sh and common.xml (updated version to 2.2.1)

Dennis

Re: Next Release

Posted by Greg Trasuk <tr...@stratuscom.com>.
On Wed, 2013-04-03 at 12:12, Dennis Reedy wrote:
> On Apr 3, 2013, at 1115AM, Greg Trasuk wrote:
> 
> > 
> > Did we have a branching policy discussion?  
> 
> I was looking here: http://river.apache.org/development-process.html (scroll down to "Branching Policy")
> 
Ahh... That makes sense.

> > I recall we decided not to
> > do too much in the trunk.  In any case, I think your suggestion works,
> > barring any other opinions.  
> 
> I was going to update the Levels code in the branch. Once we get that branch up to snuff and ready to release I think we tag it 2.2.1
> 
OK, go ahead and update the Levels code.  I'll run a diff and see if
there's anything else that should be ported now.

Greg.

> Regards
> 
> Dennis


Re: Next Release

Posted by Dennis Reedy <de...@gmail.com>.
On Apr 3, 2013, at 1115AM, Greg Trasuk wrote:

> 
> Did we have a branching policy discussion?  

I was looking here: http://river.apache.org/development-process.html (scroll down to "Branching Policy")

> I recall we decided not to
> do too much in the trunk.  In any case, I think your suggestion works,
> barring any other opinions.  

I was going to update the Levels code in the branch. Once we get that branch up to snuff and ready to release I think we tag it 2.2.1

Regards

Dennis

Re: Next Release

Posted by Greg Trasuk <tr...@stratuscom.com>.
Did we have a branching policy discussion?  I recall we decided not to
do too much in the trunk.  In any case, I think your suggestion works,
barring any other opinions.  I was thinking of creating a "2.2.1" branch
first, and then applying patches to that, but assuming there wasn't
anything big done in the 2.2.0 branch I think it comes to the same
ends.  I was going to do that this afternoon, but if someone were to get
there first I wouldn't complain...

Cheers,

Greg.

On Wed, 2013-04-03 at 10:45, Dennis Reedy wrote:
> On Apr 3, 2013, at 1030AM, Greg Trasuk wrote:
> 
> > Hi Dennis:
> > 
> > I think the suggestion was that we do a release branched off the 2.2.0
> > release with a bare set of patches moved over - primarily the Logging
> > fix and I think there was a change to one of the JRMP context classes
> > that I needed for the Surrogate container.  And then a release from the
> > qa_refactor branch a little bit later.
> 
> Okay, sounds good. Just some logistics first. From the branching policy discussion, it seems we should do the work in the 2.2 branch, then when ready to release, tag it as 2.2.1, correct?
> 


Re: Next Release

Posted by Dennis Reedy <de...@gmail.com>.
On Apr 3, 2013, at 1030AM, Greg Trasuk wrote:

> Hi Dennis:
> 
> I think the suggestion was that we do a release branched off the 2.2.0
> release with a bare set of patches moved over - primarily the Logging
> fix and I think there was a change to one of the JRMP context classes
> that I needed for the Surrogate container.  And then a release from the
> qa_refactor branch a little bit later.

Okay, sounds good. Just some logistics first. From the branching policy discussion, it seems we should do the work in the 2.2 branch, then when ready to release, tag it as 2.2.1, correct?


Re: Next Release

Posted by Greg Trasuk <tr...@stratuscom.com>.
Hi Dennis:

I think the suggestion was that we do a release branched off the 2.2.0
release with a bare set of patches moved over - primarily the Logging
fix and I think there was a change to one of the JRMP context classes
that I needed for the Surrogate container.  And then a release from the
qa_refactor branch a little bit later.

Personally I'd like to see some kind of release sooner rather than
later.  It's been a while.  I'll act as RM for a minimal release if we
can agree on doing that.

I'm planning on having a few cycles this afternoon to take a look at a
diff and see what-all changed, and if there was anything else that
should go into a minimal release.

Cheers,

Greg.

On Wed, 2013-04-03 at 09:39, Dennis Reedy wrote:
> On Apr 2, 2013, at 750AM, Peter Firmstone wrote:
> 
> > On 2/04/2013 7:51 PM, Dennis Reedy wrote:
> >> On Apr 2, 2013, at 338AM, Peter Firmstone wrote:
> >> 
> >>> The formatting didn't work out, I'll create a Jira issue to discuss.
> >>> 
> >>> Patricia's done a great job detailing the dependencies and issues with TaskManager's Task implementations.
> >>> 
> >>> I recall a list discussion from the original Sun developers who had intended to replace TaskManager, the runAfter method has issues.
> >>> 
> >>> Being so prevalent, it's quite possible that TaskManager is causing issues and it might also explain why as performance improves more issues arise.
> >>> 
> >>> If a task completes before another task which it's supposed to runAfter but isn't present in the queue; that could explain some issues.
> >>> 
> >>> I much prefer idempotent code myself.
> >>> 
> >>> This could take some effort to fix, any volunteers?
> >>> 
> >>> Dennis are you able to continue with your 2.2.1 branch release?
> >> At this point I am unsure what branch to base the 2.2.1 release off of.
> > 
> > The 2.2.0 release, it might benefit from backports of synchronization fixes that improve correctness, but not performance, if some volunteers can diff the qa-refactoring branch and the 2.2.0 branch, there are numerous simple synchronization fixes.
> 
> I'd like to suggest we release from qa-trunk. With all the work that's been going on here, I don't see that backporting it to the 2.2 branch is meaningful. The delta is just too much.
> 


Re: Next Release

Posted by Dennis Reedy <de...@gmail.com>.
On Apr 2, 2013, at 750AM, Peter Firmstone wrote:

> On 2/04/2013 7:51 PM, Dennis Reedy wrote:
>> On Apr 2, 2013, at 338AM, Peter Firmstone wrote:
>> 
>>> The formatting didn't work out, I'll create a Jira issue to discuss.
>>> 
>>> Patricia's done a great job detailing the dependencies and issues with TaskManager's Task implementations.
>>> 
>>> I recall a list discussion from the original Sun developers who had intended to replace TaskManager, the runAfter method has issues.
>>> 
>>> Being so prevalent, it's quite possible that TaskManager is causing issues and it might also explain why as performance improves more issues arise.
>>> 
>>> If a task completes before another task which it's supposed to runAfter but isn't present in the queue; that could explain some issues.
>>> 
>>> I much prefer idempotent code myself.
>>> 
>>> This could take some effort to fix, any volunteers?
>>> 
>>> Dennis are you able to continue with your 2.2.1 branch release?
>> At this point I am unsure what branch to base the 2.2.1 release off of.
> 
> The 2.2.0 release, it might benefit from backports of synchronization fixes that improve correctness, but not performance, if some volunteers can diff the qa-refactoring branch and the 2.2.0 branch, there are numerous simple synchronization fixes.

I'd like to suggest we release from qa-trunk. With all the work that's been going on here, I don't see that backporting it to the 2.2 branch is meaningful. The delta is just too much.


Re: test failure repeatability

Posted by Peter Firmstone <ji...@zeus.net.au>.
On 2/04/2013 7:51 PM, Dennis Reedy wrote:
> On Apr 2, 2013, at 338AM, Peter Firmstone wrote:
>
>> The formatting didn't work out, I'll create a Jira issue to discuss.
>>
>> Patricia's done a great job detailing the dependencies and issues with TaskManager's Task implementations.
>>
>> I recall a list discussion from the original Sun developers who had intended to replace TaskManager, the runAfter method has issues.
>>
>> Being so prevalent, it's quite possible that TaskManager is causing issues and it might also explain why as performance improves more issues arise.
>>
>> If a task completes before another task which it's supposed to runAfter but isn't present in the queue; that could explain some issues.
>>
>> I much prefer idempotent code myself.
>>
>> This could take some effort to fix, any volunteers?
>>
>> Dennis are you able to continue with your 2.2.1 branch release?
> At this point I am unsure what branch to base the 2.2.1 release off of.

The 2.2.0 release, it might benefit from backports of synchronization 
fixes that improve correctness, but not performance, if some volunteers 
can diff the qa-refactoring branch and the 2.2.0 branch, there are 
numerous simple synchronization fixes.

>   Additionally, what are the steps necessary to create a release?

   1. First create a branch of 2.2.0, call it 2.2.1.
   2. Developers will need to update their developer keys in the keys file.
   3. Backport sensible synchronization fixes for existing classes
      (don't get carried away, just essentials).
   4. Definitely fix LookupLocator serialization which was accidentally
      broken (similar to Levels an additional field was added that
      should have been transient), look at the qa-refactoring release
      for the fix, which is backward compatible with all releases.
   5. Run the rat report tool, there should be a shell script for that,
      it checks for licence compliance.
   6. Increment version numbers (find 2.2.0 and replace with 2.2.1, but
      don't replace all occurrences of 2.2.0, this has to be inspected
      manually).
   7. Update the release documentation.
   8. Post pre release artifacts for wider community testing.
   9. Wait about two weeks for feedback.
  10. Then vote on the release artifacts.
  11. Publish the artifacts.
  12. I have some handwritten notes about the release process, they're
      about 200km away from where I'm presently working, I should be
      able to access them on the weekend.


This release will probably only be supportable on Linux and Solaris, 
Windows only passed all tests recently, Windows users will want to wait 
until we sort out the concurrency issues (ironically I haven't 
reproduced any concurrency test failures on Windows, maybe the Windows 
jvm is less aggressive with optimisations, or it's the hardware being 
tested).

For now I'm going to continue focusing my efforts on the qa-refactoring 
branch, as it's been over three years since 2.2.0 was released, I'm 
close and don't want to delay 2.3.0 much longer.  This is a big release, 
the culmination of 3 years of ongoing effort.

In future I'd like to break River up into separate smaller components 
that are far easier to release, I don't think I can do it again for 
another 3 years, but we can save that discussion until after we've 
released so we don't get distracted.   I would like to see a more agile 
release process.

Regards,

Peter.

Re: test failure repeatability

Posted by Dennis Reedy <de...@gmail.com>.
On Apr 2, 2013, at 3:38 AM, Peter Firmstone wrote:

> The formatting didn't work out, I'll create a Jira issue to discuss.
> 
> Patricia's done a great job detailing the dependencies and issues with TaskManager's Task implementations.
> 
> I recall a list discussion from the original Sun developers who had intended to replace TaskManager, the runAfter method has issues.
> 
> Being so prevalent, it's quite possible that TaskManager is causing issues and it might also explain why as performance improves more issues arise.
> 
> If a task completes before another task which it's supposed to runAfter but isn't present in the queue; that could explain some issues.
> 
> I much prefer idempotent code myself.
> 
> This could take some effort to fix, any volunteers?
> 
> Dennis are you able to continue with your 2.2.1 branch release?

At this point I am unsure what branch to base the 2.2.1 release off of. Additionally, what are the steps necessary to create a release?

Regards

Dennis


Re: test failure repeatability

Posted by Peter Firmstone <ji...@zeus.net.au>.
The formatting didn't work out, I'll create a Jira issue to discuss.

Patricia's done a great job detailing the dependencies and issues with 
TaskManager's Task implementations.

I recall a list discussion from the original Sun developers who had 
intended to replace TaskManager, the runAfter method has issues.

Being so prevalent, it's quite possible that TaskManager is causing 
issues and it might also explain why as performance improves more issues 
arise.

If a task completes before another task which it's supposed to runAfter 
but isn't present in the queue; that could explain some issues.

I much prefer idempotent code myself.

This could take some effort to fix, any volunteers?

Dennis are you able to continue with your 2.2.1 branch release?

Regards,

Peter.

On 2/04/2013 5:17 PM, Peter Firmstone wrote:
> I've appended Patricia's notes in html so we don't lose the table 
> formatting, hopefully it will be accepted by the mailer.
>
> On 2/04/2013 1:38 PM, Patricia Shanahan wrote:
>> I've sent Peter some notes that I hope he can make available - I 
>> don't think I can send attachments to the list.
>>
>> Rereading my notes has reminded me that I had special concerns with 
>> RetryTask. Is that still used? If so, I'll explain the problem.
>>
>>
> *TaskManager notes*
>
>
>  Classes That Reference TaskManager
>
> Class
>
>
>
> Package
>
>
>
> Notes
>
> AbortJob
>
>
>
> com.sun.jini.mahalo
>
>
>
> Subclass of Job. Passed a TaskManager as parameter. Uses 
> ParticipantTask, no dependencies.
>
> CommitJob
>
>
>
> com.sun.jini.mahalo
>
>
>
> Subclass of Job. Passed a TaskManager as parameter. Uses 
> ParticipantTask, no dependencies.
>
> EventType
>
>
>
> com.sun.jini.norm.event
>
>
>
> Task type SendTask, subclass of RetryTask, no dependencies.
>
> EventTypeGenerator
>
>
>
> com.sun.jini.norm.event
>
>
>
> Supplies a TaskManager for use by the EventType objects it generates.
>
> FiddlerImpl
>
>
>
> com.sun.jini.fiddler
>
>
>
> Extensive use of TaskManager, with many different Task subtypes. No 
> dependencies.
>
> Job
>
>
>
> com.sun.jini.mahalo
>
>
>
> Manage performance of a job as a set of tasks all of which need to be 
> created by the Job subclass. There is some dubious code in performWork 
> that silently throws away an exception that would indicate internal 
> inconsistency.
>
> JoinManager
>
>
>
> net.jini.lookup
>
>
>
> Uses ProxyRegTask, which extends RetryTask. Special problem - making 
> sure a service gets exactly one ID. If the ID has already been 
> allocated, no dependencies. If not, runAfter any ProxyRegTask with 
> lower sequence number, ensuring that only the lowest sequence number 
> ProxyRegTask in the TaskManager can run. Safe if, and only if, tasks 
> are submitted in sequence number order, and there are no retries.
>
>
> LeaseRenewalManager
>
>
>
> net.jini.lease
>
>
>
> Uses QueuerTask and RenewTask. No dependencies.
>
> LookupDiscovery
>
>
>
> net.jini.discovery
>
>
>
> Uses DecodeAnnouncementTask and UnicastDiscoveryTask. No dependencies.
>
> LookupLocatorDiscovery
>
>
>
> net.jini.discovery
>
>
>
> Uses DiscoveryTask. No dependencies.
>
> MailboxImpl
>
>
>
> com.sun.jini.mercury
>
>
>
> Uses a NotifyTask, subclass of RetryTask, no dependencies.
>
> Notifier
>
>
>
> com.sun.jini.outrigger
>
>
>
> Uses its own NotifyTask, subclass of RetryTask. Dependency based on 
> EventSender runAfter test. EventSender has two implementations. An 
> EventRegistrationWatcher.BasicEventSender waits for any 
> BasicEventSender belonging to the same EventRegistrationWatcher. 
> VisibilityEventSender has no dependencies.
>
> ParticipantTask
>
>
>
> com.sun.jini.mahalo
>
>
>
> No dependencies.
>
> PrepareAndCommitJob
>
>
>
> com.sun.jini.mahalo
>
>
>
> Subclass of Job. Passed a TaskManager as parameter. Uses 
> ParticipantTask, no dependencies.
>
> PrepareJob
>
>
>
> com.sun.jini.mahalo
>
>
>
> Subclass of Job. Passed a TaskManager as parameter. Uses 
> ParticipantTask, no dependencies.
>
> RegistrarImpl
>
>
>
> com.sun.jini.reggie
>
>
>
> Uses multiple Task types: AddressTask - no dependencies; 
> DecodeRequestTask - no dependencies; EventTask - run after EventTask 
> for same listener, "Keep events going to the same listener ordered"; 
> SocketTask - no dependencies.
>
> RetryTask
>
>
>
> com.sun.jini.thread
>
>
>
> Abstract class implementing Task. It provides for automatic retry of 
> failed attempts, where an attempt is a call to tryOnce.
>
> ServiceDiscoveryManager
>
>
>
> net.jini.lookup
>
>
>
> Uses CacheTask - no dependencies; ServiceIdTask - run after 
> ServiceIdTask with same ServiceId and lower sequence number. Its 
> subclasses NewOldServiceTask and UnmapProxyTask inherit runAfter. 
> ServiceIdTask's subclass NotifyEventTask runs after 
> RegisterListenerTask or LookupTask with same ProxyReg and lower 
> sequence, and also calls the ServiceId runAfter. Bug ID 6291851. 
> Comment suggests the writer thought it was necessary to do a sequence 
> number check to find the queue order: " and if those tasks were queued 
> prior to this task (have lower sequence numbers)".
>
>
> /** Whenever a ServiceIdTask is created in this cache, it is assigned
>
> * a unique sequence number to allow such tasks associated with the
>
> * same ServiceID to be executed in the order in which they were
>
> * queued in the TaskManager. This field contains the value of
>
> * the sequence number assigned to the most recently created
>
> * ServiceIdTask.
>
> */
>
> private long taskSeqN = 0;
>
>
> Synchronization window needs fixing. taskSeqN is protected by 
> serviceIdMap synchronization, but it is released before calling 
> cacheTaskMgr.add in addProxyReg
>
>
> SettlerTask
>
>
>
> com.sun.jini.mahalo
>
>
>
> Subclass of RetryTask. No dependencies. Used in TxnManagerImpl.
>
> TxnManagerImpl
>
>
>
> com.sun.jini.mahalo
>
>
>
> Uses SettlerTask and ParticipantTask. No dependencies.
>
> TxnManagerTransaction
>
>
>
> com.sun.jini.mahalo
>
>
>
> Creates a TaskManager, threadpool, and passes it around to e.g. Job 
> and AbortJob.
>
> TxnMonitor
>
>
>
> com.sun.jini.outrigger
>
>
>
> Uses TxnMonitorTask.
>
> TxnMonitorTask
>
>
>
> com.sun.jini.outrigger
>
>
>
> Subclass of RetryTask. No dependencies.
>
>
>  Issues
>
>
>    RetryTask
>
> RetryTask is a Task implementation whose run method tries a subclass 
> supplied method with a boolean result. If the method returns false, 
> indicating failure, the RetryTask's run method schedules another try 
> in the future, using a WakeupManager supplied to the RetryTask 
> constructor.
>
> During the time between a failed attempt and its retry, there does not 
> seem to be any control to prevent conflicting tasks from entering the 
> same TaskManager. Some of those tasks would have waited for the task 
> being retried, if it had been in the TaskManager at their time of 
> arrival. Delayed retry and dependence on sequence number seem 
> incompatible. Notifier.NotifyTask and JoinManager.ProxyRegTask both 
> extend RetryTask and have dependencies. JoinManager.ProxyRegTask uses 
> a sequence number, but probably does not need to, and should not. The 
> intent seems to be to run tasks for a given service one-at-a-time 
> until its ServiceId has been set.
>
>
>    ServiceDiscoveryManager.CacheTask
>
> Most subclasses inherit a "return false;" runAfter. The exceptions are 
> ServiceIdTask, its subclasses, and LookupTask. Both have sequence 
> number dependencies. It is not yet clear whether 
> ServiceDiscoveryManager is ensuring that tasks enter the TaskManager 
> in sequence number order. If it does, the code is correct, but wastes 
> time with a trivially true check. If not, the code is incorrect 
> relative to the comments, which seem to expect order.
>
>
>
>
>
>


Re: test failure repeatability

Posted by Peter Firmstone <ji...@zeus.net.au>.
I've appended Patricia's notes in html so we don't lose the table 
formatting, hopefully it will be accepted by the mailer.

On 2/04/2013 1:38 PM, Patricia Shanahan wrote:
> I've sent Peter some notes that I hope he can make available - I don't 
> think I can send attachments to the list.
>
> Rereading my notes has reminded me that I had special concerns with 
> RetryTask. Is that still used? If so, I'll explain the problem.
>
>
*TaskManager notes*


  Classes That Reference TaskManager

Class

	

Package

	

Notes

AbortJob

	

com.sun.jini.mahalo

	

Subclass of Job. Passed a TaskManager as parameter. Uses 
ParticipantTask, no dependencies.

CommitJob

	

com.sun.jini.mahalo

	

Subclass of Job. Passed a TaskManager as parameter. Uses 
ParticipantTask, no dependencies.

EventType

	

com.sun.jini.norm.event

	

Task type SendTask, subclass of RetryTask, no dependencies.

EventTypeGenerator

	

com.sun.jini.norm.event

	

Supplies a TaskManager for use by the EventType objects it generates.

FiddlerImpl

	

com.sun.jini.fiddler

	

Extensive use of TaskManager, with many different Task subtypes. No 
dependencies.

Job

	

com.sun.jini.mahalo

	

Manage performance of a job as a set of tasks all of which need to be 
created by the Job subclass. There is some dubious code in performWork 
that silently throws away an exception that would indicate internal 
inconsistency.

JoinManager

	

net.jini.lookup

	

Uses ProxyRegTask, which extends RetryTask. Special problem - making 
sure a service gets exactly one ID. If the ID has already been 
allocated, no dependencies. If not, runAfter any ProxyRegTask with lower 
sequence number, ensuring that only the lowest sequence number 
ProxyRegTask in the TaskManager can run. Safe if, and only if, tasks are 
submitted in sequence number order, and there are no retries.


LeaseRenewalManager

	

net.jini.lease

	

Uses QueuerTask and RenewTask. No dependencies.

LookupDiscovery

	

net.jini.discovery

	

Uses DecodeAnnouncementTask and UnicastDiscoveryTask. No dependencies.

LookupLocatorDiscovery

	

net.jini.discovery

	

Uses DiscoveryTask. No dependencies.

MailboxImpl

	

com.sun.jini.mercury

	

Uses a NotifyTask, subclass of RetryTask, no dependencies.

Notifier

	

com.sun.jini.outrigger

	

Uses its own NotifyTask, subclass of RetryTask. Dependency based on 
EventSender runAfter test. EventSender has two implementations. An 
EventRegistrationWatcher.BasicEventSender waits for any BasicEventSender 
belonging to the same EventRegistrationWatcher. VisibilityEventSender 
has no dependencies.

ParticipantTask

	

com.sun.jini.mahalo

	

No dependencies.

PrepareAndCommitJob

	

com.sun.jini.mahalo

	

Subclass of Job. Passed a TaskManager as parameter. Uses 
ParticipantTask, no dependencies.

PrepareJob

	

com.sun.jini.mahalo

	

Subclass of Job. Passed a TaskManager as parameter. Uses 
ParticipantTask, no dependencies.

RegistrarImpl

	

com.sun.jini.reggie

	

Uses multiple Task types: AddressTask - no dependencies; 
DecodeRequestTask - no dependencies; EventTask - run after EventTask for 
same listener, "Keep events going to the same listener ordered"; 
SocketTask - no dependencies.

RetryTask

	

com.sun.jini.thread

	

Abstract class implementing Task. It provides for automatic retry of 
failed attempts, where an attempt is a call to tryOnce.

ServiceDiscoveryManager

	

net.jini.lookup

	

Uses CacheTask - no dependencies; ServiceIdTask - run after 
ServiceIdTask with same ServiceId and lower sequence number. Its 
subclasses NewOldServiceTask and UnmapProxyTask inherit runAfter. 
ServiceIdTask's subclass NotifyEventTask runs after RegisterListenerTask 
or LookupTask with same ProxyReg and lower sequence, and also calls the 
ServiceId runAfter. Bug ID 6291851. Comment suggests the writer thought 
it was necessary to do a sequence number check to find the queue order: 
" and if those tasks were queued prior to this task (have lower sequence 
numbers)".


/** Whenever a ServiceIdTask is created in this cache, it is assigned

* a unique sequence number to allow such tasks associated with the

* same ServiceID to be executed in the order in which they were

* queued in the TaskManager. This field contains the value of

* the sequence number assigned to the most recently created

* ServiceIdTask.

*/

private long taskSeqN = 0;


Synchronization window needs fixing. taskSeqN is protected by 
serviceIdMap synchronization, but it is released before calling 
cacheTaskMgr.add in addProxyReg


SettlerTask

	

com.sun.jini.mahalo

	

Subclass of RetryTask. No dependencies. Used in TxnManagerImpl.

TxnManagerImpl

	

com.sun.jini.mahalo

	

Uses SettlerTask and ParticipantTask. No dependencies.

TxnManagerTransaction

	

com.sun.jini.mahalo

	

Creates a TaskManager, threadpool, and passes it around to e.g. Job and 
AbortJob.

TxnMonitor

	

com.sun.jini.outrigger

	

Uses TxnMonitorTask.

TxnMonitorTask

	

com.sun.jini.outrigger

	

Subclass of RetryTask. No dependencies.


  Issues


    RetryTask

RetryTask is a Task implementation whose run method tries a subclass 
supplied method with a boolean result. If the method returns false, 
indicating failure, the RetryTask's run method schedules another try in 
the future, using a WakeupManager supplied to the RetryTask constructor.

During the time between a failed attempt and its retry, there does not 
seem to be any control to prevent conflicting tasks from entering the 
same TaskManager. Some of those tasks would have waited for the task 
being retried, if it had been in the TaskManager at their time of 
arrival. Delayed retry and dependence on sequence number seem 
incompatible. Notifier.NotifyTask and JoinManager.ProxyRegTask both 
extend RetryTask and have dependencies. JoinManager.ProxyRegTask uses a 
sequence number, but probably does not need to, and should not. The 
intent seems to be to run tasks for a given service one-at-a-time until 
its ServiceId has been set.


    ServiceDiscoveryManager.CacheTask

Most subclasses inherit a "return false;" runAfter. The exceptions are 
ServiceIdTask, its subclasses, and LookupTask. Both have sequence number 
dependencies. It is not yet clear whether ServiceDiscoveryManager is 
ensuring that tasks enter the TaskManager in sequence number order. If 
it does, the code is correct, but wastes time with a trivially true 
check. If not, the code is incorrect relative to the comments, which 
seem to expect order.






Re: test failure repeatability

Posted by Patricia Shanahan <pa...@acm.org>.
I've sent Peter some notes that I hope he can make available - I don't 
think I can send attachments to the list.

Rereading my notes has reminded me that I had special concerns with 
RetryTask. Is that still used? If so, I'll explain the problem.

Patricia

On 4/1/2013 10:12 AM, Dan Creswell wrote:
> It's certainly still being used in Outrigger for notifies and Mahalo for
> transaction settling amongst other things so yep, any notes you've got
> would be worthwhile.
>
> Ta,
>
> Dan.
>
> On 1 April 2013 18:01, Patricia Shanahan <pa...@acm.org> wrote:
>
>> Are you still using River's TaskManager? When I was looking at its
>> performance I noticed some things that seemed dubious to me from the point
>> of view of concurrency. I may still have some notes that would be useful
>> for a concurrency bug hunt.
>>
>> Patricia



Re: test failure repeatability

Posted by Dan Creswell <da...@gmail.com>.
It's certainly still being used in Outrigger for notifies and Mahalo for
transaction settling amongst other things so yep, any notes you've got
would be worthwhile.

Ta,

Dan.

On 1 April 2013 18:01, Patricia Shanahan <pa...@acm.org> wrote:

> Are you still using River's TaskManager? When I was looking at its
> performance I noticed some things that seemed dubious to me from the point
> of view of concurrency. I may still have some notes that would be useful
> for a concurrency bug hunt.
>
> Patricia
>
>
> On 4/1/2013 4:51 AM, Bryan Thompson wrote:
>
>> I am uncomfortable with a release that has known concurrency problems.  I
>> am also uncomfortable that the custom Levels serialization change by
>> Oracle [1] has broken the current release for new JVMs.  I would like to
>> see a minor release which fixes that serialization problem and a candidate
>> release which gives people a chance to discover concurrency issues without
>> risking a release that is known to be unstable.
>>
>> I'm happy to review a few of the classes with known concurrency problems
>> to see if I can help nail some of these bugs.  Since I do not know the
>> river internals, I would only be able to spot concurrency problems that
>> exist within a class.  I am not in a good position to comment on
>> concurrency problems that might arise through the interactions among
>> classes.
>>
>> Bryan
>>
>> [1] https://issues.apache.org/jira/browse/RIVER-416
>>
>> On 4/1/13 6:14 AM, "Peter Firmstone" <ji...@zeus.net.au> wrote:
>>
>>  The attachments will be removed from the list, so I've cc'd you, anyone
>>> who's interested, let me know I can forward the attachments.  They can
>>> be opened with jvisualvm.
>>>
>>> The profiling isn't perfect, the test runs for about 8.5 minutes, so
>>> hotspot should have kicked in relatively early in both test runs.
>>>
>>> I guess a significant problem is; the more I remove performance
>>> impedances, like unnecessary DNS calls, the faster multithreading and
>>> context switching gets.
>>>
>>> Not only did the old policy providers create contention, but it was
>>> slower for single threaded performance (I'll have to run the previous
>>> release branch for comparison when I get some time).
>>>
>>> The URIGrant.implies call is now down to .228 ms per invocation, down
>>> from 1.68 ms per invocation this week, which was already quite good (on
>>> old UltraSparcII hardware), during stress tests this method is called
>>> almost 40,000 times.
>>>
>>> In comparison the old policy provider which required a DNS call (every
>>> time CodeSource.implies is called, functionality now replaced by
>>> URIGrant.implies), the old policy provider also cached all Permission's
>>> in highly contended PermissionCollection's, which during network calls
>>> invoked SocketPermission.implies, possibly for every SocketPermission in
>>> the PermissionCollection, DNS is also consulted by
>>> SocketPermission.implies, while synchronized, ouch!
>>>
>>> URIGrant.implies is non blocking, that's right zero contention.
>>> SocketPermission.implies DNS calls can be avoided in most cases if
>>> PermissionComparator finds an exact match or wild card.
>>>
>>> This is without the CombinerSecurityManager, which improves security
>>> performance by a factor of 10 (SocketPermission is only checked once for
>>> each AccessControlContext).
>>>
>>> PreferredClassProvider no longer creates unnecessary DNS calls, and
>>> neither does SecureClassLoader,  URLClassLoader or PreferredClassLoader.
>>>
>>> So the good news is the next release will feel much faster, the bad news
>>> is that existing concurrency bugs that previously didn't appear during
>>> test runs, but likely to manifest during production are now occurring
>>> during testing.  There's more good news, we fixed a number of
>>> concurrency bugs since the last release too, I guess I have to draw the
>>> line somewhere and cut a release.
>>>
>>> DNS calls haven't been completely eliminated as some are still
>>> necessary, but a multitude of unnecessary DNS calls have been eliminated.
>>>
>>> Regards,
>>>
>>> Peter.
>>>
>>> Peter Firmstone wrote:
>>>
>>>> They've passed more consistently in the past, they're either
>>>> concurrency bugs or network timing related, it would be nice to at
>>>> least determine if it's the former or latter.
>>>>
>>>> Cheers,
>>>>
>>>> Peter.
>>>>
>>>> Tom Hobbs wrote:
>>>>
>>>>> Are these all new failures, i.e. were they working before?  Or are they
>>>>> "new" failures in that the test categories have only recently been
>>>>> reactivated and the failures discovered?
>>>>>
>>>>> Are they a big enough blocker to stop a release?
>>>>>
>>>>>
>>>>> On Sun, Mar 31, 2013 at 11:31 PM, Peter Firmstone <ji...@zeus.net.au>
>>>>> wrote:
>>>>>
>>>>>
>>>>>  The following test fails 30 times in a run of 130 tests:
>>>>>>
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>> [java]  #       of      tests   passed  =       0
>>>>>> [java]  #       of      tests   passed  =       1
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>>          100
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>> Buildfile: build.xml
>>>>>>
>>>>>> qa.run-tests:
>>>>>>
>>>>>> james-brown:
>>>>>>    [delete] Deleting directory /opt/src/River_Fixed/peterConcurrentPolicy/qa/soul
>>>>>>     [mkdir] Created dir: /opt/src/River_Fixed/peterConcurrentPolicy/qa/soul
>>>>>>     [touch] Creating /opt/src/River_Fixed/peterConcurrentPolicy/qa/soul/soul.201303312239034808
>>>>>>
>>>>>> run-tests:
>>>>>>      [java]
>>>>>>      [java] -----------------------------------------
>>>>>>      [java] CONFIGURATION FILE:
>>>>>>      [java]
>>>>>>      [java]    /opt/src/River_Fixed/peterConcurrentPolicy/qa/src/com/sun/jini/test/resources/qaHarness.prop
>>>>>>      [java]
>>>>>>      [java] ------------------------------****-----------
>>>>>>      [java] SETTING UP THE TEST LIST:
>>>>>>      [java]
>>>>>>      [java]    Adding test:
>>>>>> com/sun/jini/test/spec/****javaspace/conformance/
>>>>>> **snapshot/****SnapshotExpirationNotifyTest.****td
>>>>>>      [java]
>>>>>>      [java] ------------------------------****-----------
>>>>>>      [java] GENERAL HARNESS CONFIGURATION INFORMATION:
>>>>>>      [java]
>>>>>>      [java]    Date started:
>>>>>>      [java]       Sun Mar 31 22:39:37 EST 2013
>>>>>>      [java]    Installation directory of the JSK:
>>>>>>      [java]       com.sun.jini.jsk.home=/opt/****src/River_Fixed/**
>>>>>> peterConcurrentPolicy
>>>>>>      [java]    Installation directory of the harness:
>>>>>>      [java]       com.sun.jini.qa.home=/opt/src/****River_Fixed/**
>>>>>> peterConcurrentPolicy/qa
>>>>>>      [java]    Categories being tested:
>>>>>>      [java]       categories=No Categories
>>>>>>      [java] ------------------------------****-----------
>>>>>>      [java] ENVIRONMENT PROPERTIES:
>>>>>>      [java]
>>>>>>      [java]    JVM information:
>>>>>>      [java]       Java HotSpot(TM) Server VM, 20.5-b03, 32 bit VM mode
>>>>>>      [java]       Sun Microsystems Inc.
>>>>>>      [java]    OS information:
>>>>>>      [java]       SunOS, 5.10, sparc
>>>>>>      [java]
>>>>>>      [java] ------------------------------****-----------
>>>>>>      [java] STARTING TO RUN THE TESTS
>>>>>>      [java]
>>>>>>      [java]
>>>>>>      [java] Running com/sun/jini/test/spec/****
>>>>>> javaspace/conformance/**
>>>>>> snapshot/****SnapshotExpirationNotifyTest.****td
>>>>>>      [java] Time is Sun Mar 31 22:39:38 EST 2013
>>>>>>      [java] Starting test in separate process with command:
>>>>>>      [java] /usr/jdk/jdk1.6.0_30/jre/bin/****java
>>>>>>
>>>>>> -Djava.security.manager=org.****apache.river.api.security.****
>>>>>> CombinerSecur
>>>>>> ityManager
>>>>>>
>>>>>> -Djava.security.policy=file:/****opt/src/River_Fixed/**
>>>>>> peterConcurrentPolicy/qa/****harness/policy/defaulttest.****policy
>>>>>>
>>>>>> -Djava.rmi.server.codebase=**htt**p://bluto:9082/qa1-**
>>>>>> javaspace-**dl.jar<h
>>>>>> ttp://bluto:9082/qa1-**javaspace-dl.jar>-cp
>>>>>> /opt/src/River_Fixed/
>>>>>>
>>>>>> **peterConcurrentPolicy/qa/**lib/**jiniharness.jar:/opt/**
>>>>>> src/**River_Fixed
>>>>>> /**
>>>>>>
>>>>>> peterConcurrentPolicy/qa/lib/****jinitests.jar:/opt/src/River_****
>>>>>>
>>>>>> Fixed/peterConcurrentPolicy/****lib/jsk-platform.jar:/opt/src/**
>>>>>> **River_Fix
>>>>>> ed/
>>>>>>
>>>>>> **peterConcurrentPolicy/lib/**jsk-**lib.jar:/opt/src/River_**Fixed/**
>>>>>>
>>>>>> peterConcurrentPolicy/lib/****high-scale-lib.jar:/opt/src/****
>>>>>> River_Fixed/*
>>>>>> *
>>>>>>
>>>>>> peterConcurrentPolicy/lib/****custard-apple-1.0.2.jar -ea -esa
>>>>>> -client
>>>>>> -Djava.ext.dirs=/usr/jdk/jdk1.****6.0_30/jre/lib/ext:/usr/jdk/****
>>>>>>
>>>>>> packages/lib/ext:/opt/src/****River_Fixed/****
>>>>>> peterConcurrentPolicy/qa/lib-
>>>>>> **
>>>>>>
>>>>>> ext:/opt/src/River_Fixed/****peterConcurrentPolicy/lib-ext
>>>>>> -Dcom.sun.jini.jsk.port=9080 -Dcom.sun.jini.qa.port=9081
>>>>>> -Dcom.sun.jini.jsk.home=/opt/****src/River_Fixed/****
>>>>>> peterConcurrentPolicy
>>>>>>
>>>>>> -Dcom.sun.jini.qa.home=/opt/****src/River_Fixed/****
>>>>>> peterConcurrentPolicy/q
>>>>>> a
>>>>>>
>>>>>> -Dcom.sun.jini.qa.harness.****harnessJar=/opt/src/River_**
>>>>>> Fixed/peterConcurrentPolicy/****qa/lib/jiniharness.jar
>>>>>> -Dcom.sun.jini.qa.harness.****testJar=/opt/src/River_Fixed/****
>>>>>> peterConcurrentPolicy/qa/lib/****jinitests.jar
>>>>>> -Dcom.sun.jini.qa.harness.****runjiniserver=true
>>>>>> -Dcom.sun.jini.qa.harness.****runkitserver=true
>>>>>> -Djava.security.properties=*
>>>>>> *file:/opt/src/River_Fixed/****peterConcurrentPolicy/qa/**
>>>>>> harness/trust/dynamic-policy.****properties
>>>>>> -Dcom.sun.jini.qa.harness.****testhosts=
>>>>>> -Djava.util.logging.config.****file=/home/peter/logging.****
>>>>>> properties
>>>>>>
>>>>>> -Dcom.sun.jini.test.home=/opt/****src/River_Fixed/****
>>>>>> peterConcurrentPolicy
>>>>>> /qa
>>>>>>
>>>>>> -Dcom.sun.jini.test.port=9082 -Dcom.sun.jini.qa.harness.**
>>>>>> policies=file:/opt/src/River_****Fixed/peterConcurrentPolicy/****
>>>>>> qa/src/com/sun/jini/test/****resources/jinitest.policy
>>>>>> -Djava.ext.dirs=/usr/jdk/jdk1.****6.0_30/jre/lib/ext:/usr/jdk/****
>>>>>>
>>>>>> packages/lib/ext:/opt/src/****River_Fixed/****
>>>>>> peterConcurrentPolicy/qa/lib-
>>>>>> **
>>>>>>
>>>>>> ext:/opt/src/River_Fixed/****peterConcurrentPolicy/lib-ext
>>>>>> com.sun.jini.qa.harness.****MasterTest com/sun/jini/test/spec/**
>>>>>> javaspace/conformance/****snapshot/****SnapshotExpirationNotifyTest.*
>>>>>> ***td
>>>>>>      [java] com.sun.jini.qa.harness.****TestException: Not all
>>>>>> listeners've
>>>>>> got expected number of events.
>>>>>>      [java]     at com.sun.jini.test.spec.****
>>>>>> javaspace.conformance.**
>>>>>> snapshot.****SnapshotExpirationNotifyTest.****run(**
>>>>>> SnapshotExpirationNotifyTest.****java:370)
>>>>>>      [java]     at
>>>>>> com.sun.jini.qa.harness.****MasterTest.doTest(MasterTest.*
>>>>>> *java:256)
>>>>>>      [java]     at
>>>>>> com.sun.jini.qa.harness.****MasterTest.main(MasterTest.**
>>>>>> java:144)
>>>>>>      [java]
>>>>>>      [java] TIME: 10:42:54 PM
>>>>>>      [java]
>>>>>>      [java] Test process was destroyed and returned code 1
>>>>>>      [java]
>>>>>> com/sun/jini/test/spec/****javaspace/conformance/****snapshot/**
>>>>>> SnapshotExpirationNotifyTest.****td
>>>>>>      [java] Test Failed: Test Failed:
>>>>>> com.sun.jini.qa.harness.****TestException:
>>>>>> Not all listeners've got expected number of events.
>>>>>>      [java]
>>>>>>      [java]
>>>>>>      [java] ------------------------------****-----------
>>>>>>      [java]
>>>>>>      [java] SUMMARY ==============================****===
>>>>>>      [java]
>>>>>>      [java]
>>>>>> com/sun/jini/test/spec/****javaspace/conformance/****snapshot/**
>>>>>> SnapshotExpirationNotifyTest.****td
>>>>>>      [java] Test Failed: Test Failed:
>>>>>> com.sun.jini.qa.harness.****TestException:
>>>>>> Not all listeners've got expected number of events.
>>>>>>      [java]
>>>>>>      [java] ------------------------------****-----------
>>>>>>      [java]
>>>>>>      [java] # of tests started   = 1
>>>>>>      [java] # of tests completed = 1
>>>>>>      [java] # of tests passed    = 0
>>>>>>      [java] # of tests failed    = 1
>>>>>>      [java]
>>>>>>      [java] ------------------------------****-----------
>>>>>>      [java]
>>>>>>      [java]    Date finished:
>>>>>>      [java]       Sun Mar 31 22:42:59 EST 2013
>>>>>>      [java]    Time elapsed:
>>>>>>      [java]       201 seconds
>>>>>>      [java]
>>>>>>      [java] Java Result: 1
>>>>>>
>>>>>> collect-result:
>>>>>>
>>>>>> BUILD FAILED
>>>>>> /opt/src/River_Fixed/****peterConcurrentPolicy/build.****xml:2105:
>>>>>> The
>>>>>> following error occurred while executing this line:
>>>>>> /opt/src/River_Fixed/****peterConcurrentPolicy/qa/****build.xml:357:
>>>>>> condition satisfied
>>>>>>
>>>>>> Total time: 3 minutes 30 seconds
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>
>>>>>
>>>>
>>>>
>>>
>>
>

Re: test failure repeatability

Posted by Patricia Shanahan <pa...@acm.org>.
Are you still using River's TaskManager? When I was looking at its 
performance I noticed some things that seemed dubious to me from the 
point of view of concurrency. I may still have some notes that would be 
useful for a concurrency bug hunt.

Patricia

On 4/1/2013 4:51 AM, Bryan Thompson wrote:
> I am uncomfortable with a release that has known concurrency problems.  I
> am also uncomfortable that the custom Levels serialization change by
> Oracle [1] has broken the current release for new JVMs.  I would like to
> see a minor release which fixes that serialization problem and a candidate
> release which gives people a chance to discover concurrency issues without
> risking a release that is known to be unstable.
>
> I'm happy to review a few of the classes with known concurrency problems
> to see if I can help nail some of these bugs.  Since I do not know the
> river internals, I would only be able to spot concurrency problems that
> exist within a class.  I am not in a good position to comment on
> concurrency problems that might arise through the interactions among
> classes.
>
> Bryan
>
> [1] https://issues.apache.org/jira/browse/RIVER-416
>
> On 4/1/13 6:14 AM, "Peter Firmstone" <ji...@zeus.net.au> wrote:
>
>> The attachments will be removed from the list, so I've cc'd you, anyone
>> who's interested, let me know I can forward the attachments.  They can
>> be opened with jvisualvm.
>>
>> The profiling isn't perfect, the test runs for about 8.5 minutes, so
>> hotspot should have kicked in relatively early in both test runs.
>>
>> I guess a significant problem is that the more I remove performance
>> impediments, like unnecessary DNS calls, the faster multithreading and
>> context switching gets.
>>
>> Not only did the old policy providers create contention, but it was
>> slower for single threaded performance (I'll have to run the previous
>> release branch for comparison when I get some time).
>>
>> The URIGrant.implies call is now down to .228 ms per invocation, down
>>from 1.68 ms per invocation this week, which was already quite good (on
>> old UltraSparcII hardware), during stress tests this method is called
>> almost 40,000 times.
>>
>> In comparison the old policy provider which required a DNS call (every
>> time CodeSource.implies is called, functionality now replaced by
>> URIGrant.implies), the old policy provider also cached all Permission's
>> in highly contended PermissionCollection's, which during network calls
>> invoked SocketPermission.implies, possibly for every SocketPermission in
>> the PermissionCollection, DNS is also consulted by
>> SocketPermission.implies, while synchronized, ouch!
>>
>> URIGrant.implies is non blocking, that's right zero contention.
>> SocketPermission.implies DNS calls can be avoided in most cases if
>> PermissionComparator finds an exact match or wild card.
>>
>> This is without the CombinerSecurityManager, which improves security
>> performance by a factor of 10 (SocketPermission is only checked once for
>> each AccessControlContext).
>>
>> PreferredClassProvider no longer creates unnecessary DNS calls, and
>> neither does SecureClassLoader,  URLClassLoader or PreferredClassLoader.
>>
>> So the good news is the next release will feel much faster, the bad news
>> is that existing concurrency bugs that previously didn't appear during
>> test runs, but likely to manifest during production are now occurring
>> during testing.  There's more good news, we fixed a number of
>> concurrency bugs since the last release too, I guess I have to draw the
>> line somewhere and cut a release.
>>
>> DNS calls haven't been completely eliminated as some are still
>> necessary, but a multitude of unnecessary DNS calls have been eliminated.
>>
>> Regards,
>>
>> Peter.
>>
>> Peter Firmstone wrote:
>>> They've passed more consistently in the past, they're either
>>> concurrency bugs or network timing related, it would be nice to at
>>> least determine if it's the former or latter.
>>>
>>> Cheers,
>>>
>>> Peter.
>>>
>>> Tom Hobbs wrote:
>>>> Are these all new failures, i.e. were they working before?  Or are they
>>>> "new" failures in that the test categories have only recently been
>>>> reactivated and the failures discovered?
>>>>
>>>> Are they a big enough blocker to stop a release?
>>>>
>>>>
>>>> On Sun, Mar 31, 2013 at 11:31 PM, Peter Firmstone <ji...@zeus.net.au>
>>>> wrote:
>>>>
>>>>
>>>>> The following test fails 30 times in a run of 130 tests:
>>>>>
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       1
>>>>> [java]  #       of      tests   passed  =       0
>>>>> [java]  #       of      tests   passed  =       1
>>>>>
>>>>>
>>>>>
>>>>>
>>>>>
>>>>>
>>>>>          100
>>>>>
>>>>>
>>>>>
>>>>>
>>>>> Buildfile: build.xml
>>>>>
>>>>> qa.run-tests:
>>>>>
>>>>> james-brown:
>>>>>    [delete] Deleting directory /opt/src/River_Fixed/**
>>>>> peterConcurrentPolicy/qa/soul
>>>>>     [mkdir] Created dir: /opt/src/River_Fixed/**
>>>>> peterConcurrentPolicy/qa/soul
>>>>>     [touch] Creating
>>>>> /opt/src/River_Fixed/**peterConcurrentPolicy/qa/soul/*
>>>>> *soul.201303312239034808
>>>>>
>>>>> run-tests:
>>>>>      [java]
>>>>>      [java] ------------------------------**-----------
>>>>>      [java] CONFIGURATION FILE:
>>>>>      [java]
>>>>>      [java]    /opt/src/River_Fixed/**peterConcurrentPolicy/qa/src/**
>>>>> com/sun/jini/test/resources/**qaHarness.prop
>>>>>      [java]
>>>>>      [java] ------------------------------**-----------
>>>>>      [java] SETTING UP THE TEST LIST:
>>>>>      [java]
>>>>>      [java]    Adding test:
>>>>> com/sun/jini/test/spec/**javaspace/conformance/
>>>>> **snapshot/**SnapshotExpirationNotifyTest.**td
>>>>>      [java]
>>>>>      [java] ------------------------------**-----------
>>>>>      [java] GENERAL HARNESS CONFIGURATION INFORMATION:
>>>>>      [java]
>>>>>      [java]    Date started:
>>>>>      [java]       Sun Mar 31 22:39:37 EST 2013
>>>>>      [java]    Installation directory of the JSK:
>>>>>      [java]       com.sun.jini.jsk.home=/opt/**src/River_Fixed/**
>>>>> peterConcurrentPolicy
>>>>>      [java]    Installation directory of the harness:
>>>>>      [java]       com.sun.jini.qa.home=/opt/src/**River_Fixed/**
>>>>> peterConcurrentPolicy/qa
>>>>>      [java]    Categories being tested:
>>>>>      [java]       categories=No Categories
>>>>>      [java] ------------------------------**-----------
>>>>>      [java] ENVIRONMENT PROPERTIES:
>>>>>      [java]
>>>>>      [java]    JVM information:
>>>>>      [java]       Java HotSpot(TM) Server VM, 20.5-b03, 32 bit VM mode
>>>>>      [java]       Sun Microsystems Inc.
>>>>>      [java]    OS information:
>>>>>      [java]       SunOS, 5.10, sparc
>>>>>      [java]
>>>>>      [java] ------------------------------**-----------
>>>>>      [java] STARTING TO RUN THE TESTS
>>>>>      [java]
>>>>>      [java]
>>>>>      [java] Running com/sun/jini/test/spec/**javaspace/conformance/**
>>>>> snapshot/**SnapshotExpirationNotifyTest.**td
>>>>>      [java] Time is Sun Mar 31 22:39:38 EST 2013
>>>>>      [java] Starting test in separate process with command:
>>>>>      [java] /usr/jdk/jdk1.6.0_30/jre/bin/**java
>>>>>
>>>>> -Djava.security.manager=org.**apache.river.api.security.**CombinerSecur
>>>>> ityManager
>>>>>
>>>>> -Djava.security.policy=file:/**opt/src/River_Fixed/**
>>>>> peterConcurrentPolicy/qa/**harness/policy/defaulttest.**policy
>>>>>
>>>>> -Djava.rmi.server.codebase=htt**p://bluto:9082/qa1-javaspace-**dl.jar<h
>>>>> ttp://bluto:9082/qa1-javaspace-dl.jar>-cp
>>>>> /opt/src/River_Fixed/
>>>>>
>>>>> **peterConcurrentPolicy/qa/lib/**jiniharness.jar:/opt/src/**River_Fixed
>>>>> /**
>>>>>
>>>>> peterConcurrentPolicy/qa/lib/**jinitests.jar:/opt/src/River_**
>>>>>
>>>>> Fixed/peterConcurrentPolicy/**lib/jsk-platform.jar:/opt/src/**River_Fix
>>>>> ed/
>>>>>
>>>>> **peterConcurrentPolicy/lib/jsk-**lib.jar:/opt/src/River_Fixed/**
>>>>>
>>>>> peterConcurrentPolicy/lib/**high-scale-lib.jar:/opt/src/**River_Fixed/*
>>>>> *
>>>>>
>>>>> peterConcurrentPolicy/lib/**custard-apple-1.0.2.jar -ea -esa -client
>>>>> -Djava.ext.dirs=/usr/jdk/jdk1.**6.0_30/jre/lib/ext:/usr/jdk/**
>>>>>
>>>>> packages/lib/ext:/opt/src/**River_Fixed/**peterConcurrentPolicy/qa/lib-
>>>>> **
>>>>>
>>>>> ext:/opt/src/River_Fixed/**peterConcurrentPolicy/lib-ext
>>>>> -Dcom.sun.jini.jsk.port=9080 -Dcom.sun.jini.qa.port=9081
>>>>> -Dcom.sun.jini.jsk.home=/opt/**src/River_Fixed/**peterConcurrentPolicy
>>>>>
>>>>> -Dcom.sun.jini.qa.home=/opt/**src/River_Fixed/**peterConcurrentPolicy/q
>>>>> a
>>>>>
>>>>> -Dcom.sun.jini.qa.harness.**harnessJar=/opt/src/River_**
>>>>> Fixed/peterConcurrentPolicy/**qa/lib/jiniharness.jar
>>>>> -Dcom.sun.jini.qa.harness.**testJar=/opt/src/River_Fixed/**
>>>>> peterConcurrentPolicy/qa/lib/**jinitests.jar
>>>>> -Dcom.sun.jini.qa.harness.**runjiniserver=true
>>>>> -Dcom.sun.jini.qa.harness.**runkitserver=true
>>>>> -Djava.security.properties=*
>>>>> *file:/opt/src/River_Fixed/**peterConcurrentPolicy/qa/**
>>>>> harness/trust/dynamic-policy.**properties
>>>>> -Dcom.sun.jini.qa.harness.**testhosts=
>>>>> -Djava.util.logging.config.**file=/home/peter/logging.**properties
>>>>>
>>>>> -Dcom.sun.jini.test.home=/opt/**src/River_Fixed/**peterConcurrentPolicy
>>>>> /qa
>>>>>
>>>>> -Dcom.sun.jini.test.port=9082 -Dcom.sun.jini.qa.harness.**
>>>>> policies=file:/opt/src/River_**Fixed/peterConcurrentPolicy/**
>>>>> qa/src/com/sun/jini/test/**resources/jinitest.policy
>>>>> -Djava.ext.dirs=/usr/jdk/jdk1.**6.0_30/jre/lib/ext:/usr/jdk/**
>>>>>
>>>>> packages/lib/ext:/opt/src/**River_Fixed/**peterConcurrentPolicy/qa/lib-
>>>>> **
>>>>>
>>>>> ext:/opt/src/River_Fixed/**peterConcurrentPolicy/lib-ext
>>>>> com.sun.jini.qa.harness.**MasterTest com/sun/jini/test/spec/**
>>>>> javaspace/conformance/**snapshot/**SnapshotExpirationNotifyTest.**td
>>>>>      [java] com.sun.jini.qa.harness.**TestException: Not all
>>>>> listeners've
>>>>> got expected number of events.
>>>>>      [java]     at com.sun.jini.test.spec.**javaspace.conformance.**
>>>>> snapshot.**SnapshotExpirationNotifyTest.**run(**
>>>>> SnapshotExpirationNotifyTest.**java:370)
>>>>>      [java]     at
>>>>> com.sun.jini.qa.harness.**MasterTest.doTest(MasterTest.*
>>>>> *java:256)
>>>>>      [java]     at
>>>>> com.sun.jini.qa.harness.**MasterTest.main(MasterTest.**
>>>>> java:144)
>>>>>      [java]
>>>>>      [java] TIME: 10:42:54 PM
>>>>>      [java]
>>>>>      [java] Test process was destroyed and returned code 1
>>>>>      [java]
>>>>> com/sun/jini/test/spec/**javaspace/conformance/**snapshot/**
>>>>> SnapshotExpirationNotifyTest.**td
>>>>>      [java] Test Failed: Test Failed:
>>>>> com.sun.jini.qa.harness.**TestException:
>>>>> Not all listeners've got expected number of events.
>>>>>      [java]
>>>>>      [java]
>>>>>      [java] ------------------------------**-----------
>>>>>      [java]
>>>>>      [java] SUMMARY ==============================**===
>>>>>      [java]
>>>>>      [java]
>>>>> com/sun/jini/test/spec/**javaspace/conformance/**snapshot/**
>>>>> SnapshotExpirationNotifyTest.**td
>>>>>      [java] Test Failed: Test Failed:
>>>>> com.sun.jini.qa.harness.**TestException:
>>>>> Not all listeners've got expected number of events.
>>>>>      [java]
>>>>>      [java] ------------------------------**-----------
>>>>>      [java]
>>>>>      [java] # of tests started   = 1
>>>>>      [java] # of tests completed = 1
>>>>>      [java] # of tests passed    = 0
>>>>>      [java] # of tests failed    = 1
>>>>>      [java]
>>>>>      [java] ------------------------------**-----------
>>>>>      [java]
>>>>>      [java]    Date finished:
>>>>>      [java]       Sun Mar 31 22:42:59 EST 2013
>>>>>      [java]    Time elapsed:
>>>>>      [java]       201 seconds
>>>>>      [java]
>>>>>      [java] Java Result: 1
>>>>>
>>>>> collect-result:
>>>>>
>>>>> BUILD FAILED
>>>>> /opt/src/River_Fixed/**peterConcurrentPolicy/build.**xml:2105: The
>>>>> following error occurred while executing this line:
>>>>> /opt/src/River_Fixed/**peterConcurrentPolicy/qa/**build.xml:357:
>>>>> condition satisfied
>>>>>
>>>>> Total time: 3 minutes 30 seconds
>>>>>
>>>>>
>>>>>
>>>>
>>>>
>>>
>>>
>>
>


Re: test failure repeatability

Posted by Bryan Thompson <br...@systap.com>.
I am uncomfortable with a release that has known concurrency problems.  I
am also uncomfortable that the custom Levels serialization change by
Oracle [1] has broken the current release for new JVMs.  I would like to
see a minor release which fixes that serialization problem and a candidate
release which gives people a chance to discover concurrency issues without
risking a release that is known to be unstable.

I'm happy to review a few of the classes with known concurrency problems
to see if I can help nail some of these bugs.  Since I do not know the
river internals, I would only be able to spot concurrency problems that
exist within a class.  I am not in a good position to comment on
concurrency problems that might arise through the interactions among
classes.

Bryan

[1] https://issues.apache.org/jira/browse/RIVER-416

On 4/1/13 6:14 AM, "Peter Firmstone" <ji...@zeus.net.au> wrote:

>The attachments will be removed from the list, so I've cc'd you, anyone
>who's interested, let me know I can forward the attachments.  They can
>be opened with jvisualvm.
>
>The profiling isn't perfect, the test runs for about 8.5 minutes, so
>hotspot should have kicked in relatively early in both test runs.
>
>I guess a significant problem is that the more I remove performance
>impediments, like unnecessary DNS calls, the faster multithreading and
>context switching gets.
>
>Not only did the old policy providers create contention, but it was
>slower for single threaded performance (I'll have to run the previous
>release branch for comparison when I get some time).
>
>The URIGrant.implies call is now down to .228 ms per invocation, down
>from 1.68 ms per invocation this week, which was already quite good (on
>old UltraSparcII hardware), during stress tests this method is called
>almost 40,000 times.
>
>In comparison the old policy provider which required a DNS call (every
>time CodeSource.implies is called, functionality now replaced by
>URIGrant.implies), the old policy provider also cached all Permission's
>in highly contended PermissionCollection's, which during network calls
>invoked SocketPermission.implies, possibly for every SocketPermission in
>the PermissionCollection, DNS is also consulted by
>SocketPermission.implies, while synchronized, ouch!
>
>URIGrant.implies is non blocking, that's right zero contention.
>SocketPermission.implies DNS calls can be avoided in most cases if
>PermissionComparator finds an exact match or wild card.
>
>This is without the CombinerSecurityManager, which improves security
>performance by a factor of 10 (SocketPermission is only checked once for
>each AccessControlContext).
>
>PreferredClassProvider no longer creates unnecessary DNS calls, and
>neither does SecureClassLoader,  URLClassLoader or PreferredClassLoader.
>
>So the good news is the next release will feel much faster, the bad news
>is that existing concurrency bugs that previously didn't appear during
>test runs, but likely to manifest during production are now occurring
>during testing.  There's more good news, we fixed a number of
>concurrency bugs since the last release too, I guess I have to draw the
>line somewhere and cut a release.
>
>DNS calls haven't been completely eliminated as some are still
>necessary, but a multitude of unnecessary DNS calls have been eliminated.
>
>Regards,
>
>Peter.
>
>Peter Firmstone wrote:
>> They've passed more consistently in the past, they're either
>> concurrency bugs or network timing related, it would be nice to at
>> least determine if it's the former or latter.
>>
>> Cheers,
>>
>> Peter.
>>
>> Tom Hobbs wrote:
>>> Are these all new failures, i.e. were they working before?  Or are they
>>> "new" failures in that the test categories have only recently been
>>> reactivated and the failures discovered?
>>>
>>> Are they a big enough blocker to stop a release?
>>>
>>>
>>> On Sun, Mar 31, 2013 at 11:31 PM, Peter Firmstone <ji...@zeus.net.au>
>>> wrote:
>>>
>>>
>>>> The following test fails 30 times in a run of 130 tests:
>>>>
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>> [java]  #       of      tests   passed  =       0
>>>> [java]  #       of      tests   passed  =       1
>>>>
>>>>
>>>>
>>>>
>>>>
>>>>
>>>>         100
>>>>
>>>>
>>>>
>>>>
>>>> Buildfile: build.xml
>>>>
>>>> qa.run-tests:
>>>>
>>>> james-brown:
>>>>   [delete] Deleting directory /opt/src/River_Fixed/**
>>>> peterConcurrentPolicy/qa/soul
>>>>    [mkdir] Created dir: /opt/src/River_Fixed/**
>>>> peterConcurrentPolicy/qa/soul
>>>>    [touch] Creating
>>>> /opt/src/River_Fixed/**peterConcurrentPolicy/qa/soul/*
>>>> *soul.201303312239034808
>>>>
>>>> run-tests:
>>>>     [java]
>>>>     [java] ------------------------------**-----------
>>>>     [java] CONFIGURATION FILE:
>>>>     [java]
>>>>     [java]    /opt/src/River_Fixed/**peterConcurrentPolicy/qa/src/**
>>>> com/sun/jini/test/resources/**qaHarness.prop
>>>>     [java]
>>>>     [java] ------------------------------**-----------
>>>>     [java] SETTING UP THE TEST LIST:
>>>>     [java]
>>>>     [java]    Adding test:
>>>> com/sun/jini/test/spec/**javaspace/conformance/
>>>> **snapshot/**SnapshotExpirationNotifyTest.**td
>>>>     [java]
>>>>     [java] ------------------------------**-----------
>>>>     [java] GENERAL HARNESS CONFIGURATION INFORMATION:
>>>>     [java]
>>>>     [java]    Date started:
>>>>     [java]       Sun Mar 31 22:39:37 EST 2013
>>>>     [java]    Installation directory of the JSK:
>>>>     [java]       com.sun.jini.jsk.home=/opt/**src/River_Fixed/**
>>>> peterConcurrentPolicy
>>>>     [java]    Installation directory of the harness:
>>>>     [java]       com.sun.jini.qa.home=/opt/src/**River_Fixed/**
>>>> peterConcurrentPolicy/qa
>>>>     [java]    Categories being tested:
>>>>     [java]       categories=No Categories
>>>>     [java] ------------------------------**-----------
>>>>     [java] ENVIRONMENT PROPERTIES:
>>>>     [java]
>>>>     [java]    JVM information:
>>>>     [java]       Java HotSpot(TM) Server VM, 20.5-b03, 32 bit VM mode
>>>>     [java]       Sun Microsystems Inc.
>>>>     [java]    OS information:
>>>>     [java]       SunOS, 5.10, sparc
>>>>     [java]
>>>>     [java] ------------------------------**-----------
>>>>     [java] STARTING TO RUN THE TESTS
>>>>     [java]
>>>>     [java]
>>>>     [java] Running com/sun/jini/test/spec/**javaspace/conformance/**
>>>> snapshot/**SnapshotExpirationNotifyTest.**td
>>>>     [java] Time is Sun Mar 31 22:39:38 EST 2013
>>>>     [java] Starting test in separate process with command:
>>>>     [java] /usr/jdk/jdk1.6.0_30/jre/bin/**java
>>>>
>>>>-Djava.security.manager=org.**apache.river.api.security.**CombinerSecur
>>>>ityManager
>>>>
>>>> -Djava.security.policy=file:/**opt/src/River_Fixed/**
>>>> peterConcurrentPolicy/qa/**harness/policy/defaulttest.**policy
>>>>
>>>>-Djava.rmi.server.codebase=htt**p://bluto:9082/qa1-javaspace-**dl.jar<h
>>>>ttp://bluto:9082/qa1-javaspace-dl.jar>-cp
>>>> /opt/src/River_Fixed/
>>>>
>>>>**peterConcurrentPolicy/qa/lib/**jiniharness.jar:/opt/src/**River_Fixed
>>>>/**
>>>>
>>>> peterConcurrentPolicy/qa/lib/**jinitests.jar:/opt/src/River_**
>>>>
>>>>Fixed/peterConcurrentPolicy/**lib/jsk-platform.jar:/opt/src/**River_Fix
>>>>ed/
>>>>
>>>> **peterConcurrentPolicy/lib/jsk-**lib.jar:/opt/src/River_Fixed/**
>>>>
>>>>peterConcurrentPolicy/lib/**high-scale-lib.jar:/opt/src/**River_Fixed/*
>>>>*
>>>>
>>>> peterConcurrentPolicy/lib/**custard-apple-1.0.2.jar -ea -esa -client
>>>> -Djava.ext.dirs=/usr/jdk/jdk1.**6.0_30/jre/lib/ext:/usr/jdk/**
>>>>
>>>>packages/lib/ext:/opt/src/**River_Fixed/**peterConcurrentPolicy/qa/lib-
>>>>**
>>>>
>>>> ext:/opt/src/River_Fixed/**peterConcurrentPolicy/lib-ext
>>>> -Dcom.sun.jini.jsk.port=9080 -Dcom.sun.jini.qa.port=9081
>>>> -Dcom.sun.jini.jsk.home=/opt/**src/River_Fixed/**peterConcurrentPolicy
>>>>
>>>>-Dcom.sun.jini.qa.home=/opt/**src/River_Fixed/**peterConcurrentPolicy/q
>>>>a
>>>>
>>>> -Dcom.sun.jini.qa.harness.**harnessJar=/opt/src/River_**
>>>> Fixed/peterConcurrentPolicy/**qa/lib/jiniharness.jar
>>>> -Dcom.sun.jini.qa.harness.**testJar=/opt/src/River_Fixed/**
>>>> peterConcurrentPolicy/qa/lib/**jinitests.jar
>>>> -Dcom.sun.jini.qa.harness.**runjiniserver=true
>>>> -Dcom.sun.jini.qa.harness.**runkitserver=true
>>>> -Djava.security.properties=*
>>>> *file:/opt/src/River_Fixed/**peterConcurrentPolicy/qa/**
>>>> harness/trust/dynamic-policy.**properties
>>>> -Dcom.sun.jini.qa.harness.**testhosts=
>>>> -Djava.util.logging.config.**file=/home/peter/logging.**properties
>>>>
>>>>-Dcom.sun.jini.test.home=/opt/**src/River_Fixed/**peterConcurrentPolicy
>>>>/qa
>>>>
>>>> -Dcom.sun.jini.test.port=9082 -Dcom.sun.jini.qa.harness.**
>>>> policies=file:/opt/src/River_**Fixed/peterConcurrentPolicy/**
>>>> qa/src/com/sun/jini/test/**resources/jinitest.policy
>>>> -Djava.ext.dirs=/usr/jdk/jdk1.**6.0_30/jre/lib/ext:/usr/jdk/**
>>>>
>>>>packages/lib/ext:/opt/src/**River_Fixed/**peterConcurrentPolicy/qa/lib-
>>>>**
>>>>
>>>> ext:/opt/src/River_Fixed/**peterConcurrentPolicy/lib-ext
>>>> com.sun.jini.qa.harness.**MasterTest com/sun/jini/test/spec/**
>>>> javaspace/conformance/**snapshot/**SnapshotExpirationNotifyTest.**td
>>>>     [java] com.sun.jini.qa.harness.**TestException: Not all
>>>> listeners've
>>>> got expected number of events.
>>>>     [java]     at com.sun.jini.test.spec.**javaspace.conformance.**
>>>> snapshot.**SnapshotExpirationNotifyTest.**run(**
>>>> SnapshotExpirationNotifyTest.**java:370)
>>>>     [java]     at
>>>> com.sun.jini.qa.harness.**MasterTest.doTest(MasterTest.*
>>>> *java:256)
>>>>     [java]     at
>>>> com.sun.jini.qa.harness.**MasterTest.main(MasterTest.**
>>>> java:144)
>>>>     [java]
>>>>     [java] TIME: 10:42:54 PM
>>>>     [java]
>>>>     [java] Test process was destroyed and returned code 1
>>>>     [java]
>>>>com/sun/jini/test/spec/**javaspace/conformance/**snapshot/**
>>>> SnapshotExpirationNotifyTest.**td
>>>>     [java] Test Failed: Test Failed:
>>>> com.sun.jini.qa.harness.**TestException:
>>>> Not all listeners've got expected number of events.
>>>>     [java]
>>>>     [java]
>>>>     [java] ------------------------------**-----------
>>>>     [java]
>>>>     [java] SUMMARY ==============================**===
>>>>     [java]
>>>>     [java]
>>>>com/sun/jini/test/spec/**javaspace/conformance/**snapshot/**
>>>> SnapshotExpirationNotifyTest.**td
>>>>     [java] Test Failed: Test Failed:
>>>> com.sun.jini.qa.harness.**TestException:
>>>> Not all listeners've got expected number of events.
>>>>     [java]
>>>>     [java] ------------------------------**-----------
>>>>     [java]
>>>>     [java] # of tests started   = 1
>>>>     [java] # of tests completed = 1
>>>>     [java] # of tests passed    = 0
>>>>     [java] # of tests failed    = 1
>>>>     [java]
>>>>     [java] ------------------------------**-----------
>>>>     [java]
>>>>     [java]    Date finished:
>>>>     [java]       Sun Mar 31 22:42:59 EST 2013
>>>>     [java]    Time elapsed:
>>>>     [java]       201 seconds
>>>>     [java]
>>>>     [java] Java Result: 1
>>>>
>>>> collect-result:
>>>>
>>>> BUILD FAILED
>>>> /opt/src/River_Fixed/**peterConcurrentPolicy/build.**xml:2105: The
>>>> following error occurred while executing this line:
>>>> /opt/src/River_Fixed/**peterConcurrentPolicy/qa/**build.xml:357:
>>>> condition satisfied
>>>>
>>>> Total time: 3 minutes 30 seconds
>>>>
>>>>
>>>>
>>>
>>>
>>
>>
>


Re: test failure repeatability

Posted by Peter Firmstone <ji...@zeus.net.au>.
The attachments will be removed from the list, so I've cc'd you. Anyone 
who's interested, let me know and I can forward the attachments.  They 
can be opened with jvisualvm.

The profiling isn't perfect, the test runs for about 8.5 minutes, so 
hotspot should have kicked in relatively early in both test runs.

I guess a significant problem is that the more performance impediments 
I remove, like unnecessary DNS calls, the faster multithreading and 
context switching get.

Not only did the old policy providers create contention, but they were 
also slower for single threaded performance (I'll have to run the 
previous release branch for comparison when I get some time).

The URIGrant.implies call is now down to .228 ms per invocation, down 
from 1.68 ms per invocation this week, which was already quite good (on 
old UltraSparcII hardware), during stress tests this method is called 
almost 40,000 times.

In comparison, the old policy provider required a DNS call every time 
CodeSource.implies was called (functionality now replaced by 
URIGrant.implies). The old policy provider also cached all Permissions 
in highly contended PermissionCollections, which during network calls 
invoked SocketPermission.implies, possibly for every SocketPermission in 
the PermissionCollection. DNS is also consulted by 
SocketPermission.implies, while synchronized, ouch!

URIGrant.implies is non-blocking; that's right, zero contention.  
SocketPermission.implies DNS calls can be avoided in most cases if 
PermissionComparator finds an exact match or wild card.

This is without the CombinerSecurityManager, which improves security 
performance by a factor of 10 (SocketPermission is only checked once for 
each AccessControlContext).

PreferredClassProvider no longer creates unnecessary DNS calls, and 
neither do SecureClassLoader, URLClassLoader or PreferredClassLoader.

So the good news is the next release will feel much faster; the bad news 
is that existing concurrency bugs that previously didn't appear during 
test runs, but were likely to manifest in production, are now occurring 
during testing.  There's more good news: we fixed a number of 
concurrency bugs since the last release too. I guess I have to draw the 
line somewhere and cut a release.

DNS calls haven't been completely eliminated as some are still 
necessary, but a multitude of unnecessary DNS calls have been eliminated.

Regards,

Peter.

Peter Firmstone wrote:
> They've passed more consistently in the past, they're either 
> concurrency bugs or network timing related, it would be nice to at 
> least determine if it's the former or latter.
>
> Cheers,
>
> Peter.
>
> Tom Hobbs wrote:
>> Are these all new failures, i.e. were they working before?  Or are they
>> "new" failures in that the test categories have only recently been
>> reactivated and the failures discovered?
>>
>> Are they a big enough blocker to stop a release?
>>
>>
>> On Sun, Mar 31, 2013 at 11:31 PM, Peter Firmstone <ji...@zeus.net.au> 
>> wrote:
>>
>>  
>>> The following test fails 30 times in a run of 130 tests:
>>>
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       1
>>> [java]  #       of      tests   passed  =       0
>>> [java]  #       of      tests   passed  =       1
>>>
>>>
>>>
>>>
>>>
>>>
>>>         100
>>>
>>>
>>>
>>>
>>> Buildfile: build.xml
>>>
>>> qa.run-tests:
>>>
>>> james-brown:
>>>   [delete] Deleting directory /opt/src/River_Fixed/**
>>> peterConcurrentPolicy/qa/soul
>>>    [mkdir] Created dir: /opt/src/River_Fixed/**
>>> peterConcurrentPolicy/qa/soul
>>>    [touch] Creating 
>>> /opt/src/River_Fixed/**peterConcurrentPolicy/qa/soul/*
>>> *soul.201303312239034808
>>>
>>> run-tests:
>>>     [java]
>>>     [java] ------------------------------**-----------
>>>     [java] CONFIGURATION FILE:
>>>     [java]
>>>     [java]    /opt/src/River_Fixed/**peterConcurrentPolicy/qa/src/**
>>> com/sun/jini/test/resources/**qaHarness.prop
>>>     [java]
>>>     [java] ------------------------------**-----------
>>>     [java] SETTING UP THE TEST LIST:
>>>     [java]
>>>     [java]    Adding test: 
>>> com/sun/jini/test/spec/**javaspace/conformance/
>>> **snapshot/**SnapshotExpirationNotifyTest.**td
>>>     [java]
>>>     [java] ------------------------------**-----------
>>>     [java] GENERAL HARNESS CONFIGURATION INFORMATION:
>>>     [java]
>>>     [java]    Date started:
>>>     [java]       Sun Mar 31 22:39:37 EST 2013
>>>     [java]    Installation directory of the JSK:
>>>     [java]       com.sun.jini.jsk.home=/opt/**src/River_Fixed/**
>>> peterConcurrentPolicy
>>>     [java]    Installation directory of the harness:
>>>     [java]       com.sun.jini.qa.home=/opt/src/**River_Fixed/**
>>> peterConcurrentPolicy/qa
>>>     [java]    Categories being tested:
>>>     [java]       categories=No Categories
>>>     [java] ------------------------------**-----------
>>>     [java] ENVIRONMENT PROPERTIES:
>>>     [java]
>>>     [java]    JVM information:
>>>     [java]       Java HotSpot(TM) Server VM, 20.5-b03, 32 bit VM mode
>>>     [java]       Sun Microsystems Inc.
>>>     [java]    OS information:
>>>     [java]       SunOS, 5.10, sparc
>>>     [java]
>>>     [java] ------------------------------**-----------
>>>     [java] STARTING TO RUN THE TESTS
>>>     [java]
>>>     [java]
>>>     [java] Running com/sun/jini/test/spec/**javaspace/conformance/**
>>> snapshot/**SnapshotExpirationNotifyTest.**td
>>>     [java] Time is Sun Mar 31 22:39:38 EST 2013
>>>     [java] Starting test in separate process with command:
>>>     [java] /usr/jdk/jdk1.6.0_30/jre/bin/**java
>>> -Djava.security.manager=org.**apache.river.api.security.**CombinerSecurityManager 
>>>
>>> -Djava.security.policy=file:/**opt/src/River_Fixed/**
>>> peterConcurrentPolicy/qa/**harness/policy/defaulttest.**policy
>>> -Djava.rmi.server.codebase=htt**p://bluto:9082/qa1-javaspace-**dl.jar<http://bluto:9082/qa1-javaspace-dl.jar>-cp 
>>> /opt/src/River_Fixed/
>>> **peterConcurrentPolicy/qa/lib/**jiniharness.jar:/opt/src/**River_Fixed/** 
>>>
>>> peterConcurrentPolicy/qa/lib/**jinitests.jar:/opt/src/River_**
>>> Fixed/peterConcurrentPolicy/**lib/jsk-platform.jar:/opt/src/**River_Fixed/ 
>>>
>>> **peterConcurrentPolicy/lib/jsk-**lib.jar:/opt/src/River_Fixed/**
>>> peterConcurrentPolicy/lib/**high-scale-lib.jar:/opt/src/**River_Fixed/** 
>>>
>>> peterConcurrentPolicy/lib/**custard-apple-1.0.2.jar -ea -esa -client
>>> -Djava.ext.dirs=/usr/jdk/jdk1.**6.0_30/jre/lib/ext:/usr/jdk/**
>>> packages/lib/ext:/opt/src/**River_Fixed/**peterConcurrentPolicy/qa/lib-** 
>>>
>>> ext:/opt/src/River_Fixed/**peterConcurrentPolicy/lib-ext
>>> -Dcom.sun.jini.jsk.port=9080 -Dcom.sun.jini.qa.port=9081
>>> -Dcom.sun.jini.jsk.home=/opt/**src/River_Fixed/**peterConcurrentPolicy
>>> -Dcom.sun.jini.qa.home=/opt/**src/River_Fixed/**peterConcurrentPolicy/qa 
>>>
>>> -Dcom.sun.jini.qa.harness.**harnessJar=/opt/src/River_**
>>> Fixed/peterConcurrentPolicy/**qa/lib/jiniharness.jar
>>> -Dcom.sun.jini.qa.harness.**testJar=/opt/src/River_Fixed/**
>>> peterConcurrentPolicy/qa/lib/**jinitests.jar 
>>> -Dcom.sun.jini.qa.harness.**runjiniserver=true
>>> -Dcom.sun.jini.qa.harness.**runkitserver=true 
>>> -Djava.security.properties=*
>>> *file:/opt/src/River_Fixed/**peterConcurrentPolicy/qa/**
>>> harness/trust/dynamic-policy.**properties 
>>> -Dcom.sun.jini.qa.harness.**testhosts=
>>> -Djava.util.logging.config.**file=/home/peter/logging.**properties
>>> -Dcom.sun.jini.test.home=/opt/**src/River_Fixed/**peterConcurrentPolicy/qa 
>>>
>>> -Dcom.sun.jini.test.port=9082 -Dcom.sun.jini.qa.harness.**
>>> policies=file:/opt/src/River_**Fixed/peterConcurrentPolicy/**
>>> qa/src/com/sun/jini/test/**resources/jinitest.policy
>>> -Djava.ext.dirs=/usr/jdk/jdk1.**6.0_30/jre/lib/ext:/usr/jdk/**
>>> packages/lib/ext:/opt/src/**River_Fixed/**peterConcurrentPolicy/qa/lib-** 
>>>
>>> ext:/opt/src/River_Fixed/**peterConcurrentPolicy/lib-ext
>>> com.sun.jini.qa.harness.**MasterTest com/sun/jini/test/spec/**
>>> javaspace/conformance/**snapshot/**SnapshotExpirationNotifyTest.**td
>>>     [java] com.sun.jini.qa.harness.**TestException: Not all 
>>> listeners've
>>> got expected number of events.
>>>     [java]     at com.sun.jini.test.spec.**javaspace.conformance.**
>>> snapshot.**SnapshotExpirationNotifyTest.**run(**
>>> SnapshotExpirationNotifyTest.**java:370)
>>>     [java]     at 
>>> com.sun.jini.qa.harness.**MasterTest.doTest(MasterTest.*
>>> *java:256)
>>>     [java]     at 
>>> com.sun.jini.qa.harness.**MasterTest.main(MasterTest.**
>>> java:144)
>>>     [java]
>>>     [java] TIME: 10:42:54 PM
>>>     [java]
>>>     [java] Test process was destroyed and returned code 1
>>>     [java] com/sun/jini/test/spec/**javaspace/conformance/**snapshot/**
>>> SnapshotExpirationNotifyTest.**td
>>>     [java] Test Failed: Test Failed: 
>>> com.sun.jini.qa.harness.**TestException:
>>> Not all listeners've got expected number of events.
>>>     [java]
>>>     [java]
>>>     [java] ------------------------------**-----------
>>>     [java]
>>>     [java] SUMMARY ==============================**===
>>>     [java]
>>>     [java] com/sun/jini/test/spec/**javaspace/conformance/**snapshot/**
>>> SnapshotExpirationNotifyTest.**td
>>>     [java] Test Failed: Test Failed: 
>>> com.sun.jini.qa.harness.**TestException:
>>> Not all listeners've got expected number of events.
>>>     [java]
>>>     [java] ------------------------------**-----------
>>>     [java]
>>>     [java] # of tests started   = 1
>>>     [java] # of tests completed = 1
>>>     [java] # of tests passed    = 0
>>>     [java] # of tests failed    = 1
>>>     [java]
>>>     [java] ------------------------------**-----------
>>>     [java]
>>>     [java]    Date finished:
>>>     [java]       Sun Mar 31 22:42:59 EST 2013
>>>     [java]    Time elapsed:
>>>     [java]       201 seconds
>>>     [java]
>>>     [java] Java Result: 1
>>>
>>> collect-result:
>>>
>>> BUILD FAILED
>>> /opt/src/River_Fixed/**peterConcurrentPolicy/build.**xml:2105: The
>>> following error occurred while executing this line:
>>> /opt/src/River_Fixed/**peterConcurrentPolicy/qa/**build.xml:357:
>>> condition satisfied
>>>
>>> Total time: 3 minutes 30 seconds
>>>
>>>
>>>     
>>
>>   
>
>


Re: test failure repeatability

Posted by Peter Firmstone <ji...@zeus.net.au>.
They've passed more consistently in the past, they're either concurrency 
bugs or network timing related, it would be nice to at least determine 
if it's the former or latter.

Cheers,

Peter.

Tom Hobbs wrote:
> Are these all new failures, i.e. were they working before?  Or are they
> "new" failures in that the test categories have only recently been
> reactivated and the failures discovered?
>
> Are they a big enough blocker to stop a release?
>
>
> On Sun, Mar 31, 2013 at 11:31 PM, Peter Firmstone <ji...@zeus.net.au> wrote:
>
>   
>> The following test fails 30 times in a run of 130 tests:
>>
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       1
>> [java]  #       of      tests   passed  =       0
>> [java]  #       of      tests   passed  =       1
>>
>>
>>
>>
>>
>>
>>         100
>>
>>
>>
>>
>> Buildfile: build.xml
>>
>> qa.run-tests:
>>
>> james-brown:
>>   [delete] Deleting directory /opt/src/River_Fixed/**
>> peterConcurrentPolicy/qa/soul
>>    [mkdir] Created dir: /opt/src/River_Fixed/**
>> peterConcurrentPolicy/qa/soul
>>    [touch] Creating /opt/src/River_Fixed/**peterConcurrentPolicy/qa/soul/*
>> *soul.201303312239034808
>>
>> run-tests:
>>     [java]
>>     [java] ------------------------------**-----------
>>     [java] CONFIGURATION FILE:
>>     [java]
>>     [java]    /opt/src/River_Fixed/**peterConcurrentPolicy/qa/src/**
>> com/sun/jini/test/resources/**qaHarness.prop
>>     [java]
>>     [java] ------------------------------**-----------
>>     [java] SETTING UP THE TEST LIST:
>>     [java]
>>     [java]    Adding test: com/sun/jini/test/spec/**javaspace/conformance/
>> **snapshot/**SnapshotExpirationNotifyTest.**td
>>     [java]
>>     [java] ------------------------------**-----------
>>     [java] GENERAL HARNESS CONFIGURATION INFORMATION:
>>     [java]
>>     [java]    Date started:
>>     [java]       Sun Mar 31 22:39:37 EST 2013
>>     [java]    Installation directory of the JSK:
>>     [java]       com.sun.jini.jsk.home=/opt/**src/River_Fixed/**
>> peterConcurrentPolicy
>>     [java]    Installation directory of the harness:
>>     [java]       com.sun.jini.qa.home=/opt/src/**River_Fixed/**
>> peterConcurrentPolicy/qa
>>     [java]    Categories being tested:
>>     [java]       categories=No Categories
>>     [java] ------------------------------**-----------
>>     [java] ENVIRONMENT PROPERTIES:
>>     [java]
>>     [java]    JVM information:
>>     [java]       Java HotSpot(TM) Server VM, 20.5-b03, 32 bit VM mode
>>     [java]       Sun Microsystems Inc.
>>     [java]    OS information:
>>     [java]       SunOS, 5.10, sparc
>>     [java]
>>     [java] ------------------------------**-----------
>>     [java] STARTING TO RUN THE TESTS
>>     [java]
>>     [java]
>>     [java] Running com/sun/jini/test/spec/**javaspace/conformance/**
>> snapshot/**SnapshotExpirationNotifyTest.**td
>>     [java] Time is Sun Mar 31 22:39:38 EST 2013
>>     [java] Starting test in separate process with command:
>>     [java] /usr/jdk/jdk1.6.0_30/jre/bin/**java
>> -Djava.security.manager=org.**apache.river.api.security.**CombinerSecurityManager
>> -Djava.security.policy=file:/**opt/src/River_Fixed/**
>> peterConcurrentPolicy/qa/**harness/policy/defaulttest.**policy
>> -Djava.rmi.server.codebase=http://bluto:9082/qa1-javaspace-dl.jar -cp /opt/src/River_Fixed/
>> **peterConcurrentPolicy/qa/lib/**jiniharness.jar:/opt/src/**River_Fixed/**
>> peterConcurrentPolicy/qa/lib/**jinitests.jar:/opt/src/River_**
>> Fixed/peterConcurrentPolicy/**lib/jsk-platform.jar:/opt/src/**River_Fixed/
>> **peterConcurrentPolicy/lib/jsk-**lib.jar:/opt/src/River_Fixed/**
>> peterConcurrentPolicy/lib/**high-scale-lib.jar:/opt/src/**River_Fixed/**
>> peterConcurrentPolicy/lib/**custard-apple-1.0.2.jar -ea -esa -client
>> -Djava.ext.dirs=/usr/jdk/jdk1.**6.0_30/jre/lib/ext:/usr/jdk/**
>> packages/lib/ext:/opt/src/**River_Fixed/**peterConcurrentPolicy/qa/lib-**
>> ext:/opt/src/River_Fixed/**peterConcurrentPolicy/lib-ext
>> -Dcom.sun.jini.jsk.port=9080 -Dcom.sun.jini.qa.port=9081
>> -Dcom.sun.jini.jsk.home=/opt/**src/River_Fixed/**peterConcurrentPolicy
>> -Dcom.sun.jini.qa.home=/opt/**src/River_Fixed/**peterConcurrentPolicy/qa
>> -Dcom.sun.jini.qa.harness.**harnessJar=/opt/src/River_**
>> Fixed/peterConcurrentPolicy/**qa/lib/jiniharness.jar
>> -Dcom.sun.jini.qa.harness.**testJar=/opt/src/River_Fixed/**
>> peterConcurrentPolicy/qa/lib/**jinitests.jar -Dcom.sun.jini.qa.harness.**runjiniserver=true
>> -Dcom.sun.jini.qa.harness.**runkitserver=true -Djava.security.properties=*
>> *file:/opt/src/River_Fixed/**peterConcurrentPolicy/qa/**
>> harness/trust/dynamic-policy.**properties -Dcom.sun.jini.qa.harness.**testhosts=
>> -Djava.util.logging.config.**file=/home/peter/logging.**properties
>> -Dcom.sun.jini.test.home=/opt/**src/River_Fixed/**peterConcurrentPolicy/qa
>> -Dcom.sun.jini.test.port=9082 -Dcom.sun.jini.qa.harness.**
>> policies=file:/opt/src/River_**Fixed/peterConcurrentPolicy/**
>> qa/src/com/sun/jini/test/**resources/jinitest.policy
>> -Djava.ext.dirs=/usr/jdk/jdk1.**6.0_30/jre/lib/ext:/usr/jdk/**
>> packages/lib/ext:/opt/src/**River_Fixed/**peterConcurrentPolicy/qa/lib-**
>> ext:/opt/src/River_Fixed/**peterConcurrentPolicy/lib-ext
>> com.sun.jini.qa.harness.**MasterTest com/sun/jini/test/spec/**
>> javaspace/conformance/**snapshot/**SnapshotExpirationNotifyTest.**td
>>     [java] com.sun.jini.qa.harness.TestException: Not all listeners've got expected number of events.
>>     [java]     at com.sun.jini.test.spec.javaspace.conformance.snapshot.SnapshotExpirationNotifyTest.run(SnapshotExpirationNotifyTest.java:370)
>>     [java]     at com.sun.jini.qa.harness.MasterTest.doTest(MasterTest.java:256)
>>     [java]     at com.sun.jini.qa.harness.MasterTest.main(MasterTest.java:144)
>>     [java]
>>     [java] TIME: 10:42:54 PM
>>     [java]
>>     [java] Test process was destroyed and returned code 1
>>     [java] com/sun/jini/test/spec/**javaspace/conformance/**snapshot/**
>> SnapshotExpirationNotifyTest.**td
>>     [java] Test Failed: Test Failed: com.sun.jini.qa.harness.**TestException:
>> Not all listeners've got expected number of events.
>>     [java]
>>     [java]
>>     [java] ------------------------------**-----------
>>     [java]
>>     [java] SUMMARY ==============================**===
>>     [java]
>>     [java] com/sun/jini/test/spec/**javaspace/conformance/**snapshot/**
>> SnapshotExpirationNotifyTest.**td
>>     [java] Test Failed: Test Failed: com.sun.jini.qa.harness.**TestException:
>> Not all listeners've got expected number of events.
>>     [java]
>>     [java] ------------------------------**-----------
>>     [java]
>>     [java] # of tests started   = 1
>>     [java] # of tests completed = 1
>>     [java] # of tests passed    = 0
>>     [java] # of tests failed    = 1
>>     [java]
>>     [java] ------------------------------**-----------
>>     [java]
>>     [java]    Date finished:
>>     [java]       Sun Mar 31 22:42:59 EST 2013
>>     [java]    Time elapsed:
>>     [java]       201 seconds
>>     [java]
>>     [java] Java Result: 1
>>
>> collect-result:
>>
>> BUILD FAILED
>> /opt/src/River_Fixed/peterConcurrentPolicy/build.xml:2105: The
>> following error occurred while executing this line:
>> /opt/src/River_Fixed/peterConcurrentPolicy/qa/build.xml:357:
>> condition satisfied
>>
>> Total time: 3 minutes 30 seconds
>>
>>
>>     
>
>   


Re: test failure repeatability

Posted by Tom Hobbs <tv...@googlemail.com>.
Are these all new failures, i.e. were they working before?  Or are they
"new" failures in that the test categories have only recently been
reactivated and the failures discovered?

Are they a big enough blocker to stop a release?


On Sun, Mar 31, 2013 at 11:31 PM, Peter Firmstone <ji...@zeus.net.au> wrote:

> The following test fails 30 times in a run of 130 tests:
>
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       1
> [java]  #       of      tests   passed  =       0
> [java]  #       of      tests   passed  =       1
>
>
>
>
>
>
>         100
>
>
>
>
> Buildfile: build.xml
>
> qa.run-tests:
>
> james-brown:
>   [delete] Deleting directory /opt/src/River_Fixed/**
> peterConcurrentPolicy/qa/soul
>    [mkdir] Created dir: /opt/src/River_Fixed/**
> peterConcurrentPolicy/qa/soul
>    [touch] Creating /opt/src/River_Fixed/**peterConcurrentPolicy/qa/soul/*
> *soul.201303312239034808
>
> run-tests:
>     [java]
>     [java] ------------------------------**-----------
>     [java] CONFIGURATION FILE:
>     [java]
>     [java]    /opt/src/River_Fixed/**peterConcurrentPolicy/qa/src/**
> com/sun/jini/test/resources/**qaHarness.prop
>     [java]
>     [java] ------------------------------**-----------
>     [java] SETTING UP THE TEST LIST:
>     [java]
>     [java]    Adding test: com/sun/jini/test/spec/**javaspace/conformance/
> **snapshot/**SnapshotExpirationNotifyTest.**td
>     [java]
>     [java] ------------------------------**-----------
>     [java] GENERAL HARNESS CONFIGURATION INFORMATION:
>     [java]
>     [java]    Date started:
>     [java]       Sun Mar 31 22:39:37 EST 2013
>     [java]    Installation directory of the JSK:
>     [java]       com.sun.jini.jsk.home=/opt/**src/River_Fixed/**
> peterConcurrentPolicy
>     [java]    Installation directory of the harness:
>     [java]       com.sun.jini.qa.home=/opt/src/**River_Fixed/**
> peterConcurrentPolicy/qa
>     [java]    Categories being tested:
>     [java]       categories=No Categories
>     [java] ------------------------------**-----------
>     [java] ENVIRONMENT PROPERTIES:
>     [java]
>     [java]    JVM information:
>     [java]       Java HotSpot(TM) Server VM, 20.5-b03, 32 bit VM mode
>     [java]       Sun Microsystems Inc.
>     [java]    OS information:
>     [java]       SunOS, 5.10, sparc
>     [java]
>     [java] ------------------------------**-----------
>     [java] STARTING TO RUN THE TESTS
>     [java]
>     [java]
>     [java] Running com/sun/jini/test/spec/**javaspace/conformance/**
> snapshot/**SnapshotExpirationNotifyTest.**td
>     [java] Time is Sun Mar 31 22:39:38 EST 2013
>     [java] Starting test in separate process with command:
>     [java] /usr/jdk/jdk1.6.0_30/jre/bin/**java
> -Djava.security.manager=org.**apache.river.api.security.**CombinerSecurityManager
> -Djava.security.policy=file:/**opt/src/River_Fixed/**
> peterConcurrentPolicy/qa/**harness/policy/defaulttest.**policy
> -Djava.rmi.server.codebase=http://bluto:9082/qa1-javaspace-dl.jar -cp /opt/src/River_Fixed/
> **peterConcurrentPolicy/qa/lib/**jiniharness.jar:/opt/src/**River_Fixed/**
> peterConcurrentPolicy/qa/lib/**jinitests.jar:/opt/src/River_**
> Fixed/peterConcurrentPolicy/**lib/jsk-platform.jar:/opt/src/**River_Fixed/
> **peterConcurrentPolicy/lib/jsk-**lib.jar:/opt/src/River_Fixed/**
> peterConcurrentPolicy/lib/**high-scale-lib.jar:/opt/src/**River_Fixed/**
> peterConcurrentPolicy/lib/**custard-apple-1.0.2.jar -ea -esa -client
> -Djava.ext.dirs=/usr/jdk/jdk1.**6.0_30/jre/lib/ext:/usr/jdk/**
> packages/lib/ext:/opt/src/**River_Fixed/**peterConcurrentPolicy/qa/lib-**
> ext:/opt/src/River_Fixed/**peterConcurrentPolicy/lib-ext
> -Dcom.sun.jini.jsk.port=9080 -Dcom.sun.jini.qa.port=9081
> -Dcom.sun.jini.jsk.home=/opt/**src/River_Fixed/**peterConcurrentPolicy
> -Dcom.sun.jini.qa.home=/opt/**src/River_Fixed/**peterConcurrentPolicy/qa
> -Dcom.sun.jini.qa.harness.**harnessJar=/opt/src/River_**
> Fixed/peterConcurrentPolicy/**qa/lib/jiniharness.jar
> -Dcom.sun.jini.qa.harness.**testJar=/opt/src/River_Fixed/**
> peterConcurrentPolicy/qa/lib/**jinitests.jar -Dcom.sun.jini.qa.harness.**runjiniserver=true
> -Dcom.sun.jini.qa.harness.**runkitserver=true -Djava.security.properties=*
> *file:/opt/src/River_Fixed/**peterConcurrentPolicy/qa/**
> harness/trust/dynamic-policy.**properties -Dcom.sun.jini.qa.harness.**testhosts=
> -Djava.util.logging.config.**file=/home/peter/logging.**properties
> -Dcom.sun.jini.test.home=/opt/**src/River_Fixed/**peterConcurrentPolicy/qa
> -Dcom.sun.jini.test.port=9082 -Dcom.sun.jini.qa.harness.**
> policies=file:/opt/src/River_**Fixed/peterConcurrentPolicy/**
> qa/src/com/sun/jini/test/**resources/jinitest.policy
> -Djava.ext.dirs=/usr/jdk/jdk1.**6.0_30/jre/lib/ext:/usr/jdk/**
> packages/lib/ext:/opt/src/**River_Fixed/**peterConcurrentPolicy/qa/lib-**
> ext:/opt/src/River_Fixed/**peterConcurrentPolicy/lib-ext
> com.sun.jini.qa.harness.**MasterTest com/sun/jini/test/spec/**
> javaspace/conformance/**snapshot/**SnapshotExpirationNotifyTest.**td
>     [java] com.sun.jini.qa.harness.TestException: Not all listeners've got expected number of events.
>     [java]     at com.sun.jini.test.spec.javaspace.conformance.snapshot.SnapshotExpirationNotifyTest.run(SnapshotExpirationNotifyTest.java:370)
>     [java]     at com.sun.jini.qa.harness.MasterTest.doTest(MasterTest.java:256)
>     [java]     at com.sun.jini.qa.harness.MasterTest.main(MasterTest.java:144)
>     [java]
>     [java] TIME: 10:42:54 PM
>     [java]
>     [java] Test process was destroyed and returned code 1
>     [java] com/sun/jini/test/spec/**javaspace/conformance/**snapshot/**
> SnapshotExpirationNotifyTest.**td
>     [java] Test Failed: Test Failed: com.sun.jini.qa.harness.**TestException:
> Not all listeners've got expected number of events.
>     [java]
>     [java]
>     [java] ------------------------------**-----------
>     [java]
>     [java] SUMMARY ==============================**===
>     [java]
>     [java] com/sun/jini/test/spec/**javaspace/conformance/**snapshot/**
> SnapshotExpirationNotifyTest.**td
>     [java] Test Failed: Test Failed: com.sun.jini.qa.harness.**TestException:
> Not all listeners've got expected number of events.
>     [java]
>     [java] ------------------------------**-----------
>     [java]
>     [java] # of tests started   = 1
>     [java] # of tests completed = 1
>     [java] # of tests passed    = 0
>     [java] # of tests failed    = 1
>     [java]
>     [java] ------------------------------**-----------
>     [java]
>     [java]    Date finished:
>     [java]       Sun Mar 31 22:42:59 EST 2013
>     [java]    Time elapsed:
>     [java]       201 seconds
>     [java]
>     [java] Java Result: 1
>
> collect-result:
>
> BUILD FAILED
> /opt/src/River_Fixed/peterConcurrentPolicy/build.xml:2105: The
> following error occurred while executing this line:
> /opt/src/River_Fixed/peterConcurrentPolicy/qa/build.xml:357:
> condition satisfied
>
> Total time: 3 minutes 30 seconds
>
>