You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@vcl.apache.org by ar...@apache.org on 2014/03/06 19:50:32 UTC
svn commit: r1574996 - in /vcl/trunk/managementnode: bin/vcld
lib/VCL/Module/Semaphore.pm lib/VCL/Module/State.pm lib/VCL/new.pm
lib/VCL/utils.pm
Author: arkurth
Date: Thu Mar 6 18:50:32 2014
New Revision: 1574996
URL: http://svn.apache.org/r1574996
Log:
VCL-734
Updated new.pm::computer_not_being_used so that it no longer calls itself recursively, because the recursion could result in recursive loops. It now uses a loop with a limited number of attempts (5) instead.
Moved the command that renames the forked process from State.pm::initialize to vcld::make_new_child. This was done so that the rename occurs as early as possible. The processes of other reservations run pgrep and use the process name to determine whether another process is running for a reservation, so renaming earlier allows a running process to be detected sooner after it begins.
Added Semaphore.pm::get_process_semaphore_ids. This is used in new.pm::computer_not_being_used to detect if a running reload process may be transferring an image. It's possible for old, stale semaphore lockfiles to exist in /tmp. This subroutine verifies that the lockfile matches the PID of the other running process.
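Added a check to State.pm::initialize for the case where it fails to update the request state to pending. There are a few cases where this is normal, such as when the request was deleted or completed in the brief time before it could be updated to pending; in those cases the process now exits quietly instead of marking the reservation as failed.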
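Updated utils.pm::reservation_being_processed to return either an array or an integer depending on the calling context: in list context it returns the list of running processes found for the reservation, and in scalar context it returns the number of running processes (so it can still be used as a boolean).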
Modified:
vcl/trunk/managementnode/bin/vcld
vcl/trunk/managementnode/lib/VCL/Module/Semaphore.pm
vcl/trunk/managementnode/lib/VCL/Module/State.pm
vcl/trunk/managementnode/lib/VCL/new.pm
vcl/trunk/managementnode/lib/VCL/utils.pm
Modified: vcl/trunk/managementnode/bin/vcld
URL: http://svn.apache.org/viewvc/vcl/trunk/managementnode/bin/vcld?rev=1574996&r1=1574995&r2=1574996&view=diff
==============================================================================
--- vcl/trunk/managementnode/bin/vcld (original)
+++ vcl/trunk/managementnode/bin/vcld Thu Mar 6 18:50:32 2014
@@ -539,6 +539,9 @@ sub make_new_child {
$ENV{state} = $state;
$ENV{data} = $data_structure;
+ # Rename this process to include some request info
+ rename_vcld_process($data_structure);
+
#notify($ERRORS{'DEBUG'}, $LOGFILE, "creating new process");
eval "use $state_module";
if ($EVAL_ERROR) {
Modified: vcl/trunk/managementnode/lib/VCL/Module/Semaphore.pm
URL: http://svn.apache.org/viewvc/vcl/trunk/managementnode/lib/VCL/Module/Semaphore.pm?rev=1574996&r1=1574995&r2=1574996&view=diff
==============================================================================
--- vcl/trunk/managementnode/lib/VCL/Module/Semaphore.pm (original)
+++ vcl/trunk/managementnode/lib/VCL/Module/Semaphore.pm Thu Mar 6 18:50:32 2014
@@ -354,6 +354,70 @@ sub get_reservation_semaphore_ids {
#/////////////////////////////////////////////////////////////////////////////
+=head2 get_process_semaphore_ids
+
+ Parameters : $pid
+ Returns : array
+ Description : Returns the Semaphore IDs opened by the process PID specified by
+ the argument. An empty list is returned if no Semaphores are
+ open.
+
+=cut
+
+sub get_process_semaphore_ids {
+ my $self = shift;
+ unless (ref($self) && $self->isa('VCL::Module')) {
+ notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
+ return;
+ }
+
+ my $pid = shift;
+ if (!$pid) {
+ notify($ERRORS{'WARNING'}, 0, "process PID argument was not supplied");
+ return;
+ }
+
+ my @lockfile_paths = $self->mn_os->find_files($LOCKFILE_DIRECTORY_PATH, "*.$LOCKFILE_EXTENSION");
+ if (!@lockfile_paths) {
+ notify($ERRORS{'DEBUG'}, 0, "did not find any lockfiles on this management node");
+ return ();
+ }
+
+ my @process_semaphore_ids;
+
+ for my $lockfile_path (@lockfile_paths) {
+ my ($semaphore_id) = $lockfile_path =~ /([^\/]+)\.$LOCKFILE_EXTENSION/;
+
+ my @lockfile_contents = $self->mn_os->get_file_contents($lockfile_path);
+ if (!@lockfile_contents) {
+ notify($ERRORS{'WARNING'}, 0, "failed to retrieve contents of lockfile: $lockfile_path");
+ next;
+ }
+
+ my $lockfile_line = $lockfile_contents[0];
+
+ # Line should contain a string similar to this:
+ # 31862 vclark 2376:3116 tomaintenance vclv1-42>vclh3-12.hpc.ncsu.edu vmwarewinxp-base234-v14 admin
+ my ($lockfile_pid) = $lockfile_line =~ /^(\d+) /;
+
+ if (!defined($lockfile_pid)) {
+ notify($ERRORS{'WARNING'}, 0, "failed to determine PID from 1st line in $lockfile_path: '$lockfile_line'");
+ next;
+ }
+
+ if ($lockfile_pid == $pid) {
+ notify($ERRORS{'DEBUG'}, 0, "semaphore '$semaphore_id' belongs to process $pid");
+ push @process_semaphore_ids, $semaphore_id;
+ }
+ else {
+ notify($ERRORS{'DEBUG'}, 0, "semaphore '$semaphore_id' does NOT belong to process $pid");
+ }
+ }
+ return @process_semaphore_ids;
+}
+
+#/////////////////////////////////////////////////////////////////////////////
+
=head2 DESTROY
Parameters : none
Modified: vcl/trunk/managementnode/lib/VCL/Module/State.pm
URL: http://svn.apache.org/viewvc/vcl/trunk/managementnode/lib/VCL/Module/State.pm?rev=1574996&r1=1574995&r2=1574996&view=diff
==============================================================================
--- vcl/trunk/managementnode/lib/VCL/Module/State.pm (original)
+++ vcl/trunk/managementnode/lib/VCL/Module/State.pm Thu Mar 6 18:50:32 2014
@@ -106,9 +106,6 @@ sub initialize {
return;
}
- # Rename this process to include some request info
- rename_vcld_process($self->data);
-
# Update reservation lastcheck value to prevent processes from being forked over and over if a problem occurs
my $reservation_lastcheck = update_reservation_lastcheck($reservation_id);
if ($reservation_lastcheck) {
@@ -185,6 +182,23 @@ sub initialize {
# Update the request state to pending for this reservation
if (!update_request_state($request_id, "pending", $request_state_name)) {
+ # Check if request was deleted
+ if (is_request_deleted($request_id)) {
+ exit;
+ }
+
+ # Check the current state
+ my ($current_request_state, $current_request_laststate) = get_request_current_state_name($request_id);
+ if (!$current_request_state) {
+ # Request probably complete and already removed
+ notify($ERRORS{'DEBUG'}, 0, "current request state could not be retrieved, it was probably completed by another vcld process");
+ exit;
+ }
+ if ($current_request_state =~ /^(deleted|complete)$/ || $current_request_laststate =~ /^(deleted)$/) {
+ notify($ERRORS{'DEBUG'}, 0, "current request state: $current_request_state/$current_request_laststate, exiting");
+ exit;
+ }
+
$self->reservation_failed("failed to update request state to pending");
}
}
@@ -635,7 +649,7 @@ sub state_exit {
}
# Update the request state
- if (!is_request_deleted($request_id)) {
+ if ($request_state_name_old ne 'deleted' && !is_request_deleted($request_id)) {
# Check if the request state has already been updated
# This can occur if another reservation in a cluster failed
my ($request_state_name_current, $request_laststate_name_current) = get_request_current_state_name($request_id);
Modified: vcl/trunk/managementnode/lib/VCL/new.pm
URL: http://svn.apache.org/viewvc/vcl/trunk/managementnode/lib/VCL/new.pm?rev=1574996&r1=1574995&r2=1574996&view=diff
==============================================================================
--- vcl/trunk/managementnode/lib/VCL/new.pm (original)
+++ vcl/trunk/managementnode/lib/VCL/new.pm Thu Mar 6 18:50:32 2014
@@ -553,7 +553,7 @@ sub reload_image {
notify($ERRORS{'OK'}, 0, "node status not checked, node_status() not implemented by " . ref($self->provisioner) . ", assuming load=true");
}
- #If reinstall state - force reload state
+ # If reinstall state - force reload state
$computer_state_name = 'reload' if ($request_state_name eq 'reinstall');
if ($computer_state_name eq 'reload') {
@@ -699,184 +699,199 @@ sub reload_image {
sub computer_not_being_used {
my $self = shift;
+
my $request_id = $self->data->get_request_id();
my $computer_id = $self->data->get_computer_id();
my $computer_short_name = $self->data->get_computer_short_name();
- my $computer_state_name = $self->data->get_computer_state_name();
my $imagerevision_id = $self->data->get_imagerevision_id();
my $image_name = $self->data->get_image_name();
my $image_reloadtime = $self->data->get_image_reload_time();
my $request_state_name = $self->data->get_request_state_name();
- # Return 0 if computer state is maintenance, deleted, vmhostinuse
- if ($computer_state_name =~ /^(deleted|maintenance|vmhostinuse)$/) {
- notify($ERRORS{'WARNING'}, 0, "$computer_short_name is NOT available, its state is $computer_state_name");
- return 0;
- }
-
- # Warn if computer state isn't available or reload - except for reinstall requests
- if ($request_state_name !~ /^(reinstall)$/ && $computer_state_name !~ /^(available|reload)$/) {
- notify($ERRORS{'WARNING'}, 0, "$computer_short_name state is $computer_state_name, checking if any conflicting reservations are active");
- }
-
- # Check if there is another request using this machine
- # Get a hash containing all of the reservations for the computer
- notify($ERRORS{'OK'}, 0, "retrieving info for reservations assigned to $computer_short_name");
- my $competing_request_info = get_request_by_computerid($computer_id);
-
- # There should be at least 1 request -- the one being processed
- if (!$competing_request_info) {
- notify($ERRORS{'WARNING'}, 0, "failed to retrieve any requests for computer id=$computer_id, there should be at least 1");
- return;
- }
-
- # Remove the request currently being processed from the hash
- delete $competing_request_info->{$request_id};
-
- if (!keys(%$competing_request_info)) {
- notify($ERRORS{'OK'}, 0, "$computer_short_name is not assigned to any other reservations");
- return 1;
- }
-
- # Loop through the competing requests
- COMPETING_REQUESTS: for my $competing_request_id (sort keys %$competing_request_info) {
- my $competing_reservation_id = $competing_request_info->{$competing_request_id}{data}->get_reservation_id();
- my $competing_request_state = $competing_request_info->{$competing_request_id}{data}->get_request_state_name();
- my $competing_request_laststate = $competing_request_info->{$competing_request_id}{data}->get_request_laststate_name();
- my $competing_imagerevision_id = $competing_request_info->{$competing_request_id}{data}->get_imagerevision_id();
- my $competing_request_start = $competing_request_info->{$competing_request_id}{data}->get_request_start_time();
- my $competing_request_end = $competing_request_info->{$competing_request_id}{data}->get_request_end_time();
-
- my $competing_request_start_epoch = convert_to_epoch_seconds($competing_request_start);
- my $competing_request_end_epoch = convert_to_epoch_seconds($competing_request_end);
-
- my $now_epoch = time;
-
- my $competing_request_info_string;
- $competing_request_info_string .= "request:reservation ID: $competing_request_id:$competing_reservation_id\n";
- $competing_request_info_string .= "request state: $competing_request_state/$competing_request_laststate\n";
- $competing_request_info_string .= "request start time: $competing_request_start\n";
- $competing_request_info_string .= "request end time: $competing_request_end";
-
- notify($ERRORS{'DEBUG'}, 0, "checking reservation assigned to $computer_short_name:\n$competing_request_info_string");
-
- # Check for existing image creation requests
- if ($competing_request_state =~ /^(image)$/ || $competing_request_laststate =~ /^(image)$/) {
- notify($ERRORS{'WARNING'}, 0, "$computer_short_name is NOT available, it is assigned to an existing imaging reservation:\n$competing_request_info_string");
+ my $attempt_limit = 5;
+ ATTEMPT: for (my $attempt = 1; $attempt <= $attempt_limit; $attempt++) {
+ notify($ERRORS{'OK'}, 0, "attempt $attempt/$attempt_limit: checking for competing reservations assigned to $computer_short_name");
+ my $computer_state_name = $self->data->get_computer_state_name();
+
+ # Return 0 if computer state is maintenance, deleted, vmhostinuse
+ if ($computer_state_name =~ /^(deleted|maintenance|vmhostinuse)$/) {
+ notify($ERRORS{'WARNING'}, 0, "$computer_short_name is NOT available, its state is $computer_state_name");
return 0;
}
- # Check for any requests in the maintenance state
- if ($competing_request_state =~ /^(maintenance)$/) {
- notify($ERRORS{'WARNING'}, 0, "$computer_short_name is NOT available, it is assigned to an existing request in the '$competing_request_state' state:\n$competing_request_info_string");
- return 0;
+ # Warn if computer state isn't available or reload - except for reinstall requests
+ if ($request_state_name !~ /^(reinstall)$/ && $computer_state_name !~ /^(available|reload)$/) {
+ notify($ERRORS{'WARNING'}, 0, "$computer_short_name state is $computer_state_name, checking if any conflicting reservations are active");
}
- # Ignore 'complete', 'failed' requests
- if ($competing_request_state =~ /^(complete|failed)$/) {
- notify($ERRORS{'DEBUG'}, 0, "ignoring request in state: $competing_request_state/$competing_request_laststate");
- next COMPETING_REQUESTS;
- }
-
- # Check if the other reservation assigned to computer hasn't started yet
- if ($competing_request_start_epoch > $now_epoch) {
- # If they overlap, let the other reservation worry about it
- notify($ERRORS{'OK'}, 0, "request $competing_request_id:$competing_reservation_id start time is in the future: $competing_request_start");
- next COMPETING_REQUESTS;
- }
-
- # Check if the other reservation is a 'reload' reservation for the same image revision
- if ($competing_imagerevision_id eq $imagerevision_id && $competing_request_state eq 'pending' && $competing_request_laststate =~ /(reload)/) {
- notify($ERRORS{'OK'}, 0, "reservation $competing_reservation_id is currently loading $computer_short_name with the correct image: $image_name, waiting for the other reload process to complete");
-
- my $message = "reload reservation $competing_request_id:$competing_reservation_id is still loading $computer_short_name with $image_name";
- my $total_wait_seconds = (60 * $image_reloadtime);
- my $attempt_delay_seconds = 30;
-
- # Loop until other process is done
- if ($self->code_loop_timeout(sub{return !reservation_being_processed(@_)}, [$competing_reservation_id], $message, $total_wait_seconds, $attempt_delay_seconds)) {
- notify($ERRORS{'DEBUG'}, 0, "reload reservation $competing_reservation_id finished loading $computer_short_name with $image_name");
-
- # Call this subroutine again in order to retrieve a current list of competing reservations
- # The list of competing reservations may have changed while waiting
- notify($ERRORS{'OK'}, 0, "calling this subroutine again to retrieve the current list of competing reservations assigned to $computer_short_name");
- return $self->computer_not_being_used();
- }
- else {
- notify($ERRORS{'WARNING'}, 0, "reload reservation $competing_reservation_id has NOT finished loading $computer_short_name with $image_name, waited $total_wait_seconds seconds");
- }
+ # Check if there is another request using this machine
+ # Get a hash containing all of the reservations for the computer
+ my $competing_request_info = get_request_by_computerid($computer_id);
+
+ # There should be at least 1 request -- the one being processed
+ if (!$competing_request_info) {
+ notify($ERRORS{'WARNING'}, 0, "failed to retrieve any requests for computer id=$computer_id, there should be at least 1");
+ return;
}
- # Check if the other reservation assigned to computer end time has been reached
- # -or-
- # Reload reservation -- either for a different image or the previous check loop monitoring the reload process for the same image timed out
- #
- if ($competing_request_end_epoch <= $now_epoch ||
- ($competing_request_state =~ /(timeout|deleted|reload)/) ||
- ($competing_request_state eq 'pending' && $competing_request_laststate =~ /(timeout|deleted|reload)/)) {
-
- # Update the competing request state to complete
- # If this fails, check if the competing request has already been deleted
- # Do this before checking if the reservation is being processed to prevent new processes from being created
- if (update_request_state($competing_request_id, "complete", ($competing_request_state eq 'pending') ? $competing_request_laststate : $competing_request_state)) {
- notify($ERRORS{'OK'}, 0, "request state set to 'complete' for competing reservation $competing_reservation_id");
- }
- elsif (is_request_deleted($competing_request_id)) {
- notify($ERRORS{'OK'}, 0, "request state not set to 'complete' for competing reservation $competing_reservation_id because request has been deleted");
+ # Remove the request currently being processed from the hash
+ delete $competing_request_info->{$request_id};
+
+ if (!keys(%$competing_request_info)) {
+ notify($ERRORS{'OK'}, 0, "$computer_short_name is not assigned to any other reservations");
+ return 1;
+ }
+
+ # Loop through the competing requests
+ COMPETING_REQUESTS: for my $competing_request_id (sort keys %$competing_request_info) {
+ my $competing_reservation_id = $competing_request_info->{$competing_request_id}{data}->get_reservation_id();
+ my $competing_request_state = $competing_request_info->{$competing_request_id}{data}->get_request_state_name();
+ my $competing_request_laststate = $competing_request_info->{$competing_request_id}{data}->get_request_laststate_name();
+ my $competing_imagerevision_id = $competing_request_info->{$competing_request_id}{data}->get_imagerevision_id();
+ my $competing_request_start = $competing_request_info->{$competing_request_id}{data}->get_request_start_time();
+ my $competing_request_end = $competing_request_info->{$competing_request_id}{data}->get_request_end_time();
+
+ my $competing_request_start_epoch = convert_to_epoch_seconds($competing_request_start);
+ my $competing_request_end_epoch = convert_to_epoch_seconds($competing_request_end);
+
+ my $now_epoch = time;
+
+ my $competing_request_info_string;
+ $competing_request_info_string .= "request:reservation ID: $competing_request_id:$competing_reservation_id\n";
+ $competing_request_info_string .= "request state: $competing_request_state/$competing_request_laststate\n";
+ $competing_request_info_string .= "request start time: $competing_request_start\n";
+ $competing_request_info_string .= "request end time: $competing_request_end";
+
+ notify($ERRORS{'DEBUG'}, 0, "checking reservation assigned to $computer_short_name:\n$competing_request_info_string");
+
+ # Check for existing image creation requests
+ if ($competing_request_state =~ /^(image)$/ || $competing_request_laststate =~ /^(image)$/) {
+ notify($ERRORS{'WARNING'}, 0, "$computer_short_name is NOT available, it is assigned to an existing imaging reservation:\n$competing_request_info_string");
+ return 0;
}
- else {
- notify($ERRORS{'WARNING'}, 0, "computer $computer_short_name is NOT available, failed to set request state to 'complete', competing request has NOT been deleted:\n$competing_request_info_string");
+
+ # Check for any requests in the maintenance state
+ if ($competing_request_state =~ /^(maintenance)$/) {
+ notify($ERRORS{'WARNING'}, 0, "$computer_short_name is NOT available, it is assigned to an existing request in the '$competing_request_state' state:\n$competing_request_info_string");
return 0;
}
- # Check if the other reservation is still being processed
- if (reservation_being_processed($competing_reservation_id)) {
- notify($ERRORS{'OK'}, 0, "reservation $competing_reservation_id is currently being processed, making sure the process doesn't have any Semaphore objects open before attempting to kill it");
+ # Ignore 'complete', 'failed' requests
+ if ($competing_request_state =~ /^(complete|failed)$/) {
+ notify($ERRORS{'DEBUG'}, 0, "ignoring request in state: $competing_request_state/$competing_request_laststate");
+ next COMPETING_REQUESTS;
+ }
+
+ # Check if the other reservation assigned to computer hasn't started yet
+ if ($competing_request_start_epoch > $now_epoch) {
+ # If they overlap, let the other reservation worry about it
+ notify($ERRORS{'OK'}, 0, "request $competing_request_id:$competing_reservation_id start time is in the future: $competing_request_start");
+ next COMPETING_REQUESTS;
+ }
+
+ # Check if the other reservation is a 'reload' reservation for the same image revision
+ if ($competing_imagerevision_id eq $imagerevision_id && $competing_request_state =~ /^(pending|reload)$/ && $competing_request_laststate =~ /(reload)/) {
+ notify($ERRORS{'OK'}, 0, "reservation $competing_reservation_id is assigned to $computer_short_name with the same image revision: $image_name, waiting for the other reload process to complete");
- # Create a Semaphore object and check if the competing process owns any of its own Semaphore objects
- # This would indicate it's doing something such as retrieving an image
- # Don't kill it or a partial image may be copied
- my $semaphore = VCL::Module::Semaphore->new();
- if ($semaphore->get_reservation_semaphore_ids($competing_reservation_id)) {
- notify($ERRORS{'WARNING'}, 0, "computer $computer_short_name is NOT available, reservation $competing_reservation_id is still being processed and owns a Semaphore object, not killing the competing process, it may be transferring an image:\n$competing_request_info_string");
- return 0;
- }
+ my $message = "waiting for reload reservation $competing_request_id:$competing_reservation_id to finish loading $computer_short_name with $image_name";
- # Kill competing process and update request state to complete
- notify($ERRORS{'OK'}, 0, "attempting to kill process of competing reservation $competing_reservation_id assigned to $computer_short_name");
- if (kill_reservation_process($competing_reservation_id)) {
- notify($ERRORS{'OK'}, 0, "killed process for competing reservation $competing_reservation_id");
- }
+ # Wait at least 5 minutes
+ $image_reloadtime = 5 if $image_reloadtime < 10;
+ my $total_wait_seconds = (60 * $image_reloadtime);
+ my $attempt_delay_seconds = 10;
- # Wait for competing process to end before verifying that it was successfully killed
- sleep 2;
+ # Loop until other process is done
+ if ($self->code_loop_timeout(sub{return !reservation_being_processed(@_)}, [$competing_reservation_id], $message, $total_wait_seconds, $attempt_delay_seconds)) {
+ notify($ERRORS{'DEBUG'}, 0, "reload reservation $competing_reservation_id is not loading $computer_short_name with $image_name");
+ # Verified competing 'reload' is not being processed verify it is not stuck in pending/reload
+ my ($current_competing_request_state, $current_competing_request_laststate) = get_request_current_state_name($competing_request_id);
+ if ($current_competing_request_state eq 'pending' && $current_competing_request_laststate eq 'reload') {
+ notify($ERRORS{'OK'}, 0, "state of competing reload request $competing_request_id:$competing_reservation_id is $current_competing_request_state/$current_competing_request_laststate, verified it is not being processed, changing state of competing request $competing_request_id to 'complete'");
+ update_request_state($competing_request_id, 'complete', 'reload');
+ }
+
+ # Try again in order to retrieve a current list of competing reservations
+ # The list of competing reservations may have changed while waiting
+ notify($ERRORS{'OK'}, 0, "making another attempt to retrieve the current list of competing reservations assigned to $computer_short_name");
+ next ATTEMPT;
+ }
+ else {
+ notify($ERRORS{'WARNING'}, 0, "reload reservation $competing_reservation_id has NOT finished loading $computer_short_name with $image_name, waited $total_wait_seconds seconds");
+ }
+ }
+
+ # Check if the other reservation assigned to computer end time has been reached
+ # -or-
+ # Reload reservation -- either for a different image or the previous check loop monitoring the reload process for the same image timed out
+ if ($competing_request_end_epoch <= $now_epoch ||
+ ($competing_request_state =~ /(timeout|deleted|reload)/) ||
+ ($competing_request_state eq 'pending' && $competing_request_laststate =~ /(timeout|deleted|reload)/)) {
- # Verify that the competing reservation process was killed
- if (reservation_being_processed($competing_reservation_id)) {
- notify($ERRORS{'WARNING'}, 0, "computer $computer_short_name is NOT available, failed to kill process for competing reservation, competing reservation is still being processed:\n$competing_request_info_string");
+ # Update the competing request state to complete
+ # If this fails, check if the competing request has already been deleted
+ # Do this before checking if the reservation is being processed to prevent new processes from being created
+ if (update_request_state($competing_request_id, "complete", ($competing_request_state eq 'pending') ? $competing_request_laststate : $competing_request_state)) {
+ notify($ERRORS{'OK'}, 0, "request state set to 'complete' for competing reservation $competing_reservation_id");
+ }
+ elsif (is_request_deleted($competing_request_id)) {
+ notify($ERRORS{'OK'}, 0, "request state not set to 'complete' for competing reservation $competing_reservation_id because request has been deleted");
+ }
+ else {
+ notify($ERRORS{'CRITICAL'}, 0, "computer $computer_short_name is NOT available, failed to set request state to 'complete', competing request has NOT been deleted:\n$competing_request_info_string");
return 0;
}
+
+ # Check if the other reservation is still being processed
+ if (my @competing_reservation_pids = reservation_being_processed($competing_reservation_id)) {
+ notify($ERRORS{'OK'}, 0, "reservation $competing_reservation_id is currently being processed by PID(s): " . join(', ', @competing_reservation_pids) . ", making sure the process doesn't have any Semaphore objects open before attempting to kill it");
+
+ # Create a Semaphore object and check if the competing process owns any of its own Semaphore objects
+ # This would indicate it's doing something such as retrieving an image
+ # Don't kill it or a partial image may be copied
+ my $semaphore = VCL::Module::Semaphore->new();
+ for my $competing_reservation_pid (@competing_reservation_pids) {
+ if ($semaphore->get_process_semaphore_ids($competing_reservation_pid)) {
+ notify($ERRORS{'CRITICAL'}, 0, "computer $computer_short_name is NOT available, reservation $competing_reservation_id is still being processed and owns a Semaphore object, not killing the competing process, it may be transferring an image:\n$competing_request_info_string");
+ return;
+ }
+ }
+
+ # Kill competing process and update request state to complete
+ notify($ERRORS{'OK'}, 0, "attempting to kill process of competing reservation $competing_reservation_id assigned to $computer_short_name");
+ if (kill_reservation_process($competing_reservation_id)) {
+ notify($ERRORS{'OK'}, 0, "killed process for competing reservation $competing_reservation_id");
+ }
+
+ # Wait for competing process to end before verifying that it was successfully killed
+ sleep 2;
+
+ # Verify that the competing reservation process was killed
+ if (reservation_being_processed($competing_reservation_id)) {
+ notify($ERRORS{'WARNING'}, 0, "computer $computer_short_name is NOT available, failed to kill process for competing reservation, competing reservation is still being processed:\n$competing_request_info_string");
+ return 0;
+ }
+ }
+
+ # Try again in order to retrieve a current list of competing reservations
+ # The list of competing reservations may have changed
+ # A new reload reservation may have been added by timeout/deleted processes
+ notify($ERRORS{'OK'}, 0, "making another attempt to retrieve the current list of competing reservations assigned to $computer_short_name");
+ next ATTEMPT;
+ }
+ elsif (reservation_being_processed($competing_reservation_id)) {
+ notify($ERRORS{'WARNING'}, 0, "computer $computer_short_name is NOT available, assigned overlapping reservations, competing reservation is currently being processed:\n$competing_request_info_string");
+ return 0;
+ }
+ else {
+ notify($ERRORS{'WARNING'}, 0, "computer $computer_short_name is NOT available, assigned overlapping reservations, competing reservation is NOT currently being processed:\n$competing_request_info_string");
+ return 0;
}
-
- # Call this subroutine again in order to retrieve a current list of competing reservations
- # The list of competing reservations may have changed
- # A new reload reservation may have been added by timeout/deleted processes
- notify($ERRORS{'OK'}, 0, "calling this subroutine again to retrieve the current list of competing reservations assigned to $computer_short_name");
- return $self->computer_not_being_used();
- }
- elsif (reservation_being_processed($competing_reservation_id)) {
- notify($ERRORS{'WARNING'}, 0, "computer $computer_short_name is NOT available, assigned overlapping reservations, competing reservation is currently being processed:\n$competing_request_info_string");
- return 0;
- }
- else {
- notify($ERRORS{'WARNING'}, 0, "computer $computer_short_name is NOT available, assigned overlapping reservations, competing reservation is NOT currently being processed:\n$competing_request_info_string");
- return 0;
}
+
+ # Checked all competing requests and didn't find any conflicting reservations
+ notify($ERRORS{'OK'}, 0, "$computer_short_name is available, did not find any conflicting reservations");
+ return 1;
}
-
- # Checked all competing requests and didn't find any conflicting reservations
- notify($ERRORS{'OK'}, 0, "$computer_short_name is available, did not find any conflicting reservations");
- return 1;
}
#/////////////////////////////////////////////////////////////////////////////
Modified: vcl/trunk/managementnode/lib/VCL/utils.pm
URL: http://svn.apache.org/viewvc/vcl/trunk/managementnode/lib/VCL/utils.pm?rev=1574996&r1=1574995&r2=1574996&view=diff
==============================================================================
--- vcl/trunk/managementnode/lib/VCL/utils.pm (original)
+++ vcl/trunk/managementnode/lib/VCL/utils.pm Thu Mar 6 18:50:32 2014
@@ -1290,16 +1290,17 @@ AND request.stateid = currentstate.id
AND request.laststateid = currentlaststate.id
AND state.name = '$state_name'
AND laststate.name = '$laststate_name'
+AND currentstate.name != 'maintenance'
EOF
if (!$force) {
if ($state_name eq 'pending') {
+ # Avoid: deleted/inuse --> pending/inuse
$update_statement .= "AND laststate.name = currentstate.name\n";
}
elsif ($state_name !~ /(complete|failed|maintenance)/) {
# New state is not pending
- # Need to avoid:
- # pending/image --> inuse/inuse
+ # Avoid: pending/image --> inuse/inuse
$update_statement .= "AND currentstate.name = 'pending'\n";
$update_statement .= "AND currentlaststate.name = '$laststate_name'\n";
}
@@ -6541,7 +6542,6 @@ sub get_user_info {
notify($ERRORS{'DEBUG'}, 0, "retrieving current user info for '$user_identifier' from database, cached data is stale: $data_age_seconds seconds old");
}
}
- notify($ERRORS{'DEBUG'}, 0, "retrieving user info: $user_identifier");
# If affiliation identifier argument wasn't supplied, set it to % wildcard
$affiliation_identifier = '%' if !$affiliation_identifier;
@@ -6653,7 +6653,7 @@ EOF
if (!$user_info->{uid}) {
$user_info->{uid} = ($user_info->{id} + 500);
$user_info->{STANDALONE} = 1;
- notify($ERRORS{'DEBUG'}, 0, "UID value is not configured for user $user_login_id, setting UID to VCL user ID: $user_login_id, standalone: 1");
+ notify($ERRORS{'DEBUG'}, 0, "UID value is not configured for user '$user_login_id', setting UID: $user_info->{uid}, standalone: 1");
}
# Fix the unityid if the user's UID is >= 1,000,000
@@ -7327,7 +7327,7 @@ sub format_data {
}
# If a string was passed which appears to be XML, convert it to a hash using XML::Simple
- if (scalar(@data) == 1 && !ref($data[0]) && $data[0] =~ /^</) {
+ if (scalar(@data) == 1 && defined($data[0]) && !ref($data[0]) && $data[0] =~ /^</) {
my $xml_hashref = xml_string_to_hash($data[0]);
return format_data($xml_hashref);
}
@@ -7638,7 +7638,17 @@ sub switch_vmhost_id {
Description : Retrieves the computerloadlog entries for all reservations
belonging to the request. A hash is constructed with keys set to
the reservation IDs. The data of each key is a reference to an
- array containing the computerloadstate names.
+ array containing the computerloadstate names. Example:
+ {
+ 3115 => [
+ "begin",
+ ],
+ 3116 => [
+ "begin",
+ "nodeready"
+ ]
+ }
+
=cut
@@ -7695,10 +7705,9 @@ EOF
=head2 reservation_being_processed
Parameters : reservation ID
- Returns : true if reservation is avtively being processed, false otherwise
+ Returns : array or boolean
Description : Checks the computerloadlog table for rows matching the
- reservation ID and loadstate = begin. Returns true if any
- matching rows exist, false otherwise.
+ reservation ID and loadstate = begin.
=cut
@@ -7732,11 +7741,11 @@ sub reservation_being_processed {
# Check if at least 1 row was returned
my $computerloadlog_exists;
if (scalar @computerloadlog_rows == 1) {
- notify($ERRORS{'DEBUG'}, 0, "computerloadlog 'begin' entry exists for reservation");
+ notify($ERRORS{'DEBUG'}, 0, "computerloadlog 'begin' entry exists for reservation $reservation_id");
$computerloadlog_exists = 1;
}
elsif (scalar @computerloadlog_rows > 1) {
- notify($ERRORS{'WARNING'}, 0, "multiple computerloadlog 'begin' entries exist for reservation");
+ notify($ERRORS{'WARNING'}, 0, "multiple computerloadlog 'begin' entries exist for reservation $reservation_id");
$computerloadlog_exists = 1;
}
else {
@@ -7749,21 +7758,18 @@ sub reservation_being_processed {
# Check the results and return
if ($computerloadlog_exists && @processes_running) {
- notify($ERRORS{'DEBUG'}, 0, "reservation is currently being processed, computerloadlog 'begin' entry exists and running process was found: @processes_running");
- return 1;
+ notify($ERRORS{'DEBUG'}, 0, "reservation $reservation_id is currently being processed, computerloadlog 'begin' entry exists and running process was found: @processes_running");
}
elsif (!$computerloadlog_exists && @processes_running) {
- notify($ERRORS{'WARNING'}, 0, "computerloadlog 'begin' entry does NOT exist but running process was found: @processes_running, assuming reservation is currently being processed");
- return 1;
+ notify($ERRORS{'WARNING'}, 0, "computerloadlog 'begin' entry does NOT exist but running process was found: @processes_running, assuming reservation $reservation_id is currently being processed");
}
elsif ($computerloadlog_exists && !@processes_running) {
- notify($ERRORS{'WARNING'}, 0, "computerloadlog 'begin' entry exists but running process was NOT found, assuming reservation is NOT currently being processed");
- return 0;
+ notify($ERRORS{'WARNING'}, 0, "computerloadlog 'begin' entry exists but running process was NOT found, assuming reservation $reservation_id is NOT currently being processed");
}
else {
- notify($ERRORS{'DEBUG'}, 0, "reservation is NOT currently being processed");
- return 0;
+ notify($ERRORS{'DEBUG'}, 0, "reservation $reservation_id is NOT currently being processed");
}
+ return wantarray ? @processes_running : scalar(@processes_running);
}
#/////////////////////////////////////////////////////////////////////////////
@@ -8928,8 +8934,14 @@ sub setup_get_hash_choice {
if ($display_key1) {
$display_name = $hash_ref->{$key}{$display_key1};
}
+
if ($display_key2) {
- $display_name .= " (" . $hash_ref->{$key}{$display_key2} . ")";
+ if ($display_key2 =~ /^([^-]+)-([^-]+)$/) {
+ $display_name .= " (" . $hash_ref->{$key}{$1}{$2} . ")";
+ }
+ else {
+ $display_name .= " (" . $hash_ref->{$key}{$display_key2} . ")";
+ }
}
if (!$display_name) {