#!/usr/bin/perl
# mpiBLAST run script by Joseph Landman
# copyright 2003 Scalable Informatics LLC
# http://scalableinformatics.com
#
# This code is dual licensed.
# See http://scalableinformatics.com/license/dual.html
# for specific details.
#
# Usage:
#    qsub -pe mpich 8 run_mpiblast [options for sge_mpiblast] \
#         [-- options for blastall |
#          --blastopt="options for blastall"]
#    run_mpiblast [options for sge_mpiblast] \
#         [-- options for blastall |
#          --blastopt="options for blastall"]
#
#
# v1.1 4-Aug-2003:
#    Bug fixes, and some additional functionality.
#    --debug now turns on mpiBLAST debug (as it should).  This
#    will result in significant amounts of output.
#    --help ...
#
#    DOCUMENTATION!  First pass
# v1.0 1-June-2003:
#    Merging the basic DRM-BLAST code with the sge_mpiblast script:
#    stripped the DRM objects out of the DRM-BLAST, and used the
#    shell of this code to handle running the job.

### handle running under Sun GridEngine
# (the "#$" directive below must start in column 0 for the scheduler
# to recognise it)
#$ -S /usr/bin/perl

use strict;             # Do the right thing...
use Getopt::Long;       # process options
use POSIX;              # POSIX functions
use Sys::Hostname;      # portable hostname
use Data::Dumper;       # for debugging
use IO::File;           # OO file access
use File::Copy;         # OO/nonOO file copy
use Benchmark;          # basic timing

# constants
use constant true  => (1==1);   # makes some programming more
use constant false => (1==0);   # obvious.  Syntactic sugar
                                # but sometimes it helps.  As in below...
# variables
# (file-scoped lexicals shared by the main program and the helper subs)
my ($verbose,$debug,$temp,$local_blast_work_dir,$delta_t);
my ($generate_conf_files,$useconf,$full_temp_path,$rc);
my ($user,$home_dir,$uid,$gid,@pwent,$date,%perm_vector);
my ($time,$line,$elt,$writable,@list,@list2,$config_file,%config);
my ($mpiblast,$mpirun,$shared_db_dir,$local_db_dir,$run_command);
my ($file,$test,%option_list,$x,$y,$cp,$ncpu,$t_start,$t_finish);
my (%Kid_Status,%incoming,%outgoing,$destination,@stage_in,@stage_out);
my ($machinefile,$ncbirc,$data,$blast_options,$pid,$run_fh,$dry_run);
my ($start_dir,$input_source,$mpirun_options,$help);

# initial conditions
$debug = $verbose = false;      # set both to false
$generate_conf_files = true;    # generate config files
                                # .ncbirc and .mpiblast
                                # by default

# change shells (needed, DO NOT CHANGE this, even if you cannot stand
# tcsh, and have participated in comp.unix.shells.advocacy ...
# Not trying to change anyones mind about shells, just doing
# the right thing for the code.
$ENV{'SHELL'} = '/bin/tcsh';

# grab directory where we are running from initially.  PWD is only an
# environment convention and may be missing (e.g. when started by a
# batch system or from cron), so fall back to asking the OS.
$start_dir = (defined($ENV{'PWD'}) && $ENV{'PWD'} ne '')
           ? $ENV{'PWD'}
           : POSIX::getcwd();
$input_source = $start_dir;     # default condition, files come from here
$destination  = $start_dir;     # default condition, files go to here

# quick reaper subroutine: handle dead child processes.
#
# Installed as the SIGCHLD handler.  Reaps every child that has already
# terminated (non-blocking) and records its wait status in
# %Kid_Status{pid}.  $! and $? are localised so that the handler, which
# can fire between any two statements of the main program, does not
# clobber the error/status values the interrupted code is about to read.
sub grim_reaper
 {
   local ($!, $?);
   my $child;
   while (($child = waitpid(-1, WNOHANG)) > 0)
    {
      $Kid_Status{$child} = $?;
    }
   $SIG{CHLD} = \&grim_reaper;  # re-arm (needed on SysV style signals)
 }
$SIG{CHLD} = \&grim_reaper;

# process options, get user information, grab current state/time/date/etc.
# get user name, uid, gid, and home directory.
# getlogin() needs a controlling terminal and returns nothing when the
# script is started by a queuing system, so fall back to the password
# entry of the real uid (the idiom recommended in perlfunc).
$user = getlogin() || getpwuid($<);
if (!defined($user) || $user eq '')
 {
   die "FATAL ERROR: unable to determine the name of the user running this job\n";
 }
@pwent = getpwnam( $user );
@pwent = getpwuid($<) if (!@pwent);     # login name unknown to passwd db
$uid      = $pwent[2];
$gid      = $pwent[3];
$home_dir = $pwent[7];
$home_dir = $ENV{'HOME'} if (!defined($home_dir) && defined($ENV{'HOME'}));

# get the current date and time from one clock reading so that the two
# strings can never straddle midnight
my @now = localtime;
$date = POSIX::strftime("%F", @now);
$time = POSIX::strftime("%T", @now);

$file = IO::File->new;

# format 'option[=parameter_type]' => reference to variable
%option_list = (
    # debugging switch
    'debug'         => \$debug,
    # verbosity switch
    'verbose'       => \$verbose,
    # top level directory of where we are
    # going to run from,
    'temp=s'        => \$temp,
    # use a pre-existing config file
    'useconf=s'     => \$useconf,
    # path to mpirun (mandatory)
    'mpirun=s'      => \$mpirun,
    # path to mpiblast (mandatory)
    'mpiblast=s'    => \$mpiblast,
    # path to shared cluster storage for
    # databases
    'shared=s'      => \$shared_db_dir,
    # path to compute node local storage
    # to store databases
    'local=s'       => \$local_db_dir,
    # number of cpus to use
    'ncpu=s'        => \$ncpu,
    # path to an mpich machinefile
    'machinefile=s' => \$machinefile,
    # path to an existing ncbirc file to use
    # so we do not make our own
    'ncbirc=s'      => \$ncbirc,
    # directory where the scoring matrices sit
    'data=s'        => \$data,
    # options for blastall if you dont want to
    # use -- at the end of the command line
    'blastopt=s'    => \$blast_options,
    'dryrun'        => \$dry_run,
    # where to copy output files upon completion
    'destination=s' => \$destination,
    # where to get input files from (defaults to $start_dir)
    'source=s'      => \$input_source,
    'help'          => \$help
);

# GetOptions reports unknown options on STDERR by itself; the result is
# kept in $rc but, as before, a bad option does not abort the run.
$rc = GetOptions( %option_list );

if ($help)
 {
   # list form: the script path is handed to perldoc verbatim instead
   # of being re-parsed by a shell (paths with blanks, quotes, ...)
   system('perldoc', $0);
   exit 0;
 }

# loop over 3 possible config file locations, and pull in %config options
# the next set of options can override the preceding set.
#
# Your 3 possible config files are (in processing order):
#
#       /etc/sge_mpiBLAST.conf, ~/.sge_mpiblastrc, .sge_mpiblastrc
#
# Config file format:  real simple, so as to make parsing trivial
#
#       # comment 1
#       parameter = value       # comment 2
#       ...
#
# Read the configuration files.  Later files override earlier ones
# because they simply overwrite the same key in %config.
my @config_candidates = ( "/etc/sge_mpiBLAST.conf" );
push @config_candidates, join("/",$home_dir,".sge_mpiblastrc")
    if (defined($home_dir));
push @config_candidates, ".sge_mpiblastrc";

foreach $config_file (@config_candidates)
 {
   next if (!(-e $config_file));        # nothing to read here

   if (!($file->open($config_file, "r")))
    {
      printf STDERR "Warning: cannot read config file \'%s\': %s\n",
             $config_file,$! if ($verbose);
      next;
    }

   while (defined($line = $file->getline))
    {
      # pull out the main text from the comments
      my ($main) = split(/\#/,$line,2);

      # only store something when the line really holds a
      # "key = value" pair; blank and malformed lines are skipped
      # instead of re-using the captures of an earlier match.
      # The key may not contain '=', so "local=/data/a=b" yields
      # key "local" and value "/data/a=b".  The value is a single
      # whitespace-free word.
      next if (!defined($main));
      next if ($main !~ /^\s*([^\s=]+)\s*=\s*(\S+)/);

      # save in the config hash
      $config{$1} = $2;
    }
   $file->close;                        # close this file, and flush the
                                        # file handle and buffers
 }

$verbose = true if ($debug);            # turn on verbosity if debugging
                                        # mode is on

# fix the option list (get rid of the "name=type" and replace with
# "name"), so that the keys can be compared with config file keys
foreach $elt (keys %option_list)        # loop over the command
 {                                      # line options
   ($x,$y) = split(/\=/,$elt,2);        # find the "=" if specified
   next if ($x eq "");                  # skip if we have nothing
                                        # to the left of the equal sign
   next if ($x eq $elt);                # plain switch, nothing to fix

   # insert the new, and delete the old
   $option_list{$x} = $option_list{$elt};
   delete $option_list{$elt};
 }

# set the values of the options from the config files if the config
# file key (compared case-insensitively) names a known command line
# option that has no value yet.  Options that already hold a defined
# value -- given on the command line, or initialised before option
# processing such as debug, verbose, source and destination -- are
# never overridden by a config file.
foreach $elt (keys %config)             # loop over the config options
 {
   my $option_name = lc($elt);
   next if (!exists($option_list{$option_name}));      # not one of ours
   next if (defined(${$option_list{$option_name}}));   # already set

   # store through the reference registered under the *lower-cased*
   # name; using the raw config key here would autovivify a new,
   # unrelated entry for keys such as "MPIRUN" and lose the value
   ${$option_list{$option_name}} = $config{$elt};
 }

# start processing
print "RUN-mpiBLAST v1.0 copyright 2003 by Scalable Informatics LLC\n";
print "email: landman\@scalableinformatics.com\twww: http://scalableinformatics.com\n" if ($verbose);
printf "started on %s at %s by %s\n\n",$date,$time,$user if ($verbose);

# output options
if ($debug)
 {
   print STDERR "Config file options:\n";
   foreach $elt (keys %config)
    {
      printf STDERR "\t%s\t=\t%s\n",$elt,$config{$elt};
    }
   print STDERR "\nCommand line options:\n";
   foreach $elt (keys %option_list)
    {
      printf STDERR "\t%s\t=\t%s\n",$elt,${$option_list{$elt}};
    }
 }

# compute node temporary directory path
if (defined($temp))
 {
   printf STDERR "\ttemp set to \'%s\'\n",$temp if ($verbose);
 }
else                                    # no --temp=/path was used
 {
   print STDERR "Warning: no --temp=/path was set \n" if ($debug);

   # look in the environment for a
   # TEMP, or TMP variable (the TMPDIR is reserved in
   # the mpich environment under SGE)
   if (exists($ENV{'TEMP'}))
    {
      $temp = $ENV{'TEMP'};
      printf STDERR "\ttemp was set to \'%s\' from the TEMP environment variable\n",$temp if ($debug);
    }
   elsif (exists($ENV{'TMP'}))
    {
      $temp = $ENV{'TMP'};
      printf STDERR "\ttemp was set to \'%s\' from the TMP environment variable\n",$temp if ($debug);
    }
   else
    {
      $temp = "/tmp";                   # not an optimal choice, but
                                        # ok for the moment
      printf STDERR "\tcaution: temp defaulted to \'%s\' \n",$temp if ($debug);
    }
 }

# now that we have the path, we have to make a temporary
# directory in the temp_dir as a container for the data from the run.
$full_temp_path = $temp;

# we would like to create <temp>/<user>/<date>/<pid>.  We need to see
# if we are allowed to; if we cannot, we need to abort with an error
# message.
#
# set umask once: owner can do anything, group can read and execute,
# other users can't see anything (directories end up as 0750).
umask 0027;

foreach $elt ($user,$date,$$)
 {
   my $path_builder = join("/",$full_temp_path,$elt);

   printf STDERR "testing path = \'%s\' permissions\n",$full_temp_path if ($debug);
   does_this_file_or_path_exist($full_temp_path,"fail");
   is_this_path_writable($full_temp_path,"fail");

   # ok, the parent exists and is writable, so create the next
   # component unless it is already there
   if (-e $path_builder)
    {
      printf STDERR "path = \'%s\' already exists\n",$path_builder if ($verbose);
    }
   elsif (mkdir $path_builder)
    {
      printf STDERR "created path = \'%s\'\n",$path_builder if ($verbose);
    }
   else
    {
      my $mkdir_error = "$!";           # save before -d overwrites it
      if (-d $path_builder)
       {
         # another job of the same user created the shared user/date
         # directory between our -e test and the mkdir: not an error
         printf STDERR "path = \'%s\' already exists\n",$path_builder if ($verbose);
       }
      else
       {
         # always report this; it used to be hidden unless --verbose
         printf STDERR "FATAL ERROR: unable to create path = \'%s\'\nrc = %s\n",$path_builder,$mkdir_error;
         die "terminating execution (sorry about this)...\n";
       }
    }
   $full_temp_path = $path_builder;
 }

#####
#
# Ok, now we have the temp directory.  Echo this to user
# and start building the dot files in there.
#
#####
if (!(chdir $full_temp_path))
 {
   printf STDERR "FATAL ERROR: cannot change directory to path = \'%s\'\nrc = %s\n",$full_temp_path,$!;
   die "terminating execution (sorry about this)...\n";
 }

# setenv MPIRUN         /opt/mpich/ethernet/gcc/bin/mpirun
# setenv MPIBLAST       /opt/apps/mpiblast/bin/mpiblast

# stop if we cannot find the mpirun binary, or the mpiblast binary
if ((!defined($mpirun)) || !((-e $mpirun) && (-x $mpirun)))
 {
   print STDERR "FATAL ERROR: cannot find the mpirun binary.  Please add\n";
   print STDERR "either the --mpirun=/path/to/binary command line option,\n";
   print STDERR "or add the line mpirun=/path/to/binary to the .sge_mpiblastrc file\n";
   die "terminating execution (sorry about this)...\n";
 }
if ((!defined($mpiblast)) || !((-e $mpiblast) && (-x $mpiblast)))
 {
   print STDERR "FATAL ERROR: cannot find the mpiblast binary.  Please add\n";
   print STDERR "either the --mpiblast=/path/to/binary command line option,\n";
   print STDERR "or add the line mpiblast=/path/to/binary to the .sge_mpiblastrc file\n";
   die "terminating execution (sorry about this)...\n";
 }

if (!defined($useconf))         # if we are not using an existing
 {                              # config file from somewhere ...
   # look at the local and shared paths, check for writability as well
   if (!defined($local_db_dir))
    {
      print STDERR "FATAL ERROR: cannot find the local directory.  Please add\n";
      print STDERR "either the --local=/path/to/storage command line option,\n";
      print STDERR "or add the line local=/path/to/storage to the .sge_mpiblastrc file\n";
      die "terminating execution (sorry about this)...\n";
    }
   does_this_file_or_path_exist($local_db_dir,"fail");
   is_this_path_writable($local_db_dir,"fail");

   if (!defined($shared_db_dir))
    {
      print STDERR "FATAL ERROR: cannot find the shared directory.  Please add\n";
      print STDERR "either the --shared=/path/to/storage command line option,\n";
      print STDERR "or add the line shared=/path/to/storage to the .sge_mpiblastrc file\n";
      die "terminating execution (sorry about this)...\n";
    }
   does_this_file_or_path_exist($shared_db_dir,"fail");
   is_this_path_writable($shared_db_dir,"fail");

   make_mpiblast_dot_rc();
 }
else
 {
   is_this_file_readable($useconf,"fail");
   if (copy($useconf,'.mpiblastrc'))
    {
      printf STDERR "Copying \'%s\' to \.mpiblastrc\n",$useconf if ($debug);
    }
   else
    {
      printf STDERR "FATAL ERROR: cannot copy \'%s\' to \.mpiblastrc!\n",$useconf;
      die "terminating execution (sorry about this)...\n";
    }
 }

if (!defined($ncbirc))          # if we are not using an existing ncbi
 {                              # config file from somewhere so make one
   if (defined($data))
    {
      # validate the matrix directory before writing a file that
      # points at it
      is_this_path_readable($data,"fail");
      make_dot_ncbirc();
    }
   else
    {
      printf STDERR "FATAL ERROR: no --data=/path/to/scoring/matrices\n or data=/path/to/scoring/matrices in a config file has been provided\n";
      die "terminating execution (sorry about this)...\n";
    }
 }
else
 {
   is_this_file_readable($ncbirc,"fail");
   if (copy($ncbirc,'.ncbirc'))
    {
      printf STDERR "Copying \'%s\' to \.ncbirc\n",$ncbirc if ($debug);
    }
   else
    {
      printf STDERR "FATAL ERROR: cannot copy \'%s\' to \.ncbirc!\n",$ncbirc;
      die "terminating execution (sorry about this)...\n";
    }
 }

####
# look for number of cpus on command line, and if not, look
# at the environment
#
if (!defined($ncpu))
 {
   if (exists($ENV{'NSLOTS'}))
    {
      $ncpu = $ENV{'NSLOTS'};
    }
   elsif (exists($ENV{'NCPU'}))
    {
      $ncpu = $ENV{'NCPU'};
    }
   else
    {
      printf STDERR "FATAL ERROR: you must specify the number of cpus.\n";
      # the option is spelled --ncpu (there is no --np)
      printf STDERR "Use --ncpu=number or set the environment variable NCPU to number\n";
      die "terminating execution (sorry about this)...\n";
    }
 }
if (($ncpu !~ /^\s*\d+\s*$/) || ($ncpu < 1) || ($ncpu > 1000))
 {
   printf STDERR "FATAL ERROR: this number of cpus (%i) is not valid.\n",$ncpu;
   printf STDERR "Please use ncpus in the range of 1 to 1000\n";
   die "terminating execution (sorry about this)...\n";
 }
printf STDERR "NCPU=%i\n",$ncpu;

if (defined($machinefile))
 {
   is_this_file_readable($machinefile,"fail");
   if (copy($machinefile,'machines'))
    {
      printf STDERR "Copying \'%s\' to machines\n",$machinefile if ($debug);
    }
   else
    {
      printf STDERR "FATAL ERROR: cannot copy \'%s\' to machines!\n",$machinefile;
      die "terminating execution (sorry about this)...\n";
    }
 }

####

###
# ready to build the execution string
#
$run_command  = $mpirun;
# extra mpirun options need a separating blank, otherwise they are
# glued onto the end of the mpirun path
$run_command .= " " . $mpirun_options     if (defined($mpirun_options));
$run_command .= " " . $ENV{'MPI_OPTIONS'} if (exists($ENV{'MPI_OPTIONS'}));

# append a "-v" if we are being verbose
$run_command .= " -v " if ($verbose);

# append the number of CPUs
$run_command .= sprintf " -np %i ",$ncpu;

# append machines file if specified, otherwise the machine file from
# the SGE environment.  Exactly one -machinefile is emitted; an explicit
# --machinefile takes precedence over the scheduler's file.
if ($machinefile)
 {
   $run_command .= sprintf " -machinefile %s ",$machinefile;
 }
elsif (exists($ENV{'SGE_ARCH'}) && defined($ENV{'TMPDIR'}))
 {
   $run_command .= sprintf " -machinefile %s ",join("/",$ENV{'TMPDIR'},"machines");
 }

# append the mpiblast binary
$run_command .= $mpiblast;
$run_command .= " --debug " if ($debug);

# append blast options either by joining up the unprocessed
# version of @ARGV, or by using the $blast_options contents
$blast_options = join(" ",@ARGV) if (!defined($blast_options));
$run_command  .= " " . $blast_options;

# now scan the blast_options, and make sure we copy the inputs
# over to the current directory, and queue up the copy of output
# back to the launch directory or the destination if the
# launch directory was not specified
%incoming = ( '-R' => true, '-i' => true );
%outgoing = ( '-O' => true, '-o' => true );

# the switch has to be a word of its own ("-i file"), so that e.g.
# "--foo-i bar" is not mistaken for an input file
foreach my $arg (sort keys %incoming)
 {
   if ($blast_options =~ /(?:^|\s)\Q$arg\E\s+(\S+)/)
    {
      my $staged = $1;
      push @stage_in,$staged;
      printf STDERR "file =\'%s\' will be staged in\n",$staged;
    }
 }
foreach my $arg (sort keys %outgoing)
 {
   if ($blast_options =~ /(?:^|\s)\Q$arg\E\s+(\S+)/)
    {
      my $staged = $1;
      push @stage_out,$staged;
      printf STDERR "file =\'%s\' will be staged out\n",$staged;
    }
 }

### stage files in
foreach my $input (@stage_in)
 {
   # make a real unix style path; an absolute input path is used as it
   # is instead of being appended to the source directory
   my $source = ($input =~ m{^/}) ? $input : join("/",$input_source,$input);
   stage_file_in($source,$full_temp_path);      # do it
 }

### believe it or not, we are ready to run
### start the clock..., but first, the fork please...

# we are forking as we wish to be able to kill off
# the child processes when the parent dies,
# or signify an error from the child back to the parent
defined($pid = fork) or die "cannot fork!  $!\n";

#
# Ok, now we have a parent process with a non-zero PID (process ID)
# of the child, and a child process with a zero PID, everything else
# is shared.  We want to sleep and loop in the parent
if ($pid)
 {
   # parent process: sleep for a bit (1 second), wake up and
   # send an "are you there" 'kill' to the child.  The SIGCHLD
   # handler reaps the child and records its status in %Kid_Status.
   $t_start = Benchmark->new;
   while (1)
    {
      sleep 1;
      last if (exists($Kid_Status{$pid}) || !(kill 0, $pid));
    }
   $t_finish = Benchmark->new;
 }
else
 {
   # child process.  It must always end with an explicit exit: falling
   # through would make the child run the parent's stage-out and
   # cleanup code as well.
   #
   # The inherited SIGCHLD handler would reap the mpirun pipe process
   # behind close()'s back and lose its exit status, so restore the
   # default disposition here.
   $SIG{CHLD} = 'DEFAULT';

   printf STDERR "child executing: %s\n",$run_command if ($verbose);

   if ($dry_run)
    {
      printf STDERR "WARNING: dryrun in effect, no execution will occur!\n";
      foreach my $arg (@stage_out)
       {
         my $fh;
         if (open($fh,">",$arg))
          {
            print $fh "DRY RUN STAGE OUT FILE: nothing of consequence should be in here.\n";
            close($fh);
          }
         else
          {
            printf STDERR "Warning: cannot create dry run file \'%s\': %s\n",$arg,$!;
          }
       }
      sleep 3;
      exit 0;
    }

   # the command is a single string on purpose: it is built from user
   # supplied option strings and is meant to be parsed by the shell
   if (!(open($run_fh,"-|",$run_command)))
    {
      printf STDERR "FATAL ERROR: unable to run %s \n",$run_command.' |';
      die "terminating execution (sorry about that)...\n";
    }
   while (defined($line = <$run_fh>))
    {
      printf STDERR "exec:\t%s",$line;
    }
   close($run_fh);              # waits for the command, sets $?
   my $child_status = $?;

   # hand the outcome of the run back to the parent
   exit( ($child_status == 0) ? 0 : (($child_status >> 8) || 1) );
 }

# only the parent gets here
$delta_t = timediff($t_finish,$t_start)->real;  # wallclock seconds
printf STDERR "execution required %-.3f second(s)\n",$delta_t;

my $run_status = exists($Kid_Status{$pid}) ? $Kid_Status{$pid} : 0;
if ($run_status != 0)
 {
   printf STDERR "ERROR: the mpiBLAST run (pid %i) finished with exit code %i (wait status %i)\n",
          $pid,($run_status >> 8),$run_status;
 }

### file stage out
foreach my $output (@stage_out)
 {
   stage_file_out($output,$destination);        # do it
 }

# ok, run is complete, data is back to the user, now we need to clean
# up after ourselves, that is, unless debug was turned on, in
# which case we let someone else clean up after us.

# 1st find the parent directory of the current directory
my @_xpath  = split("/",$full_temp_path);
my $del_dir = pop @_xpath;
my $up_dir  = join("/",@_xpath);

if (chdir $up_dir)
 {
   printf STDERR "moving up to directory = \'%s\'\n",$up_dir if ($verbose);
   printf STDERR "deletion target is directory \'%s\'\n",$del_dir if ($verbose);
   if (!($debug))
    {
      # list form: no shell involved in the removal
      $rc = system('/bin/rm','-rf',$del_dir);   # the directory has left
    }                                           # the filesystem ...
 }
else
 {
   # never run a recursive delete with a relative name from the wrong
   # directory
   printf STDERR "Warning: cannot change directory to \'%s\' (%s), leaving \'%s\' in place\n",
          $up_dir,$!,$full_temp_path;
 }

# a failed run is reported to the caller (queuing system) as well
exit( ($run_status == 0) ? 0 : 1 );

# $MPIRUN -v -np $NSLOTS \
#       -machinefile $TMPDIR/machines \
#       $MPIBLAST \
#       --config-file=$MPIBLAST_CONFIG \
#       $* >>& $OUT
# echo -n " run finished at " >> $OUT
# date >> $OUT

# get_permissions(PATH)
#   Runs the file tests -r -R -e -w -W -x -X -o -O -d against PATH and
#   returns a reference to a hash keyed by the operator name.  PATH is
#   stat'ed once (by -r); the remaining tests use the cached result (_).
sub get_permissions
 {
   my $path = shift;
   my %return_permissions =
    (
      '-r' => (-r $path),
      '-R' => (-R _ ),
      '-e' => (-e _ ),
      '-w' => (-w _ ),
      '-W' => (-W _ ),
      '-x' => (-x _ ),
      '-X' => (-X _ ),
      '-o' => (-o _ ),
      '-O' => (-O _ ),
      '-d' => (-d _ )
    );
   return \%return_permissions;
 }

# is_this_path_writable(PATH, ACTION)
#   PATH counts as writable when it exists, is a directory and is
#   readable, writable and searchable by the real uid/gid
#   (-R -W -X -d -e).  When it is not and ACTION contains "fail"
#   (any case), an error report is printed and the program dies;
#   otherwise the sub just returns.
sub is_this_path_writable
 {
   my $test_file = shift;       # path or file to test
   my $action    = shift;       # action = (fail|continue)
   my %test_perms = %{get_permissions($test_file)};
   my $test_writable = true;    # assume the best case ...

   # AND together the permissions to make the decision.  The results
   # are kept local to this call; they used to be pushed onto a
   # file-global list that was never emptied between calls.
   foreach my $perm (qw(-R -W -X -d -e))
    {
      $test_writable &&= ($test_perms{$perm} || false);
    }

   printf STDERR "path or file \'%s\' is writable\n",$test_file if ($debug && $test_writable);

   # handle the case of not being writable
   if ((!$test_writable) && (lc($action) =~ /fail/))
    {
      printf STDERR "FATAL ERROR: path or file = \'%s\' is not writable by this user!\n",$test_file;
      print  STDERR "I am unable to continue, as I cannot write my files.\n";
      printf STDERR "hostname = \'%s\'\npath = \'%s\'\n",hostname(),$test_file;
      printf STDERR "date = \'%s\'\ntime = \'%s\'\n",$date,$time;
      die "terminating execution (sorry about that)...\n";
    }
 }

# is_this_file_readable(FILE, ACTION)
#   Dies with an error report when FILE does not exist or is not
#   readable and ACTION contains "fail"; otherwise returns.
sub is_this_file_readable
 {
   my $test_file = shift;       # path or file to test
   my $action    = shift;       # action = (fail|continue)
   my %test_perms = %{get_permissions($test_file)};
   my $test_readable = ($test_perms{'-r'} && $test_perms{'-e'});

   # handle the case of not being readable
   if ((!$test_readable) && (lc($action) =~ /fail/))
    {
      printf STDERR "FATAL ERROR: path or file = \'%s\' is not readable by this user!\n",$test_file;
      print  STDERR "I am unable to continue, as I cannot read my files.\n";
      printf STDERR "hostname = \'%s\'\npath = \'%s\'\n",hostname(),$test_file;
      printf STDERR "date = \'%s\'\ntime = \'%s\'\n",$date,$time;
      die "terminating execution (sorry about that)...\n";
    }
 }

# is_this_path_readable(PATH, ACTION)
#   Same test as is_this_file_readable, with an error report worded
#   for directories.
sub is_this_path_readable
 {
   my $test_file = shift;       # path or file to test
   my $action    = shift;       # action = (fail|continue)
   my %test_perms = %{get_permissions($test_file)};
   my $test_readable = ($test_perms{'-r'} && $test_perms{'-e'});

   # handle the case of not being readable
   if ((!$test_readable) && (lc($action) =~ /fail/))
    {
      printf STDERR "FATAL ERROR: path = \'%s\' is not readable by this user (or it does not exist)!\n",$test_file;
      print  STDERR "I am unable to continue, as I cannot read from this path.\n";
      printf STDERR "hostname = \'%s\'\npath = \'%s\'\n",hostname(),$test_file;
      printf STDERR "date = \'%s\'\ntime = \'%s\'\n",$date,$time;
      die "terminating execution (sorry about that)...\n";
    }
 }

# does_this_file_or_path_exist(PATH, ACTION)
#   Dies with an error report when PATH does not exist and ACTION
#   contains "fail"; otherwise returns.
sub does_this_file_or_path_exist
 {
   my $test_file = shift;       # path or file to test
   my $action    = shift;       # action = (fail|continue)
   my %test_perms = %{get_permissions($test_file)};
   #printf STDERR "Dump=%s\n",Dumper(\%test_perms);
   if ((!$test_perms{'-e'}) && (lc($action) =~ /fail/))
    {
      printf STDERR "FATAL ERROR: path or file = \'%s\' does not exist!\n",$test_file;
      printf STDERR "I am unable to continue, as I do not know where to place my files.\n";
      printf STDERR "hostname = \'%s\'\npath = \'%s\'\n",hostname(),$test_file;
      printf STDERR "date = \'%s\'\ntime = \'%s\'\n",$date,$time;
      die "terminating execution (sorry about that)...\n";
    }
 }

# make_mpiblast_dot_rc()
#   Writes .mpiblastrc into the current directory: first line the
#   shared database directory, second line the node-local one.
#   Dies when the file cannot be written; returns true otherwise.
sub make_mpiblast_dot_rc
 {
   my $rc_file = IO::File->new;
   if (!($rc_file->open('.mpiblastrc','w')))
    {
      printf STDERR "FATAL ERROR: Unable to write the \.mpiblastrc file\n";
      die "terminating execution (sorry about this)...\n";
    }
   printf $rc_file "%s\n",$shared_db_dir;
   printf $rc_file "%s\n",$local_db_dir;
   $rc_file->close;
   return true;
 }

# make_dot_ncbirc()
#   Writes .ncbirc into the current directory with a [NCBI] section
#   pointing Data at --data and a [BLAST] section pointing BLASTDB at
#   the node-local database directory.  Dies when the file cannot be
#   written; returns true otherwise.
sub make_dot_ncbirc
 {
   my $rc_file = IO::File->new;
   if (!($rc_file->open('.ncbirc','w')))
    {
      # this used to name the wrong file (.mpiblastrc)
      printf STDERR "FATAL ERROR: Unable to write the \.ncbirc file\n";
      die "terminating execution (sorry about this)...\n";
    }
   # NOTE(review): --local is only enforced when --useconf is absent,
   # so BLASTDB is written empty when --useconf is combined with
   # --data.  Confirm whether it should then be read from the
   # supplied .mpiblastrc instead.
   my $blastdb = defined($local_db_dir) ? $local_db_dir : "";
   printf $rc_file '[NCBI]%s',"\n";
   printf $rc_file "Data=%s\n",$data;
   printf $rc_file '[BLAST]%s',"\n";
   printf $rc_file "BLASTDB=%s\n",$blastdb;
   $rc_file->close;
   return true;
 }

# stage_file_in(FILE, DIRECTORY)
#   Copies FILE into DIRECTORY; dies when FILE is unreadable or the
#   copy fails.
sub stage_file_in
 {
   # file staging is fine for very simple systems such
   # as this.  More complex systems require better
   # technology.  Right now for these purposes
   # it is adequate.
   my ($file_in,$to) = @_;
   printf STDERR "staging in file=\'%s\' to \'%s\'\n",$file_in,$to;

   # basic sanity check: can we read $file_in?  Does it exist?
   # we assume that the destination $to has been vetted previously
   is_this_file_readable($file_in,"fail");
   if (!(copy($file_in,$to)))
    {
      printf STDERR "FATAL ERROR: staging file \'%s\' to directory \'%s\' failed!\n",$file_in,$to;
      die "terminating execution (sorry about this)...\n";
    }
   printf STDERR "\tstaged file \'%s\' to directory \'%s\'\n",$file_in,$to if ($debug);
 }

# stage_file_out(FILE, DIRECTORY)
#   Copies FILE (relative to the current directory) into DIRECTORY;
#   dies when FILE is unreadable, DIRECTORY is not writable or the
#   copy fails.
sub stage_file_out
 {
   # file staging is fine for very simple systems such
   # as this.  More complex systems require better
   # technology.  Right now for these purposes
   # it is adequate.
   my ($file,$to) = @_;

   # basic sanity check: can we read $file?  Does it exist?
   # check the file destination.  Would be terrible if we cannot
   # write there
   is_this_file_readable($file,"fail");
   is_this_path_writable($to,"fail");
   if (!(copy($file,$to)))
    {
      printf STDERR "FATAL ERROR: returning file \'%s\' to directory \'%s\' failed!\n",$file,$to;
      die "terminating execution (sorry about this)...\n";
    }
   printf STDERR "\treturned file \'%s\' to directory \'%s\'\n",$file,$to if ($debug);
 }

__END__

=head1 NAME

run_mpiblast - a method to make running mpiBLAST simpler and more robust

=head1 SYNOPSIS

B<run_mpiblast> [run_mpiblast options] [-- blastall options | --blastopt="blastall options"]

B<qsub -pe mpich 8 run_mpiblast> [run_mpiblast options] [-- blastall options | --blastopt="blastall options"]

=head1 DESCRIPTION

B<run_mpiblast> will make running mpiBLAST on a cluster in the presence
of other users and queuing systems somewhat more robust.  It will allow
you to catch errors in configuration well in advance of the actual
execution, and allow you to submit the run to a queuing system.

=head1 OPTIONS

=over 4

=item B<--debug>

turn on verbosity and debugging output.  Default is off.  Note: turning
debug on will prevent the program from erasing the temporary directory.
This means that you will be able to examine all of the files by noting
the temporary directory path.

=item B<--verbose>

turn on verbosity.  Default is off

=item B<--temp=>I</path/to/temp>

Top level directory where a temporary subdirectory may be placed.
Default is I</tmp> (unless the B<TEMP> or B<TMP> environment variable is
set).  After execution finishes, this temporary directory will be destroyed.
If you are attempting to resolve problems, it is best to use the
B<--debug> option.

=item B<--useconf=>I</path/to/.mpiblastrc>

The path to an existing mpiBLAST configuration (I<.mpiblastrc>) file that
you will use for this run.  It is copied into the temporary run
directory.  There is no default.  This argument is optional: when it is
not given, B<--shared> and B<--local> must be supplied so that the
I<.mpiblastrc> file can be generated.

=item B<--mpirun=>I</path/to/mpirun>

The path to the I<mpirun> binary.  No default, this is a mandatory
argument.

=item B<--mpiblast=>I</path/to/mpiblast>

The path to the I<mpiblast> binary.  No default, this is a mandatory
argument.

=item B<--shared=>I</path/to/shared/storage>

The path to the shared cluster storage that holds the databases.  It
becomes the first line of the generated I<.mpiblastrc> file.  No
default, this is a mandatory argument unless B<--useconf> is given.  The
path must exist and be writable by the user submitting the run.

=item B<--local=>I</path/to/local/storage>

The path (on each compute node) to where mpiBLAST will copy the
databases.  No default, this is a mandatory argument unless
B<--useconf> is given; it may also be set with a C<local=> line in a
I<.sge_mpiblastrc> file.  Note: this path B<must> be writable by the
user submitting the run, on all nodes where this job will run.  Failure
to do so will result in difficult to track errors, and force you to use
the B<--debug> option.

=item B<--ncpu=>I<number>

This is the number of CPUs to request that mpiBLAST use.  It is
mandatory if you are not using a queuing environment that can create an
MPI run out of a requested number of CPUs.  Under I<GridEngine>, using
the I<-pe mpich N> sets an environment variable named B<NSLOTS> which
this program will detect if B<--ncpu> were not set.  Under other
Distributed Resource Manager (DRM) systems, there may be other similar
mechanisms.  There is no default value.

=item B<--machinefile=>I</path/to/machinefile>

This is the machine file that I<mpirun> should use.  Under
I<GridEngine>, using the I<-pe mpich N> provides a machine file at
I<$TMPDIR/machines>, which this program will use if B<--machinefile>
were not set.  Under other Distributed Resource Manager (DRM) systems,
there may be other similar mechanisms.  The default value is whatever is
built into the particular MPI implementation you are using.

=item B<--ncbirc=>I</path/to/.ncbirc>

This is the location of the I<.ncbirc> file that mpiBLAST should use.
There is no default.  If you use the B<--data> argument, you can build
this file during the run.

Note: one known failure mode of BLAST and mpiBLAST is finishing very
quickly, returning no hits.
This usually happens when it cannot find the scoring matrices via the
I<Data> keyword in the B<.ncbirc> file.  It is advisable that you build
a default.

Note 2: The location of this path should be the same on all compute
nodes.  As these files are read infrequently, are small, and not
written to, a read-only mount or a file push to each compute node
should be adequate.

=item B<--data=>I</path/to/scoring/matrices>

This is the path to the substitution matrices that BLAST should use.
See the notes above.  You must use this argument if you do not have a
pre-existing I<.ncbirc> file that you have pointed to with the
B<--ncbirc=/path/to/.ncbirc> argument.

=item B<--blastopt=>I<"standard NCBI BLAST options">

These are the standard NCBI BLAST options.  The NCBI BLAST defaults are
in effect for this argument.

Note: There is no facility to stream sequence data as input on
I<STDIN>.  This is a limitation of B<mpiBLAST>, and any DRM or queuing
system.  This represents a feature in NCBI BLAST that is not available
in mpiBLAST, and by extension, not available in run_mpiblast.

=item B<--dryrun>

Do all the checks, but do not do the run itself.  Placeholder output
files are created and staged out so that the file handling can be
verified.  This option does not by itself turn on debugging output;
combine it with B<--debug> or B<--verbose> to see the details.

=item B<--destination=>I<"/path/to/store/output/files/">

This is the path where you wish the output files to be staged to from
the temporary directory.  The default value will be the path from which
B<run_mpiblast> was run.

=item B<--source=>I<"/path/to/get/input/files/">

This is the path where you wish the input files to be staged from and
copied to the temporary directory.  The default value will be the path
from which B<run_mpiblast> was run.

=item B<--help>

This file.

=back

=head1 FILES

=over 4

=item /usr/local/bin/run_mpiblast or /path/to/run_mpiblast

=item /etc/sge_mpiBLAST.conf, ~/.sge_mpiblastrc, ./.sge_mpiblastrc

Optional configuration files, read in this order, containing
C<option = value> lines.  Command line options take precedence.

=back

=head1 ENVIRONMENT VARIABLES

=over 4

=item B<$MPI_OPTIONS>

Options to be passed to mpirun in addition to what the run_mpiblast
program would normally use.  You may use this for additional debugging
options for MPI.

=item B<$TEMP>, B<$TMP>

Top level temporary directory for the run, consulted in this order when
the B<--temp> option is not given.  If neither is set, I</tmp> is used.

=item B<$TMPDIR>

Under GridEngine this is where the scheduler places the I<machines>
file for the job.  It is reserved for that purpose and is not used to
choose the temporary directory of the run.

=item B<$NSLOTS>, B<$NCPU>

Number of CPUs to use when the B<--ncpu> option is not given.

=back
=head1 DIAGNOSTICS

B<run_mpiblast> will emit a message such as I<FATAL ERROR: ...> when it
finds a condition that requires someone to fix something.  If a default
value is used where a command line or configuration argument should be
used, B<run_mpiblast> will emit a I<Warning> or I<caution> message if
the B<--verbose> or B<--debug> options have been set.

Note: significant efforts have been made to make the error messages
meaningful.  Please read the output from the program carefully, with
the B<--debug> option specified.  Often the problem will be found by
reading this.

Specific cases:

=over 4

=item B<mpiBLAST cannot find the formatted database>

The mpiBLAST database formatter needs to be run against the database.
For some reason, B<mpiBLAST> was not able to detect a I<.dbs> file at
the directory indicated as the shared directory, which is the first
line of the B<.mpiblastrc> file (or of the file given with
B<--useconf>).

=item B<The run finishes very quickly and returns no hits>

As indicated previously, this results from B<mpiBLAST> not being able
to find the substitution matrices
(ftp://ftp.ncbi.nlm.nih.gov/toolbox/ncbi_tools/data.tar.Z) for the
calculation.  You need to make sure you have a properly constructed
B<.ncbirc> file.  A sample file which would place the matrices at
I</opt/apps/blast/data> is as follows:

    [NCBI]
    Data=/opt/apps/blast/data

=back

=head1 EXAMPLES

Run with debugging turned on, 8 CPUs, with mpirun in /usr/local/bin,
mpiBLAST in /usr/local/mpiblast, blast options of
C<-i input.fa -o output -d nt -p blastx -e 0.01> using the
/home/landman/machines machine file, ncbirc file from /home/landman,
and an mpiblastrc file from /home/landman.

    ./run_mpiblast --ncpu 8 --mpirun=/usr/local/bin/mpirun \
        --mpiblast=/usr/local/mpiblast/bin/mpiblast \
        --blastopt="-i input.fa -o output -d nt -p blastx -e 0.01" \
        --debug --machinefile=/home/landman/machines \
        --ncbirc=/home/landman/.ncbirc \
        --useconf=/home/landman/.mpiblastrc

=head1 REQUIRES

Perl 5.8.0, Getopt::Long, POSIX, working signals, mpich 1.2.4 or
greater, working mpiBLAST installation, working NCBI Toolbox source
code and binaries

=head1 OPTIONAL

Working GridEngine or similar DRM system.

=head1 SEE ALSO

mpiBLAST (http://mpiblast.lanl.gov), mpirun(1)

=head1 LICENSE

Dual license, similar to MySQL.  Details will be posted on
http://scalableinformatics.com/license/dual.html in short order.  This
program is B<not> freely redistributable, please direct all requests
for this program to http://scalableinformatics.com.  You may not bundle
this with commercial programs, or distribute this without obtaining a
license from Scalable Informatics.

=head1 AUTHOR

Joe Landman

landman@scalableinformatics.com

http://scalableinformatics.com

=cut