From c5cc50f6ab598aeafca827f8f2fcc05f4755ee0e Mon Sep 17 00:00:00 2001 From: Ovidiu Mircescu Date: Thu, 4 Dec 2014 18:00:08 +0100 Subject: [PATCH] Add more documentation and fix on testing yacs_file jobs --- idl/SALOME_Launcher.idl | 129 ++++++++++++++++++++--------- idl/SALOME_ResourcesManager.idl | 32 ++++++- src/Launcher/Test/test_launcher.py | 28 ++++++- src/UnitTests/prepare_test.py | 1 - 4 files changed, 146 insertions(+), 44 deletions(-) diff --git a/idl/SALOME_Launcher.idl b/idl/SALOME_Launcher.idl index b251d1226..c3ad69051 100644 --- a/idl/SALOME_Launcher.idl +++ b/idl/SALOME_Launcher.idl @@ -26,7 +26,7 @@ #include "SALOME_Exception.idl" #include "SALOME_ResourcesManager.idl" -/*! \file SALOME_Launcher.idl \brief interfaces for %SALOME Launcher service +/*! \file SALOME_Launcher.idl \brief Interfaces for %SALOME Launcher service */ module Engines @@ -49,34 +49,37 @@ struct JobParameters //! Name of the job. string job_name; - /*! \brief Type of the job. - There are three supported types: - - "command" : execute #job_file script without SALOME environment - - "python_salome" : execute #job_file python script by SALOME + //! Type of the job. + /*! There are three supported types: + - "command" : execute #job_file script without %SALOME environment + - "python_salome" : execute #job_file python script by %SALOME - "yacs_file" : execute #job_file by YACS module as a xml YACS schema */ string job_type; // Common values - /*! \brief Local path to the file to be executed by the job. - The type of the file depends on #job_type. + //! Local path to the file to be executed by the job. + /*! The type of the file depends on #job_type. + If #job_type is "command", the #job_file must be a single filename + specifying a self-consistent script to be executed without any argument, + on the remote host. */ string job_file; - /*! \brief Local path to a script to be sourced in the environment of the job. - It may contain modifications of environment variables. + //! 
Local path to a script to be sourced in the environment of the job. + /*! It may contain modifications of environment variables. */ string env_file; - /*! List of local data files to be copied to #work_directory. - #job_file and #env_file are automaticaly copied, without adding them - to this list. The actual path of the files could be prefixed by - #local_directory. + //! List of local data files to be copied to #work_directory. + /*! #job_file and #env_file are automatically copied, without adding them + to this list. If basenames are specified, then the files are supposed + to be located in #local_directory. */ FilesList in_files; - /*! List of results to get back at the end of the job. - These results can be names of files or directories, produced by the job + //! List of results to get back at the end of the job. + /*! These results can be names of files or directories, produced by the job in #work_directory. Directories will be copied recursively. It is also possible to use an absolute path instead of the simple name, (string beginning with '/') and this absolute path will be used instead @@ -86,29 +89,41 @@ struct JobParameters FilesList out_files; //! Remote directory where the job will be executed. + /*! It must be used to specify the remote directory where to put all + the stuff to run the job. Note that the job will be executed from within + this directory. A change directory toward this working directory is done + by the batch system before running the job. + */ string work_directory; //! Prefix to be applied to #in_files. + /*! It can be used to specify where to find the local input files. + It's optional if you specify the absolute path name of input files. + */ string local_directory; - /*! Local directory where to get result files. + //! Local directory where to get result files. + /*! It must be used to specify where to download the output files on the + local file system. \see SalomeLauncher::getJobResults */ string result_directory; - /*! 
Time for the batch (has to be like this : hh:mm) - Could be empty, in - this case, default value of the selected resource will be used. + //! Maximum time for the batch execution (expected format : "hh:mm"). + /*! Could be empty, in this case, default value of the selected resource + will be used. */ string maximum_duration; - /*! Memory is expressed in megabytes -> mem_mb. - Number of Processors -> nb_proc. + //! Specifies the rules to choose the resource where to execute the job. + /*! The additional two following parameters MUST be specified explicitly, + because they are not provided by the resource definition: + - mem_mb -> Memory expressed in megabytes. + - nb_proc -> Number of Processors. */ ResourceParameters resource_required; - /*! - Name of the batch queue chosen - optional - */ + //! Name of the batch queue chosen - optional string queue; //! Specifies if the job must run in exclusive mode (without sharing nodes with other jobs) @@ -123,8 +138,14 @@ struct JobParameters //! String that is added to the job submission file - optional string extra_params; - /*! - Specific parameters for each type of job - optional + //! Specific parameters for each type of job - optional + /*! This is a list of parameters (key - value pairs of strings) useful in + some specific situations. + Known parameters: + - EnableDumpYACS : value of the "dump" option of the "driver" command + when the job type is "yacs_file". It gives the number of seconds + between two updates of the state dump file. There will be no dump file + if this parameter is missing or if its value is less than 1. */ Engines::ParameterList specific_parameters; @@ -146,38 +167,45 @@ interface SalomeLauncherObserver void notify(in string event_name, in string event_data); }; -/*! \brief Interface of the %salomelauncher - This interface is used for interaction with the unique instance - of SalomeLauncher +//! Interface of the %salome launcher. +/*! 
This interface is used for interaction with the unique instance + of SalomeLauncher. + The utilisation of this interface is explained in the YACS documentation, + article "Starting a SALOME application in a batch manager". + Other examples of use can be found in the modules JOBMANAGER, PARAMETRIC + and SMESH (PADDER tool). */ interface SalomeLauncher { // Main methods - /*! Create a job and set its parameters, without launching it. - Its state becomes "CREATED". + //! Create a job and set its parameters, without launching it. + /*! Its state becomes "CREATED". \return job id */ long createJob (in Engines::JobParameters job_parameters) raises (SALOME::SALOME_Exception); - /*! Launch an already created job (job's state should be "CREATED"). - Its state changes to "QUEUED". + //! Launch an already created job (job's state should be "CREATED"). + /*! Launching the job consists of: + - create the working directory on the remote file system + - copy the input files into the working directory + - source the environment file if defined + - run the job */ void launchJob (in long job_id) raises (SALOME::SALOME_Exception); - /*! - \return "CREATED", "IN_PROCESS", "QUEUED", "RUNNING", "PAUSED", + //! Get the execution state of the job. + /*! \return "CREATED", "IN_PROCESS", "QUEUED", "RUNNING", "PAUSED", "FINISHED" or "FAILED" - \see LIBBATCH/src/core/Constants.hxx + \see LIBBATCH/src/core/Constants.hxx */ string getJobState (in long job_id) raises (SALOME::SALOME_Exception); //! Get names or ids of hosts assigned to the job string getAssignedHostnames (in long job_id) raises (SALOME::SALOME_Exception); - /*! - Copy the result files from the work directory of the job - to a local directory. - The list of result files is given by the "out_files" parameter. + //! Copy the result files from the work directory of the job + //! to a local directory. + /*! The list of result files is given by the JobParameters::out_files parameter. 
If a result "file" is a directory, the copy is recursive. The "logs" directory contains the standard and the error outputs of the job. \param job_id Job id returned by createJob(). @@ -189,8 +217,26 @@ interface SalomeLauncher \see createJob */ void getJobResults(in long job_id, in string directory) raises (SALOME::SALOME_Exception); + + //! Try to copy the files named "dumpState*.xml" from the working directory. + /*! The file "dumpState_name.xml" can be produced by the execution of a YACS + schema and it contains the execution state of the schema. + You can activate the creation of this file by adding the parameter + "EnableDumpYACS" in JobParameters::specific_parameters when the job + is created. + \param job_id Job id returned by createJob(). + \param directory Local directory where to copy the file. + If this value is an empty string (""), the file will be + copied to the directory given by + JobParameters::result_directory. + \return 1 if the copy succeeds. + \see JobParameters::specific_parameters + */ boolean getJobDumpState(in long job_id, in string directory) raises (SALOME::SALOME_Exception); + + //! Kill the job and set its state to "FAILED" void stopJob (in long job_id) raises (SALOME::SALOME_Exception); + //! Kill the job and remove it from the jobs list void removeJob (in long job_id) raises (SALOME::SALOME_Exception); // Useful methods @@ -198,17 +244,22 @@ interface SalomeLauncher boolean testBatch (in ResourceParameters params) raises (SALOME::SALOME_Exception); // SALOME kernel service methods + //! Shut down the SalomeLauncher server. void Shutdown(); + //! Get the PID of the current process long getPID(); // Observer and introspection methods + //! 
Add an observer to be notified of the jobs list modifications void addObserver(in Engines::SalomeLauncherObserver observer); void removeObserver(in Engines::SalomeLauncherObserver observer); Engines::JobsList getJobsList(); Engines::JobParameters getJobParameters(in long job_id) raises (SALOME::SALOME_Exception); // Save and load methods + //! Add to the current jobs list the jobs previously saved in an xml file. void loadJobs(in string jobs_file) raises (SALOME::SALOME_Exception); + //! Save the current list of jobs in an xml file. void saveJobs(in string jobs_file) raises (SALOME::SALOME_Exception); }; diff --git a/idl/SALOME_ResourcesManager.idl b/idl/SALOME_ResourcesManager.idl index 8938ba4cb..8ab9e82b4 100644 --- a/idl/SALOME_ResourcesManager.idl +++ b/idl/SALOME_ResourcesManager.idl @@ -40,7 +40,12 @@ typedef sequence CompoList; struct ResourceParameters { //! resource name - manual selection + /*! If a name is provided, the resource will be imposed. + If the name is an empty string, the resource will be chosen to match + the other parameters. + */ string name; + //! host name string hostname; //! if true select only resources that can launch batch jobs @@ -54,13 +59,36 @@ struct ResourceParameters CompoList componentList; // Permits to order resources - //! required number of proc + //! required number of processors + /*! This parameter must be specified explicitly, because it is not provided + by the resource definition. + */ long nb_proc; + //! required memory size + /*! This parameter must be specified explicitly, because it is not provided + by the resource definition. + + The parameter specifies the maximum memory value that could be allocated + for executing the job. This takes into account not only the data that + could be loaded by the batch process but also the linked dynamic library. 
+ A possible problem, for example in the case where you use the ssh + emulation of a batch system, is to get an error message as below + when libBatch tries to run the ssh command: +\verbatim +/usr/bin/ssh: error while loading shared libraries: libcrypto.so.0.9.8: failed +to map segment from shared object: Cannot allocate memory +\endverbatim + In this example, the mem_mb was set to 1MB, value that is not + sufficient to load the dynamic libraries linked to the ssh + executable (libcrypto.so in the error message). + So, even in the case of a simple test shell script, you should + set this value at least to a standard threshold as 500MB. + */ long mem_mb; //! required frequency long cpu_clock; - //! required number of node + //! required number of nodes long nb_node; //! required number of proc per node long nb_proc_per_node; diff --git a/src/Launcher/Test/test_launcher.py b/src/Launcher/Test/test_launcher.py index 2196ae6a3..a9c4f126c 100755 --- a/src/Launcher/Test/test_launcher.py +++ b/src/Launcher/Test/test_launcher.py @@ -8,6 +8,7 @@ import time # Test of SalomeLauncher. # This test should be run in the salome environment, using "salome shell" +# and salome application should be running. # If YACS_ROOT_DIR is not set, the test of submitting a YACS schema will be # skiped. 
class TestCompo(unittest.TestCase): @@ -268,6 +269,11 @@ f.close() #job_params.in_files = [case_test_dir] job_params.out_files = ["result.txt"] job_params.result_directory = local_result_dir + + # define the interval between two YACS schema dumps (3 seconds) + import Engines + job_params.specific_parameters = [Engines.Parameter("EnableDumpYACS", "3")] + job_params.resource_required = salome.ResourceParameters() job_params.resource_required.nb_proc = 1 @@ -280,15 +286,33 @@ f.close() import time jobState = launcher.getJobState(job_id) + yacs_dump_success = False print "Job %d state: %s" % (job_id,jobState) while jobState != "FINISHED" and jobState != "FAILED" : time.sleep(5) jobState = launcher.getJobState(job_id) - print "Job %d state: %s" % (job_id,jobState) + yacs_dump_success = launcher.getJobDumpState(job_id, local_result_dir) + print "Job %d state: %s - dump: %s" % (job_id,jobState, yacs_dump_success) pass self.assertEqual(jobState, "FINISHED") + # Verify dumpState file is in the results + self.assertTrue(yacs_dump_success) + dump_file_path = os.path.join(local_result_dir, "dumpState_mySchema.xml") + self.assertTrue(os.path.isfile(dump_file_path)) + + # Load the schema state from the dump file and verify the state of a node + import SALOMERuntime + SALOMERuntime.RuntimeSALOME_setRuntime(1) + import loader + schema = loader.YACSLoader().load(job_script_file) + stateParser = loader.stateParser() + sl = loader.stateLoader(stateParser, schema) + sl.parse(dump_file_path) + # 106 : "DONE" state code + self.assertEqual(106, schema.getChildByName("PyScript0").getEffectiveState()) + # getJobResults to default directory (result_directory) launcher.getJobResults(job_id, "") try: @@ -297,7 +321,7 @@ f.close() f.close() self.assertEqual(text, "expected") except IOError,ex: - self.fail("IO exception:" + str(ex)); + self.fail("IO exception:" + str(ex)) if __name__ == '__main__': # creat study diff --git a/src/UnitTests/prepare_test.py b/src/UnitTests/prepare_test.py index 
59bc0dca9..71a296af1 100755 --- a/src/UnitTests/prepare_test.py +++ b/src/UnitTests/prepare_test.py @@ -49,7 +49,6 @@ class TestEnvironment: # some tests need YACS module. yacs_path = os.getenv("YACS_ROOT_DIR", "") if os.path.isdir(yacs_path): - cls.hasYacs = True config_appli_text += ''' ' -- 2.39.2