1 // Copyright (C) 2011 EDF R&D
3 // This library is free software; you can redistribute it and/or
4 // modify it under the terms of the GNU Lesser General Public
5 // License as published by the Free Software Foundation; either
6 // version 2.1 of the License.
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 // Lesser General Public License for more details.
13 // You should have received a copy of the GNU Lesser General Public
14 // License along with this library; if not, write to the Free Software
15 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
19 // Authors : Guillaume Boulant (EDF) - 01/03/2011
27 #include "MeshJobManager_i.hxx"
29 #include <SALOMEconfig.h>
30 #include CORBA_SERVER_HEADER(SALOME_Exception)
33 #include "Basics_Utils.hxx" // For standard logging
35 #include "SALOME_KernelServices.hxx" // For CORBA logging
41 // ====================================================================
42 // General purpose helper functions (to put elsewhere at least)
43 // ====================================================================
/*!
 * This function must be used to associate a datetime tag to a job.
 * The tag is the current time of day, as reported by gettimeofday(),
 * expressed in microseconds.
 *
 * NOTE(review): on a platform where long is 32 bits wide the product
 * below does not fit; confirm the supported targets use a 64-bit long.
 */
static long timetag() {
  timeval tv;
  gettimeofday(&tv, 0);
  // Convert both fields to long before combining them so that the
  // arithmetic is done in the return type.
  const long tag = static_cast<long>(tv.tv_usec) + static_cast<long>(tv.tv_sec)*1000000L;
  return tag;
}
/*!
 * This function returns true if the string text starts with the string
 * token. An empty token is a prefix of any text (including an empty one).
 */
static bool myStartsWith(const std::string& text, const std::string& token) {
  // A text shorter than the token can never start with it.
  if (text.length() < token.length())
    return false;
  return text.compare(0, token.length(), token) == 0;
}
70 // ====================================================================
71 // Constructor/Destructor
72 // ====================================================================
// Constructor of the MeshJobManager servant.
// All arguments are forwarded unchanged to the Engines_Component_i base
// class. The object is then activated in the POA, and the SALOME launcher
// and resources manager are resolved through the KERNEL helpers.
// Throws the exception built by KERNEL::createSalomeException when either
// of these two services is nil.
74 MeshJobManager_i::MeshJobManager_i(CORBA::ORB_ptr orb,
75 PortableServer::POA_ptr poa,
76 PortableServer::ObjectId * contId,
77 const char *instanceName,
78 const char *interfaceName)
79 : Engines_Component_i(orb, poa, contId, instanceName, interfaceName)
81 LOG("Activating MESHJOB::MeshJobManager object");
// NOTE(review): _thisObj is activated here but its assignment is not
// visible in this listing -- confirm it is set before this call.
83 _id = _poa->activate_object(_thisObj);
// The launcher is mandatory: without it no job can be created or started.
85 _salomeLauncher = KERNEL::getSalomeLauncher();
86 if(CORBA::is_nil(_salomeLauncher)){
87 LOG("The SALOME launcher can't be reached ==> STOP");
88 throw KERNEL::createSalomeException("SALOME launcher can't be reached");
// The resources manager is used later to look up resource definitions.
91 _resourcesManager = KERNEL::getResourcesManager();
92 if(CORBA::is_nil(_resourcesManager)){
93 LOG("The SALOME resource manager can't be reached ==> STOP");
94 throw KERNEL::createSalomeException("The SALOME resource manager can't be reached");
98 MeshJobManager_i::~MeshJobManager_i() {
99 LOG("MeshJobManager_i::~MeshJobManager_i()");
103 // ====================================================================
104 // Helper functions to deal with the local and remote file systems
105 // ====================================================================
107 #include <fstream> // to get the file streams
109 #include <stdlib.h> // to get _splitpath
110 #include <direct.h> // to get _mkdir
112 #include <unistd.h> // to get basename
113 #include <sys/stat.h> // to get mkdir
114 #include <sys/types.h> // to get mkdir options
117 #include <stdlib.h> // to get system and getenv
// Names of the files exchanged with the padder program, and the
// separator used between the fields of the padder data file.
static const std::string OUTPUTFILE("output.med");
static const std::string DATAFILE("data.txt");
static const std::string SCRIPTFILE("padder.sh");
static const std::string SEPARATOR(" ");

/*!
 * Returns the name of the current user as found in the environment
 * (USER, then USERNAME). Constructing a std::string from the null
 * pointer returned by getenv for an unset variable is undefined
 * behaviour, so a fixed fallback name is returned when neither
 * variable is set.
 */
static std::string currentUserName() {
  const char * const variables[] = { "USER", "USERNAME" };
  for (size_t i = 0; i < sizeof(variables)/sizeof(variables[0]); ++i) {
    const char * value = getenv(variables[i]);
    if (value != NULL && value[0] != '\0')
      return std::string(value);
  }
  return std::string("unknown");
}

// The working directories are suffixed with the user name so that
// several users of the same host do not share them.
static const std::string USER(currentUserName());
static const std::string LOCAL_INPUTDIR("/tmp/spadder.local.inputdir."+USER);
static const std::string LOCAL_RESULTDIR("/tmp/spadder.local.resultdir."+USER);
static const std::string REMOTE_WORKDIR("/tmp/spadder.remote.workdir."+USER);
130 * This function creates the padder text input file containing the
131 * input data (list of filenames and groupnames) and returns the path
132 * of the created file. This function is the one that knows the format
133 * of the padder input file. If the input file format changes, then
134 * this function (and only this one) should be updated.
136 const char * MeshJobManager_i::_writeDataFile(std::vector<MESHJOB::MeshJobParameter> listConcreteMesh,
137 std::vector<MESHJOB::MeshJobParameter> listSteelBarMesh) {
139 _mkdir(LOCAL_INPUTDIR.c_str());
141 mkdir(LOCAL_INPUTDIR.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
144 // Make it static so that it's allocated once (constant name)
145 static std::string * dataFilename = new std::string(LOCAL_INPUTDIR+"/"+DATAFILE);
146 std::ofstream dataFile(dataFilename->c_str());
148 // We first specify the concrete mesh data (filename and groupname)
151 char fname[ _MAX_FNAME ];
152 _splitpath( listConcreteMesh[0].file_name, NULL, NULL, fname, NULL );
153 char* bname = &fname[0];
155 char* bname = basename(listConcreteMesh[0].file_name);
157 line = std::string(bname) + " " + std::string(listConcreteMesh[0].group_name);
158 dataFile << line.c_str() << std::endl;
159 // Note that we use here the basename because the files are supposed
160 // to be copied in the REMOTE_WORKDIR for execution.
162 // The, we can specify the steelbar mesh data, starting by the
164 int nbSteelBarMesh=listSteelBarMesh.size();
165 line = std::string("nbSteelbarMesh") + SEPARATOR + ToString(nbSteelBarMesh);
166 dataFile << line.c_str() << std::endl;
167 for (int i=0; i<nbSteelBarMesh; i++) {
169 char fname[ _MAX_FNAME ];
170 _splitpath( listSteelBarMesh[i].file_name, NULL, NULL, fname, NULL );
171 char* bname = &fname[0];
173 char* bname = basename(listSteelBarMesh[i].file_name);
175 line = std::string(bname) + " " + std::string(listSteelBarMesh[i].group_name);
176 dataFile << line.c_str() << std::endl;
179 // Finally, we conclude with the name of the output file
181 dataFile << line.c_str() << std::endl;
183 return dataFilename->c_str();
187 * This function creates a shell script that runs padder whith the
188 * specified data file, and returns the path of the created script
189 * file. The config id is used to retrieve the path to the binary file
190 * and other required files.
192 const char* MeshJobManager_i::_writeScriptFile(const char * dataFileName, const char * configId) {
194 _mkdir(LOCAL_INPUTDIR.c_str());
196 mkdir(LOCAL_INPUTDIR.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
199 // Make it static so that it's allocated once (constant name)
200 static std::string * scriptFilename = new std::string(LOCAL_INPUTDIR+"/"+SCRIPTFILE);
202 char * binpath = _configMap[configId].binpath;
203 char * envpath = _configMap[configId].envpath;
206 char fname[ _MAX_FNAME ];
207 _splitpath( dataFileName, NULL, NULL, fname, NULL );
208 const char* bname = &fname[0];
210 const char* bname = basename(dataFileName);
214 std::ofstream script(scriptFilename->c_str());
215 script << "#!/bin/sh" << std::endl;
216 script << "here=$(dirname $0)" << std::endl;
217 script << ". " << envpath << std::endl;
218 script << binpath << " $here/" << bname << std::endl;
219 // Note that we use the basename of the datafile because all data
220 // files are supposed to have been copied in the REMOTE_WORKDIR.
222 return scriptFilename->c_str();
226 // ====================================================================
227 // Functions to initialize and supervise the mesh computation job
228 // ====================================================================
230 bool MeshJobManager_i::configure(const char *configId,
231 const MESHJOB::ConfigParameter & configParameter)
233 beginService("MeshJobManager_i::configure");
235 _configMap[configId] = configParameter;
237 LOG("Adding configuration for " << configId);
238 LOG("- binpath = " << _configMap[configId].binpath);
239 LOG("- envpath = " << _configMap[configId].envpath);
241 endService("MeshJobManager_i::configure");
245 long MeshJobManager_i::JOBID_UNDEFINED = -1;
247 /*! Initialize a smesh computation job and return the job identifier */
248 CORBA::Long MeshJobManager_i::initialize(const MESHJOB::MeshJobParameterList & meshJobParameterList,
249 const char * configId)
251 beginService("MeshJobManager_i::initialize");
254 // We first analyse the CORBA sequence to store data in C++ vectors
256 std::vector<MESHJOB::MeshJobParameter> listConcreteMesh;
257 std::vector<MESHJOB::MeshJobParameter> listSteelBarMesh;
258 for(CORBA::ULong i=0; i<meshJobParameterList.length(); i++) {
259 MESHJOB::MeshJobParameter currentMesh = meshJobParameterList[i];
260 switch ( currentMesh.file_type ) {
261 case MESHJOB::MED_CONCRETE:
262 listConcreteMesh.push_back(currentMesh);
264 case MESHJOB::MED_STEELBAR:
265 listSteelBarMesh.push_back(currentMesh);
268 LOG("The type of the file is not recognized");
269 return JOBID_UNDEFINED;
273 if ( listConcreteMesh.size() != 1 ) {
274 // Not consistent with the specification
275 LOG("You specify more than one concrete mesh");
276 return JOBID_UNDEFINED;
279 LOG("Nb. concrete mesh = " << listConcreteMesh.size());
280 LOG("Nb. steelbar mesh = " << listSteelBarMesh.size());
282 // We initiate here a datetime to tag the files and folder
283 // associated to this job.
285 DWORD jobDatetimeTag = timeGetTime();
287 long jobDatetimeTag = timetag();
289 // And a MESHJOB::MeshJobPaths structure to hold the directories
290 // where to find data
291 MESHJOB::MeshJobPaths * jobPaths = new MESHJOB::MeshJobPaths();
292 jobPaths->local_inputdir = LOCAL_INPUTDIR.c_str();
293 jobPaths->local_resultdir = (LOCAL_RESULTDIR + "." + ToString(jobDatetimeTag)).c_str();
294 jobPaths->remote_workdir = (REMOTE_WORKDIR + "." + ToString(jobDatetimeTag)).c_str();
297 // Then, we have to create the padder input data file. This input
298 // data is a text file containing the list of file names and group
301 const char * dataFilename = this->_writeDataFile(listConcreteMesh, listSteelBarMesh);
302 LOG("dataFilename = " << dataFilename);
303 const char * scriptFilename = this->_writeScriptFile(dataFilename, configId);
304 LOG("scriptFilename = " << scriptFilename);
307 // Then, the following instructions consists in preparing the job
308 // parameters to request the SALOME launcher for creating a new
311 Engines::JobParameters_var jobParameters = new Engines::JobParameters;
312 jobParameters->job_type = CORBA::string_dup("command");
313 // CAUTION: the job_file must be a single filename specifying a
314 // self-consistent script to be executed without any argument on the
316 jobParameters->job_file = CORBA::string_dup(scriptFilename);
319 // Specification of the working spaces:
321 // - local_directory: can be used to specify where to find the input
322 // files on the local resource. It's optionnal if you specify the
323 // absolute path name of input files.
325 // - result_directory: must be used to specify where to download the
326 // output files on the local resources
328 // - work_directory: must be used to specify the remote directory
329 // where to put all the stuff to run the job. Note that the job
330 // will be executed from within this directory, i.e. a change
331 // directory toward this working directory is done by the batch
332 // system before running the specified job script.
334 jobParameters->local_directory = CORBA::string_dup("");
335 jobParameters->result_directory = CORBA::string_dup(jobPaths->local_resultdir);
336 jobParameters->work_directory = CORBA::string_dup(jobPaths->remote_workdir);
338 // We specify the input files that are required to execute the
339 // job_file. If basenames are specified, then the files are supposed
340 // to be located in local_directory.
341 int nbFiles = listSteelBarMesh.size()+2;
342 // The number of input file is:
343 // (nb. of steelbar meshfile)
344 // + (1 concrete meshfile)
345 // + (1 padder input file)
346 // = nb steelbar meshfile + 2
347 jobParameters->in_files.length(nbFiles);
348 jobParameters->in_files[0] = CORBA::string_dup(listConcreteMesh[0].file_name);
349 for (int i=0; i<listSteelBarMesh.size(); i++) {
350 jobParameters->in_files[1+i] = CORBA::string_dup(listSteelBarMesh[i].file_name);
352 jobParameters->in_files[1+listSteelBarMesh.size()] = CORBA::string_dup(dataFilename);
353 // Note that all these input files will be copied in the
354 // REMOTE_WORKDIR on the remote host
356 // Then, we have to specify the existance of an output
357 // filenames. The path is supposed to be a path on the remote
358 // resource, i.e. where the job is executed.
359 jobParameters->out_files.length(1);
360 std::string outputfile_name = std::string(jobPaths->remote_workdir)+"/"+OUTPUTFILE;
361 jobParameters->out_files[0] = CORBA::string_dup(outputfile_name.c_str());
363 // CAUTION: the maximum duration has to be set with a format like "hh:mm"
364 jobParameters->maximum_duration = CORBA::string_dup("01:00");
365 jobParameters->queue = CORBA::string_dup("");
367 // Setting resource and additionnal properties (if needed)
368 // The resource parameters can be initiated from scratch, for
369 // example by specifying the values in hard coding:
371 //jobParameters->resource_required.name = CORBA::string_dup("localhost");
372 //jobParameters->resource_required.hostname = CORBA::string_dup("localhost");
373 //jobParameters->resource_required.mem_mb = 1024 * 10;
374 //jobParameters->resource_required.nb_proc = 1;
376 // But it's better to initiate these parameters from a resource
377 // definition known by the resource manager. This ensures that the
378 // resource will be available:
379 //const char * resourceName = "localhost";
380 //const char * resourceName = "boulant@claui2p1";
381 //const char * resourceName = "nepal@nepal";
382 const char * resourceName = _configMap[configId].resname;
383 Engines::ResourceDefinition * resourceDefinition = _resourcesManager->GetResourceDefinition(resourceName);
384 // CAUTION: This resource should have been defined in the
385 // CatalogResource.xml associated to the SALOME application.
387 // Then, the values can be used to initiate the resource parameters
389 jobParameters->resource_required.name = CORBA::string_dup(resourceDefinition->name.in());
390 // CAUTION: the additionnal two following parameters MUST be
391 // specified explicitly, because they are not provided by the
392 // resource definition:
393 jobParameters->resource_required.mem_mb = resourceDefinition->mem_mb;
394 jobParameters->resource_required.nb_proc = resourceDefinition->nb_proc_per_node;
395 // CAUTION: the parameter mem_mb specifies the maximum memory value
396 // that could be allocated for executing the job. This takes into
397 // account not only the data that could be loaded by the batch
398 // process but also the linked dynamic library.
400 // A possible problem, for exemple in the case where you use the ssh
401 // emulation of a batch system, is to get an error message as below
402 // when libBatch try to run the ssh command:
404 // ##Â /usr/bin/ssh: error while loading shared libraries: libcrypto.so.0.9.8: failed
405 // ## to map segment from shared object: Cannot allocate memory
407 // In this exemple, the mem_mb was set to 1MB, value that is not
408 // sufficient to load the dynamic libraries linked to the ssh
409 // executable (libcrypto.so in the error message).
411 // So, even in the case of a simple test shell script, you should
412 // set this value at least to a standard threshold as 500MB
414 int jobId = JOBID_UNDEFINED;
416 jobId = _salomeLauncher->createJob(jobParameters);
417 // We register the datetime tag of this job
418 _jobDateTimeMap[jobId]=jobDatetimeTag;
419 _jobPathsMap[jobId] = jobPaths;
421 catch (const SALOME::SALOME_Exception & ex) {
422 LOG("SALOME Exception in createJob !" <<ex.details.text.in());
423 //LOG(ex.details.text.in());
424 return JOBID_UNDEFINED;
426 catch (const CORBA::SystemException& ex) {
427 LOG("Receive SALOME System Exception: "<<ex);
428 LOG("Check SALOME servers...");
429 return JOBID_UNDEFINED;
432 endService("MeshJobManager_i::initialize");
436 /*! Submit the job execution and return true if submission is OK */
437 bool MeshJobManager_i::start(CORBA::Long jobId) {
438 beginService("MeshJobManager_i::start");
441 _salomeLauncher->launchJob(jobId);
443 catch (const SALOME::SALOME_Exception & ex) {
444 LOG("SALOME Exception in createJob !" <<ex.details.text.in());
445 //LOG(ex.details.text.in());
448 catch (const CORBA::SystemException& ex) {
449 LOG("Receive SALOME System Exception: "<<ex);
450 LOG("Check SALOME servers...");
454 endService("MeshJobManager_i::initialize");
458 /*! Request the launch manager for the state of the specified job */
459 char* MeshJobManager_i::getState(CORBA::Long jobId) {
460 beginService("MeshJobManager_i::getState");
465 state = _salomeLauncher->getJobState(jobId);
467 catch (const SALOME::SALOME_Exception & ex)
469 LOG("SALOME Exception in getJobState !");
470 state = ex.details.text;
472 catch (const CORBA::SystemException& ex)
474 LOG("Receive SALOME System Exception: " << ex);
475 state="SALOME System Exception - see logs";
477 LOG("jobId="<<ToString(jobId)<<" state="<<state);
478 endService("MeshJobManager_i::getState");
479 return CORBA::string_dup(state.c_str());
482 MESHJOB::MeshJobPaths * MeshJobManager_i::getPaths(CORBA::Long jobId) {
484 MESHJOB::MeshJobPaths * jobPaths = _jobPathsMap[jobId];
485 if ( jobPaths == NULL ) {
486 LOG("You request the working paths for an undefined job (jobId="<<ToString(jobId)<<")");
487 return NULL; // Maybe raise an exception?
493 MESHJOB::MeshJobResults * MeshJobManager_i::finalize(CORBA::Long jobId) {
494 beginService("MeshJobManager_i::getResults");
495 MESHJOB::MeshJobResults * result = new MESHJOB::MeshJobResults();
497 MESHJOB::MeshJobPaths * jobPaths = this->getPaths(jobId);
498 std::string local_resultdir(jobPaths->local_resultdir);
499 result->results_dirname = local_resultdir.c_str();
502 _salomeLauncher->getJobResults(jobId, local_resultdir.c_str());
504 // __BUG__: to prevent from a bug of the MED driver (SALOME
505 // 5.1.5), we change the basename of the output file to force the
506 // complete reloading of data by the med driver.
507 long jobDatetimeTag = _jobDateTimeMap[jobId];
508 std::string outputFileName = "output"+ToString(jobDatetimeTag)+".med";
509 rename((local_resultdir+"/"+OUTPUTFILE).c_str(), (local_resultdir+"/"+outputFileName).c_str());
511 result->outputmesh_filename = outputFileName.c_str();
512 result->status = "OK";
514 catch (const SALOME::SALOME_Exception & ex)
516 LOG("SALOME Exception in getResults !");
517 result->status = "SALOME Exception in getResults !";
519 catch (const CORBA::SystemException& ex)
521 LOG("Receive CORBA System Exception: " << ex);
522 result->status = "Receive CORBA System Exception: see log";
524 endService("MeshJobManager_i::getResults");
529 /*! Clean all data associated to this job and remove the job from the launch manager */
530 bool MeshJobManager_i::clean(CORBA::Long jobId) {
531 beginService("MeshJobManager_i::clean");
533 // __GBO__ WORK IN PROGRESS: we just clean the temporary local
534 // directories. The remote working directories are tag with the
535 // execution datetime and the we prevent the task from conflict
536 // with files of another task.
537 MESHJOB::MeshJobPaths * jobPaths = this->getPaths(jobId);
538 if ( jobPaths == NULL ) return false;
541 // For safety reason (and prevent from bug that could erase the
542 // filesystem), we cancel the operation in the case where the
543 // directories to delete are not in the /tmp folder.
544 std::string shell_command("rm -rf ");
545 std::string inputdir(jobPaths->local_inputdir);
546 std::string resultdir(jobPaths->local_resultdir);
547 if ( !myStartsWith(inputdir,"/tmp/") ) {
548 LOG("WRN: The directory "<<inputdir<<" is not in /tmp. NO DELETE is done");
550 shell_command+=inputdir+" ";
552 if ( !myStartsWith(resultdir,"/tmp/")) {
553 LOG("WRN: The directory "<<resultdir<<" is not in /tmp. NO DELETE is done");
555 shell_command+=resultdir;
558 LOG("DBG: clean shell command = "<<shell_command);
560 bool cleanOk = false;
561 int error = system(shell_command.c_str());
562 if (error == 0) cleanOk = true;
564 endService("MeshJobManager_i::clean");
569 std::vector<std::string> * MeshJobManager_i::_getResourceNames() {
572 // These part is just to control the available resources
574 Engines::ResourceParameters params;
575 KERNEL::getLifeCycleCORBA()->preSet(params);
577 Engines::ResourceList * resourceList = _resourcesManager->GetFittingResources(params);
578 Engines::ResourceDefinition * resourceDefinition = NULL;
579 LOG("### resource list:");
580 std::vector<std::string>* resourceNames = new std::vector<std::string>();
582 for (int i = 0; i < resourceList->length(); i++) {
583 const char* aResourceName = (*resourceList)[i];
584 resourceNames->push_back(std::string(aResourceName));
585 LOG("resource["<<i<<"] = "<<aResourceName);
586 resourceDefinition = _resourcesManager->GetResourceDefinition(aResourceName);
587 LOG("protocol["<<i<<"] = "<<resourceDefinition->protocol);
591 // Note: a ResourceDefinition is used to create a batch configuration
592 // in the Launcher. This operation is done at Launcher startup from
593 // the configuration file CatalogResources.xml provided by the
594 // SALOME application.
595 // In the code instructions, you just have to choose a resource
596 // configuration by its name and then define the ResourceParameters
597 // that specify additionnal properties for a specific job submission
598 // (use the attribute resource_required of the JobParameters).
600 return resourceNames;
605 // ==========================================================================
607 // ==========================================================================
611 PortableServer::ObjectId * MeshJobManagerEngine_factory( CORBA::ORB_ptr orb,
612 PortableServer::POA_ptr poa,
613 PortableServer::ObjectId * contId,
614 const char *instanceName,
615 const char *interfaceName)
617 LOG("PortableServer::ObjectId * MeshJobManagerEngine_factory()");
618 MeshJobManager_i * myEngine = new MeshJobManager_i(orb, poa, contId, instanceName, interfaceName);
619 return myEngine->getId() ;