1 // Copyright (C) 2007-2013 CEA/DEN, EDF R&D, OPEN CASCADE
3 // Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
4 // CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
6 // This library is free software; you can redistribute it and/or
7 // modify it under the terms of the GNU Lesser General Public
8 // License as published by the Free Software Foundation; either
9 // version 2.1 of the License.
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // Lesser General Public License for more details.
16 // You should have received a copy of the GNU Lesser General Public
17 // License along with this library; if not, write to the Free Software
18 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
24 #include <libbatch/BatchManagerCatalog.hxx>
25 #include <libbatch/FactBatchManager.hxx>
26 #include <libbatch/BatchManager.hxx>
29 #include "Basics_Utils.hxx"
30 #include "Basics_DirUtils.hxx"
31 #include "SALOME_Launcher_Handler.hxx"
32 #include "Launcher.hxx"
33 #include "Launcher_Job_Command.hxx"
39 //=============================================================================
43 * Define a CORBA single thread policy for the server, which avoids having to deal
44 * with non-thread-safe usage like Change_Directory in the SALOME naming service
46 //=============================================================================
// Constructs the launcher: logs the call, then allocates and initialises the
// pthread mutex kept in _job_cpt_mutex (NULL attributes, i.e. the defaults).
// That mutex is locked around the _job_cpt reads in createJob() and
// addJobDirectlyToMap(), and is destroyed and deleted by the destructor.
47 Launcher_cpp::Launcher_cpp()
49 LAUNCHER_MESSAGE("Launcher_cpp constructor");
// NOTE(review): embedded line 50 is not visible in this view; presumably it
// initialises the job counter - confirm.
51 _job_cpt_mutex = new pthread_mutex_t();
52 pthread_mutex_init(_job_cpt_mutex, NULL);
55 //=============================================================================
59 //=============================================================================
// Tears the launcher down: logs the call, deletes every Job pointer stored in
// _launcher_job_map, walks _batchmap, then destroys and frees the mutex that
// the constructor allocated.
60 Launcher_cpp::~Launcher_cpp()
62 LAUNCHER_MESSAGE("Launcher_cpp destructor");
// Each Job pointer held in the map is deleted here.
64 std::map<int, Launcher::Job *>::const_iterator it_job;
65 for(it_job = _launcher_job_map.begin(); it_job != _launcher_job_map.end(); it_job++)
66 delete it_job->second;
67 std::map <int, Batch::BatchManager * >::const_iterator it1;
// NOTE(review): the body of the loop below is not visible in this view (the
// embedded numbering jumps from 68 to 72); presumably it deletes it1->second
// - confirm against the full file.
68 for(it1=_batchmap.begin();it1!=_batchmap.end();it1++)
// Counterpart of the new + pthread_mutex_init pair in the constructor.
72 pthread_mutex_destroy(_job_cpt_mutex);
73 delete _job_cpt_mutex;
78 //=============================================================================
80 * Add a job into the launcher - check resource and choose one
82 //=============================================================================
// Registers new_job in the launcher: the job receives the current value of
// _job_cpt as its number (read while holding _job_cpt_mutex) and is stored in
// _launcher_job_map under that number.
// Throws LauncherException when the map already holds a job with that number.
84 Launcher_cpp::createJob(Launcher::Job * new_job)
86 LAUNCHER_MESSAGE("Creating a new job");
87 // Add job to the jobs map
88 pthread_mutex_lock(_job_cpt_mutex);
89 new_job->setNumber(_job_cpt);
// NOTE(review): embedded line 90 is not visible in this view; presumably it
// advances _job_cpt before the mutex is released - confirm.
91 pthread_mutex_unlock(_job_cpt_mutex);
// The map lookup and insertion below happen after the mutex is released.
92 std::map<int, Launcher::Job *>::const_iterator it_job = _launcher_job_map.find(new_job->getNumber());
93 if (it_job == _launcher_job_map.end())
94 _launcher_job_map[new_job->getNumber()] = new_job;
// Presumably the else branch (the else/brace lines are not visible here):
// the number is already taken, so the job is rejected.
97 LAUNCHER_INFOS("A job as already the same id: " << new_job->getNumber());
99 throw LauncherException("A job as already the same id - job is not created !");
101 LAUNCHER_MESSAGE("New Job created");
104 //=============================================================================
108 //=============================================================================
// Submits the job registered under job_id to a batch manager.
// Steps visible here: look the job up in _launcher_job_map, require its state
// to be "CREATED", make sure _batchmap holds a batch manager for job_id
// (createBatchManagerForJob is called when none is found), call submitJob,
// store the returned Batch::JobId on the job and set its state to "QUEUED".
// Throws LauncherException when the job is unknown, when its state is not
// "CREATED", or when a Batch::GenericException is caught (its message is reused).
110 Launcher_cpp::launchJob(int job_id)
112 LAUNCHER_MESSAGE("Launch a job");
114 // Check if job exist
115 std::map<int, Launcher::Job *>::const_iterator it_job = _launcher_job_map.find(job_id);
116 if (it_job == _launcher_job_map.end())
118 LAUNCHER_INFOS("Cannot find the job, is it created ? job number: " << job_id);
119 throw LauncherException("Cannot find the job, is it created ?");
122 Launcher::Job * job = it_job->second;
124 // Check job state (cannot launch a job already launched...)
125 if (job->getState() != "CREATED")
127 LAUNCHER_INFOS("Bad state of the job: " << job->getState());
128 throw LauncherException("Bad state of the job: " + job->getState());
131 // Third step: search the map for the batch manager of the job -> instantiate one if it does not exist
133 std::map<int, Batch::BatchManager *>::const_iterator it = _batchmap.find(job_id);
134 if(it == _batchmap.end())
136 createBatchManagerForJob(job);
// Batch managers are keyed by job id, so _batchmap[job_id] is the one found
// or created just above. The enclosing try keyword is not visible in this view.
141 Batch::JobId batch_manager_job_id = _batchmap[job_id]->submitJob(*(job->getBatchJob()));
142 job->setBatchManagerJobId(batch_manager_job_id);
143 job->setState("QUEUED");
// libBatch failures are logged and converted into LauncherException.
145 catch(const Batch::GenericException &ex)
147 LAUNCHER_INFOS("Job is not launched, exception in submitJob: " << ex.message);
148 throw LauncherException(ex.message.c_str());
150 LAUNCHER_MESSAGE("Job launched");
153 //=============================================================================
157 //=============================================================================
// Returns the state of the job registered under job_id, as reported by
// job->updateJobState().
// Throws LauncherException when the job is unknown or when a
// Batch::GenericException is caught while updating the state.
159 Launcher_cpp::getJobState(int job_id)
161 LAUNCHER_MESSAGE("Get job state");
163 // Check if job exist
164 std::map<int, Launcher::Job *>::const_iterator it_job = _launcher_job_map.find(job_id);
165 if (it_job == _launcher_job_map.end())
167 LAUNCHER_INFOS("Cannot find the job, is it created ? job number: " << job_id);
168 throw LauncherException("Cannot find the job, is it created ?");
171 Launcher::Job * job = it_job->second;
// The declaration of `state` and the try keyword are not visible in this view.
176 state = job->updateJobState();
178 catch(const Batch::GenericException &ex)
180 LAUNCHER_INFOS("getJobState failed, exception: " << ex.message);
181 throw LauncherException(ex.message.c_str());
// NOTE(review): if `state` is a function-local std::string and the (not
// visible) return type is a pointer, the pointer returned here does not
// outlive the call - confirm the return type and the lifetime of `state`.
184 return state.c_str();
187 //=============================================================================
189 * Get job assigned hostnames
191 //=============================================================================
// Returns the hostnames assigned to the job registered under job_id, as given
// by job->getAssignedHostnames().
// Throws LauncherException when no job with that id is in _launcher_job_map.
193 Launcher_cpp::getAssignedHostnames(int job_id)
195 LAUNCHER_MESSAGE("Get job assigned hostnames");
197 // Check if job exist
198 std::map<int, Launcher::Job *>::const_iterator it_job = _launcher_job_map.find(job_id);
199 if (it_job == _launcher_job_map.end())
201 LAUNCHER_INFOS("Cannot find the job, is it created ? job number: " << job_id);
202 throw LauncherException("Cannot find the job, is it created ?");
205 Launcher::Job * job = it_job->second;
206 std::string assigned_hostnames = job->getAssignedHostnames();
// NOTE(review): assigned_hostnames is a local std::string; if the (not
// visible) return type is a pointer type, the returned pointer dangles once
// this function returns - confirm the return type.
208 return assigned_hostnames.c_str();
211 //=============================================================================
213 * Get Job result - the result directory could be changed
215 //=============================================================================
// Imports the output files of the job registered under job_id through its
// batch manager. Two importOutputFiles calls are visible: one targeting the
// `directory` argument, one targeting job->getResultDirectory(). The condition
// choosing between them is not visible in this view (embedded lines 231-233
// and 235 are skipped); presumably `directory` is used when it is non-empty
// - confirm.
// Throws LauncherException when the job is unknown or when a
// Batch::GenericException is caught.
217 Launcher_cpp::getJobResults(int job_id, std::string directory)
219 LAUNCHER_MESSAGE("Get Job results");
221 // Check if job exist
222 std::map<int, Launcher::Job *>::const_iterator it_job = _launcher_job_map.find(job_id);
223 if (it_job == _launcher_job_map.end())
225 LAUNCHER_INFOS("Cannot find the job, is it created ? job number: " << job_id);
226 throw LauncherException("Cannot find the job, is it created ?");
229 Launcher::Job * job = it_job->second;
// resource_name is not used by any line visible in this view.
230 std::string resource_name = job->getResourceDefinition().Name;
// NOTE(review): std::map::operator[] inserts a null entry when job_id has no
// batch manager. Only launchJob() and addJobDirectlyToMap() are seen creating
// one, so a job that was only created would hit a null pointer here - confirm.
234 _batchmap[job_id]->importOutputFiles(*(job->getBatchJob()), directory);
236 _batchmap[job_id]->importOutputFiles(*(job->getBatchJob()), job->getResultDirectory());
238 catch(const Batch::GenericException &ex)
240 LAUNCHER_INFOS("getJobResult is maybe incomplete, exception: " << ex.message);
241 throw LauncherException(ex.message.c_str());
243 LAUNCHER_MESSAGE("getJobResult ended");
246 //=============================================================================
248 * Get Job dump state - the result directory could be changed
250 //=============================================================================
// Imports the dump-state file of the job registered under job_id through its
// batch manager and keeps the result of importDumpStateFile in `rtn`. Two
// calls are visible: one targeting the `directory` argument, one targeting
// job->getResultDirectory(). The condition choosing between them, the
// declaration of `rtn` and the return statement are not visible in this view.
// Throws LauncherException when the job is unknown or when a
// Batch::GenericException is caught.
252 Launcher_cpp::getJobDumpState(int job_id, std::string directory)
255 LAUNCHER_MESSAGE("Get Job dump state");
257 // Check if job exist
258 std::map<int, Launcher::Job *>::const_iterator it_job = _launcher_job_map.find(job_id);
259 if (it_job == _launcher_job_map.end())
261 LAUNCHER_INFOS("Cannot find the job, is it created ? job number: " << job_id);
262 throw LauncherException("Cannot find the job, is it created ?");
265 Launcher::Job * job = it_job->second;
// resource_name is not used by any line visible in this view.
266 std::string resource_name = job->getResourceDefinition().Name;
// NOTE(review): std::map::operator[] inserts a null entry when job_id has no
// batch manager (only launchJob() and addJobDirectlyToMap() are seen creating
// one) - confirm callers never reach this for a job that was only created.
270 rtn = _batchmap[job_id]->importDumpStateFile(*(job->getBatchJob()), directory);
272 rtn = _batchmap[job_id]->importDumpStateFile(*(job->getBatchJob()), job->getResultDirectory());
274 catch(const Batch::GenericException &ex)
// NOTE(review): the two log texts below say "getJobResult" although this is
// getJobDumpState; they look copied from getJobResults().
276 LAUNCHER_INFOS("getJobResult is maybe incomplete, exception: " << ex.message);
277 throw LauncherException(ex.message.c_str());
279 LAUNCHER_MESSAGE("getJobResult ended");
283 //=============================================================================
285 * Remove the job - into the Launcher and its batch manager
287 //=============================================================================
// Removes the job registered under job_id: calls the job's own removeJob(),
// deletes the Job object and erases its entry from _launcher_job_map.
// Throws LauncherException when no job with that id is in the map.
289 Launcher_cpp::removeJob(int job_id)
291 LAUNCHER_MESSAGE("Remove Job");
293 // Check if job exist
294 std::map<int, Launcher::Job *>::iterator it_job = _launcher_job_map.find(job_id);
295 if (it_job == _launcher_job_map.end())
297 LAUNCHER_INFOS("Cannot find the job, is it created ? job number: " << job_id);
298 throw LauncherException("Cannot find the job, is it created ?");
// The job is asked to remove itself before the object is destroyed; the map
// entry is erased last so it never holds a dangling pointer.
301 it_job->second->removeJob();
302 delete it_job->second;
303 _launcher_job_map.erase(it_job);
// NOTE(review): no line visible here touches _batchmap, so a batch manager
// created for this job id would stay in that map - confirm whether intended.
306 //=============================================================================
310 //=============================================================================
// Stops the job registered under job_id by delegating to the job's stopJob().
// The job stays in _launcher_job_map.
// Throws LauncherException when no job with that id is in the map.
312 Launcher_cpp::stopJob(int job_id)
314 LAUNCHER_MESSAGE("Stop Job");
316 // Check if job exist
317 std::map<int, Launcher::Job *>::iterator it_job = _launcher_job_map.find(job_id);
318 if (it_job == _launcher_job_map.end())
320 LAUNCHER_INFOS("Cannot find the job, is it created ? job number: " << job_id);
321 throw LauncherException("Cannot find the job, is it created ?");
324 it_job->second->stopJob();
327 //=============================================================================
329 * create a launcher job based on a file
330 * \param xmlExecuteFile : to define the execution on the batch cluster
332 //=============================================================================
// Builds a Launcher::Job_Command from an XML description and returns its job
// number.
// xmlExecuteFile: path handed to ParseXmlFile() to obtain the job parameters.
// clusterName:    key into job_params.MachinesList, also used as the required
//                 resource hostname.
// Several lines are not visible in this view (the declarations of `os` and
// `p`, and whatever registers the job and gives it a number); the comments
// below describe only the visible statements.
334 Launcher_cpp::createJobWithFile(const std::string xmlExecuteFile,
335 const std::string clusterName)
337 LAUNCHER_MESSAGE("Begin of Launcher_cpp::createJobWithFile");
340 ParserLauncherType job_params = ParseXmlFile(xmlExecuteFile);
342 // Creating a new job
343 Launcher::Job_Command * new_job = new Launcher::Job_Command();
// The command from the XML file is wrapped in a small /bin/sh script written
// to a temporary file; that script becomes the job file.
345 std::string cmdFile = Kernel_Utils::GetTmpFileName();
352 os.open(cmdFile.c_str(), std::ofstream::out );
353 os << "#! /bin/sh" << std::endl;
354 os << job_params.Command;
357 new_job->setJobFile(cmdFile);
358 new_job->setLocalDirectory(job_params.RefDirectory);
// Work directory and environment file come from the entry of the chosen cluster.
359 new_job->setWorkDirectory(job_params.MachinesList[clusterName].WorkDirectory);
360 new_job->setEnvFile(job_params.MachinesList[clusterName].EnvFile);
// NOTE(review): `int i` is compared against size(); if size() returns an
// unsigned type this is a signed/unsigned comparison - harmless for small lists.
362 for(int i=0; i < job_params.InputFile.size(); i++)
363 new_job->add_in_file(job_params.InputFile[i]);
364 for(int i=0; i < job_params.OutputFile.size();i++)
365 new_job->add_out_file(job_params.OutputFile[i]);
// Required resource: the named cluster with the requested number of processes.
368 p.hostname = clusterName;
371 p.nb_proc = job_params.NbOfProcesses;
373 p.nb_proc_per_node = 0;
376 new_job->setResourceRequiredParams(p);
379 return new_job->getNumber();
382 //=============================================================================
384 * Factory to instantiate the right batch manager for the chosen cluster.
386 //=============================================================================
// Creates a Batch::BatchManager for the resource described by params.
// Visible steps: translate params.Protocol into a
// Batch::CommunicationProtocolType (SH, RSH or SSH), switch on params.Batch,
// fetch the factory registered under bmType in the BatchManagerCatalog
// singleton, then invoke it with the host name, the user name, the protocol
// and the mpi string.
// Throws LauncherException for an unknown protocol, for a batch description
// with no batch manager, or when the catalog has no matching factory.
// The case labels, the assignments of bmType and mpi, and the return
// statement are not visible in this view.
387 Batch::BatchManager *
388 Launcher_cpp::FactoryBatchManager(ParserResourcesType& params)
391 Batch::CommunicationProtocolType protocol;
392 Batch::FactBatchManager * fact;
394 std::string hostname = params.HostName;
396 switch(params.Protocol)
399 protocol = Batch::SH;
402 protocol = Batch::RSH;
405 protocol = Batch::SSH;
408 throw LauncherException("Unknown protocol for this resource");
440 switch( params.Batch )
473 LAUNCHER_MESSAGE("Bad batch description of the resource: Batch = " << params.Batch);
474 throw LauncherException("No batchmanager for that cluster - Bad batch description of the resource");
// The catalog entry is down-cast to the factory type; the check guarding the
// two error lines below is not visible (presumably a null test on `fact`).
476 Batch::BatchManagerCatalog & cata = Batch::BatchManagerCatalog::getInstance();
477 fact = dynamic_cast<Batch::FactBatchManager*>(cata(bmType));
479 LAUNCHER_MESSAGE("Cannot find batch manager factory for " << bmType << ". Check your version of libBatch.");
480 throw LauncherException("Cannot find batch manager factory");
482 LAUNCHER_MESSAGE("Instanciation of batch manager of type: " << bmType);
483 Batch::BatchManager * batch_client = (*fact)(hostname.c_str(), params.UserName.c_str(),
484 protocol, mpi.c_str());
488 //----------------------------------------------------------
489 // Without LIBBATCH - Launcher_cpp does nothing...
490 //----------------------------------------------------------
// Variant for builds without libBatch: job creation is unavailable, so this
// only logs the fact and always throws LauncherException. new_job is not used.
494 Launcher_cpp::createJob(Launcher::Job * new_job)
496 LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot create a job !!!");
498 throw LauncherException("Method Launcher_cpp::createJob is not available "
499 "(libBatch was not present at compilation time)");
// Variant for builds without libBatch: launching is unavailable, so this only
// logs the fact and always throws LauncherException. job_id is not used.
503 Launcher_cpp::launchJob(int job_id)
505 LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot launch a job !!!");
506 throw LauncherException("Method Launcher_cpp::launchJob is not available "
507 "(libBatch was not present at compilation time)");
// Variant for builds without libBatch: no state can be queried, so this only
// logs the fact and always throws LauncherException. job_id is not used.
511 Launcher_cpp::getJobState(int job_id)
513 LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot get job state!!!");
514 throw LauncherException("Method Launcher_cpp::getJobState is not available "
515 "(libBatch was not present at compilation time)");
// Variant for builds without libBatch: assigned hostnames are unavailable, so
// this only logs the fact and always throws LauncherException. job_id is not used.
519 Launcher_cpp::getAssignedHostnames(int job_id)
521 LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot get job assigned hostnames!!!");
522 throw LauncherException("Method Launcher_cpp::getAssignedHostnames is not available "
523 "(libBatch was not present at compilation time)");
// Variant for builds without libBatch: results cannot be fetched, so this only
// logs the fact and always throws LauncherException. Neither argument is used.
527 Launcher_cpp::getJobResults(int job_id, std::string directory)
529 LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot get job results!!!");
530 throw LauncherException("Method Launcher_cpp::getJobResults is not available "
531 "(libBatch was not present at compilation time)");
// Variant for builds without libBatch: the dump state cannot be fetched, so
// this only logs the fact and always throws LauncherException. Neither
// argument is used.
535 Launcher_cpp::getJobDumpState(int job_id, std::string directory)
537 LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot get job dump state!!!");
538 throw LauncherException("Method Launcher_cpp::getJobDumpState is not available "
539 "(libBatch was not present at compilation time)");
// Variant for builds without libBatch: removal is unavailable, so this only
// logs the fact and always throws LauncherException. job_id is not used.
543 Launcher_cpp::removeJob(int job_id)
545 LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot remove job!!!");
546 throw LauncherException("Method Launcher_cpp::removeJob is not available "
547 "(libBatch was not present at compilation time)");
// Variant for builds without libBatch: always throws LauncherException.
// Unlike the neighbouring no-libBatch variants, no LAUNCHER_INFOS line is
// visible here before the throw. job_id is not used.
551 Launcher_cpp::stopJob(int job_id)
553 throw LauncherException("Method Launcher_cpp::stopJob is not available "
554 "(libBatch was not present at compilation time)");
// Variant for builds without libBatch: always throws LauncherException.
// Unlike the neighbouring no-libBatch variants, no LAUNCHER_INFOS line is
// visible here before the throw. Neither argument is used.
558 Launcher_cpp::createJobWithFile( const std::string xmlExecuteFile, std::string clusterName)
560 throw LauncherException("Method Launcher_cpp::createJobWithFile is not available "
561 "(libBatch was not present at compilation time)");
// Parses the XML job description at xmlExecuteFile into a ParserLauncherType.
// A SALOME_Launcher_Handler bound to job_params processes the document read by
// xmlReadFile.
// Throws LauncherException when the file cannot be parsed or is not readable.
// The conditions selecting each branch and the return statement are not
// visible in this view.
568 Launcher_cpp::ParseXmlFile(std::string xmlExecuteFile)
570 ParserLauncherType job_params;
571 SALOME_Launcher_Handler * handler = new SALOME_Launcher_Handler(job_params);
// fopen is presumably used as a readability probe only: the document itself
// is read by xmlReadFile from the path, not from aFile.
573 const char* aFilePath = xmlExecuteFile.c_str();
574 FILE* aFile = fopen(aFilePath, "r");
577 xmlDocPtr aDoc = xmlReadFile(aFilePath, NULL, 0);
579 handler->ProcessXmlDocument(aDoc);
// NOTE(review): the messages below are prefixed "ResourcesManager_cpp"
// although this is Launcher_cpp; they look copied from another class.
582 std::string message = "ResourcesManager_cpp: could not parse file: " + xmlExecuteFile;
583 LAUNCHER_MESSAGE(message);
585 throw LauncherException(message);
593 std::string message = "ResourcesManager_cpp: file is not readable: " + xmlExecuteFile;
594 LAUNCHER_MESSAGE(message);
596 throw LauncherException(message);
// NOTE(review): no delete of `handler`, fclose(aFile) or xmlFreeDoc(aDoc) is
// visible in this view; verify each is released on every path, including the
// two throwing ones.
// Returns a copy of _launcher_job_map (the map is returned by value). The Job
// pointers in the copy refer to the same Job objects that the launcher deletes
// in removeJob() and in its destructor.
604 std::map<int, Launcher::Job *>
605 Launcher_cpp::getJobs()
607 return _launcher_job_map;
// Picks a resource for `job` and makes sure _batchmap holds a batch manager
// under the job's number.
// Visible steps:
//  1. Take the job's required resource parameters, force
//     can_launch_batch_jobs to true and ask _ResManager for fitting resources.
//  2. With no fitting resource, set the job state to "ERROR" and throw.
//  3. Fetch the description of the first resource in the list and hand it to
//     the job via setResourceDefinition.
//  4. If _batchmap has no entry for the job number, build a batch manager with
//     FactoryBatchManager and store it.
// Throws LauncherException when the resource lookup fails, when no resource
// fits, or when a Batch::GenericException is caught during step 4.
611 Launcher_cpp::createBatchManagerForJob(Launcher::Job * job)
613 int job_id = job->getNumber();
615 // Select a resource for the job
616 std::vector<std::string> ResourceList;
617 resourceParams params = job->getResourceRequiredParams();
618 // Consider only resources that can launch batch jobs
619 params.can_launch_batch_jobs = true;
622 ResourceList = _ResManager->GetFittingResources(params);
// Resource-manager failures are converted to the launcher's exception type.
624 catch(const ResourcesException &ex)
626 throw LauncherException(ex.msg.c_str());
628 if (ResourceList.size() == 0)
630 LAUNCHER_INFOS("No adequate resource found for the job, number " << job->getNumber());
631 job->setState("ERROR");
632 throw LauncherException("No resource found the job");
635 // Configure the job with the resource selected - the first of the list
636 ParserResourcesType resource_definition = _ResManager->GetResourcesDescr(ResourceList[0]);
638 // Set resource definition to the job
639 // The job will check if the definitions needed
642 job->setResourceDefinition(resource_definition);
644 catch(const LauncherException &ex)
646 LAUNCHER_INFOS("Error in the definition of the resource, mess: " << ex.msg);
647 job->setState("ERROR");
// NOTE(review): what follows the setState call inside this handler is not
// visible in this view (embedded lines 648-650); presumably the exception is
// rethrown - confirm.
651 // Step 2: We can now add a Factory if the resource is correctly defined
653 std::map<int, Batch::BatchManager *>::const_iterator it = _batchmap.find(job_id);
654 if(it == _batchmap.end())
658 // Warning cannot write on one line like this, because map object is constructed before
659 // the method is called...
660 //_batchmap[job_id] = FactoryBatchManager(resource_definition);
661 Batch::BatchManager * batch_client = FactoryBatchManager(resource_definition);
662 _batchmap[job_id] = batch_client;
664 catch(const LauncherException &ex)
666 LAUNCHER_INFOS("Error during creation of the batch manager of the job, mess: " << ex.msg);
// NOTE(review): the rest of the handler above is not visible in this view
// (embedded lines 667-668); confirm whether the exception is rethrown.
669 catch(const Batch::GenericException &ex)
671 LAUNCHER_INFOS("Error during creation of the batch manager of the job, mess: " << ex.message);
672 throw LauncherException(ex.message);
// Adds new_job to the launcher and to a batch manager without going through
// createJob()/launchJob().
// Visible steps: take the job number from _job_cpt under _job_cpt_mutex,
// create the batch manager for the job, call addJob on that batch manager and
// store the returned Batch::JobId on the job, then insert the job into
// _launcher_job_map.
// Throws LauncherException when a Batch::GenericException is caught in addJob
// or when the map already holds a job with the same number; exceptions from
// createBatchManagerForJob() are not caught here.
// NOTE(review): job_reference does not appear in any visible line; presumably
// it is the second addJob argument on the skipped embedded line 696 - confirm.
679 Launcher_cpp::addJobDirectlyToMap(Launcher::Job * new_job, const std::string job_reference)
681 // Step 0: Calculated job_id
682 pthread_mutex_lock(_job_cpt_mutex);
683 int job_id = _job_cpt;
// NOTE(review): embedded line 684 is not visible in this view; presumably it
// advances _job_cpt - confirm.
685 new_job->setNumber(job_id);
686 pthread_mutex_unlock(_job_cpt_mutex);
688 // Step 1: check if resource is already in the map
689 createBatchManagerForJob(new_job);
691 // Step 2: add the job to the batch manager
695 Batch::JobId batch_manager_job_id = _batchmap[job_id]->addJob(*(new_job->getBatchJob()),
697 new_job->setBatchManagerJobId(batch_manager_job_id);
// libBatch failures are logged and converted into LauncherException.
699 catch(const Batch::GenericException &ex)
701 LAUNCHER_INFOS("Job cannot be added, exception in addJob: " << ex.message);
702 throw LauncherException(ex.message.c_str());
705 // Step 3: add job to launcher map
706 std::map<int, Launcher::Job *>::const_iterator it_job = _launcher_job_map.find(new_job->getNumber());
707 if (it_job == _launcher_job_map.end())
708 _launcher_job_map[new_job->getNumber()] = new_job;
// Presumably the else branch (the else/brace lines are not visible here):
// the number is already taken, so the job is rejected.
711 LAUNCHER_INFOS("A job as already the same id: " << new_job->getNumber());
713 throw LauncherException("A job as already the same id - job is not created !");
715 LAUNCHER_MESSAGE("New job added");