1 // Copyright (C) 2007-2012 CEA/DEN, EDF R&D, OPEN CASCADE
3 // Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
4 // CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
6 // This library is free software; you can redistribute it and/or
7 // modify it under the terms of the GNU Lesser General Public
8 // License as published by the Free Software Foundation; either
9 // version 2.1 of the License.
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // Lesser General Public License for more details.
16 // You should have received a copy of the GNU Lesser General Public
17 // License along with this library; if not, write to the Free Software
18 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
24 #include <Batch/Batch_Date.hxx>
25 #include <Batch/Batch_BatchManagerCatalog.hxx>
26 #include <Batch/Batch_FactBatchManager_eClient.hxx>
27 #include <Batch/Batch_BatchManager_eClient.hxx>
30 #include "Basics_Utils.hxx"
31 #include "Basics_DirUtils.hxx"
32 #include "SALOME_Launcher_Handler.hxx"
33 #include "Launcher.hxx"
34 #include "Launcher_Job_Command.hxx"
40 //=============================================================================
44 * Define a CORBA single thread policy for the server, which avoids having to deal
45 * with non-thread-safe usage such as Change_Directory in the SALOME naming service
47 //=============================================================================
// Constructor: logs the call, then allocates and initializes the pthread
// mutex stored in _job_cpt_mutex (used elsewhere in this file around accesses
// to the _job_cpt job counter).
48 Launcher_cpp::Launcher_cpp()
50 LAUNCHER_MESSAGE("Launcher_cpp constructor");
// Heap-allocated mutex; the destructor destroys and deletes it.
52 _job_cpt_mutex = new pthread_mutex_t();
// A NULL attribute pointer requests the default mutex attributes.
53 pthread_mutex_init(_job_cpt_mutex, NULL);
56 //=============================================================================
60 //=============================================================================
// Destructor: deletes every Job referenced by _launcher_job_map, walks
// _batchmap, then destroys and frees the mutex created by the constructor.
61 Launcher_cpp::~Launcher_cpp()
63 LAUNCHER_MESSAGE("Launcher_cpp destructor");
// The map holds raw Job pointers, so each job is deleted explicitly.
65 std::map<int, Launcher::Job *>::const_iterator it_job;
66 for(it_job = _launcher_job_map.begin(); it_job != _launcher_job_map.end(); it_job++)
67 delete it_job->second;
// NOTE(review): presumably this loop deletes each batch manager stored in
// _batchmap, mirroring the job loop above - TODO confirm the loop body.
68 std::map <int, Batch::BatchManager_eClient * >::const_iterator it1;
69 for(it1=_batchmap.begin();it1!=_batchmap.end();it1++)
// Destroy the mutex before releasing its storage.
73 pthread_mutex_destroy(_job_cpt_mutex);
74 delete _job_cpt_mutex;
79 //=============================================================================
81 * Add a job into the launcher - check resource and choose one
83 //=============================================================================
// Registers new_job in the launcher.
// The job receives the current value of _job_cpt as its number (read while
// holding _job_cpt_mutex) and is stored in _launcher_job_map under that number.
// \param new_job job to register; the map keeps the raw pointer.
// \throws LauncherException on the duplicate-id path (a job with the same
//         number is already in the map).
85 Launcher_cpp::createJob(Launcher::Job * new_job)
87 LAUNCHER_MESSAGE("Creating a new job");
88 // Add job to the jobs map
// The counter is only touched between lock/unlock.
89 pthread_mutex_lock(_job_cpt_mutex);
90 new_job->setNumber(_job_cpt);
92 pthread_mutex_unlock(_job_cpt_mutex);
// Insert only when no job is registered under this number yet.
93 std::map<int, Launcher::Job *>::const_iterator it_job = _launcher_job_map.find(new_job->getNumber());
94 if (it_job == _launcher_job_map.end())
95 _launcher_job_map[new_job->getNumber()] = new_job;
// Duplicate id: log it and report the failure to the caller.
98 LAUNCHER_INFOS("A job as already the same id: " << new_job->getNumber());
100 throw LauncherException("A job as already the same id - job is not created !");
102 LAUNCHER_MESSAGE("New Job created");
105 //=============================================================================
109 //=============================================================================
// Submits a previously created job to its batch manager.
// Steps visible here: look the job up in _launcher_job_map, require its state
// to be "CREATED", create a batch manager for it when _batchmap has no entry
// for job_id, submit the batch job, store the returned Batch::JobId on the job
// and mark the job "QUEUED".
// \param job_id number of the job, as assigned at creation.
// \throws LauncherException if the job is unknown, if its state is not
//         "CREATED", or when submitJob raises Batch::EmulationException.
111 Launcher_cpp::launchJob(int job_id)
113 LAUNCHER_MESSAGE("Launch a job");
115 // Check if job exists
116 std::map<int, Launcher::Job *>::const_iterator it_job = _launcher_job_map.find(job_id);
117 if (it_job == _launcher_job_map.end())
119 LAUNCHER_INFOS("Cannot find the job, is it created ? job number: " << job_id);
120 throw LauncherException("Cannot find the job, is it created ?");
123 Launcher::Job * job = it_job->second;
125 // Check job state (cannot launch a job already launched...)
126 if (job->getState() != "CREATED")
128 LAUNCHER_INFOS("Bad state of the job: " << job->getState());
129 throw LauncherException("Bad state of the job: " + job->getState());
132 // Third step: search the map for the job's batch manager -> instantiate one if it does not exist
134 std::map<int, Batch::BatchManager_eClient *>::const_iterator it = _batchmap.find(job_id);
135 if(it == _batchmap.end())
137 createBatchManagerForJob(job);
// Submit through the batch manager registered for this job id and remember
// the identifier it returns.
142 Batch::JobId batch_manager_job_id = _batchmap[job_id]->submitJob(*(job->getBatchJob()));
143 job->setBatchManagerJobId(batch_manager_job_id);
144 job->setState("QUEUED");
// Translate the libBatch exception into the launcher's own exception type.
146 catch(const Batch::EmulationException &ex)
148 LAUNCHER_INFOS("Job is not launched, exception in submitJob: " << ex.message);
149 throw LauncherException(ex.message.c_str());
151 LAUNCHER_MESSAGE("Job launched");
154 //=============================================================================
158 //=============================================================================
// Returns the state of a job after asking the job to refresh it.
// \param job_id number of the job, as assigned at creation.
// \return the C string obtained from state.c_str().
// \throws LauncherException if the job is unknown, or when updateJobState
//         raises Batch::EmulationException or Batch::RunTimeException.
160 Launcher_cpp::getJobState(int job_id)
162 LAUNCHER_MESSAGE("Get job state");
164 // Check if job exists
165 std::map<int, Launcher::Job *>::const_iterator it_job = _launcher_job_map.find(job_id);
166 if (it_job == _launcher_job_map.end())
168 LAUNCHER_INFOS("Cannot find the job, is it created ? job number: " << job_id);
169 throw LauncherException("Cannot find the job, is it created ?");
172 Launcher::Job * job = it_job->second;
// Ask the job for its refreshed state.
177 state = job->updateJobState();
// Both libBatch exception types are mapped to LauncherException.
179 catch(const Batch::EmulationException &ex)
181 LAUNCHER_INFOS("getJobState failed, exception: " << ex.message);
182 throw LauncherException(ex.message.c_str());
184 catch(const Batch::RunTimeException &ex)
186 LAUNCHER_INFOS("getJobState failed, exception: " << ex.message);
187 throw LauncherException(ex.message.c_str());
// NOTE(review): c_str() is only valid while `state` is alive. If `state` is a
// function-local std::string and this function returns a raw const char*, the
// caller receives a dangling pointer - confirm the declaration of `state` and
// the declared return type.
190 return state.c_str();
193 //=============================================================================
195 * Get Job result - the result directory could be changed
197 //=============================================================================
// Imports the output files of a job through its batch manager.
// Two destinations appear below: the `directory` argument, or the job's own
// result directory (job->getResultDirectory()).
// NOTE(review): presumably `directory` is used when it is non-empty and the
// job's result directory otherwise - TODO confirm the selecting condition.
// \param job_id    number of the job, as assigned at creation.
// \param directory destination directory override.
// \throws LauncherException if the job is unknown, or when importOutputFiles
//         raises Batch::EmulationException.
199 Launcher_cpp::getJobResults(int job_id, std::string directory)
201 LAUNCHER_MESSAGE("Get Job results");
203 // Check if job exists
204 std::map<int, Launcher::Job *>::const_iterator it_job = _launcher_job_map.find(job_id);
205 if (it_job == _launcher_job_map.end())
207 LAUNCHER_INFOS("Cannot find the job, is it created ? job number: " << job_id);
208 throw LauncherException("Cannot find the job, is it created ?");
211 Launcher::Job * job = it_job->second;
// NOTE(review): resource_name appears unused in the rest of this function.
212 std::string resource_name = job->getResourceDefinition().Name;
// NOTE(review): std::map::operator[] default-inserts a null pointer when
// job_id has no entry in _batchmap (e.g. a job that has no batch manager yet);
// the call below would then dereference null - consider find() plus an
// explicit error.
216 _batchmap[job_id]->importOutputFiles(*(job->getBatchJob()), directory);
218 _batchmap[job_id]->importOutputFiles(*(job->getBatchJob()), job->getResultDirectory());
// Translate the libBatch exception into the launcher's own exception type.
220 catch(const Batch::EmulationException &ex)
222 LAUNCHER_INFOS("getJobResult is maybe incomplete, exception: " << ex.message);
223 throw LauncherException(ex.message.c_str());
225 LAUNCHER_MESSAGE("getJobResult ended");
228 //=============================================================================
230 * Get Job dump state - the result directory could be changed
232 //=============================================================================
// Imports the dump-state file of a job through its batch manager and keeps
// the value returned by importDumpStateFile in `rtn`.
// Two destinations appear below: the `directory` argument, or the job's own
// result directory (job->getResultDirectory()).
// NOTE(review): presumably `directory` is used when it is non-empty and the
// job's result directory otherwise - TODO confirm the selecting condition.
// \param job_id    number of the job, as assigned at creation.
// \param directory destination directory override.
// \throws LauncherException if the job is unknown, or when importDumpStateFile
//         raises Batch::EmulationException.
234 Launcher_cpp::getJobDumpState(int job_id, std::string directory)
237 LAUNCHER_MESSAGE("Get Job dump state");
239 // Check if job exists
240 std::map<int, Launcher::Job *>::const_iterator it_job = _launcher_job_map.find(job_id);
241 if (it_job == _launcher_job_map.end())
243 LAUNCHER_INFOS("Cannot find the job, is it created ? job number: " << job_id);
244 throw LauncherException("Cannot find the job, is it created ?");
247 Launcher::Job * job = it_job->second;
// NOTE(review): resource_name appears unused in the rest of this function.
248 std::string resource_name = job->getResourceDefinition().Name;
// NOTE(review): std::map::operator[] default-inserts a null pointer when
// job_id has no entry in _batchmap; the call below would then dereference
// null - consider find() plus an explicit error.
252 rtn = _batchmap[job_id]->importDumpStateFile(*(job->getBatchJob()), directory);
254 rtn = _batchmap[job_id]->importDumpStateFile(*(job->getBatchJob()), job->getResultDirectory());
// NOTE(review): the log messages below say "getJobResult" although this is
// getJobDumpState - looks like a copy/paste leftover.
256 catch(const Batch::EmulationException &ex)
258 LAUNCHER_INFOS("getJobResult is maybe incomplete, exception: " << ex.message);
259 throw LauncherException(ex.message.c_str());
261 LAUNCHER_MESSAGE("getJobResult ended");
265 //=============================================================================
267 * Remove the job - into the Launcher and its batch manager
269 //=============================================================================
// Removes a job from the launcher: calls removeJob() on the job object,
// deletes it and erases its entry from _launcher_job_map.
// \param job_id number of the job, as assigned at creation.
// \throws LauncherException if no job is registered under job_id.
271 Launcher_cpp::removeJob(int job_id)
273 LAUNCHER_MESSAGE("Remove Job");
275 // Check if job exists
276 std::map<int, Launcher::Job *>::iterator it_job = _launcher_job_map.find(job_id);
277 if (it_job == _launcher_job_map.end())
279 LAUNCHER_INFOS("Cannot find the job, is it created ? job number: " << job_id);
280 throw LauncherException("Cannot find the job, is it created ?");
// Let the job do its own removal work first, then free it and drop the map
// entry (erase by iterator, so no second lookup).
283 it_job->second->removeJob();
284 delete it_job->second;
285 _launcher_job_map.erase(it_job);
288 //=============================================================================
292 //=============================================================================
// Stops a job by delegating to the job object's stopJob().
// The job stays registered in _launcher_job_map.
// \param job_id number of the job, as assigned at creation.
// \throws LauncherException if no job is registered under job_id.
294 Launcher_cpp::stopJob(int job_id)
296 LAUNCHER_MESSAGE("Stop Job");
298 // Check if job exists
299 std::map<int, Launcher::Job *>::iterator it_job = _launcher_job_map.find(job_id);
300 if (it_job == _launcher_job_map.end())
302 LAUNCHER_INFOS("Cannot find the job, is it created ? job number: " << job_id);
303 throw LauncherException("Cannot find the job, is it created ?");
306 it_job->second->stopJob();
309 //=============================================================================
311 * create a launcher job based on a file
312 * \param xmlExecuteFile : to define the execution on the batch cluster
314 //=============================================================================
// Builds a Launcher::Job_Command from an XML description file.
// The XML is parsed with ParseXmlFile; the parsed command is written to a
// temporary shell script (starting with "#! /bin/sh") which becomes the job
// file. Directories, environment file, input/output files and resource
// requirements are then copied from the parsed parameters to the job.
// \param xmlExecuteFile path of the XML file describing the execution.
// \param clusterName    key into job_params.MachinesList; also used as the
//                       required hostname.
// \return the number of the new job (new_job->getNumber()).
316 Launcher_cpp::createJobWithFile(const std::string xmlExecuteFile,
317 const std::string clusterName)
319 LAUNCHER_MESSAGE("Begin of Launcher_cpp::createJobWithFile");
322 ParserLauncherType job_params = ParseXmlFile(xmlExecuteFile);
324 // Creating a new job
325 Launcher::Job_Command * new_job = new Launcher::Job_Command();
// The command goes into a temporary file that serves as the job script.
327 std::string cmdFile = Kernel_Utils::GetTmpFileName();
334 os.open(cmdFile.c_str(), std::ofstream::out );
335 os << "#! /bin/sh" << std::endl;
336 os << job_params.Command;
339 new_job->setJobFile(cmdFile);
340 new_job->setLocalDirectory(job_params.RefDirectory);
// Work directory and environment file come from the entry of the chosen cluster.
341 new_job->setWorkDirectory(job_params.MachinesList[clusterName].WorkDirectory);
342 new_job->setEnvFile(job_params.MachinesList[clusterName].EnvFile);
// NOTE(review): `int i` is compared against size(), a signed/unsigned
// comparison - harmless for small lists but it triggers compiler warnings.
344 for(int i=0; i < job_params.InputFile.size(); i++)
345 new_job->add_in_file(job_params.InputFile[i]);
346 for(int i=0; i < job_params.OutputFile.size();i++)
347 new_job->add_out_file(job_params.OutputFile[i]);
// Resource requirements: the cluster name is the hostname, the process count
// comes from the XML, and nb_proc_per_node is set to 0.
350 p.hostname = clusterName;
353 p.nb_proc = job_params.NbOfProcesses;
355 p.nb_proc_per_node = 0;
358 new_job->setResourceRequiredParams(p);
361 return new_job->getNumber();
364 //=============================================================================
366 * Factory to instantiate the appropriate batch manager for the chosen cluster.
368 //=============================================================================
// Creates the batch manager client matching a resource description.
// params.Protocol selects the communication protocol (Batch::RSH or
// Batch::SSH), a switch on params.Batch determines the batch manager type
// (bmType), and the matching factory is fetched from the
// Batch::BatchManagerCatalog singleton and invoked with the host name, user
// name, protocol, MPI setting and processes-per-node value.
// \param params description of the target resource.
// \throws LauncherException for an unknown protocol, an unsupported
//         params.Batch value, or when no factory is found for bmType.
369 Batch::BatchManager_eClient *
370 Launcher_cpp::FactoryBatchManager(ParserResourcesType& params)
373 Batch::CommunicationProtocolType protocol;
374 Batch::FactBatchManager_eClient* fact;
376 int nb_proc_per_node = params.DataForSort._nbOfProcPerNode;
377 std::string hostname = params.HostName;
// Map the resource's protocol onto the libBatch protocol enum.
379 switch(params.Protocol)
382 protocol = Batch::RSH;
385 protocol = Batch::SSH;
388 throw LauncherException("Unknown protocol for this resource");
// Select the batch manager type from the resource's batch system.
420 switch( params.Batch )
447 LAUNCHER_MESSAGE("Bad batch description of the resource: Batch = " << params.Batch);
448 throw LauncherException("No batchmanager for that cluster - Bad batch description of the resource");
// The catalog lookup is narrowed with dynamic_cast to the eClient factory type.
450 Batch::BatchManagerCatalog & cata = Batch::BatchManagerCatalog::getInstance();
451 fact = dynamic_cast<Batch::FactBatchManager_eClient*>(cata(bmType));
453 LAUNCHER_MESSAGE("Cannot find batch manager factory for " << bmType << ". Check your version of libBatch.");
454 throw LauncherException("Cannot find batch manager factory");
456 LAUNCHER_MESSAGE("Instanciation of batch manager of type: " << bmType);
457 Batch::BatchManager_eClient * batch_client = (*fact)(hostname.c_str(), params.UserName.c_str(),
458 protocol, mpi.c_str(), nb_proc_per_node);
462 //----------------------------------------------------------
463 // Without LIBBATCH - Launcher_cpp does nothing...
464 //----------------------------------------------------------
// Variant used when the launcher is compiled without libBatch: logs the
// situation and always throws LauncherException.
468 Launcher_cpp::createJob(Launcher::Job * new_job)
470 LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot create a job !!!");
472 throw LauncherException("Method Launcher_cpp::createJob is not available "
473 "(libBatch was not present at compilation time)");
// Variant used when the launcher is compiled without libBatch: logs the
// situation and always throws LauncherException.
477 Launcher_cpp::launchJob(int job_id)
479 LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot launch a job !!!");
480 throw LauncherException("Method Launcher_cpp::launchJob is not available "
481 "(libBatch was not present at compilation time)");
// Variant used when the launcher is compiled without libBatch: logs the
// situation and always throws LauncherException (no state is returned).
485 Launcher_cpp::getJobState(int job_id)
487 LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot get job state!!!");
488 throw LauncherException("Method Launcher_cpp::getJobState is not available "
489 "(libBatch was not present at compilation time)");
// Variant used when the launcher is compiled without libBatch: logs the
// situation and always throws LauncherException.
493 Launcher_cpp::getJobResults(int job_id, std::string directory)
495 LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot get job results!!!");
496 throw LauncherException("Method Launcher_cpp::getJobResults is not available "
497 "(libBatch was not present at compilation time)");
// Variant used when the launcher is compiled without libBatch: logs the
// situation and always throws LauncherException.
501 Launcher_cpp::getJobDumpState(int job_id, std::string directory)
503 LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot get job dump state!!!");
504 throw LauncherException("Method Launcher_cpp::getJobDumpState is not available "
505 "(libBatch was not present at compilation time)");
// Variant used when the launcher is compiled without libBatch: logs the
// situation and always throws LauncherException.
509 Launcher_cpp::removeJob(int job_id)
511 LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot remove job!!!");
512 throw LauncherException("Method Launcher_cpp::removeJob is not available "
513 "(libBatch was not present at compilation time)");
// Variant used when the launcher is compiled without libBatch: always throws
// LauncherException (unlike the sibling stubs, no log line is emitted here).
517 Launcher_cpp::stopJob(int job_id)
519 throw LauncherException("Method Launcher_cpp::stopJob is not available "
520 "(libBatch was not present at compilation time)");
// Variant used when the launcher is compiled without libBatch: always throws
// LauncherException (no log line is emitted here).
524 Launcher_cpp::createJobWithFile( const std::string xmlExecuteFile, std::string clusterName)
526 throw LauncherException("Method Launcher_cpp::createJobWithFile is not available "
527 "(libBatch was not present at compilation time)");
// Parses an XML execution file into a ParserLauncherType.
// The file is first opened with fopen to check that it is readable, then read
// with libxml2 (xmlReadFile) and handed to a SALOME_Launcher_Handler, which
// is constructed on job_params.
// \param xmlExecuteFile path of the XML file to parse.
// \throws LauncherException when the file cannot be parsed or is not readable.
534 Launcher_cpp::ParseXmlFile(std::string xmlExecuteFile)
536 ParserLauncherType job_params;
// NOTE(review): handler is allocated with new, aFile is a C FILE* and aDoc is
// a libxml2 document - confirm that each is released (delete / fclose /
// xmlFreeDoc) on every path, including the two throwing ones.
537 SALOME_Launcher_Handler * handler = new SALOME_Launcher_Handler(job_params);
539 const char* aFilePath = xmlExecuteFile.c_str();
540 FILE* aFile = fopen(aFilePath, "r");
543 xmlDocPtr aDoc = xmlReadFile(aFilePath, NULL, 0);
545 handler->ProcessXmlDocument(aDoc);
// NOTE(review): both messages are prefixed "ResourcesManager_cpp:" although
// this is Launcher_cpp - looks like a copy/paste leftover.
548 std::string message = "ResourcesManager_cpp: could not parse file: " + xmlExecuteFile;
549 LAUNCHER_MESSAGE(message);
551 throw LauncherException(message);
559 std::string message = "ResourcesManager_cpp: file is not readable: " + xmlExecuteFile;
560 LAUNCHER_MESSAGE(message);
562 throw LauncherException(message);
// Returns the job map by value, i.e. a copy of _launcher_job_map; the copied
// entries still point at the same Job objects held by the launcher.
570 std::map<int, Launcher::Job *>
571 Launcher_cpp::getJobs()
573 return _launcher_job_map;
// Selects a resource for the job and makes sure a batch manager exists for it.
// Step 1: ask _ResManager for the resources fitting the job's required
//         parameters, take the first one and set its description on the job.
// Step 2: if _batchmap has no entry for the job number, build a batch manager
//         with FactoryBatchManager and store it under that number.
// \param job job to configure; its state is set to "ERROR" when no resource
//            fits or when the resource definition is rejected.
// \throws LauncherException when resource selection fails, no resource fits,
//         or batch manager creation raises Batch::EmulationException or
//         Batch::InvalidArgumentException.
577 Launcher_cpp::createBatchManagerForJob(Launcher::Job * job)
579 int job_id = job->getNumber();
581 // Select a resource for the job
582 std::vector<std::string> ResourceList;
583 resourceParams params = job->getResourceRequiredParams();
586 ResourceList = _ResManager->GetFittingResources(params);
// Resource manager errors are re-expressed as LauncherException.
588 catch(const ResourcesException &ex)
590 throw LauncherException(ex.msg.c_str());
592 if (ResourceList.size() == 0)
594 LAUNCHER_INFOS("No adequate resource found for the job, number " << job->getNumber());
595 job->setState("ERROR");
596 throw LauncherException("No resource found the job");
599 // Configure the job with the resource selected - the first of the list
600 ParserResourcesType resource_definition = _ResManager->GetResourcesDescr(ResourceList[0]);
602 // Set resource definition to the job
603 // The job will check if the definitions needed
606 job->setResourceDefinition(resource_definition);
// NOTE(review): presumably the exception is rethrown after the state is set
// to "ERROR" - TODO confirm.
608 catch(const LauncherException &ex)
610 LAUNCHER_INFOS("Error in the definition of the resource, mess: " << ex.msg);
611 job->setState("ERROR");
615 // Step 2: We can now add a Factory if the resource is correctly defined
617 std::map<int, Batch::BatchManager_eClient *>::const_iterator it = _batchmap.find(job_id);
618 if(it == _batchmap.end())
622 // Warning: this cannot be written on one line like below, because the map entry is
623 // constructed before the method is called...
624 //_batchmap[job_id] = FactoryBatchManager(resource_definition);
625 Batch::BatchManager_eClient * batch_client = FactoryBatchManager(resource_definition);
626 _batchmap[job_id] = batch_client;
628 catch(const LauncherException &ex)
630 LAUNCHER_INFOS("Error during creation of the batch manager of the job, mess: " << ex.msg);
// libBatch exceptions are logged and converted to LauncherException.
633 catch(const Batch::EmulationException &ex)
635 LAUNCHER_INFOS("Error during creation of the batch manager of the job, mess: " << ex.message);
636 throw LauncherException(ex.message);
638 catch(const Batch::InvalidArgumentException &ex)
640 LAUNCHER_INFOS("Error during creation of the batch manager of the job, mess: " << ex.message);
641 throw LauncherException(ex.message);
// Registers a job in both the batch manager and the launcher map.
// The job gets the current _job_cpt value as its number (under
// _job_cpt_mutex), a batch manager is prepared for it, the batch job is added
// to that manager with addJob, and the job is finally stored in
// _launcher_job_map.
// \param new_job       job to register; the map keeps the raw pointer.
// \param job_reference reference of the job (NOTE(review): presumably handed
//                      to addJob - TODO confirm).
// \throws LauncherException when addJob raises Batch::EmulationException, on
//         the duplicate-id path, or when createBatchManagerForJob throws.
648 Launcher_cpp::addJobDirectlyToMap(Launcher::Job * new_job, const std::string job_reference)
650 // Step 0: Compute the job_id
651 pthread_mutex_lock(_job_cpt_mutex);
652 int job_id = _job_cpt;
654 new_job->setNumber(job_id);
655 pthread_mutex_unlock(_job_cpt_mutex);
657 // Step 1: check if resource is already in the map
658 createBatchManagerForJob(new_job);
660 // Step 2: add the job to the batch manager
664 Batch::JobId batch_manager_job_id = _batchmap[job_id]->addJob(*(new_job->getBatchJob()),
666 new_job->setBatchManagerJobId(batch_manager_job_id);
// Translate the libBatch exception into the launcher's own exception type.
668 catch(const Batch::EmulationException &ex)
670 LAUNCHER_INFOS("Job cannot be added, exception in addJob: " << ex.message);
671 throw LauncherException(ex.message.c_str());
674 // Step 3: add job to launcher map
675 std::map<int, Launcher::Job *>::const_iterator it_job = _launcher_job_map.find(new_job->getNumber());
676 if (it_job == _launcher_job_map.end())
677 _launcher_job_map[new_job->getNumber()] = new_job;
// Duplicate id: log it and report the failure to the caller.
680 LAUNCHER_INFOS("A job as already the same id: " << new_job->getNumber());
682 throw LauncherException("A job as already the same id - job is not created !");
684 LAUNCHER_MESSAGE("New job added");