1 // Copyright (C) 2007-2010 CEA/DEN, EDF R&D, OPEN CASCADE
3 // Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
4 // CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
6 // This library is free software; you can redistribute it and/or
7 // modify it under the terms of the GNU Lesser General Public
8 // License as published by the Free Software Foundation; either
9 // version 2.1 of the License.
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // Lesser General Public License for more details.
16 // You should have received a copy of the GNU Lesser General Public
17 // License along with this library; if not, write to the Free Software
18 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
24 #include <Batch/Batch_Date.hxx>
25 #include <Batch/Batch_BatchManagerCatalog.hxx>
26 #include <Batch/Batch_FactBatchManager_eClient.hxx>
27 #include <Batch/Batch_BatchManager_eClient.hxx>
30 #include "Basics_Utils.hxx"
31 #include "Basics_DirUtils.hxx"
32 #include "SALOME_Launcher_Handler.hxx"
33 #include "Launcher.hxx"
34 #include "Launcher_Job_Command.hxx"
40 //=============================================================================
44 * Define a CORBA single thread policy for the server, which avoids dealing
45 * with non-thread-safe usage such as Change_Directory in the SALOME naming service
47 //=============================================================================
// Constructor: logs a trace message, then heap-allocates the pthread mutex
// stored in _job_cpt_mutex and initialises it with default attributes
// (NULL attribute pointer).
48 Launcher_cpp::Launcher_cpp()
50 LAUNCHER_MESSAGE("Launcher_cpp constructor");
52 _job_cpt_mutex = new pthread_mutex_t();
53 pthread_mutex_init(_job_cpt_mutex, NULL);
56 //=============================================================================
60 //=============================================================================
// Destructor: releases the objects held by the launcher.
61 Launcher_cpp::~Launcher_cpp()
63 LAUNCHER_MESSAGE("Launcher_cpp destructor");
// Delete every Job object still registered in _launcher_job_map.
65 std::map<int, Launcher::Job *>::const_iterator it_job;
66 for(it_job = _launcher_job_map.begin(); it_job != _launcher_job_map.end(); it_job++)
67 delete it_job->second;
// Walk the batch-manager map. NOTE(review): the loop body (original lines
// 70-72) is not visible in this listing - presumably it deletes each
// BatchManager_eClient; confirm.
68 std::map < std::string, Batch::BatchManager_eClient * >::const_iterator it1;
69 for(it1=_batchmap.begin();it1!=_batchmap.end();it1++)
// Destroy the mutex first, then free the storage that holds it.
73 pthread_mutex_destroy(_job_cpt_mutex);
74 delete _job_cpt_mutex;
79 //=============================================================================
81 * Add a job into the launcher - check resource and choose one
83 //=============================================================================
// Registers new_job in the launcher:
//   1. asks the resource manager for resources fitting the job's requirements,
//   2. attaches the definition of the first fitting resource to the job,
//   3. makes sure _batchmap holds a batch manager for that resource,
//   4. gives the job a number and stores it in _launcher_job_map.
// Throws LauncherException when no resource fits, when the resource manager or
// libBatch reports an error, or when the job number is already in the map.
85 Launcher_cpp::createJob(Launcher::Job * new_job)
87 LAUNCHER_MESSAGE("Creating a new job");
89 // First step take a resource
90 std::vector<std::string> ResourceList;
91 resourceParams params = new_job->getResourceRequiredParams();
93 ResourceList = _ResManager->GetFittingResources(params);
// A ResourcesException is re-thrown as a LauncherException carrying the same text.
95 catch(const ResourcesException &ex){
96 throw LauncherException(ex.msg.c_str());
98 if (ResourceList.size() == 0)
// NOTE(review): the log text announces that the job is deleted; the deletion
// itself (original line 101) is not visible in this listing.
100 LAUNCHER_INFOS("No adequate resource found for the job, number " << new_job->getNumber() << " - deleting it");
102 throw LauncherException("No resource found the job");
105 // Second step configure the job with the resource selected - the first of the list
106 ParserResourcesType resource_definition = _ResManager->GetResourcesDescr(ResourceList[0]);
108 // Set resource definition to the job
109 // The job will check if the definitions needed
112 new_job->setResourceDefinition(resource_definition);
114 catch(const LauncherException &ex)
// What follows the log in this handler (original lines 117-119) is not visible here.
116 LAUNCHER_INFOS("Error in the definition of the resource, mess: " << ex.msg);
121 // Third step search batch manager for the resource into the map -> instantiate one if it does not exist
123 std::string resource_name = resource_definition.Name;
124 std::map<std::string, Batch::BatchManager_eClient *>::const_iterator it = _batchmap.find(resource_name);
125 if(it == _batchmap.end())
129 // Warning cannot write on one line like this, because map object is constructed before
130 // the method is called...
131 //_batchmap.[resource_name] = FactoryBatchManager(resource_definition);
// The factory result is stored in a local first so that no map entry is
// created when FactoryBatchManager throws.
132 Batch::BatchManager_eClient * batch_client = FactoryBatchManager(resource_definition);
133 _batchmap[resource_name] = batch_client;
135 catch(const LauncherException &ex)
137 LAUNCHER_INFOS("Error during creation of the batch manager of the resource, mess: " << ex.msg);
// libBatch errors are logged and converted to LauncherException.
141 catch(const Batch::EmulationException &ex)
143 LAUNCHER_INFOS("Error during creation of the batch manager of the resource, mess: " << ex.message);
145 throw LauncherException(ex.message);
150 // Final step - add job to the jobs map
// The mutex is held around the job-number assignment. NOTE(review): original
// line 153, between setNumber and the unlock, is not visible - presumably the
// increment of _job_cpt; confirm. The map lookup and insertion below run
// after the unlock.
151 pthread_mutex_lock(_job_cpt_mutex);
152 new_job->setNumber(_job_cpt);
154 pthread_mutex_unlock(_job_cpt_mutex);
155 std::map<int, Launcher::Job *>::const_iterator it_job = _launcher_job_map.find(new_job->getNumber());
156 if (it_job == _launcher_job_map.end())
157 _launcher_job_map[new_job->getNumber()] = new_job;
// Otherwise the number is already taken: log and refuse the job.
160 LAUNCHER_INFOS("A job as already the same id: " << new_job->getNumber());
162 throw LauncherException("A job as already the same id - job is not created !");
164 LAUNCHER_MESSAGE("New Job created");
167 //=============================================================================
171 //=============================================================================
// Submits the job registered under job_id to the batch manager of its resource.
// Throws LauncherException when the id is unknown, when the job state is not
// "CREATED", or when submitJob raises a Batch::EmulationException.
173 Launcher_cpp::launchJob(int job_id)
175 LAUNCHER_MESSAGE("Launch a job");
177 // Check if job exist
178 std::map<int, Launcher::Job *>::const_iterator it_job = _launcher_job_map.find(job_id);
179 if (it_job == _launcher_job_map.end())
181 LAUNCHER_INFOS("Cannot find the job, is it created ? job number: " << job_id);
182 throw LauncherException("Cannot find the job, is it created ?");
185 Launcher::Job * job = it_job->second;
187 // Check job state (cannot launch a job already launched...)
188 if (job->getState() != "CREATED")
190 LAUNCHER_INFOS("Bad state of the job: " << job->getState());
191 throw LauncherException("Bad state of the job: " + job->getState());
194 std::string resource_name = job->getResourceDefinition().Name;
// NOTE(review): std::map::operator[] default-inserts a null pointer when
// resource_name is absent; this call relies on a batch manager having been
// stored for the resource beforehand (createJob does so) - confirm that every
// path that registers a job also fills _batchmap.
196 Batch::JobId batch_manager_job_id = _batchmap[resource_name]->submitJob(*(job->getBatchJob()));
// Record the id returned by the batch manager and mark the job as queued.
197 job->setBatchManagerJobId(batch_manager_job_id);
198 job->setState("QUEUED");
200 catch(const Batch::EmulationException &ex)
202 LAUNCHER_INFOS("Job is not launched, exception in submitJob: " << ex.message);
203 throw LauncherException(ex.message.c_str());
205 LAUNCHER_MESSAGE("Job launched");
208 //=============================================================================
212 //=============================================================================
// Returns the state of the job registered under job_id, as reported by
// Job::updateJobState(). Throws LauncherException when the id is unknown.
214 Launcher_cpp::getJobState(int job_id)
216 LAUNCHER_MESSAGE("Get job state");
218 // Check if job exist
219 std::map<int, Launcher::Job *>::const_iterator it_job = _launcher_job_map.find(job_id);
220 if (it_job == _launcher_job_map.end())
222 LAUNCHER_INFOS("Cannot find the job, is it created ? job number: " << job_id);
223 throw LauncherException("Cannot find the job, is it created ?");
226 Launcher::Job * job = it_job->second;
227 std::string state = job->updateJobState();
// NOTE(review): `state` is a local std::string. If the return type (declared
// on a line not visible here) is a raw `const char *`, the pointer returned
// below dangles as soon as the function returns - confirm, and return a
// std::string by value if so.
229 return state.c_str();
232 //=============================================================================
234 * Get Job result - the result directory could be changed
236 //=============================================================================
// Imports the output files of the job registered under job_id through the
// batch manager of the job's resource.
// Throws LauncherException when the id is unknown or when importOutputFiles
// raises a Batch::EmulationException.
238 Launcher_cpp::getJobResults(int job_id, std::string directory)
240 LAUNCHER_MESSAGE("Get Job results");
242 // Check if job exist
243 std::map<int, Launcher::Job *>::const_iterator it_job = _launcher_job_map.find(job_id);
244 if (it_job == _launcher_job_map.end())
246 LAUNCHER_INFOS("Cannot find the job, is it created ? job number: " << job_id);
247 throw LauncherException("Cannot find the job, is it created ?");
250 Launcher::Job * job = it_job->second;
251 std::string resource_name = job->getResourceDefinition().Name;
// Two alternative destinations: the caller-supplied `directory`, or the
// result directory stored on the job. NOTE(review): the condition selecting
// between them (original lines 252-256) is not visible in this listing -
// presumably a test on `directory` being non-empty; confirm.
255 _batchmap[resource_name]->importOutputFiles(*(job->getBatchJob()), directory);
257 _batchmap[resource_name]->importOutputFiles(*(job->getBatchJob()), job->getResultDirectory());
259 catch(const Batch::EmulationException &ex)
261 LAUNCHER_INFOS("getJobResult is maybe incomplete, exception: " << ex.message);
262 throw LauncherException(ex.message.c_str());
264 LAUNCHER_MESSAGE("getJobResult ended");
267 //=============================================================================
269 * Remove the job - into the Launcher and its batch manager
271 //=============================================================================
// Removes the job registered under job_id from the launcher.
// Throws LauncherException when the id is unknown.
273 Launcher_cpp::removeJob(int job_id)
275 LAUNCHER_MESSAGE("Remove Job");
277 // Check if job exist
// A mutable iterator is used here because the entry is erased below.
278 std::map<int, Launcher::Job *>::iterator it_job = _launcher_job_map.find(job_id);
279 if (it_job == _launcher_job_map.end())
281 LAUNCHER_INFOS("Cannot find the job, is it created ? job number: " << job_id);
282 throw LauncherException("Cannot find the job, is it created ?");
// Destroy the Job object, then drop its map entry. Only this launcher-side
// removal is visible in this listing.
285 delete it_job->second;
286 _launcher_job_map.erase(it_job);
289 //=============================================================================
291 * create a launcher job based on a file
292 * \param xmlExecuteFile : to define the execution on the batch cluster
294 //=============================================================================
// Builds a Job_Command from an XML description and returns its job number.
//   xmlExecuteFile : XML file parsed by ParseXmlFile to obtain the job parameters
//   clusterName    : key into job_params.MachinesList, also used as the
//                    hostname of the required resource
296 Launcher_cpp::createJobWithFile(const std::string xmlExecuteFile,
297 const std::string clusterName)
299 LAUNCHER_MESSAGE("Begin of Launcher_cpp::createJobWithFile");
302 ParserLauncherType job_params = ParseXmlFile(xmlExecuteFile);
304 // Creating a new job
305 Launcher::Job_Command * new_job = new Launcher::Job_Command();
// Write the command into a temporary shell script: a "#! /bin/sh" line
// followed by the command text from the XML file.
// NOTE(review): the declaration of `os` and the lines around it (original
// lines 308-313, 317-318) are not visible in this listing.
307 std::string cmdFile = Kernel_Utils::GetTmpFileName();
314 os.open(cmdFile.c_str(), std::ofstream::out );
315 os << "#! /bin/sh" << std::endl;
316 os << job_params.Command;
// Configure the job from the parsed parameters; the work directory and
// environment file come from the MachinesList entry of the target cluster.
319 new_job->setJobFile(cmdFile);
320 new_job->setLocalDirectory(job_params.RefDirectory);
321 new_job->setWorkDirectory(job_params.MachinesList[clusterName].WorkDirectory);
322 new_job->setEnvFile(job_params.MachinesList[clusterName].EnvFile);
// NOTE(review): `i` is a signed int compared against size(), which is
// presumably unsigned (the container type is not visible here).
324 for(int i=0; i < job_params.InputFile.size(); i++)
325 new_job->add_in_file(job_params.InputFile[i]);
326 for(int i=0; i < job_params.OutputFile.size();i++)
327 new_job->add_out_file(job_params.OutputFile[i]);
// Resource requirements: target host and process count. The declaration of
// `p` and any other fields set on it are on lines not visible here.
330 p.hostname = clusterName;
333 p.nb_proc = job_params.NbOfProcesses;
335 p.nb_proc_per_node = 0;
338 new_job->setResourceRequiredParams(p);
// NOTE(review): original lines 339-340 are not visible - presumably the call
// that registers the job and assigns the number returned below; confirm.
341 return new_job->getNumber();
344 //=============================================================================
346 * Factory to instantiate the appropriate batch manager for the chosen cluster.
348 //=============================================================================
// Creates a libBatch client for the resource described by params.
// Throws LauncherException for an unknown protocol, an unsupported batch
// type, or when no factory can be obtained from the libBatch catalog.
349 Batch::BatchManager_eClient *
350 Launcher_cpp::FactoryBatchManager(ParserResourcesType& params)
353 Batch::CommunicationProtocolType protocol;
354 Batch::FactBatchManager_eClient* fact;
356 int nb_proc_per_node = params.DataForSort._nbOfProcPerNode;
357 std::string hostname = params.HostName;
// Translate the resource protocol into the libBatch protocol enum. The case
// labels are on lines not visible in this listing.
359 switch(params.Protocol)
362 protocol = Batch::RSH;
365 protocol = Batch::SSH;
368 throw LauncherException("Unknown protocol for this resource");
// Select the batch manager type from params.Batch. NOTE(review): the case
// labels and the declarations of `bmType` and `mpi` (original lines 369-414)
// are not visible here.
397 switch( params.Batch )
415 LAUNCHER_MESSAGE("Bad batch description of the resource: Batch = " << params.Batch);
416 throw LauncherException("No batchmanager for that cluster - Bad batch description of the resource");
// Look the factory up in the libBatch catalog; dynamic_cast yields a null
// pointer when the catalog entry is not a FactBatchManager_eClient.
418 Batch::BatchManagerCatalog & cata = Batch::BatchManagerCatalog::getInstance();
419 fact = dynamic_cast<Batch::FactBatchManager_eClient*>(cata(bmType));
// The test guarding this error path (original line 420) is not visible here.
421 LAUNCHER_MESSAGE("Cannot find batch manager factory for " << bmType << ". Check your version of libBatch.");
422 throw LauncherException("Cannot find batch manager factory");
424 LAUNCHER_MESSAGE("Instanciation of batch manager of type: " << bmType);
// Build the client through the factory and set the user name on it.
425 Batch::BatchManager_eClient * batch_client = (*fact)(hostname.c_str(), protocol, mpi.c_str(), nb_proc_per_node);
426 batch_client->setUsername(params.UserName);
430 //----------------------------------------------------------
431 // Without LIBBATCH - Launcher_cpp does nothing...
432 //----------------------------------------------------------
// libBatch-less variant of createJob: logs an info message and throws a
// LauncherException stating that the method is not available.
436 Launcher_cpp::createJob(Launcher::Job * new_job)
438 LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot create a job !!!");
440 throw LauncherException("Method Launcher_cpp::createJob is not available "
441 "(libBatch was not present at compilation time)");
// libBatch-less variant of launchJob: logs an info message and throws a
// LauncherException stating that the method is not available.
445 Launcher_cpp::launchJob(int job_id)
447 LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot launch a job !!!");
448 throw LauncherException("Method Launcher_cpp::launchJob is not available "
449 "(libBatch was not present at compilation time)");
// libBatch-less variant of getJobState: logs an info message and throws a
// LauncherException stating that the method is not available.
453 Launcher_cpp::getJobState(int job_id)
455 LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot get job state!!!");
456 throw LauncherException("Method Launcher_cpp::getJobState is not available "
457 "(libBatch was not present at compilation time)");
// libBatch-less variant of getJobResults: logs an info message and throws a
// LauncherException stating that the method is not available.
461 Launcher_cpp::getJobResults(int job_id, std::string directory)
463 LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot get job results!!!");
464 throw LauncherException("Method Launcher_cpp::getJobResults is not available "
465 "(libBatch was not present at compilation time)");
// libBatch-less variant of removeJob: logs an info message and throws a
// LauncherException stating that the method is not available.
469 Launcher_cpp::removeJob(int job_id)
471 LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot remove job!!!");
472 throw LauncherException("Method Launcher_cpp::removeJob is not available "
473 "(libBatch was not present at compilation time)");
// libBatch-less variant of createJobWithFile: throws a LauncherException
// stating that the method is not available. Unlike the sibling stubs, no log
// call is visible before the throw.
477 Launcher_cpp::createJobWithFile( const std::string xmlExecuteFile, std::string clusterName)
479 throw LauncherException("Method Launcher_cpp::createJobWithFile is not available "
480 "(libBatch was not present at compilation time)");
// Parses the XML job description at xmlExecuteFile into a ParserLauncherType
// by feeding the libxml2 document to a SALOME_Launcher_Handler bound to
// job_params. Throws LauncherException when the file cannot be parsed or is
// not readable.
487 Launcher_cpp::ParseXmlFile(std::string xmlExecuteFile)
489 ParserLauncherType job_params;
// NOTE(review): `handler` is allocated with new; its release is not visible
// in this listing - confirm that it is deleted on every path, including the
// two throwing ones.
490 SALOME_Launcher_Handler * handler = new SALOME_Launcher_Handler(job_params);
492 const char* aFilePath = xmlExecuteFile.c_str();
// The file is opened before parsing; the "file is not readable" error below
// corresponds to this open failing. NOTE(review): the matching fclose and
// the release of aDoc are on lines not visible here; confirm.
493 FILE* aFile = fopen(aFilePath, "r");
496 xmlDocPtr aDoc = xmlReadFile(aFilePath, NULL, 0);
498 handler->ProcessXmlDocument(aDoc);
// Parse failure. NOTE(review): both messages are prefixed
// "ResourcesManager_cpp:" although this is Launcher_cpp - possibly a
// copy/paste leftover.
501 std::string message = "ResourcesManager_cpp: could not parse file: " + xmlExecuteFile;
502 LAUNCHER_MESSAGE(message);
504 throw LauncherException(message);
// Unreadable file.
512 std::string message = "ResourcesManager_cpp: file is not readable: " + xmlExecuteFile;
513 LAUNCHER_MESSAGE(message);
515 throw LauncherException(message);
// Returns the job map by value: the caller receives a copy of the map, whose
// Job pointers are the same ones stored in _launcher_job_map.
523 std::map<int, Launcher::Job *>
524 Launcher_cpp::getJobs()
526 return _launcher_job_map;
// Ensures _batchmap holds a batch manager for resource_name, creating one
// through FactoryBatchManager when the entry is missing.
// Throws LauncherException when the resource manager does not provide the
// resource description or when libBatch raises an EmulationException.
530 Launcher_cpp::checkFactoryForResource(const std::string & resource_name)
532 // Step 1: Check if resource exist in the resource manager
533 ParserResourcesType resource_definition;
536 resource_definition = _ResManager->GetResourcesDescr(resource_name);
// A ResourcesException is logged and re-thrown as a LauncherException.
538 catch(const ResourcesException &ex)
540 LAUNCHER_INFOS(ex.msg);
541 throw LauncherException(ex.msg);
544 // Step 2: We can now add a Factory if the resource is correctly defined
546 std::map<std::string, Batch::BatchManager_eClient *>::const_iterator it = _batchmap.find(resource_name);
547 if(it == _batchmap.end())
551 // Warning cannot write on one line like this, because map object is constructed before
552 // the method is called...
553 //_batchmap.[resource_name] = FactoryBatchManager(resource_definition);
// The factory result is stored in a local first so that no map entry is
// created when FactoryBatchManager throws.
554 Batch::BatchManager_eClient * batch_client = FactoryBatchManager(resource_definition);
555 _batchmap[resource_name] = batch_client;
557 catch(const LauncherException &ex)
// What follows the log in this handler (original lines 560-561) is not visible here.
559 LAUNCHER_INFOS("Error during creation of the batch manager of the resource, mess: " << ex.msg);
562 catch(const Batch::EmulationException &ex)
564 LAUNCHER_INFOS("Error during creation of the batch manager of the resource, mess: " << ex.message);
565 throw LauncherException(ex.message);
// Adds new_job to the launcher without going through resource selection:
// ensures a batch manager exists for the job's resource, registers the job
// with that batch manager through addJob, then numbers the job and stores it
// in _launcher_job_map.
// Throws LauncherException on resource or libBatch errors, or when the job
// number is already present in the map.
572 Launcher_cpp::addJobDirectlyToMap(Launcher::Job * new_job, const std::string job_reference)
574 // Step 1: check if resource is already in the map
575 std::string resource_name = new_job->getResourceDefinition().Name;
576 checkFactoryForResource(resource_name);
// Refresh the job's resource definition from the resource manager.
577 ParserResourcesType resource_definition = _ResManager->GetResourcesDescr(resource_name);
578 new_job->setResourceDefinition(resource_definition);
580 // Step 2: add the job to the batch manager
// NOTE(review): the remaining argument(s) of addJob are on original line 585,
// which is not visible in this listing - presumably job_reference; confirm.
584 Batch::JobId batch_manager_job_id = _batchmap[resource_name]->addJob(*(new_job->getBatchJob()),
586 new_job->setBatchManagerJobId(batch_manager_job_id);
588 catch(const Batch::EmulationException &ex)
// NOTE(review): the log text mentions submitJob although the call above is addJob.
590 LAUNCHER_INFOS("Job is not launched, exception in submitJob: " << ex.message);
591 throw LauncherException(ex.message.c_str());
594 // Step 3: add job to launcher map
// The mutex is held around the job-number assignment. NOTE(review): original
// line 597, between setNumber and the unlock, is not visible - presumably the
// increment of _job_cpt; confirm. The map lookup and insertion below run
// after the unlock.
595 pthread_mutex_lock(_job_cpt_mutex);
596 new_job->setNumber(_job_cpt);
598 pthread_mutex_unlock(_job_cpt_mutex);
599 std::map<int, Launcher::Job *>::const_iterator it_job = _launcher_job_map.find(new_job->getNumber());
600 if (it_job == _launcher_job_map.end())
601 _launcher_job_map[new_job->getNumber()] = new_job;
// Otherwise the number is already taken: log and refuse the job.
604 LAUNCHER_INFOS("A job as already the same id: " << new_job->getNumber());
606 throw LauncherException("A job as already the same id - job is not created !");
608 LAUNCHER_MESSAGE("New job added");