1 // Copyright (C) 2007-2010 CEA/DEN, EDF R&D, OPEN CASCADE
3 // Copyright (C) 2003-2007 OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
4 // CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
6 // This library is free software; you can redistribute it and/or
7 // modify it under the terms of the GNU Lesser General Public
8 // License as published by the Free Software Foundation; either
9 // version 2.1 of the License.
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // Lesser General Public License for more details.
16 // You should have received a copy of the GNU Lesser General Public
17 // License along with this library; if not, write to the Free Software
18 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
24 #include <Batch/Batch_Date.hxx>
25 #include <Batch/Batch_FactBatchManager_eLSF.hxx>
26 #include <Batch/Batch_FactBatchManager_ePBS.hxx>
27 #include <Batch/Batch_BatchManager_eClient.hxx>
28 #include <Batch/Batch_FactBatchManager_eSGE.hxx>
29 #include <Batch/Batch_FactBatchManager_eSSH.hxx>
32 #include "Basics_Utils.hxx"
33 #include "Basics_DirUtils.hxx"
34 #include "SALOME_Launcher_Handler.hxx"
35 #include "Launcher.hxx"
36 #include "Launcher_Job_Command.hxx"
42 //=============================================================================
46 * Define a CORBA single-thread policy for the server, which avoids dealing
47 * with non-thread-safe usage such as Change_Directory in the SALOME naming service
49 //=============================================================================
// Constructor: logs its invocation, then allocates and initializes the mutex
// that createJob locks while it reads _job_cpt to number a new job.
50 Launcher_cpp::Launcher_cpp()
52 LAUNCHER_MESSAGE("Launcher_cpp constructor");
// The mutex lives on the heap; the destructor destroys and deletes it.
54 _job_cpt_mutex = new pthread_mutex_t();
// NULL attributes request the default mutex attributes.
55 pthread_mutex_init(_job_cpt_mutex, NULL);
58 //=============================================================================
62 //=============================================================================
// Destructor: logs its invocation, walks the batch-manager map, deletes every
// job still stored in the job map, then destroys and frees the job-counter
// mutex.
63 Launcher_cpp::~Launcher_cpp()
65 LAUNCHER_MESSAGE("Launcher_cpp destructor");
// Walk the batch managers (createJob stores them keyed by resource name).
// NOTE(review): the per-entry action of this loop is not visible in this
// excerpt - confirm that each batch manager is released here.
67 std::map < std::string, Batch::BatchManager_eClient * >::const_iterator it1;
68 for(it1=_batchmap.begin();it1!=_batchmap.end();it1++)
// Release every job object that the launcher still holds.
70 std::map<int, Launcher::Job *>::const_iterator it_job;
71 for(it_job = _launcher_job_map.begin(); it_job != _launcher_job_map.end(); it_job++)
72 delete it_job->second;
// Destroy the mutex before freeing the storage allocated by the constructor.
75 pthread_mutex_destroy(_job_cpt_mutex);
76 delete _job_cpt_mutex;
81 //=============================================================================
83 * Add a job into the launcher - check resource and choose one
85 //=============================================================================
// Registers a job with the launcher:
//   1. asks the resource manager for the resources fitting the job's
//      required parameters;
//   2. configures the job with the description of the first fitting resource;
//   3. looks up the batch manager of that resource in _batchmap and, when the
//      map has no entry for it, builds one with FactoryBatchManager and
//      caches it;
//   4. numbers the job from _job_cpt (read under _job_cpt_mutex) and stores
//      it in _launcher_job_map.
// \param new_job : job to register
// Throws LauncherException when the resource manager raises a
// ResourcesException, when no resource fits, when a Batch::EmulationException
// is caught while building the batch manager, or when the job number is
// already present in the job map.
87 Launcher_cpp::createJob(Launcher::Job * new_job)
89 LAUNCHER_MESSAGE("Creating a new job");
91 // First step take a resource
92 std::vector<std::string> ResourceList;
93 resourceParams params = new_job->getResourceRequiredParams();
95 ResourceList = _ResManager->GetFittingResources(params);
// Resource-manager failures are re-exposed to callers as LauncherException.
97 catch(const ResourcesException &ex){
98 throw LauncherException(ex.msg.c_str());
// Without at least one fitting resource the job cannot be configured.
100 if (ResourceList.size() == 0)
102 LAUNCHER_INFOS("No adequate resource found for the job, number " << new_job->getNumber() << " - deleting it");
104 throw LauncherException("No resource found the job");
107 // Second step configure the job with the resource selected - the first of the list
108 ParserResourcesType resource_definition = _ResManager->GetResourcesDescr(ResourceList[0]);
110 // Set resource definition to the job
111 // The job will check if the definitions needed
114 new_job->setResourceDefinition(resource_definition);
// NOTE(review): only the logging of this handler is visible in this excerpt -
// confirm how the exception is propagated afterwards.
116 catch(const LauncherException &ex)
118 LAUNCHER_INFOS("Error in the definition of the resource, mess: " << ex.msg);
123 // Third step search batch manager for the resource into the map -> instantiate one if it does not exist
124 std::string resource_name = resource_definition.Name;
125 std::map<std::string, Batch::BatchManager_eClient *>::const_iterator it = _batchmap.find(resource_name);
126 if(it == _batchmap.end())
130 // Warning cannot write on one line like this, because map object is constructed before
131 // the method is called...
132 //_batchmap.[resource_name] = FactoryBatchManager(resource_definition);
// Build the manager first and insert it afterwards, so that a throwing
// factory cannot leave an entry created by operator[] in the map.
133 Batch::BatchManager_eClient * batch_client = FactoryBatchManager(resource_definition);
134 _batchmap[resource_name] = batch_client;
// NOTE(review): only the logging of this handler is visible in this excerpt -
// confirm how the exception is propagated afterwards.
136 catch(const LauncherException &ex)
138 LAUNCHER_INFOS("Error during creation of the batch manager of the resource, mess: " << ex.msg);
// Batch library failures are logged and converted to LauncherException.
142 catch(const Batch::EmulationException &ex)
144 LAUNCHER_INFOS("Error during creation of the batch manager of the resource, mess: " << ex.message);
146 throw LauncherException(ex.message);
151 // Final step - add job to the jobs map
// The counter is read while holding the mutex created by the constructor.
152 pthread_mutex_lock(_job_cpt_mutex);
153 new_job->setNumber(_job_cpt);
155 pthread_mutex_unlock(_job_cpt_mutex);
// Only insert when the number is not already used by another job.
156 std::map<int, Launcher::Job *>::const_iterator it_job = _launcher_job_map.find(new_job->getNumber());
157 if (it_job == _launcher_job_map.end())
158 _launcher_job_map[new_job->getNumber()] = new_job;
161 LAUNCHER_INFOS("A job as already the same id: " << new_job->getNumber());
163 throw LauncherException("A job as already the same id - job is not created !");
165 LAUNCHER_MESSAGE("New Job created");
168 //=============================================================================
172 //=============================================================================
// Submits a previously created job to the batch manager of its resource.
// On success the batch manager job id is stored in the job and the job state
// is set to "QUEUED".
// \param job_id : number of the job in _launcher_job_map
// Throws LauncherException when the id is unknown, when the job state is not
// "CREATED", or when submitJob raises a Batch::EmulationException.
174 Launcher_cpp::launchJob(int job_id)
176 LAUNCHER_MESSAGE("Launch a job");
178 // Check if job exist
179 std::map<int, Launcher::Job *>::const_iterator it_job = _launcher_job_map.find(job_id);
180 if (it_job == _launcher_job_map.end())
182 LAUNCHER_INFOS("Cannot find the job, is it created ? job number: " << job_id);
183 throw LauncherException("Cannot find the job, is it created ?");
186 Launcher::Job * job = it_job->second;
188 // Check job state (cannot launch a job already launched...)
189 if (job->getState() != "CREATED")
191 LAUNCHER_INFOS("Bad state of the job: " << job->getState());
192 throw LauncherException("Bad state of the job: " + job->getState());
195 std::string resource_name = job->getResourceDefinition().Name;
// NOTE(review): std::map::operator[] inserts a null pointer when the key is
// absent; this relies on createJob having registered a batch manager for the
// resource - confirm that no other path can reach this call.
197 Batch::JobId batch_manager_job_id = _batchmap[resource_name]->submitJob(*(job->getBatchJob()));
198 job->setBatchManagerJobId(batch_manager_job_id);
199 job->setState("QUEUED");
// Batch library failures are logged and converted to LauncherException.
201 catch(const Batch::EmulationException &ex)
203 LAUNCHER_INFOS("Job is not launched, exception in submitJob: " << ex.message);
204 throw LauncherException(ex.message.c_str());
206 LAUNCHER_MESSAGE("Job launched");
209 //=============================================================================
213 //=============================================================================
// Returns the state of a job as reported by Launcher::Job::updateJobState().
// \param job_id : number of the job in _launcher_job_map
// Throws LauncherException when the id is unknown.
215 Launcher_cpp::getJobState(int job_id)
217 LAUNCHER_MESSAGE("Get job state");
219 // Check if job exist
220 std::map<int, Launcher::Job *>::const_iterator it_job = _launcher_job_map.find(job_id);
221 if (it_job == _launcher_job_map.end())
223 LAUNCHER_INFOS("Cannot find the job, is it created ? job number: " << job_id);
224 throw LauncherException("Cannot find the job, is it created ?");
227 Launcher::Job * job = it_job->second;
228 std::string state = job->updateJobState();
// NOTE(review): 'state' is a local std::string. If the declared return type
// (not visible in this excerpt) is a raw character pointer, the pointer
// returned here dangles as soon as the function returns - confirm the return
// type and, if needed, return std::string by value instead.
230 return state.c_str();
233 //=============================================================================
235 * Get Job result - the result directory could be changed
237 //=============================================================================
// Imports the output files of a job through the batch manager of its
// resource, either into the caller-supplied directory or into the result
// directory stored in the job.
// \param job_id    : number of the job in _launcher_job_map
// \param directory : destination directory supplied by the caller
// Throws LauncherException when the id is unknown or when importOutputFiles
// raises a Batch::EmulationException.
239 Launcher_cpp::getJobResults(int job_id, std::string directory)
241 LAUNCHER_MESSAGE("Get Job results");
243 // Check if job exist
244 std::map<int, Launcher::Job *>::const_iterator it_job = _launcher_job_map.find(job_id);
245 if (it_job == _launcher_job_map.end())
247 LAUNCHER_INFOS("Cannot find the job, is it created ? job number: " << job_id);
248 throw LauncherException("Cannot find the job, is it created ?");
251 Launcher::Job * job = it_job->second;
252 std::string resource_name = job->getResourceDefinition().Name;
// NOTE(review): the condition selecting between the two calls below is not
// visible in this excerpt - presumably 'directory' is used when it is
// non-empty and the job's own result directory otherwise; confirm.
256 _batchmap[resource_name]->importOutputFiles(*(job->getBatchJob()), directory);
258 _batchmap[resource_name]->importOutputFiles(*(job->getBatchJob()), job->getResultDirectory());
// Batch library failures are logged and converted to LauncherException; the
// log message warns that the imported results may be incomplete.
260 catch(const Batch::EmulationException &ex)
262 LAUNCHER_INFOS("getJobResult is maybe incomplete, exception: " << ex.message);
263 throw LauncherException(ex.message.c_str());
265 LAUNCHER_MESSAGE("getJobResult ended");
268 //=============================================================================
270 * Remove the job - into the Launcher and its batch manager
272 //=============================================================================
// Removes a job from the launcher: deletes the job object and erases its
// entry from _launcher_job_map.
// \param job_id : number of the job in _launcher_job_map
// Throws LauncherException when the id is unknown.
// NOTE(review): the visible code only touches the launcher's own map; no call
// to the batch manager appears in this excerpt - confirm whether the job also
// has to be removed on the batch manager side.
274 Launcher_cpp::removeJob(int job_id)
276 LAUNCHER_MESSAGE("Remove Job");
278 // Check if job exist
// A mutable iterator is needed here because the entry is erased below.
279 std::map<int, Launcher::Job *>::iterator it_job = _launcher_job_map.find(job_id);
280 if (it_job == _launcher_job_map.end())
282 LAUNCHER_INFOS("Cannot find the job, is it created ? job number: " << job_id);
283 throw LauncherException("Cannot find the job, is it created ?");
// Free the job object first, then drop the map entry that pointed to it.
286 delete it_job->second;
287 _launcher_job_map.erase(it_job);
290 //=============================================================================
292 * create a launcher job based on a file
293 * \param xmlExecuteFile : to define the execution on the batch cluster
295 //=============================================================================
// Builds a command job from an XML description file.
// The XML file is parsed with ParseXmlFile, the parsed command is written to
// a temporary shell script, and a Launcher::Job_Command is configured with
// that script, the directories, the environment file, the input/output files
// and the resource requirements for the given cluster.
// \param xmlExecuteFile : to define the execution on the batch cluster
// \param clusterName    : used as resource hostname and as key into the
//                         parsed machines list
// \return the number of the new job (new_job->getNumber())
// NOTE(review): no call registering the job with the launcher is visible in
// this excerpt - confirm that the job is passed to createJob before its
// number is returned.
297 Launcher_cpp::createJobWithFile(const std::string xmlExecuteFile,
298 const std::string clusterName)
300 LAUNCHER_MESSAGE("Begin of Launcher_cpp::createJobWithFile");
303 ParserLauncherType job_params = ParseXmlFile(xmlExecuteFile);
305 // Creating a new job
306 Launcher::Job_Command * new_job = new Launcher::Job_Command();
// The command taken from the XML file is wrapped into a /bin/sh script
// written to a temporary file, which becomes the job file.
308 std::string cmdFile = Kernel_Utils::GetTmpFileName();
315 os.open(cmdFile.c_str(), std::ofstream::out );
316 os << "#! /bin/sh" << std::endl;
317 os << job_params.Command;
320 new_job->setJobFile(cmdFile);
321 new_job->setLocalDirectory(job_params.RefDirectory);
// Work directory and environment file are specific to the chosen cluster.
322 new_job->setWorkDirectory(job_params.MachinesList[clusterName].WorkDirectory);
323 new_job->setEnvFile(job_params.MachinesList[clusterName].EnvFile);
// NOTE(review): 'i' is a signed int compared against size(), which is
// presumably unsigned - harmless for realistic list sizes but it triggers
// sign-compare warnings.
325 for(int i=0; i < job_params.InputFile.size(); i++)
326 new_job->add_in_file(job_params.InputFile[i]);
327 for(int i=0; i < job_params.OutputFile.size();i++)
328 new_job->add_out_file(job_params.OutputFile[i]);
// Resource requirements: the cluster name is used as hostname and the number
// of processes comes from the XML file; nb_proc_per_node is set to 0.
331 p.hostname = clusterName;
334 p.nb_proc = job_params.NbOfProcesses;
336 p.nb_proc_per_node = 0;
339 new_job->setResourceRequiredParams(p);
342 return new_job->getNumber();
345 //=============================================================================
347 * Factory to instantiate the appropriate batch manager for the chosen cluster.
349 //=============================================================================
// Builds the batch manager client matching a resource description.
// The communication protocol (Batch::RSH or Batch::SSH) is derived from
// params.Protocol and the concrete factory (PBS, LSF, SGE or SSH flavour) from
// params.Batch; the selected factory is then invoked with the host name, the
// protocol, the MPI implementation string and the number of processes per
// node.
// \param params : description of the resource
// \return the batch manager created by the selected factory
// Throws LauncherException when the protocol or the batch type is not
// handled.
350 Batch::BatchManager_eClient *
351 Launcher_cpp::FactoryBatchManager(ParserResourcesType& params)
354 Batch::CommunicationProtocolType protocol;
355 Batch::FactBatchManager_eClient* fact;
357 int nb_proc_per_node = params.DataForSort._nbOfProcPerNode;
358 std::string hostname = params.HostName;
// Map the resource protocol onto the Batch library protocol type.
// NOTE(review): the case labels are not visible in this excerpt.
360 switch(params.Protocol)
363 protocol = Batch::RSH;
366 protocol = Batch::SSH;
369 throw LauncherException("Unknown protocol for this resource");
397 std::string message = "Instanciation of batch manager of type: ";
// Pick the factory matching the batch system of the resource.
// NOTE(review): 'fact' is allocated with new and no matching delete is
// visible in this excerpt - confirm who owns the factory object.
398 switch( params.Batch )
402 fact = new Batch::FactBatchManager_ePBS;
406 fact = new Batch::FactBatchManager_eLSF;
410 fact = new Batch::FactBatchManager_eSGE;
414 fact = new Batch::FactBatchManager_eSSH;
417 LAUNCHER_MESSAGE("Bad batch description of the resource: Batch = " << params.Batch);
418 throw LauncherException("No batchmanager for that cluster - Bad batch description of the resource");
420 LAUNCHER_MESSAGE(message);
// NOTE(review): 'mpi' is defined outside the lines visible in this excerpt.
421 return (*fact)(hostname.c_str(), protocol, mpi.c_str(), nb_proc_per_node);
424 //----------------------------------------------------------
425 // Without LIBBATCH - Launcher_cpp does nothing...
426 //----------------------------------------------------------
// Stub used when libBatch is unavailable: logs the problem and always throws
// LauncherException.
// \param new_job : unused by the visible code
430 Launcher_cpp::createJob(Launcher::Job * new_job)
432 LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot create a job !!!");
434 throw LauncherException("Method Launcher_cpp::createJob is not available "
435 "(libBatch was not present at compilation time)");
// Stub used when libBatch is unavailable: logs the problem and always throws
// LauncherException.
// \param job_id : unused by the visible code
439 Launcher_cpp::launchJob(int job_id)
441 LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot launch a job !!!");
442 throw LauncherException("Method Launcher_cpp::launchJob is not available "
443 "(libBatch was not present at compilation time)");
// Stub used when libBatch is unavailable: logs the problem and always throws
// LauncherException instead of returning a state.
// \param job_id : unused by the visible code
447 Launcher_cpp::getJobState(int job_id)
449 LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot get job state!!!");
450 throw LauncherException("Method Launcher_cpp::getJobState is not available "
451 "(libBatch was not present at compilation time)");
// Stub used when libBatch is unavailable: logs the problem and always throws
// LauncherException.
// \param job_id    : unused by the visible code
// \param directory : unused by the visible code
455 Launcher_cpp::getJobResults(int job_id, std::string directory)
457 LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot get job results!!!");
458 throw LauncherException("Method Launcher_cpp::getJobResults is not available "
459 "(libBatch was not present at compilation time)");
// Stub used when libBatch is unavailable: logs the problem and always throws
// LauncherException.
// \param job_id : unused by the visible code
463 Launcher_cpp::removeJob(int job_id)
465 LAUNCHER_INFOS("Launcher compiled without LIBBATCH - cannot remove job!!!");
466 throw LauncherException("Method Launcher_cpp::removeJob is not available "
467 "(libBatch was not present at compilation time)");
// Stub used when libBatch is unavailable: always throws LauncherException.
// Unlike the sibling stubs, no log message is visible before the throw.
// \param xmlExecuteFile : unused by the visible code
// \param clusterName    : unused by the visible code
471 Launcher_cpp::createJobWithFile( const std::string xmlExecuteFile, std::string clusterName)
473 throw LauncherException("Method Launcher_cpp::createJobWithFile is not available "
474 "(libBatch was not present at compilation time)");
// Parses an XML job description file into a ParserLauncherType.
// The file is first opened with fopen, then read with libxml2's xmlReadFile,
// and the resulting document is handed to a SALOME_Launcher_Handler bound to
// the job_params structure.
// \param xmlExecuteFile : path of the XML file to parse
// Throws LauncherException with a "could not parse file" or "file is not
// readable" message; the conditions guarding those two paths are not visible
// in this excerpt.
// NOTE(review): the handler is allocated with new, and neither its release
// nor the closing of aFile / freeing of aDoc is visible here - confirm that
// they are released on every path, including the throwing ones.
481 Launcher_cpp::ParseXmlFile(std::string xmlExecuteFile)
483 ParserLauncherType job_params;
484 SALOME_Launcher_Handler * handler = new SALOME_Launcher_Handler(job_params);
486 const char* aFilePath = xmlExecuteFile.c_str();
487 FILE* aFile = fopen(aFilePath, "r");
// Options value 0 requests libxml2's default parsing behaviour; NULL leaves
// the encoding unspecified.
490 xmlDocPtr aDoc = xmlReadFile(aFilePath, NULL, 0);
492 handler->ProcessXmlDocument(aDoc);
// NOTE(review): both messages are prefixed "ResourcesManager_cpp" although
// this is Launcher_cpp - presumably a copy/paste leftover; confirm before
// changing since the text is emitted at runtime.
495 std::string message = "ResourcesManager_cpp: could not parse file: " + xmlExecuteFile;
496 LAUNCHER_MESSAGE(message);
498 throw LauncherException(message);
506 std::string message = "ResourcesManager_cpp: file is not readable: " + xmlExecuteFile;
507 LAUNCHER_MESSAGE(message);
509 throw LauncherException(message);