Salome HOME
correction bug spns #16713
[tools/sat.git] / commands / jobs.py
index 6c26ea688ef7c91872bb23b84de043d035805909..c043d7048e094d8463f6f40b6f9f26b9a6c3c7dc 100644 (file)
 #  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 
 import os
+import sys
+import tempfile
+import traceback
 import datetime
 import time
 import csv
 import shutil
 import itertools
 import re
-import paramiko
+
+# importing paramiko can fail; keep this module importable anyway
+try:
+  import paramiko
+except:
+  paramiko = "import paramiko impossible"
+  pass
 
 import src
+
+
 import src.ElementTree as etree
 
 STYLESHEET_GLOBAL = "jobs_global_report.xsl"
@@ -289,8 +300,7 @@ class Job(object):
                         " job --jobs_config " + 
                         os.path.join(self.machine.sat_path,
                                      self.name_remote_jobs_pyconf) +
-                        " --name " +
-                        self.name)
+                        " --name " + self.name)
         if prefix:
             self.command = prefix + ' "' + self.command +'"'
     
@@ -315,13 +325,16 @@ class Job(object):
         :return: (the output of the kill, the error of the kill)
         :rtype: (str, str)
         '''
-        
-        pids = self.get_pids()
+        try:
+            pids = self.get_pids()
+        except:
+            return ("Unable to get the pid of the command.", "")
+            
         cmd_kill = " ; ".join([("kill -2 " + pid) for pid in pids])
         (_, out_kill, err_kill) = self.machine.exec_command(cmd_kill, 
                                                             self.logger)
         time.sleep(wait)
-        return (out_kill, err_kill)
+        return (out_kill.read().decode(), err_kill.read().decode())
             
     def has_begun(self):
         '''Returns True if the job has already begun
@@ -356,7 +369,10 @@ class Job(object):
             # Put end time
             self._Tf = time.time()
             # And get the remote command status and log files
-            self.get_log_files()
+            try:
+                self.get_log_files()
+            except Exception as e:
+                self.err += _("Unable to get remote log files: %s" % e)
         
         return self._has_finished
           
@@ -503,14 +519,13 @@ class Job(object):
             self._has_finished = True
             self._has_timouted = True
             self._Tf = time.time()
-            self.get_pids()
-            (out_kill, _) = self.kill_remote_process()
-            self.out += "TIMEOUT \n" + out_kill.read().decode()
+            (out_kill, __) = self.kill_remote_process()
+            self.out += "TIMEOUT \n" + out_kill
             self.err += "TIMEOUT : %s seconds elapsed\n" % str(self.timeout)
             try:
                 self.get_log_files()
             except Exception as e:
-                self.err += _("Unable to get remote log files: %s" % e)
+                self.err += _("Unable to get remote log files!\n%s\n" % str(e))
             
     def total_duration(self):
         """Give the total duration of the job
@@ -758,8 +773,11 @@ class Jobs(object):
                     msg = _("WARNING: The job \"%(job_name)s\" requires the "
                             "machine \"%(machine_name)s\" but this machine "
                             "is not defined in the configuration file.\n"
-                            "The job will not be launched")
-                    self.logger.write(src.printcolors.printcWarning(msg))
+                            "The job will not be launched\n")
+                    self.logger.write(src.printcolors.printcWarning(
+                                        msg % {"job_name" : job_def.name,
+                                               "machine_name" : name_machine}))
+                    continue
                                   
             a_job = self.define_job(job_def, a_machine)
                 
@@ -810,6 +828,16 @@ class Jobs(object):
                 self.logger.flush()
                 res_copy = machine.copy_sat(self.runner.cfg.VARS.salometoolsway,
                                             self.job_file_path)
+
+                # set the local settings of sat on the remote machine using
+                # the init command
+                (__, out_dist, __) = machine.exec_command(
+                                os.path.join(machine.sat_path,
+                                    "sat init --base default --workdir"
+                                    " default --log_dir default"),
+                                self.logger)
+                out_dist.read()    
+                
                 # get the remote machine distribution using a sat command
                 (__, out_dist, __) = machine.exec_command(
                                 os.path.join(machine.sat_path,
@@ -817,6 +845,7 @@ class Jobs(object):
                                 self.logger)
                 machine.distribution = out_dist.read().decode().replace("\n",
                                                                         "")
+                
                 # Print the status of the copy
                 if res_copy == 0:
                     self.logger.write('\r%s' % 
@@ -1168,7 +1197,8 @@ class Gui(object):
                 continue
             for board in self.d_xml_board_files:
                 if board_job == board:
-                    if distrib is not None and distrib not in d_dist[board]:
+                    if (distrib not in [None, ''] and 
+                                            distrib not in d_dist[board]):
                         d_dist[board].append(distrib)
                         src.xmlManager.add_simple_node(
                             self.d_xml_board_files[board].xmlroot.find(
@@ -1177,7 +1207,7 @@ class Gui(object):
                                                    attrib={"name" : distrib})
                     
                 if board_job == board:
-                    if (application is not None and 
+                    if (application not in [None, ''] and 
                                     application not in d_application[board]):
                         d_application[board].append(application)
                         src.xmlManager.add_simple_node(
@@ -1309,7 +1339,6 @@ class Gui(object):
                     self.logger.write("%s\n" % src.printcolors.printcWarning(
                                                                         msg), 5)
                     
-
         # Construct the dictionnary self.history 
         for job in l_jobs + l_jobs_not_today:
             l_links = []
@@ -1615,7 +1644,7 @@ class Gui(object):
 def get_config_file_path(job_config_name, l_cfg_dir):
     found = False
     file_jobs_cfg = None
-    if os.path.exists(job_config_name):
+    if os.path.exists(job_config_name) and job_config_name.endswith(".pyconf"):
         found = True
         file_jobs_cfg = job_config_name
     else:
@@ -1631,6 +1660,38 @@ def get_config_file_path(job_config_name, l_cfg_dir):
                 break
     return found, file_jobs_cfg
 
+def develop_factorized_jobs(config_jobs):
+    '''Replace config_jobs.jobs by a list with one job per target machine
+    
+    :param config_jobs Config: the config corresponding to the jobs description
+    '''
+    developed_jobs_list = []
+    for jb in config_jobs.jobs:
+        # case where the machine is a single string: the job is kept as is
+        if type(jb.machine) == type(""):
+            developed_jobs_list.append(jb)
+            continue
+        # Case where the job lists several machines and must be developed
+        # Example:
+        # machine : ["CO7.2 physique", ["CO6.4 physique", $MONDAY, $TUESDAY ], "FD22"]
+        name_job = jb.name
+        for machine in jb.machine:
+            new_job = src.pyconf.deepCopyMapping(jb)
+            # case where the entry is only a machine name: the "when"
+            # variable of the copied job is left untouched.
+            if type(machine) == type(""):
+                new_job.machine = machine
+                new_job.name = name_job + " / " + machine
+            else:
+                # case where the days are redefined: [machine, day, day, ...]
+                new_job.machine = machine[0]
+                new_job.name = name_job + " / " + machine[0]
+                new_job.when = machine[1:]
+            developed_jobs_list.append(new_job)
+    
+    config_jobs.jobs = developed_jobs_list
+            
+
 ##
 # Describes the command
 def description():
@@ -1675,10 +1736,10 @@ def run(args, runner, logger):
     for config_file in options.jobs_cfg:
         found, file_jobs_cfg = get_config_file_path(config_file, l_cfg_dir)
         if not found:
-            msg = _("The file configuration %(name_file)s was not found."
+            msg = _("The file configuration %s was not found."
                     "\nUse the --list option to get the "
                     "possible files." % config_file)
-            src.printcolors.printcError(msg)
+            logger.write("%s\n" % src.printcolors.printcError(msg), 1)
             return 1
         l_conf_files_path.append(file_jobs_cfg)
         # Read the config that is in the file
@@ -1699,6 +1760,9 @@ def run(args, runner, logger):
                 "Job that was given in only_jobs option parameters\n")
         config_jobs.jobs = l_jb
     
+    # Parse the config jobs in order to develop all the factorized jobs
+    develop_factorized_jobs(config_jobs)
+    
     # Make a unique file that contain all the jobs in order to use it 
     # on every machine
     name_pyconf = "_".join([os.path.basename(path)[:-len('.pyconf')] 
@@ -1708,11 +1772,19 @@ def run(args, runner, logger):
     f = file( path_pyconf , 'w')
     config_jobs.__save__(f)
     
+    # log the paramiko problems
+    log_dir = src.get_log_path(runner.cfg)
+    paramiko_log_dir_path = os.path.join(log_dir, "JOBS")
+    src.ensure_path_exists(paramiko_log_dir_path)
+    paramiko.util.log_to_file(os.path.join(paramiko_log_dir_path,
+                                           logger.txtFileName))
+    
     # Initialization
     today_jobs = Jobs(runner,
                       logger,
                       path_pyconf,
                       config_jobs)
+    
     # SSH connection to all machines
     today_jobs.ssh_connection_all_machines()
     if options.test_connection:
@@ -1725,18 +1797,21 @@ def run(args, runner, logger):
         logger.flush()
         
         # Copy the stylesheets in the log directory 
-        log_dir = runner.cfg.USER.log_dir
+        log_dir = log_dir
         xsl_dir = os.path.join(runner.cfg.VARS.srcDir, 'xsl')
         files_to_copy = []
         files_to_copy.append(os.path.join(xsl_dir, STYLESHEET_GLOBAL))
         files_to_copy.append(os.path.join(xsl_dir, STYLESHEET_BOARD))
+        files_to_copy.append(os.path.join(xsl_dir, "command.xsl"))
         files_to_copy.append(os.path.join(xsl_dir, "running.gif"))
         for file_path in files_to_copy:
-            shutil.copy2(file_path, log_dir)
+            # OP We use copy instead of copy2 so that the copied files get a
+            #    fresh modification date: the LOGS directories are easier to clean
+            shutil.copy(file_path, log_dir)
         
         # Instanciate the Gui in order to produce the xml files that contain all
         # the boards
-        gui = Gui(runner.cfg.USER.log_dir,
+        gui = Gui(log_dir,
                   today_jobs.ljobs,
                   today_jobs.ljobs_not_today,
                   runner.cfg.VARS.datehour,
@@ -1769,6 +1844,18 @@ def run(args, runner, logger):
         interruped = True
         logger.write("\n\n%s\n\n" % 
                 (src.printcolors.printcWarning(_("Forced interruption"))), 1)
+    except Exception as e:
+        msg = _("CRITICAL ERROR: The jobs loop has been interrupted\n")
+        logger.write("\n\n%s\n" % src.printcolors.printcError(msg) )
+        logger.write("%s\n" % str(e))
+        # get stack
+        __, __, exc_traceback = sys.exc_info()
+        fp = tempfile.TemporaryFile()
+        traceback.print_tb(exc_traceback, file=fp)
+        fp.seek(0)
+        stack = fp.read()
+        logger.write("\nTRACEBACK: %s\n" % stack.replace('"',"'"), 1)
+        
     finally:
         res = 0
         if interruped: