3 # Copyright (C) 2010-2013 CEA/DEN
5 # This library is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU Lesser General Public
7 # License as published by the Free Software Foundation; either
8 # version 2.1 of the License.
10 # This library is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # Lesser General Public License for more details.
15 # You should have received a copy of the GNU Lesser General Public
16 # License along with this library; if not, write to the Free Software
17 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 # create a parser for command line options
24 parser = src.options.Options()
25 parser.add_option("s",
29 _("Search the duplicate files in the SOURCES directory."))
30 parser.add_option("p",
34 _("Optional: Search the duplicate files in the given "
40 _("Optional: Override the default list of filtered files."))
45 _("Optional: Override the default list of filtered "
51 _("Optional: Override the default list of filtered paths."))
# File extensions (text after the last '.') that are skipped by default
# when looking for duplicated file names.
default_extension_ignored = ['html', 'png', 'txt', 'js', 'xml', 'cmake', 'gif',
                             'm4', 'in', 'pyo', 'pyc', 'doctree', 'css']
55 default_files_ignored = ['__init__.py', 'Makefile.am', 'VERSION',
57 'README', 'AUTHORS', 'NEWS', 'COPYING', 'ChangeLog']
# No directory is filtered out by default.
default_directories_ignored = []
def list_directory(lpath, extension_ignored, files_ignored, directories_ignored):
    '''Make the list of all files and paths that are not filtered

    :param lpath List: The list of paths of the directories where to
                       search for duplicates
    :param extension_ignored List: The list of extensions to ignore
    :param files_ignored List: The list of files to ignore
    :param directories_ignored List: The list of directory paths to ignore
    :return: files_arb_out is the list of [file, path]
             and files_out is the list of [file]
    :rtype: List, List
    '''
    files_out = []
    files_arb_out = []
    for path in lpath:
        for root, __, files in os.walk(path):
            # The directory filter only depends on the walked directory, so
            # it is evaluated once per directory instead of once per file.
            # NOTE(review): the matching rule (substring of the walked path)
            # is reconstructed - confirm against the original file.
            if any(rep in root for rep in directories_ignored):
                continue
            for fic in files:
                # The "extension" is whatever follows the last dot; a file
                # name without any dot is its own extension.
                extension = fic.split('.')[-1]
                if extension in extension_ignored or fic in files_ignored:
                    continue
                # Both lists are filled in lockstep: entry i of files_out
                # is the file name of entry i of files_arb_out.
                files_out.append([fic])
                files_arb_out.append([fic, root])
    return files_arb_out, files_out
def format_list_of_str(l_str):
    '''Format a list of strings for display

    :param l_str List or Str: The variable to format
    :return: the items joined with commas if l_str is a list,
             otherwise l_str itself, unchanged
    :rtype: Str
    '''
    if isinstance(l_str, list):
        return ",".join(l_str)
    # Anything that is not a list (typically an already formatted string)
    # is passed through untouched.
    return l_str
100 def print_info(logger, info, level=2):
103 :param logger Logger: The logger instance
104 :param info List: the list of tuple to display
105 :param valMax float: the maximum value of the variable
106 :param level int: the verbose level that will be used
108 smax = max(map(lambda l: len(l[0]), info))
110 sp = " " * (smax - len(i[0]))
111 src.printcolors.print_value(logger,
113 format_list_of_str(i[1]),
115 logger.write("\n", level)
class Progress_bar:
    "Create a progress bar in the terminal"

    def __init__(self, name, valMin, valMax, logger, length = 50):
        '''Initialization of the progress bar.

        :param name str: The name of the progress bar
        :param valMin float: the minimum value of the variable
        :param valMax float: the maximum value of the variable
        :param logger Logger: the logger instance
        :param length int: the length of the progress bar
        :raise src.SatException: if valMax is not strictly greater than
                                 valMin, or if length is not positive
        '''
        self.name = name
        self.valMin = valMin
        self.valMax = valMax
        self.length = length
        self.logger = logger
        # An empty or inverted range would make the percentage computation
        # divide by zero or go negative, so it is rejected up front.
        if (self.valMax - self.valMin) <= 0 or length <= 0:
            out_err = _('ERROR: Wrong init values for the progress bar\n')
            raise src.SatException(out_err)

    def display_value_progression(self,val):
        '''Display the progress bar.

        :param val float: val must be between valMin and valMax.
        '''
        if val < self.valMin or val > self.valMax:
            # Out-of-range values are reported and no bar is drawn.
            self.logger.write(src.printcolors.printcWarning(_(
                           'WARNING : wrong value for the progress bar.\n')), 3)
            return
        perc = (float(val-self.valMin) / (self.valMax - self.valMin)) * 100.
        nb_equals = int(perc * self.length / 100)
        # The leading carriage return makes each call overwrite the
        # previously written bar on the same terminal line.
        out = '\r %s : %3d %% [%s%s]' % (self.name, perc, nb_equals*'=',
                                         (self.length - nb_equals)*' ' )
        self.logger.write(out, 3)
155 '''method that is called when salomeTools is called with --help option.
157 :return: The text to display for the find_duplicates command description.
160 return _("The find_duplicates command search recursively for all duplicates"
161 " files in a the INSTALL directory (or the optionally given "
162 "directory) and prints the found files to the terminal.\n\n"
163 "example:\nsat find_duplicates --path /tmp")
165 def run(args, runner, logger):
166 '''method that is called when salomeTools is called with find_duplicates
169 # parse the arguments
170 (options, args) = parser.parse_args(args)
172 # Determine the directory path where to search
173 # for duplicates files regarding the options
175 l_dir_path = options.path
177 src.check_config_has_application(runner.cfg)
179 l_dir_path = [os.path.join(runner.cfg.APPLICATION.workdir,
182 # find all installation paths
183 all_products = runner.cfg.APPLICATION.products.keys()
184 l_product_cfg = src.product.get_products_infos(all_products,
186 l_dir_path = [pi.install_dir for __, pi in l_product_cfg]
188 # Get the files to ignore during the searching
189 files_ignored = default_files_ignored
190 if options.exclude_file:
191 files_ignored = options.exclude_file
193 # Get the extension to ignore during the searching
194 extension_ignored = default_extension_ignored
195 if options.exclude_extension:
196 extension_ignored = options.exclude_extension
198 # Get the directory paths to ignore during the searching
199 directories_ignored = default_directories_ignored
200 if options.exclude_path:
201 directories_ignored = options.exclude_path
203 # Check the directories
204 l_path = src.deepcopy_list(l_dir_path)
206 for dir_path in l_path:
207 if not(os.path.isdir(dir_path)):
208 msg = _("%s does not exists or is not a directory path: "
209 "it will be ignored" % dir_path)
210 logger.write("%s\n" % src.printcolors.printcWarning(msg), 3)
212 l_dir_path.append(dir_path)
215 # Display some information
216 info = [(_("Directories"), "\n".join(l_dir_path)),
217 (_("Ignored files"), files_ignored),
218 (_("Ignored extensions"), extension_ignored),
219 (_("Ignored directories"), directories_ignored)
221 print_info(logger, info)
223 # Get all the files and paths
224 logger.write(_("Store all file paths ... "), 3)
226 dic, fic = list_directory(l_dir_path,
230 logger.write(src.printcolors.printcSuccess('OK\n'), 3)
232 # Eliminate all the singletons
234 range_fic = range(0,len_fic)
236 my_bar = Progress_bar(_('Eliminate the files that are not duplicated'),
242 my_bar.display_value_progression(len_fic - i)
243 if fic.count(fic[i])==1:
247 # Format the resulting variable to get a dictionary
248 logger.write(_("\n\nCompute the dict {files : [list of pathes]} ... "), 3)
251 rg_fic = range(0,len_fic)
254 if fic[i-1] != fic[i]:
259 the_file = fichier[0]
262 if fic_path[0] == the_file:
263 l_path.append(fic_path[1])
264 dic_fic_paths[the_file] = l_path
266 logger.write(src.printcolors.printcSuccess('OK\n'), 3)
268 # End the execution if no duplicates were found
269 if len(dic_fic_paths) == 0:
270 logger.write(_("No duplicate files found.\n"), 3)
273 # Check that there are no singletons in the result (it would be a bug)
274 for elem in dic_fic_paths:
275 if len(dic_fic_paths[elem])<2:
276 logger.write(_("Warning : element %s has not more than"
277 " two paths.\n") % elem, 3)
280 # Display the results
281 logger.write(src.printcolors.printcInfo(_('\nResults:\n\n')), 3)
282 max_file_name_lenght = max(map(lambda l: len(l), dic_fic_paths.keys()))
283 for fich in dic_fic_paths:
284 logger.write(src.printcolors.printcLabel(fich), 1)
285 sp = " " * (max_file_name_lenght - len(fich))
287 for rep in dic_fic_paths[fich]:
290 logger.write("\n", 1)