1 | ###############################################################################␊ |
2 | # Copyright 2019 WSL Institute for Snow and Avalanche Research SLF-DAVOS #␊ |
3 | ###############################################################################␊ |
4 | # This file is part of INIshell.␊ |
5 | # INIshell is free software: you can redistribute it and/or modify␊ |
6 | # it under the terms of the GNU General Public License as published by␊ |
7 | # the Free Software Foundation, either version 3 of the License, or␊ |
8 | # (at your option) any later version.␊ |
9 | # INIshell is distributed in the hope that it will be useful,␊ |
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of␊ |
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the␊ |
12 | # GNU General Public License for more details.␊ |
13 | # You should have received a copy of the GNU General Public License␊ |
14 | # along with INIshell. If not, see <http://www.gnu.org/licenses/>.␊ |
15 | ␊ |
16 | # Quick script to parse SLF's code base for the use of INI file queries.␊ |
17 | # The goal is to get a list of INI keys the software uses, and compare that to␊ |
18 | # the INI keys found in the INIshell XMLs to get hints for missing documentation.␊ |
19 | # Synopsis: python3 iniqueryparser.py␊ |
20 | # Cf. parseSourceFile() to add new syntax patterns.␊ |
21 | # Michael Reisecker, 2019-12␊ |
22 | ␊ |
23 | import os, re␊ |
24 | ␊ |
def getCodeBaseFiles(listing_file):
    """Read the description of the code base to parse from a settings file.

    The settings file is processed line by line. Empty lines and lines starting
    with '#' are skipped. Lines starting with one of the following prefixes are
    settings, every other line is stored verbatim in the returned file list:
    xmlpath=PATH -- stored as XML path
    extensions=ext,ext2,... -- appended to the list of file extensions
    ignore=KEY,KEY2,... -- appended to the list of keys to ignore
    base=PATH -- base source code directory (prepended to all exclusions)
    exclude=dir,dir2,... -- appended to the list of exclusions
    Only the first '=' of a settings line separates name and value, so values
    may contain '=' themselves. Empty items of comma separated lists are dropped.

    Keyword arguments:
    listing_file -- The settings file with file extensions, and a file list

    Returns the tuple (xml_path, base_path, extensions, files, exclusions,
    ignore_keys). xml_path is None without an "xmlpath" line, base_path is ''
    without a "base" line.
    """
    extensions = []
    files = []
    exclusions = []
    ignore_keys = []
    xml_path = None
    base_path = ''

    def split_list(value):
        #drop empty items so that e. g. "extensions=" results in an empty list
        return [item for item in value.split(',') if item]

    with open(listing_file, 'r') as infile: #closed again even if reading fails
        file_content = infile.read().splitlines()

    for line in file_content:
        if not line or line.startswith('#'): #skip empty lines and comments
            continue
        name, separator, value = line.partition('=') #split at the first '=' only
        setting = name if separator else None
        if setting == 'xmlpath':
            xml_path = value
        elif setting == 'extensions': #format "extensions=ext,ext2,..."
            extensions.extend(split_list(value))
        elif setting == 'ignore': #keys to ignore
            ignore_keys.extend(split_list(value))
        elif setting == 'base': #base source code directory
            base_path = value
        elif setting == 'exclude':
            exclusions.extend(split_list(value))
        else: #anything else is an entry of the file list
            files.append(line)

    if base_path: #prepend optional base path (all lines are read by now)
        exclusions = [base_path + path for path in exclusions]
    return xml_path, base_path, extensions, files, exclusions, ignore_keys
66 | ␊ |
def walkXmlFiles(path, keys_in_xml, ignore_keys):
    """Recursively look for XML files below a folder and hand each one to parseXmlFile().

    Files are recognized by their extension ".xml" in any letter case.

    Keyword arguments:
    path -- The folder to search including its subdirectories. If None, an
            error message is printed and nothing else happens.
    keys_in_xml -- Output parameter for found XML keys (passed on to the parser).
    ignore_keys -- List of keys that should not be reported (passed on to the parser).
    """
    if path is None:
        print("---------- [E] No path specified")
        return
    for current_dir, _subdirs, names in os.walk(path):
        for name in names:
            _stem, suffix = os.path.splitext(name)
            if suffix.lower() != '.xml':
                continue #not an XML file
            parseXmlFile(os.path.join(current_dir, name), keys_in_xml, ignore_keys)
83 | ␊ |
def parseXmlFile(file_name, keys_in_xml, ignore_keys):
    """Parse an XML file for INI keys.

    Each line is searched for a key="..." attribute; if a line holds several of
    them only the last one is used. Lines without such an attribute (including
    lines that contain 'key=' without a following double quote) are skipped.
    The found text is normalized with cleanKey(). If the resulting key contains
    '@', this character is replaced by the most recent key without '@', and that
    most recent key is remembered as a partial key. At the end duplicates and
    all partial keys are removed from keys_in_xml (this also affects items that
    were already in the list when the function was called); the order of first
    appearance is kept.

    Keyword arguments:
    file_name -- The XML file to parse.
    keys_in_xml -- Output parameter for found XML keys.
    ignore_keys -- List of keys that should not be reported.
    """
    key_pattern = re.compile('(.*)' + 'key=' + '"([^"]*).*')
    shortcut_keys = set()
    last_key_without_sub = ''
    with open(file_name) as infile: #make sure the file is closed again
        for line in infile:
            key_match = key_pattern.search(line)
            if key_match is None: #e. g. key='...' or "key=" within some text
                continue
            key = cleanKey(key_match.group(2))
            if '@' in key:
                key = key.replace('@', last_key_without_sub)
                #shortcut_keys are partial keys (e. g. key of a Horizontal panel)
                #that aren't a key by themselves:
                shortcut_keys.add(last_key_without_sub) #remove retrospectively
            else:
                last_key_without_sub = key
            if key not in ignore_keys:
                keys_in_xml.append(key)
    #remove duplicates and partial keys in place so that the caller sees the result:
    keys_in_xml[:] = [key for key in dict.fromkeys(keys_in_xml) if key not in shortcut_keys]
108 | ␊ |
def walkProjectFiles(path, extension_list, exclusions, keys_in_source, ignore_keys):
    """Walk through a directory tree and hand all suitable files to parseSourceFile().

    Symbolic links are skipped, and so are files that raise a UnicodeDecodeError
    while being parsed.

    Keyword arguments:
    path -- Base path of the code base.
    extension_list -- List of file extensions (without dot) to consider; if it
                      is empty all files are considered.
    exclusions -- List of directories to skip (compared to the joined path of
                  walked directory and subdirectory name).
    keys_in_source -- Output parameter for found keys in the source code.
    ignore_keys -- List of keys that should not be reported.
    """
    for current_dir, subdirs, names in os.walk(path):
        #prune in place: os.walk() only descends into what is left in this list
        subdirs[:] = [sub for sub in subdirs if os.path.join(current_dir, sub) not in exclusions]

        for name in names:
            full_path = os.path.join(current_dir, name)
            if os.path.islink(full_path):
                continue #guard against broken symlinks
            suffix = os.path.splitext(name)[1][1:] #extension without the dot
            if extension_list and suffix not in extension_list:
                continue #extension is not asked for
            try:
                parseSourceFile(full_path, keys_in_source, ignore_keys)
            except UnicodeDecodeError: #binary file
                pass
133 | ␊ |
def parseSourceFile(file_name, keys_in_source, ignore_keys):
    """Parse an SLF software source file for INI keys.

    Each line is stripped of its blanks and then searched for the known query
    patterns. For every occurrence of a pattern the text between the next pair
    of double quotes is taken as key, normalized with cleanKey() and appended
    to keys_in_source unless it is listed in ignore_keys. All queries of a line
    are found, not only the last one.
    If the file can not be opened due to missing permissions a message is
    printed and the file is skipped. Errors while reading (e. g. a
    UnicodeDecodeError for binary files) are passed on to the caller.

    Keyword arguments:
    file_name -- The source code file to parse.
    keys_in_source -- Output parameter for found keys in the source code.
    ignore_keys -- List of keys that should not be reported.
    """
    #Examples of settings query calls:
    # cfg.getValue("ZRXP_STATUS_UNALTERED_NODATA", "Output", qa_unaltered_nodata, IOUtils::nothrow);
    # const double in_TZ = cfg.get("TIME_ZONE", "Input");
    # if (cfg.keyExists("ZRXP_STATUS_NODATA", "Output"))
    # if (vecArgs[ii].first=="SENSITIVITY") {
    # } else if (vecArgs[0].first=="SUPPR") {
    # outputConfig["AGGREGATE_PRO"] = "false";
    # inputConfig["METEOPATH"] = "./input";
    # advancedConfig["WIND_SCALING_FACTOR"] = "1.0";
    prefix_list = ['cfg.getValue(', 'cfg.get(', 'cfg.keyExists(', 'vecArgs[ii].first==', 'vecArgs[0].first==',
        'outputConfig[', 'inputConfig[', 'advancedConfig[']
    #prefix, anything up to the next double quote, then the key itself (compiled once per file):
    patterns = [re.compile(re.escape(prefix) + '[^"]*"([^"]*)') for prefix in prefix_list]

    try:
        infile = open(file_name, 'r')
    except PermissionError:
        print("---------- [E] Can not open file for reading:", file_name)
        return
    with infile: #close the file even if decoding fails
        file_content = infile.read().splitlines()

    for line in file_content:
        compact_line = line.replace(' ', '') #allow different whitespace styles
        for pattern in patterns:
            for raw_key in pattern.findall(compact_line):
                key = cleanKey(raw_key)
                if key not in ignore_keys:
                    keys_in_source.append(key)
169 | ␊ |
def removeDuplicates(listing):
    """Remove duplicate items of a list in place.

    The first occurrence of each item is kept and the order of the remaining
    items is preserved. Nothing is returned, the given list itself is modified.

    Keyword arguments:
    listing -- The list to remove duplicates from.
    """
    #slice assignment changes the caller's list; a plain assignment would only
    #rebind the local name and leave the caller's list untouched
    listing[:] = list(dict.fromkeys(listing))
177 | ␊ |
def cleanKey(key):
    """Return the bare, upper case name of a key that may be embedded in section markers etc.

    Everything up to and including the last ':' is cut off; a key without ':'
    is only converted to upper case.

    Keyword arguments:
    key -- The key to clean.
    """
    _section, _colon, name = key.rpartition(':')
    return name.upper()
187 | ␊ |
def printDifferences(keys_in_xml, keys_in_source):
    """Print the keys that are found in the XMLs only, and the ones found in the source code only.

    For both directions a header line with the number of keys is printed,
    followed by the sorted list of keys.

    Keyword arguments:
    keys_in_xml -- List of found XML keys.
    keys_in_source -- List of found keys in the source code.
    """
    xml_keys = set(keys_in_xml)
    source_keys = set(keys_in_source)
    reports = (
        ('---------- Keys in XMLs but not in source code (%i): ----------', xml_keys - source_keys),
        ('---------- Keys in source code but not in XMLs (%i): ----------', source_keys - xml_keys),
    )
    for header, missing in reports:
        print(header % len(missing))
        print(sorted(missing))
201 | ␊ |
def real_main():
    """Called by the entry point and performs all work.

    Reads the settings from "code_base_files.ini" (relative to the working
    directory), collects the keys of the XML files and of all listed source
    folders, and prints the differences between the two. Listed folders that
    are not a directory are reported and skipped.
    """
    settings = getCodeBaseFiles("code_base_files.ini")
    xml_path, base_path, extensions, folder_list, exclusions, ignore_keys = settings

    keys_in_xml = []
    walkXmlFiles(xml_path, keys_in_xml, ignore_keys)

    keys_in_source = []
    for entry in folder_list:
        source_dir = base_path + entry #entries are relative to the optional base path
        if os.path.isdir(source_dir):
            walkProjectFiles(source_dir, extensions, exclusions, keys_in_source, ignore_keys)
        else:
            print("---------- [E] Not a directory:", source_dir)

    for key_list in (keys_in_xml, keys_in_source):
        removeDuplicates(key_list)

    printDifferences(keys_in_xml, keys_in_source)
221 | ␊ |
#Script entry point: only do the work when this file is executed directly,
#not when it is imported as a module.
if __name__ == '__main__':
    real_main()