INIshell-ng

INIshell-ng Git Source Tree

Root/tools/iniqueryparser/iniqueryparser.py

###############################################################################
# Copyright 2019 WSL Institute for Snow and Avalanche Research SLF-DAVOS #
###############################################################################
# This file is part of INIshell.
# INIshell is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# INIshell is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with INIshell. If not, see <http://www.gnu.org/licenses/>.

# Quick script to parse SLF's code base for the use of INI file queries.
# The goal is to get a list of INI keys the software uses, and compare that to
# the INI keys found in the INIshell XMLs to get hints for missing documentation.
# Synopsis: python3 iniqueryparser.py
# Cf. parseSourceFile() to add new syntax patterns.
# Michael Reisecker, 2019-12

23import os, re
24
def getCodeBaseFiles(listing_file):
    """Read the description of the code base to parse from a settings file.

    Empty lines and lines starting with '#' are skipped. A line of the form
    "name=value" with one of the names xmlpath, extensions, ignore, base or
    exclude is a setting; every other line is taken as a folder to parse.
    The value is everything after the first '=', so it may itself contain '='.

    Keyword arguments:
    listing_file -- The settings file with file extensions, and a file list

    Returns the tuple (xml_path, base_path, extensions, files, exclusions,
    ignore_keys). xml_path is None and base_path is '' if not set in the file.
    """
    extensions = []
    files = []
    exclusions = []
    ignore_keys = []
    xml_path = None
    base_path = ''

    # The context manager closes the file even if reading it fails.
    with open(listing_file, 'r') as infile:
        file_content = infile.read().splitlines()

    for line in file_content:
        if not line or line.startswith('#'):  # skip empty lines and comments
            continue
        # Split at the first '=' only so that values may contain '=' themselves.
        name, separator, value = line.partition('=')
        setting = name if separator else None
        if setting == 'xmlpath':
            xml_path = value
        elif setting == 'extensions':  # format "extensions=ext,ext2,..."
            extensions.extend(value.split(','))
        elif setting == 'ignore':  # keys to ignore
            ignore_keys.extend(value.split(','))
        elif setting == 'base':  # base source code directory
            base_path = value
        elif setting == 'exclude':
            exclusions.extend(value.split(','))
        else:  # anything else is a folder of the code base
            files.append(line)

    if base_path:
        # Prepend the optional base path only now: "base" may appear after
        # "exclude" in the file, but it has surely been read at this point.
        exclusions = [base_path + path for path in exclusions]
    return xml_path, base_path, extensions, files, exclusions, ignore_keys
66
def walkXmlFiles(path, keys_in_xml, ignore_keys):
    """Recursively find all XML files below a folder and hand each one to the parser.

    Keyword arguments:
    path -- The path in which to recursively look for XML files.
    keys_in_xml -- Output parameter for found XML keys.
    ignore_keys -- List of keys that should not be reported.
    """
    if path is None:
        print("---------- [E] No path specified")
        return
    for folder, _subfolders, names in os.walk(path):
        # The extension check is case insensitive (".XML" counts as well).
        xml_names = (name for name in names
                     if os.path.splitext(name)[1].lower() == '.xml')
        for name in xml_names:
            parseXmlFile(os.path.join(folder, name), keys_in_xml, ignore_keys)
83
def parseXmlFile(file_name, keys_in_xml, ignore_keys):
    """Parse an XML file for INI keys.

    A key is the text between the double quotes of a key="..." attribute; if
    a line holds several such attributes the last one is used. Lines where
    key= is not followed by a double quoted value are skipped.

    Keyword arguments:
    file_name -- The XML file to parse.
    keys_in_xml -- Output parameter for found XML keys. On return it holds
                   no duplicates and its order is unspecified.
    ignore_keys -- List of keys that should not be reported.
    """
    # The greedy '.*' makes the search settle on the last key="..." of a line.
    key_pattern = re.compile('.*' + re.escape('key=') + '"([^"]*)')
    shortcut_keys = set()
    last_key_without_sub = ''

    raw_keys = []
    with open(file_name) as xml_file:  # closed again even if decoding fails
        for line in xml_file:
            key_match = key_pattern.search(line)
            if key_match is not None:  # e. g. key= in free text or single quoted
                raw_keys.append(key_match.group(1))

    for raw_key in raw_keys:
        key = cleanKey(raw_key)
        if '@' in key:
            # '@' stands for the most recent key that had no '@' in it.
            key = key.replace('@', last_key_without_sub)
            # Shortcut keys are partial keys (e. g. key of a Horizontal panel)
            # that aren't a key by themselves; they are removed retrospectively.
            shortcut_keys.add(last_key_without_sub)
        else:
            last_key_without_sub = key
        if key not in ignore_keys:
            keys_in_xml.append(key)
    # Slice assignment so that the caller's list object is the one updated.
    keys_in_xml[:] = list(set(keys_in_xml) - shortcut_keys)  # remove partial keys
108
def walkProjectFiles(path, extension_list, exclusions, keys_in_source, ignore_keys):
    """Walk through a path's subdirectories and hand all matching files to the parser.

    Keyword arguments:
    path -- Base path of the code base.
    extension_list -- List of file extensions to consider (empty: all files).
    exclusions -- List of directories to skip.
    keys_in_source -- Output parameter for found keys in the source code.
    ignore_keys -- List of keys that should not be reported.
    """
    for folder, subfolders, names in os.walk(path):
        # Prune in place so that os.walk() does not descend into excluded
        # directories (which drops all of their subdirectories too).
        subfolders[:] = [sub for sub in subfolders
                         if os.path.join(folder, sub) not in exclusions]

        for name in names:
            full_path = os.path.join(folder, name)
            if os.path.islink(full_path):  # guard against broken symlinks
                continue
            suffix = os.path.splitext(name)[1][1:]  # extension without the dot
            if extension_list and suffix not in extension_list:
                continue
            try:
                parseSourceFile(full_path, keys_in_source, ignore_keys)
            except UnicodeDecodeError:  # binary file
                pass
133
def parseSourceFile(file_name, keys_in_source, ignore_keys):
    """Parse an SLF software source file for INI keys.

    Spaces are removed from each line before matching to allow different
    whitespace styles. For every known query prefix found on a line, the
    first double quoted string after it is taken as the key. To support a
    new query syntax add its prefix (without spaces) to prefix_list.

    Keyword arguments:
    file_name -- The source code file to parse.
    keys_in_source -- Output parameter for found keys in the source code.
    ignore_keys -- List of keys that should not be reported.

    Raises UnicodeDecodeError if the file can not be decoded as text.
    """
    prefix_list = ['cfg.getValue(', 'cfg.get(', 'cfg.keyExists(', 'vecArgs[ii].first==',
                   'vecArgs[0].first==', 'outputConfig[', 'inputConfig[', 'advancedConfig[']
    # Build the patterns once instead of once per line and prefix. The greedy
    # '.*' makes the search settle on the last occurrence of the prefix.
    patterns = [(prefix, re.compile('.*' + re.escape(prefix) + '[^"]*"([^"]*)'))
                for prefix in prefix_list]

    try:
        infile = open(file_name, 'r')
    except PermissionError:
        print("---------- [E] Can not open file for reading:", file_name)
        return
    # Read everything up front so that an undecodable (binary) file raises
    # before any key is stored; the file is closed in either case.
    with infile:
        file_content = infile.read().splitlines()

    for line in file_content:
        compact_line = line.replace(' ', '')  # allow different whitespace styles
        for prefix, pattern in patterns:
            if prefix not in compact_line:  # cheap test before running the regex
                continue
            key_match = pattern.search(compact_line)
            if key_match is None:  # prefix present, but no quoted key after it
                continue
            key = cleanKey(key_match.group(1))
            if key not in ignore_keys:
                keys_in_source.append(key)
159
#Examples of settings query calls:
# cfg.getValue("ZRXP_STATUS_UNALTERED_NODATA", "Output", qa_unaltered_nodata, IOUtils::nothrow);
# const double in_TZ = cfg.get("TIME_ZONE", "Input");
# if (cfg.keyExists("ZRXP_STATUS_NODATA", "Output"))
# if (vecArgs[ii].first=="SENSITIVITY") {
# } else if (vecArgs[0].first=="SUPPR") {
# outputConfig["AGGREGATE_PRO"] = "false";
# inputConfig["METEOPATH"] = "./input";
# advancedConfig["WIND_SCALING_FACTOR"] = "1.0";
169
def removeDuplicates(listing):
    """Remove duplicate items of a list in place, keeping the first occurrences in order.

    Keyword arguments:
    listing -- The list to remove duplicates from (modified in place).
    """
    # Slice assignment changes the caller's list object; a plain assignment
    # would only rebind the local name and leave the caller's list untouched.
    listing[:] = dict.fromkeys(listing)
177
def cleanKey(key):
    """Return the bare, upper case name of a key that is embedded in section markers etc.

    Keyword arguments:
    key -- The key to clean.
    """
    # Keep only what follows the last ':' (the whole key if there is none).
    bare_name = key.rsplit(':', 1)[-1]
    return bare_name.upper()
187
def printDifferences(keys_in_xml, keys_in_source):
    """Print the keys that are found in the XMLs but not in the source code and vice versa.

    Both differences are printed as sorted lists, each below a header line
    stating the number of keys.

    Keyword arguments:
    keys_in_xml -- List of found XML keys.
    keys_in_source -- List of found keys in the source code.
    """
    xml_keys = set(keys_in_xml)
    source_keys = set(keys_in_source)
    only_in_xml = sorted(xml_keys.difference(source_keys))
    only_in_source = sorted(source_keys.difference(xml_keys))

    print('---------- Keys in XMLs but not in source code (%i): ----------' % len(only_in_xml))
    print(only_in_xml)
    print('---------- Keys in source code but not in XMLs (%i): ----------' % len(only_in_source))
    print(only_in_source)
201
def real_main():
    """Called by the entry point and performs all work.

    Reads the settings from "code_base_files.ini" in the current directory,
    collects the keys of the XMLs and of every listed source folder, and
    prints the differences between the two.
    """
    (xml_path, base_path, extensions,
     folder_list, exclusions, ignore_keys) = getCodeBaseFiles("code_base_files.ini")

    keys_in_xml = []
    walkXmlFiles(xml_path, keys_in_xml, ignore_keys)

    keys_in_source = []
    for relative_folder in folder_list:
        source_dir = base_path + relative_folder  # plain concatenation, no separator added
        if os.path.isdir(source_dir):
            walkProjectFiles(source_dir, extensions, exclusions, keys_in_source, ignore_keys)
        else:
            print("---------- [E] Not a directory:", source_dir)

    for key_list in (keys_in_xml, keys_in_source):
        removeDuplicates(key_list)

    printDifferences(keys_in_xml, keys_in_source)
221
# Do the work only when run as a script, not when imported as a module.
if __name__ == "__main__":
    real_main()

Archive Download this file