Skip to content

Commit 60241a4

Browse files
author
Francisco Redondo Marchena
committed
Add stderr logs organiser
This script classifies the messages in the stderr files by type of error/warning and writes them to plain-text files or Excel sheets.
1 parent 34e39f9 commit 60241a4

File tree

2 files changed

+233
-0
lines changed

2 files changed

+233
-0
lines changed

tools/README

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
Tools description
2+
=================
3+
4+
## organise-ofc-stderr-output.py
5+
6+
- Description:
7+
usage: organise-ofc-stderr-output.py [-h] [--excel] -f FILE [-o DIR]
8+
9+
Classify and count warnings and errors in OFC stderr files
10+
11+
optional arguments:
12+
-h, --help show this help message and exit
13+
--excel The OFC stderr analysis output will be an excel file,
14+
if not it will be multiple files in plain text
15+
-f FILE, --filename FILE
16+
OFC stderr log file, produced when running OFC on a
17+
file or set of them
18+
-o DIR, --output_dir DIR
19+
Output directory where to find result files
20+
21+
- Dependencies:
22+
- xlsxwriter - http://xlsxwriter.readthedocs.io/

tools/organise-ofc-stderr-output.py

+211
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
#
4+
# Copyright 2018 Codethink Ltd.
5+
#
6+
# Licensed under the Apache License, Version 2.0 (the "License");
7+
# you may not use this file except in compliance with the License.
8+
# You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
#
18+
19+
import argparse
20+
import os
21+
import re
22+
import string
23+
import ntpath
24+
25+
# Counter used by write_message_in_separate_worksheet() to build an
# alternative worksheet name when adding a sheet with the truncated
# message name fails.
rep_page_index = 0
26+
27+
def is_valid_file(file_path):
28+
"""
29+
'Type' for argparse - checks that file exists but does not open.
30+
"""
31+
if not os.path.exists(file_path):
32+
raise argparse.ArgumentError(
33+
"{0} does not exist".format(file_path))
34+
return file_path
35+
36+
def is_valid_directory(dir_path):
    """
    'Type' for argparse - makes sure an output directory is available.

    When ``dir_path`` is not an existing directory it is created. When it
    already is a directory, a new directory named ``dir_path`` followed by
    the current ISO-formatted timestamp is created instead.

    Returns the path of the directory that should be used for the output.

    Raises ``OSError`` when a directory cannot be created for any reason
    other than the path already existing.
    """
    # Imported locally: the module-level imports of this file do not
    # provide these two modules.
    import datetime
    import errno

    # NOTE(review): the ``else`` branch is assumed to pair with the outer
    # ``if`` (existing directory -> timestamped sibling) - confirm.
    if not os.path.isdir(dir_path):
        try:
            os.makedirs(dir_path)
        except OSError as exception:
            # The path appearing in the meantime is tolerated; anything
            # else is a real failure.
            if exception.errno != errno.EEXIST:
                raise
    else:
        dir_path = dir_path + datetime.datetime.now().isoformat()
        os.makedirs(dir_path)
    return dir_path
47+
48+
def add_to_stderr_dict(stderr_dict, text):
    """
    Classify one error/warning message and store it in ``stderr_dict``.

    ``text`` is the list of lines that make up a single message. The first
    line that does not end in ":" is normalised into an identifier, which
    is used as dictionary key; ``text`` is appended to the list stored
    under that key. Nothing is stored when the identifier ends up empty.
    """
    identifier = ""
    for line in text:
        # The lines which end in ":" are paths to the files where the
        # error or warning is. The first line after them is the
        # description of the error/warning and it is used as identifier
        # for this error/warning (dictionary key).
        if line.endswith(":"):
            continue

        # OFC output encloses variable names or types in single quotes,
        # which makes otherwise identical errors/warnings differ; removing
        # these words creates the real identifier.
        identifier = re.sub(r"'(\S+?)'", "", line.strip())
        # A description made only of "^" and spaces (probably two "^"
        # marker lines in a row) must not produce a dictionary entry, so
        # the marker is dropped and the identifier becomes empty.
        identifier = re.sub(r"\^", "", identifier)
        identifier = re.sub(r"[^0-9a-zA-Z]+", "_", identifier)
        # Merge warnings/errors which only differ in their digits
        identifier = re.sub(r"\d", "", identifier)
        break

    if identifier.strip() != "":
        stderr_dict.setdefault(str(identifier), []).append(text)
80+
81+
def find_filename(path):
    """
    Return the last component of ``path``.

    A trailing separator is tolerated: when the path ends in one, the name
    of the directory right before it is returned.
    """
    directory, leaf = ntpath.split(path)
    if leaf:
        return leaf
    return ntpath.basename(directory)
84+
85+
def write_message_in_separate_worksheet(workbook, message, ocurrences):
    """
    Write every occurrence of one error/warning into its own worksheet.

    ``workbook`` is the workbook the sheet is added to, ``message`` is the
    error/warning identifier (used for the sheet name and the heading) and
    ``ocurrences`` is an iterable of messages, each one a list of lines.
    Nothing is written when ``message`` is empty.
    """
    global rep_page_index

    # Excel does not accept sheet names longer than 31 characters.
    max_sheet_name_length = 31

    bold = workbook.add_format({'bold': True, 'font_name': 'Courier'})
    courier_font = workbook.add_format({'font_name': 'Courier'})
    stripped_message = message[0:26]
    if not stripped_message:
        return
    try:
        # Truncated name so that the sheet name fits in the length limit
        worksheet = workbook.add_worksheet(stripped_message + "...")
    except Exception:
        # Different messages can share the same truncated name, so a
        # running index is added to get an alternative name. The message
        # is cut further as the index grows so the limit is still met.
        rep_page_index += 1
        suffix = "_" + str(rep_page_index) + "..."
        worksheet = workbook.add_worksheet(
            message[0:max_sheet_name_length - len(suffix)] + suffix)

    row_number = 1
    worksheet.write_row('A' + str(row_number), [message + " ocurrences"], bold)

    # One row per line, all the occurrences one after the other
    for text in ocurrences:
        for line in text:
            row_number += 1
            worksheet.write_row('A' + str(row_number), [str(line)],
                                courier_font)
108+
109+
def write_to_xlsx(stderr_dict, output_path, input_file_path):
    """
    Write the classified errors/warnings into an excel workbook.

    The workbook is created inside the ``output_path`` directory and is
    named after the file name of ``input_file_path`` plus
    "_classified.xlsx". It gets a 'Summary' sheet with one row per
    message and its number of occurrences, plus one sheet per message
    written by write_message_in_separate_worksheet().

    ``stderr_dict`` maps each message identifier to the list of its
    occurrences.
    """
    import xlsxwriter
    row_number = 1
    headings = [
        'Error/Warning message',
        'Number of Occurrences',
    ]

    # Open workbook in the directory given by the caller
    output_filename = find_filename(input_file_path) + "_classified.xlsx"
    output_workbook = os.path.join(output_path, output_filename)
    workbook = xlsxwriter.Workbook(output_workbook)
    try:
        bold = workbook.add_format({'bold': True, 'font_name': 'Courier'})
        courier_font = workbook.add_format({'font_name': 'Courier'})

        # Add summary worksheet
        worksheet = workbook.add_worksheet('Summary')
        worksheet.write_row('A' + str(row_number), ['OFC stderr Analysis'],
                            bold)
        row_number += 2

        # Fill up the summary table
        worksheet.write_row('A' + str(row_number), headings, bold)
        for message, ocurrences in stderr_dict.items():
            row_number += 1
            row = [message, len(ocurrences)]
            worksheet.write_row('A' + str(row_number), row, courier_font)
            write_message_in_separate_worksheet(workbook, message, ocurrences)
    finally:
        # The workbook has to be closed explicitly for the file to be
        # written reliably.
        workbook.close()
136+
137+
def write_to_plain_text(stderr_dict, output_dir, input_file_path):
    """
    Write the classified errors/warnings into plain text files.

    A summary file named "SUMMARY_" plus the file name of
    ``input_file_path`` plus ".txt" is created in ``output_dir``; it holds
    one "message;number of occurrences" row per message. Each message also
    gets its own file in ``output_dir``, named after the message, with all
    the lines of all its occurrences.

    ``stderr_dict`` maps each message identifier to the list of its
    occurrences, each one a list of lines.
    """
    output_filename = "SUMMARY_" + find_filename(input_file_path) + ".txt"
    output_filepath = os.path.join(output_dir, output_filename)
    # Write the summary in CSV format, using ";" as separator
    with open(output_filepath, 'w') as f:
        heading = "Error/Warning Message;Number of Occurrences\n"
        f.write(heading)
        f.write("=" * len(heading))
        f.write("\n")
        # items() works both in Python 2 and Python 3
        for message, ocurrences in stderr_dict.items():
            f.write("%s;%s\n" % (message, str(len(ocurrences))))
            filename = str(message) + ".txt"
            filepath = os.path.join(output_dir, filename.replace(" ", "_"))
            with open(filepath, 'w') as sf:
                for text in ocurrences:
                    for line in text:
                        sf.write(line + "\n")
154+
155+
# Command line interface
parser = argparse.ArgumentParser(
    description='Classify and count warnings and errors in OFC stderr files')
parser.add_argument('--excel', action='store_true',
                    help="""The OFC stderr analysis output will be an excel file, if not it
                    will be multiple files in plain text""")
parser.add_argument('-f', '--filename',
                    dest="filename", required=True, type=is_valid_file,
                    help="""OFC stderr log file, produced when running OFC on a file or
                    set of them""",
                    metavar="FILE")
parser.add_argument('-o', '--output_dir',
                    dest="output_dir", required=False, type=is_valid_directory,
                    help="Output directory where to find result files",
                    metavar="DIR")
args = parser.parse_args()

if not args.output_dir:
    # Use the directory returned by the helper, as it is done for the
    # directories given with -o, so the results are written to the
    # directory the helper has prepared.
    output_dir = is_valid_directory("output")
else:
    output_dir = args.output_dir

# Read as bytes so the decoding below behaves the same in Python 2 and 3
with open(args.filename, 'rb') as f:
    content = f.read().splitlines()

set_lines = []
stderr_dict = {}
for line in content:
    # Filter out non valid lines
    if not line:
        continue

    # Remove any non printable character
    line = line.decode("ascii", 'ignore')
    line = "".join(char for char in line if char in string.printable)

    # Add lines into a list which can be parsed later
    set_lines.append(line)

    # OFC stderr files (warnings and errors) use the "^" character to
    # point the position in the line where the warning/error happens.
    # This character appears only in the last line of the error before
    # a blank line so it can be used as token to define the end of the
    # error/warning message.
    if "^" not in line:
        continue

    add_to_stderr_dict(stderr_dict, set_lines)
    # Prepare for the next text
    set_lines = []

# Write the results to files
if args.excel:
    write_to_xlsx(stderr_dict, output_dir, args.filename)
else:
    write_to_plain_text(stderr_dict, output_dir, args.filename)

0 commit comments

Comments
 (0)