-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgen_details.py
executable file
·1281 lines (1017 loc) · 47.2 KB
/
gen_details.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python3
#===============================================================================
#
# gen_details.py - Generate billing details for month/year.
#
# ARGS:
# 1st: the BillingConfig spreadsheet.
#
# SWITCHES:
# --accounting_file: Location of accounting file (overrides BillingConfig.xlsx)
# --billing_root: Location of BillingRoot directory (overrides BillingConfig.xlsx)
# [default if no BillingRoot in BillingConfig.xlsx or switch given: CWD]
# --year: Year of snapshot requested. [Default is this year]
# --month: Month of snapshot requested. [Default is last month]
# --no_storage: Don't run the storage calculations.
# --no_usage: Don't run the storage usage calculations (only the quotas).
# --no_computing: Don't run the computing calculations.
# --no_consulting: Don't run the consulting calculations.
# --all_jobs_billable: Consider all jobs to be billable. [default=False]
# --ignore_job_timestamps: Ignore timestamps in job and allow jobs not in month selected [default=False]
#
# INPUT:
# BillingConfig spreadsheet.
# SGE Accounting snapshot file (from snapshot_accounting.py).
# - Expected in BillingRoot/<year>/<month>/SGEAccounting.<year>-<month>.xlsx
#
# OUTPUT:
# BillingDetails spreadsheet in BillingRoot/<year>/<month>/BillingDetails.<year>-<month>.xlsx
# Various messages about current processing status to STDOUT.
#
# ASSUMPTIONS:
# Depends on xlrd and xlsxwriter modules.
# The input spreadsheet has been certified by check_config.py.
#
# AUTHOR:
# Keith Bettinger
#
#==============================================================================
#=====
#
# IMPORTS
#
#=====
import argparse
import codecs
import collections
import csv
import datetime
import locale
import os
import os.path
import re
import sys
import openpyxl
import openpyxl.styles
from job_accounting_file import JobAccountingFile
# Simulate an "include billing_common.py".
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
exec(compile(open(os.path.join(SCRIPT_DIR, "billing_common.py"), "rb").read(), os.path.join(SCRIPT_DIR, "billing_common.py"), 'exec'))
#=====
#
# CONSTANTS
#
#=====
#=====
#
# GLOBALS
#
#=====
# In billing_common.py
global SLURMACCOUNTING_PREFIX
global GOOGLE_INVOICE_PREFIX
global BILLING_DETAILS_PREFIX
global CONSULTING_PREFIX
global STORAGE_PREFIX
global ACCOUNT_PREFIXES
global EXCEL_MAX_ROWS
global SUBDIR_RAWDATA
#
# Formats for the output workbook, to be initialized along with the workbook.
#
BOLD_FORMAT = None
DATE_FORMAT = None
INT_FORMAT = None
FLOAT_FORMAT = None
MONEY_FORMAT = None
PERCENT_FORMAT = None
# Set locale to be US english for converting strings with commas into floats.
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
#=====
#
# FUNCTIONS
#
#=====
# In billing_common.py
global read_config_sheet
global sheet_get_named_column
global sheet_name_to_sheet
global from_timestamp_to_excel_date
global from_excel_date_to_timestamp
global from_timestamp_to_date_string
global from_excel_date_to_date_string
global from_ymd_date_to_timestamp
global from_timestamp_to_datetime
global from_datetime_to_timestamp
global from_excel_date_to_datetime
global from_datetime_to_excel_date
global from_ymd_date_to_datetime
global remove_unicode_chars
global filter_by_dates
global argparse_get_parent_parser
global argparse_get_year_month
global argparse_get_billingroot_billingconfig
global get_subdirectory
# Initialize the output BillingDetails workbook, given as argument.
# It creates all the formats used within the workbook, and saves them
# as the global variables listed at the top of the method.
# It also creates all the sheets within the workbook, and the column
# headers within those sheets. The method returns a dict of mappings
# from sheet_name to the workbook's Sheet object for that name.
def init_billing_details_wkbk(workbook):
global BOLD_FORMAT
global DATE_FORMAT
global FLOAT_FORMAT
global INT_FORMAT
global MONEY_FORMAT
global PERCENT_FORMAT
global BILLING_DETAILS_SHEET_COLUMNS # In billing_common.py
# Create formats for use within the workbook.
BOLD_FORMAT = openpyxl.styles.NamedStyle(name="bold")
BOLD_FORMAT.font = openpyxl.styles.Font(bold=True)
workbook.add_named_style(BOLD_FORMAT)
DATE_FORMAT = openpyxl.styles.NamedStyle(name="date")
DATE_FORMAT.number_format = 'mm/dd/yy'
workbook.add_named_style(DATE_FORMAT)
INT_FORMAT = openpyxl.styles.NamedStyle(name="int")
INT_FORMAT.number_format = '0'
workbook.add_named_style(INT_FORMAT)
FLOAT_FORMAT = openpyxl.styles.NamedStyle(name="float")
FLOAT_FORMAT.number_format = '0.0'
workbook.add_named_style(FLOAT_FORMAT)
MONEY_FORMAT = openpyxl.styles.NamedStyle(name="money")
MONEY_FORMAT.number_format = '$0.00'
workbook.add_named_style(MONEY_FORMAT)
PERCENT_FORMAT = openpyxl.styles.NamedStyle(name="percent")
PERCENT_FORMAT.number_format = '0%'
workbook.add_named_style(PERCENT_FORMAT)
sheet_name_to_sheet_map = dict()
# Control the size of the Workbook when it opens
view = [openpyxl.workbook.views.BookView(windowWidth=18140, windowHeight=30000)]
workbook.views = view
for sheet_name in BILLING_DETAILS_SHEET_COLUMNS:
sheet = workbook.create_sheet(sheet_name)
for col in range(0, len(BILLING_DETAILS_SHEET_COLUMNS[sheet_name])):
sheet.cell(1, col+1, BILLING_DETAILS_SHEET_COLUMNS[sheet_name][col]).style = BOLD_FORMAT
sheet_name_to_sheet_map[sheet_name] = sheet
return sheet_name_to_sheet_map
# Given a list of tuples of job details, writes the job details to
# the sheet given. Possible sheets for use in this method are
# typically the "Computing", "Nonbillable Jobs", and "Failed Jobs" sheets.
def write_job_details(workbook, sheet, sheet_name, job_details):
# If no job details, write "No Jobs".
if len(job_details) == 0:
sheet.write(1, 0, "No jobs")
print()
return
num_jobs = len(job_details)
job_rows_left = num_jobs
first_job_row = 0
last_job_row = first_job_row + min(job_rows_left, EXCEL_MAX_ROWS-1) # Subtract 1 for header line
sheet_number = 1
print(num_jobs)
while True:
sheet_row = 0
# If we have job details, write them to the sheet, below the headers.
for row in range(first_job_row, last_job_row):
# Bump rows down below header line.
sheet_row += 1
# A little feedback for the people.
if not args.verbose:
if sheet_row % 1000 == 0:
sys.stdout.write('.')
sys.stdout.flush()
# 'Job Date'
col = 0
sheet.cell(sheet_row+1, col+1, from_timestamp_to_excel_date(job_details[row][col])).style = DATE_FORMAT
# 'Job Timestamp'
col += 1
sheet.cell(sheet_row+1, col+1, job_details[row][col]).style = INT_FORMAT
if args.verbose: print(job_details[row][col], end=' ')
# 'Username'
col += 1
sheet.cell(sheet_row+1, col+1, job_details[row][col])
if args.verbose: print(job_details[row][col], end=' ')
# 'Job Name'
col += 1
sheet.cell(sheet_row+1, col+1, str(job_details[row][col]))
if args.verbose: print(job_details[row][col], end=' ')
# 'Account'
col += 1
sheet.cell(sheet_row+1, col+1, job_details[row][col])
if args.verbose: print(job_details[row][col], end=' ')
# 'Node'
col += 1
sheet.cell(sheet_row+1, col+1, job_details[row][col])
if args.verbose: print(job_details[row][col], end=' ')
# 'Slots'
col += 1
sheet.cell(sheet_row+1, col+1, job_details[row][col]).style = INT_FORMAT
if args.verbose: print(job_details[row][col], end=' ')
# 'Wallclock Secs'
col += 1
sheet.cell(sheet_row+1, col+1, job_details[row][col]).style = INT_FORMAT
if args.verbose: print(job_details[row][col], end=' ')
# 'JobID'
col += 1
sheet.cell(sheet_row+1, col+1, job_details[row][col]).style = INT_FORMAT
if args.verbose: print(job_details[row][col], end=' ')
# Extra column if needed: 'Reason' or 'Failed Code'
if col < len(job_details[row])-1:
col += 1
sheet.cell(sheet_row+1, col+1, job_details[row][col])
if args.verbose: print(job_details[row][col], end=' ')
if args.verbose: print()
job_rows_left -= last_job_row - first_job_row
first_job_row = last_job_row
last_job_row = first_job_row + min(job_rows_left, EXCEL_MAX_ROWS-1) # Subtract 1 for header line
if job_rows_left > 0:
# Generate new sheet of the form "<sheet name> <sheet number>" (starting with 2).
sheet_number += 1
sheet = workbook.create_sheet("%s %d" % (sheet_name, sheet_number))
# Create same headers on new numbered sheets as on the original sheet name.
for col in range(0, len(BILLING_DETAILS_SHEET_COLUMNS[sheet_name])):
sheet.cell(1, col+1, BILLING_DETAILS_SHEET_COLUMNS[sheet_name][col]).style = BOLD_FORMAT
sys.stdout.write('|')
sys.stdout.flush()
else:
break # Leave while True loop.
print()
#
# Returns True/False if string is a valid account.
#
def is_valid_account(acct):
global account_list
global pi_tag_list
# Let's go case insensitive.
acct = acct.lower()
# If this is a known account or matches a PI Tag, we are good.
if acct in account_list or acct in pi_tag_list:
return True
# Otherwise, does it match the pattern of <PREFIX>_<PITag>, where PREFIX is in ACCOUNT_PREFIXES?
else:
# Split account into underline-separated words.
ul_words = acct.split('_')
# The prefix is everything but the last word, which is then expected to be a PI Tag.
prefix = '_'.join(ul_words[0:-1])
pi_tag_word = ul_words[-1]
if prefix in ACCOUNT_PREFIXES and pi_tag_word in pi_tag_list:
return True
else:
print(acct, "is not a valid account (%s %s)." % (prefix, pi_tag_word))
return False
# Read the Storage Usage file.
# Returns mapping from folders to [timestamp, total, used]
def read_storage_usage_file(storage_usage_file):
# Mapping from folders to [timestamp, total, used].
folder_size_dict = collections.OrderedDict()
usage_fileobj = open(storage_usage_file)
usage_csvdict = csv.DictReader(usage_fileobj)
for row in usage_csvdict:
folder_size_dict[row['Folder']] = \
(float(row['Timestamp']), float(row['Size']), float(row['Used']),
int(row['Inodes Quota']), int(row['Inodes Used']))
return folder_size_dict
# Write storage usage data into the Storage sheet of the BillingDetails file.
# Takes in mapping from folders to [timestamp, total, used].
def write_storage_usage_data(folder_size_dict, storage_sheet):
# Write space-used mapping into details workbook.
row = 0
for folder in sorted(list(folder_size_dict.keys())):
[timestamp, total, used, inodes_quota, inodes_used] = folder_size_dict[folder]
sheet_row = row + 1
# 'Date Measured'
col = 0
storage_sheet.cell(sheet_row+1, col+1, from_timestamp_to_excel_date(timestamp)).style = DATE_FORMAT
# 'Timestamp'
col += 1
storage_sheet.cell(sheet_row+1, col+1, timestamp)
if args.verbose: print(timestamp, end=' ')
# 'Folder'
col += 1
storage_sheet.cell(sheet_row+1, col+1, folder)
if args.verbose: print(folder, end=' ')
# 'Size'
col += 1
storage_sheet.cell(sheet_row+1, col+1, total).style = FLOAT_FORMAT
if args.verbose: print(total, end=' ')
# 'Used'
col += 1
storage_sheet.cell(sheet_row+1, col+1, used).style = FLOAT_FORMAT
if args.verbose: print(used, end=' ')
# 'Inodes Quota'
col += 1
storage_sheet.cell(sheet_row+1, col+1, inodes_quota).style = INT_FORMAT
if args.verbose: print(inodes_quota, end=' ')
# 'Inodes Used'
col += 1
storage_sheet.cell(sheet_row+1, col+1, inodes_used).style = INT_FORMAT
if args.verbose: print(inodes_used)
# Next row, please.
row += 1
# Generates the job details stored in the "Computing", "Nonbillable Jobs", and "Failed Jobs" sheets.
def compute_computing_charges(config_wkbk, begin_timestamp, end_timestamp, accounting_file,
computing_sheet, nonbillable_job_sheet, failed_job_sheet):
# In billing_common.py
global ACCOUNTING_FIELDS
global ACCOUNTING_FAILED_CODES
global BILLABLE_HOSTNAME_PREFIXES
global NONBILLABLE_HOSTNAME_PREFIXES
global IGNORED_ACCOUNTS
global account_list
global pi_tag_list
print("Computing computing charges...")
# Read in the Usernames from the Users sheet.
users_sheet = config_wkbk['Users']
users_list = sheet_get_named_column(users_sheet, "Username")
# NOTE: This column may have some duplicates in it.
# Need to make a set out of the result.
users_list = set(users_list)
print(" Reading accounting file %s" % (os.path.abspath(accounting_file)))
#
# Open the current accounting file for input.
#
accounting_fp = JobAccountingFile(accounting_file)
#
# Read all the lines of the current accounting file.
# Output to the details spreadsheet those jobs
# which have "end_times" in the given month,
# and "owner"s in the list of users.
#
not_in_users_list = set()
not_in_account_list = collections.defaultdict(set)
both_proj_and_acct_list = collections.defaultdict(set)
failed_job_details = [] # Jobs which failed.
billable_job_details = [] # Jobs that are on hosts we can bill for.
nonbillable_node_job_details = [] # Jobs not on hosts we can bill for.
unknown_node_job_details = [] # Jobs on unknown nodes.
both_billable_and_non_node_job_details = [] # Jobs which have both billable and nonbillable nodes.
unknown_user_job_details = [] # Jobs from users we don't know.
unknown_job_nodes = set() # Set of nodes we don't know.
jobids_with_unknown_billable_nodes = set() # Set of job IDs for jobs which have nodes that can't be identified as billable.
jobids_with_billable_and_non_nodes = set() # Set of job IDs for jobs which have both billable and nonbillable nodes.
for accounting_record in accounting_fp:
# If the job failed, the submission_time is the job date.
# Else, the end_time is the job date.
failed_code = accounting_record.failed_code
job_failed = failed_code in ACCOUNTING_FAILED_CODES
if job_failed:
job_date = accounting_record.submission_time # The only valid date in the record.
else:
job_date = accounting_record.end_time
# Create a list of job details for this job.
job_details = list()
job_details.append(job_date)
job_details.append(job_date) # Two columns used for the date: one date formatted, one timestamp.
job_details.append(accounting_record.owner)
job_details.append(accounting_record.job_name)
#
# Look for accounts in both account and project fields.
# If values occur in both, use the project field and record the discrepancy.
#
job_account = remove_unicode_chars(accounting_record.account)
if job_account == 'sge' or job_account == '': # Edit out the default account 'sge'.
job_account = None
job_project = remove_unicode_chars(accounting_record.project)
if job_project == 'NONE' or job_project == '': # Edit out the placeholder project 'NONE'.
job_project = None
#
# Add account (project/account) info to job_details.
#
# If project is set and not in the ignored account list:
if job_project is not None and job_project not in IGNORED_ACCOUNTS:
# Find out if the project name is a known one.
job_project_is_valid_account = is_valid_account(job_project)
if not job_project_is_valid_account:
# If this project/account is unknown, save details for later output.
not_in_account_list[accounting_record.owner].add(job_project)
else:
job_project_is_valid_account = False
job_project = None # we could be ignoring a given account
# If account is set and not in the ignored account list:
if job_account is not None and job_account not in IGNORED_ACCOUNTS:
# Find out if the account name is a known one.
job_account_is_valid_account = is_valid_account(job_account)
if not job_account_is_valid_account:
# If this account is unknown, save details for later output.
not_in_account_list[accounting_record.owner].add(job_account)
else:
job_account_is_valid_account = False
job_account = None # we could be ignoring a given account
#
# Decide which of project and account will be used for account.
#
# If project is valid, choose project for account.
if job_project_is_valid_account:
# If there's both a project and an account, choose the project and save details for later output.
job_account = job_project
if job_account is not None:
both_proj_and_acct_list[accounting_record.owner].add((job_project,job_account))
# Else if project is present and account is not valid, choose project for account.
# (Non-valid project trumps non-valid account).
elif job_project is not None and not job_account_is_valid_account:
# If there's both a project and an account, choose the project and save details for later output.
job_account = job_project
if job_account is not None:
both_proj_and_acct_list[accounting_record.owner].add((job_project,job_account))
# Else if account is present, choose account for account.
# (either account is valid and the project is non-valid, or there is no project).
elif job_account is not None:
job_account = job_account
# If there's both an account and a project, save the details for later output.
if job_project is not None:
both_proj_and_acct_list[accounting_record.owner].add((job_project,job_account))
# else No project and No account = No account.
else:
job_account = None
# Add the computed account to the job_details, if any.
if job_account is not None:
job_details.append(job_account)
else:
job_details.append('')
# Support for Slurm: 'hostname' is now a comma-separated node list.
node_list = accounting_record.node_list
# Edit hostname to remove trailing ".local".
node_list = node_list.replace(".local","")
job_details.append(node_list)
job_details.append(accounting_record.cpus)
wallclock = accounting_record.wallclock # run time in seconds
job_details.append(wallclock)
job_details.append(accounting_record.job_id)
# If the end date of this job was within the month or we aren't reading job timestamps,
# examine it.
if args.ignore_job_timestamps or begin_timestamp <= job_date < end_timestamp:
job_nodes_are_billable = list()
job_nodes_are_nonbillable = list()
# Is the job's node billable?
if not args.all_jobs_billable:
# Need to convert commas to semicolons in lists marked by [ ]'s
match_bracket_lists = re.findall('(\[.*?\]+)',node_list)
for substr in match_bracket_lists:
new_substr = substr.replace(',', ';')
node_list = node_list.replace(substr,new_substr)
# Now, with the commas only separating the node, we can split the node list by commas.
list_of_nodes = node_list.split(',')
for node_name in list_of_nodes:
# Put the commas back for an individual node.
node_name = node_name.replace(';',',')
# Job is billable if it ran on a host starting with one of the BILLABLE_HOSTNAME_PREFIXES.
billable = any([node_name.startswith(p) for p in BILLABLE_HOSTNAME_PREFIXES])
# Job is not billable if it ran on a host starting with one of the NONBILLABLE_HOSTNAME_PREFIXES.
nonbillable = any([node_name.startswith(p) for p in NONBILLABLE_HOSTNAME_PREFIXES])
# Screen for cases where a node is either billable and nonbillable or neither.
if billable and nonbillable:
print("*** Error: Node %s of Job %s is both billable and non-billable" % (node_name, accounting_record.job_id), file=sys.stderr)
jobids_with_billable_and_non_nodes.add(accounting_record.job_id)
elif not (billable or nonbillable):
print("*** Error: Node %s of Job %s is neither billable nor non-billable" % (node_name, accounting_record.job_id), file=sys.stderr)
jobids_with_unknown_billable_nodes.add(accounting_record.job_id)
job_nodes_are_billable.append(billable)
job_nodes_are_nonbillable.append(nonbillable)
job_is_billable = any(job_nodes_are_billable)
job_is_nonbillable = any(job_nodes_are_nonbillable)
else:
job_is_billable = True
job_is_nonbillable = False
job_is_both_billable_and_non = job_is_billable and job_is_nonbillable
job_is_unknown_billable = not (job_is_billable or job_is_nonbillable)
# Do we know this job's user?
job_user_is_known = accounting_record.owner in users_list
# If not, save the username in an unknown-user list.
if not job_user_is_known:
# Save unknown user and job details in unknown user lists.
not_in_users_list.add(accounting_record.owner)
# If we know the user or the job has a account...
if job_user_is_known or job_account is not None:
# If job failed, save in Failed job list.
if job_failed:
failed_job_details.append(job_details + [failed_code])
else:
# If hostname doesn't have a billable prefix, save in an nonbillable list.
if job_is_both_billable_and_non:
both_billable_and_non_node_job_details.append(job_details + ['Both Billable and Non Nodes'])
elif job_is_unknown_billable:
unknown_node_job_details.append(job_details + ['Unknown Node'])
unknown_job_nodes.add(node_list)
elif job_is_billable:
billable_job_details.append(job_details)
elif job_is_nonbillable:
nonbillable_node_job_details.append(job_details + ['Nonbillable Node'])
else:
print(" *** Pathological state for job %s billingness. *** " % (accounting_record.job_id))
else:
# Save the job details in an unknown-user list.
unknown_user_job_details.append(job_details + ['Unknown User'])
else:
if job_date != 0 and job_date is not None:
dates_tuple = (from_timestamp_to_date_string(job_date),
from_timestamp_to_date_string(begin_timestamp),
from_timestamp_to_date_string(end_timestamp))
print("Job date %s is not between %s and %s" % dates_tuple)
else:
print("Job date is zero/None.")
#
# ERROR FLAGGING:
#
# Print out list of users who had jobs but were not in any lab list.
if len(not_in_users_list) > 0:
print(" *** Job submitters not in users list:", end=' ')
for user in not_in_users_list:
print(user, end=' ')
print()
# Print out list of unknown accounts.
if len(list(not_in_account_list.keys())) > 0:
print(" *** Jobs with unknown accounts:")
for user in sorted(not_in_account_list.keys()):
print(' ', user)
for job_account in sorted(not_in_account_list[user]):
print(' ', job_account)
# Print out list of jobs with both project and account accounts.
if len(list(both_proj_and_acct_list.keys())) > 0:
print(" *** Jobs with both project and account accounts:")
for user in sorted(both_proj_and_acct_list.keys()):
print(' ', user)
for (proj, acct) in both_proj_and_acct_list[user]:
print(' Project:', proj, 'Account:', acct)
# Print out how many jobs were run on unknown nodes.
if len(unknown_job_nodes) > 0:
print(" *** Unknown Nodes with jobs:")
for node in sorted(unknown_job_nodes):
print(' ', node)
# Print out job IDs of jobs which have both billable and nonbillable nodes.
if len(jobids_with_billable_and_non_nodes) > 0:
print(" *** Job IDs with both billable and nonbillable nodes:")
for jobid in jobids_with_billable_and_non_nodes:
print(' ', jobid)
# Print out job IDs of jobs which have nodes which are neither billable nor nonbillable.
if len(jobids_with_unknown_billable_nodes) > 0:
print(" *** Job IDs with nodes which are neither billable nor nonbillable:")
for jobid in jobids_with_unknown_billable_nodes:
print(' ', jobid)
# Output the accounting details to the BillingDetails worksheet.
print(" Outputting accounting details")
# Output jobs to sheet for billable jobs.
if len(billable_job_details) > 0:
print(" Billable Jobs: ", end=' ')
write_job_details(billing_details_wkbk, computing_sheet, "Computing", billable_job_details)
# Output nonbillable jobs to sheet for nonbillable jobs.
if len(nonbillable_node_job_details) > 0:
print(" Nonbillable Jobs: ", end=' ')
all_nonbillable_job_details = \
nonbillable_node_job_details + unknown_user_job_details + unknown_node_job_details + both_billable_and_non_node_job_details
write_job_details(billing_details_wkbk, nonbillable_job_sheet, "Nonbillable Jobs", all_nonbillable_job_details)
# Output jobs to sheet for failed jobs.
if len(failed_job_details) > 0:
print(" Failed Jobs: ", end=' ')
write_job_details(billing_details_wkbk, failed_job_sheet, "Failed Jobs", failed_job_details)
print("Computing charges computed.")
# Generates the "Consulting" sheet.
def compute_consulting_charges(config_wkbk, begin_timestamp, end_timestamp, consulting_timesheet, consulting_sheet):
print("Computing consulting charges...")
begin_datetime = datetime.datetime.utcfromtimestamp(begin_timestamp)
end_datetime = datetime.datetime.utcfromtimestamp(end_timestamp)
###
# Read the config workbook to get a list of active PIs
###
pis_sheet = config_wkbk["PIs"]
pis_list = sheet_get_named_column(pis_sheet, "PI Tag")
dates_added = sheet_get_named_column(pis_sheet, "Date Added")
dates_remvd = sheet_get_named_column(pis_sheet, "Date Removed")
# Note: Previous versions had a bug here, passing begin_timestamp and end_timestamp directly to filter_by_dates()
active_pis_list = filter_by_dates(pis_list, list(zip(dates_added, dates_remvd)), begin_datetime, end_datetime)
###
# Read the Consulting Timesheet.
###
#consulting_workbook = xlrd.open_workbook(consulting_timesheet)
consulting_workbook = openpyxl.load_workbook(consulting_timesheet, data_only=True) # read_only=True)
#hours_sheet = consulting_workbook.sheet_by_name("Hours")
hours_sheet = consulting_workbook["Hours"]
dates = sheet_get_named_column(hours_sheet, "Date")
pi_tags = sheet_get_named_column(hours_sheet, "PI Tag")
hours = sheet_get_named_column(hours_sheet, "Hours")
travel_hours = sheet_get_named_column(hours_sheet, "Travel Hours")
participants = sheet_get_named_column(hours_sheet, "Participants")
clients = sheet_get_named_column(hours_sheet, "Clients")
summaries = sheet_get_named_column(hours_sheet, "Summary")
notes = sheet_get_named_column(hours_sheet, "Notes")
cumul_hours = sheet_get_named_column(hours_sheet, "Cumul Hours")
# Mar 2018: new column denoting that these entries should be ignored
# (this entries are paid for by FTE% and not hourly).
sdrc_members = sheet_get_named_column(hours_sheet, "SDRC ?")
# If there is no "SDRC?" column (backward compatibility),
# just make a list of empty strings to zip with the columns above.
if sdrc_members is None:
sdrc_members = [""] * len(dates)
# Convert empty travel hours to zeros.
travel_hours = [0 if h=='' else h for h in travel_hours]
row = 1
for (date, pi_tag, hours_spent, travel_hrs, participant, client, summary, note, cumul_hours_spent, sdrc_member) in \
zip(dates, pi_tags, hours, travel_hours, participants, clients, summaries, notes, cumul_hours, sdrc_members):
# If date is blank, we are done.
if date == "" or date is None:
break
# Ignore this entry if there is an X in the "SDRC ?" column.
if sdrc_member == "X":
continue
# Confirm date is within this month.
try:
# date_timestamp = from_excel_date_to_timestamp(date)
date_timestamp = from_datetime_to_timestamp(date)
except:
print("Date Error:", date, "Summary:", summary, "Hours:", hours_spent, "Cumul:", cumul_hours_spent, file=sys.stderr)
continue
# If date is before beginning of the month or after the end of the month, skip this entry.
if begin_timestamp > date_timestamp or date_timestamp >= end_timestamp: continue
# Confirm that pi_tag is in the list of active PIs for this month.
if pi_tag not in active_pis_list:
print(" PI %s not in active PI list...skipping" % pi_tag)
# Copy the entry into the output consulting sheet.
col = 0
#consulting_sheet.write(row, col, date, DATE_FORMAT)
consulting_sheet.cell(row+1, col+1, date).style = DATE_FORMAT
col += 1
#consulting_sheet.write(row, col, pi_tag)
consulting_sheet.cell(row+1, col+1, pi_tag)
col += 1
#consulting_sheet.write(row, col, float(hours_spent), FLOAT_FORMAT)
consulting_sheet.cell(row+1, col+1, float(hours_spent)).style = FLOAT_FORMAT
col += 1
#consulting_sheet.write(row, col, float(travel_hrs), FLOAT_FORMAT)
if travel_hrs is not None:
consulting_sheet.cell(row+1, col+1, float(travel_hrs)).style = FLOAT_FORMAT
col += 1
#consulting_sheet.write(row, col, participant)
consulting_sheet.cell(row+1, col+1, participant)
col += 1
#consulting_sheet.write(row, col, client)
consulting_sheet.cell(row+1, col+1, client)
col += 1
#consulting_sheet.write(row, col, summary)
consulting_sheet.cell(row+1, col+1, summary)
col += 1
#consulting_sheet.write(row, col, note)
consulting_sheet.cell(row+1, col+1, note)
col += 1
try:
consulting_sheet.cell(row+1, col+1, float(cumul_hours_spent)).style = FLOAT_FORMAT
except ValueError:
if cumul_hours_spent == '#NAME?':
print("Entry dated {0} for '{1}' for PI {2} has uncalculated cumul_hours".format(from_timestamp_to_date_string(from_datetime_to_timestamp(date)), summary, pi_tag))
sys.exit(-1)
col += 1
row += 1
def write_cloud_details_V1(cloud_sheet, row_dict, output_row):
output_col = 1 # 0
total_amount = 0.0
# Write Google data into Cloud sheet.
# Output 'Platform' field.
cloud_sheet.cell(output_row+1, output_col+1, row_dict['Product'])
output_col += 1
# Output 'Account' Field.
cloud_sheet.cell(output_row+1, output_col+1, row_dict['Order'])
output_col += 1
# Output 'Project' field.
cloud_sheet.cell(output_row+1, output_col+1, row_dict['Source'])
output_col += 1
# Output 'Description' field.
cloud_sheet.cell(output_row+1, output_col+1, row_dict['Description'])
output_col += 1
# Output 'Dates' field.
cloud_sheet.cell(output_row+1, output_col+1, row_dict['Interval'])
output_col += 1
# Parse quantity.
if len(row_dict['Quantity']) > 0:
quantity = locale.atof(row_dict['Quantity'])
else:
quantity = ''
# Output 'Quantity' field.
cloud_sheet.cell(output_row+1, output_col+1, quantity).style = FLOAT_FORMAT
output_col += 1
# Output 'Unit of Measure' field.
cloud_sheet.cell(output_row+1, output_col+1, row_dict['UOM'])
output_col += 1
# Parse charge.
amount = locale.atof(row_dict['Amount'])
# Accumulate total charges.
total_amount += amount
# Output 'Charge' field.
cloud_sheet.cell(output_row+1, output_col+1, amount, MONEY_FORMAT)
output_col += 1
return total_amount
def write_cloud_details_V2(cloud_sheet, row_dict, output_row):
output_col = 0
total_amount = 0.0
# Write Google data into Cloud sheet.
# Output 'Platform' field.
cloud_sheet.cell(output_row+1, output_col+1, "Google Cloud Platform")
output_col += 1
# Output 'Account' field. (subaccount)
cloud_sheet.cell(output_row+1, output_col+1, row_dict['Account ID'])
output_col += 1
# Output 'Project' field. (Project Name + Project ID)
cloud_sheet.cell(output_row+1, output_col+1, row_dict['Source'])
output_col += 1
# Output 'Description' field. (SKU description of the charge)
sku_description = "%s %s" % (row_dict['Product'], row_dict['Resource Type'])
cloud_sheet.cell(output_row+1, output_col+1, sku_description)
output_col += 1
# Output 'Dates' field.
date_range = "%s-%s" % (row_dict['Start Date'], row_dict['End Date'])
cloud_sheet.cell(output_row+1, output_col+1, date_range)
output_col += 1
# Parse quantity.
quantity_str = row_dict['Quantity'].strip()
if len(quantity_str) > 0:
quantity = locale.atof(quantity_str)
else:
quantity = ''
# Output 'Quantity' field.
cloud_sheet.cell(output_row+1, output_col+1, quantity).style = FLOAT_FORMAT
output_col += 1
# Output 'Unit of Measure' field.
cloud_sheet.cell(output_row+1, output_col+1, row_dict['Unit'])
output_col += 1
# Parse charge.
amount = locale.atof(row_dict['Amount'])
# Accumulate total charges.
total_amount += amount
# Output 'Charge' field.
cloud_sheet.cell(output_row+1, output_col+1, amount).style = MONEY_FORMAT
output_col += 1
return total_amount
def write_cloud_details_V3(cloud_sheet, row_dict, output_row):
# If Cost type is not Usage, then ignore line.
if row_dict['Cost type'] != "Usage":
return 0.0
output_col = 0
total_amount = 0.0
# Write Google data into Cloud sheet.
# Output 'Platform' field.
cloud_sheet.cell(output_row+1, output_col+1, "Google Cloud Platform")
output_col += 1
service = row_dict['Service description']
project_id = row_dict['Project ID']
account = row_dict['Billing account ID']
# Output 'Account' field. (subacccount)
cloud_sheet.cell(output_row+1, output_col+1, account)
output_col += 1
# Output 'Project' field. (Project Name + Project ID)
cloud_sheet.cell(output_row+1, output_col+1, project_id)
output_col += 1
# Output 'Description' field. (SKU description of the charge)
sku_description = "%s %s" % (service, row_dict['SKU description'])
cloud_sheet.cell(output_row+1, output_col+1, sku_description)
output_col += 1
# Output 'Dates' field.
date_range = "%s-%s" % (row_dict['Usage start date'], row_dict['Usage end date'])
cloud_sheet.cell(output_row+1, output_col+1, date_range)
output_col += 1
# Parse quantity.
quantity_str = row_dict['Usage amount'].strip()
if len(quantity_str) > 0:
quantity = locale.atof(quantity_str)
else:
quantity = ''
# Output 'Quantity' field.
cloud_sheet.cell(output_row+1, output_col+1, quantity).style = FLOAT_FORMAT
output_col += 1
# Output 'Unit of Measure' field.
cloud_sheet.cell(output_row+1, output_col+1, row_dict['Usage unit'])
output_col += 1
# Parse charge.
amount = locale.atof(row_dict['Cost ($)'])
# Accumulate total charges.
total_amount += amount
# Output 'Charge' field.
cloud_sheet.cell(output_row+1, output_col+1, amount).style = MONEY_FORMAT
output_col += 1
return total_amount