@@ -225,7 +225,7 @@ def build_database(
225
225
return database_dir
226
226
227
227
228
- def generate_models (config , project : Project , database_dir : str ) -> None :
228
+ def generate_models (config , args , project : Project , database_dir : str ) -> None :
229
229
"""
230
230
Generate models for a project.
231
231
@@ -243,6 +243,8 @@ def generate_models(config, project: Project, database_dir: str) -> None:
243
243
generator .generateSources = should_generate_sources (project )
244
244
generator .generateSummaries = should_generate_summaries (project )
245
245
generator .setenvironment (database = database_dir , folder = name )
246
+ generator .threads = args .codeql_threads
247
+ generator .ram = args .codeql_ram
246
248
generator .run ()
247
249
248
250
@@ -333,43 +335,44 @@ def pretty_name_from_artifact_name(artifact_name: str) -> str:
333
335
334
336
def download_dca_databases (
335
337
language : str ,
336
- experiment_name : str ,
338
+ experiment_names : list [ str ] ,
337
339
pat : str ,
338
340
projects : List [Project ],
339
341
) -> List [tuple [Project , str | None ]]:
340
342
"""
341
343
Download databases from a DCA experiment.
342
344
Args:
343
- experiment_name : The name of the DCA experiment to download databases from.
345
+ experiment_names : The names of the DCA experiments to download databases from.
344
346
pat: Personal Access Token for GitHub API authentication.
345
347
projects: List of projects to download databases for.
346
348
Returns:
347
349
List of (project, database_dir) pairs, where database_dir is None if the download failed.
348
350
"""
349
351
print ("\n === Finding projects ===" )
350
- response = get_json_from_github (
351
- f"https://raw.githubusercontent.com/github/codeql-dca-main/data/{ experiment_name } /reports/downloads.json" ,
352
- pat ,
353
- )
354
- targets = response ["targets" ]
355
352
project_map = {project ["name" ]: project for project in projects }
356
353
analyzed_databases = {}
357
- for data in targets .values ():
358
- downloads = data ["downloads" ]
359
- analyzed_database = downloads ["analyzed_database" ]
360
- artifact_name = analyzed_database ["artifact_name" ]
361
- pretty_name = pretty_name_from_artifact_name (artifact_name )
362
-
363
- if not pretty_name in project_map :
364
- print (f"Skipping { pretty_name } as it is not in the list of projects" )
365
- continue
366
-
367
- if pretty_name in analyzed_databases :
368
- print (
369
- f"Skipping previous database { analyzed_databases [pretty_name ]['artifact_name' ]} for { pretty_name } "
370
- )
354
+ for experiment_name in experiment_names :
355
+ response = get_json_from_github (
356
+ f"https://raw.githubusercontent.com/github/codeql-dca-main/data/{ experiment_name } /reports/downloads.json" ,
357
+ pat ,
358
+ )
359
+ targets = response ["targets" ]
360
+ for data in targets .values ():
361
+ downloads = data ["downloads" ]
362
+ analyzed_database = downloads ["analyzed_database" ]
363
+ artifact_name = analyzed_database ["artifact_name" ]
364
+ pretty_name = pretty_name_from_artifact_name (artifact_name )
365
+
366
+ if not pretty_name in project_map :
367
+ print (f"Skipping { pretty_name } as it is not in the list of projects" )
368
+ continue
369
+
370
+ if pretty_name in analyzed_databases :
371
+ print (
372
+ f"Skipping previous database { analyzed_databases [pretty_name ]['artifact_name' ]} for { pretty_name } "
373
+ )
371
374
372
- analyzed_databases [pretty_name ] = analyzed_database
375
+ analyzed_databases [pretty_name ] = analyzed_database
373
376
374
377
def download_and_decompress (analyzed_database : dict ) -> str :
375
378
artifact_name = analyzed_database ["artifact_name" ]
@@ -450,23 +453,6 @@ def main(config, args) -> None:
450
453
if not os .path .exists (build_dir ):
451
454
os .makedirs (build_dir )
452
455
453
- # Check if any of the MaD directories contain working directory changes in git
454
- for project in projects :
455
- mad_dir = get_mad_destination_for_project (config , project ["name" ])
456
- if os .path .exists (mad_dir ):
457
- git_status_output = subprocess .check_output (
458
- ["git" , "status" , "-s" , mad_dir ], text = True
459
- ).strip ()
460
- if git_status_output :
461
- print (
462
- f"""ERROR: Working directory changes detected in { mad_dir } .
463
-
464
- Before generating new models, the existing models are deleted.
465
-
466
- To avoid loss of data, please commit your changes."""
467
- )
468
- sys .exit (1 )
469
-
470
456
database_results = []
471
457
match get_strategy (config ):
472
458
case "repo" :
@@ -477,8 +463,8 @@ def main(config, args) -> None:
477
463
projects ,
478
464
)
479
465
case "dca" :
480
- experiment_name = args .dca
481
- if experiment_name is None :
466
+ experiment_names = args .dca
467
+ if experiment_names is None :
482
468
print ("ERROR: --dca argument is required for DCA strategy" )
483
469
sys .exit (1 )
484
470
@@ -492,7 +478,7 @@ def main(config, args) -> None:
492
478
pat = f .read ().strip ()
493
479
database_results = download_dca_databases (
494
480
language ,
495
- experiment_name ,
481
+ experiment_names ,
496
482
pat ,
497
483
projects ,
498
484
)
@@ -518,7 +504,7 @@ def main(config, args) -> None:
518
504
519
505
for project , database_dir in database_results :
520
506
if database_dir is not None :
521
- generate_models (config , project , database_dir )
507
+ generate_models (config , args , project , database_dir )
522
508
523
509
524
510
if __name__ == "__main__" :
@@ -529,14 +515,26 @@ def main(config, args) -> None:
529
515
parser .add_argument (
530
516
"--dca" ,
531
517
type = str ,
532
- help = "Name of a DCA run that built all the projects" ,
533
- required = False ,
518
+ help = "Name of a DCA run that built all the projects. Can be repeated, with sources taken from all provided runs, "
519
+ "the last provided ones having priority" ,
520
+ action = "append" ,
534
521
)
535
522
parser .add_argument (
536
523
"--pat" ,
537
524
type = str ,
538
525
help = "Path to a file containing the PAT token required to grab DCA databases (the same as the one you use for DCA)" ,
539
- required = False ,
526
+ )
527
+ parser .add_argument (
528
+ "--codeql-ram" ,
529
+ type = int ,
530
+ help = "What `--ram` value to pass to `codeql` while generating models (by default the flag is not passed)" ,
531
+ default = None ,
532
+ )
533
+ parser .add_argument (
534
+ "--codeql-threads" ,
535
+ type = int ,
536
+ help = "What `--threads` value to pass to `codeql` (default %(default)s)" ,
537
+ default = 0 ,
540
538
)
541
539
args = parser .parse_args ()
542
540
0 commit comments