"""
Module to define methods that fetch data to store in the oss metric
entity objects.
"""
import os
import json
from metricsLib.metrics_definitions import SIMPLE_METRICS, ORG_METRICS, ADVANCED_METRICS
from metricsLib.metrics_definitions import PERIODIC_METRICS, RESOURCE_METRICS
from metricsLib.oss_metric_entities import GithubOrg, Repository
from metricsLib.constants import PATH_TO_METADATA

def parse_tracked_repos_file(org=None):
    """
    Function to parse projects_tracked.json

    Arguments:
        org: optional org login; when given, only that org's repos are parsed

    Returns:
        Tuple of a list of tracked org logins and a dict mapping each org
        to the list of repo urls tracked for it
    """

    # TODO: Add support for reading a repos-to-include.txt file
    metadata_path = os.path.join(PATH_TO_METADATA, "projects_tracked.json")
    with open(metadata_path, "r", encoding="utf-8") as file:
        tracking_file = json.load(file)

    # Only parse the desired org if an org was passed as an argument
    if org:
        repo_urls = {
            org: tracking_file["Open Source Projects"][org]
        }
        return [org], repo_urls

    repo_urls = tracking_file["Open Source Projects"]

    # Return the tracked org logins and the mapping of orgs to repo urls
    return tracking_file["orgs"], repo_urls
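
# A sketch of the assumed layout of projects_tracked.json, based on the keys
# read above; the org name and repo url below are hypothetical examples:
# {
#     "orgs": ["example-org"],
#     "Open Source Projects": {
#         "example-org": [
#             "https://github.com/example-org/example-repo"
#         ]
#     }
# }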


def parse_repos_and_orgs_into_objects(org_name_list, repo_name_list):
    """
    This function parses lists of strings into oss metric entities and
    returns lists of corresponding oss metric entity objects.

    Arguments:
        org_name_list: list of logins for github orgs
        repo_name_list: dict mapping each org login to a list of urls for
            its tracked git repositories

    Returns:
        Tuple of lists of oss metric entity objects
    """
    orgs = [GithubOrg(org) for org in org_name_list]

    repos = []

    for owner, urls in repo_name_list.items():
        print(owner)
        # Search for the org whose login matches the owner of these repos
        org_id = next(
            (x.repo_group_id for x in orgs if x.login.lower() == owner.lower()), None)

        for repo_url in urls:
            repos.append(Repository(repo_url, org_id))
    return orgs, repos
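
# A minimal usage sketch; the variable names are illustrative and assume the
# two parsers above are used together:
#     org_names, repo_urls = parse_tracked_repos_file()
#     orgs, repos = parse_repos_and_orgs_into_objects(org_names, repo_urls)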


def get_all_data(all_orgs, all_repos):
    """
    Call all relevant data collection methods on the given orgs and repos:
    fetch new metric data, load previously collected data, and write the
    combined results to json files.

    Arguments:
        all_orgs: List of all orgs to gather metrics for
        all_repos: List of all repos to gather metrics for
    """
    fetch_all_new_metric_data(all_orgs, all_repos)
    read_previous_metric_data(all_repos, all_orgs)
    write_metric_data_json_to_file(all_orgs, all_repos)
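
# Continuing the sketch above, a single call then drives a full collection cycle:
#     get_all_data(orgs, repos)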


def add_info_to_org_from_list_of_repos(repo_list, org):
    """
    Iterate through previously collected metric data that is associated
    with a repo and derive the cumulative metric data for the whole
    organization instead of the repository.

    This is mainly to avoid using more api calls than we have to.

    Arguments:
        repo_list: List of all repos with metrics
        org: The github org to add metrics to
    """
    # Define counts to update based on tracked repositories.
    org_counts = {"commits_count": 0,
                  "issues_count": 0,
                  "open_issues_count": 0,
                  "closed_issues_count": 0,
                  "pull_requests_count": 0,
                  "open_pull_requests_count": 0,
                  "merged_pull_requests_count": 0,
                  "closed_pull_requests_count": 0,
                  "forks_count": 0,
                  "stargazers_count": 0,
                  "watchers_count": 0
                  }

    # Add repo data to the org that the repo is a part of
    for repo in repo_list:
        # Only count repos that belong to this org's repo group
        if repo.needed_parameters["repo_group_id"] == org.needed_parameters["repo_group_id"]:
            # Add metric data.
            for key in org_counts:
                raw_count = repo.metric_data.get(key)
                if raw_count:
                    org_counts[key] += raw_count

    org.store_metrics(org_counts)
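
# Illustrative example with hypothetical numbers: if two repos share the org's
# repo_group_id and report "commits_count" values of 10 and 5, the org is stored
# with "commits_count": 15; repos with a missing or zero count are skipped.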


def fetch_all_new_metric_data(all_orgs, all_repos):
    """
    This method applies all desired metrics to all desired repos and orgs
    and stores the resulting metric data.

    Arguments:
        all_orgs: List of all orgs to gather metrics for
        all_repos: List of all repos to gather metrics for
    """

    # Capture the metric data from all repos
    for repo in all_repos:
        print(f"Fetching metrics for repo {repo.name}, id #{repo.repo_id}.")
        # Get info from all metrics for each repo
        for metric in SIMPLE_METRICS:
            repo.apply_metric_and_store_data(metric)

        for metric in PERIODIC_METRICS:
            repo.apply_metric_and_store_data(metric)

        for metric in RESOURCE_METRICS:
            repo.apply_metric_and_store_data(metric, oss_entity=repo)

        for metric in ADVANCED_METRICS:
            repo.apply_metric_and_store_data(metric)

    # Capture all metric data for all Github orgs
    for org in all_orgs:
        print(f"Fetching metrics for org {org.name}, id #{org.repo_group_id}.")
        for metric in ORG_METRICS:
            org.apply_metric_and_store_data(metric)
            print(metric.name)
        add_info_to_org_from_list_of_repos(all_repos, org)


def read_current_metric_data(repos, orgs):
    """
    Read current metrics and load previous metrics that
    were saved in .old files.

    Arguments:
        repos: repos to read data for.
        orgs: orgs to read data for.
    """

    for org in orgs:
        path = org.get_path_to_json_data()

        # Load the previous metric data saved as {path}.old
        with open(f"{path}.old", "r", encoding="utf-8") as file:
            previous_metric_org_json = json.load(file)

        org.previous_metric_data.update(previous_metric_org_json)

        # Load the current metric data.
        with open(path, "r", encoding="utf-8") as file:
            print(path)
            current_metric_org_json = json.load(file)

        org.metric_data.update(current_metric_org_json)

    for repo in repos:
        path = repo.get_path_to_json_data()

        # Load the previous metric data saved as {path}.old
        with open(f"{path}.old", "r", encoding="utf-8") as file:
            previous_metric_repo_json = json.load(file)

        repo.previous_metric_data.update(previous_metric_repo_json)

        # Load the current metric data.
        with open(path, "r", encoding="utf-8") as file:
            metric_repo_json = json.load(file)

        repo.metric_data.update(metric_repo_json)


def read_previous_metric_data(repos, orgs):
    """
    This method reads the previously gathered metric data and
    stores it in the OSSEntity objects passed in.

    This is for the reports that compare changes since last collection.

    Arguments:
        repos: List of all repos to read metrics for
        orgs: List of all orgs to read metrics for
    """
    for org in orgs:
        try:
            with open(org.get_path_to_json_data(), "r", encoding="utf-8") as file:
                prev_data = json.load(file)
                org.previous_metric_data.update(prev_data)
        except FileNotFoundError:
            print(f"Could not find previous data records for org {org.login}")

    for repo in repos:
        try:
            with open(repo.get_path_to_json_data(), "r", encoding="utf-8") as file:
                prev_data = json.load(file)
                repo.previous_metric_data.update(prev_data)
        except FileNotFoundError:
            print(f"Could not find previous data records for repo {repo.name}")


def write_metric_data_json_to_file(orgs, repos):
    """
    Write all metric data to json files.

    Keep old metrics as a .old file.

    Arguments:
        orgs: orgs to write to file
        repos: repos to write to file
    """

    for org in orgs:
        path = org.get_path_to_json_data()

        # Save the previously collected data as {path}.old
        previous_metric_org_json = json.dumps(org.previous_metric_data, indent=4)

        with open(f"{path}.old", "w+", encoding="utf-8") as file:
            file.write(previous_metric_org_json)

        # Merge the previous data with the current metric data and write it to {path}.
        # Copy first so the merge does not mutate previous_metric_data.
        org_dict = dict(org.previous_metric_data)
        org_dict.update(org.metric_data)
        org_metric_data = json.dumps(org_dict, indent=4)

        with open(path, "w+", encoding="utf-8") as file:
            file.write(org_metric_data)

    for repo in repos:
        path = repo.get_path_to_json_data()

        # Save the previously collected data as {path}.old
        previous_metric_repo_json = json.dumps(repo.previous_metric_data, indent=4)

        with open(f"{path}.old", "w+", encoding="utf-8") as file:
            file.write(previous_metric_repo_json)

        # Merge the previous data with the current metric data and write it to {path}.
        repo_dict = dict(repo.previous_metric_data)
        repo_dict.update(repo.metric_data)
        repo_metric_data = json.dumps(repo_dict, indent=4)

        with open(path, "w+", encoding="utf-8") as file:
            file.write(repo_metric_data)
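
# Resulting on-disk layout per entity, where {path} comes from
# get_path_to_json_data():
#     {path}      - previous data merged with the newly collected metrics
#     {path}.old  - snapshot of the previous metric data only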