-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpdc-tangle.py
210 lines (181 loc) · 7.9 KB
/
pdc-tangle.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
from __future__ import absolute_import
import click
import pdc_client
import asciitree
import itertools
import multiprocessing.pool
import six
from six.moves import range
from requests.exceptions import ConnectionError
@click.command()
@click.option('--server', '-s', default='pdc.stg.fedoraproject.org',
              help=('Specifies the FQDN of the PDC instance to connect to.'
                    ' This defaults to pdc.stg.fedoraproject.org.'))
@click.option('--dep_type', '-d', multiple=True,
              default=['RPMBuildRequires', 'RPMRequires'],
              help=('Specifies the dependency types to search for. This can be'
                    ' used more than once. This defaults to RPMBuildRequires'
                    ' and RPMRequires.'))
@click.option('--release', '-r', default='fedora-26',
              help=('Specifies the release to search under.'
                    ' This defaults to fedora-26.'))
@click.argument('artifact')
def cli(artifact, release, dep_type, server):
    """
    The entry point of the tool
    """
    # NOTE: click shows the docstring above as the command's --help text, so
    # it is user-facing output; developer notes belong in comments like this.
    #
    # Flow: build a PDC client for `server`, make sure `artifact` is known to
    # PDC, collect its dependencies for `release` restricted to `dep_type`,
    # then echo either the dependency tree or a "no dependencies" message.
    pdc = pdc_client.PDCClient(
        'https://{0}/rest_api/v1/'.format(server), develop=True)
    try:
        # Guard clause: nothing to look up for an artifact PDC does not know
        if not is_artifact_in_pdc(pdc, artifact):
            click.echo('"{0}" is not in PDC'.format(artifact))
            return
        # Seed the dictionary with the queried artifact; it is filled in place
        tree = {artifact: {}}
        collect_dependencies(pdc, [artifact], release, dep_type, tree)
        dependency_count = len(tree.get(artifact))
        if dependency_count:
            print_deps_dict_as_tree(artifact, tree)
        else:
            click.echo(
                'There are no dependencies for "{}"'.format(artifact))
    except ConnectionError:
        # Raised by the PDC queries above when the server is unreachable
        click.echo(
            'The PDC instance "{}" could not be contacted'.format(server))
def collect_dependencies(pdc_obj, artifacts, release, dep_type, deps_dict):
    """
    Updates the deps_dict with the artifacts' recursive dependencies

    The dependency graph is walked level by level: every artifact of the
    current level is queried, the returned relationships are recorded with
    add_dependency, and the dependencies found become the next level. Each
    artifact name is queried at most once per call.

    :param pdc_obj: an instantiated and configured PDC object
    :param artifacts: a list of artifacts to query for dependencies
    :param release: the release to search under (e.g. fedora-26)
    :param dep_type: the types of dependencies to search for
    (e.g. ['RPMBuildRequires', 'RPMRequires'])
    :param deps_dict: the dictionary keeping track of dependencies. This will
    be updated with the results.
    :return: None
    """
    search_in_chunks_of = 100

    # This function is created to take advantage of mapping
    def get_paged_wrapper(chunk):
        # Return as a list in order to exhaust the generator
        return list(pdc_obj.get_paged(
            res=pdc_obj['release-component-relationships/'],
            from_component_name=chunk,
            from_component_release=release,
            type=dep_type,
            page_size=100
        ))

    # Names that were already sent to PDC during this call. An empty dict in
    # deps_dict cannot tell "not queried yet" apart from "queried, has no
    # dependencies", so without this set every leaf would be queried again
    # each time it shows up as somebody's dependency.
    queried = set()
    pending = artifacts
    while pending:
        # Keep only the artifacts that still need a query: not asked for yet
        # in this call and without any recorded dependencies in deps_dict.
        # This also drops duplicate names within the same level.
        artifacts_to_process = []
        for artifact in pending:
            if artifact in queried or deps_dict.get(artifact):
                continue
            queried.add(artifact)
            artifacts_to_process.append(artifact)
        # After filtering, check to see if there are any artifacts to process
        if not artifacts_to_process:
            return

        # If there is more than `search_in_chunks_of` artifacts to process,
        # do it in parallel
        if len(artifacts_to_process) > search_in_chunks_of:
            # Get the artifacts in chunks of `search_in_chunks_of`. This is a
            # current workaround due to URI length limitations that
            # was inspired by Ralph Bean <[email protected]>.
            chunks = chunked_iter(artifacts_to_process, search_in_chunks_of)
            # Start a thread pool of 8
            pool = multiprocessing.pool.ThreadPool(8)
            dependencies_results = []
            try:
                # Run the chunked queries in parallel
                for result in pool.map(get_paged_wrapper, chunks):
                    dependencies_results.extend(result)
            finally:
                # Always shut the pool down, even when a query raised, so
                # the worker threads do not outlive this call
                pool.close()
                pool.join()
        # If there's `search_in_chunks_of` artifacts or less, avoid the
        # overhead and don't setup multi-threading
        else:
            dependencies_results = get_paged_wrapper(artifacts_to_process)

        # Record every relationship and gather the next level to walk
        pending = []
        for dependency in dependencies_results:
            artifact_name = dependency['from_component']['name']
            dependency_name = dependency['to_component']['name']
            pending.append(dependency_name)
            add_dependency(artifact_name, dependency_name, deps_dict)
def add_dependency(artifact, new_dependency, deps_dict):
    """
    Adds a dependency to the deps_dict, both in flat and tree format

    "Flat" means every artifact name is a top-level key of deps_dict; "tree"
    means the dict stored under ``artifact`` maps each of its dependency names
    to the very same dict object stored under that dependency's top-level key.

    :param artifact: the artifact to add the new dependency to. An entry is
    created for it if deps_dict does not have one yet.
    :param new_dependency: the dependency to add
    :param deps_dict: the dictionary keeping track of dependencies. This will
    be updated with the results.
    :return: None
    """
    # Flat part: make sure the dependency has its own top-level entry
    dependency_node = deps_dict.setdefault(new_dependency, {})
    # setdefault instead of get: a missing artifact entry used to surface as
    # an AttributeError on None rather than being recorded
    artifact_node = deps_dict.setdefault(artifact, {})
    # Tree part: link to the shared dict object, not to a copy, so that
    # dependencies added to new_dependency later are visible from here too
    artifact_node[new_dependency] = dependency_node
# Function written by Ralph Bean <[email protected]>
def chunked_iter(iterable, chunk_size):
    """
    Lazily split a sequence into consecutive slices of ``chunk_size`` items

    Written by Ralph Bean <[email protected]>

    :param iterable: the items to split up; it is measured with ``len()`` and
    sliced, so it has to be a sequence such as a list
    :param chunk_size: how many items each chunk holds; the final chunk is
    shorter when the length is not a multiple of ``chunk_size``
    :return: a generator that yields one slice of ``iterable`` per chunk
    """
    total = len(iterable)
    for start in range(0, total, chunk_size):
        stop = start + chunk_size
        # Slicing past the end is safe and just yields the shorter tail
        yield iterable[start:stop]
def is_artifact_in_pdc(pdc_obj, artifact):
    """
    Reports whether PDC has a release component named exactly ``artifact``

    :param pdc_obj: an instantiated and configured PDC object
    :param artifact: the name of the artifact to search for
    :return: True if any item returned by the ``release-components/`` query
    has a ``name`` equal to ``artifact``, otherwise False
    """
    matches = pdc_obj.get_paged(
        pdc_obj['release-components/'], name=artifact)
    # Every returned item is compared against the requested name again
    # (presumably the name filter can return non-exact matches - TODO
    # confirm). any() stops at the first hit, so later items are not read.
    return any(entry.get('name') == artifact for entry in matches)
def strip_circular_deps(deps_dict, deps_list=None):
    """
    Takes the dependency dictionary and replaces any circular dependencies with
    <CircularDep on dep_name> so that when printing the dict out,
    it isn't stuck in infinite recursion.

    A dependency is circular when its name already appears among its own
    ancestors on the path from the top of ``deps_dict``. The top-level dict is
    rewritten in place. Nested levels are replaced by rewritten copies, so
    dicts that are shared between several parents (or with the caller's flat
    dictionary) are left untouched and a marker placed in one branch cannot
    leak into another branch.

    :param deps_dict: the dependency dict to replace circular dependencies from
    :param deps_list: the names of the ancestors of ``deps_dict``, used to
    determine where in the nested dict it is. Callers normally omit it; a
    list that is passed in holds the same items again when the call returns.
    :return: None
    """
    # A fresh list per top-level call: a mutable default would be shared by
    # all calls and stay polluted if an earlier call had raised midway
    if deps_list is None:
        deps_list = []
    # Walk a snapshot of the items because keys are swapped during the loop,
    # which is not allowed while iterating over the dict itself
    for dep_name, dep_value in list(deps_dict.items()):
        if dep_name in deps_list:
            # Cut the cycle: swap the entry for an empty marker node
            deps_dict.pop(dep_name)
            deps_dict['<CircularDep on {}>'.format(dep_name)] = {}
        elif dep_value:
            # Rewrite a private copy of the child level instead of the
            # (possibly shared) original dict
            branch = dict(dep_value)
            deps_dict[dep_name] = branch
            deps_list.append(dep_name)
            try:
                strip_circular_deps(branch, deps_list)
            finally:
                # Leave the path exactly as it was handed to this level
                deps_list.pop()
def print_deps_dict_as_tree(artifact, deps_dict):
    """
    Echoes the dependencies of one artifact as an ASCII tree

    :param artifact: the key of deps_dict to use as the root of the tree
    (i.e. what the user originally queried for)
    :param deps_dict: the dependency dictionary
    :return: None
    """
    # Keep only the queried artifact as the single root; the other top-level
    # (flat) entries of deps_dict are left out of the printed tree
    tree = {artifact: deps_dict.get(artifact)}
    # Circular dependencies have to be cut before rendering, otherwise
    # walking the nested dict would never terminate
    strip_circular_deps(tree)
    # Render with asciitree's left-aligned layout and write it out via click
    render = asciitree.LeftAligned()
    click.echo(render(tree))
# Run the click command only when this file is executed as a script, so that
# importing the module does not start the command-line tool
if __name__ == "__main__":
    cli()