Skip to content

Commit 92c25c4

Browse files
feat(RELEASE-1246): implement script to push files to CGW
This script automates pushing files to the CGW: - Reads metadata and processes files from the content directory. - Checks if a file already exists in the product version based on label, short URL, and download URL, skipping duplicates to avoid failure. - Creates new files and returns their IDs upon success. - Generates a JSON report summarizing the number of files created, skipped, and the metadata used. Signed-off-by: Sean Conroy <[email protected]>
1 parent 0f82be4 commit 92c25c4

5 files changed

+988
-0
lines changed

Dockerfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ COPY templates /home/templates
6565
COPY pubtools-pulp-wrapper /home/pubtools-pulp-wrapper
6666
COPY pubtools-marketplacesvm-wrapper /home/pubtools-marketplacesvm-wrapper
6767
COPY developer-portal-wrapper /home/developer-portal-wrapper
68+
COPY publish-to-cgw-wrapper /home/publish-to-cgw-wrapper
6869
COPY sbom /home/sbom
6970

7071
# It is mandatory to set these labels
@@ -83,4 +84,5 @@ ENV PATH="$PATH:/home/utils"
8384
ENV PATH="$PATH:/home/pubtools-pulp-wrapper"
8485
ENV PATH="$PATH:/home/pubtools-marketplacesvm-wrapper"
8586
ENV PATH="$PATH:/home/developer-portal-wrapper"
87+
ENV PATH="$PATH:/home/publish-to-cgw-wrapper"
8688
ENV PATH="$PATH:/home/sbom"
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
publish_to_cgw_wrapper.py
Lines changed: 317 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,317 @@
1+
#!/usr/bin/env python3
2+
"""
3+
This script interacts with the Content Gateway (CGW) API to create and manage content files.
4+
It ensures each file is checked before creation and skips files that already exist.
5+
The script is idempotent; it can be executed multiple times as long as the label,
6+
short URL, and download URL remain unchanged.
7+
8+
### **Functionality:**
9+
1. Reads a JSON metadata file and a directory containing content files.
10+
2. Retrieves the product ID using the provided product name and product code.
11+
3. Retrieves the version ID using the product version name.
12+
4. Generates metadata for each file in the content directory.
13+
5. Checks for existing files and skips them if they match the label, short URL, and download
14+
URL.
15+
6. Creates new files using the metadata.
16+
7. Rolls back created files if an error occurs during execution.
17+
8. Writes the final result, including processed, created, and skipped files, to a JSON file.
18+
9. Outputs the path of the generated result.json file to an output file.
19+
"""
20+
21+
import os
22+
import argparse
23+
import json
24+
import hashlib
25+
import logging
26+
import requests
27+
from requests.auth import HTTPBasicAuth
28+
29+
# Default values applied to every component; values coming from the
# data_file take precedence over these.
default_values_per_component = {
    "type": "FILE",
    "hidden": False,
    "invisible": False,
}

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
38+
39+
40+
def parse_args():
    """Parse and return the command line arguments."""
    parser = argparse.ArgumentParser(
        prog="publish_to_cgw_wrapper", description="Publish content to the Content Gateway"
    )
    # Every option is mandatory; declared as (flag, help) pairs so the
    # parser wiring is uniform.
    required_options = (
        ("--cgw_host", "The hostname of the content-gateway to publish the metadata to"),
        ("--data_file", "Path to the JSON file containing merged data"),
        ("--content_dir", "Path to the directory containing content to push"),
        ("--output_file", "Path to the file which write the result.json file path"),
    )
    for flag, help_text in required_options:
        parser.add_argument(flag, required=True, help=help_text)
    return parser.parse_args()
67+
68+
69+
def call_cgw_api(host, method, endpoint, auth, data=None):
    """Make an API call to the Content Gateway service.

    Args:
        host: Base URL of the CGW service; endpoint is appended verbatim.
        method: HTTP method name, case-insensitive (e.g. "GET", "POST").
        endpoint: API path beginning with "/", e.g. "/products".
        auth: requests auth object (e.g. HTTPBasicAuth).
        data: Optional JSON-serializable request body.

    Returns:
        The successful requests.Response object.

    Raises:
        RuntimeError: On a transport-level failure or a non-2xx response.
    """
    # Keep the try body minimal: only the request itself can raise
    # RequestException; the status check below must not be shadowed by it.
    try:
        response = requests.request(
            method=method.upper(),
            url=f"{host}{endpoint}",
            headers={"Accept": "application/json", "Content-Type": "application/json"},
            auth=auth,
            json=data,
        )
    except requests.RequestException as e:
        # Chain the original exception so the underlying cause stays visible.
        raise RuntimeError(f"API call failed: {e}") from e

    if not response.ok:
        error_message = (
            response.text.strip() or f"HTTP {response.status_code}:{response.reason}"
        )
        raise RuntimeError(f"API call failed: {error_message}")

    return response
89+
90+
91+
def get_product_id(host, auth, product_name, product_code):
    """Retrieve the product ID by name and product code."""
    response = call_cgw_api(host, "GET", "/products", auth)
    match = next(
        (
            product
            for product in response.json()
            if product.get("name") == product_name
            and product.get("productCode") == product_code
        ),
        None,
    )
    if match is None:
        raise ValueError(f"Product {product_name} not found with product code {product_code}")
    logging.info(f"Found product: {product_name} with ID {match.get('id')}")
    return match.get("id")
100+
101+
102+
def get_version_id(host, auth, product_id, version_name):
    """Retrieve the version ID for a specific product."""
    response = call_cgw_api(host, "GET", f"/products/{product_id}/versions", auth)
    match = next(
        (v for v in response.json() if v.get("versionName") == version_name),
        None,
    )
    if match is None:
        raise ValueError(f"Version not found: {version_name}")
    logging.info(f"Found version: {version_name} with ID {match.get('id')}")
    return match.get("id")
111+
112+
113+
def generate_download_url(content_dir, file_name):
    """
    Generate a download URL in this format:
    /content/origin/files/sha256/{checksum[:2]}{checksum}/{file_name}
    """
    prefix = "/content/origin/files/sha256"
    digest = hashlib.sha256()
    # Hash in fixed-size chunks so arbitrarily large artifacts do not have
    # to be loaded into memory at once.
    with open(os.path.join(content_dir, file_name), "rb") as f:
        while True:
            chunk = f.read(4096)
            if not chunk:
                break
            digest.update(chunk)
    checksum = digest.hexdigest()
    return f"{prefix}/{checksum[:2]}/{checksum}/{file_name}"
125+
126+
127+
def generate_metadata(
    content_dir, components, product_Code, version_id, version_name, mirror_openshift_Push
):
    """
    Generate CGW file metadata for each file in content_dir.

    A file is included when its name starts with the name of one of the
    configured components, or when it looks like a checksum artifact
    (name starts with "sha256"); every other file is skipped.

    Args:
        content_dir: Directory containing the files to publish.
        components: List of component dicts; each needs a "name" key and may
            carry extra CGW file attributes that override the defaults.
        product_Code: CGW product code, used to build the short URL.
        version_id: CGW product version ID attached to every entry.
        version_name: CGW product version name, used to build the short URL.
        mirror_openshift_Push: When truthy, short URLs are rooted at
            "/pub/cgw" instead of "/pub/".

    Returns:
        List of metadata dicts ready to be POSTed to the CGW files endpoint.
    """
    shortURL_base = "/pub/"
    if mirror_openshift_Push:
        shortURL_base = "/pub/cgw"
    metadata = []
    shasum_files_processed = []
    logging.info(f"Generating metadata for files in {content_dir}")
    for file in os.listdir(content_dir):
        # First component whose name prefixes the file name wins.
        matching_component = None
        for component in components:
            if file.startswith(component["name"]):
                matching_component = component.copy()
                break

        if matching_component:
            logging.info(f"Processing file: {file}")
            matching_component.update(
                {
                    "productVersionId": version_id,
                    "downloadURL": generate_download_url(content_dir, file),
                    "shortURL": f"{shortURL_base}/{product_Code}/{version_name}/{file}",
                    "label": file,
                }
            )
            # "name" is only used for matching; it is not a CGW file attribute.
            del matching_component["name"]
            # Defaults first so per-component values take precedence.
            metadata.append({**default_values_per_component, **matching_component})
        elif file.startswith("sha256") and file not in shasum_files_processed:
            shasum_files_processed.append(file)
            logging.info(f"Processing file: {file}")
            if file.endswith(".gpg"):
                label = "Checksum - GPG"
            elif file.endswith(".sig"):
                label = "Checksum - Signature"
            else:
                # Covers ".txt" and any other checksum artifact. Previously an
                # unexpected extension left `label` unbound (NameError) or
                # silently reused the label from a prior iteration.
                label = "Checksum"

            metadata.append(
                {
                    "productVersionId": version_id,
                    "downloadURL": generate_download_url(content_dir, file),
                    "shortURL": f"{shortURL_base}/{product_Code}/{version_name}/{file}",
                    "label": label,
                    **default_values_per_component,
                }
            )
        else:
            # Skip files that do not start with any component name or sha256
            logging.info(
                f"Skipping file: {file} as it does not start with any component name"
            )
            continue

    return metadata
190+
191+
192+
def file_already_exists(existing_files, new_file):
    """Return the first existing file whose label, downloadURL and shortURL
    all match new_file, or None when there is no such file."""
    keys = ("label", "downloadURL", "shortURL")
    return next(
        (
            candidate
            for candidate in existing_files
            if all(candidate.get(key) == new_file.get(key) for key in keys)
        ),
        None,
    )
200+
201+
202+
def rollback_files(host, auth, product_id, version_id, created_file_ids):
    """Delete every file created in this run (best effort rollback).

    All IDs are attempted even if some deletions fail, so a single failed
    DELETE no longer leaves the remaining created files behind.

    Raises:
        RuntimeError: After all IDs were attempted, if any deletion failed
            (carries the first failure).
    """
    if created_file_ids:
        logging.warning("Rolling back created files due to failure")

    errors = []
    for file_id in created_file_ids:
        try:
            call_cgw_api(
                host,
                "DELETE",
                f"/products/{product_id}/versions/{version_id}/files/{file_id}",
                auth,
            )
        except Exception as e:
            # Keep going: deleting the remaining files is more useful than
            # stopping at the first failure.
            errors.append(e)

    if errors:
        raise RuntimeError(f"Failed to rollback file: {errors[0]}")
217+
218+
219+
def create_files(host, auth, product_id, version_id, metadata):
    """Create a CGW file entry for every item in metadata.

    Files that already exist (matching label, shortURL and downloadURL) are
    skipped so repeated runs stay idempotent. On any failure, every file
    created during this call is rolled back before the error is re-raised.

    Returns:
        Tuple (created_file_ids, skipped_files_ids).

    Raises:
        RuntimeError: If listing existing files or creating a file fails.
    """
    created_file_ids = []
    skipped_files_ids = []
    try:
        existing_files = call_cgw_api(
            host, "GET", f"/products/{product_id}/versions/{version_id}/files", auth
        )
        existing_files = existing_files.json()

        for file_metadata in metadata:
            file_check = file_already_exists(existing_files, file_metadata)
            if file_check:
                skipped_files_ids.append(file_check.get("id"))
                logging.info(
                    "Skipping creation: File {} already exists with ShortURL {}".format(
                        file_check["label"], file_check["shortURL"]
                    )
                )
                continue
            logging.info(
                "Creating file: {} with ShortURL {}".format(
                    file_metadata["label"], file_metadata["shortURL"]
                )
            )
            created_file_id = call_cgw_api(
                host,
                "POST",
                f"/products/{product_id}/versions/{version_id}/files",
                auth,
                file_metadata,
            )
            created_file_id = created_file_id.json()
            # Message previously misspelled "Succesfully".
            logging.info(f"Successfully created file with ID: {created_file_id}")
            created_file_ids.append(created_file_id)
        return created_file_ids, skipped_files_ids
    except Exception as e:
        rollback_files(host, auth, product_id, version_id, created_file_ids)
        # Chain the cause so the original failure is preserved in tracebacks.
        raise RuntimeError(f"Failed to create file: {e}") from e
258+
259+
260+
def main():
    """Read the data file, publish content files to CGW and write a JSON
    report (result.json) with counts of processed/created/skipped files.

    The path of result.json is written to --output_file so callers can
    locate the report. Exits with status 1 on any failure.
    """
    try:
        args = parse_args()

        # Credentials are accepted only via the environment so they never
        # appear on the command line.
        username = os.getenv("CGW_USERNAME")
        password = os.getenv("CGW_PASSWORD")

        if not username or not password:
            raise ValueError(
                "CGW_USERNAME and CGW_PASSWORD environment variables are required"
            )

        auth = HTTPBasicAuth(username, password)
        with open(args.data_file, "r") as file:
            data = json.load(file)

        content_gateway = data["contentGateway"]
        productName = content_gateway["productName"]
        productCode = content_gateway["productCode"]
        productVersionName = content_gateway["productVersionName"]
        # Optional flag; None/absent means the default "/pub/" short-URL base.
        mirrorOpenshiftPush = content_gateway.get("mirrorOpenshiftPush")
        components = content_gateway["components"]

        product_id = get_product_id(args.cgw_host, auth, productName, productCode)
        product_version_id = get_version_id(
            args.cgw_host, auth, product_id, productVersionName
        )
        metadata = generate_metadata(
            args.content_dir,
            components,
            productCode,
            product_version_id,
            productVersionName,
            mirrorOpenshiftPush,
        )
        created, skipped = create_files(
            args.cgw_host, auth, product_id, product_version_id, metadata
        )
        logging.info(f"Created {len(created)} files and skipped {len(skipped)} files")

        result_data = {
            "no_of_files_processed": len(metadata),
            "no_of_files_created": len(created),
            "no_of_files_skipped": len(skipped),
            "metadata": metadata,
        }
        # result.json lives next to the data file.
        result_file = os.path.join(os.path.dirname(args.data_file), "result.json")
        with open(result_file, "w") as f:
            json.dump(result_data, f)
        with open(args.output_file, "w") as f:
            f.write(result_file)

    except Exception as e:
        logging.error(e)
        # raise SystemExit instead of the interactive-only exit() builtin.
        raise SystemExit(1)
314+
315+
316+
if __name__ == "__main__":
    # Script entry point guard: do nothing on import.
    main()

0 commit comments

Comments
 (0)