Skip to content

Commit 35918af

Browse files
chg: [RELEASE] Updated CHANGELOG and pyproject.toml
1 parent 0b1f196 commit 35918af

File tree

4 files changed

+34
-5
lines changed

4 files changed

+34
-5
lines changed

CHANGELOG.md

+17
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,22 @@
11
# Changelog
22

3+
## Release 0.3.0 (2025-02-20)
4+
5+
### News
6+
7+
Dataset generation: allow specifying a commit message when uploading to Hugging Face.
8+
9+
Validation: Added a simple validation script for models optimized for text generation. The script is
10+
able to pull a model and send tasks via a pipeline.
11+
12+
### Changes
13+
14+
For the training step: added a choice of base models: gpt2, distilgpt2,
15+
meta-llama/Llama-3.3-70B-Instruct, distilbert-base-uncased
16+
17+
Various improvements to the command line parsing.
18+
19+
320
## Release 0.2.0 (2025-02-20)
421

522
### News

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
55

66
[project]
77
name = "VulnTrain"
8-
version = "0.2.0"
8+
version = "0.3.0"
99
description = "Generate datasets and models based on vulnerabilities descriptions from Vulnerability-Lookup."
1010
authors = [
1111
{name = "Cédric Bonhomme",email = "[email protected]"}

vulntrain/create_dataset.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ def __call__(self):
8383
# else:
8484
# vuln_title = ""
8585
for description in vuln["containers"]["cna"].get("descriptions", []):
86-
if description["lang"].lower() in ["en", "en-en", "en-us"]:
86+
if description["lang"].lower() in ["eng", "en", "en-en", "en-us"]:
8787
vuln_description = description["value"]
8888
break
8989
else:
@@ -172,6 +172,13 @@ def main():
172172
help="Number of rows in the dataset.",
173173
default=0,
174174
)
175+
parser.add_argument(
176+
"--commit-message",
177+
dest="commit_message",
178+
type=str,
179+
help="Commit message when publishing.",
180+
default="",
181+
)
175182

176183
args = parser.parse_args()
177184

@@ -191,8 +198,8 @@ def gen():
191198

192199
print(dataset_dict)
193200
if args.upload:
194-
# dataset_dict.push_to_hub("cedricbonhomme/vulnerability-descriptions")
195-
dataset_dict.push_to_hub(args.repo_id)
201+
# dataset_dict.push_to_hub("CIRCL/vulnerability-dataset")
202+
dataset_dict.push_to_hub(args.repo_id, commit_message=args.commit_message)
196203

197204

198205
if __name__ == "__main__":

vulntrain/summarize.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,12 @@ def main():
100100
"--base-model",
101101
dest="base_model",
102102
default="gpt2",
103-
choices=["gpt2", "distilgpt2", "meta-llama/Llama-3.3-70B-Instruct", "distilbert-base-uncased"],
103+
choices=[
104+
"gpt2",
105+
"distilgpt2",
106+
"meta-llama/Llama-3.3-70B-Instruct",
107+
"distilbert-base-uncased",
108+
],
104109
help="Base model to use.",
105110
)
106111
parser.add_argument(

0 commit comments

Comments
 (0)