Skip to content

Commit 6aaa31f

Browse files
new: Added support of configuration file.
1 parent d922d3a commit 6aaa31f

7 files changed

+57
-5
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -34,5 +34,6 @@ eproject.cfg
3434
*.log
3535

3636
emissions.csv
37+
vulntrain/conf.py
3738
vulnerability/
3839

CHANGELOG.md

+5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# Changelog
22

3+
## Release 0.5.0 (2025-02-21)
4+
5+
Added support for a configuration file.
6+
7+
38
## Release 0.4.0 (2025-02-21)
49

510
The dataset generation step now uses data from GitHub Advisories,

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
55

66
[project]
77
name = "VulnTrain"
8-
version = "0.4.0"
8+
version = "0.5.0"
99
description = "Generate datasets and models based on vulnerability descriptions from Vulnerability-Lookup."
1010
authors = [
1111
{name = "Cédric Bonhomme",email = "[email protected]"}

vulntrain/conf.py.sample

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
valkey_host = "127.0.0.1"
2+
valkey_port = 10002
3+
4+
hf_token = ""

vulntrain/config.py

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#! /usr/bin/env python
2+
3+
"""This module is responsible for loading the configuration variables."""
4+
5+
import importlib.util
6+
import os
7+
8+
9+
def load_config(path):
10+
spec = importlib.util.spec_from_file_location("config", path)
11+
if spec:
12+
config = importlib.util.module_from_spec(spec)
13+
if spec.loader:
14+
spec.loader.exec_module(config)
15+
return config
16+
17+
18+
conf = None
19+
try:
20+
conf = load_config(os.environ.get("VulnTrain_CONFIG", "fedivuln/conf_sample.py"))
21+
except Exception as exc:
22+
raise Exception("No configuration file provided.") from exc
23+
finally:
24+
if not conf:
25+
raise Exception("No configuration file provided.")
26+
27+
try:
28+
valkey_host = conf.valkey_host
29+
valkey_port = conf.valkey_port
30+
except AttributeError as e:
31+
raise Exception(f"Missing configuration variable: {e}")
32+
33+
try:
34+
hf_token = conf.hf_token
35+
except Exception:
36+
hf_token = ""

vulntrain/create_dataset.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import valkey
1313
from datasets import Dataset, DatasetDict # type: ignore[import-untyped]
1414

15+
from vulntrain.conf import hf_token, valkey_host, valkey_port
1516
from vulntrain.utils import strip_markdown
1617

1718

@@ -20,8 +21,8 @@ def __init__(self, sources, nb_rows):
2021
self.sources = sources
2122
self.nb_rows = nb_rows
2223
self.valkey_client = valkey.Valkey(
23-
host="127.0.0.1",
24-
port=10002,
24+
host=valkey_host,
25+
port=valkey_port,
2526
decode_responses=True,
2627
)
2728

@@ -237,7 +238,12 @@ def gen():
237238

238239
print(dataset_dict)
239240
if args.upload:
240-
dataset_dict.push_to_hub(args.repo_id, commit_message=args.commit_message)
241+
if hf_token:
242+
dataset_dict.push_to_hub(args.repo_id, commit_message=args.commit_message)
243+
else:
244+
dataset_dict.push_to_hub(
245+
args.repo_id, commit_message=args.commit_message, token=hf_token
246+
)
241247

242248

243249
if __name__ == "__main__":

vulntrain/utils.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from markdown_it import MarkdownIt
2-
from nltk.tokenize import sent_tokenize
2+
from nltk.tokenize import sent_tokenize # type: ignore[import-untyped]
33

44

55
def sentences(text, num_sentences=5):

0 commit comments

Comments
 (0)