
Commit 74062e8

NirantK and Anush008 authored
Add attention export functionality to experiments (#134)
* Add attention export functionality
* Update experiments/attention_export.py

Co-authored-by: Anush <[email protected]>
1 parent 1e298a0 commit 74062e8

File tree

2 files changed: +48 -0 lines changed

experiments/attention_export.py (+18)
@@ -0,0 +1,18 @@
import numpy as np
import onnx
import onnxruntime
from optimum.exporters.onnx import main_export
from transformers import AutoTokenizer

model_id = "sentence-transformers/paraphrase-MiniLM-L6-v2"
output_dir = f"models/{model_id.replace('/', '_')}"
model_kwargs = {"output_attentions": True, "return_dict": True}
tokenizer = AutoTokenizer.from_pretrained(model_id)

# export if the output model does not exist
# try:
#     sess = onnxruntime.InferenceSession(f"{output_dir}/model.onnx")
#     print("Model already exported")
# except FileNotFoundError:
print(f"Exporting model to {output_dir}")
main_export(model_id, output=output_dir, no_post_process=True, model_kwargs=model_kwargs)
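
As committed, the existence check in attention_export.py is commented out, so the script re-exports the model on every run. A minimal sketch of how that guard could be restored with a plain file check instead of catching an exception (the os.path.exists approach below is an assumption, not part of the commit):

import os

from optimum.exporters.onnx import main_export

model_id = "sentence-transformers/paraphrase-MiniLM-L6-v2"
output_dir = f"models/{model_id.replace('/', '_')}"
model_kwargs = {"output_attentions": True, "return_dict": True}

# Assumption: skip the export when model.onnx is already on disk,
# mirroring the intent of the commented-out try/except in the commit.
if os.path.exists(f"{output_dir}/model.onnx"):
    print("Model already exported")
else:
    print(f"Exporting model to {output_dir}")
    main_export(model_id, output=output_dir, no_post_process=True, model_kwargs=model_kwargs)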

experiments/try_attention_export.py (+30)
@@ -0,0 +1,30 @@
import numpy as np
import onnx
import onnxruntime
from optimum.exporters.onnx import main_export
from transformers import AutoTokenizer

model_id = "sentence-transformers/paraphrase-MiniLM-L6-v2"
output_dir = f"models/{model_id.replace('/', '_')}"
model_kwargs = {"output_attentions": True, "return_dict": True}
tokenizer = AutoTokenizer.from_pretrained(model_id)

model_path = f"{output_dir}/model.onnx"
onnx_model = onnx.load(model_path)
ort_session = onnxruntime.InferenceSession(model_path)
text = "This is a test sentence"
tokenizer_output = tokenizer(text, return_tensors="np")
input_ids = tokenizer_output["input_ids"]
attention_mask = tokenizer_output["attention_mask"]
print(attention_mask)

# Prepare the input
input_ids = np.array(input_ids).astype(np.int64)  # ensure the int64 dtype the ONNX graph expects

# Run the ONNX model
outputs = ort_session.run(None, {"input_ids": input_ids, "attention_mask": attention_mask})

# Get the attention weights
attentions = outputs[-1]

# Print the attention weights for the first layer and first head
print(attentions[0][0])
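
try_attention_export.py takes the last session output as the attention tensor; whether that holds depends on how the exporter orders and names the graph outputs. A small sketch for checking which output actually carries the attentions before indexing by position (the model path mirrors the script above; the output names and shapes printed here depend on the export and are not guaranteed by the commit):

import onnxruntime

model_path = "models/sentence-transformers_paraphrase-MiniLM-L6-v2/model.onnx"
ort_session = onnxruntime.InferenceSession(model_path)

# List the graph outputs so the attention tensors can be located by name
# rather than assumed to be last in the output list.
for i, out in enumerate(ort_session.get_outputs()):
    print(i, out.name, out.shape)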
