Skip to content

Commit b6279eb

Browse files
authored
Add files via upload
1 parent eb21e0c commit b6279eb

File tree

1 file changed

+137
-0
lines changed

1 file changed

+137
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"id": "7e53efda-8330-4f93-8b0a-a26d25c95ce1",
7+
"metadata": {},
8+
"outputs": [
9+
{
10+
"name": "stderr",
11+
"output_type": "stream",
12+
"text": [
13+
"Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
14+
]
15+
},
16+
{
17+
"data": {
18+
"application/vnd.jupyter.widget-view+json": {
19+
"model_id": "c18328a4fa614a84b23facc7e233de76",
20+
"version_major": 2,
21+
"version_minor": 0
22+
},
23+
"text/plain": [
24+
"Loading checkpoint shards: 0%| | 0/4 [00:00<?, ?it/s]"
25+
]
26+
},
27+
"metadata": {},
28+
"output_type": "display_data"
29+
},
30+
{
31+
"name": "stdout",
32+
"output_type": "stream",
33+
"text": [
34+
"<BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Anneme onu ne kadar sevdiğimi anlatan bir mektup yaz<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>Sevgili Anne,\n",
35+
"\n",
36+
"Bugün sana, seni ne kadar çok sevdiğimi ifade etmek istiyorum. Senin için hissettiklerim kelimelerle ifade edilemeyecek kadar derin ve güçlü. Sen benim hayatımın ışığı, en büyük destekçim ve en sevdiğim insansın.\n",
37+
"\n",
38+
"Her gün seninle geçirdiğim her an için minnettarım. Senin sevgin, rehberliğin ve desteğin olmadan hayatım çok farklı olurdu. Bana verdiğin\n"
39+
]
40+
}
41+
],
42+
"source": [
43+
"# pip install transformers==4.41.1\n",
"import os\n",
"\n",
"from transformers import AutoTokenizer, AutoModelForCausalLM\n",
"\n",
"# Model card: https://huggingface.co/CohereForAI/aya-23-8B/\n",
"# Local checkpoint directory. Set the AYA_MODEL_PATH environment variable to\n",
"# point somewhere else; the default is the path this notebook was written with.\n",
"model_id = os.environ.get(\"AYA_MODEL_PATH\", \"/home/lyz/hf-models/aya-23-8B/\")\n",
"tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
"model = AutoModelForCausalLM.from_pretrained(model_id)\n",
"\n",
"# Smoke test: render a single user message with the tokenizer's chat template\n",
"# and sample a short reply.\n",
"messages = [{\"role\": \"user\", \"content\": \"Anneme onu ne kadar sevdiğimi anlatan bir mektup yaz\"}]\n",
"input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors=\"pt\")\n",
"# Rendered prompt:\n",
"## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Anneme onu ne kadar sevdiğimi anlatan bir mektup yaz<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>\n",
"\n",
"gen_tokens = model.generate(\n",
"    input_ids,\n",
"    max_new_tokens=100,\n",
"    do_sample=True,\n",
"    temperature=0.3,\n",
")\n",
"\n",
"# Decode the whole sequence (prompt + completion, special tokens included).\n",
"gen_text = tokenizer.decode(gen_tokens[0])\n",
"print(gen_text)"
65+
]
66+
},
67+
{
68+
"cell_type": "code",
69+
"execution_count": null,
70+
"id": "21d274a9-b0da-425c-82bb-bc93c0a8c254",
71+
"metadata": {},
72+
"outputs": [
73+
{
74+
"name": "stderr",
75+
"output_type": "stream",
76+
"text": [
77+
" 0%| | 3/1000 [00:30<2:58:12, 10.72s/it]"
78+
]
79+
}
80+
],
81+
"source": [
82+
"from tqdm import tqdm\n",
"\n",
"# Read the Dutch source sentences as UTF-8 and close the file right away.\n",
"with open('testA.nl', encoding='utf-8') as src:\n",
"    test_lines = src.readlines()\n",
"\n",
"result = []\n",
"# Open the submission once, in write mode, so re-running this cell starts a\n",
"# fresh file instead of appending duplicate rows to a previous run's output.\n",
"with open('submit.csv', 'w', encoding='utf-8') as up:\n",
"    for line in tqdm(test_lines):\n",
"        # Strip the trailing newline so it does not end up inside the prompt.\n",
"        messages = [{\"role\": \"user\", \"content\": f\"将下面荷兰语翻译为中文:{line.strip()}\"}]\n",
"        input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors=\"pt\")\n",
"        # Keep the prompt on the same device as the model weights.\n",
"        input_ids = input_ids.to(model.device)\n",
"\n",
"        gen_tokens = model.generate(\n",
"            input_ids,\n",
"            max_new_tokens=100,\n",
"            do_sample=True,\n",
"            temperature=0.3,\n",
"        )\n",
"\n",
"        # The generated sequence starts with the prompt tokens; slice them off and\n",
"        # drop special tokens rather than splitting the decoded text on '<|',\n",
"        # which would truncate any sentence that itself contains '<|'.\n",
"        new_tokens = gen_tokens[0][input_ids.shape[-1]:]\n",
"        result.append(tokenizer.decode(new_tokens, skip_special_tokens=True))\n",
"\n",
"        # Exactly one row per input line; flush so finished rows reach disk even\n",
"        # if the long-running loop is interrupted.\n",
"        up.write(result[-1].replace('\\n', '') + '\\n')\n",
"        up.flush()"
105+
]
106+
},
107+
{
108+
"cell_type": "code",
109+
"execution_count": null,
110+
"id": "06b375b3-8a5f-411d-b1e1-049e6350c5c5",
111+
"metadata": {},
112+
"outputs": [],
113+
"source": []
114+
}
115+
],
116+
"metadata": {
117+
"kernelspec": {
118+
"display_name": "py3.11",
119+
"language": "python",
120+
"name": "py3.11"
121+
},
122+
"language_info": {
123+
"codemirror_mode": {
124+
"name": "ipython",
125+
"version": 3
126+
},
127+
"file_extension": ".py",
128+
"mimetype": "text/x-python",
129+
"name": "python",
130+
"nbconvert_exporter": "python",
131+
"pygments_lexer": "ipython3",
132+
"version": "3.11.8"
133+
}
134+
},
135+
"nbformat": 4,
136+
"nbformat_minor": 5
137+
}

0 commit comments

Comments
 (0)