Skip to content

Commit af2e8a2

Browse files
Upload make_cas_database.py
1 parent 5369960 commit af2e8a2

File tree

1 file changed

+63
-0
lines changed

1 file changed

+63
-0
lines changed

make_cas_database.py

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
# Sequence IDs whose CDS records are kept: SLmic1.0_chr1 .. SLmic1.0_chr12.
CDS_CHROMOSOMES = tuple(f'SLmic1.0_chr{n}' for n in range(1, 13))


def parse_cds_records(lines, chromosomes=CDS_CHROMOSOMES):
    """Collect CDS intervals per chromosome from GFF-formatted lines.

    Args:
        lines: iterable of text lines (an open file works).
        chromosomes: sequence IDs to keep; every other sequence ID is ignored.

    Returns:
        dict mapping each kept sequence ID that occurs in ``lines`` (with any
        feature type) to a list of ``[gene_id, start, end]`` entries, one per
        CDS line, in file order.  ``start`` is the file's start coordinate
        minus one and ``end`` is the file's end coordinate unchanged, so the
        pair can be used directly as a Python slice.

    Blank lines, lines whose first token starts with ``#`` and lines with
    fewer than five fields are skipped instead of raising ``IndexError``.
    """
    wanted = frozenset(chromosomes)
    cds_by_chrom = {}
    for raw in lines:
        # Whitespace split (not tab split) is kept from the original script.
        fields = raw.split()
        if not fields or fields[0].startswith('#'):
            continue
        seqid = fields[0]
        if seqid not in wanted:
            continue
        # A chromosome gets an entry as soon as any feature mentions it.
        records = cds_by_chrom.setdefault(seqid, [])
        if len(fields) >= 5 and fields[2] == 'CDS':
            # Gene label = characters 3..12 of the last field (presumably
            # skips a 3-character prefix such as "ID=" -- confirm against
            # the GFF).  Coordinates go from the file's 1-based values to a
            # 0-based start; the end is not decremented so that
            # [start, end) works as a slice.
            records.append(
                [fields[-1][3:13], int(fields[3]) - 1, int(fields[4])]
            )
    return cds_by_chrom


def load_cds(path, chromosomes=CDS_CHROMOSOMES):
    """Read the GFF file at ``path`` and return ``parse_cds_records`` of it."""
    with open(path) as fp:
        return parse_cds_records(fp, chromosomes)


if __name__ == '__main__':
    cds_dict = load_cds('SLmic1.0_gene_models.gff')
# Output file format: (sequence), (chr), (location), (+/-), (gene)
chromo_list = [f'SLmic1.0_chr{n}' for n in range(1, 13)]

# chromosome ID -> list of [cut_position, window_start, window_sequence, strand]
cleavage_dict = {chrom: [] for chrom in chromo_list}


def _occurrences(seq, motif, start, stop):
    """Yield every index i with start <= i < stop where seq[i:] starts with motif.

    Overlapping matches are reported (the search resumes one past each hit).
    """
    pos = seq.find(motif, start)
    while pos != -1 and pos < stop:
        yield pos
        pos = seq.find(motif, pos + 1)


def find_cleavage_sites(seq):
    """Return every 23-nt NGG / CCN target window found in ``seq``.

    ``seq`` must already be upper-case.  Each entry is
    ``[cut_position, window_start, window_sequence, strand]`` with 0-based
    coordinates on ``seq``:

    * ``'+'``: ``GG`` at index ``i`` (``i >= 21``) gives
      ``[i - 4, i - 21, seq[i - 21:i + 2], '+']``.
    * ``'-'``: ``CC`` at index ``i`` with a full 23-nt window to its right
      gives ``[i + 5, i, seq[i:i + 23], '-']``.

    All ``'+'`` entries come first, then all ``'-'`` entries, each group in
    ascending position.  The previous CCN loop stopped two positions early
    (``range(len(seq) - 24)``) and so never reported the last two complete
    windows of a sequence; every complete window is now scanned.

    NOTE(review): the ``'-'`` window is reported exactly as it reads on the
    given strand (CCN first).  The original applied ``[::1]``, a no-op; if a
    reversed or reverse-complemented sequence was intended, that is still
    not done here.
    """
    sites = []
    # NGG: the window must start at index >= 0, hence the first G at i >= 21.
    for i in _occurrences(seq, 'GG', 21, len(seq) - 1):
        sites.append([i - 4, i - 21, seq[i - 21:i + 2], '+'])
    # CCN: the window seq[i:i + 23] must fit, hence i <= len(seq) - 23.
    for i in _occurrences(seq, 'CC', 0, len(seq) - 22):
        sites.append([i + 5, i, seq[i:i + 23], '-'])
    return sites


if __name__ == '__main__':
    from Bio import SeqIO

    for seq_record in SeqIO.parse("GCA_012431665.1_SLYMIC_genomic.fa", "fasta"):
        print(seq_record.id)
        # Only the listed chromosomes are scanned; other records are skipped.
        if seq_record.id in cleavage_dict:
            cleavage_dict[seq_record.id].extend(
                find_cleavage_sites(str(seq_record.seq).upper())
            )
from bisect import bisect_left


def hit_lines(chrom, sites, cds_records):
    """Yield one output line per (CDS, site) pair whose cut lies in the CDS.

    Args:
        chrom: chromosome ID written into each line.
        sites: list of ``[cut_position, window_start, sequence, strand]``.
        cds_records: list of ``[gene, start, end]``; a site matches when
            ``start <= cut_position < end``.

    Lines are produced in the order a plain nested scan would give: CDS
    records in their given order and, within one CDS, sites in the order
    they appear in ``sites``.  The cut positions are sorted once and
    bisected per CDS, so each CDS no longer scans every site.
    """
    # Indices of ``sites`` ordered by cut position, plus the sorted positions.
    by_cut = sorted(range(len(sites)), key=lambda idx: sites[idx][0])
    cuts = [sites[idx][0] for idx in by_cut]
    for gene, start, end in cds_records:
        first = bisect_left(cuts, start)   # first cut >= start
        past = bisect_left(cuts, end)      # first cut >= end (exclusive)
        # Re-sort the matching indices to restore the original list order.
        for idx in sorted(by_cut[first:past]):
            site = sites[idx]
            yield f'({site[2]}), ({chrom}), ({site[1]}), ({site[-1]}), ({gene})\n'


# Output file format: (sequence), (chr), (location), (+/-), (gene)
if __name__ == '__main__':
    # The context manager closes the file even if a later step raises.
    with open("first.txt", 'w') as f:
        f.write('# (sequence), (chr), (location), (+/-), (gene)\n')
        for i in cleavage_dict:
            print(i)
            # A chromosome without any GFF record contributes no lines
            # instead of raising KeyError.
            f.writelines(hit_lines(i, cleavage_dict[i], cds_dict.get(i, [])))

0 commit comments

Comments
 (0)