-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrimmer.py
60 lines (51 loc) · 1.35 KB
/
trimmer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Functions of this script include:
# 1) truncation of sequences in .fastq files for BVDV IRES RNA zones as required for SHAPE-MaP
# 2) merging truncatated files(R1 and R2) into a common file for complete sequence coverage
# input files (EDIT AS NEEDED)
infiles = [\
'Z1plusR1.fastq',\
'Z1plusR2.fastq',\
'Z2plusR1.fastq',\
'Z2plusR2.fastq']
# output filename (EDIT AS NEEDED)
outfile = 'BVDVallplus.fastq'
# hybridization site lengths for respective primers for BVDV IRES RNA zones; these correspond to the length of primers; indices match infiles
PHSLen = [\
20,\
20,\
20,\
20]
# trimming and merging
for i in range (0,6):
output = open(outfile, 'a')
infile = open(infiles[i], 'r')
cntr = 0
seq_trunk = ''
q_trunk = ''
# read input file (.fastq) line by line
for line in infile:
cntr += 1
# first line in .fastq data quartet contains sequence header
if cntr == 1:
seq_header = line
elif cntr == 2:
start_index = 5 + PHSLen[i]
seq_len = len(line)
if start_index < len(line):
seq_trunk = line[start_index:len(line)]
else:
seq_trunk = ''
elif cntr == 3:
continue
elif cntr == 4:
if seq_trunk != '':
q_trunk = line[start_index:len(line)]
output.write(seq_header)
output.write(seq_trunk)
output.write("+\n")
output.write(q_trunk)
seq_trunk = ''
q_trunk = ''
cntr = 0
infile.close()
output.close()