-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgenerate.py
43 lines (32 loc) · 1.02 KB
/
generate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import argparse
from Bio import SeqIO
from random import random, choice
# Command-line interface: a FASTA path plus an optional per-base error rate.
parser = argparse.ArgumentParser(
    description="Print the sequences of a FASTA file with random errors added."
)
parser.add_argument("input", help="path to the input FASTA file")
parser.add_argument(
    "--error_rate",
    type=float,
    default=0.18,
    help="probability that a base is altered (default: %(default)s)",
)
args = parser.parse_args()

# The rate is compared against random() as a probability. Beyond making no
# sense outside [0, 1], a value of roughly 3 or more would keep the
# insertion-extension loop in generateNPRead from ever terminating.
if not 0.0 <= args.error_rate <= 1.0:
    parser.error("--error_rate must be between 0 and 1")
def otherNt(nucleotide):
    """Return a random base from A/C/G/T that differs from *nucleotide*.

    The comparison is case-insensitive. A symbol outside A/C/G/T (for
    example ``N``) leaves all four bases as candidates instead of raising
    ``ValueError``.

    Args:
        nucleotide: Single-character base to be replaced.

    Returns:
        One upper-case base picked with ``random.choice`` from the
        remaining candidates.
    """
    current = nucleotide.upper()
    # Filtering (rather than list.remove) tolerates symbols that are not
    # in the alphabet while keeping the candidate order for valid input.
    candidates = [base for base in "ACGT" if base != current]
    return choice(candidates)
def generateNPRead(read, error_rate=None):
    """Return a copy of *read* with random substitutions, insertions and deletions.

    Each base is kept unchanged when ``random() > error_rate``. Otherwise a
    second random draw selects one of three error types:

    * above 0.667: the base is replaced by a different one (``otherNt``);
    * above 0.333: one or more random bases are inserted ahead of the
      original base, which is then kept;
    * otherwise: the base is dropped.

    Args:
        read: Iterable yielding single-character strings (e.g. a ``str``).
        error_rate: Per-base error probability, expected to lie in [0, 1].
            When omitted, the module-level ``args.error_rate`` is used.

    Returns:
        The mutated sequence as a ``str``.
    """
    if error_rate is None:
        error_rate = args.error_rate

    nts = ["A", "C", "G", "T"]
    # Chance of extending an insertion by one more base; constant per call.
    extend_insert = 0.333 * error_rate
    # Collect pieces and join once instead of growing a string with +=.
    pieces = []
    for letter in read:
        if random() > error_rate:
            pieces.append(letter)
            continue

        mistake = random()
        if mistake > 0.667:
            # Substitution.
            pieces.append(otherNt(letter))
        elif mistake > 0.333:
            # Insertion: at least one random base, then the original base.
            pieces.append(choice(nts))
            while random() < extend_insert:
                pieces.append(choice(nts))
            pieces.append(letter)
        # Remaining case is a deletion: nothing is emitted for this base.
    return "".join(pieces)
# Emit every input record as FASTA: a ">" header line with the record id,
# followed by the error-injected sequence on a single line.
for record in SeqIO.parse(args.input, "fasta"):
    header = f">{record.id}"
    print(header)
    noisy_sequence = generateNPRead(record.seq)
    print(noisy_sequence)