我正在处理一个包含多个FASTA条目的文件。我有一个Python脚本，可以找出所有开放阅读框（ORF），排除小于30bp的ORF，并给出起始/终止密码子的碱基位置。现在我需要修改程序，使其能够提取（或追加）每个ORF对应的具体序列，以便把这些ORF序列写入一个新的FASTA文件。
#FUNCTION START
def orfFINDER(dna,frame):
    """Find open reading frames (ORFs) in one reading frame of a DNA string.

    The sequence is read in non-overlapping codons beginning at 0-based
    offset ``frame``; codon matching is case-insensitive. An ORF runs from
    the first ``atg`` found after the previously reported ORF's stop codon
    up to and including the next ``tga``/``tag``/``taa``. A stop codon that
    has no unused start codon before it is ignored.

    Args:
        dna: Nucleotide sequence.
        frame: 0-based offset of the first codon to read.

    Returns:
        Dict mapping ``"orf1"``, ``"orf2"``, ... (numbered in order of
        appearance) to a tuple ``(start, stop, length, frame)``:
        ``start`` is the 1-based position of the first base of the start
        codon, given as a decimal string (the type this function has always
        returned for it); ``stop`` is the 1-based position of the last base
        of the stop codon (int); ``length`` is ``stop - start + 1`` (int);
        ``frame`` is the argument passed in. Empty when no ORF is found.
    """
    stop_codons = ('tga', 'tag', 'taa')
    start_codon = 'atg'
    orffound = {}
    # 1-based position of the earliest start codon not yet closed by a stop.
    pending_start = None

    for i in range(frame, len(dna), 3):
        codon = dna[i:i + 3].lower()
        if codon == start_codon:
            # Later in-frame starts inside an open ORF do not restart it.
            if pending_start is None:
                pending_start = i + 1
        elif codon in stop_codons and pending_start is not None:
            position_stop = i + 3  # last base of the stop codon, 1-based
            sizeorf = position_stop - pending_start + 1
            nameorf = "orf" + str(len(orffound) + 1)
            orffound[nameorf] = str(pending_start), position_stop, sizeorf, frame
            pending_start = None

    return orffound
#READ FASTA FILE AND SAVE HEADERS AND SEQUENCES IN A DICTIONARY
# `infile` must already be an open, iterable FASTA file handle at this point.
seqs = {}
name = None
for line in infile:
    line = line.rstrip()
    if not line:
        # Blank lines carry no data; indexing line[0] on them raised IndexError.
        continue
    if line.startswith('>'):
        # Record name = first whitespace-delimited word of the header, minus '>'.
        name = line.split()[0][1:]
        seqs[name] = ''
    else:
        if name is None:
            raise ValueError("FASTA sequence data found before any '>' header line")
        # Multi-line sequences are concatenated under the current header.
        seqs[name] += line
#DEFINE FRAME TO FIND ORF
#if frame = 0, start from the first position in the sequence
frame = 0

#EXECUTE THE ORFFINDER FUNCTION
# For every FASTA record in `seqs`, print each ORF whose length exceeds 30.
for header, seq in seqs.items():
    orf = orfFINDER(seq, frame)
    # Each value is the (start, stop, length, frame) tuple built by orfFINDER.
    for numorf, (startorf, stoporf, lengthorf, frameorf) in orf.items():
        if int(lengthorf) > 30:
            print(header,numorf,"start",startorf,"stop",stoporf,"length",lengthorf,"frame",frameorf)

infile.close()
目前没有回答
相关问题 更多 >
编程相关推荐