🧬 1.6 생물학적 서열의 클래스
1.6 생물학적 서열의 클래스
🧬 서열을 처리하는 함수들로 이루어진 MySeq 클래스 작성
# 코돈을 아미노산으로 번역하기 위한 표준 유전 코드 딕셔너리
# Standard genetic code: DNA codon -> one-letter amino-acid code.
# "_" marks the three stop codons. Built once at import time instead of on
# every call, because translation looks up one codon per loop iteration.
_CODON_TABLE = {
    "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
    "TGT": "C", "TGC": "C",
    "GAT": "D", "GAC": "D",
    "GAA": "E", "GAG": "E",
    "TTT": "F", "TTC": "F",
    "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G",
    "CAT": "H", "CAC": "H",
    "ATA": "I", "ATT": "I", "ATC": "I",
    "AAA": "K", "AAG": "K",
    "TTA": "L", "TTG": "L", "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
    "ATG": "M",
    "AAT": "N", "AAC": "N",
    "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
    "CAA": "Q", "CAG": "Q",
    "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", "AGA": "R", "AGG": "R",
    "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S", "AGT": "S", "AGC": "S",
    "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
    "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
    "TGG": "W",
    "TAT": "Y", "TAC": "Y",
    "TAA": "_", "TAG": "_", "TGA": "_",
}


def translate_codon(cod):
    """Translate a DNA codon into its one-letter amino-acid code.

    Args:
        cod: Three-letter, upper-case DNA codon such as "ATG".

    Returns:
        The amino-acid letter, "_" for a stop codon, or None when ``cod``
        is not one of the 64 codons in the table (wrong length, lower case,
        or letters other than A/C/G/T).
    """
    # The original table omitted "GCA" (alanine); all 64 codons are now present.
    return _CODON_TABLE.get(cod)
# 생물학적 서열의 클래스
class MySeq:
    """Biological sequence (DNA, RNA or protein) with basic processing helpers.

    The sequence text is upper-cased on construction. ``seq_type`` is stored
    as given; the methods recognise exactly "DNA", "RNA" and "PROTEIN".
    """

    # Characters allowed for each recognised sequence type.
    _ALPHABETS = {
        "DNA": "ACGT",
        "RNA": "ACGU",
        "PROTEIN": "ACDEFGHIKLMNPQRSTVWY",
    }

    # Base pairing used to build the complementary DNA strand.
    _DNA_COMPLEMENT = {"A": "T", "T": "A", "C": "G", "G": "C"}

    def __init__(self, seq, seq_type="DNA"):
        """Store the upper-cased sequence and its type (default "DNA")."""
        self.seq = seq.upper()
        self.seq_type = seq_type

    def __len__(self):
        """Return the number of symbols in the sequence."""
        return len(self.seq)

    def __getitem__(self, n):
        """Return the n-th symbol; a slice object yields the matching substring."""
        return self.seq[n]

    def __getslice__(self, i, j):
        """Return ``seq[i:j]``.

        Only Python 2 routes ``obj[i:j]`` through this hook; Python 3 sends
        slices to ``__getitem__``. Kept so direct callers keep working.
        """
        return self.seq[i:j]

    def __str__(self):
        """Return the raw sequence string."""
        return self.seq

    def get_seq_biotype(self):
        """Return the stored sequence type."""
        return self.seq_type

    def show_info_seq(self):
        """Print the sequence and its biotype on one line."""
        print("Sequence: " + self.seq + " biotype: " + self.seq_type)

    def alphabet(self):
        """Return the allowed characters for this sequence type, or None if unknown."""
        return self._ALPHABETS.get(self.seq_type)

    def validate(self):
        """Return True when every symbol belongs to the type's alphabet.

        An unrecognised ``seq_type`` has no alphabet and is reported as
        invalid (False) rather than failing with a TypeError on ``in None``.
        An empty sequence of a recognised type is valid.
        """
        allowed = self.alphabet()
        if allowed is None:
            return False
        return all(symbol in allowed for symbol in self.seq)

    def transcription(self):
        """Return the RNA transcript (T replaced by U), or None if not DNA."""
        if self.seq_type != "DNA":
            return None
        return MySeq(self.seq.replace("T", "U"), "RNA")

    def reverse_comp(self):
        """Return the reverse complement as a DNA MySeq, or None if not DNA.

        Symbols other than A, C, G and T are skipped, as before.
        """
        if self.seq_type != "DNA":
            return None
        pairs = self._DNA_COMPLEMENT
        # Walk the strand backwards and join once; repeated prepending to a
        # string copies the whole result on every step.
        comp = "".join(pairs.get(base, "") for base in reversed(self.seq))
        return MySeq(comp, "DNA")

    def translate(self, iniPos=0):
        """Translate the DNA sequence into a protein MySeq.

        Args:
            iniPos: Index of the first base of the first codon (default 0).

        Returns:
            A MySeq of type "PROTEIN" built by passing each consecutive
            3-base codon to ``translate_codon``; trailing bases that do not
            fill a codon are ignored. None when the sequence is not DNA.

        Raises:
            ValueError: ``translate_codon`` returned None for a codon. This
                used to surface as a TypeError from ``str += None``.
        """
        if self.seq_type != "DNA":
            return None
        residues = []
        for pos in range(iniPos, len(self.seq) - 2, 3):
            cod = self.seq[pos:pos + 3]
            aa = translate_codon(cod)
            if aa is None:
                raise ValueError(
                    "cannot translate codon {!r} at position {}".format(cod, pos)
                )
            residues.append(aa)
        return MySeq("".join(residues), "PROTEIN")
1.6.1. 서열의 유효성 판별
🧬 MySeq 클래스의 validate( ) 메소드
# Build a DNA sequence (seq_type defaults to "DNA") and a protein sequence,
# then check both against the alphabet of their type.
s1 = MySeq("ATGGGATCGTAGTCGTACTAGCTAGCTGATGGTACTCGATAGTCTACGTAGCTAGTGGTACTGGATGGTACTCAGTAACAT")
s2 = MySeq("MKVVLSVQERSVVSLL", "PROTEIN")
print(s1.validate(), s2.validate())
>> True True
유효하지 않은 서열에 대해서는 아래와 같이 False 값을 반환한다.
# Declared as DNA by default, but contains letters outside "ACGT",
# so validate() reports False.
s3 = MySeq("GTYSAFADASDBASDAF")
print(s3.validate())
>> False
1.6.2. 전사 / 정보
🧬 MySeq 클래스의 transcription( ) / show_info_seq( ) 메소드
# Transcribe s1 into an RNA MySeq and print its sequence and biotype.
s1_rna = s1.transcription()
s1_rna.show_info_seq()
>> Sequence: AUGGGAUCGUAGUCGUACUAGCUAGCUGAUGGUACUCGAUAGUCUACGUAGCUAGUGGUACUGGAUGGUACUCAGUAACAU biotype: RNA
1.6.3. 역상보서열
🧬 MySeq 클래스의 reverse_comp( ) 메소드
# Build the reverse complement of s1 and print its sequence and biotype.
s1_reverse = s1.reverse_comp()
s1_reverse.show_info_seq()
>> Sequence: ATGTTACTGAGTACCATCCAGTACCACTAGCTACGTAGACTATCGAGTACCATCAGCTAGCTAGTACGACTACGATCCCAT biotype: DNA
1.6.4. 번역
🧬 MySeq 클래스의 translate( ) 메소드
# Translate s1 starting at its first base (iniPos defaults to 0) and print the result.
s1_prot_2 = s1.translate()
s1_prot_2.show_info_seq()
>> Sequence: MGS_SY_LADGTR_ST_LVVLDGTQ_H biotype: PROTEIN
🧬 주어진 서열이 주형가닥인지 비주형가닥인지 확실하지 않으면 역상보서열도 번역해서 확인한다.
# Translate the reverse complement of s1 and print the result.
s1_prot = s1_reverse.translate()
s1_prot.show_info_seq()
>> Sequence: MLLSTIQYH_LRRLSSTIS_LVRLRSH biotype: PROTEIN
🧬 이번 포스팅으로 1장을 모두 복습해보았다. 생물학적 서열을 기본적으로 처리해서 이제는 다른 작업들을 거치겠지만, 뭐든지 처음부터 잘 끼워놔야 다음 일도 잘 풀린다고 생각한다. 거창한 알고리즘들은 아니었지만 생명뿐만 아니라 다른 영역에도 사용할 수 있는 아이디어가 여러개 나온 것 같다. 공부하다가 종종 와서 읽어봐야겠다!!
Leave a comment