How to Create a Bioinformatics AI Agent Using Biopython for DNA and Protein Analysis

class BioPythonAIAgent:
    """Fetch, store, analyse and visualise DNA / protein sequences.

    The class keeps four in-memory registries:

    * ``sequences``        -- ``SeqRecord`` objects keyed by sequence id
    * ``analysis_results`` -- dicts produced by :meth:`analyze_sequence`
    * ``alignments``       -- alignments keyed by a caller-chosen key
    * ``trees``            -- trees built by :meth:`create_phylogenetic_tree`

    It relies on names that must be imported at module level by the
    surrounding file: ``Entrez``, ``SeqIO``, ``AlignIO``, ``Phylo``, ``Seq``,
    ``SeqRecord``, ``gc_fraction``, ``molecular_weight``, ``ProteinAnalysis``,
    ``ClustalwCommandline``, ``DistanceCalculator``,
    ``DistanceTreeConstructor``, ``pd``, ``plt``, ``go``, ``px`` and
    ``make_subplots``.
    """

    def __init__(self, email="your_email@example.com"):
        """Create an empty agent.

        Args:
            email: Contact address handed to ``Entrez``. The default is only
                a placeholder; pass a real address before querying NCBI.

        Note:
            ``Entrez.email`` is a module-level setting, so constructing an
            agent changes it for every other user of ``Entrez`` as well.
        """
        self.email = email
        Entrez.email = email
        self.sequences = {}
        self.analysis_results = {}
        self.alignments = {}
        self.trees = {}

    def fetch_sequence_from_ncbi(self, accession_id, db="nucleotide", rettype="fasta"):
        """Download one record from NCBI and register it under its accession.

        Args:
            accession_id: Identifier passed to ``Entrez.efetch`` as ``id``.
            db: Entrez database name.
            rettype: Entrez return type. GenBank-style types (``gb``,
                ``gbwithparts``, ``gp``) are parsed as GenBank; everything
                else is parsed as FASTA.

        Returns:
            The parsed record, or ``None`` if fetching or parsing failed (the
            error is printed).
        """
        # The parser has to match what was requested from Entrez.
        parse_format = "genbank" if rettype in ("gb", "gbwithparts", "gp") else "fasta"
        try:
            handle = Entrez.efetch(db=db, id=accession_id, rettype=rettype, retmode="text")
            try:
                record = SeqIO.read(handle, parse_format)
            finally:
                # Close the network handle even when parsing fails.
                handle.close()
        except Exception as e:
            print(f"Error fetching sequence: {str(e)}")
            return None

        self.sequences[accession_id] = record
        return record

    def create_sample_sequences(self):
        """Register three built-in example sequences in ``self.sequences``.

        Returns:
            A list of ``(sequence_id, sequence_string, description)`` tuples
            for the registered samples.
        """
        covid_spike = "MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVKQLSSNFGAISSVLNDILSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT"

        human_insulin = "MALWMRLLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGERGFFYTPKTRREAEDLQVGQVELGGGPGAGSLQPLALEGSLQKRGIVEQCCTSICSLYQLENYCN"

        e_coli_16s = "AAATTGAAGAGTTTGATCATGGCTCAGATTGAACGCTGGCGGCAGGCCTAACACATGCAAGTCGAACGGTAACAGGAAGCAGCTTGCTGCTTTGCTGACGAGTGGCGGACGGGTGAGTAATGTCTGGGAAACTGCCTGATGGAGGGGGATAACTACTGGAAACGGTAGCTAATACCGCATAATGTCGCAAGACCAAAGAGGGGGACCTTCGGGCCTCTTGCCATCGGATGTGCCCAGATGGGATTAGCTAGTAGGTGGGGTAACGGCTCACCTAGGCGACGATCCCTAGCTGGTCTGAGAGGATGACCAGCCACACTGGAACTGAGACACGGTCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGGGGAGGAAGGCGTTAAGGTTAATAACCTTGGCGATTGACGTTACCCGCAGAAGAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTCTGTCAAGTCGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACAAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACA"

        sample_sequences = [
            ("COVID_Spike", covid_spike, "SARS-CoV-2 Spike Protein"),
            ("Human_Insulin", human_insulin, "Human Insulin Precursor"),
            ("E_coli_16S", e_coli_16s, "E. coli 16S rRNA"),
        ]

        for seq_id, seq_str, desc in sample_sequences:
            self.sequences[seq_id] = SeqRecord(Seq(seq_str), id=seq_id, description=desc)

        return sample_sequences

    def analyze_sequence(self, sequence_id=None, sequence=None):
        """Compute basic statistics for a stored or ad-hoc sequence.

        Args:
            sequence_id: Key of a record in ``self.sequences``. Takes
                precedence over ``sequence`` when it is registered.
            sequence: Raw sequence string, used when ``sequence_id`` is not
                given or not registered.

        Returns:
            A dict with ``length`` and ``composition`` (counts of A, T, G, C).
            When the sequence consists only of DNA letters (A, C, G, T, N) it
            also carries ``gc_content`` and ``molecular_weight``; when its
            length is additionally a multiple of three it carries
            ``translation`` and ``stop_codons`` and, if the translation has no
            internal stop codon, ``protein_mw``, ``isoelectric_point`` and
            ``protein_composition``. Returns ``None`` when neither argument
            yields a sequence. The dict is stored in ``self.analysis_results``
            under ``sequence_id`` (or ``"custom"``).
        """
        if sequence_id and sequence_id in self.sequences:
            seq = self.sequences[sequence_id].seq
        elif sequence:
            seq = Seq(sequence)
        else:
            return None

        analysis = {
            'length': len(seq),
            'composition': {base: seq.count(base) for base in ('A', 'T', 'G', 'C')},
        }

        # GC%, DNA weight and translation are only meaningful for DNA, so gate
        # them on the alphabet actually present in the sequence.
        letters = set(str(seq).upper())
        is_dna = bool(letters) and letters <= set("ACGTN")

        if is_dna:
            analysis['gc_content'] = round(gc_fraction(seq) * 100, 2)
            try:
                analysis['molecular_weight'] = round(molecular_weight(seq, seq_type="DNA"), 2)
            except ValueError:
                # Ambiguous letters are rejected; fall back to a flat
                # per-base approximation.
                analysis['molecular_weight'] = len(seq) * 650

        if is_dna and len(seq) % 3 == 0:
            try:
                protein = str(seq.translate())
                analysis['translation'] = protein
                analysis['stop_codons'] = protein.count('*')

                # Strip only a terminal stop codon; never drop a real residue.
                core = protein[:-1] if protein.endswith('*') else protein
                if core and '*' not in core:
                    prot_analysis = ProteinAnalysis(core)
                    analysis['protein_mw'] = round(prot_analysis.molecular_weight(), 2)
                    analysis['isoelectric_point'] = round(prot_analysis.isoelectric_point(), 2)
                    analysis['protein_composition'] = prot_analysis.get_amino_acids_percent()
            except Exception:
                # Protein-level metrics are best effort; keep what was computed.
                pass

        key = sequence_id if sequence_id else "custom"
        self.analysis_results[key] = analysis

        return analysis

    def visualize_composition(self, sequence_id):
        """Show a Plotly figure for a previously analysed sequence.

        The figure holds a pie chart and a bar chart of the A/T/G/C counts and
        a scatter plot of length, GC% and molecular weight (in kDa). Does
        nothing when ``sequence_id`` has no entry in ``self.analysis_results``.
        """
        if sequence_id not in self.analysis_results:
            return

        analysis = self.analysis_results[sequence_id]

        fig = make_subplots(
            rows=2, cols=2,
            specs=[[{"type": "pie"}, {"type": "bar"}],
                   [{"colspan": 2}, None]],
            subplot_titles=("Nucleotide Composition", "Base Count", "Sequence Properties")
        )

        labels = list(analysis['composition'].keys())
        values = list(analysis['composition'].values())

        fig.add_trace(
            go.Pie(labels=labels, values=values, name="Composition"),
            row=1, col=1
        )

        fig.add_trace(
            go.Bar(x=labels, y=values, name="Count", marker_color=['red', 'blue', 'green', 'orange']),
            row=1, col=2
        )

        properties = ['Length', 'GC%', 'MW (kDa)']
        prop_values = [
            analysis['length'],
            analysis.get('gc_content', 0),
            analysis.get('molecular_weight', 0) / 1000
        ]

        fig.add_trace(
            go.Scatter(x=properties, y=prop_values, mode="markers+lines",
                       marker=dict(size=10, color="purple"), name="Properties"),
            row=2, col=1
        )

        fig.update_layout(
            title=f"Comprehensive Analysis: {sequence_id}",
            showlegend=False,
            height=600
        )

        fig.show()

    def perform_multiple_sequence_alignment(self, sequence_ids):
        """Pairwise-align every pair of the given stored sequences.

        Args:
            sequence_ids: Ids to look up in ``self.sequences``; unknown ids
                are skipped.

        Returns:
            A list with the first alignment reported by ``PairwiseAligner``
            for each pair (in input order), or ``None`` when fewer than two
            known sequences are supplied.
        """
        if len(sequence_ids) < 2:
            return None

        records = [self.sequences[seq_id] for seq_id in sequence_ids if seq_id in self.sequences]
        if len(records) < 2:
            return None

        from Bio.Align import PairwiseAligner
        aligner = PairwiseAligner()
        aligner.match_score = 2
        aligner.mismatch_score = -1
        aligner.open_gap_score = -2
        aligner.extend_gap_score = -0.5

        alignments = []
        for i, first in enumerate(records):
            for second in records[i + 1:]:
                alignments.append(aligner.align(first.seq, second.seq)[0])

        return alignments

    def create_phylogenetic_tree(self, alignment_key=None, sequences=None):
        """Build a UPGMA tree from a stored alignment or from raw sequences.

        Args:
            alignment_key: Key of an alignment in ``self.alignments``.
            sequences: Iterable of raw sequence strings. They are written to a
                temporary FASTA file and aligned with the external
                ``clustalw2`` program.

        Returns:
            The constructed tree (also stored in ``self.trees`` under
            ``tree_<n>``), or ``None`` when no input is usable or the
            ClustalW step fails.
        """
        if alignment_key and alignment_key in self.alignments:
            alignment = self.alignments[alignment_key]
        elif sequences:
            import os
            import tempfile

            records = [SeqRecord(Seq(seq), id=f"seq_{i}") for i, seq in enumerate(sequences)]
            try:
                # ClustalW writes its .aln/.dnd outputs next to the input, so a
                # throwaway directory cleans everything up, even on failure.
                with tempfile.TemporaryDirectory() as workdir:
                    fasta_path = os.path.join(workdir, "temp.fasta")
                    SeqIO.write(records, fasta_path, "fasta")
                    clustalw_cline = ClustalwCommandline("clustalw2", infile=fasta_path)
                    clustalw_cline()
                    alignment = AlignIO.read(os.path.join(workdir, "temp.aln"), "clustal")
            except Exception:
                # Missing binary, failed run or unreadable output: no tree.
                return None
        else:
            return None

        calculator = DistanceCalculator('identity')
        dm = calculator.get_distance(alignment)

        constructor = DistanceTreeConstructor()
        tree = constructor.upgma(dm)

        tree_key = f"tree_{len(self.trees)}"
        self.trees[tree_key] = tree

        return tree

    def visualize_tree(self, tree):
        """Draw ``tree`` with ``Phylo.draw`` on a new Matplotlib figure."""
        fig, ax = plt.subplots(figsize=(10, 6))
        Phylo.draw(tree, axes=ax)
        plt.title("Phylogenetic Tree")
        plt.tight_layout()
        plt.show()

    def protein_structure_analysis(self, sequence_id):
        """Translate a stored sequence and report protein-level properties.

        Returns:
            A dict with ``molecular_weight``, ``isoelectric_point``,
            ``amino_acid_percent``, ``secondary_structure``, ``flexibility``
            and ``gravy``; or ``None`` when the id is unknown, the length is
            not a multiple of three, the translation contains an internal
            stop codon, or the computation fails.
        """
        if sequence_id not in self.sequences:
            return None

        seq = self.sequences[sequence_id].seq

        if len(seq) % 3 != 0:
            return None

        try:
            protein = str(seq.translate())
            # Strip only a terminal stop codon; never drop a real residue.
            core = protein[:-1] if protein.endswith('*') else protein
            if core and '*' not in core:
                prot_analysis = ProteinAnalysis(core)
                return {
                    'molecular_weight': prot_analysis.molecular_weight(),
                    'isoelectric_point': prot_analysis.isoelectric_point(),
                    'amino_acid_percent': prot_analysis.get_amino_acids_percent(),
                    'secondary_structure': prot_analysis.secondary_structure_fraction(),
                    'flexibility': prot_analysis.flexibility(),
                    'gravy': prot_analysis.gravy()
                }
        except Exception:
            # Best effort: untranslatable or non-standard input yields None.
            pass

        return None

    def comparative_analysis(self, sequence_ids):
        """Tabulate stored analysis results and plot them side by side.

        Args:
            sequence_ids: Ids to look up in ``self.analysis_results``; ids
                without a stored analysis are skipped.

        Returns:
            A ``pandas.DataFrame`` with one row per analysed sequence and an
            added ``sequence_id`` column. A comparison figure is shown only
            when more than one row is present.
        """
        results = []

        for seq_id in sequence_ids:
            if seq_id in self.analysis_results:
                # Copy so the stored result is not mutated.
                analysis = self.analysis_results[seq_id].copy()
                analysis['sequence_id'] = seq_id
                results.append(analysis)

        df = pd.DataFrame(results)

        if len(df) > 1:
            fig = make_subplots(
                rows=2, cols=2,
                subplot_titles=("Length Comparison", "GC Content", "Molecular Weight", "Composition Heatmap")
            )

            fig.add_trace(
                go.Bar(x=df['sequence_id'], y=df['length'], name="Length"),
                row=1, col=1
            )

            if 'gc_content' in df.columns:
                fig.add_trace(
                    go.Scatter(x=df['sequence_id'], y=df['gc_content'], mode="markers+lines", name="GC%"),
                    row=1, col=2
                )

            if 'molecular_weight' in df.columns:
                fig.add_trace(
                    go.Bar(x=df['sequence_id'], y=df['molecular_weight'], name="MW"),
                    row=2, col=1
                )

            fig.update_layout(title="Comparative Sequence Analysis", height=600)
            fig.show()

        return df

    def codon_usage_analysis(self, sequence_id):
        """Count in-frame codons of a stored sequence and plot the top 20.

        Returns:
            A ``pandas.DataFrame`` with ``Codon`` and ``Count`` columns sorted
            by descending count, or ``None`` when the id is unknown or the
            sequence length is not a multiple of three.
        """
        if sequence_id not in self.sequences:
            return None

        seq = self.sequences[sequence_id].seq

        if len(seq) % 3 != 0:
            return None

        codons = {}
        for i in range(0, len(seq) - 2, 3):
            codon = str(seq[i:i + 3])
            codons[codon] = codons.get(codon, 0) + 1

        codon_df = pd.DataFrame(list(codons.items()), columns=['Codon', 'Count'])
        codon_df = codon_df.sort_values('Count', ascending=False)

        fig = px.bar(codon_df.head(20), x='Codon', y='Count',
                     title=f"Top 20 Codon Usage - {sequence_id}")
        fig.show()

        return codon_df

    def motif_search(self, sequence_id, motif_pattern):
        """Find every (possibly overlapping) exact occurrence of a motif.

        Args:
            sequence_id: Key of a record in ``self.sequences``.
            motif_pattern: Literal substring to look for (case-sensitive).

        Returns:
            A list of 0-based start positions; empty when the id is unknown,
            the motif is empty, or nothing matches.
        """
        if sequence_id not in self.sequences or not motif_pattern:
            return []

        seq = str(self.sequences[sequence_id].seq)
        width = len(motif_pattern)

        return [i for i in range(len(seq) - width + 1) if seq.startswith(motif_pattern, i)]

    def gc_content_window(self, sequence_id, window_size=100):
        """Compute and plot GC% over a sliding window of a stored sequence.

        Windows advance by a quarter of ``window_size`` (at least one base).

        Args:
            sequence_id: Key of a record in ``self.sequences``.
            window_size: Window length in bases; must be at least 1.

        Returns:
            ``(positions, gc_values)`` where ``positions`` are the window
            midpoints, or ``None`` when the id is unknown. Both lists are
            empty when the sequence is shorter than the window.

        Raises:
            ValueError: If ``window_size`` is smaller than 1.
        """
        if sequence_id not in self.sequences:
            return None
        if window_size < 1:
            raise ValueError("window_size must be at least 1")

        seq = self.sequences[sequence_id].seq
        gc_values = []
        positions = []

        # A step of zero would make range() fail for windows shorter than 4.
        step = max(1, window_size // 4)
        for i in range(0, len(seq) - window_size + 1, step):
            window = seq[i:i + window_size]
            gc_values.append(gc_fraction(window) * 100)
            positions.append(i + window_size // 2)

        fig = go.Figure()
        fig.add_trace(go.Scatter(x=positions, y=gc_values, mode="lines+markers",
                                 name=f'GC Content (window={window_size})'))
        fig.update_layout(
            title=f"GC Content Sliding Window Analysis - {sequence_id}",
            xaxis_title="Position",
            yaxis_title="GC Content (%)"
        )
        fig.show()

        return positions, gc_values

    def run_comprehensive_analysis(self, sequence_ids):
        """Run the per-sequence analyses and the comparison for the given ids.

        For every id found in ``self.sequences`` this runs
        :meth:`analyze_sequence`, :meth:`visualize_composition`,
        :meth:`gc_content_window` and :meth:`codon_usage_analysis`.

        Returns:
            A dict keyed by sequence id with ``basic_analysis``, ``gc_window``
            and ``codon_usage`` entries, plus a ``comparative`` DataFrame when
            more than one id was supplied.
        """
        results = {}

        for seq_id in sequence_ids:
            if seq_id in self.sequences:
                analysis = self.analyze_sequence(seq_id)
                self.visualize_composition(seq_id)

                gc_analysis = self.gc_content_window(seq_id)
                codon_analysis = self.codon_usage_analysis(seq_id)

                results[seq_id] = {
                    'basic_analysis': analysis,
                    'gc_window': gc_analysis,
                    'codon_usage': codon_analysis
                }

        if len(sequence_ids) > 1:
            comparative_df = self.comparative_analysis(sequence_ids)
            results['comparative'] = comparative_df

        return results

How to Create a Bioinformatics AI Agent Using Biopython for DNA and Protein Analysis

Leave a Reply Cancel reply

Follow US

Popular News

The End of GPUs? Optical AI takes over

Instagram tests new layout that puts the spotlight on Reels and DMs

10 Naruto Characters the Anime Failed

Where To Buy The Jordan 5 Soft Pink & More

AI’s hallucination problem is getting worse

Categories

About US

Quick Links

Important Links

Subscribe US