AllTopicsTodayAllTopicsToday
Notification
Font ResizerAa
  • Home
  • Tech
  • Investing & Finance
  • AI
  • Entertainment
  • Wellness
  • Gaming
  • Movies
Reading: How to Create a Bioinformatics AI Agent Using Biopython for DNA and Protein Analysis
Share
Font ResizerAa
AllTopicsTodayAllTopicsToday
  • Home
  • Blog
  • About Us
  • Contact
Search
  • Home
  • Tech
  • Investing & Finance
  • AI
  • Entertainment
  • Wellness
  • Gaming
  • Movies
Have an existing account? Sign In
Follow US
©AllTopicsToday 2026. All Rights Reserved.
AllTopicsToday > Blog > AI > How to Create a Bioinformatics AI Agent Using Biopython for DNA and Protein Analysis
Blog banner 1 1.png
AI

How to Create a Bioinformatics AI Agent Using Biopython for DNA and Protein Analysis

AllTopicsToday
Last updated: September 8, 2025 6:58 am
AllTopicsToday
Published: September 8, 2025
Share
SHARE
class BioPythonAIAgent:
    """Toolkit that fetches, stores, analyses and plots biological sequences.

    The class keeps four dictionaries on the instance: ``sequences`` (records
    keyed by id), ``analysis_results`` (per-sequence metrics), ``alignments``
    and ``trees``.

    NOTE(review): the methods rely on names that are not defined in this
    block (``Entrez``, ``SeqIO``, ``AlignIO``, ``Phylo``, ``Seq``,
    ``SeqRecord``, ``gc_fraction``, ``molecular_weight``, ``ProteinAnalysis``,
    ``DistanceCalculator``, ``DistanceTreeConstructor``,
    ``ClustalwCommandline``, ``pd``, ``plt``, ``go``, ``px``,
    ``make_subplots``, ``os``). They are presumably imported at module level;
    confirm against the top of the file.
    """

    def __init__(self, email="your_email@example.com"):
        """Create an empty agent and register ``email`` with ``Entrez``.

        Args:
            email: Contact address assigned to ``Entrez.email``. The default
                is only a placeholder; pass a real address for real queries.
        """
        self.email = email
        Entrez.email = email
        self.sequences = {}
        self.analysis_results = {}
        self.alignments = {}
        self.trees = {}

    @staticmethod
    def _strip_terminal_stop(protein_str):
        """Return ``protein_str`` without a single trailing ``*`` if present."""
        if protein_str.endswith("*"):
            return protein_str[:-1]
        return protein_str

    def fetch_sequence_from_ncbi(self, accession_id, db="nucleotide", rettype="fasta"):
        """Download one record via ``Entrez.efetch`` and cache it.

        Args:
            accession_id: Identifier passed to ``Entrez.efetch`` as ``id``;
                also used as the key in ``self.sequences``.
            db: Database name passed to ``Entrez.efetch``.
            rettype: Return type passed to ``Entrez.efetch``. The response is
                always parsed as FASTA, so other values may fail to parse.

        Returns:
            The parsed record, or ``None`` if any step raised (the error is
            printed).
        """
        try:
            handle = Entrez.efetch(db=db, id=accession_id, rettype=rettype, retmode="text")
            try:
                record = SeqIO.read(handle, "fasta")
            finally:
                # Close the handle even when parsing fails.
                handle.close()
            self.sequences[accession_id] = record
            return record
        except Exception as e:
            print(f"Error fetching sequence: {e}")
            return None

    def create_sample_sequences(self):
        """Register three built-in example sequences in ``self.sequences``.

        Returns:
            A list of ``(id, sequence_string, description)`` tuples for the
            sequences that were registered.
        """
        covid_spike = "MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVKQLSSNFGAISSVLNDILSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT"

        human_insulin = "MALWMRLLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGERGFFYTPKTRREAEDLQVGQVELGGGPGAGSLQPLALEGSLQKRGIVEQCCTSICSLYQLENYCN"

        e_coli_16s = "AAATTGAAGAGTTTGATCATGGCTCAGATTGAACGCTGGCGGCAGGCCTAACACATGCAAGTCGAACGGTAACAGGAAGCAGCTTGCTGCTTTGCTGACGAGTGGCGGACGGGTGAGTAATGTCTGGGAAACTGCCTGATGGAGGGGGATAACTACTGGAAACGGTAGCTAATACCGCATAATGTCGCAAGACCAAAGAGGGGGACCTTCGGGCCTCTTGCCATCGGATGTGCCCAGATGGGATTAGCTAGTAGGTGGGGTAACGGCTCACCTAGGCGACGATCCCTAGCTGGTCTGAGAGGATGACCAGCCACACTGGAACTGAGACACGGTCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGGGGAGGAAGGCGTTAAGGTTAATAACCTTGGCGATTGACGTTACCCGCAGAAGAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTCTGTCAAGTCGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACAAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACA"

        sample_sequences = [
            ("COVID_Spike", covid_spike, "SARS-CoV-2 Spike Protein"),
            ("Human_Insulin", human_insulin, "Human Insulin Precursor"),
            ("E_coli_16S", e_coli_16s, "E. coli 16S rRNA"),
        ]

        for seq_id, seq_str, desc in sample_sequences:
            self.sequences[seq_id] = SeqRecord(Seq(seq_str), id=seq_id, description=desc)

        return sample_sequences

    def analyze_sequence(self, sequence_id=None, sequence=None):
        """Compute basic metrics for a stored or ad-hoc sequence.

        Args:
            sequence_id: Key of a record in ``self.sequences``. Takes
                precedence over ``sequence`` when it is present there.
            sequence: Raw sequence string used when ``sequence_id`` does not
                resolve to a stored record.

        Returns:
            A dict with ``length``, ``composition`` (A/T/G/C counts),
            ``gc_content`` and ``molecular_weight``, plus translation and
            protein metrics when they could be computed; ``None`` when no
            sequence could be resolved. The dict is also stored in
            ``self.analysis_results`` under ``sequence_id`` or ``"custom"``.
        """
        if sequence_id and sequence_id in self.sequences:
            seq = self.sequences[sequence_id].seq
        elif sequence:
            seq = Seq(sequence)
        else:
            return None

        analysis = {
            "length": len(seq),
            "composition": {base: seq.count(base) for base in ("A", "T", "G", "C")},
        }

        # NOTE(review): the original guarded this section with a check that
        # 'A' and 'T' are keys of the composition dict, which is always true,
        # so these values are computed for every input (including protein
        # strings). Behaviour is kept as-is here.
        analysis["gc_content"] = round(gc_fraction(seq) * 100, 2)
        try:
            analysis["molecular_weight"] = round(molecular_weight(seq, seq_type="DNA"), 2)
        except Exception:
            # Fallback estimate used by the original code: 650 per position.
            analysis["molecular_weight"] = len(seq) * 650

        # Translation is best-effort: any failure simply leaves these keys out.
        try:
            if len(seq) % 3 == 0:
                protein = seq.translate()
                protein_str = str(protein)
                analysis["translation"] = protein_str
                analysis["stop_codons"] = protein.count("*")

                # Only drop the final character when it really is a stop
                # symbol, so a protein without one keeps its last residue.
                core = self._strip_terminal_stop(protein_str)
                if "*" not in core:
                    prot_analysis = ProteinAnalysis(core)
                    analysis["protein_mw"] = round(prot_analysis.molecular_weight(), 2)
                    analysis["isoelectric_point"] = round(prot_analysis.isoelectric_point(), 2)
                    analysis["protein_composition"] = prot_analysis.get_amino_acids_percent()
        except Exception:
            pass

        key = sequence_id if sequence_id else "custom"
        self.analysis_results[key] = analysis

        return analysis

    def visualize_composition(self, sequence_id):
        """Show a three-panel figure for a previously analysed sequence.

        Does nothing when ``sequence_id`` has no entry in
        ``self.analysis_results``.
        """
        if sequence_id not in self.analysis_results:
            return

        analysis = self.analysis_results[sequence_id]

        fig = make_subplots(
            rows=2, cols=2,
            specs=[[{"type": "pie"}, {"type": "bar"}],
                   [{"colspan": 2}, None]],
            subplot_titles=("Nucleotide Composition", "Base Count", "Sequence Properties"),
        )

        labels = list(analysis["composition"].keys())
        values = list(analysis["composition"].values())

        fig.add_trace(
            go.Pie(labels=labels, values=values, name="Composition"),
            row=1, col=1,
        )

        fig.add_trace(
            go.Bar(x=labels, y=values, name="Count",
                   marker_color=["red", "blue", "green", "orange"]),
            row=1, col=2,
        )

        properties = ["Length", "GC%", "MW (kDa)"]
        prop_values = [
            analysis["length"],
            analysis.get("gc_content", 0),
            analysis.get("molecular_weight", 0) / 1000,
        ]

        fig.add_trace(
            go.Scatter(x=properties, y=prop_values, mode="markers+lines",
                       marker=dict(size=10, color="purple"), name="Properties"),
            row=2, col=1,
        )

        fig.update_layout(
            title=f"Comprehensive Analysis: {sequence_id}",
            showlegend=False,
            height=600,
        )

        fig.show()

    def perform_multiple_sequence_alignment(self, sequence_ids):
        """Align every pair of the named stored sequences.

        Args:
            sequence_ids: Ids to look up in ``self.sequences``; unknown ids
                are skipped.

        Returns:
            A list with the first alignment returned by ``PairwiseAligner``
            for each pair (in input order), or ``None`` when fewer than two
            ids were given or fewer than two of them are stored.
        """
        if len(sequence_ids) < 2:
            return None

        sequences = [self.sequences[seq_id] for seq_id in sequence_ids
                     if seq_id in self.sequences]

        if len(sequences) < 2:
            return None

        from itertools import combinations

        from Bio.Align import PairwiseAligner
        aligner = PairwiseAligner()
        aligner.match_score = 2
        aligner.mismatch_score = -1
        aligner.open_gap_score = -2
        aligner.extend_gap_score = -0.5

        return [aligner.align(first.seq, second.seq)[0]
                for first, second in combinations(sequences, 2)]

    def create_phylogenetic_tree(self, alignment_key=None, sequences=None):
        """Build a UPGMA tree from a stored alignment or from raw sequences.

        Args:
            alignment_key: Key into ``self.alignments``; used when present.
            sequences: Iterable of sequence strings. They are written to
                ``temp.fasta`` in the working directory and aligned by
                running ``clustalw2``.

        Returns:
            The constructed tree (also stored in ``self.trees`` under
            ``tree_<n>``), or ``None`` when no input was usable or the
            external alignment step failed.
        """
        if alignment_key and alignment_key in self.alignments:
            alignment = self.alignments[alignment_key]
        elif sequences:
            records = [SeqRecord(Seq(seq), id=f"seq_{i}")
                       for i, seq in enumerate(sequences)]
            SeqIO.write(records, "temp.fasta", "fasta")

            try:
                clustalw_cline = ClustalwCommandline("clustalw2", infile="temp.fasta")
                clustalw_cline()
                alignment = AlignIO.read("temp.aln", "clustal")
            except Exception:
                return None
            finally:
                # Remove whichever temp files exist, on success and failure.
                for path in ("temp.fasta", "temp.aln", "temp.dnd"):
                    if os.path.exists(path):
                        os.remove(path)
        else:
            return None

        calculator = DistanceCalculator("identity")
        dm = calculator.get_distance(alignment)

        constructor = DistanceTreeConstructor()
        tree = constructor.upgma(dm)

        tree_key = f"tree_{len(self.trees)}"
        self.trees[tree_key] = tree

        return tree

    def visualize_tree(self, tree):
        """Draw ``tree`` with ``Phylo.draw`` on a 10x6 matplotlib figure."""
        fig, ax = plt.subplots(figsize=(10, 6))
        Phylo.draw(tree, axes=ax)
        plt.title("Phylogenetic Tree")
        plt.tight_layout()
        plt.show()

    def protein_structure_analysis(self, sequence_id):
        """Translate a stored sequence and compute ``ProteinAnalysis`` metrics.

        Returns:
            A dict of protein properties, or ``None`` when the id is unknown,
            the length is not a multiple of three, the translation contains
            an internal stop, or any step raised.
        """
        if sequence_id not in self.sequences:
            return None

        seq = self.sequences[sequence_id].seq

        try:
            if len(seq) % 3 == 0:
                # Drop the last symbol only if it is a stop, so no real
                # residue is lost.
                core = self._strip_terminal_stop(str(seq.translate()))
                if "*" not in core:
                    prot_analysis = ProteinAnalysis(core)

                    return {
                        "molecular_weight": prot_analysis.molecular_weight(),
                        "isoelectric_point": prot_analysis.isoelectric_point(),
                        "amino_acid_percent": prot_analysis.get_amino_acids_percent(),
                        "secondary_structure": prot_analysis.secondary_structure_fraction(),
                        "flexibility": prot_analysis.flexibility(),
                        "gravy": prot_analysis.gravy(),
                    }
        except Exception:
            pass

        return None

    def comparative_analysis(self, sequence_ids):
        """Tabulate stored analysis results and plot them side by side.

        Args:
            sequence_ids: Ids to look up in ``self.analysis_results``;
                ids without a stored result are skipped.

        Returns:
            A ``DataFrame`` with one row per found id (the stored metrics
            plus a ``sequence_id`` column). A figure is shown only when more
            than one row exists.
        """
        results = []

        for seq_id in sequence_ids:
            if seq_id in self.analysis_results:
                # Shallow copy so the stored result is not given a new key.
                analysis = self.analysis_results[seq_id].copy()
                analysis["sequence_id"] = seq_id
                results.append(analysis)

        df = pd.DataFrame(results)

        if len(df) > 1:
            fig = make_subplots(
                rows=2, cols=2,
                subplot_titles=("Length Comparison", "GC Content",
                                "Molecular Weight", "Composition Heatmap"),
            )

            fig.add_trace(
                go.Bar(x=df["sequence_id"], y=df["length"], name="Length"),
                row=1, col=1,
            )

            if "gc_content" in df.columns:
                fig.add_trace(
                    go.Scatter(x=df["sequence_id"], y=df["gc_content"],
                               mode="markers+lines", name="GC%"),
                    row=1, col=2,
                )

            if "molecular_weight" in df.columns:
                fig.add_trace(
                    go.Bar(x=df["sequence_id"], y=df["molecular_weight"], name="MW"),
                    row=2, col=1,
                )

            fig.update_layout(title="Comparative Sequence Analysis", height=600)
            fig.show()

        return df

    def codon_usage_analysis(self, sequence_id):
        """Count non-overlapping triplets of a stored sequence and plot the top 20.

        Returns:
            A ``DataFrame`` with ``Codon`` and ``Count`` columns sorted by
            descending count, or ``None`` when the id is unknown or the
            sequence length is not a multiple of three.
        """
        if sequence_id not in self.sequences:
            return None

        seq = self.sequences[sequence_id].seq

        if len(seq) % 3 != 0:
            return None

        codons = {}
        for i in range(0, len(seq) - 2, 3):
            codon = str(seq[i:i + 3])
            codons[codon] = codons.get(codon, 0) + 1

        codon_df = pd.DataFrame(list(codons.items()), columns=["Codon", "Count"])
        codon_df = codon_df.sort_values("Count", ascending=False)

        fig = px.bar(codon_df.head(20), x="Codon", y="Count",
                     title=f"Top 20 Codon Usage - {sequence_id}")
        fig.show()

        return codon_df

    def motif_search(self, sequence_id, motif_pattern):
        """Return every start index where ``motif_pattern`` occurs exactly.

        Overlapping occurrences are all reported. An unknown ``sequence_id``
        yields an empty list.
        """
        if sequence_id not in self.sequences:
            return []

        seq = str(self.sequences[sequence_id].seq)
        last_start = len(seq) - len(motif_pattern)

        return [i for i in range(last_start + 1) if seq.startswith(motif_pattern, i)]

    def gc_content_window(self, sequence_id, window_size=100):
        """Compute and plot GC percentage over a sliding window.

        Args:
            sequence_id: Key of a record in ``self.sequences``.
            window_size: Window length; windows advance by a quarter of it.

        Returns:
            ``(positions, gc_values)`` where each position is the window
            centre, or ``None`` when the id is unknown.
        """
        if sequence_id not in self.sequences:
            return None

        seq = self.sequences[sequence_id].seq
        gc_values = []
        positions = []

        # A window smaller than 4 would give a step of 0, which range()
        # rejects; advance by at least one position.
        step = max(1, window_size // 4)
        for i in range(0, len(seq) - window_size + 1, step):
            window = seq[i:i + window_size]
            gc_values.append(gc_fraction(window) * 100)
            positions.append(i + window_size // 2)

        fig = go.Figure()
        fig.add_trace(go.Scatter(x=positions, y=gc_values, mode="lines+markers",
                                 name=f"GC Content (window={window_size})"))
        fig.update_layout(
            title=f"GC Content Sliding Window Analysis - {sequence_id}",
            xaxis_title="Position",
            yaxis_title="GC Content (%)",
        )
        fig.show()

        return positions, gc_values

    def run_comprehensive_analysis(self, sequence_ids):
        """Run the per-sequence analyses and, for several ids, the comparison.

        Args:
            sequence_ids: Ids to process; ids missing from
                ``self.sequences`` are skipped.

        Returns:
            A dict keyed by sequence id with ``basic_analysis``,
            ``gc_window`` and ``codon_usage`` entries, plus a
            ``comparative`` entry when more than one id was passed.
        """
        results = {}

        for seq_id in sequence_ids:
            if seq_id in self.sequences:
                analysis = self.analyze_sequence(seq_id)
                self.visualize_composition(seq_id)

                gc_analysis = self.gc_content_window(seq_id)
                codon_analysis = self.codon_usage_analysis(seq_id)

                results[seq_id] = {
                    "basic_analysis": analysis,
                    "gc_window": gc_analysis,
                    "codon_usage": codon_analysis,
                }

        if len(sequence_ids) > 1:
            results["comparative"] = self.comparative_analysis(sequence_ids)

        return results

LeCun’s world models vs LLM’s empire
Accelerating AI + XR prototyping with XR Blocks and Gemini
Healthy High Protein Cinnamon Roll for One
CBS blocks James Talarico interview by Stephen Colbert
Your First OpenAI API Project in Python Step-By-Step
TAGGED: agent, analysis, Bioinformatics, Biopython, Create, DNA, Protein
Share This Article
Facebook Email Print
Leave a Comment

Leave a Reply Cancel reply

Your email address will not be published. Required fields are marked *

Follow US

Find Us on Social Media
FacebookLike
XFollow
YoutubeSubscribe
TelegramFollow

Weekly Newsletter

Subscribe to our newsletter to get our newest articles instantly!
Popular News
108218827 1761762271206 gettyimages 2243487595 powell fomc nyse.jpeg
Investing & Finance

The Fed lowered interest rates, 2 stocks hit milestones

AllTopicsToday
AllTopicsToday
November 2, 2025
Weekly Chartstopper: January 2, 2026
Denise Richards Breaks Silence After Aaron Phypers Files for Divorce
Cooking Burnout Club Meal Plan 5
8.15 Friday Faves – The Fitnessista
- Advertisement -
Ad space (1)

Categories

  • Tech
  • Investing & Finance
  • AI
  • Entertainment
  • Wellness
  • Gaming
  • Movies

About US

We believe in the power of information to empower decisions, fuel curiosity, and spark innovation.
Quick Links
  • Home
  • Blog
  • About Us
  • Contact
Important Links
  • About Us
  • Privacy Policy
  • Terms and Conditions
  • Disclaimer
  • Contact

Subscribe US

Subscribe to our newsletter to get our newest articles instantly!

©AllTopicsToday 2026. All Rights Reserved.
1 2
Welcome Back!

Sign in to your account

Username or Email Address
Password

Lost your password?