Collection of Python Functions for Organic Chemistry (Chemoinformatics; Structural Bioinformatics)
Recommended posts : 【Organic Chemistry】 Organic Chemistry Index, 【Python】 Collection of Useful Python Functions
1. Nomenclature
2. Drawing
3. Amino Acid
4. Spectroscopy
1. Nomenclature
⑴ SMILES (Simplified Molecular-Input Line Entry System): A short ASCII string representation.
① Double bonds are represented by ‘=’, and triple bonds by ‘#’.
② For cyclic compounds, numbers such as 1, 2, … are assigned to indicate that the ends of a linear molecule are connected to form a ring.
○ Example: CN1C=NC2=C1C(=O)N(C(=O)N2C)C
③ ‘C’ represents a general carbon atom, whereas ‘c’ represents an aromatic carbon atom.
○ C1CCCCC1: Cyclohexane
○ c1ccccc1: Benzene
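④ Parentheses are used to indicate branches off the main chain, such as the C(=O) groups in the example above.
○ Square brackets are used for atoms that need extra information; for example, charge is written as [N+].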
⑤ @ symbol can be used to represent a stereocenter in a molecule.
⑥ / and \ symbols can be used to represent E/Z isomers.
○ Example: CCC/C(=C/C(=O)OCC)/C(=O)OCC
⑵ Code for converting organic compounds into SMILES
# Convert a one-letter FASTA sequence into a SMILES string.
# NOTE: `sequence` must already hold the FASTA string to convert.
from rdkit import Chem

# `flavor` (optional) selects how the sequence is interpreted:
#   0  Protein, L amino acids (default)
#   1  Protein, D amino acids
#   2  RNA, no cap
#   3  RNA, 5' cap
#   4  RNA, 3' cap
#   5  RNA, both caps
#   6  DNA, no cap
#   7  DNA, 5' cap
#   8  DNA, 3' cap
#   9  DNA, both caps
# The call is kept as the last expression so a notebook cell displays the SMILES.
Chem.MolToSmiles(Chem.MolFromFASTA(sequence, flavor=1))
⑶ SMILES, IUPAC Interconversion Code : It uses a transformer model.
# reference: https://github.com/Kohulan/Smiles-TO-iUpac-Translator
# Install STOUT first, either from PyPI or straight from GitHub
# (run in a shell; prefix the command with "!" inside a Jupyter notebook):
# pip install STOUT-pypi
# pip install git+https://github.com/Kohulan/Smiles-TO-iUpac-Translator.git
from STOUT import translate_forward, translate_reverse

# SMILES to IUPAC name translation
SMILES = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C"
IUPAC_name = translate_forward(SMILES)
print(f"IUPAC name of {SMILES} is: {IUPAC_name}")

# IUPAC name to SMILES translation
IUPAC_name = "1,3,7-trimethylpurine-2,6-dione"
SMILES = translate_reverse(IUPAC_name)
print(f"SMILES of {IUPAC_name} is: {SMILES}")
① SMILES code conversion may work even if the IUPAC input is not completely accurate
⑷ How to find out the IUPAC nomenclature given any chemical formula
① Step 1. Try the SMILES-to-drawing function several times to find the SMILES code representing the given compound.
② Step 2. Execute the SMILES-to-IUPAC function to obtain the final nomenclature.
⑸ Code for obtaining molecular weight from SMILES
from rdkit import Chem
from rdkit.Chem import Descriptors
def calculate_molecular_weight(smiles):
    """Return the exact molecular weight of the molecule given as SMILES.

    The value comes from ``Descriptors.ExactMolWt``, i.e. the monoisotopic
    mass. Use ``Descriptors.MolWt`` instead if the average molecular weight
    is wanted.

    Args:
        smiles: SMILES string of the molecule.

    Returns:
        The exact molecular weight as a float.

    Raises:
        ValueError: If ``smiles`` cannot be parsed by RDKit.
    """
    molecule = Chem.MolFromSmiles(smiles)
    # MolFromSmiles signals a parse failure by returning None; fail with a
    # readable message instead of an opaque error from the descriptor call.
    if molecule is None:
        raise ValueError(f"Invalid SMILES code: {smiles!r}")
    return Descriptors.ExactMolWt(molecule)


# Example usage
smiles_code = "C1=CC=C(C=C1)O"  # SMILES code for phenol
molecular_weight = calculate_molecular_weight(smiles_code)
print(f"Molecular Weight: {molecular_weight}")
⑹ Code to determine aromaticity from SMILES
from rdkit import Chem
def is_aromatic(smiles):
    """Tell whether the molecule contains at least one aromatic atom.

    Args:
        smiles: SMILES string of the molecule.

    Returns:
        True if RDKit flags any atom as aromatic; False otherwise, including
        when the SMILES string cannot be parsed.
    """
    parsed = Chem.MolFromSmiles(smiles)
    if parsed is not None:
        for atom in parsed.GetAtoms():
            if atom.GetIsAromatic():
                return True
    return False


# Example usage
cyclohexane = "C1CCCCC1"  # cyclohexane
benzene = "c1ccccc1"  # benzene
imidazole = "C1=CN=CN1"  # 1H-imidazole
for example in (cyclohexane, benzene, imidazole):
    verdict = 'aromatic' if is_aromatic(example) else 'not aromatic'
    print(f"The {example} is {verdict}")
⑺ Code for calculating dipole moment from SMILES
# conda install -c psi4 psi4
import psi4
from rdkit import Chem
from rdkit.Chem import AllChem
import numpy as np
def calculate_dipole_moment(smiles):
    """Compute the SCF/STO-3G dipole moment vector of a molecule, in Debye.

    The geometry is a single ETKDG-embedded conformer (not optimised), so the
    result varies slightly from run to run.

    Args:
        smiles: SMILES string of the molecule.

    Returns:
        numpy array ``[x, y, z]`` with the dipole components in Debye.

    Raises:
        ValueError: If ``smiles`` cannot be parsed.
        RuntimeError: If RDKit cannot generate 3D coordinates.
    """
    # Convert SMILES to molecule
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError(f"Invalid SMILES code: {smiles!r}")
    # Add Hydrogens
    mol = Chem.AddHs(mol)
    # Generate 3D coordinates; EmbedMolecule returns -1 when embedding fails
    if AllChem.EmbedMolecule(mol, AllChem.ETKDG()) == -1:
        raise RuntimeError(f"Could not generate 3D coordinates for {smiles!r}")
    # Extract coordinates as one "symbol x y z" line per atom
    conf = mol.GetConformer()
    lines = []
    for atom in mol.GetAtoms():
        pos = conf.GetAtomPosition(atom.GetIdx())
        lines.append(f"{atom.GetSymbol()} {pos.x} {pos.y} {pos.z}\n")
    xyz = "".join(lines)
    # Set up Psi4
    psi4.set_memory('500 MB')
    psi4.set_options({'basis': 'sto-3g'})
    # Calculate dipole moment using Psi4
    psi4_mol = psi4.geometry(xyz)
    psi4.energy('scf', molecule=psi4_mol)
    # Psi4 reports 'SCF DIPOLE' in atomic units (e*a0); convert to Debye so
    # the value matches the unit printed by callers.
    dipole_au = np.array(psi4.variable('SCF DIPOLE'))
    return dipole_au * psi4.constants.dipmom_au2debye


### Example usage
smiles_code = "CCO"  # Example SMILES code for ethanol
dipole_moment = calculate_dipole_moment(smiles_code)
print(f"Dipole Moment: {dipole_moment} Debye")
# e.g. Dipole Moment: [ 0.108  0.524 -1.343] Debye (depends on the embedded conformer)
dipole_vector = np.array(dipole_moment)
magnitude = np.linalg.norm(dipole_vector)
print(f"Magnitude of Dipole Moment: {magnitude} Debye")
# e.g. Magnitude of Dipole Moment: ~1.45 Debye
⑻ A function to obtain the boiling point (bp), melting point (mp), and critical temperature from SMILES
# reference: https://thermo.readthedocs.io/thermo.chemical.html
from thermo.chemical import Chemical
def _print_transition_temperatures(chemical):
    """Print the Tm, Tb and Tc attributes of a ``Chemical`` on one line."""
    print(chemical.Tm, chemical.Tb, chemical.Tc)  # melting, boiling, and critical points [K]


# Look up by common name
N2 = Chemical('Nitrogen')
_print_transition_temperatures(N2)
## 63.15 77.3549950205 126.192

# Look up by SMILES
molecule = Chemical('CC(C)C')
_print_transition_temperatures(molecule)
## 124.2 261.401014643 407.81

# Look up by IUPAC name (same compound as the SMILES above)
molecule_ = Chemical('2-methylpropane')
_print_transition_temperatures(molecule_)
## 124.2 261.401014643 407.81
① It works by looking the compound up in a database, so compounds that are not in the database cannot be handled.
② Various machine learning models are being introduced to improve this.
⑼ Model for predicting ADMET (Absorption, Distribution, Metabolism, Excretion, Toxicity) from SMILES : ADMET-AI
⑽ Function to generate IUPAC names from PubChem
import requests
import time
def fetch_iupac_names_from_pubchem(start_cid, count, *, timeout=10, max_consecutive_failures=100):
    """Collect IUPAC names from PubChem by walking consecutive CIDs.

    Args:
        start_cid: First PubChem compound ID to query.
        count: Number of IUPAC names to collect.
        timeout: Seconds to wait for each HTTP request.
        max_consecutive_failures: Stop after this many failed CIDs in a row,
            so an outage or the end of the CID range cannot loop forever.

    Returns:
        List of IUPAC name strings; shorter than ``count`` if the failure
        limit was reached first.
    """
    iupac_names = []
    base_url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{}/property/IUPACName/JSON"
    current_cid = start_cid
    consecutive_failures = 0
    while len(iupac_names) < count:
        try:
            response = requests.get(base_url.format(current_cid), timeout=timeout)
        except requests.RequestException:
            # Network errors are treated like a failed CID
            response = None
        if response is not None and response.status_code == 200:
            consecutive_failures = 0
            data = response.json()
            if 'PropertyTable' in data and 'Properties' in data['PropertyTable']:
                for prop in data['PropertyTable']['Properties']:
                    if 'IUPACName' in prop:
                        iupac_names.append(prop['IUPACName'])
                        if len(iupac_names) >= count:
                            break
        else:
            print(f"Failed to fetch data for CID {current_cid}")
            consecutive_failures += 1
            if consecutive_failures >= max_consecutive_failures:
                print(f"Stopping after {consecutive_failures} consecutive failures")
                break
        current_cid += 1
        time.sleep(0.2)  # PubChem allows at most 5 requests per second
    return iupac_names


# Fetch 100 IUPAC names starting from CID 1
iupac_names = fetch_iupac_names_from_pubchem(start_cid=1, count=100)
① Application 1. Creating IUPAC Nomenclature Examples
○ Crawling IUPAC nomenclature from PubChem → IUPAC-to-SMILES → SMILES-to-image
○ The example generation algorithm includes an additional step of verifying if SMILES-to-IUPAC matches the original IUPAC, which results in the following incidental effects:
○ Effect 1. Exclusion of inappropriate IUPAC nomenclature
○ Effect 2. By eliminating nomenclature that is difficult for computers to understand, the complexity of the nomenclature examples is adjusted.
⑾ Function to generate SMILES strings with stereochemistry from PubChem
import requests
import time
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers, StereoEnumerationOptions
def fetch_smiles_from_pubchem(start_cid, count, *, timeout=10, max_consecutive_failures=100):
    """Collect stereo-free SMILES strings from PubChem by walking consecutive CIDs.

    Args:
        start_cid: First PubChem compound ID to query.
        count: Number of SMILES strings to collect.
        timeout: Seconds to wait for each HTTP request.
        max_consecutive_failures: Stop after this many failed CIDs in a row,
            so an outage or the end of the CID range cannot loop forever.

    Returns:
        List of SMILES strings; shorter than ``count`` if the failure limit
        was reached first.
    """
    smiles_list = []
    base_url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{}/property/CanonicalSMILES/JSON"
    # NOTE(review): newer PubChem responses appear to return this property
    # under the key 'ConnectivitySMILES'; accept either name - confirm
    # against the current PUG REST documentation.
    smiles_keys = ('CanonicalSMILES', 'ConnectivitySMILES')
    current_cid = start_cid
    consecutive_failures = 0
    while len(smiles_list) < count:
        try:
            response = requests.get(base_url.format(current_cid), timeout=timeout)
        except requests.RequestException:
            # Network errors are treated like a failed CID
            response = None
        if response is not None and response.status_code == 200:
            consecutive_failures = 0
            data = response.json()
            if 'PropertyTable' in data and 'Properties' in data['PropertyTable']:
                for prop in data['PropertyTable']['Properties']:
                    key = next((k for k in smiles_keys if k in prop), None)
                    if key is not None:
                        smiles_list.append(prop[key])
                        if len(smiles_list) >= count:
                            break
        else:
            print(f"Failed to fetch data for CID {current_cid}")
            consecutive_failures += 1
            if consecutive_failures >= max_consecutive_failures:
                print(f"Stopping after {consecutive_failures} consecutive failures")
                break
        current_cid += 1
        time.sleep(0.2)  # PubChem allows at most 5 requests per second
    return smiles_list
def generate_stereoisomers(smiles, max_isomers=5):
    """Enumerate stereoisomers of a molecule and return them as isomeric SMILES.

    Args:
        smiles: SMILES string of the parent molecule.
        max_isomers: Passed to RDKit as the ``maxIsomers`` enumeration limit.

    Returns:
        List of isomeric SMILES strings; empty if ``smiles`` cannot be parsed.
    """
    parent = Chem.MolFromSmiles(smiles)
    if parent is None:
        return []
    enumeration_options = StereoEnumerationOptions(onlyUnassigned=True, maxIsomers=max_isomers)
    enumerated = list(EnumerateStereoisomers(parent, options=enumeration_options))
    encoded = []
    for candidate in enumerated:
        encoded.append(Chem.MolToSmiles(candidate, isomericSmiles=True))
    return encoded
# Download the first 100 canonical SMILES codes, beginning at CID 1
smiles_codes = fetch_smiles_from_pubchem(start_cid=1, count=100)

# Enumerate and report the stereoisomers of every downloaded SMILES code
all_stereoisomers = []
for position, code in enumerate(smiles_codes, start=1):
    variants = generate_stereoisomers(code)
    all_stereoisomers += variants
    print(f"Canonical SMILES {position}: {code}")
    for rank, variant in enumerate(variants, start=1):
        print(f" Stereoisomer {rank}: {variant}")

# Optionally cap how many of the generated stereoisomers are listed
max_display = 100
print("\nGenerated Stereoisomers (limited to first {}):".format(max_display))
displayed = all_stereoisomers[:max_display]
for position, variant in enumerate(displayed, start=1):
    print(f"{position}: {variant}")
''' Visualization code example
from rdkit import Chem
from rdkit.Chem import Draw
smiles = 'O=C(O)C[C@@]1(Cl)C=CC(=O)O1'
molecule = Chem.MolFromSmiles(smiles)
Draw.MolToImage(molecule)
'''
① Application 1. Creating RS Nomenclature Examples
○ Crawling SMILES strings from PubChem → canonical SMILES to stereochemical SMILES → SMILES-to-image
② Application 2. The above code yields a structure that particularly favors the R configuration.
○ In practice, upon examining 62 examples of stereoisomers, 40 instances favored the R configuration while 22 favored the S configuration, indicating a preference for the R configuration in the test.
○ The theoretical ratio of R isomers and S isomers should be identical, so it is suspected that there might be a cognitive bias instead.
2. Drawing
⑴ Drawing molecular formulas for organic compounds (e.g., peracetic acid)
from rdkit import Chem
from rdkit.Chem import Draw
# Define the SMILES string for peracetic acid, CH3-C(=O)-O-OH
# (the previous value 'C=CC(=O)O' encodes acrylic acid instead)
smiles = 'CC(=O)OO'
# Convert the SMILES string to an RDKit molecule object
molecule = Chem.MolFromSmiles(smiles)
# Draw the molecule without saving
Draw.MolToImage(molecule)
# Draw the molecule with saving
Draw.MolToFile(molecule, 'peracetic_acid.png')
⑵ Drawing electron density maps for organic compounds (ver. 1) (e.g., peracetic acid)
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.Draw import SimilarityMaps
# Build the molecule, add hydrogens, embed it in 3D and relax it with MMFF
molecule = Chem.MolFromSmiles('CC(=O)OO')
molecule_3d = Chem.AddHs(molecule)
AllChem.EmbedMolecule(molecule_3d, AllChem.ETKDG())
AllChem.MMFFOptimizeMolecule(molecule_3d)

# Store a Gasteiger partial charge on every atom
AllChem.ComputeGasteigerCharges(molecule_3d)


def GetAtomCharges(mol):
    """Return each atom's '_GasteigerCharge' property as a float, in atom order."""
    charges = []
    for atom in mol.GetAtoms():
        charges.append(float(atom.GetProp('_GasteigerCharge')))
    return charges


# Draw the per-atom charges as a contour map over the molecule
atom_weights = GetAtomCharges(molecule_3d)
fig = SimilarityMaps.GetSimilarityMapFromWeights(molecule_3d, atom_weights, colorMap='jet', contourLines=10)
⑶ Drawing electron density maps for organic compounds (ver. 2) (e.g., peracetic acid)
# Reference: https://rdkit.readthedocs.io/en/latest/Cookbook.html
## STEP 1. make a random forest model
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
from sklearn.ensemble import RandomForestClassifier
import numpy


def _bit_vector_to_array(bit_vector):
    """Copy an RDKit explicit bit vector into a freshly allocated numpy array."""
    arr = numpy.zeros((1,))
    DataStructs.ConvertToNumpyArray(bit_vector, arr)
    return arr


# generate four molecules
mols = [Chem.MolFromSmiles(s) for s in ('c1ccccc1', 'c1ccccc1CC', 'c1ccncc1', 'c1ccncc1CC')]
m1, m2, m3, m4 = mols
# generate fingerprints: Morgan fingerprint with radius 2
fps = [AllChem.GetMorganFingerprintAsBitVect(m, 2) for m in mols]
# convert the RDKit explicit vectors into numpy arrays
np_fps = [_bit_vector_to_array(fp) for fp in fps]
# get a random forest classifier with 100 trees
rf = RandomForestClassifier(n_estimators=100, random_state=1123)
# train the random forest
# with the first two molecules being actives (class 1) and
# the last two being inactives (class 0)
ys_fit = [1, 1, 0, 0]
rf.fit(np_fps, ys_fit)
# use the random forest to predict a new molecule
m5 = Chem.MolFromSmiles('c1ccccc1O')
fp = _bit_vector_to_array(AllChem.GetMorganFingerprintAsBitVect(m5, 2))
print(rf.predict((fp,)))
print(rf.predict_proba((fp,)))
## STEP 2. run the random forest model for the input
from rdkit.Chem.Draw import SimilarityMaps


# helper function
def getProba(fp, predictionFunction):
    """Return element [0][1] of ``predictionFunction((fp,))``.

    With ``predict_proba`` as the prediction function this is the second
    class column for the single fingerprint passed in.
    """
    first_row = predictionFunction((fp,))[0]
    return first_row[1]


def _forest_probability(fingerprint):
    """Adapter handing the trained forest's ``predict_proba`` to getProba."""
    return getProba(fingerprint, rf.predict_proba)


m5 = Chem.MolFromSmiles('CC(=O)OO')
fig, maxweight = SimilarityMaps.GetSimilarityMapForModel(m5, SimilarityMaps.GetMorganFingerprint, _forest_probability)
⑷ Drawing three-dimensional molecular structures of organic compounds.
from rdkit import Chem
from rdkit.Chem import AllChem
import py3Dmol
def draw_3d_molecule(smiles):
    """Show an interactive 3D stick model of a molecule with py3Dmol.

    Args:
        smiles: SMILES string of the molecule.

    Returns:
        The result of ``viewer.show()``, or None (after printing a message)
        if the SMILES is invalid or no 3D coordinates could be generated.
    """
    # Convert SMILES to RDKit molecule
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        print("Invalid SMILES code.")
        return None
    # Generate 3D coordinates for the molecule
    mol = Chem.AddHs(mol)  # Add hydrogens
    # EmbedMolecule returns -1 on failure; without a conformer there is
    # nothing meaningful to render.
    if AllChem.EmbedMolecule(mol, AllChem.ETKDG()) == -1:
        print("3D embedding failed.")
        return None
    # Convert RDKit molecule to 3Dmol.js viewable format
    mb = Chem.MolToMolBlock(mol)
    # Visualization with Py3Dmol
    viewer = py3Dmol.view(width=400, height=300)
    viewer.addModel(mb, 'mol')
    viewer.setStyle({'stick': {}})
    viewer.zoomTo()
    return viewer.show()


# Example usage
smiles_code = "CCO"  # Ethanol
draw_3d_molecule(smiles_code)
① Review 1: In methanol, the two methyl groups are in a staggered orientation to minimize steric hindrance, whereas in ethanol, this is not the case due to intramolecular hydrogen bonding.
② Review 2: The structure is also well implemented in the following conjugated ring compounds.
③ This is the charm of chemoinformatics, where new knowledge can be created solely with machine learning models.
⑸ Displaying 2D molecular structure with R/S nomenclature for organic compounds
from rdkit import Chem
from rdkit.Chem import AllChem, Draw
from rdkit.Chem.rdMolDescriptors import CalcMolFormula
from PIL import Image, ImageDraw, ImageFont
import matplotlib.font_manager as fm
def get_stereochemistry(mol):
    """Map atom index to CIP code for every atom carrying a '_CIPCode' property.

    Stereochemistry is (re)assigned on ``mol`` in place before the lookup.
    """
    Chem.AssignStereochemistry(mol, cleanIt=True, force=True)
    return {
        atom.GetIdx(): atom.GetProp('_CIPCode')
        for atom in mol.GetAtoms()
        if atom.HasProp('_CIPCode')
    }
def draw_2d_molecule_with_stereochemistry(smiles, filename="molecule.png"):
    """Render a 2D depiction with "(R)"/"(S)" labels and save it as an image.

    Args:
        smiles: SMILES string of the molecule.
        filename: Path of the image file to write.

    Returns:
        None. Prints a message and returns early if the SMILES is invalid.
    """
    # Convert SMILES to RDKit molecule
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        print("Invalid SMILES code.")
        return
    # Generate 2D coordinates once; the same conformer is used for drawing
    # and for placing the labels.
    AllChem.Compute2DCoords(mol)
    # Get stereochemistry information
    stereo_info = get_stereochemistry(mol)
    print(stereo_info)
    # Draw the molecule
    img = Draw.MolToImage(mol, size=(300, 300), kekulize=True, wedgeBonds=True)
    # Convert the RDKit image to a PIL image
    pil_img = img.convert("RGBA")
    # Create a drawing context
    draw = ImageDraw.Draw(pil_img)
    # Load fonts
    font_size = 18  # You can change the font size here
    try:
        # Use DejaVuSans.ttf for regular text
        font_path = fm.findfont(fm.FontProperties(family="DejaVu Sans"))
        font = ImageFont.truetype(font_path, font_size)
        # Use DejaVuSans-Oblique.ttf for italic text
        italic_font_path = fm.findfont(fm.FontProperties(family="DejaVu Sans", style="italic"))
        italic_font = ImageFont.truetype(italic_font_path, font_size)
    except IOError:
        font = ImageFont.load_default()
        italic_font = font  # Fallback if no italic font is found
    # Get 2D coordinates for each atom
    coords = mol.GetConformer().GetPositions()
    # Add stereo annotations
    for atom_idx, stereo in stereo_info.items():
        pos = coords[atom_idx]
        # NOTE(review): this scale/offset is a fixed guess for a 300x300
        # image, not the transform RDKit actually used, so labels may be
        # offset from their atoms - confirm visually.
        x = pos[0] * 35 + 135
        y = -pos[1] * 35 + 150
        # Draw the text annotation with non-italicized brackets and italicized R/S
        draw.text((x, y), "(", fill=(0, 0, 0), font=font)
        draw.text((x + 10, y), stereo, fill=(0, 0, 0), font=italic_font)
        draw.text((x + 30, y), ")", fill=(0, 0, 0), font=font)
    # Save the image to a file
    pil_img.save(filename)
    print(f"Image saved as {filename}")


# Example usage
smiles_code = "C[C@H](O)[C@H](O)C"  # Example molecule with stereochemistry
draw_2d_molecule_with_stereochemistry(smiles_code, "molecule_with_stereochemistry.png")
⑹ Displaying 3D molecular structure with R/S nomenclature for organic compounds
from rdkit import Chem
from rdkit.Chem import AllChem, Draw
from rdkit.Chem.rdMolDescriptors import CalcMolFormula
import py3Dmol
def get_stereochemistry(mol):
    """Map atom index to CIP code for every atom carrying a '_CIPCode' property.

    Stereochemistry is (re)assigned on ``mol`` in place before the lookup.
    """
    Chem.AssignStereochemistry(mol, cleanIt=True, force=True)
    return {
        atom.GetIdx(): atom.GetProp('_CIPCode')
        for atom in mol.GetAtoms()
        if atom.HasProp('_CIPCode')
    }
def draw_3d_molecule_with_stereochemistry(smiles):
    """Show an interactive 3D stick model with R/S labels on stereocentres.

    Args:
        smiles: SMILES string of the molecule.

    Returns:
        The result of ``viewer.show()``, or None (after printing a message)
        if the SMILES is invalid or no 3D coordinates could be generated.
    """
    # Convert SMILES to RDKit molecule
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        print("Invalid SMILES code.")
        return None
    # Generate 3D coordinates for the molecule
    mol = Chem.AddHs(mol)  # Add hydrogens
    # EmbedMolecule returns -1 on failure; GetConformer() below would then
    # raise, so stop here with a readable message instead.
    if AllChem.EmbedMolecule(mol, AllChem.ETKDG()) == -1:
        print("3D embedding failed.")
        return None
    # Get stereochemistry information
    stereo_info = get_stereochemistry(mol)
    print(stereo_info)
    # Convert RDKit molecule to 3Dmol.js viewable format
    mb = Chem.MolToMolBlock(mol)
    # Visualization with Py3Dmol
    viewer = py3Dmol.view(width=400, height=300)
    viewer.addModel(mb, 'mol')
    viewer.setStyle({'stick': {}})
    # Annotate the stereochemistry on the molecule
    conformer = mol.GetConformer()
    for atom_idx, stereo in stereo_info.items():
        pos = conformer.GetAtomPosition(atom_idx)
        viewer.addLabel(stereo, {
            'position': {'x': pos.x, 'y': pos.y, 'z': pos.z},
            'backgroundColor': 'black',
            'fontColor': 'white',
            'fontSize': 14,
            'showBackground': True
        })
    viewer.zoomTo()
    return viewer.show()


# Example usage
smiles_code = "C[C@H](O)[C@H](O)C"  # Example molecule with stereochemistry
draw_3d_molecule_with_stereochemistry(smiles_code)
⑺ Code that draws all of the structural isomers of alkanes according to the number of carbon atoms
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem import AllChem
from rdkit.Chem.rdchem import Mol
from IPython.display import display
def generate_alkanes(n_carbons):
    """Generate every structural isomer of the alkane with ``n_carbons`` carbons.

    Isomers are grown one carbon at a time: a new carbon is attached to each
    atom with fewer than four neighbours, and duplicates are collapsed via
    canonical SMILES.

    Args:
        n_carbons: Number of carbon atoms (positive integer).

    Returns:
        List of RDKit molecules, ordered by canonical SMILES so the output is
        reproducible between runs.

    Raises:
        ValueError: If ``n_carbons`` is smaller than 1.
    """
    if n_carbons < 1:
        raise ValueError("n_carbons must be a positive integer")
    current = {'C'}  # canonical SMILES of all isomers of the current size
    for _ in range(n_carbons - 1):
        grown = set()
        for parent_smiles in current:
            mol = Chem.MolFromSmiles(parent_smiles)
            for atom in mol.GetAtoms():
                if atom.GetDegree() < 4:  # Carbon can have at most four bonds
                    new_mol = Chem.RWMol(mol)
                    new_idx = new_mol.AddAtom(Chem.Atom(6))
                    new_mol.AddBond(atom.GetIdx(), new_idx, Chem.BondType.SINGLE)
                    Chem.SanitizeMol(new_mol)
                    grown.add(Chem.MolToSmiles(new_mol, canonical=True))
        current = grown
    return [Chem.MolFromSmiles(smiles) for smiles in sorted(current)]


# Generation and Visualization of All Structural Isomers of C7H16
n_carbons = 7
alkanes = generate_alkanes(n_carbons)
img = Draw.MolsToGridImage(alkanes, molsPerRow=5, subImgSize=(200, 200))
# Directly display images (works only in Jupyter notebooks)
display(img)
Chemical Formula | # of Structural Isomers |
---|---|
C3H8 | 1 |
C4H10 | 2 |
C5H12 | 3 |
C6H14 | 5 |
C7H16 | 9 |
C8H18 | 18 |
C9H20 | 35 |
C10H22 | 75 |
C11H24 | 159 |
C12H26 | 355 |
C13H28 | 802 |
C14H30 | 1,858 |
C15H32 | 4,347 |
C16H34 | 10,359 |
C17H36 | 24,894 |
C18H38 | 60,523 |
C19H40 | 148,284 |
C20H42 | 366,319 |
C30H62 | 4,111,846,763 |
C40H82 | 62,481,801,147,341 |
Table 1. Number of structural isomers of alkanes based on carbon count
⑻ Code to Draw All Alkane Substituents Using a Tree Data Structure (Duplicates exist starting from n_carbons = 6)
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.rdchem import AtomValenceException
from IPython.display import display
class Node:
    """A tree vertex: an identifier plus an ordered list of child nodes."""

    def __init__(self, id):
        self.children = []
        self.id = id


class Tree:
    """A rooted tree stored as a flat list of nodes; the first entry is the root."""

    def __init__(self, root):
        self.nodes = [root]

    def add_node(self, parent_id, node_id, max_nodes):
        """Attach a new node under the node stored at position ``parent_id``.

        Returns the new node, or None when the tree already holds
        ``max_nodes`` nodes or the parent already has three children.
        """
        # The size check comes first so a full tree never indexes the parent
        if len(self.nodes) >= max_nodes:
            return None
        parent = self.nodes[parent_id]
        if len(parent.children) >= 3:
            return None
        child = Node(node_id)
        self.nodes.append(child)
        parent.children.append(child)
        return child
def generate_trees(current_tree, current_id, max_nodes):
    """Recursively grow ``current_tree`` until it holds ``max_nodes`` nodes.

    At each step a new node with id ``current_id`` is tried under every
    existing node; branches that ``is_valid_tree`` rejects are pruned.

    Returns:
        List of completed trees that pass ``is_valid_tree``.
    """
    size = len(current_tree.nodes)
    if size == max_nodes:
        return [current_tree] if is_valid_tree(current_tree) else []

    found = []
    for parent_pos in range(size):
        # Rebuild the topology with fresh Node objects so the input tree is
        # left untouched.
        clone = Tree(Node(0))
        clone.nodes = [Node(k) for k in range(size)]
        for fresh, source in zip(clone.nodes, current_tree.nodes):
            fresh.children = [clone.nodes[kid.id] for kid in source.children]
        added = clone.add_node(parent_pos, current_id, max_nodes)
        if added and is_valid_tree(clone):
            found += generate_trees(clone, current_id + 1, max_nodes)
    return found
def is_valid_tree(tree):
    """Check that every node's children are ordered by non-decreasing child count.

    Returns:
        False as soon as some child has more children of its own than the
        sibling that follows it; True otherwise.
    """
    for node in tree.nodes:
        fanouts = [len(child.children) for child in node.children]
        for left, right in zip(fanouts, fanouts[1:]):
            if left > right:
                return False
    return True
def visualize_organic_structures(trees):
    """Turn each tree into an RDKit molecule made of singly bonded carbons.

    The node with id 0 gets one radical electron; every parent-child edge
    becomes a single bond.

    Returns:
        List of ``Chem.RWMol`` objects, one per input tree, in input order.
    """
    mols = []
    for tree in trees:
        mol = Chem.RWMol()
        atom_index = {}
        for node in tree.nodes:
            carbon = Chem.Atom(6)
            if node.id == 0:
                carbon.SetNumRadicalElectrons(1)  # Set radical electron for the root
            atom_index[node.id] = mol.AddAtom(carbon)
        for node in tree.nodes:
            begin = atom_index[node.id]
            for child in node.children:
                mol.AddBond(begin, atom_index[child.id], Chem.BondType.SINGLE)
        mols.append(mol)
    return mols


# Total number of nodes (carbons) per structure
n_nodes = 4
# Grow every tree from a single root node
initial_tree = Tree(Node(0))
all_trees = generate_trees(initial_tree, 1, n_nodes)
all_mols = visualize_organic_structures(all_trees)
# Show the structures in a grid
img = Draw.MolsToGridImage(all_mols, molsPerRow=5, subImgSize=(200, 200), useSVG=False)
display(img)
① n_carbons = 3: Total 2 sets
② n_carbons = 4: Total 4 sets
③ n_carbons = 5: Total 8 sets
④ n_carbons = 6: Total 17 sets
3. Amino Acid
⑴ A function for translating a nucleotide sequence into an amino acid sequence
from Bio.Seq import Seq
# Assuming 'sequence' is your string of nucleotides:
sequence = "ACTCATTCTCCCCAGACGCCAAGGATGGTGGTCATGGCGCCCCGAACCCTCTTCCTGCTGCTCTCGGGGGCCCTGACCCTGACCGAGACCTGGGCGG" # truncated for brevity
# Create a sequence object
seq_obj = Seq(sequence)
# Only whole codons can be translated; this sequence has 97 nucleotides, so
# drop the trailing partial codon explicitly to avoid Biopython's
# partial-codon warning.
usable_length = len(seq_obj) - len(seq_obj) % 3
# Translate the sequence (stops at the first stop codon, if any)
amino_acid_sequence = seq_obj[:usable_length].translate(to_stop=True)
# Print the amino acid sequence
print(amino_acid_sequence)
⑵ Function to generate PDB files regarding amino acid structure from amino acid sequence using AlphaFold2
4. Spectroscopy
⑴ Overview
① Research on predicting MS, IR, and NMR spectra from chemical formulas or predicting chemical formulas from MS, IR, and NMR data is actively being conducted.
② This research is making significant progress with the advancement of deep learning technology.
○ Example: NMR-TS
Input : 2023.11.30 02:40