# ═══════════════════════════════════════════════
# STEP 03: Binding Prediction
# Model: Graph Convolutional Network (GCN)
# ═══════════════════════════════════════════════
import torch
import torch.nn as nn
import torch_geometric.nn as gnn
from torch_geometric.data import Data
from rdkit import Chem
from rdkit.Chem import AllChem
import numpy as np
# ── 3-1. Molecule → graph conversion ──
def mol_to_graph(smiles: str) -> "Data | None":
    """Convert a SMILES string into a PyTorch Geometric graph.

    Every atom becomes a node carrying five features (atomic number,
    degree, formal charge, aromaticity flag, hybridization enum value).
    Every bond contributes two directed edges (i -> j and j -> i) so the
    resulting graph is effectively undirected.

    Args:
        smiles: SMILES representation of the molecule.

    Returns:
        A ``Data`` object with ``x`` of shape ``(num_atoms, 5)`` and
        ``edge_index`` of shape ``(2, 2 * num_bonds)``, or ``None`` when
        RDKit cannot parse ``smiles`` or the molecule has no atoms.
    """
    mol = Chem.MolFromSmiles(smiles)
    # An unparsable SMILES yields None; an empty SMILES yields a molecule
    # with zero atoms, which cannot form a usable graph either.
    if mol is None or mol.GetNumAtoms() == 0:
        return None

    # Atom features (nodes).
    atom_features = [
        [
            atom.GetAtomicNum(),              # atomic number
            atom.GetDegree(),                 # number of bonds
            atom.GetFormalCharge(),           # formal charge
            int(atom.GetIsAromatic()),        # aromatic or not
            int(atom.GetHybridization()),     # hybridization (enum value)
        ]
        for atom in mol.GetAtoms()
    ]

    # Bond indices (edges), stored in both directions.
    edges = []
    for bond in mol.GetBonds():
        i, j = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
        edges.append((i, j))
        edges.append((j, i))

    x = torch.tensor(atom_features, dtype=torch.float)
    if edges:
        ei = torch.tensor(edges, dtype=torch.long).t().contiguous()
    else:
        # Bond-less molecules (single atoms / ions): transposing an empty
        # 1-D tensor would give shape (0,), so build the (2, 0) shape that
        # message-passing layers expect explicitly.
        ei = torch.empty((2, 0), dtype=torch.long)
    return Data(x=x, edge_index=ei)
# ── 3-2. GCN binding prediction model ──
class BindingGCN(nn.Module):
    """Graph convolutional regressor producing one scalar per graph.

    Three ``GCNConv`` layers (``node_feat -> hidden -> hidden ->
    hidden // 2``), each followed by ReLU, are mean-pooled per graph and
    passed through a two-layer MLP head with a single output, which the
    pipeline uses as the predicted binding energy.

    Args:
        node_feat: Number of input features per node.
        hidden: Width of the first two graph-convolution layers.
    """

    def __init__(self, node_feat=5, hidden=256):
        super().__init__()
        self.conv1 = gnn.GCNConv(node_feat, hidden)
        self.conv2 = gnn.GCNConv(hidden, hidden)
        self.conv3 = gnn.GCNConv(hidden, hidden // 2)
        self.pool = gnn.global_mean_pool
        self.fc = nn.Sequential(
            nn.Linear(hidden // 2, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 1),  # binding energy output
        )

    def forward(self, data):
        """Predict one value per graph contained in ``data``.

        Args:
            data: A PyG ``Data`` or ``Batch`` exposing ``x`` and
                ``edge_index``; ``batch`` is optional.

        Returns:
            Tensor of shape ``(num_graphs, 1)``.
        """
        x, ei = data.x, data.edge_index
        batch = getattr(data, "batch", None)
        if batch is None:
            # A single un-batched graph carries no batch vector; assign
            # every node to graph 0 so pooling works regardless of how
            # the installed PyG version treats ``batch=None``.
            batch = torch.zeros(x.size(0), dtype=torch.long, device=x.device)
        x = torch.relu(self.conv1(x, ei))
        x = torch.relu(self.conv2(x, ei))
        x = torch.relu(self.conv3(x, ei))
        x = self.pool(x, batch)
        return self.fc(x)  # predicted binding free energy (ΔG)
# ── 3-3. ADMET prediction module ──
class ADMETPredictor(nn.Module):
    """MLP mapping a 2048-bit Morgan fingerprint to five ADMET scores."""

    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(2048, 512),  # input: Morgan fingerprint
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 5),  # five outputs: A, D, M, E, T
        )

    def predict(self, smiles: str) -> dict:
        """Predict the five ADMET scores for one molecule.

        Inference runs in eval mode (dropout disabled) and without
        gradient tracking so repeated calls on the same input give the
        same result; the module's previous train/eval state is restored
        afterwards.

        Args:
            smiles: SMILES representation of the molecule.

        Returns:
            Dict with keys ``absorption``, ``distribution``,
            ``metabolism``, ``excretion`` and ``toxicity`` holding the raw
            network outputs (no activation is applied to them here).

        Raises:
            ValueError: If RDKit cannot parse ``smiles``.
        """
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            raise ValueError(f"invalid SMILES string: {smiles!r}")

        # Radius-2 Morgan fingerprint folded to 2048 bits, matching the
        # width of the first linear layer.
        fp = AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=2048)
        device = next(self.parameters()).device
        x = torch.tensor(list(fp), dtype=torch.float, device=device).unsqueeze(0)

        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                pred = self.model(x).squeeze(0).cpu().numpy()
        finally:
            # Do not leave a lasting side effect on the caller's module.
            self.train(was_training)

        return {
            'absorption': pred[0],
            'distribution': pred[1],
            'metabolism': pred[2],
            'excretion': pred[3],
            'toxicity': pred[4],
        }
# ── 3-4. Integrated screening pipeline ──
def screen_library(library: list, top_n=100, binding_model=None,
                   admet_model=None) -> list:
    """Score a SMILES library and return the best non-toxic candidates.

    Each molecule is scored with the binding model and the ADMET model.
    Molecules whose predicted toxicity is below 0.3 are kept, and the
    survivors are returned in ascending order of predicted binding energy
    (lowest ``dG`` first).

    Args:
        library: Iterable of SMILES strings.
        top_n: Maximum number of results to return.
        binding_model: Optional ``BindingGCN`` to use. When omitted a new
            instance is constructed; NOTE that such an instance carries
            freshly initialized weights, since nothing here loads a
            checkpoint.
        admet_model: Optional ``ADMETPredictor`` to use; same caveat as
            ``binding_model`` when omitted.

    Returns:
        List of dicts with keys ``smiles``, ``dG`` and ``admet``, at most
        ``top_n`` long. SMILES strings that cannot be converted to a
        graph are skipped.
    """
    if binding_model is None:
        binding_model = BindingGCN()
    if admet_model is None:
        admet_model = ADMETPredictor()

    # Inference only: disable dropout so scores are deterministic.
    binding_model.eval()
    admet_model.eval()

    results = []
    with torch.no_grad():
        for smiles in library:
            graph = mol_to_graph(smiles)
            if graph is None:
                # Unparsable molecule: skip it rather than abort the
                # whole screening run.
                continue
            binding_dg = binding_model(graph).item()
            admet = admet_model.predict(smiles)
            if admet['toxicity'] < 0.3:  # toxicity filter
                results.append(
                    {'smiles': smiles, 'dG': binding_dg, 'admet': admet}
                )

    results.sort(key=lambda hit: hit['dG'])
    return results[:top_n]