21 10 2014 chemoinformatics cytoscape Tweet
RDKItはインストールが面倒くさいのと、実務だとPipelinePIlotとかOpenEye使うだろうからさらっと流すかも。その分iGraphのいじり方を多めに説明するかもしれない。
まだスライドに手をつけてないからわからないけどw
iGraphを使えばノードとエッジに属性を簡単に付加できてgml形式で出力できる。このファイルはすぐにCytoscapeで開くことが出来て便利です。sifとかタブ区切りテキストは色々と面倒ですね。
from rdkit import Chem from rdkit.Chem import AllChem, DataStructs, MolToSmiles import numpy as np from igraph import Graph import numpy as np import logging logging.basicConfig(level=logging.DEBUG) threshold = 0.5 suppl = Chem.SDMolSupplier('syk.sdf') fps = [AllChem.GetMorganFingerprintAsBitVect(mol, 2) for mol in suppl] smiles = [MolToSmiles(mol) for mol in suppl] mat = [] for i in range(len(fps)): mat.append([DataStructs.FingerprintSimilarity(fps[i],fps[j]) for j in range(len(fps))]) sm = np.array(mat) def search_root(sm, remain): cand = None max_c = 0 for l in remain: num_c = len(np.where(sm[l] > threshold)[0].tolist()) if num_c > max_c: max_c = num_c cand = l return cand def check_edge(sm, current, root): cand = None max_c = 0 candidates = set(np.where(sm[root] > threshold)[0].tolist()) for c in candidates: if c in current: if sm[root, c] > max_c: max_c = sm[root, c] cand = c return cand edges = [] sim_edges = [] np.fill_diagonal(sm, 0) remain = set(range(len(sm[0]))) current = set() new = set() ith = 1 while remain: logging.debug("### {} th ###".format(ith)) ith += 1 root = search_root(sm, remain) if root is None: break logging.debug("root: {}".format(root)) cand = check_edge(sm, current, root) if cand: logging.debug("connect {} and {}".format(cand, root)) edges.append((cand, root)) current.remove(cand) sm[:, cand] = 0 remain.remove(root) sm[:, root] = 0 for i in np.where(sm[root] > threshold)[0].tolist(): if i in remain: edges.append((root, i)) sim_edges.append(sm[root, i]) new.add(i) logging.debug("new nodes: {}".format(new)) current |= new remain -= current new = set() g = Graph(edges) g.vs["smiles"] = smiles g.es["similarity"] = sim_edges g.save("test.gml")