src/biotoolbox/contact_map_generator.py (100 lines of code) (raw):
#!/usr/bin/env python
# encoding: utf-8
'''
*Copyright (c) 2023, Alibaba Group;
*Licensed under the Apache License, Version 2.0 (the "License");
*you may not use this file except in compliance with the License.
*You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*Unless required by applicable law or agreed to in writing, software
*distributed under the License is distributed on an "AS IS" BASIS,
*WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*See the License for the specific language governing permissions and
*limitations under the License.
@author: Hey
@email: sanyuan.**@**.com
@tel: 137****6540
@datetime: 2022/12/21 17:55
@project: DeepProtFunc
@file: contact_map_generator
@desc: Contact Map generator
'''
import logging
import math
from pathlib import Path
import numpy as np
from gemmi import Position, cif
class ContactMap(object):
"""
The methods in this class parse an mmCIF file, calculate pairwise C-alpha
distances per chains and saves the distances as numpy .npy matrices
"""
def __init__(self, input_path, output_path, chain, c_atom_type):
"""
:param input_path: String; path to a single mmCIF file
:param output_path: String; path to directory to save the .npy files
"""
self.input_path = input_path
self.output_path = output_path
self.distances = {}
self.chain = chain
self.c_atom_type = c_atom_type
def check_atom(self, atom):
"""
Checks if an atom
:param atom:
:return: Boolean
"""
return atom["group_PDB"] == "ATOM" and atom["label_atom_id"] == self.c_atom_type and atom["label_asym_id"] == self.chain
def get_atoms(self, mmcif):
"""
Returns a row from the "_atom_site" loop of the mmcif data
:param mmcif: Gemmi mmCIF object
:return: Gemmi mmCIF row
"""
atoms = mmcif.find(
"_atom_site.",
[
"group_PDB",
"label_atom_id",
"label_asym_id",
"Cartn_x",
"Cartn_y",
"Cartn_z",
],
)
return [atom for atom in atoms if atom["label_atom_id"] == self.c_atom_type and atom["label_asym_id"] == self.chain]
def get_position(self, atom):
"""
Returns the x, y, z coordinates of an atom
:param atom: Gemmi mmCIF atom
:return: Gemmi Position object
"""
x = float(atom["Cartn_x"])
y = float(atom["Cartn_y"])
z = float(atom["Cartn_z"])
return Position(x, y, z)
def get_file_name(self, chain_id):
"""
Returns a file name
:param chain_id: String, a PDB chain identifier
:return: String, a file name to save the matrix data
"""
file_name = Path(self.input_path).parts[-1].split(".")[0]
file_name_with_chain = f"{file_name}_{chain_id}_matrix.npy"
return str(Path(self.output_path, file_name_with_chain))
def save_matrices(self):
"""
Save all the matrices per chain to individual .npy files
:return: numpy matrix
"""
for chain_id in self.distances.keys():
dimension = int(math.sqrt(len(self.distances[chain_id])))
distances_matrix = np.array(self.distances[chain_id]).reshape(
dimension, dimension
)
self.save_matrix(self.get_file_name(chain_id), distances_matrix)
def get_matrix(self):
chain_id = self.chain
if chain_id in self.distances:
dimension = int(math.sqrt(len(self.distances[chain_id])))
distances_matrix = np.array(self.distances[chain_id]).reshape(
dimension, dimension
)
return distances_matrix
return None
def save_matrix(self, path, matrix):
"""
Save a single matrix to an .npy file
:param matrix: numpy matrix
:param path: String, path to where the file should be saved
:return: None
"""
with open(path, "wb") as outfile:
np.save(outfile, matrix)
def get_distances(self, mmcif):
"""
Calculate the pairwise distances between two atoms across an mmCIF file
:param mmcif: Gemmi mmCIF object
:return: None
"""
# get all toms
all_atoms = self.get_atoms(mmcif)
for atom_1 in all_atoms:
chain_1, pos_1 = self.get_chain_and_position(atom_1)
if chain_1 and pos_1:
for atom_2 in all_atoms:
chain_2, pos_2 = self.get_chain_and_position(atom_2)
if chain_2 and chain_1 == chain_2:
distance = round(pos_1.dist(pos_2), 2)
self.distances[chain_1].append(distance)
def get_chain_and_position(self, atom):
"""
Get the chain identifier and the x, y, z coordinates of an atom
:return: String, GEMMI Position object
"""
if self.check_atom(atom):
chain = atom["label_asym_id"]
if chain not in self.distances.keys():
self.distances[chain] = []
position = self.get_position(atom)
return chain, position
return None, None
def run(self):
"""
Run the process:
1.) Read a single mmCIF file
2.) Calculate the pairwise distances between C-alpha atoms and C-beta atoms
3.) Save the resulting matrices per chains in .npy format
:return: None
"""
try:
mmcif = cif.read_file(self.input_path).sole_block()
self.get_distances(mmcif)
return self.get_matrix()
# self.save_matrices()
except Exception as e:
logging.error("Error: %s" % e)
return None