#!/usr/bin/env python3
# coding: utf-8
import os
import logging
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import MDAnalysis as mda
from MDAnalysis.analysis import align
from MDAnalysis.analysis import contacts
import MDAnalysis.analysis.pca as pca
from ipywidgets import IntProgress
# Logging
logger = logging.getLogger(__name__)
[docs]
def read_traj(start_pdb, generic_name):
traj_file_list = [f"{generic_name}.dcd"]
# Get part number
part = 1
while os.path.isfile(f"{generic_name}_part_{part + 1}.csv"):
part += 1
logger.info(f" part number = {part}")
for i in range(2, part + 1):
traj_file_list.append(f"{generic_name}_part_{i}.dcd")
md = mda.Universe(start_pdb, traj_file_list)
return md
[docs]
def prepare_traj(md, ref_Sel, compound="fragments"):
prot = md.select_atoms(ref_Sel)
rest = md.select_atoms(f"not ({ref_Sel})")
print()
f = IntProgress(min=0, max=md.trajectory.n_frames) # instantiate the bar
display(f) # display the bar
for ts in md.trajectory:
# print(ts.frame)
# all_sel.unwrap()
protein_center = prot.center_of_geometry(pbc=False)
dim = ts.triclinic_dimensions
box_center = np.sum(dim, axis=0) / 2
# print(f'Box: {box_center} Prot: {protein_center}')
md.atoms.translate(box_center - protein_center)
rest.wrap(compound=compound)
if ts.frame % 100 == 0:
f.value += 100
[docs]
def align_traj(md, ref, ref_Sel, tol_mass=0.1):
alignment = align.AlignTraj(
md, ref, select=f"{ref_Sel}", in_memory=True, verbose=True, tol_mass=tol_mass
)
_ = alignment.run()
[docs]
def compute_pca(md, ref, sel="backbone", cum_var=0.8):
prot_pca = pca.PCA(md, select=sel)
logger.info("Compute PCA")
prot_pca.run(verbose=True)
# Get components defining cum_var % of variance
n_pcs = max(2, np.where(prot_pca.cumulated_variance > cum_var)[0][0])
logger.info(prot_pca.cumulated_variance)
variance_pd = pd.DataFrame(prot_pca.cumulated_variance, columns=["variance"])
variance_pd["PC"] = range(1, len(prot_pca.cumulated_variance) + 1)
variance_pd["PC"] = variance_pd["PC"]
plt.figure()
ax = sns.barplot(data=variance_pd, x="PC", y="variance")
plt.xlim(-0.5, 20)
plt.ylim(0, 1.0)
atomgroup = md.select_atoms(sel)
pca_space = prot_pca.transform(atomgroup, n_components=n_pcs)
col_names = ["PC_{}".format(i + 1) for i in range(n_pcs)]
pca_df = pd.DataFrame(pca_space, columns=col_names)
if ref is not None:
logger.info("Compute PCA for ref structure")
ref_atomgroup = ref.select_atoms(sel)
pca_space_ref = prot_pca.transform(ref_atomgroup, n_components=n_pcs)
pca_ref_df = pd.DataFrame(pca_space_ref, columns=col_names)
else:
pca_ref_df = None
return (prot_pca, pca_df, pca_ref_df)