Source code for lineage.visualization

# The MIT License (MIT)
#
# Copyright (c) 2016 Ryan Dale
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""Chromosome plotting functions.

Notes
-----
Adapted from Ryan Dale's GitHub Gist for plotting chromosome features. [#Dale]_

References
----------
.. [#Dale] Ryan Dale, GitHub Gist,
   https://gist.github.com/daler/c98fc410282d7570efc3#file-ideograms-py

"""

import logging
import os

import matplotlib
import numpy as np
import pandas as pd
from atomicwrites import atomic_write
from matplotlib import patches
from matplotlib import pyplot as plt

matplotlib.use("Agg")

logger = logging.getLogger(__name__)


[docs] def plot_chromosomes(one_chrom_match, two_chrom_match, cytobands, path, title, build): """Plots chromosomes with designated markers. Parameters ---------- one_chrom_match : pandas.DataFrame segments to highlight on the chromosomes representing one shared chromosome two_chrom_match : pandas.DataFrame segments to highlight on the chromosomes representing two shared chromosomes cytobands : pandas.DataFrame cytobands table loaded with Resources path : str path to destination `.png` file title : str title for plot build : {37} human genome build """ # Height of each chromosome chrom_height = 1.25 # Spacing between consecutive chromosomes chrom_spacing = 1 # Decide which chromosomes to use chromosome_list = ["chr%s" % i for i in range(1, 23)] chromosome_list.append("chrY") chromosome_list.append("chrX") # Keep track of the y positions for chromosomes, and the center of each chromosome # (which is where we'll put the ytick labels) ybase = 0 chrom_ybase = {} chrom_centers = {} # Iterate in reverse so that items in the beginning of `chromosome_list` will # appear at the top of the plot for chrom in chromosome_list[::-1]: chrom_ybase[chrom] = ybase chrom_centers[chrom] = ybase + chrom_height / 2.0 ybase += chrom_height + chrom_spacing # Colors for different chromosome stains color_lookup = { "gneg": (202 / 255, 202 / 255, 202 / 255), # background "one_chrom": (0 / 255, 176 / 255, 240 / 255), "two_chrom": (66 / 255, 69 / 255, 121 / 255), "centromere": (1, 1, 1, 0.6), } df = _patch_chromosomal_features(cytobands, one_chrom_match, two_chrom_match) # Add a new column for colors df["colors"] = df["gie_stain"].apply(lambda x: color_lookup[x]) # Width, height (in inches) figsize = (6.5, 9) fig = plt.figure(figsize=figsize) ax = fig.add_subplot(111) # Now all we have to do is call our function for the chromosome data... for collection in _chromosome_collections(df, chrom_ybase, chrom_height): xranges, yrange, colors = collection ax.broken_barh(xranges, yrange, facecolors=colors) # Axes tweaking ax.set_yticks([chrom_centers[i] for i in chromosome_list]) ax.set_yticklabels(chromosome_list) ax.margins(0.01) ax.axis("tight") handles = [] # setup legend if len(one_chrom_match) > 0: one_chrom_patch = patches.Patch( color=color_lookup["one_chrom"], label="One chromosome shared" ) handles.append(one_chrom_patch) if len(two_chrom_match) > 0: two_chrom_patch = patches.Patch( color=color_lookup["two_chrom"], label="Two chromosomes shared" ) handles.append(two_chrom_patch) no_match_patch = patches.Patch(color=color_lookup["gneg"], label="No shared DNA") handles.append(no_match_patch) centromere_patch = patches.Patch( color=(234 / 255, 234 / 255, 234 / 255), label="Centromere" ) handles.append(centromere_patch) plt.legend(handles=handles, loc="lower right", bbox_to_anchor=(0.95, 0.05)) ax.set_title(title, fontsize=14, fontweight="bold") plt.xlabel("Build " + str(build) + " Chromosome Position", fontsize=10) logger.info("Saving {}".format(os.path.relpath(path))) plt.tight_layout() with atomic_write(path, mode="wb", overwrite=True) as f: plt.savefig(f)
def _chromosome_collections(df, y_positions, height, **kwargs): """ Yields data for features that can be added to an Axes object using ax.broken_barh. Parameters ---------- df : pandas.DataFrame Must at least have columns ['chrom', 'start', 'end', 'color']. If no column 'width', it will be calculated from start/end. y_positions : dict Keys are chromosomes, values are y-value at which to anchor the bars height : float Height of each bar Additional kwargs are passed to ax.broken_barh """ del_width = False if "width" not in df.columns: del_width = True df["width"] = df["end"] - df["start"] for chrom, group in df.groupby("chrom"): yrange = (y_positions["chr" + chrom], height) xranges = group[["start", "width"]].values colors = group["colors"].values yield xranges, yrange, colors if del_width: del df["width"] def _patch_chromosomal_features(cytobands, one_chrom_match, two_chrom_match): """Highlight positions for each chromosome segment / feature. Parameters ---------- cytobands : pandas.DataFrame cytoband table from UCSC one_chrom_match : pandas.DataFrame segments to highlight on the chromosomes representing one shared chromosome two_chrom_match : pandas.DataFrame segments to highlight on the chromosomes representing two shared chromosomes Returns ------- df : pandas.DataFrame the start and stop positions of particular features on each chromosome """ def concat(df, chrom, start, end, gie_stain): return pd.concat( [ df, pd.DataFrame( { "chrom": [chrom], "start": [start], "end": [end], "gie_stain": [gie_stain], } ), ], ignore_index=True, ) chromosomes = cytobands["chrom"].unique() df = pd.DataFrame() for chromosome in chromosomes: chromosome_length = np.max( cytobands[cytobands["chrom"] == chromosome]["end"].values ) # get all markers for this chromosome one_chrom_match_markers = one_chrom_match.loc[ one_chrom_match["chrom"] == chromosome ] two_chrom_match_markers = two_chrom_match.loc[ two_chrom_match["chrom"] == chromosome ] # background of chromosome df = concat(df, chromosome, 0, chromosome_length, "gneg") # add markers for shared DNA on one chromosome for marker in one_chrom_match_markers.itertuples(): df = concat(df, chromosome, marker.start, marker.end, "one_chrom") # add markers for shared DNA on both chromosomes for marker in two_chrom_match_markers.itertuples(): df = concat(df, chromosome, marker.start, marker.end, "two_chrom") # add centromeres for item in cytobands.loc[ (cytobands["chrom"] == chromosome) & (cytobands["gie_stain"] == "acen") ].itertuples(): df = concat(df, chromosome, item.start, item.end, "centromere") return df