Source code for sa.analysis

"""
##########################
analysis (``sa.analysis``)
##########################

This script performs statistical analysis of strains within a plate and between plates.

"""
import numpy as np
import sklearn
from collections import defaultdict
from operator import itemgetter
from sklearn.metrics.pairwise import euclidean_distances

import utilities as utils
import plotting
import methods

#For new wild-type ORFs, just add them to this list
WT_ORFs = ["YOR202W"]
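#Illustrative sketch (not part of the original pipeline): the routines below
#repeatedly call :func:`sa.methods.standardize` before computing distances and
#clusterings. Assuming that step amounts to the usual column-wise z-scoring of
#the numeric plate features, a minimal version could look like this.
def _example_standardize(X):
    """Return column-wise z-scores of a 2D array X (sketch only)."""
    X = np.asarray(X, dtype = float)
    std = X.std(axis = 0)
    std[std == 0] = 1.0  #avoid division by zero for constant features
    return (X - X.mean(axis = 0)) / std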
    
def strains_1p_WT(meta, plate, res_path, plot_attr_hist = True):
    """
    Analyze WT strains from one plate.

    First, filter out MT strains, i.e. retain strains with ORF equal to YOR202W. Continue by
    standardizing the features and detecting outliers using the elliptic envelope method. Detected
    outliers are saved to a file named `<plate-title>_outliers.csv`. Euclidean distances are computed
    between WT strains and plotted as a heat map. Additionally, mean distances between all strains
    (WT strains without outliers and MT strains) are plotted in a heat map as located on the plate.
    WT strains are clustered to reveal possible structures and assess their homogeneity. Also, the
    intersection between clusters and WT outliers is printed to the screen. PCA is computed and the
    explained variance is printed. Standardized plate data are saved in Orange and CSV format.

    :param meta: Meta data for one plate, `(file_name, attr_names)`, as returned from :mod:`utilities.read`.
    :type meta: `tuple`
    :param plate: Plate data as returned from :mod:`utilities.read`.
    :type plate: `list`
    :param res_path: Full path to the directory where results are to be saved.
    :type res_path: `str`

    .. seealso:: See also functions :func:`sa.methods.standardize`, :func:`sa.plotting.plot_hist_with_norm_fit`,
        :func:`sa.methods.detect_outliers`, :func:`sa.plotting.plot_plate_by_mean_well_distance`,
        :func:`sa.methods.k_means`, :func:`sa.methods.outlier2cluster`, :func:`sa.methods.decompose_PCA`,
        :func:`sa.utilities.to_orange` and :func:`sa.utilities.to_csv`.
    """
    file_name, attrs = meta
    out_name = res_path + file_name[:-4]
    dataf = utils.filter_attribute(attrs, plate, attr_name = "ORF", attr_values = WT_ORFs)
    dnpf = utils.data2np(dataf, skip_first = 5, skip_last = 1)
    if plot_attr_hist:
        for i in xrange(dnpf.shape[1]):
            plotting.plot_hist_with_norm_fit(dnpf[:, i], attrs[i], title = file_name,
                                             out_name = out_name + "_" + attrs[i])
    dnp = methods.standardize(dnpf)
    utils.pp_plate_info(meta, plate)

    #analysis
    en_out, r_data = methods.detect_outliers(dnp, dataf,
                                             out_name = "%s%s_outliers.csv" % (res_path, file_name[:-4]), save = True)
    r_dataf, r_dnp = r_data

    #if you have outliers in data set, normalizing data will scale normal data to a very small interval
    #dnp = methods.normalize(r_dnp)

    dist_matrix = methods.compute_distance(r_dnp, title = file_name, out_name = out_name,
                                           labels = tuple(r_dataf[i][0] for i in range(r_dnp.shape[0])))

    wt, mt = utils.split_WT_MT(meta, plate, wt_mt_name = "ORF", wt_name = WT_ORFs)
    #WT
    dnp_wt = utils.data2np(wt, skip_first = 5, skip_last = 1)
    dnp_wt = methods.standardize(dnp_wt)
    _, (plate_wtr, dnp_wtr) = methods.detect_outliers(dnp_wt, wt, out_name = None, save = False)
    #MUTANTS
    dnp_mt = utils.data2np(mt, skip_first = 5, skip_last = 1)
    dnp_mt = methods.standardize(dnp_mt)

    dnp_os = np.vstack((dnp_wtr, dnp_mt))
    plate_os = plate_wtr + mt
    dist_matrix_os = euclidean_distances(dnp_os, dnp_os)
    mw_matrix = plotting.plot_plate_by_mean_well_distance(dist_matrix_os, title = file_name,
                                                          out_name = out_name, plate = plate_os)

    pred_kmeans, score_kmeans, trans_kmeans = methods.k_means(dnp, dataf, k_range = range(2, 6),
                                                              out_name_silhouette = out_name,
                                                              out_dir_predictions = res_path,
                                                              out_name_predictions = file_name[:-4],
                                                              save_silhouette = True, save_predictions = True,
                                                              wt_name = WT_ORFs)
    methods.outlier2cluster(en_out[0], pred_kmeans)

    pca_trans = methods.decompose_PCA(r_dnp, r_dataf, n_components = 3, title = file_name, out_name = out_name)

    #save data used in analysis in Orange format
    utils.to_orange(attrs, plate_os, dnp_os, out_name = out_name, noncomp_first = 5, noncomp_last = 1)
    #save data used in analysis in CSV format
    utils.to_csv(attrs, plate_os, dnp_os, out_name = out_name, noncomp_first = 5, noncomp_last = 1)
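#Illustrative sketch: the docstring above mentions outlier detection with the
#elliptic envelope method. Assuming :func:`sa.methods.detect_outliers` wraps
#sklearn.covariance.EllipticEnvelope on the standardized features, a minimal
#version could look like this (the contamination level is hypothetical).
def _example_detect_outliers(X, contamination = 0.1):
    """Return a boolean mask marking outlying rows of X (sketch only)."""
    from sklearn.covariance import EllipticEnvelope
    envelope = EllipticEnvelope(contamination = contamination)
    envelope.fit(X)
    return envelope.predict(X) == -1  #EllipticEnvelope labels outliers with -1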
def strains_Np_WT(meta, plates, res_path):
    """
    Analyze WT strains from many plates by combining them into one set.

    :param meta: Meta data, `[(file_name1, attr_names1), (file_name2, attr_names2), ...]`, as returned from :mod:`utilities.read`.
    :type meta: `list`
    :param plates: Plates data as returned from :mod:`utilities.read`.
    :type plates: `list`
    :param res_path: Full path to the directory where results are to be saved.
    :type res_path: `str`

    .. seealso:: See also functions :func:`sa.methods.standardize`, :func:`sa.methods.detect_outliers`,
        :func:`sa.methods.k_means`, :func:`sa.methods.outlier2cluster`, :func:`sa.methods.decompose_PCA`,
        :func:`sa.utilities.to_orange` and :func:`sa.utilities.to_csv`.
    """
    print
    print "==== Joined standardization and outlier removal for all plates ===="
    print
    file_nameL, attrsL = zip(*meta)
    metaj, platej = utils.combine(meta, plates)
    file_name, attrs = metaj
    out_name = res_path + file_name
    dataf = utils.filter_attribute(attrs, platej, attr_name = "ORF", attr_values = WT_ORFs)
    dnpf = utils.data2np(dataf, skip_first = 5, skip_last = 1)
    dnp = methods.standardize(dnpf)

    #combined analysis as one plate
    en_out, r_data = methods.detect_outliers(dnp, dataf, out_name = "%scombined_outliers.csv" % res_path, save = True)
    r_dataf, r_dnp = r_data

    #if you have outliers in data set, normalizing data will scale normal data to a very small interval
    #dnp = methods.normalize(dnp)

    dist_matrix = methods.compute_distance(r_dnp, title = file_name + " (joined std., o.r.)",
                                           out_name = out_name + "__joined_std_out",
                                           labels = tuple(r_dataf[i][0] for i in range(len(r_dataf))))

    pred_kmeans, score_kmeans, trans_kmeans = methods.k_means(dnp, dataf, k_range = range(2, 6),
                                                              out_name_silhouette = out_name,
                                                              out_dir_predictions = res_path,
                                                              out_name_predictions = "combined",
                                                              save_silhouette = True, save_predictions = True,
                                                              wt_name = WT_ORFs)
    methods.outlier2cluster(en_out[0], pred_kmeans)

    pca_trans = methods.decompose_PCA(r_dnp, r_dataf, n_components = 3, title = file_name, out_name = out_name)

    #separate standardization and outlier removal for each plate
    print
    print "==== Separate standardization and outlier removal for each plate ===="
    print
    dnp_all, plate_all = [], []
    for smeta, splate in zip(meta, plates):
        wt, mt = utils.split_WT_MT(smeta, splate, wt_mt_name = "ORF", wt_name = WT_ORFs)
        #WT
        dnp_wt = utils.data2np(wt, skip_first = 5, skip_last = 1)
        dnp_wt = methods.standardize(dnp_wt)
        _, (plate_wtr, dnp_wtr) = methods.detect_outliers(dnp_wt, wt, out_name = None, save = False)
        #MUTANTS
        dnp_mt = utils.data2np(mt, skip_first = 5, skip_last = 1)
        dnp_mt = methods.standardize(dnp_mt)
        #save
        plate_all.extend(plate_wtr)
        plate_all.extend(mt)
        dnp_all.extend(dnp_wtr)
        dnp_all.extend(dnp_mt)

    dist_matrix = methods.compute_distance(np.array(dnp_all), title = file_name + " (per plate std., o.r.)",
                                           out_name = out_name + "__separate_std_out",
                                           labels = tuple(r_dataf[i][0] for i in range(len(r_dataf))))

    #save data used in analysis in Orange format
    utils.to_orange(attrs, plate_all, np.array(dnp_all), out_name = out_name, noncomp_first = 5, noncomp_last = 1)
    #save data used in analysis in CSV format
    utils.to_csv(attrs, plate_all, np.array(dnp_all), out_name = out_name, noncomp_first = 5, noncomp_last = 1)
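#Illustrative sketch: :func:`sa.methods.k_means` is called with k_range =
#range(2, 6) and a silhouette output name, which suggests model selection by
#silhouette score. A minimal version of that recipe with sklearn (hypothetical
#parameters, not the project's implementation) could look like this.
def _example_k_means_silhouette(X, k_range = range(2, 6)):
    """Return (best_k, labels) that maximize the silhouette score (sketch only)."""
    from sklearn.cluster import KMeans
    from sklearn.metrics import silhouette_score
    best = None
    for k in k_range:
        labels = KMeans(n_clusters = k, random_state = 0).fit_predict(X)
        score = silhouette_score(X, labels)
        if best is None or score > best[0]:
            best = (score, k, labels)
    return best[1], best[2]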
def strains_Np_MT(data_del, data_ts, data_sg, repeats_path, res_path, repeats_keys = ["RT", "37"], standardize = True):
    """
    Find mutants with significantly different profiles than wild-type cells by estimating distances
    between WT strains, between MT strains, and between WT and MT strains, and assessing significance
    with a permutation test.

    :param data_del: Deletion collection plates data as returned from :mod:`utilities.read`.
    :type data_del: `tuple` (meta_data, plates_data)
    :param data_ts: TS collection plates data as returned from :mod:`utilities.read`.
    :type data_ts: `tuple` (meta_data, plates_data)
    :param data_sg: SG collection plates data as returned from :mod:`utilities.read`.
    :type data_sg: `tuple` (meta_data, plates_data)
    :param repeats_path: Full path to file with multi-occurring mutant strains specification.
    :type repeats_path: `str`
    :param res_path: Full path to the directory where results are to be saved.
    :type res_path: `str`
    :param repeats_keys: Names of TS (temperature sensitive mutant strains) plates' extensions. By default these are ["RT", "37"].
    :type repeats_keys: `list`
    :param standardize: Indicator whether to work with standardized or original features. By default, the data set is standardized.
    :type standardize: `bool`

    .. seealso:: See also functions :func:`sa.plotting.plot_hist_mean_MT_WT_distance`, :func:`sa.plotting.plot_hist_WT_WT_distance`,
        :func:`sa.plotting.plot_hist_mean_MT_WT__WT_WT_distance` and :func:`sa.plotting.plot_hist_signif_MT_WT`.
    """
    plates_wt, dnp_wtc = [], []
    plates_mt, dnp_mtc = [], []
    for coll in [data_del, data_ts, data_sg]:
        for meta, plate in zip(coll[0], coll[1]):
            wt, mt = utils.split_WT_MT(meta, plate, wt_mt_name = "ORF", wt_name = WT_ORFs)
            #WT
            dnp_wt = utils.data2np(wt, skip_first = 5, skip_last = 1)
            if standardize:
                dnp_wt = methods.standardize(dnp_wt)
            _, (plate_wtr, dnp_wtr) = methods.detect_outliers(dnp_wt, wt, out_name = None, save = False)
            #MUTANTS
            dnp_mt = utils.data2np(mt, skip_first = 5, skip_last = 1)
            if standardize:
                dnp_mt = methods.standardize(dnp_mt)
            #save
            plates_wt.extend(plate_wtr)
            dnp_wtc.extend(dnp_wtr)
            plates_mt.extend(mt)
            dnp_mtc.extend(dnp_mt)
    dnp_mtc = np.array(dnp_mtc)
    dnp_wtc = np.array(dnp_wtc)
    print "No. observations of mutant strains: %d" % len(plates_mt)
    print "No. observations of wild-type strains: %d" % len(plates_wt)

    #Compute and save Euclidean distances between mutant and wild-type strains to file.
    print "Computing Euclidean distances between mutant and wild-type strains."
    dist_mat = euclidean_distances(dnp_mtc, dnp_wtc)
    #mean distance of each mutant to all wild-type strains
    m_mt_wt = np.mean(dist_mat, axis = 1)
    print "Mean distance between mutant strain and wild-type: %5.3f" % np.mean(m_mt_wt)
    assert len(plates_mt) == len(m_mt_wt), "Dimension mismatch between mutant computational and meta features."

    #Construct representation of mutant meta data and their distances.
    #Average replicates unless they come from the TS collection.
    _, reps = utils.read_repl(repeats_path, repeats_keys)
    repsd = defaultdict(list)
    for orf, pn, r, c in reps:
        repsd[orf].append((pn, r, c))
    dstd = {}
    for i in xrange(len(plates_mt)):
        pn, date, r, c = plates_mt[i][1:5]
        orf = plates_mt[i][-1]
        dstd[(orf, pn, r, c)] = m_mt_wt[i]
    mutd = defaultdict(list)
    for i in xrange(len(plates_mt)):
        orf = plates_mt[i][-1]
        pn, date, r, c = plates_mt[i][1:5]
        mutd[orf].append((orf, dstd[(orf, pn, r, c)], (pn, date, r, c)))
    mut = []
    for orf, vals in mutd.iteritems():
        if len(vals) == 1:
            # (orf, mean_dist, [(pn1, date1, r1, c1, dist1), (pn2, date2, r2, c2, dist2), ...])
            mut.append((orf, vals[0][1], [vals[0][2]]))
        else:
            valsc = []
            for val in vals:
                TS_in = sum([(k in val[2][0]) for k in repeats_keys]) > 0
                if TS_in:
                    mut.append((orf, val[1], [val[2]]))
                else:
                    valsc.append(val)
            if valsc:
                mean = np.mean([el[1] for el in valsc])
                mut.append((orf, mean, [(el[2][0], el[2][1], el[2][2], el[2][3], el[1]) for el in valsc]))
    mut.sort(reverse = True, key = itemgetter(1))

    #Save computed distances between mutants and WT to file
    out_name = res_path + "MT-WT_distance"
    f = open(out_name + ".csv", "wt")
    f.write("%s\n" % ",".join(["ORF", "distance", "meta"]))
    for el in mut:
        orf, meand, repsm = el
        f.write("%s\n" % (",".join([orf, str(meand), ",".join(str(el).replace("'", "") for el in repsm)])))
    f.close()
    print "Mean mutant distances to WT saved to: %s" % out_name

    #Save processed MT profiles to csv file
    plates_mt_d = {(p[-1], p[1], p[2], p[3], p[4]): i for i, p in enumerate(plates_mt)}
    attrs = data_del[0][0][1]
    data_org, dnp = [], []
    for el in mut:
        orf, _, repsm = el
        for rep in repsm:
            pl, dt, r, c = rep[:4]
            data_org.append(plates_mt[plates_mt_d[(orf, pl, dt, r, c)]])
            dnp.append(dnp_mtc[plates_mt_d[(orf, pl, dt, r, c)]])
    utils.to_csv(attrs, data_org, np.array(dnp), out_name = out_name, noncomp_first = 5, noncomp_last = 1)

    dm_wt_wt = euclidean_distances(dnp_wtc, dnp_wtc)
    wt_wt = np.tril(dm_wt_wt, -1)
    wt_wt = wt_wt[np.nonzero(wt_wt)]
    m_wt_wt = [np.mean(np.hstack((dm_wt_wt[i, :i], dm_wt_wt[i, i+1:]))) for i in xrange(dm_wt_wt.shape[0])]

    #Save computed distances between WT and WT to file
    out_name = res_path + "WT-WT_distance.csv"
    f = open(out_name, "wt")
    f.write("%s\n" % ",".join(["ORF", "plate", "date", "row", "col", "distance"]))
    wt_sort = [(i, ds) for i, ds in enumerate(m_wt_wt)]
    wt_sort.sort(reverse = True, key = itemgetter(1))
    for i, dist in wt_sort:
        orf = plates_wt[i][-1]
        f.write("%s,%5.4f\n" % (",".join([orf] + plates_wt[i][1:5]), dist))
    f.close()
    print "Mean WT distances to WT saved to: %s" % out_name

    #Save processed MT and WT profiles to csv file
    mt_wt_sort = [("WT", i, ds) for (i, ds) in wt_sort] + [("MT", orf, mean, lst) for (orf, mean, lst) in mut]
    mt_wt_sort.sort(reverse = True, key = itemgetter(2))
    attrs = data_del[0][0][1]
    data_org, dnp = [], []
    for el in mt_wt_sort:
        if el[0] == "WT":
            _, i, ds = el
            data_org.append(plates_wt[i])
            dnp.append(dnp_wtc[i])
        elif el[0] == "MT":
            _, orf, _, repsm = el
            for rep in repsm:
                pl, dt, r, c = rep[:4]
                data_org.append(plates_mt[plates_mt_d[(orf, pl, dt, r, c)]])
                dnp.append(dnp_mtc[plates_mt_d[(orf, pl, dt, r, c)]])
    utils.to_csv(attrs, data_org, np.array(dnp), out_name = res_path + "WT_MT_by_distance",
                 noncomp_first = 5, noncomp_last = 1)

    plotting.plot_hist_mean_MT_WT_distance([el[1] for el in mut], res_path)
    plotting.plot_hist_WT_WT_distance(wt_wt, res_path)
    plotting.plot_hist_mean_MT_WT__WT_WT_distance([el[1] for el in mut], m_wt_wt, res_path)
    plotting.plot_hist_signif_MT_WT(mut, plates_mt, dnp_mtc, np.mean([el[1] for el in mut]), res_path)
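#Illustrative sketch: the docstring above mentions assessing significance with
#a permutation test. One generic formulation (an assumption, not necessarily
#what :func:`sa.plotting.plot_hist_signif_MT_WT` computes) pools a mutant
#profile with the wild-type profiles and asks how often a randomly chosen
#pooled profile lies at least as far, on average, from the remaining profiles.
def _example_permutation_test(mt_profile, wt_profiles, n_perm = 1000, seed = 0):
    """Return an empirical p-value for the mean MT-WT distance (sketch only)."""
    rng = np.random.RandomState(seed)
    pool = np.vstack((np.asarray(mt_profile).reshape(1, -1), np.asarray(wt_profiles)))
    def mean_dist(i):
        rest = np.delete(pool, i, axis = 0)
        return np.mean(euclidean_distances(pool[i].reshape(1, -1), rest))
    observed = mean_dist(0)
    null = np.array([mean_dist(rng.randint(pool.shape[0])) for _ in xrange(n_perm)])
    return (np.sum(null >= observed) + 1.0) / (n_perm + 1.0)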
def strains_coll(data_del, data_ts, data_sg, res_path):
    """
    Preprocess plates from each collection (standardize features and remove outlier WT strains) and
    compute distances between strains from the same and different collections. Histograms of distances
    between collections are saved to directory :param:`res_path`.

    :param data_del: Deletion collection plates data as returned from :mod:`utilities.read`.
    :type data_del: `tuple` (meta_data, plates_data)
    :param data_ts: TS collection plates data as returned from :mod:`utilities.read`.
    :type data_ts: `tuple` (meta_data, plates_data)
    :param data_sg: SG collection plates data as returned from :mod:`utilities.read`.
    :type data_sg: `tuple` (meta_data, plates_data)
    :param res_path: Full path to the directory where results are to be saved.
    :type res_path: `str`

    .. seealso:: See also function :func:`sa.plotting.plot_hist_coll`.
    """
    dnps = []
    for coll_name, coll in [("DEL", data_del), ("TS", data_ts), ("SG", data_sg)]:
        print "Preprocessing %s collection" % coll_name
        dnp = []
        for meta, plate in zip(coll[0], coll[1]):
            wt, mt = utils.split_WT_MT(meta, plate, wt_mt_name = "ORF", wt_name = WT_ORFs)
            #WT
            dnp_wt = utils.data2np(wt, skip_first = 5, skip_last = 1)
            dnp_wt = methods.standardize(dnp_wt)
            _, (plate_wtr, dnp_wtr) = methods.detect_outliers(dnp_wt, wt, out_name = None, save = False)
            #MUTANTS
            dnp_mt = utils.data2np(mt, skip_first = 5, skip_last = 1)
            dnp_mt = methods.standardize(dnp_mt)
            #save
            dnp.extend(dnp_wtr)
            dnp.extend(dnp_mt)
        dnps.append(np.array(dnp))
    plotting.plot_hist_coll([dnps[0], dnps[2], dnps[1]], ["Del", "SG", "TS"], out_dir = res_path)
    plotting.plot_hist_coll([dnps[2], dnps[0], dnps[1]], ["SG", "Del", "TS"], out_dir = res_path)
    plotting.plot_hist_coll([dnps[1], dnps[2], dnps[0]], ["TS", "SG", "Del"], out_dir = res_path)
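#Illustrative sketch: the histograms produced by :func:`sa.plotting.plot_hist_coll`
#presumably summarize pairwise Euclidean distances within and between the
#preprocessed collections; flattening a pairwise distance matrix yields the
#values to bin (an assumption about the plotting helper, not its actual code).
def _example_between_coll_distances(dnp_a, dnp_b):
    """Return a 1D array of all pairwise distances between two collections (sketch only)."""
    return euclidean_distances(np.asarray(dnp_a), np.asarray(dnp_b)).ravel()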
def strains_repl(data_del, data_ts, data_sg, repeats_path, res_path, repeats_keys = ["RT", "37"]):
    """
    Analyze mutants that occur multiple times in the data set. First standardize the data and then
    analyze the distance distribution of replicate observations and of all observations.

    :param data_del: Deletion collection plates data as returned from :mod:`utilities.read`.
    :type data_del: `tuple` (meta_data, plates_data)
    :param data_ts: TS collection plates data as returned from :mod:`utilities.read`.
    :type data_ts: `tuple` (meta_data, plates_data)
    :param data_sg: SG collection plates data as returned from :mod:`utilities.read`.
    :type data_sg: `tuple` (meta_data, plates_data)
    :param repeats_path: Full path to file with multi-occurring mutants specification.
    :type repeats_path: `str`
    :param res_path: Full path to the directory where results are to be saved.
    :type res_path: `str`
    :param repeats_keys: Names of TS (temperature sensitive mutants) plates' extensions. By default these are ["RT", "37"].
    :type repeats_keys: `list`

    .. seealso:: See also function :func:`sa.methods.analyze_repl`.
    """
    c_plates = []
    rc_dnp = []
    for _, coll_plates in [data_del, data_ts, data_sg]:
        for plate in coll_plates:
            dnp = utils.data2np(plate, skip_first = 5, skip_last = 1)
            dnp = methods.standardize(dnp)
            c_plates.extend(plate)
            rc_dnp.extend(dnp)
    print "Total number of examples %d" % len(c_plates)
    c_dnp = np.array(rc_dnp)
    methods.analyze_repl(repeats_path, c_plates, c_dnp, res_path, keys = repeats_keys, save_dist_mat = False)
def fss(data_del, data_ts, data_sg, res_path):
    """
    Feature subset selection (FSS) for unsupervised learning: clustering based on the feature
    subspace with the highest score. A low-dimensional representation (MDS) of the best clustering
    is saved to directory :param:`res_path`.

    :param data_del: Deletion collection plates data as returned from :mod:`utilities.read`.
    :type data_del: `tuple` (meta_data, plates_data)
    :param data_ts: TS collection plates data as returned from :mod:`utilities.read`.
    :type data_ts: `tuple` (meta_data, plates_data)
    :param data_sg: SG collection plates data as returned from :mod:`utilities.read`.
    :type data_sg: `tuple` (meta_data, plates_data)
    :param res_path: Full path to the directory where results are to be saved.
    :type res_path: `str`

    .. seealso:: See also functions :func:`sa.methods.fss_wrapper`, :func:`sa.methods.decompose_MDS`
        and :func:`sa.utilities.std_prep`.
    """
    dnp, plates = utils.std_prep(data_del, data_ts, data_sg, res_path, wt_attr_name = "ORF", wt_name = WT_ORFs)
    dnp = dnp[:, 1:]
    attr_names = data_del[0][0][1][6:-1]
    assert dnp.shape[1] == len(attr_names), "The shapes of attribute space and feature names do not match."
    pred_best, score_best, attr_best = methods.fss_wrapper(dnp, plates, attr_names, out_dir = res_path)
    _ = methods.decompose_MDS(dnp, pred_best, out_dir = res_path, save_coordinates = True)
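#Illustrative sketch: the low-dimensional view of the best clustering is
#produced by :func:`sa.methods.decompose_MDS`. Assuming it relies on
#sklearn.manifold.MDS over Euclidean dissimilarities, a minimal version could
#look like this (n_components and random_state are hypothetical choices).
def _example_mds_embedding(X, n_components = 2, seed = 0):
    """Return an MDS embedding of the rows of X (sketch only)."""
    from sklearn.manifold import MDS
    mds = MDS(n_components = n_components, dissimilarity = "euclidean", random_state = seed)
    return mds.fit_transform(np.asarray(X, dtype = float))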
def fss_post_cluster(data_del, data_ts, data_sg, fss_subset_path, fss_cluster_path, res_path):
    """
    Read clustering predictions and description of feature subspace. Run MDS optimization and save
    plotted coordinates.

    :param data_del: Deletion collection plates data as returned from :mod:`utilities.read`.
    :type data_del: `tuple` (meta_data, plates_data)
    :param data_ts: TS collection plates data as returned from :mod:`utilities.read`.
    :type data_ts: `tuple` (meta_data, plates_data)
    :param data_sg: SG collection plates data as returned from :mod:`utilities.read`.
    :type data_sg: `tuple` (meta_data, plates_data)
    :param fss_subset_path: Full path to the file with feature space description as obtained by :mod:`analysis.fss`.
    :type fss_subset_path: `str`
    :param fss_cluster_path: Full path to the file with predictions for observations as obtained by :mod:`analysis.fss`.
    :type fss_cluster_path: `str`
    :param res_path: Full path to the directory where results are to be saved.
    :type res_path: `str`

    .. seealso:: See also functions :func:`sa.methods.decompose_MDS` and :func:`sa.utilities.std_prep`.
    """
    dnp, plates = utils.std_prep(data_del, data_ts, data_sg, res_path, wt_attr_name = "ORF", wt_name = WT_ORFs)
    dnp = dnp[:, 1:]
    attr_names = data_del[0][0][1][6:-1]
    assert dnp.shape[1] == len(attr_names), "The shapes of attribute space and feature names do not match."

    f_subset = open(fss_subset_path, "r")
    fs = f_subset.readline().strip().split(",")[1:]
    f_subset.close()
    print "Feature subspace size: %d" % len(fs)
    print "Feature subspace: %s" % ", ".join(fs)

    f_cluster = open(fss_cluster_path, "r")
    pred = [line.strip().split(",") for line in f_cluster]
    f_cluster.close()

    attr2idx = {attr: i for i, attr in enumerate(attr_names)}
    fs_idx = [attr2idx[attr] for attr in fs]
    plt2idx = {(el[1], el[3], el[4]): i for i, el in enumerate(plates)}
    c_labels = np.ones(dnp.shape[0]) * -1
    for el in pred:
        plt, r, c = el[1], el[3], el[4]
        if (plt, r, c) in plt2idx:
            c_labels[plt2idx[(plt, r, c)]] = int(el[5])
    dnp_fss = dnp[:, fs_idx]
    assert dnp_fss.shape[1] == len(fs), "Dimension mismatch."
    _ = methods.decompose_MDS(dnp_fss, c_labels, out_dir = res_path, save_coordinates = True)
def strains_Np_novelty_MT(data_del, data_ts, data_sg, res_path):
    """
    Find mutants with significantly different profiles than wild-type cells by novelty detection
    using one-class SVM and GMM.

    :param data_del: Deletion collection plates data as returned from :mod:`utilities.read`.
    :type data_del: `tuple` (meta_data, plates_data)
    :param data_ts: TS collection plates data as returned from :mod:`utilities.read`.
    :type data_ts: `tuple` (meta_data, plates_data)
    :param data_sg: SG collection plates data as returned from :mod:`utilities.read`.
    :type data_sg: `tuple` (meta_data, plates_data)
    :param res_path: Full path to the directory where results are to be saved.
    :type res_path: `str`

    .. seealso:: See also functions :func:`sa.methods.detect_novelties_SVM` and :func:`sa.methods.detect_novelties_GMM`.
    """
    plates_wt, dnp_wtc = [], []
    plates_mt, dnp_mtc = [], []
    for coll in [data_del, data_ts, data_sg]:
        for meta, plate in zip(coll[0], coll[1]):
            wt, mt = utils.split_WT_MT(meta, plate, wt_mt_name = "ORF", wt_name = WT_ORFs)
            #WT
            dnp_wt = utils.data2np(wt, skip_first = 5, skip_last = 1)
            dnp_wt = methods.standardize(dnp_wt)
            _, (plate_wtr, dnp_wtr) = methods.detect_outliers(dnp_wt, wt, out_name = None, save = False)
            #MUTANTS
            dnp_mt = utils.data2np(mt, skip_first = 5, skip_last = 1)
            dnp_mt = methods.standardize(dnp_mt)
            #save
            plates_wt.extend(plate_wtr)
            dnp_wtc.extend(dnp_wtr)
            plates_mt.extend(mt)
            dnp_mtc.extend(dnp_mt)
    dnp_mtc = np.array(dnp_mtc)
    dnp_wtc = np.array(dnp_wtc)
    print "No. observations of mutant strains: %d" % len(plates_mt)
    print "No. observations of wild-type strains: %d" % len(plates_wt)

    methods.detect_novelties_SVM(dnp_wtc, dnp_mtc, plates_wt, plates_mt, res_path, save_visualization = True)
    methods.detect_novelties_GMM(dnp_wtc, dnp_mtc, plates_wt, plates_mt, res_path, save_visualization = True,
                                 wt_name = WT_ORFs)
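#Illustrative sketch: assuming :func:`sa.methods.detect_novelties_SVM` follows
#the standard one-class SVM recipe (train on wild-type profiles only, flag
#mutants predicted as -1), a minimal version could look like this; nu and
#gamma below are hypothetical and would need tuning.
def _example_novelty_svm(dnp_wt, dnp_mt, nu = 0.1, gamma = 0.1):
    """Return a boolean mask of mutant rows flagged as novelties (sketch only)."""
    from sklearn.svm import OneClassSVM
    clf = OneClassSVM(kernel = "rbf", nu = nu, gamma = gamma)
    clf.fit(np.asarray(dnp_wt, dtype = float))
    return clf.predict(np.asarray(dnp_mt, dtype = float)) == -1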