# Source code for cobamp.analysis.frequency

from collections import Counter
from itertools import combinations, chain

import pandas as pd

from cobamp.analysis.plotting import display_heatmap


def get_frequency_dataframe(pathway_dict, k_min=1, k_max=1):
    """
    Count how often reactions (or reaction combinations) occur in pathways.

    For every group in ``pathway_dict``, each pathway is expanded into all
    combinations of its elements with sizes ``k_min`` to ``k_max`` (inclusive),
    and the occurrences of every combination are counted over the group's
    pathways.

    Parameters
    ----------
    pathway_dict: A dict mapping a group identifier to an iterable of
        pathways. Each pathway is an iterable of strings (the elements are
        joined with a space to build the row label).
    k_min: Smallest combination size to count (default 1).
    k_max: Largest combination size to count (default 1).

    Returns
    -------
    A pandas DataFrame with one column per key of ``pathway_dict`` and one row
    per combination label. Cells hold the occurrence count; combinations that
    never occur in a group are left as missing values by pandas.
    """

    def _get_possible_combinations(pathway):
        # Labels must be canonical: joining a (frozen)set directly depends on
        # set iteration order, which varies with string hash randomisation and
        # could give the same combination different labels. Sorting the
        # de-duplicated members makes the label deterministic.
        return [
            ' '.join(sorted(set(combo)))
            for k in range(k_min, k_max + 1)
            for combo in combinations(pathway, k)
        ]

    def _get_reaction_frequencies(pathways):
        counts = Counter()
        for pathway in pathways:
            counts.update(_get_possible_combinations(pathway))
        return counts

    return pd.DataFrame(
        {ident: _get_reaction_frequencies(pathways) for ident, pathways in pathway_dict.items()})
if __name__ == '__main__':
    # Demo: build random groups of random "EFMs" (sets of reaction names),
    # compute their reaction frequency table and show it as a heatmap.
    n_reactions = 30
    efm_size_range = (1, 20)
    efm_number = 20
    efm_group_number = 5


    def generate_random_efms(n_reactions, efm_size_range, efm_number, efm_group_number):
        """
        Generate random test data for the frequency analysis.

        Parameters
        ----------
        n_reactions: Number of random reaction names to create.
        efm_size_range: ``(low, high)`` inclusive bounds for the number of
            random draws used to build each EFM (duplicates collapse, so the
            resulting set may be smaller).
        efm_number: Number of EFMs per group.
        efm_group_number: Number of groups.

        Returns
        -------
        A tuple ``(reaction_names, group_names, efm_groups)`` where
        ``efm_groups`` maps each group name to a list of sets of reaction
        names.
        """
        from random import randint

        def random_slightly_readable_string_generator(length):
            # Alternates vowels and consonants (lowercase a-z), starting with
            # a randomly chosen kind, by rejecting characters of the wrong kind.
            s = ""
            vwls = ['a', 'e', 'i', 'o', 'u']
            vwl_flag = bool(randint(0, 1))
            while len(s) < length:
                some_char = chr(randint(97, 122))
                if (some_char in vwls) == vwl_flag:
                    s += some_char
                    vwl_flag = not vwl_flag
            return s

        reaction_names = [random_slightly_readable_string_generator(randint(4, 10))
                          for _ in range(n_reactions)]
        # One name per group. The count must follow efm_group_number, not
        # n_reactions, otherwise asking for more groups than reactions would
        # run past the end of the name list.
        group_names = [random_slightly_readable_string_generator(randint(10, 15))
                       for _ in range(efm_group_number)]
        efm_groups = {
            group_name: [
                {reaction_names[randint(0, n_reactions - 1)]
                 for _ in range(randint(*efm_size_range))}
                for _ in range(efm_number)
            ]
            for group_name in group_names
        }
        return reaction_names, group_names, efm_groups


    reaction_names, group_names, efm_groups = generate_random_efms(
        n_reactions, efm_size_range, efm_number, efm_group_number)
    df = get_frequency_dataframe(efm_groups)
    display_heatmap(df)