Source code for indra.sources.tas.api

from __future__ import absolute_import, print_function, unicode_literals
from builtins import dict, str

__all__ = ['process_csv']

from os import path
from .processor import TasProcessor
from indra.util import read_unicode_csv


HERE = path.dirname(path.abspath(__file__))
DATAFILE_NAME = 'classification_hms_cmpds_symbol.csv'


def _load_data():
    """Load the data from the csv in data.

    The "gene_id" is the Entrez gene id, and the "approved_symbol" is the
    standard gene symbol. The "hms_id" is the LINCS ID for the drug.

    Returns
    -------
    data : list[dict]
        A list of dicts of row values keyed by the column headers extracted from
        the csv file, described above.
    """
    # Get the cwv reader object.
    csv_path = path.join(HERE, path.pardir, path.pardir, 'resources',
                         DATAFILE_NAME)
    data_iter = list(read_unicode_csv(csv_path))

    # Get the headers.
    headers = data_iter[0]

    # For some reason this heading is oddly formatted and inconsistent with the
    # rest, or with the usual key-style for dicts.
    headers[headers.index('Approved.Symbol')] = 'approved_symbol'
    return [{header: val for header, val in zip(headers, line)}
            for line in data_iter[1:]]


[docs]def process_csv(affinity_class_limit=2):
    """Return a TasProcessor for the contents of the csv contained in data.

    Interactions are classified into the following classes based on affinity:
      | 1  -- Kd < 100nM
      | 2  -- 100nM < Kd < 1uM
      | 3  -- 1uM < Kd < 10uM
      | 10 -- Kd > 10uM
    By default, only classes 1 and 2 are extracted but the affinity_class_limit
    parameter can be used to change the upper limit of extracted classes.

    Parameters
    ----------
    affinity_class_limit : Optional[int]
        Defines the highest class of binding affinity that is included in the
        extractions. Default: 2

    Returns
    -------
    TasProcessor
        A TasProcessor object which has a list of INDRA Statements extracted
        from the CSV file representing drug-target inhibitions in its
        statements attribute.
    """
    return TasProcessor(_load_data(), affinity_class_limit)