Source code for indra.databases.lincs_client

from __future__ import absolute_import, print_function, unicode_literals
from builtins import dict, str

__all__ = ['get_drug_target_data', 'LincsClient', 'load_lincs_csv']

import os
import sys
import json
import logging
import requests
from io import StringIO, BytesIO
from indra.util import read_unicode_csv_fileobj


# Logger named after this module.
logger = logging.getLogger(__name__)


# Base URL that the dataset download URLs in this module are built from.
LINCS_URL = 'http://lincs.hms.harvard.edu/db'


# The JSON resource files are located in the "resources" folder that sits
# one level above the directory containing this module.
resources = os.path.join(
    os.path.abspath(os.path.dirname(__file__)),
    os.path.pardir,
    'resources',
)
lincs_sm, lincs_prot = (
    os.path.join(resources, fname)
    for fname in ('lincs_small_molecules.json', 'lincs_proteins.json')
)


class LincsClient(object):
    """Client for querying LINCS small molecules and proteins.

    The small molecule and protein metadata are read from the JSON resource
    files when the client is instantiated and are kept in memory as
    dictionaries keyed by HMS LINCS ID.
    """
    def __init__(self):
        with open(lincs_sm, 'r') as fh:
            self._sm_data = json.load(fh)
        with open(lincs_prot, 'r') as fh:
            self._prot_data = json.load(fh)

    def get_small_molecule_name(self, hms_lincs_id):
        """Get the name of a small molecule from the LINCS sm metadata.

        Parameters
        ----------
        hms_lincs_id : str
            The HMS LINCS ID of the small molecule.

        Returns
        -------
        str or None
            The name of the small molecule, or None if there is no entry
            for the given ID.
        """
        entry = self._get_entry_by_id(self._sm_data, hms_lincs_id)
        if not entry:
            return None
        return entry['Name']

    def get_small_molecule_refs(self, hms_lincs_id):
        """Get the id refs of a small molecule from the LINCS sm metadata.

        Parameters
        ----------
        hms_lincs_id : str
            The HMS LINCS ID of the small molecule.

        Returns
        -------
        dict
            A dictionary of references. It always contains the given ID
            under the 'HMS-LINCS' key; 'CHEMBL', 'CHEBI', 'PUBCHEM' and
            'LINCS' are added when the entry has a non-empty value for them.
        """
        refs = {'HMS-LINCS': hms_lincs_id}

        entry = self._get_entry_by_id(self._sm_data, hms_lincs_id)
        # If there is no entry for this ID
        if not entry:
            return refs

        # If there is an entry then fill up the refs with existing values
        mappings = dict(chembl='ChEMBL ID', chebi='ChEBI ID',
                        pubchem='PubChem CID', lincs='LINCS ID')
        for ref_ns, field in mappings.items():
            value = entry.get(field)
            if value:
                refs[ref_ns.upper()] = value
        return refs

    def get_protein_refs(self, hms_lincs_id):
        """Get the refs for a protein from the LINCS protein metadata.

        Parameters
        ----------
        hms_lincs_id : str
            The HMS LINCS ID for the protein

        Returns
        -------
        dict
            A dictionary of protein references. It always contains the
            given ID under the 'HMS-LINCS' key; 'EGID' and 'UP' are added
            when the entry has a non-empty value for them.
        """
        # TODO: We could get phosphorylation states from the protein data.
        refs = {'HMS-LINCS': hms_lincs_id}

        entry = self._get_entry_by_id(self._prot_data, hms_lincs_id)
        # If there is no entry for this ID
        if not entry:
            return refs
        mappings = dict(egid='Gene ID', up='UniProt ID')
        for ref_ns, field in mappings.items():
            value = entry.get(field)
            if value:
                refs[ref_ns.upper()] = value
        return refs

    def _get_entry_by_id(self, resource, hms_lincs_id):
        """Return the entry for an ID from a resource dict, or None.

        Parameters
        ----------
        resource : dict
            A dictionary of entries keyed by HMS LINCS ID.
        hms_lincs_id : str
            Either a full ID (containing a '-') which is looked up directly,
            or a short ID (no '-') which is resolved to the first key that
            is equal to it or that starts with it followed by a '-'.

        Returns
        -------
        dict or None
            The matching entry, or None (after logging an error) if no
            entry could be found.
        """
        # This means it's a full ID
        if '-' in hms_lincs_id:
            entry = resource.get(hms_lincs_id)
            if not entry:
                logger.error('Couldn\'t find entry for %s', hms_lincs_id)
                return None
            return entry

        # This means it's a short ID. The match has to end at the '-'
        # separator (or be the entire key); a bare prefix match would let
        # e.g. '1007' resolve to the unrelated key '10070-101'.
        prefix = hms_lincs_id + '-'
        for key in resource:
            if key == hms_lincs_id or key.startswith(prefix):
                return resource[key]
        logger.error('Couldn\'t find entry for %s', hms_lincs_id)
        return None


def get_drug_target_data():
    """Return the LINCS drug target dataset as a list of row dicts.

    The results table of dataset 20000 is fetched from the LINCS URL via
    load_lincs_csv.

    Returns
    -------
    data : list[dict]
        One dict per row of the csv, keyed by the csv header, with the
        row's column values as the dict values.
    """
    return load_lincs_csv(LINCS_URL + '/datasets/20000/results')


def _build_db_refs(lincs_id, data, **mappings):
    """Assemble a db_refs dict from selected fields of a data record.

    Parameters
    ----------
    lincs_id : str
        The ID stored under the 'HMS-LINCS' key of the result.
    data : dict
        The record to read values from. Every key named in `mappings` is
        looked up with indexing, so a missing key raises a KeyError.
    **mappings
        Each keyword is a reference namespace (upper-cased in the result)
        and its value is the key in `data` holding the corresponding ID.

    Returns
    -------
    dict
        The 'HMS-LINCS' entry plus one entry per mapping whose value in
        `data` is truthy.
    """
    db_refs = {'HMS-LINCS': lincs_id}
    # Only carry over the fields that actually have a value
    db_refs.update((namespace.upper(), data[field])
                   for namespace, field in mappings.items()
                   if data[field])
    return db_refs


def load_lincs_csv(url):
    """Download a csv table from a URL and turn its rows into dicts.

    Parameters
    ----------
    url : str
        The URL to fetch. The request is sent with the query parameter
        ``output_type=.csv`` and a timeout of 120 seconds.

    Returns
    -------
    list[dict]
        One dict per data row, keyed by the elements of the first (header)
        row. An empty list is returned if the response contains no rows.

    Raises
    ------
    requests.HTTPError
        If the server responds with an error status code.
    """
    resp = requests.get(url, params={'output_type': '.csv'}, timeout=120)
    resp.raise_for_status()
    # Hand the reader a bytes buffer on Python 2 and a text buffer on Python 3
    if sys.version_info[0] < 3:
        csv_io = BytesIO(resp.content)
    else:
        csv_io = StringIO(resp.text)
    data_rows = list(read_unicode_csv_fileobj(csv_io, delimiter=','))
    # Without any rows there is no header to key the dicts on
    if not data_rows:
        return []
    headers = data_rows[0]
    return [dict(zip(headers, line_elements))
            for line_elements in data_rows[1:]]