import logging
import pandas as pd
from .net import IndraNet
from indra.statements import *
from itertools import permutations
from collections import OrderedDict, defaultdict
logger = logging.getLogger(__name__)
NS_PRIORITY_LIST = (
'FPLX', 'HGNC', 'UP', 'CHEBI', 'GO', 'MESH', 'HMDB', 'PUBCHEM')
[docs]def get_ag_ns_id(ag):
"""Return a tuple of name space, id from an Agent's db_refs."""
for ns in NS_PRIORITY_LIST:
if ns in ag.db_refs:
return ns, ag.db_refs[ns]
return 'TEXT', ag.name
[docs]class IndraNetAssembler():
"""Assembler to create an IndraNet object from a list of INDRA statements.
Parameters
----------
statements : list[indra.statements.Statement]
A list of INDRA Statements to be assembled.
Attributes
----------
model : IndraNet
An IndraNet graph object assembled by this class.
"""
def __init__(self, statements=None):
self.statements = statements if statements else []
self.model = None
[docs] def add_statements(self, stmts):
"""Add INDRA Statements to the assembler's list of statements.
Parameters
----------
stmts : list[indra.statements.Statement]
A list of :py:class:`indra.statements.Statement`
to be added to the statement list of the assembler.
"""
self.statements += stmts
[docs] def make_model(self, exclude_stmts=None, complex_members=3,
graph_type='multi_graph', sign_dict=None,
belief_flattening=None, weight_flattening=None):
"""Assemble an IndraNet graph object.
Parameters
----------
exclude_stmts : list[str]
A list of statement type names to not include in the graph.
complex_members : int
Maximum allowed size of a complex to be included in the graph.
All complexes larger than complex_members will be rejected. For
accepted complexes, all permutations of their members will be added
as edges. Default is `3`.
graph_type : str
Specify the type of graph to assemble. Chose from 'multi_graph'
(default), 'digraph', 'signed'. Default is `multi_graph`.
sign_dict : dict
A dictionary mapping a Statement type to a sign to be used for
the edge. This parameter is only used with the 'signed' option.
See IndraNet.to_signed_graph for more info.
belief_flattening : str or function(networkx.DiGraph, edge)
The method to use when updating the belief for the flattened edge.
If a string is provided, it must be one of the predefined options
'simple_scorer' or 'complementary_belief'.
If a function is provided, it must take the flattened graph 'G'
and an edge 'edge' to perform the belief flattening on and return
a number:
>>> def belief_flattening(G, edge):
... # Return the average belief score of the constituent edges
... all_beliefs = [s['belief']
... for s in G.edges[edge]['statements']]
... return sum(all_beliefs)/len(all_beliefs)
weight_flattening : function(networkx.DiGraph)
A function taking at least the graph G as an argument and
returning G after adding edge weights as an edge attribute to the
flattened edges using the reserved keyword 'weight'.
Example:
>>> def weight_flattening(G):
... # Sets the flattened weight to the average of the
... # inverse source count
... for edge in G.edges:
... w = [1/s['evidence_count']
... for s in G.edges[edge]['statements']]
... G.edges[edge]['weight'] = sum(w)/len(w)
... return G
Returns
-------
model : IndraNet
IndraNet graph object.
"""
df = self.make_df(exclude_stmts, complex_members)
if graph_type == 'multi_graph':
model = IndraNet.from_df(df)
elif graph_type == 'digraph':
model = IndraNet.digraph_from_df(
df=df,
flattening_method=belief_flattening,
weight_mapping=weight_flattening
)
elif graph_type == 'signed':
model = IndraNet.signed_from_df(df, sign_dict=sign_dict,
flattening_method=belief_flattening,
weight_mapping=weight_flattening)
else:
raise TypeError('Have to specify one of \'multi_graph\', '
'\'digraph\' or \'signed\' when providing graph '
'type.')
return model
[docs] def make_df(self, exclude_stmts=None, complex_members=3):
"""Create a dataframe containing information extracted from assembler's
list of statements necessary to build an IndraNet.
Parameters
----------
exclude_stmts : list[str]
A list of statement type names to not include in the dataframe.
complex_members : int
Maximum allowed size of a complex to be included in the
data frame. All complexes larger than complex_members will be
rejected. For accepted complexes, all permutations of their
members will be added as dataframe records. Default is `3`.
Returns
-------
df : pd.DataFrame
Pandas DataFrame object containing information extracted from
statements. It contains the following columns:
*agA_name*
The first Agent's name.
*agA_ns*
The first Agent's identifier namespace as per `db_refs`.
*agA_id*
The first Agent's identifier as per `db_refs`
*ags_ns, agB_name, agB_id*
As above for the second agent. Note that the Agent may be None
(and these fields left empty) if the Statement consists only
of a single Agent (e.g., SelfModification, ActiveForm,
or Translocation statement).
*stmt_type*
Statement type, given by the name of the class
in indra.statements.
*evidence_count*
Number of evidences for the statement.
*stmt_hash*
An unique long integer hash identifying the content of the
statement.
*belief*
The belief score associated with the statement.
*source_counts*
The number of evidences per input source for the statement.
*initial_sign*
The default sign (polarity) associated with the given
statement if the statement type has implied polarity.
To facilitate weighted path finding, the sign is represented
as 0 for positive polarity and 1 for negative polarity.
"""
rows = []
if exclude_stmts:
exclude_types = tuple(
get_statement_by_name(st_type) for st_type in exclude_stmts)
else:
exclude_types = ()
for stmt in self.statements:
# Exclude statements from given exclude list
if isinstance(stmt, exclude_types):
logger.debug('Skipping a statement of a type %s.'
% type(stmt).__name__)
continue
agents = stmt.agent_list()
not_none_agents = [a for a in agents if a is not None]
# Exclude statements with less than 2 agents
if len(not_none_agents) < 2:
continue
# Special handling for Influences and Associations
if isinstance(stmt, (Influence, Association)):
stmt_pol = stmt.overall_polarity()
if stmt_pol == 1:
sign = 0
elif stmt_pol == -1:
sign = 1
else:
sign = None
if isinstance(stmt, Influence):
edges = [(stmt.subj.concept, stmt.obj.concept, sign)]
else:
edges = [(a, b, sign) for a, b in
permutations(not_none_agents, 2)]
# Handle complexes by creating pairs of their
# not-none-agents.
elif isinstance(stmt, Complex):
# Do not add complexes with more members than complex_members
if len(not_none_agents) > complex_members:
logger.debug('Skipping a complex with %d members.'
% len(not_none_agents))
continue
else:
# add every permutation with a neutral polarity
edges = [(a, b, None) for a, b in
permutations(not_none_agents, 2)]
elif isinstance(stmt, Conversion):
edges = []
if stmt.subj:
for obj in stmt.obj_from:
edges.append((stmt.subj, obj, 1))
for obj in stmt.obj_to:
edges.append((stmt.subj, obj, 0))
# This is for any remaining statement type that may not be
# handled above explicitly but somehow has more than two
# not-none-agents at this point
elif len(not_none_agents) > 2:
continue
else:
edges = [(not_none_agents[0], not_none_agents[1], None)]
for (agA, agB, sign) in edges:
agA_ns, agA_id = get_ag_ns_id(agA)
agB_ns, agB_id = get_ag_ns_id(agB)
stmt_type = type(stmt).__name__
row = OrderedDict([
('agA_name', agA.name),
('agB_name', agB.name),
('agA_ns', agA_ns),
('agA_id', agA_id),
('agB_ns', agB_ns),
('agB_id', agB_id),
('stmt_type', stmt_type),
('evidence_count', len(stmt.evidence)),
('stmt_hash', stmt.get_hash(refresh=True)),
('belief', stmt.belief),
('source_counts', _get_source_counts(stmt)),
('initial_sign', sign)])
rows.append(row)
df = pd.DataFrame.from_dict(rows)
df = df.where((pd.notnull(df)), None)
return df
def _get_source_counts(stmt):
source_counts = defaultdict(int)
for ev in stmt.evidence:
source_counts[ev.source_api] += 1
return source_counts