# Source code for indra.sources.cwms.rdf_processor
from __future__ import absolute_import, print_function, unicode_literals
from builtins import dict, str
import logging
import rdflib
from indra.statements import Influence, Agent, Evidence
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# SPARQL PREFIX declarations that are prepended to the queries built in this
# module; they define the `role:` and `lf:` namespaces used by those queries.
prefixes = (
    "\n"
    "PREFIX role: <http://www.cs.rochester.edu/research/trips/role#>\n"
    "PREFIX lf: <http://www.cs.rochester.edu/research/trips/LF#>\n"
)
class CWMSRDFProcessor(object):
    """This processor extracts INDRA statements from CWMS RDF output.

    Parameters
    ----------
    text : str
        The source sentence as text.
    rdf_filename : str
        Path to a file containing the RDF/XML output returned by CWMS for
        that sentence. The file is opened and parsed when the processor is
        constructed.

    Attributes
    ----------
    text : str
        The source sentence; the start/end offsets found in the RDF graph
        are used to slice this string.
    graph : rdflib.Graph
        The RDF graph parsed from `rdf_filename`.
    statements : list[indra.statements.Statement]
        A list of INDRA statements that were extracted by the processor.
    """
    def __init__(self, text, rdf_filename):
        self.text = text

        # Read in the RDF graph
        g = rdflib.Graph()
        with open(rdf_filename, 'rb') as f:
            # Hand the argument to the logger instead of pre-formatting the
            # message, so the string is only built if the record is emitted
            logger.info('Started loading graph from %s', rdf_filename)
            g.parse(f, format='application/rdf+xml')
            logger.info('Finished loading graph')
        self.graph = g

        # Extract statements
        self.statements = []
        self.extract_statements()

    def _text_span(self, start, end):
        """Return the whitespace-stripped slice of the source text.

        Parameters
        ----------
        start
            Index of the first character of the span. Anything accepted by
            ``int()`` works, e.g. the values of a SPARQL query result row.
        end
            Index one past the last character of the span; same accepted
            types as `start`.

        Returns
        -------
        str
            ``self.text[start:end]`` with surrounding whitespace removed.
        """
        # Convert to python integers so the values can be used to index
        # the source string
        return self.text[int(start):int(end)].strip()

    def extract_statement_from_query_result(self, res):
        """Adds a statement based on one element of a rdflib SPARQL query.

        The statement is an Influence whose subject and object are Agents
        named after (and grounded by, under the 'TEXT' key) the two text
        spans referenced by the query result. It is appended to
        `self.statements`; nothing is returned.

        Parameters
        ----------
        res : rdflib.query.ResultRow
            Element of rdflib SPARQL query result. It must unpack into
            exactly four values: agent start, agent end, affected start
            and affected end offsets into the source text.
        """
        agent_start, agent_end, affected_start, affected_end = res

        # Find the text corresponding to these indices
        agent = self._text_span(agent_start, agent_end)
        affected = self._text_span(affected_start, affected_end)

        # Make an Agent object for both the subject and the object
        subj = Agent(agent, db_refs={'TEXT': agent})
        obj = Agent(affected, db_refs={'TEXT': affected})

        # Add the statement to the list of statements
        self.statements.append(Influence(subj=subj, obj=obj))

    def extract_statements(self):
        """Extracts INDRA statements from the RDF graph via SPARQL queries.

        Two queries are run against `self.graph`, in this order: one for
        events that have an AGENT and an AFFECTED, and one for events that
        have an AGENT and a RESULT. Every match is turned into a statement
        by `extract_statement_from_query_result`.
        """
        # Query template: look for events that have an AGENT and an
        # AFFECTED, and get the start and ending text indices for each.
        # The text is assembled line by line so its content does not depend
        # on how this method is indented.
        query = prefixes + (
            "\n"
            "SELECT\n"
            "?agent_start\n"
            "?agent_end\n"
            "?affected_start\n"
            "?affected_end\n"
            "WHERE {\n"
            "?rel role:AGENT ?agent .\n"
            "?rel role:AFFECTED ?affected .\n"
            "?agent lf:start ?agent_start .\n"
            "?agent lf:end ?agent_end .\n"
            "?affected lf:start ?affected_start .\n"
            "?affected lf:end ?affected_end .\n"
            "}\n"
        )

        # The RESULT query is the same as the AFFECTED one except for the
        # role linking the event to its second argument, so swap the role
        # name in the template (a no-op for AFFECTED itself).
        for role in ('AFFECTED', 'RESULT'):
            role_query = query.replace('role:AFFECTED', 'role:' + role)
            for res in self.graph.query(role_query):
                # Make a statement for each query match
                self.extract_statement_from_query_result(res)