From b7d9a8ffc8cc4a8949229b5bd3ada301878633d9 Mon Sep 17 00:00:00 2001 From: Federico Igne Date: Tue, 24 Nov 2020 11:41:08 +0000 Subject: Rework filtering program generation --- .../uk/ac/ox/cs/rsacomb/FilteringProgram.scala | 64 +--- .../uk/ac/ox/cs/rsacomb/util/RDFoxHelpers.scala | 62 +++- .../ac/ox/cs/rsacomb/FilteringProgramSpecs.scala | 343 ++++----------------- 3 files changed, 129 insertions(+), 340 deletions(-) (limited to 'src') diff --git a/src/main/scala/uk/ac/ox/cs/rsacomb/FilteringProgram.scala b/src/main/scala/uk/ac/ox/cs/rsacomb/FilteringProgram.scala index 055cf2a..b70de66 100644 --- a/src/main/scala/uk/ac/ox/cs/rsacomb/FilteringProgram.scala +++ b/src/main/scala/uk/ac/ox/cs/rsacomb/FilteringProgram.scala @@ -1,36 +1,18 @@ package uk.ac.ox.cs.rsacomb +//import scala.collection.JavaConverters._ import tech.oxfordsemantic.jrdfox.logic.Datatype -import tech.oxfordsemantic.jrdfox.logic.expression.{ - Term, - IRI, - Variable, - Literal, - FunctionCall -} import tech.oxfordsemantic.jrdfox.logic.datalog.{ Rule, TupleTableAtom, - BindAtom, - TupleTableName, - Atom, BodyFormula, Negation } -import tech.oxfordsemantic.jrdfox.logic.sparql.statement.{SelectQuery} -import tech.oxfordsemantic.jrdfox.logic.sparql.pattern.{ - GroupGraphPattern, - ConjunctionPattern, - TriplePattern, - QueryPattern -} - -import scala.collection.JavaConverters._ - +import tech.oxfordsemantic.jrdfox.logic.expression.{Term, Variable} import uk.ac.ox.cs.rsacomb.implicits.RSAAtom -import uk.ac.ox.cs.rsacomb.suffix.{RSASuffix, Forward, Backward} -import uk.ac.ox.cs.rsacomb.util.RSA import uk.ac.ox.cs.rsacomb.sparql.ConjunctiveQuery +import uk.ac.ox.cs.rsacomb.suffix.{Forward, Backward} +import uk.ac.ox.cs.rsacomb.util.{RSA, RDFoxHelpers} /** Factory for [[uk.ac.ox.cs.rsacomb.FilteringProgram FilteringProgram]] */ object FilteringProgram { @@ -102,7 +84,7 @@ class FilteringProgram(query: ConjunctiveQuery, constants: List[Term]) * * @note corresponds to rule 1 in Table 3 in the paper. 
*/ - val r1 = reifyRule(Rule.create(RSA.QM, query.atoms: _*)) + val r1 = Rule.create(RSA.QM, query.atoms: _*) /** Initializes instances of `rsa:Named`. * @@ -325,39 +307,13 @@ class FilteringProgram(query: ConjunctiveQuery, constants: List[Term]) (r1 :: r2 ::: r3a ::: r3b :: r3c :: r4a ::: r4b ::: r4c ::: - // r5c ::: r5b ::: r5a ::: + r5a ::: r5b ::: r5c ::: r6 ::: r7b ::: r7a ::: r8a ::: r8b :: r8c ::: - r9 :: List()) map reifyRule + r9 :: List()) map RDFoxHelpers.reify } - private def reifyAtom(atom: Atom): (Option[BindAtom], List[Atom]) = { - atom match { - case atom: TupleTableAtom => atom.reified - case other => (None, List(other)) - } - } + /** Pretty-print filtering rule */ + override def toString(): String = rules mkString "\n" - private def reifyBodyFormula(formula: BodyFormula): List[BodyFormula] = { - formula match { - case atom: TupleTableAtom => atom.reified._2 - case neg: Negation => { - val (bs, as) = neg.getNegatedAtoms.asScala.toList.map(reifyAtom).unzip - val bind = bs.flatten.map(_.getBoundVariable).asJava - val atoms = as.flatten.asJava - List(Negation.create(bind, atoms)) - } - case other => List(other) - } - } - - private def reifyRule(rule: Rule): Rule = { - val (bs, hs) = rule.getHead.asScala.toList.map(_.reified).unzip - val head: List[TupleTableAtom] = hs.flatten - val bind: List[BodyFormula] = bs.flatten - val body: List[BodyFormula] = - rule.getBody.asScala.toList.map(reifyBodyFormula).flatten - Rule.create(head.asJava, (body ++ bind).asJava) - } - -} // class FilteringProgram +} diff --git a/src/main/scala/uk/ac/ox/cs/rsacomb/util/RDFoxHelpers.scala b/src/main/scala/uk/ac/ox/cs/rsacomb/util/RDFoxHelpers.scala index 0f3a1cf..a05e416 100644 --- a/src/main/scala/uk/ac/ox/cs/rsacomb/util/RDFoxHelpers.scala +++ b/src/main/scala/uk/ac/ox/cs/rsacomb/util/RDFoxHelpers.scala @@ -8,12 +8,28 @@ import tech.oxfordsemantic.jrdfox.client.{ DataStoreConnection } import tech.oxfordsemantic.jrdfox.formats.SPARQLParser -import 
tech.oxfordsemantic.jrdfox.logic.expression.Resource +import tech.oxfordsemantic.jrdfox.logic.datalog.{ + Rule, + BodyFormula, + Negation, + TupleTableAtom, + TupleTableName +} +import tech.oxfordsemantic.jrdfox.logic.expression.{Resource} import tech.oxfordsemantic.jrdfox.logic.sparql.statement.SelectQuery import uk.ac.ox.cs.rsacomb.suffix.Nth +import uk.ac.ox.cs.rsacomb.implicits.RSAAtom /** A collection of helper methods for RDFox */ -object RDFoxHelpers { +object RDFoxHelpers extends RSAAtom { + + /** Simplify conversion between Java and Scala `List`s */ + import uk.ac.ox.cs.rsacomb.implicits.JavaCollections._ + + /** Extends capabilities of + * [[tech.oxfordsemantic.jrdfox.logic.datalog.TupleTableAtom TupleTableAtom]]. + */ + //import uk.ac.ox.cs.rsacomb.implicits.RSAAtom._ /** Type alias for a collection of answers to a * [[tech.oxfordsemantic.jrdfox.logic.sparql.statement.Query]]. @@ -142,6 +158,48 @@ object RDFoxHelpers { } } + /** Reify a [[tech.oxfordsemantic.jrdfox.logic.datalog.Rule Rule]]. + * + * This is needed because RDFox supports only predicates of arity 1 + * or 2, but the filtering program uses predicates with higher arity. + * + * @note we can perform a reification of the atoms thanks to the + * built-in `SKOLEM` function of RDFox. + */ + def reify(rule: Rule): Rule = { + val (bs, as) = rule.getHead.map(_.reified).unzip + val head: List[TupleTableAtom] = as.flatten + val bind: List[BodyFormula] = bs.flatten + val body: List[BodyFormula] = rule.getBody.map(reify).flatten + Rule.create(head, bind ::: body) + } + + /** Reify a [[tech.oxfordsemantic.jrdfox.logic.datalog.BodyFormula BodyFormula]]. + * + * This is needed because RDFox supports only predicates of arity 1 + * or 2, but the filtering program uses predicates with higher arity. + * + * @note we can perform a reification of the atoms thanks to the + * built-in `SKOLEM` function of RDFox. 
+ */ + private def reify(formula: BodyFormula): List[BodyFormula] = { + formula match { + case atom: TupleTableAtom => atom.reified._2 + case neg: Negation => { + val (bs, as) = neg.getNegatedAtoms + .map({ + case a: TupleTableAtom => a.reified + case a => (None, List(a)) + }) + .unzip + val bind = bs.flatten.map(_.getBoundVariable) + val atoms = as.flatten + List(Negation.create(bind, atoms)) + } + case other => List(other) + } + } + /** Close an open connection to RDFox. * * @param server server connection diff --git a/src/test/scala/uk/ac/ox/cs/rsacomb/FilteringProgramSpecs.scala b/src/test/scala/uk/ac/ox/cs/rsacomb/FilteringProgramSpecs.scala index e9a20db..71c9a99 100644 --- a/src/test/scala/uk/ac/ox/cs/rsacomb/FilteringProgramSpecs.scala +++ b/src/test/scala/uk/ac/ox/cs/rsacomb/FilteringProgramSpecs.scala @@ -1,313 +1,88 @@ package rsacomb -import java.io.File -import java.util.{ArrayList => JList} -import org.scalatest.LoneElement -import org.scalatest.Inspectors import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers - -import tech.oxfordsemantic.jrdfox.logic.datalog.TupleTableAtom -import tech.oxfordsemantic.jrdfox.logic.expression.{Variable, IRI} -import tech.oxfordsemantic.jrdfox.logic.sparql.statement.{Query, SelectQuery} -import tech.oxfordsemantic.jrdfox.Prefixes - -import scala.collection.JavaConverters._ - +import tech.oxfordsemantic.jrdfox.logic.expression.IRI import uk.ac.ox.cs.rsacomb.FilteringProgram -import uk.ac.ox.cs.rsacomb.util.RDFoxHelpers +import uk.ac.ox.cs.rsacomb.sparql.ConjunctiveQuery object FilteringProgramSpec { - val prefixes = new Prefixes() - prefixes.declarePrefix( - ":", - "http://slegger.gitlab.io/slegge-obda/ontology/subsurface-exploration#" - ) - prefixes.declarePrefix("rdf:", "http://www.w3.org/1999/02/22-rdf-syntax-ns#") - prefixes.declarePrefix("rdfs:", "http://www.w3.org/2000/01/rdf-schema#") - prefixes.declarePrefix("owl:", "http://www.w3.org/2002/07/owl#") - - // DEBUG: Quick helper 
functions - def v(v: String): Variable = Variable.create(v) - def c(c: String): IRI = IRI.create(":" + c) - - // QUERY 0 - - val query0 = RDFoxHelpers - .parseSelectQuery(""" - SELECT ?subj - WHERE { - ?subj ?pred ?obj - } - """, prefixes) - .get - - // val query0 = Query.create( - // QueryType.SELECT, - // false, - // List(v("subj")).asJava, - // Atom.rdf(v("subj"), v("pred"), v("obj")) - // ) - - // QUERY 1 - - val query1 = RDFoxHelpers - .parseSelectQuery(""" - SELECT * - WHERE { - ?w a :Wellbore - } - """, prefixes) - .get - - // val query1 = Query.create( - // QueryType.SELECT, - // false, - // List(v("w")).asJava, - // Atom.rdf(v("w"), IRI.RDF_TYPE, c("Wellbore")) - // ) - - // QUERY 2 - - val query2 = RDFoxHelpers - .parseSelectQuery( - """ - SELECT * - WHERE { - ?w a :Wellbore ; - :wellboreDocument ?doc . - ?doc :hasURL ?document_hyperlink - } - """, - prefixes - ) - .get + val constants = + List(IRI.create("_:iri1"), IRI.create("_:iri2"), IRI.create("_:iri3")) - // val query2 = Query.create( - // QueryType.SELECT, - // false, - // List(v("w"), v("doc"), v("document_hyperlink")).asJava, - // Conjunction.create( - // Atom.rdf(v("w"), IRI.RDF_TYPE, c("Wellbore")), - // Atom.rdf(v("w"), c("wellboreDocument"), v("doc")), - // Atom.rdf(v("doc"), c("hasURL"), v("document_hyperlink")) - // ) - // ) + val cq0 = """ + PREFIX : - // QUERY 3 - - val query3 = RDFoxHelpers - .parseSelectQuery( - """ - SELECT ?wellbore ?formation_pressure + SELECT ?X WHERE { - ?w a :Wellbore ; - :name ?wellbore ; - :hasFormationPressure ?fp . - ?fp :valueInStandardUnit ?formation_pressure + ?X a :D ; + :R ?Y . + ?Y :S ?Z . + ?Z a :D . 
} - """, - prefixes - ) - .get - - // val query3 = Query.create( - // QueryType.SELECT, - // false, - // List(v("wellbore"), v("formation_pressure")).asJava, - // Conjunction.create( - // Atom.rdf(v("w"), IRI.RDF_TYPE, c("Wellbore")), - // Atom.rdf(v("w"), c("name"), v("wellbore")), - // Atom.rdf(v("w"), c("hasFormationPressure"), v("fp")), - // Atom.rdf(v("fp"), c("valueInStandardUnit"), v("formation_pressure")) - // ) - // ) + """ - // QUERY 4 + val cq1 = """ + PREFIX : - val query4 = RDFoxHelpers - .parseSelectQuery( - """ - SELECT * + SELECT * WHERE { - ?w a :Wellbore ; - :hasGeochemicalMeasurement ?measurement . - ?measurement :cgType ?cgtype ; - :peakName ?peakType ; - :peakHeight ?peak_height ; - :peakAmount ?peak_amount + ?X a :D ; + :R ?Y . + ?Y :S ?Z . + ?Z a :D . } - """, - prefixes - ) - .get + """ - // val query4 = Query.create( - // QueryType.SELECT, - // false, - // List( - // v("w"), - // v("measurement"), - // v("cgtype"), - // v("peakType"), - // v("peak_height"), - // v("peak_amount") - // ).asJava, - // Conjunction.create( - // Atom.rdf(v("w"), IRI.RDF_TYPE, c("Wellbore")), - // Atom.rdf(v("w"), c("hasGeochemicalMeasurement"), v("measurement")), - // Atom.rdf(v("measurement"), c("cgType"), v("cgtype")), - // Atom.rdf(v("measurement"), c("peakName"), v("peakType")), - // Atom.rdf(v("measurement"), c("peakHeight"), v("peak_height")), - // Atom.rdf(v("measurement"), c("peakAmount"), v("peak_amount")) - // ) - // ) + val cq2 = """ + PREFIX : - // QUERY 5 - - val query5 = RDFoxHelpers - .parseSelectQuery( - """ - SELECT ?wellbore ?unit_name ?discovery + SELECT ?X WHERE { - ?w a :Wellbore ; - :name ?wellbore ; - :hasWellboreInterval ?c_int ; - :hasWellboreInterval ?f_int . - ?c_int :hasUnit ?c_unit . - ?c_unit :name ?unit_name . - ?f_int a :FluidZone ; - :name ?discovery ; - :overlapsWellboreInterval ?c_int + ?X a :D ; + :R ?Y . + ?Y :S ?Z . + ?Y :T ?W . + ?Z a :D . 
+ ?W a :D } - """, - prefixes - ) - .get - - // val query5 = Query.create( - // QueryType.SELECT, - // false, - // List(v("wellbore"), v("unit_name"), v("discovery")).asJava, - // Conjunction.create( - // Atom.rdf(v("w"), IRI.RDF_TYPE, c("Wellbore")), - // Atom.rdf(v("w"), c("name"), v("wellbore")), - // Atom.rdf(v("w"), c("hasWellboreInterval"), v("c_int")), - // Atom.rdf(v("w"), c("hasWellboreInterval"), v("f_int")), - // Atom.rdf(v("c_int"), c("hasUnit"), v("c_unit")), - // Atom.rdf(v("c_unit"), c("name"), v("unit_name")), - // Atom.rdf(v("f_int"), IRI.RDF_TYPE, c("FluidZone")), - // Atom.rdf(v("f_int"), c("name"), v("discovery")), - // Atom.rdf(v("f_int"), c("overlapsWellboreInterval"), v("c_int")) - // ) - // ) + """ - // QUERY 6 + val bcq0 = """ + PREFIX : - val query6 = RDFoxHelpers - .parseSelectQuery( - """ - SELECT DISTINCT ?wellbore ?content - WHERE { - ?w a :Wellbore ; - :name ?wellbore ; - :hasWellboreInterval ?int . - ?int a :FluidZone ; - :fluidZoneContent ?content + ASK { + ?X a :D ; + :R ?Y . + ?Y :S ?Z . + ?Z a :D . } - """, - prefixes - ) - .get - - // val query6 = Query.create( - // QueryType.SELECT, - // true, - // List(v("wellbore"), v("content")).asJava, - // Conjunction.create( - // Atom.rdf(v("w"), IRI.RDF_TYPE, c("Wellbore")), - // Atom.rdf(v("w"), c("name"), v("wellbore")), - // Atom.rdf(v("w"), c("hasWellboreInterval"), v("int")), - // Atom.rdf(v("int"), IRI.RDF_TYPE, c("FluidZone")), - // Atom.rdf(v("int"), c("fluidZoneContent"), v("content")) - // ) - // ) - - // QUERY 7 - - val query7 = RDFoxHelpers - .parseSelectQuery( - """ - SELECT ?wName ?sample ?porosity ?top_depth_md ?bot_depth_md - WHERE { - ?w a :Wellbore ; - :name ?wName ; - :hasWellboreInterval ?z . - ?z :hasUnit ?u . - ?u :name ?strat_unit_name . - ?wellbore :hasWellboreInterval ?cored_int . - ?c :extractedFrom ?cored_int ; - :hasCoreSample ?sample . - ?sample :hasDepth ?sample_depth . - ?sample_depth - :inWellboreInterval ?z . - ?sample :hasPorosity ?p . 
- ?p :valueInStandardUnit ?porosity . - ?z :hasTopDepth ?top . - ?top a :MeasuredDepth ; - :valueInStandardUnit ?top_depth_md . - ?z :hasBottomDepth ?bot . - ?bot a :MeasuredDepth ; - :valueInStandardUnit ?bot_depth_md - } - """, - prefixes - ) - .get - - // val query7 = Query.create( - // QueryType.SELECT, - // false, - // List( - // v("wName"), - // v("sample"), - // v("porosity"), - // v("top_depth_md"), - // v("bot_depth_md") - // ).asJava, - // Conjunction.create( - // Atom.rdf(v("w"), IRI.RDF_TYPE, c("Wellbore")), - // Atom.rdf(v("w"), c("name"), v("wName")), - // Atom.rdf(v("w"), c("hasWellboreInterval"), v("z")), - // Atom.rdf(v("z"), c("hasUnit"), v("u")), - // Atom.rdf(v("u"), c("name"), v("strat_unit_name")), - // Atom.rdf(v("wellbore"), c("hasWellboreInterval"), v("cored_int")), - // Atom.rdf(v("c"), c("extractedFrom"), v("cored_int")), - // Atom.rdf(v("c"), c("hasCoreSample"), v("sample")), - // Atom.rdf(v("sample"), c("hasDepth"), v("sample_depth")), - // Atom.rdf(v("sample_depth"), c("inWellboreInterval"), v("z")), - // Atom.rdf(v("sample"), c("hasPorosity"), v("p")), - // Atom.rdf(v("p"), c("valueInStandardUnit"), v("porosity")), - // Atom.rdf(v("z"), c("hasTopDepth"), v("top")), - // Atom.rdf(v("top"), IRI.RDF_TYPE, c("MeasuredDepth")), - // Atom.rdf(v("top"), c("valueInStandardUnit"), v("top_depth_md")), - // Atom.rdf(v("z"), c("hasBottomDepth"), v("bot")), - // Atom.rdf(v("bot"), IRI.RDF_TYPE, c("MeasuredDepth")), - // Atom.rdf(v("bot"), c("valueInStandardUnit"), v("bot_depth_md")) - // ) - // ) - - val queries = - List(query0, query1, query2, query3, query4, query5, query6, query7) + """ } -class FilteringProgramSpec - extends AnyFlatSpec - with Matchers - with LoneElement - with Inspectors { +class FilteringProgramSpec extends AnyFlatSpec with Matchers { import FilteringProgramSpec._ + "CQ 0" should "generate 30 rules" in { + val cq = ConjunctiveQuery(cq0).get + FilteringProgram(cq, constants).rules should have length 30 + } + + "CQ 1" should 
"generate 15 rules" in { + val cq = ConjunctiveQuery(cq1).get + FilteringProgram(cq, List()).rules should have length 15 + } + + "CQ 2" should "generate 51 rules" in { + val cq = ConjunctiveQuery(cq2).get + FilteringProgram(cq, List()).rules should have length 51 + } + + "BCQ 0" should "generate 46 rules" in { + val cq = ConjunctiveQuery(bcq0).get + FilteringProgram(cq, constants).rules should have length 46 + } + } -- cgit v1.2.3