From 0d8f240c9c0a64f2285324e5a517161e45c698fc Mon Sep 17 00:00:00 2001 From: yzhou Date: Thu, 30 Apr 2015 17:36:35 +0100 Subject: downgrade owl api and reorganised src files --- .../uk/ac/ox/cs/data/AtomicQueryGenerator.java | 86 ++ external/uk/ac/ox/cs/data/Comparator.java | 131 +++ external/uk/ac/ox/cs/data/Fragment.java | 129 +++ external/uk/ac/ox/cs/data/OntologyStatistics.java | 15 + .../uk/ac/ox/cs/data/PrepareQueries4Hydrowl.java | 184 ++++ external/uk/ac/ox/cs/data/QueryFilter.java | 30 + .../uk/ac/ox/cs/data/RemoveDataPropertyRange.java | 52 ++ .../uk/ac/ox/cs/data/RemoveImportInTurtle.java | 77 ++ external/uk/ac/ox/cs/data/WriteIntoTurtle.java | 69 ++ external/uk/ac/ox/cs/data/WriteToNTriple.java | 57 ++ .../cs/data/datatype/DataPropertyEliminator.java | 56 ++ .../uk/ac/ox/cs/data/datatype/DataToObject.java | 932 +++++++++++++++++++++ external/uk/ac/ox/cs/data/dbpedia/DataFilter.java | 68 ++ .../ox/cs/data/dbpedia/DataFilterRDFHandler.java | 116 +++ external/uk/ac/ox/cs/data/dbpedia/Normaliser.java | 155 ++++ external/uk/ac/ox/cs/data/sample/DataSampling.java | 320 +++++++ external/uk/ac/ox/cs/data/sample/RandomWalk.java | 88 ++ .../uk/ac/ox/cs/data/sample/RandomWalkMulti.java | 112 +++ external/uk/ac/ox/cs/data/sample/Sampler.java | 23 + 19 files changed, 2700 insertions(+) create mode 100644 external/uk/ac/ox/cs/data/AtomicQueryGenerator.java create mode 100644 external/uk/ac/ox/cs/data/Comparator.java create mode 100644 external/uk/ac/ox/cs/data/Fragment.java create mode 100644 external/uk/ac/ox/cs/data/OntologyStatistics.java create mode 100644 external/uk/ac/ox/cs/data/PrepareQueries4Hydrowl.java create mode 100644 external/uk/ac/ox/cs/data/QueryFilter.java create mode 100644 external/uk/ac/ox/cs/data/RemoveDataPropertyRange.java create mode 100644 external/uk/ac/ox/cs/data/RemoveImportInTurtle.java create mode 100644 external/uk/ac/ox/cs/data/WriteIntoTurtle.java create mode 100644 external/uk/ac/ox/cs/data/WriteToNTriple.java create mode 100644 
external/uk/ac/ox/cs/data/datatype/DataPropertyEliminator.java create mode 100644 external/uk/ac/ox/cs/data/datatype/DataToObject.java create mode 100644 external/uk/ac/ox/cs/data/dbpedia/DataFilter.java create mode 100644 external/uk/ac/ox/cs/data/dbpedia/DataFilterRDFHandler.java create mode 100644 external/uk/ac/ox/cs/data/dbpedia/Normaliser.java create mode 100644 external/uk/ac/ox/cs/data/sample/DataSampling.java create mode 100644 external/uk/ac/ox/cs/data/sample/RandomWalk.java create mode 100644 external/uk/ac/ox/cs/data/sample/RandomWalkMulti.java create mode 100644 external/uk/ac/ox/cs/data/sample/Sampler.java (limited to 'external/uk/ac') diff --git a/external/uk/ac/ox/cs/data/AtomicQueryGenerator.java b/external/uk/ac/ox/cs/data/AtomicQueryGenerator.java new file mode 100644 index 0000000..d271e87 --- /dev/null +++ b/external/uk/ac/ox/cs/data/AtomicQueryGenerator.java @@ -0,0 +1,86 @@ +package uk.ac.ox.cs.data; + +import org.semanticweb.owlapi.model.OWLClass; +import org.semanticweb.owlapi.model.OWLDataFactory; +import org.semanticweb.owlapi.model.OWLObjectProperty; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.model.OWLOntologyManager; + +import uk.ac.ox.cs.pagoda.owl.OWLHelper; +import uk.ac.ox.cs.pagoda.tester.PagodaTester; +import uk.ac.ox.cs.pagoda.util.Utility; + +public class AtomicQueryGenerator { + + public static final String template = //"^[query@ID]" + Utility.LINE_SEPARATOR + + "PREFIX rdf: " + Utility.LINE_SEPARATOR + + "SELECT ?X" + Utility.LINE_SEPARATOR + + "WHERE {" + Utility.LINE_SEPARATOR + + "?X rdf:type <@CLASS>" + Utility.LINE_SEPARATOR + + "}"; + + public static String outputFile = "output/atomic_fly.sparql"; + + public static void main(String[] args) throws Exception { + if (args.length == 0) { +// args = new String[] { "/home/yzhou/backup/20141212/univ-bench-dl-queries.owl"}; + args = new String[] { PagodaTester.onto_dir + "fly/fly-all-in-one_rolledUp.owl"}; +// args = new String[] { 
PagodaTester.onto_dir + "dbpedia/integratedOntology-all-in-one-minus-datatype.owl" }; +// args = new String[] { PagodaTester.onto_dir + "npd/npd-all-minus-datatype.owl" }; +// args = new String[] { PagodaTester.onto_dir + "bio2rdf/chembl/cco-noDPR.ttl" }; +// args = new String[] { PagodaTester.onto_dir + "bio2rdf/reactome/biopax-level3-processed.owl" }; +// args = new String[] { PagodaTester.onto_dir + "bio2rdf/uniprot/core-processed-noDis.owl" }; + } + +// OWLOntology ontology = OWLHelper.getMergedOntology(args[0], null); +// OWLHelper.correctDataTypeRangeAxioms(ontology); + OWLOntology ontology = OWLHelper.loadOntology(args[0]); + + OWLOntologyManager manager = ontology.getOWLOntologyManager(); + OWLDataFactory factory = manager.getOWLDataFactory(); +// manager.saveOntology(ontology, new FileOutputStream(args[0].replace(".owl", "_owlapi.owl"))); + + if (outputFile != null) + Utility.redirectCurrentOut(outputFile); + + int queryID = 0; + for (OWLClass cls: ontology.getClassesInSignature(true)) { + if (cls.equals(factory.getOWLThing()) || cls.equals(factory.getOWLNothing())) + continue; + if (!cls.toStringID().contains("Query")) continue; + System.out.println("^[Query" + ++queryID + "]"); + System.out.println(template.replace("@CLASS", cls.toStringID())); + System.out.println(); + } + + for (OWLOntology onto: ontology.getImportsClosure()) + for (OWLObjectProperty prop: onto.getObjectPropertiesInSignature()) { +// if (!prop.toStringID().contains("Query")) continue; + System.out.println("^[Query" + ++queryID + "]"); + System.out.println("SELECT ?X ?Y"); + System.out.println("WHERE {"); + System.out.println("?X <" + prop.toStringID() + "> ?Y ."); + System.out.println("}"); + System.out.println(); + } + + String[] answerVars = new String[] {"?X", "?Y"}; + + for (OWLOntology onto: ontology.getImportsClosure()) + for (OWLObjectProperty prop: onto.getObjectPropertiesInSignature()) { +// if (!prop.toStringID().contains("Query")) continue; + for (int i = 0; i < 
package uk.ac.ox.cs.data;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.Collection;
import java.util.HashSet;
import java.util.Scanner;
import java.util.Set;

import org.semanticweb.owlapi.model.OWLAxiom;
import org.semanticweb.owlapi.model.OWLClassExpression;
import org.semanticweb.owlapi.model.OWLDataFactory;
import org.semanticweb.owlapi.model.OWLEquivalentClassesAxiom;
import org.semanticweb.owlapi.model.OWLOntology;
import uk.ac.ox.cs.pagoda.owl.OWLHelper;
import uk.ac.ox.cs.pagoda.util.Utility;

/**
 * Ad-hoc diff utilities: a line-set diff of two text files and an axiom-set diff of two ontologies.
 * Both entry points work on hard-coded locations and write a two-section report
 * ("in A but not in B", then "in B but not in A").
 */
public class Comparator {

    private static final String SECTION_SEPARATOR =
            "--------------------------------------------------------";

    public static void main(String[] args) throws IOException {
        compareFiles(args);
    }

    /**
     * Compares {@code abox1.txt} and {@code abox2.txt} line by line (as sets) and writes the
     * differences to {@code diff.dlog} in the same directory.
     *
     * @param args ignored; the file locations are fixed in the method body
     */
    public static void compareFiles(String[] args) throws IOException {
        String directory = "/users/yzhou/workspace/pagoda/";
        String name1 = "abox1.txt", name2 = "abox2.txt";

        // Paths are built directly; joining them into one string and splitting on ' '
        // would break as soon as the directory contained a space.
        Set<String> h1 = new HashSet<String>();
        Set<String> h2 = new HashSet<String>();

        Scanner s1 = new Scanner(new File(directory + name1));
        try {
            while (s1.hasNextLine()) h1.add(s1.nextLine());
        } finally {
            s1.close();
        }

        Scanner s2 = new Scanner(new File(directory + name2));
        try {
            // The second file refers to the "-minus" variant of the ontology; normalise before comparing.
            while (s2.hasNextLine()) h2.add(s2.nextLine().replace("an-minus.owl", "an.owl"));
        } finally {
            s2.close();
        }

        BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(directory + "diff.dlog")));
        try {
            writer.write("Elements in " + name1 + ", but not in " + name2);
            writer.newLine();
            writeMissing(writer, h1, h2);

            writer.write(SECTION_SEPARATOR);
            writer.newLine();

            writer.write("Elements in " + name2 + ", but not in " + name1);
            writer.newLine();
            writeMissing(writer, h2, h1);
        } finally {
            writer.close();
        }
    }

    /** Writes every element of {@code from} that is absent from {@code other}, one per line. */
    private static void writeMissing(BufferedWriter writer, Set<String> from, Set<String> other)
            throws IOException {
        for (String line : from) {
            if (!other.contains(line)) {
                writer.write(line);
                writer.newLine();
            }
        }
    }

    /**
     * Compares the {@code eq/elho.owl} and {@code noEQ/elho.owl} ontologies axiom by axiom and
     * writes the differences to {@code diff.owl} in the same directory.
     *
     * @param args ignored; the ontology locations are fixed in the method body
     */
    public void compareOntologies(String[] args) throws IOException {
        String directory = "/home/scratch/yzhou/ontologies/fly/auxiliary/datalog/";
        String name1 = "eq/elho.owl", name2 = "noEQ/elho.owl";

        OWLOntology o1 = OWLHelper.loadOntology(directory + name1);
        OWLOntology o2 = OWLHelper.loadOntology(directory + name2);

        BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(directory + "diff.owl")));
        try {
            writer.write("Elements in " + name1 + ", but not in " + name2);
            writer.newLine();
            writer.write(compareOntologies(o1, o2));

            writer.write(SECTION_SEPARATOR);
            writer.newLine();

            writer.write("Elements in " + name2 + ", but not in " + name1);
            writer.newLine();
            writer.write(compareOntologies(o2, o1));
        } finally {
            writer.close();
        }
    }

    /**
     * Returns, one per line, the string form of every (normalised) axiom of {@code o1} whose
     * string form does not occur among the (normalised) axioms of {@code o2}.
     */
    private static String compareOntologies(OWLOntology o1, OWLOntology o2) {
        StringBuilder sb = new StringBuilder();

        Set<String> axioms = new HashSet<String>();
        OWLDataFactory factory1 = o1.getOWLOntologyManager().getOWLDataFactory();
        OWLDataFactory factory2 = o2.getOWLOntologyManager().getOWLDataFactory();

        for (OWLAxiom a : o2.getAxioms())
            for (OWLAxiom axiom : process(a, factory2))
                axioms.add(axiom.toString());

        for (OWLAxiom a : o1.getAxioms())
            for (OWLAxiom axiom : process(a, factory1))
                if (!axioms.contains(axiom.toString()))
                    sb.append(axiom.toString()).append(Utility.LINE_SEPARATOR);

        return sb.toString();
    }

    /**
     * Normalises an axiom for comparison: an equivalent-classes axiom is expanded into the
     * pairwise sub-class axioms between its distinct operands; any other axiom is returned as is.
     */
    private static Collection<OWLAxiom> process(OWLAxiom axiom, OWLDataFactory factory) {
        Set<OWLAxiom> axioms = new HashSet<OWLAxiom>();
        if (axiom instanceof OWLEquivalentClassesAxiom) {
            OWLEquivalentClassesAxiom equiv = (OWLEquivalentClassesAxiom) axiom;
            for (OWLClassExpression exp1 : equiv.getClassExpressions())
                for (OWLClassExpression exp2 : equiv.getClassExpressions())
                    if (!exp1.equals(exp2))
                        axioms.add(factory.getOWLSubClassOfAxiom(exp1, exp2));
        }
        else
            axioms.add(axiom);

        return axioms;
    }

}
package uk.ac.ox.cs.data;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Random;

import org.openrdf.model.Statement;
import org.openrdf.rio.RDFHandler;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.turtle.TurtleParser;
import org.openrdf.rio.turtle.TurtleWriter;

import uk.ac.ox.cs.pagoda.util.Utility;

/**
 * Writes a random sample of the statements of one or more Turtle files into a single Turtle file.
 * Call {@link #process} once per input (file or directory), then {@link #dispose} to finish
 * the output document.
 */
public class Fragment {

    private FileOutputStream m_out;
    private TurtleWriter m_writer;
    private FragmentRDFHandler m_handler;

    /**
     * @param fragment sampling modulus passed to {@link FragmentRDFHandler}
     * @param outFile  path of the Turtle file to write
     * @throws IllegalArgumentException if {@code outFile} cannot be opened for writing
     */
    public Fragment(int fragment, String outFile) {
        try {
            m_out = new FileOutputStream(outFile);
        } catch (FileNotFoundException e) {
            // Fail here instead of leaving a null writer that only blows up on the first statement.
            throw new IllegalArgumentException("Cannot open output file " + outFile, e);
        }
        m_writer = new TurtleWriter(m_out);
        m_handler = new FragmentRDFHandler(fragment, m_writer);
    }

    /**
     * Parses {@code fileName}, or every {@code *.ttl} file directly inside it when it is a
     * directory, feeding the statements to the sampling handler. Failures are logged, not rethrown.
     *
     * @param prefix   base URI handed to the Turtle parser
     * @param fileName a Turtle file or a directory of Turtle files
     */
    public void process(String prefix, String fileName) {
        try {
            TurtleParser parser = new TurtleParser();
            parser.setRDFHandler(m_handler);

            File f = new File(fileName);
            if (f.isDirectory()) {
                for (String tFileName : f.list()) {
                    if (tFileName.endsWith(".ttl")) {
                        parse(parser, fileName + Utility.FILE_SEPARATOR + tFileName, prefix);
                    }
                }
            }
            else {
                parse(parser, fileName, prefix);
            }
        } catch (Exception e) {
            e.printStackTrace();
            Utility.logError("aoaoaoao ~~~~~");
            return ;
        }
        Utility.logInfo("DONE");
    }

    /** Parses a single file, closing its stream whether or not parsing succeeds. */
    private static void parse(TurtleParser parser, String path, String baseURI) throws Exception {
        FileInputStream istream = new FileInputStream(path);
        try {
            parser.parse(istream, baseURI);
        } finally {
            istream.close();
        }
    }

    /** Ends the output document and closes the underlying file. */
    public void dispose() {
        try {
            m_writer.endRDF();
        } catch (RDFHandlerException e) {
            e.printStackTrace();
        } finally {
            try {
                m_out.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    public static void main(String[] args) {
        // ChEMBL
        Fragment f = new Fragment(100, "data_01.ttl");
        f.process("http://rdf.ebi.ac.uk/terms/chembl#", "/media/krr-nas-share/Yujiao/ontologies/bio2rdf/chembl/data");

        // Reactome alternative:
        //   new Fragment(1000, "data_001.ttl")
        //   base URI  http://www.biopax.org/release/biopax-level3.owl#
        //   input     /media/krr-nas-share/Yujiao/ontologies/bio2rdf/reactome/data.ttl

        f.dispose();
    }

}


/**
 * Forwards namespaces and comments to a {@link TurtleWriter} unchanged, but forwards a statement
 * only when a fresh random integer is divisible by {@code m_mod}.
 *
 * <p>{@link #startRDF()} starts the writer once only and {@link #endRDF()} is a no-op, so the
 * same handler can be reused across several parser runs that append to one output document;
 * the owner ends the document itself.
 */
class FragmentRDFHandler implements RDFHandler {

    int m_mod;
    TurtleWriter m_writer;
    Random m_rand = new Random();

    public FragmentRDFHandler(int mod, TurtleWriter writer) {
        m_mod = mod;
        m_writer = writer;
    }

    @Override
    public void endRDF() throws RDFHandlerException {
        // Intentionally empty: the output document outlives a single parser run.
    }

    @Override
    public void handleComment(String arg0) throws RDFHandlerException {
        m_writer.handleComment(arg0);
        Utility.logDebug("handling comment: " + arg0);
    }

    @Override
    public void handleNamespace(String arg0, String arg1) throws RDFHandlerException {
        m_writer.handleNamespace(arg0, arg1);
    }

    @Override
    public void handleStatement(Statement arg0) throws RDFHandlerException {
        // The remainder is zero for negative multiples too, so the sign of nextInt() does not matter.
        if (m_rand.nextInt() % m_mod == 0)
            m_writer.handleStatement(arg0);
    }

    boolean m_started = false;

    @Override
    public void startRDF() throws RDFHandlerException {
        if (m_started) return ;
        m_started = true;
        m_writer.startRDF();
    }

}
newline at end of file diff --git a/external/uk/ac/ox/cs/data/OntologyStatistics.java b/external/uk/ac/ox/cs/data/OntologyStatistics.java new file mode 100644 index 0000000..de40dda --- /dev/null +++ b/external/uk/ac/ox/cs/data/OntologyStatistics.java @@ -0,0 +1,15 @@ +package uk.ac.ox.cs.data; + +import org.semanticweb.owlapi.model.OWLOntology; +import uk.ac.ox.cs.pagoda.owl.OWLHelper; + +public class OntologyStatistics { + + public static void main(String[] args) { + args = ("/home/yzhou/ontologies/uobm/univ-bench-dl-minus.owl").split("\\ "); + + OWLOntology onto = OWLHelper.loadOntology(args[0]); + System.out.println(onto.getTBoxAxioms(true).size() + onto.getRBoxAxioms(true).size()); + } + +} diff --git a/external/uk/ac/ox/cs/data/PrepareQueries4Hydrowl.java b/external/uk/ac/ox/cs/data/PrepareQueries4Hydrowl.java new file mode 100644 index 0000000..10f1ac2 --- /dev/null +++ b/external/uk/ac/ox/cs/data/PrepareQueries4Hydrowl.java @@ -0,0 +1,184 @@ +package uk.ac.ox.cs.data; + +import java.io.*; +import java.util.HashMap; +import java.util.Map; + +import com.hp.hpl.jena.graph.Node; +import com.hp.hpl.jena.graph.Node_URI; +import com.hp.hpl.jena.graph.Node_Variable; +import com.hp.hpl.jena.query.Query; +import com.hp.hpl.jena.query.QueryFactory; +import com.hp.hpl.jena.sparql.core.TriplePath; +import com.hp.hpl.jena.sparql.core.Var; +import com.hp.hpl.jena.sparql.syntax.Element; +import com.hp.hpl.jena.sparql.syntax.ElementAssign; +import com.hp.hpl.jena.sparql.syntax.ElementBind; +import com.hp.hpl.jena.sparql.syntax.ElementData; +import com.hp.hpl.jena.sparql.syntax.ElementDataset; +import com.hp.hpl.jena.sparql.syntax.ElementExists; +import com.hp.hpl.jena.sparql.syntax.ElementFilter; +import com.hp.hpl.jena.sparql.syntax.ElementGroup; +import com.hp.hpl.jena.sparql.syntax.ElementMinus; +import com.hp.hpl.jena.sparql.syntax.ElementNamedGraph; +import com.hp.hpl.jena.sparql.syntax.ElementNotExists; +import com.hp.hpl.jena.sparql.syntax.ElementOptional; +import 
package uk.ac.ox.cs.data;

import java.io.*;
import java.util.HashMap;
import java.util.Map;

import com.hp.hpl.jena.graph.Node;
import com.hp.hpl.jena.graph.Node_URI;
import com.hp.hpl.jena.graph.Node_Variable;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.sparql.core.TriplePath;
import com.hp.hpl.jena.sparql.core.Var;
import com.hp.hpl.jena.sparql.syntax.Element;
import com.hp.hpl.jena.sparql.syntax.ElementAssign;
import com.hp.hpl.jena.sparql.syntax.ElementBind;
import com.hp.hpl.jena.sparql.syntax.ElementData;
import com.hp.hpl.jena.sparql.syntax.ElementDataset;
import com.hp.hpl.jena.sparql.syntax.ElementExists;
import com.hp.hpl.jena.sparql.syntax.ElementFilter;
import com.hp.hpl.jena.sparql.syntax.ElementGroup;
import com.hp.hpl.jena.sparql.syntax.ElementMinus;
import com.hp.hpl.jena.sparql.syntax.ElementNamedGraph;
import com.hp.hpl.jena.sparql.syntax.ElementNotExists;
import com.hp.hpl.jena.sparql.syntax.ElementOptional;
import com.hp.hpl.jena.sparql.syntax.ElementPathBlock;
import com.hp.hpl.jena.sparql.syntax.ElementService;
import com.hp.hpl.jena.sparql.syntax.ElementSubQuery;
import com.hp.hpl.jena.sparql.syntax.ElementTriplesBlock;
import com.hp.hpl.jena.sparql.syntax.ElementUnion;
import com.hp.hpl.jena.sparql.syntax.ElementVisitor;

import uk.ac.ox.cs.pagoda.query.QueryManager;
import uk.ac.ox.cs.pagoda.util.Namespace;

/**
 * Rewrites each SPARQL query of a query file as a rule of the form
 * {@code Q(?x,?y) <- atom1, atom2} and writes the rules, one per line, next to the input as
 * {@code <name>_hydrowl.sparql}.
 */
public class PrepareQueries4Hydrowl {

    private static final String EXTENSION = ".sparql";

    /**
     * @param args optional; {@code args[0]} is the query file. Defaults to a local example file.
     */
    public static void main(String[] args) throws FileNotFoundException {
        if (args.length == 0)
            args = new String[] {"/home/yzhou/temp/ontologies/reactome/example.sparql"};

        PrintStream stdout = System.out;
        PrintStream ps = new PrintStream(new File(outputPathFor(args[0])));
        System.setOut(ps);
        try {
            StringBuilder sb = new StringBuilder();
            for (String text : QueryManager.collectQueryTexts(args[0])) {
                Query query = QueryFactory.create(text);
                for (Var var : query.getProjectVars())
                    sb.append(sb.length() == 0 ? "Q(?" : ",?").append(var.getName());
                sb.append(") <- ");

                int bodyStart = sb.length();
                query.getQueryPattern().visit(new HydrowlGeneratorVisitor(sb));
                // Every atom ends in ", "; drop the last separator, but only if an atom was
                // written, otherwise the "<- " of the rule head would be truncated.
                if (sb.length() >= bodyStart + 2)
                    sb.setLength(sb.length() - 2);

                System.out.println(sb);
                sb.setLength(0);
            }
        } finally {
            // Do not leave System.out pointing at a closed stream.
            System.setOut(stdout);
            ps.close();
        }
    }

    /**
     * Inserts {@code _hydrowl} before a trailing {@code .sparql}; when the name has no such
     * suffix, {@code _hydrowl.sparql} is appended, so the result never equals the input path.
     */
    private static String outputPathFor(String queryFile) {
        if (queryFile.endsWith(EXTENSION))
            return queryFile.substring(0, queryFile.length() - EXTENSION.length()) + "_hydrowl" + EXTENSION;
        return queryFile + "_hydrowl" + EXTENSION;
    }

}

/**
 * Appends one atom per triple pattern to a shared buffer, each followed by {@code ", "}:
 * {@code <class>(s)} for {@code s rdf:type <class>} with a non-variable object, and
 * {@code <predicate>(s, o)} otherwise. Only groups and path blocks are translated; every other
 * query element is ignored.
 */
class HydrowlGeneratorVisitor implements ElementVisitor {

    StringBuilder m_text;

    public HydrowlGeneratorVisitor(StringBuilder text) {
        m_text = text;
    }

    @Override
    public void visit(ElementGroup el) {
        for (Element e : el.getElements())
            e.visit(this);
    }

    @Override
    public void visit(ElementPathBlock el) {
        // NOTE(review): assumes every predicate is a plain URI; a variable predicate or a
        // property path would fail on getURI() -- confirm against the query sets in use.
        for (TriplePath p : el.getPattern().getList()) {
            if (p.getPredicate().getURI().equals(Namespace.RDF_TYPE) && !p.getObject().isVariable())
                m_text.append(p.getObject().getURI()).append("(").append(getURI(p.getSubject())).append("), ");
            else
                m_text.append(p.getPredicate().getURI()).append("(").append(getURI(p.getSubject())).append(", ").append(getURI(p.getObject())).append("), ");
        }
    }

    /** Renders a URI node as its URI and a variable as {@code ?name}; other node kinds yield {@code null}. */
    private String getURI(Node node) {
        if (node instanceof Node_URI) return node.getURI();
        if (node instanceof Node_Variable) return "?" + node.getName();
        // System.out is redirected to the rule file while this runs, so report on stderr.
        System.err.println("Unknown node: " + node);
        return null;
    }

    // ---- elements below are not translated ----

    @Override
    public void visit(ElementTriplesBlock el) { }

    @Override
    public void visit(ElementFilter el) { }

    @Override
    public void visit(ElementAssign el) { }

    @Override
    public void visit(ElementBind el) { }

    @Override
    public void visit(ElementUnion el) { }

    @Override
    public void visit(ElementOptional el) { }

    @Override
    public void visit(ElementDataset el) { }

    @Override
    public void visit(ElementNamedGraph el) { }

    @Override
    public void visit(ElementExists el) { }

    @Override
    public void visit(ElementNotExists el) { }

    @Override
    public void visit(ElementMinus el) { }

    @Override
    public void visit(ElementService el) { }

    @Override
    public void visit(ElementSubQuery el) { }

    @Override
    public void visit(ElementData el) { }

}
package uk.ac.ox.cs.data;

import java.io.File;
import java.io.FileNotFoundException;
import java.util.Scanner;

import uk.ac.ox.cs.pagoda.query.QueryManager;

/**
 * Keeps only the queries that returned at least one answer in a previous run.
 *
 * <p>The queries of a query file are matched, in order, against the lines of a result log that
 * start with {@code "The number of answer tuples: "}. Queries whose count starts with {@code 0}
 * are dropped; the rest are printed and renumbered.
 */
public class QueryFilter {

    private static final String ANSWER_PREFIX = "The number of answer tuples: ";

    /**
     * @param args {@code args[0]} the query file, {@code args[1]} the result log; when fewer than
     *             two arguments are given, the NPD defaults are used
     * @throws IllegalStateException if the log holds fewer answer counts than there are queries
     */
    public static void main(String[] args) throws FileNotFoundException {
        if (args.length < 2)
            args = new String[] {"/media/krr-nas-share/Yujiao/ontologies/npd/queries/atomic.sparql",
                    "/home/yzhou/java-workspace/test-share/results_new/npd/pagoda"};

        Scanner answerReader = new Scanner(new File(args[1]));
        int totalNumberOfQueries = 0;
        int index = 0;
        try {
            for (String query : QueryManager.collectQueryTexts(args[0])) {
                String line = nextAnswerLine(answerReader);
                if (line == null)
                    throw new IllegalStateException("No answer count for query "
                            + (totalNumberOfQueries + 1) + " in " + args[1]);
                ++totalNumberOfQueries;
                // startsWith(prefix, offset) is false, not an exception, when the count is missing.
                if (line.startsWith("0", ANSWER_PREFIX.length())) continue;
                System.out.println("^[Query" + ++index + "]");
                System.out.println(query);
            }
        } finally {
            answerReader.close();
        }
        System.out.println("Total number of queries: " + totalNumberOfQueries);
    }

    /** Returns the next line that starts with the answer-count prefix, or {@code null} at end of input. */
    private static String nextAnswerLine(Scanner reader) {
        while (reader.hasNextLine()) {
            String line = reader.nextLine();
            if (line.startsWith(ANSWER_PREFIX)) return line;
        }
        return null;
    }

}
package uk.ac.ox.cs.data;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;

import org.semanticweb.owlapi.apibinding.OWLManager;
import org.semanticweb.owlapi.model.OWLAxiom;
import org.semanticweb.owlapi.model.OWLDataPropertyRangeAxiom;
import org.semanticweb.owlapi.model.OWLException;
import org.semanticweb.owlapi.model.OWLOntology;
import org.semanticweb.owlapi.model.OWLOntologyManager;

import uk.ac.ox.cs.pagoda.tester.PagodaTester;
import uk.ac.ox.cs.pagoda.util.Utility;

/**
 * Copies an ontology (imports closure included) without its data-property range axioms.
 */
public class RemoveDataPropertyRange {

    /**
     * Loads {@code file}, copies every axiom of its imports closure except
     * {@link OWLDataPropertyRangeAxiom}s into a new ontology with the same ontology IRI, and
     * saves the result in the working directory as {@code <base name>-noDPR.owl}.
     *
     * @param file path of the ontology document to process
     */
    public static void process(String file) throws OWLException, IOException {
        OWLOntologyManager originalManager = OWLManager.createOWLOntologyManager();
        OWLOntologyManager manager = OWLManager.createOWLOntologyManager();

        OWLOntology originalOntology = originalManager.loadOntologyFromOntologyDocument(new File(file));
        OWLOntology ontology = manager.createOntology(originalOntology.getOntologyID().getOntologyIRI());

        for (OWLOntology onto : originalOntology.getImportsClosure())
            for (OWLAxiom axiom : onto.getAxioms()) {
                if (!(axiom instanceof OWLDataPropertyRangeAxiom))
                    manager.addAxiom(ontology, axiom);
            }
        originalManager.removeOntology(originalOntology);

        String dest = destinationFor(file);
        FileOutputStream out = new FileOutputStream(dest);
        try {
            manager.saveOntology(ontology, out);
        } finally {
            out.close();
        }
        System.out.println("The processed ontology is saved in " + dest + " successfully.");
        manager.removeOntology(ontology);
    }

    /**
     * Derives the output name from the last path component only: its final extension (if any)
     * is replaced by {@code -noDPR.owl}. Looking for the dot in the whole path would pick up
     * dots in directory names, and a name without any dot used to raise an exception.
     */
    private static String destinationFor(String file) {
        String fileName = file.substring(file.lastIndexOf(Utility.FILE_SEPARATOR) + 1);
        int dot = fileName.lastIndexOf('.');
        String baseName = dot < 0 ? fileName : fileName.substring(0, dot);
        return baseName + "-noDPR.owl";
    }

    public static void main(String[] args) {
        try {
            process(PagodaTester.chembl_tbox);
        } catch (OWLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }

    }

}
package uk.ac.ox.cs.data;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;

import org.openrdf.model.Statement;
import org.openrdf.rio.RDFHandler;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.RDFParseException;
import org.openrdf.rio.turtle.TurtleParser;
import org.openrdf.rio.turtle.TurtleWriter;

/**
 * Copies a Turtle file while dropping the triples that declare an ontology or import one.
 */
public class RemoveImportInTurtle {

    /**
     * @param args {@code args[0]} input Turtle file, {@code args[1]} output Turtle file,
     *             {@code args[2]} base URI for parsing; with no arguments the NPD defaults are used
     */
    public static void main(String[] args) throws RDFParseException, RDFHandlerException, IOException {
        if (args.length == 0)
            args = new String[] {
                    "/media/krr-nas-share/Yujiao/ontologies/npd/data/npd-data-dump-minus-datatype-new.ttl",
                    "/users/yzhou/temp/npd.ttl",
                    "http://sws.ifi.uio.no/data/npd-v2/#"
            };

        TurtleParser parser = new TurtleParser();
        // Open the input first so that a missing input does not leave a truncated output behind.
        FileInputStream in = new FileInputStream(new File(args[0]));
        try {
            FileOutputStream out = new FileOutputStream(new File(args[1]));
            try {
                parser.setRDFHandler(new LocalRDFHandler(new TurtleWriter(out)));
                parser.parse(in, args[2]);
            } finally {
                out.close();
            }
        } finally {
            in.close();
        }
    }

}

/**
 * Pass-through handler that forwards everything to a {@link TurtleWriter} except statements
 * whose object is {@code owl:Ontology} or whose predicate is {@code owl:imports}.
 */
class LocalRDFHandler implements RDFHandler {

    private static final String OWL_ONTOLOGY = "http://www.w3.org/2002/07/owl#Ontology";
    private static final String OWL_IMPORTS = "http://www.w3.org/2002/07/owl#imports";

    TurtleWriter m_writer;

    public LocalRDFHandler(TurtleWriter writer) {
        m_writer = writer;
    }

    @Override
    public void startRDF() throws RDFHandlerException {
        m_writer.startRDF();
    }

    @Override
    public void endRDF() throws RDFHandlerException {
        m_writer.endRDF();
    }

    @Override
    public void handleNamespace(String prefix, String uri)
            throws RDFHandlerException {
        m_writer.handleNamespace(prefix, uri);
    }

    @Override
    public void handleStatement(Statement st) throws RDFHandlerException {
        if (st.getObject().toString().equals(OWL_ONTOLOGY))
            return ;
        if (st.getPredicate().toString().equals(OWL_IMPORTS))
            return ;
        m_writer.handleStatement(st);
    }

    @Override
    public void handleComment(String comment) throws RDFHandlerException {
        m_writer.handleComment(comment);
    }

}
+ } + + public void rewriteUOBM15() { + rewriteUOBM(15); + } + + public void rewriteUOBM300() { + rewriteUOBM(300); + } + + public void testUOBM400() { + rewriteUOBM(400); + } + + public void rewriteLUBM(int number) { + rewrite( + "http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#", + "/home/yzhou/krr-nas-share/Yujiao/ontologies/lubm/data/lubm" + number + "_owl", + "/home/yzhou/krr-nas-share/Yujiao/ontologies/lubm/data/lubm" + number + ".ttl" + ); + } + + public void testLUBM900() { + rewriteLUBM(900); + } + + public static void main(String[] args) { +// "http://identifiers.org/biomodels.vocabulary#", +// "/home/yzhou/krr-nas-share/Yujiao/BioModels/sbml2rdfall", +// "/users/yzhou/ontologies/biomodels"); + +// "http://www.biopax.org/release/biopax-level3.owl#", +// "/home/scratch/yzhou/ontologies/bio2rdf/reactome/biopaxrdf", +// "/home/scratch/yzhou/ontologies/bio2rdf/reactome" + + new WriteIntoTurtle().rewriteUOBM(20); + +// args = new String[] { +// "http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#", +// "/home/yzhou/krr-nas-share/Yujiao/ontologies/lubm/data/lubm400_owl", +// "/home/yzhou/krr-nas-share/Yujiao/ontologies/lubm/data/lubm400.ttl" +// }; +// +// new WriteIntoTurtle().rewrite(args); + } + + public void rewrite(String... 
package uk.ac.ox.cs.data;

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;

import org.openrdf.model.Statement;
import org.openrdf.rio.RDFHandler;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.RDFParseException;
import org.openrdf.rio.RDFParser;
import org.openrdf.rio.RDFWriter;
import org.openrdf.rio.ntriples.NTriplesWriter;
import org.openrdf.rio.turtle.TurtleParser;


/**
 * Converts a Turtle file into an N-Triples file written next to it.
 */
public class WriteToNTriple {

    private static final String TURTLE_EXTENSION = ".ttl";

    /**
     * @param args {@code args[0]} input Turtle file, {@code args[1]} base URI for parsing;
     *             with no arguments the Reactome defaults are used
     */
    public static void main(String... args) throws RDFParseException, RDFHandlerException, IOException {
        if (args.length == 0)
            args = new String[] {"/media/krr-nas-share/Yujiao/ontologies/bio2rdf/reactome/data/data.ttl",
                    "http://www.biopax.org/release/biopax-level3.owl#"};

        RDFParser parser = new TurtleParser();
        FileInputStream in = new FileInputStream(args[0]);
        try {
            FileOutputStream out = new FileOutputStream(outputPathFor(args[0]));
            try {
                final RDFWriter writer = new NTriplesWriter(out);

                // Straight delegation: every parser event is forwarded to the writer.
                parser.setRDFHandler(new RDFHandler() {

                    @Override
                    public void startRDF() throws RDFHandlerException {
                        writer.startRDF();
                    }

                    @Override
                    public void handleStatement(Statement arg0) throws RDFHandlerException {
                        writer.handleStatement(arg0);
                    }

                    @Override
                    public void handleNamespace(String arg0, String arg1) throws RDFHandlerException {
                        writer.handleNamespace(arg0, arg1);
                    }

                    @Override
                    public void handleComment(String arg0) throws RDFHandlerException {
                        writer.handleComment(arg0);
                    }

                    @Override
                    public void endRDF() throws RDFHandlerException {
                        writer.endRDF();
                    }
                });

                parser.parse(in, args[1]);
            } finally {
                out.close();
            }
        } finally {
            in.close();
        }
    }

    /**
     * Swaps a trailing {@code .ttl} for {@code .nt}, or appends {@code .nt} when the name does
     * not end in {@code .ttl}. A plain {@code String.replace} could return the input path
     * unchanged (truncating the input when opened for writing) or rewrite a directory name.
     */
    private static String outputPathFor(String turtlePath) {
        if (turtlePath.endsWith(TURTLE_EXTENSION))
            return turtlePath.substring(0, turtlePath.length() - TURTLE_EXTENSION.length()) + ".nt";
        return turtlePath + ".nt";
    }
}
package uk.ac.ox.cs.data.datatype;

import java.io.File;

import org.semanticweb.owlapi.apibinding.OWLManager;
import org.semanticweb.owlapi.model.IRI;
import org.semanticweb.owlapi.model.OWLAxiom;
import org.semanticweb.owlapi.model.OWLOntology;
import org.semanticweb.owlapi.model.OWLOntologyCreationException;
import org.semanticweb.owlapi.model.OWLOntologyManager;
import org.semanticweb.owlapi.model.OWLOntologyStorageException;
import uk.ac.ox.cs.pagoda.owl.OWLHelper;

/**
 * Writes a copy of an ontology (imports closure included) that keeps only the axioms
 * mentioning neither a datatype nor a data property. The copy is saved next to the input
 * with {@code -minus-datatype} inserted before the {@code .owl} extension.
 */
public class DataPropertyEliminator {

    private static final String FLAG = "-minus-datatype";

    private static final String DEFAULT_ONTOLOGY =
            "/home/yzhou/ontologies/dbpedia/integratedOntology-all-in-one.owl";

    /**
     * @param args optional; {@code args[0]} is the ontology file. Defaults to the integrated
     *             DBpedia ontology.
     */
    public static void main(String[] args) {
        // A supplied path used to be silently discarded in favour of the default.
        String file = args.length == 0 ? DEFAULT_ONTOLOGY : args[0];
        String newFile = withFlag(file);

        OWLOntologyManager manager = OWLManager.createOWLOntologyManager();
        OWLOntology onto = OWLHelper.loadOntology(manager, file);
        OWLOntology newOntology;

        try {
            if (onto.getOntologyID().getOntologyIRI() != null) {
                String iri = onto.getOntologyID().getOntologyIRI().toString();
                newOntology = manager.createOntology(IRI.create(withFlag(iri)));
            }
            else newOntology = manager.createOntology();

            for (OWLOntology o : onto.getImportsClosure())
                for (OWLAxiom axiom : o.getAxioms()) {
                    if (axiom.getDatatypesInSignature().isEmpty() && axiom.getDataPropertiesInSignature().isEmpty()) {
                        manager.addAxiom(newOntology, axiom);
                    }
                }

            manager.saveOntology(newOntology, IRI.create(new File(newFile)));
        }
        catch (OWLOntologyCreationException e) {
            e.printStackTrace();
        }
        catch (OWLOntologyStorageException e) {
            e.printStackTrace();
        }

    }

    /**
     * Inserts {@link #FLAG} before the last {@code .owl} in {@code name}, or appends it when
     * {@code name} contains none. The result therefore always differs from the input, so the
     * copy can neither overwrite the source file nor reuse the IRI of the ontology that is
     * already loaded in the same manager.
     */
    private static String withFlag(String name) {
        int pos = name.lastIndexOf(".owl");
        if (pos < 0) return name + FLAG;
        return name.substring(0, pos) + FLAG + name.substring(pos);
    }

}
org.semanticweb.owlapi.model.OWLDataExactCardinality; +import org.semanticweb.owlapi.model.OWLDataFactory; +import org.semanticweb.owlapi.model.OWLDataHasValue; +import org.semanticweb.owlapi.model.OWLDataIntersectionOf; +import org.semanticweb.owlapi.model.OWLDataMaxCardinality; +import org.semanticweb.owlapi.model.OWLDataMinCardinality; +import org.semanticweb.owlapi.model.OWLDataOneOf; +import org.semanticweb.owlapi.model.OWLDataProperty; +import org.semanticweb.owlapi.model.OWLDataPropertyAssertionAxiom; +import org.semanticweb.owlapi.model.OWLDataPropertyDomainAxiom; +import org.semanticweb.owlapi.model.OWLDataPropertyExpression; +import org.semanticweb.owlapi.model.OWLDataPropertyRangeAxiom; +import org.semanticweb.owlapi.model.OWLDataRange; +import org.semanticweb.owlapi.model.OWLDataSomeValuesFrom; +import org.semanticweb.owlapi.model.OWLDataUnionOf; +import org.semanticweb.owlapi.model.OWLDatatype; +import org.semanticweb.owlapi.model.OWLDatatypeDefinitionAxiom; +import org.semanticweb.owlapi.model.OWLDatatypeRestriction; +import org.semanticweb.owlapi.model.OWLDeclarationAxiom; +import org.semanticweb.owlapi.model.OWLDifferentIndividualsAxiom; +import org.semanticweb.owlapi.model.OWLDisjointClassesAxiom; +import org.semanticweb.owlapi.model.OWLDisjointDataPropertiesAxiom; +import org.semanticweb.owlapi.model.OWLDisjointObjectPropertiesAxiom; +import org.semanticweb.owlapi.model.OWLDisjointUnionAxiom; +import org.semanticweb.owlapi.model.OWLEntity; +import org.semanticweb.owlapi.model.OWLEquivalentClassesAxiom; +import org.semanticweb.owlapi.model.OWLEquivalentDataPropertiesAxiom; +import org.semanticweb.owlapi.model.OWLEquivalentObjectPropertiesAxiom; +import org.semanticweb.owlapi.model.OWLFacetRestriction; +import org.semanticweb.owlapi.model.OWLFunctionalDataPropertyAxiom; +import org.semanticweb.owlapi.model.OWLFunctionalObjectPropertyAxiom; +import org.semanticweb.owlapi.model.OWLHasKeyAxiom; +import org.semanticweb.owlapi.model.OWLIndividual; 
+import org.semanticweb.owlapi.model.OWLInverseFunctionalObjectPropertyAxiom; +import org.semanticweb.owlapi.model.OWLInverseObjectPropertiesAxiom; +import org.semanticweb.owlapi.model.OWLIrreflexiveObjectPropertyAxiom; +import org.semanticweb.owlapi.model.OWLLiteral; +import org.semanticweb.owlapi.model.OWLNamedIndividual; +import org.semanticweb.owlapi.model.OWLNegativeDataPropertyAssertionAxiom; +import org.semanticweb.owlapi.model.OWLNegativeObjectPropertyAssertionAxiom; +import org.semanticweb.owlapi.model.OWLObjectAllValuesFrom; +import org.semanticweb.owlapi.model.OWLObjectComplementOf; +import org.semanticweb.owlapi.model.OWLObjectExactCardinality; +import org.semanticweb.owlapi.model.OWLObjectHasSelf; +import org.semanticweb.owlapi.model.OWLObjectHasValue; +import org.semanticweb.owlapi.model.OWLObjectIntersectionOf; +import org.semanticweb.owlapi.model.OWLObjectInverseOf; +import org.semanticweb.owlapi.model.OWLObjectMaxCardinality; +import org.semanticweb.owlapi.model.OWLObjectMinCardinality; +import org.semanticweb.owlapi.model.OWLObjectOneOf; +import org.semanticweb.owlapi.model.OWLObjectProperty; +import org.semanticweb.owlapi.model.OWLObjectPropertyAssertionAxiom; +import org.semanticweb.owlapi.model.OWLObjectPropertyDomainAxiom; +import org.semanticweb.owlapi.model.OWLObjectPropertyExpression; +import org.semanticweb.owlapi.model.OWLObjectPropertyRangeAxiom; +import org.semanticweb.owlapi.model.OWLObjectSomeValuesFrom; +import org.semanticweb.owlapi.model.OWLObjectUnionOf; +import org.semanticweb.owlapi.model.OWLObjectVisitorEx; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.model.OWLOntologyCreationException; +import org.semanticweb.owlapi.model.OWLOntologyManager; +import org.semanticweb.owlapi.model.OWLOntologyStorageException; +import org.semanticweb.owlapi.model.OWLReflexiveObjectPropertyAxiom; +import org.semanticweb.owlapi.model.OWLSameIndividualAxiom; +import 
org.semanticweb.owlapi.model.OWLSubAnnotationPropertyOfAxiom; +import org.semanticweb.owlapi.model.OWLSubClassOfAxiom; +import org.semanticweb.owlapi.model.OWLSubDataPropertyOfAxiom; +import org.semanticweb.owlapi.model.OWLSubObjectPropertyOfAxiom; +import org.semanticweb.owlapi.model.OWLSubPropertyChainOfAxiom; +import org.semanticweb.owlapi.model.OWLSymmetricObjectPropertyAxiom; +import org.semanticweb.owlapi.model.OWLTransitiveObjectPropertyAxiom; +import org.semanticweb.owlapi.model.SWRLBuiltInAtom; +import org.semanticweb.owlapi.model.SWRLClassAtom; +import org.semanticweb.owlapi.model.SWRLDataPropertyAtom; +import org.semanticweb.owlapi.model.SWRLDataRangeAtom; +import org.semanticweb.owlapi.model.SWRLDifferentIndividualsAtom; +import org.semanticweb.owlapi.model.SWRLIndividualArgument; +import org.semanticweb.owlapi.model.SWRLLiteralArgument; +import org.semanticweb.owlapi.model.SWRLObjectPropertyAtom; +import org.semanticweb.owlapi.model.SWRLRule; +import org.semanticweb.owlapi.model.SWRLSameIndividualAtom; +import org.semanticweb.owlapi.model.SWRLVariable; + +import uk.ac.ox.cs.data.dbpedia.DataFilterRDFHandler; +import uk.ac.ox.cs.pagoda.owl.OWLHelper; +import uk.ac.ox.cs.pagoda.util.Utility; + +public class DataToObject { + + private static final String FLAG = "-replaced"; + public static final String PREFIX_LITERAL = "http://www.datatypevalue.org#"; + + String m_ontoFile, m_dataFile; + String m_newOntoFile, m_newDataFile; + + Set m_dataProperties = new HashSet(); + String m_prefix; + + public DataToObject(String prefix, String ontoFile, String dataFile) { + m_prefix = prefix; + + m_ontoFile = ontoFile; + String ext = m_ontoFile.substring(m_ontoFile.lastIndexOf(".")); + m_newOntoFile = m_ontoFile.replace(ext, FLAG + ext); + + if (dataFile == null || dataFile.isEmpty()) + m_dataFile = m_newDataFile = null; + else { + m_dataFile = dataFile; + m_newDataFile = m_dataFile.replace(".ttl", FLAG + ".ttl"); + } + } + + public static void main(String[] args) { + 
DataToObject p = new DataToObject( +// "http://dbpedia.org/ontology/", +// "/home/yzhou/ontologies/dbpedia/integratedOntology-all-in-one.owl", +// "/home/yzhou/workspace/payQ/ontologies/dbpedia/dbpedia.ttl"); + + // for NPD dataset +// "http://sws.ifi.uio.no/vocab/npd-all.owl", +// "/home/yzhou/ontologies/npd/npd-all.owl", +// "/home/yzhou/ontologies/npd/data/npd-data-dump-processed.ttl"); + + // for ChEmBL + "http://rdf.ebi.ac.uk/terms/chembl#", + "/home/scratch/yzhou/ontologies/bio2rdf/chembl/cco (copy).ttl", + null); + + p.processOntology(); + Utility.logInfo("Ontology Processing DONE."); + + p.processData(); + Utility.logInfo("Data Processing DONE."); } + + public void setOutputOntologyFile(String file) { + m_newOntoFile = file; + } + + public void setOutputDataFile(String file) { + m_newDataFile = file; + } + + public String processData() { + if (m_dataFile == null) + return null; + + TurtleParser parser = new TurtleParser(); + TurtleWriter writer; + try { + writer = new TurtleWriter(new FileOutputStream(m_newDataFile)); + } catch (FileNotFoundException e) { + e.printStackTrace(); + new File(m_newDataFile).delete(); + return null; + } + + parser.setRDFHandler(new DataToObjectRDFHandler(writer, m_dataProperties)); + try { + parser.parse(new FileInputStream(m_dataFile), m_prefix); + } catch (RDFParseException e) { + e.printStackTrace(); + } catch (RDFHandlerException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + + return m_newDataFile; + } + + public String processOntology() { + OWLOntologyManager manager = OWLManager.createOWLOntologyManager(); + OWLOntology newOntology, oldOntology; + oldOntology = OWLHelper.loadOntology(manager, m_ontoFile); + for (OWLDataProperty property: oldOntology.getDataPropertiesInSignature()) + m_dataProperties.add(property.toStringID()); + + DataToObjectVisitor visitor = new DataToObjectVisitor(manager); + newOntology = (OWLOntology) oldOntology.accept(visitor); + + try { + 
manager.saveOntology(newOntology, IRI.create(new File(m_newOntoFile))); + } catch (OWLOntologyStorageException e) { + e.printStackTrace(); + } + + return m_newOntoFile; + } + + protected class DataToObjectVisitor implements OWLObjectVisitorEx { + + private final OWLOntologyManager m_manager; + private final OWLDataFactory m_factory; + + public DataToObjectVisitor(OWLOntologyManager man) { + m_manager = man; + m_factory = man.getOWLDataFactory(); + } + + private void reportUnsupportedFeature() { + Utility.logError("Unsupported features"); + } + + @Override + public Object visit(OWLDataProperty property) { + return m_factory.getOWLObjectProperty(property.getIRI()); + } + + @Override + public Object visit(OWLObjectOneOf ce) { + return ce; + } + + @Override + public Object visit(OWLDataHasValue node) { + return m_factory.getOWLObjectHasValue( + (OWLObjectPropertyExpression) node.getProperty().accept(this), + (OWLIndividual) node.getValue().accept(this) + ); + } + + @Override + public Object visit(OWLDataSomeValuesFrom node) { + OWLClassExpression exp = null; + try { + exp = m_factory.getOWLObjectSomeValuesFrom( + (OWLObjectPropertyExpression) node.getProperty().accept(this), + (OWLClassExpression) node.getFiller().accept(this) + ); + return exp; + } catch (Exception e) { + e.printStackTrace(); + } + return node; + } + + @Override + public Object visit(OWLDataIntersectionOf node) { + Set exps = new HashSet(); + for (OWLDataRange range: node.getOperands()) + exps.add((OWLClassExpression) range.accept(this)); + + return m_factory.getOWLObjectIntersectionOf(exps); + } + + @Override + public Object visit(OWLSubDataPropertyOfAxiom axiom) { + return m_factory.getOWLSubObjectPropertyOfAxiom( + (OWLObjectPropertyExpression) axiom.getSubProperty().accept(this), + (OWLObjectPropertyExpression) axiom.getSuperProperty().accept(this)); + } + + @Override + public Object visit(OWLEquivalentDataPropertiesAxiom axiom) { + Set props = new HashSet(); + for (OWLDataPropertyExpression 
dataProperty: axiom.getProperties()) + props.add((OWLObjectPropertyExpression) dataProperty.accept(this)); + return m_factory.getOWLEquivalentObjectPropertiesAxiom(props); + } + + @Override + public Object visit(OWLTransitiveObjectPropertyAxiom axiom) { + return axiom; + } + + @Override + public Object visit(OWLReflexiveObjectPropertyAxiom axiom) { + return axiom; + } + + @Override + public Object visit(OWLDataPropertyDomainAxiom axiom) { + return m_factory.getOWLObjectPropertyDomainAxiom( + (OWLObjectPropertyExpression) axiom.getProperty().accept(this), + (OWLClassExpression) axiom.getDomain().accept(this) + ); + } + + @Override + public Object visit(OWLDataPropertyRangeAxiom axiom) { + return m_factory.getOWLObjectPropertyRangeAxiom( + (OWLObjectPropertyExpression) axiom.getProperty().accept(this), + (OWLClassExpression) axiom.getRange().accept(this) + ); + } + + @Override + public Object visit(OWLDataPropertyAssertionAxiom axiom) { + return m_factory.getOWLObjectPropertyAssertionAxiom( + (OWLObjectPropertyExpression) axiom.getProperty().accept(this), + axiom.getSubject(), + (OWLIndividual) axiom.getObject().accept(this) + ); + } + + @Override + public Object visit(OWLNegativeDataPropertyAssertionAxiom axiom) { + return m_factory.getOWLNegativeObjectPropertyAssertionAxiom( + (OWLObjectPropertyExpression) axiom.getProperty().accept(this), + axiom.getSubject(), + (OWLIndividual) axiom.getObject().accept(this) + ); + } + + @Override + public Object visit(OWLNegativeObjectPropertyAssertionAxiom axiom) { + return axiom; + } + + @Override + public Object visit(OWLFunctionalDataPropertyAxiom axiom) { + return m_factory.getOWLFunctionalObjectPropertyAxiom( + (OWLObjectPropertyExpression) axiom.getProperty().accept(this) + ); + } + + @Override + public Object visit(OWLHasKeyAxiom axiom) { + Set props = new HashSet(axiom.getObjectPropertyExpressions()); + for (OWLDataPropertyExpression dataProperty: axiom.getDataPropertyExpressions()) + 
props.add((OWLObjectPropertyExpression) dataProperty.accept(this)); + return m_factory.getOWLHasKeyAxiom( + (OWLClassExpression) axiom.getClassExpression().accept(this), + props + ); + } + + + @Override + public Object visit(OWLObjectHasSelf node) { + return node; + } + + + @Override + public Object visit(OWLDataOneOf node) { + Set individuals = new HashSet(); + for (OWLLiteral literal: node.getValues()) + individuals.add((OWLIndividual) literal.accept(this)); + return m_factory.getOWLObjectOneOf(individuals); + } + + + + @Override + public Object visit(OWLSubPropertyChainOfAxiom axiom) { + return axiom; + } + + @Override + public Object visit(OWLOntology ontology) { + OWLOntology newOntology = null; + try { + if (ontology.getOntologyID().getOntologyIRI() != null) { + String ontologyIRI = ontology.getOntologyID().getOntologyIRI().toString(); + if (ontologyIRI.contains(".owl")) + ontologyIRI = ontologyIRI.replace(".owl", FLAG + ".owl"); + else + ontologyIRI += FLAG; + + newOntology = m_manager.createOntology(IRI.create(ontologyIRI)); + } + else newOntology = m_manager.createOntology(); + + for (OWLOntology onto: ontology.getImportsClosure()) + for (OWLAxiom axiom: onto.getAxioms()) { + OWLAxiom newAxiom = (OWLAxiom) axiom.accept(this); + m_manager.addAxiom(newOntology, newAxiom); + } + + } catch (OWLOntologyCreationException e) { + e.printStackTrace(); + } + + return newOntology; + } + + @Override + public Object visit(OWLSubClassOfAxiom axiom) { + return m_factory.getOWLSubClassOfAxiom( + (OWLClassExpression) axiom.getSubClass().accept(this), + (OWLClassExpression) axiom.getSuperClass().accept(this) + ); + } + + @Override + public Object visit(OWLAsymmetricObjectPropertyAxiom axiom) { + return axiom; + } + + @Override + public Object visit(OWLDisjointClassesAxiom axiom) { + Set exps = new HashSet(); + for (OWLClassExpression exp: axiom.getClassExpressions()) + exps.add((OWLClassExpression) exp.accept(this)); + return m_factory.getOWLDisjointClassesAxiom(exps); + } 
+ + @Override + public Object visit(OWLObjectPropertyDomainAxiom axiom) { + return m_factory.getOWLObjectPropertyDomainAxiom( + axiom.getProperty(), + (OWLClassExpression) axiom.getDomain().accept(this) + ); + } + + @Override + public Object visit(OWLEquivalentObjectPropertiesAxiom axiom) { + return axiom; + } + + @Override + public Object visit(OWLDifferentIndividualsAxiom axiom) { + return axiom; + } + + @Override + public Object visit(OWLDisjointDataPropertiesAxiom axiom) { + Set props = new HashSet(); + for (OWLDataPropertyExpression dataProperty: axiom.getProperties()) + props.add((OWLObjectPropertyExpression) dataProperty.accept(this)); + return m_factory.getOWLDisjointObjectPropertiesAxiom(props); + } + + @Override + public Object visit(OWLDisjointObjectPropertiesAxiom axiom) { + return axiom; + } + + @Override + public Object visit(OWLObjectPropertyRangeAxiom axiom) { + return axiom; + } + + @Override + public Object visit(OWLObjectPropertyAssertionAxiom axiom) { + return axiom; + } + + @Override + public Object visit(OWLFunctionalObjectPropertyAxiom axiom) { + return axiom; + } + + @Override + public Object visit(OWLSubObjectPropertyOfAxiom axiom) { + return axiom; + } + + @Override + public Object visit(OWLDisjointUnionAxiom axiom) { + Set exps = new HashSet(); + for (OWLClassExpression exp: axiom.getClassExpressions()) + exps.add((OWLClassExpression) exp.accept(this)); + return m_factory.getOWLDisjointUnionAxiom((OWLClass) axiom.getOWLClass().accept(this), exps); + } + + @Override + public Object visit(OWLDeclarationAxiom axiom) { + OWLEntity entity = axiom.getEntity(); + if (entity instanceof OWLDataProperty) + return m_factory.getOWLDeclarationAxiom(m_factory.getOWLObjectProperty(entity.getIRI())); + else if (entity instanceof OWLDatatype) + return m_factory.getOWLDeclarationAxiom((OWLClass) entity.accept(this)); + else + return axiom; + } + + @Override + public Object visit(OWLAnnotationAssertionAxiom axiom) { + return axiom; + } + + @Override + 
public Object visit(OWLSymmetricObjectPropertyAxiom axiom) { + return axiom; + } + + @Override + public Object visit(OWLClassAssertionAxiom axiom) { + return m_factory.getOWLClassAssertionAxiom( + (OWLClassExpression) axiom.getClassExpression().accept(this), + axiom.getIndividual()); + } + + @Override + public Object visit(OWLEquivalentClassesAxiom axiom) { + Set exps = new HashSet(); + for (OWLClassExpression exp: axiom.getClassExpressions()) + exps.add((OWLClassExpression) exp.accept(this)); + return m_factory.getOWLEquivalentClassesAxiom(exps); + } + + @Override + public Object visit(OWLIrreflexiveObjectPropertyAxiom axiom) { + return axiom; + } + + @Override + public Object visit(OWLInverseFunctionalObjectPropertyAxiom axiom) { + return axiom; + } + + @Override + public Object visit(OWLSameIndividualAxiom axiom) { + return axiom; + } + + @Override + public Object visit(OWLInverseObjectPropertiesAxiom axiom) { + return axiom; + } + + @Override + public Object visit(OWLDatatypeDefinitionAxiom axiom) { + reportUnsupportedFeature(); + return null; + } + + @Override + public Object visit(SWRLRule rule) { + reportUnsupportedFeature(); + return null; + } + + @Override + public Object visit(OWLSubAnnotationPropertyOfAxiom axiom) { + return axiom; + } + + @Override + public Object visit(OWLAnnotationPropertyDomainAxiom axiom) { + return axiom; + } + + @Override + public Object visit(OWLAnnotationPropertyRangeAxiom axiom) { + return axiom; + } + + @Override + public Object visit(OWLClass ce) { + return ce; + } + + @Override + public Object visit(OWLObjectIntersectionOf ce) { + Set exps = new HashSet(); + for (OWLClassExpression exp: ce.getOperands()) + exps.add((OWLClassExpression) exp.accept(this)); + return m_factory.getOWLObjectIntersectionOf(exps); + } + + @Override + public Object visit(OWLObjectUnionOf ce) { + Set exps = new HashSet(); + for (OWLClassExpression exp: ce.getOperands()) + exps.add((OWLClassExpression) exp.accept(this)); + return 
m_factory.getOWLObjectUnionOf(exps); + } + + @Override + public Object visit(OWLObjectComplementOf ce) { + return m_factory.getOWLObjectComplementOf((OWLClassExpression) ce.getOperand().accept(this)); + } + + @Override + public Object visit(OWLObjectSomeValuesFrom ce) { + return m_factory.getOWLObjectSomeValuesFrom(ce.getProperty(), (OWLClassExpression) ce.getFiller().accept(this)); + } + + @Override + public Object visit(OWLObjectAllValuesFrom ce) { + return m_factory.getOWLObjectAllValuesFrom(ce.getProperty(), (OWLClassExpression) ce.getFiller().accept(this)); + } + + @Override + public Object visit(OWLObjectHasValue ce) { + return ce; + } + + @Override + public Object visit(OWLObjectMinCardinality ce) { + if (ce.getFiller().equals(m_factory.getOWLThing())) + return ce; + else + return m_factory.getOWLObjectMinCardinality( + ce.getCardinality(), + ce.getProperty(), + (OWLClassExpression) ce.getFiller().accept(this) + ); + } + + @Override + public Object visit(OWLObjectExactCardinality ce) { + if (ce.getFiller().equals(m_factory.getOWLThing())) + return ce; + else + return m_factory.getOWLObjectExactCardinality( + ce.getCardinality(), + ce.getProperty(), + (OWLClassExpression) ce.getFiller().accept(this) + ); + } + + @Override + public Object visit(OWLObjectMaxCardinality ce) { + if (ce.getFiller().equals(m_factory.getOWLThing())) + return ce; + else + return m_factory.getOWLObjectMaxCardinality( + ce.getCardinality(), + ce.getProperty(), + (OWLClassExpression) ce.getFiller().accept(this) + ); + } + + @Override + public Object visit(OWLDataAllValuesFrom ce) { + return m_factory.getOWLObjectAllValuesFrom( + (OWLObjectPropertyExpression) ce.getProperty().accept(this), + (OWLClassExpression) ce.getFiller().accept(this) + ); + } + + @Override + public Object visit(OWLDataMinCardinality ce) { + if (ce.getFiller().equals(m_factory.getTopDatatype())) + return m_factory.getOWLObjectMinCardinality( + ce.getCardinality(), + (OWLObjectPropertyExpression) 
ce.getProperty().accept(this) + ); + else + return m_factory.getOWLObjectMinCardinality( + ce.getCardinality(), + (OWLObjectPropertyExpression) ce.getProperty().accept(this), + (OWLClassExpression) ce.getFiller().accept(this) + ); + } + + @Override + public Object visit(OWLDataExactCardinality ce) { + if (ce.getFiller().equals(m_factory.getTopDatatype())) + return m_factory.getOWLObjectExactCardinality( + ce.getCardinality(), + (OWLObjectPropertyExpression) ce.getProperty().accept(this) + ); + else + return m_factory.getOWLObjectExactCardinality( + ce.getCardinality(), + (OWLObjectPropertyExpression) ce.getProperty().accept(this), + (OWLClassExpression) ce.getFiller().accept(this) + ); + } + + @Override + public Object visit(OWLDataMaxCardinality ce) { + if (ce.getFiller().equals(m_factory.getTopDatatype())) + return m_factory.getOWLObjectMaxCardinality( + ce.getCardinality(), + (OWLObjectPropertyExpression) ce.getProperty().accept(this) + ); + else + return m_factory.getOWLObjectMaxCardinality( + ce.getCardinality(), + (OWLObjectPropertyExpression) ce.getProperty().accept(this), + (OWLClassExpression) ce.getFiller().accept(this) + ); + } + + @Override + public Object visit(OWLDatatype node) { + return m_factory.getOWLClass(node.getIRI()); + } + + @Override + public Object visit(OWLDataComplementOf node) { + return m_factory.getOWLObjectComplementOf( + (OWLClassExpression) node.getDataRange().accept(this) + ); + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLDataVisitorEx#visit(org.semanticweb.owlapi.model.OWLDataUnionOf) + */ + @Override + public Object visit(OWLDataUnionOf node) { + Set exps = new HashSet(); + for (OWLDataRange range: node.getOperands()) + exps.add((OWLClassExpression) range.accept(this)); + return m_factory.getOWLObjectUnionOf(exps); + } + + @Override + public Object visit(OWLDatatypeRestriction node) { + reportUnsupportedFeature(); + return null; + } + + @Override + public Object visit(OWLLiteral node) { + String name = 
PREFIX_LITERAL + node.getLiteral() + getTypeTag(node.getDatatype()); + return m_factory.getOWLNamedIndividual(IRI.create(name)); + } + + private String getTypeTag(OWLDatatype datatype) { + if (datatype.isBoolean()) return "_boolean"; + if (datatype.isDouble()) return "_double"; + if (datatype.isFloat()) return "_float"; + if (datatype.isInteger()) return "_integer"; + if (datatype.isRDFPlainLiteral()) return "_plain"; + if (datatype.isString()) return "_string"; + return null; + } + + @Override + public Object visit(OWLFacetRestriction node) { + reportUnsupportedFeature(); + return null; + } + + @Override + public Object visit(OWLObjectProperty property) { + return property; + } + + @Override + public Object visit(OWLObjectInverseOf property) { + return property; + } + + @Override + public Object visit(OWLNamedIndividual individual) { + return individual; + } + + @Override + public Object visit(OWLAnnotationProperty property) { + return property; + } + + @Override + public Object visit(OWLAnnotation node) { + return node; + } + + @Override + public Object visit(IRI iri) { + return iri; + } + + @Override + public Object visit(OWLAnonymousIndividual individual) { + return individual; + } + + @Override + public Object visit(SWRLClassAtom node) { + reportUnsupportedFeature(); + return null; + } + + @Override + public Object visit(SWRLDataRangeAtom node) { + reportUnsupportedFeature(); + return null; + } + + @Override + public Object visit(SWRLObjectPropertyAtom node) { + reportUnsupportedFeature(); + return null; + } + + @Override + public Object visit(SWRLDataPropertyAtom node) { + reportUnsupportedFeature(); + return null; + } + + @Override + public Object visit(SWRLBuiltInAtom node) { + reportUnsupportedFeature(); + return null; + } + + @Override + public Object visit(SWRLVariable node) { + reportUnsupportedFeature(); + return null; + } + + @Override + public Object visit(SWRLIndividualArgument node) { + reportUnsupportedFeature(); + return null; + } + + @Override + 
public Object visit(SWRLLiteralArgument node) { + reportUnsupportedFeature(); + return null; + } + + @Override + public Object visit(SWRLSameIndividualAtom node) { + reportUnsupportedFeature(); + return null; + } + + @Override + public Object visit(SWRLDifferentIndividualsAtom node) { + reportUnsupportedFeature(); + return null; + } + } + + protected class DataToObjectRDFHandler implements RDFHandler { + + RDFWriter m_writer; + Set m_properties; + DataToObjectVisitor m_visitor; + + public DataToObjectRDFHandler(TurtleWriter writer, Set dataProperties) { + m_writer = writer; + m_properties = dataProperties; + } + + @Override + public void endRDF() throws RDFHandlerException { + m_writer.endRDF(); + } + + @Override + public void handleComment(String arg0) throws RDFHandlerException { + m_writer.handleComment(arg0); + } + + @Override + public void handleNamespace(String arg0, String arg1) throws RDFHandlerException { + m_writer.handleNamespace(arg0, arg1); + } + + @Override + public void handleStatement(Statement arg0) throws RDFHandlerException { + URI predicate = arg0.getPredicate(); + Resource subject = arg0.getSubject(); + Value object = arg0.getObject(); + + if (subject instanceof URI) { + String newSubject = Normalizer.normalize(arg0.getSubject().toString(), Normalizer.Form.NFKC); + if (!isValidIRI(newSubject)) { + return ; + } + else subject = new URIImpl(newSubject); + } + + if (m_properties.contains(predicate.toString()) || object.toString().contains("\"^^")) { + String newObject = Normalizer.normalize(getIndividual(object.toString()), Normalizer.Form.NFKC); + if (!isValidIRI(newObject)) { + return ; + } + + m_writer.handleStatement(new StatementImpl(subject, predicate, new URIImpl(newObject))); + } + else + m_writer.handleStatement(arg0); + } + + private boolean isValidIRI(String newSubject) { + org.apache.jena.iri.IRI iri; + try { + iri = DataFilterRDFHandler.iriFactory.construct(newSubject); + if (iri.hasViolation(true)) return false; + } catch 
(org.apache.jena.iri.IRIException e) { + return false; + } + return true; + } + + private String getIndividual(String s) { + if (s.startsWith("_:")) return s; + int left = s.indexOf("\""), right = s.lastIndexOf("\""); + return PREFIX_LITERAL + s.substring(left + 1, right).replace(' ', '-'); + } + + @Override + public void startRDF() throws RDFHandlerException { + m_writer.startRDF(); + } + + } +} diff --git a/external/uk/ac/ox/cs/data/dbpedia/DataFilter.java b/external/uk/ac/ox/cs/data/dbpedia/DataFilter.java new file mode 100644 index 0000000..dc2f3e0 --- /dev/null +++ b/external/uk/ac/ox/cs/data/dbpedia/DataFilter.java @@ -0,0 +1,68 @@ +package uk.ac.ox.cs.data.dbpedia; + +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + +import org.openrdf.rio.RDFHandlerException; +import org.openrdf.rio.RDFParseException; +import org.openrdf.rio.turtle.TurtleParser; +import org.openrdf.rio.turtle.TurtleWriter; +import org.semanticweb.owlapi.model.OWLAnnotationProperty; +import org.semanticweb.owlapi.model.OWLDataProperty; +import org.semanticweb.owlapi.model.OWLOntology; +import uk.ac.ox.cs.pagoda.owl.OWLHelper; + +public class DataFilter { + + public static void main(String[] args) throws FileNotFoundException { + filteringDBPedia(); + } + + /** + * Filter out data property assertions and annotation property assertions in the data set. 
+ * + * @throws FileNotFoundException + */ + private static void filteringDBPedia() throws FileNotFoundException { + String[] args = ( +// "/home/yzhou/ontologies/npd/npd-all.owl " + +// "/home/yzhou/ontologies/npd/data/npd-data-dump-processed.ttl " + +// "/home/yzhou/ontologies/npd/data/npd-data-dump-minus-datatype-new.ttl " + +// "http://sws.ifi.uio.no/vocab/npd-all.owl#" + + "/media/RDFData/yzhou/dbpedia/integratedOntology.owl " + + "/media/RDFData/yzhou/dbpedia/data/dbpedia-processed.ttl " + + "/home/yzhou/ontologies/dbpedia/data/dbpedia-minus-datatype-new.ttl " + + "http://dbpedia.org/ontology/" + ).split("\\ "); + + + OWLOntology ontology = OWLHelper.loadOntology(args[0]); + + Set properties2ignore = new HashSet(); + for (OWLDataProperty prop: ontology.getDataPropertiesInSignature(true)) + properties2ignore.add(prop.toStringID()); + for (OWLAnnotationProperty prop: ontology.getAnnotationPropertiesInSignature()) + properties2ignore.add(prop.toStringID()); + + TurtleParser parser = new TurtleParser(); + TurtleWriter writer = new TurtleWriter(new FileOutputStream(args[2])); + + parser.setRDFHandler(new DataFilterRDFHandler(writer, properties2ignore)); + try { + parser.parse(new FileInputStream(args[1]), args[3]); + } catch (RDFParseException e) { + e.printStackTrace(); + } catch (RDFHandlerException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + + } + +} diff --git a/external/uk/ac/ox/cs/data/dbpedia/DataFilterRDFHandler.java b/external/uk/ac/ox/cs/data/dbpedia/DataFilterRDFHandler.java new file mode 100644 index 0000000..6dbac91 --- /dev/null +++ b/external/uk/ac/ox/cs/data/dbpedia/DataFilterRDFHandler.java @@ -0,0 +1,116 @@ +package uk.ac.ox.cs.data.dbpedia; + +import java.text.Normalizer; +import java.util.Set; + +import org.apache.jena.iri.IRI; +import org.apache.jena.iri.IRIException; +import org.apache.jena.iri.IRIFactory; + +import org.openrdf.model.BNode; +import org.openrdf.model.Resource; +import 
org.openrdf.model.URI; +import org.openrdf.model.Value; +import org.openrdf.model.Literal; +import org.openrdf.model.Statement; +import org.openrdf.model.impl.StatementImpl; +import org.openrdf.model.impl.URIImpl; +import org.openrdf.rio.RDFHandler; +import org.openrdf.rio.RDFHandlerException; +import org.openrdf.rio.RDFWriter; + +public class DataFilterRDFHandler implements RDFHandler { + + public static IRIFactory iriFactory = IRIFactory.semanticWebImplementation(); + + RDFWriter m_writer; + Set m_properties; + + public DataFilterRDFHandler(RDFWriter writer, Set properties2ignore) { + m_writer = writer; + m_properties = properties2ignore; + } + + @Override + public void endRDF() throws RDFHandlerException { + m_writer.endRDF(); + } + + @Override + public void handleComment(String arg0) throws RDFHandlerException { + m_writer.handleComment(arg0); + } + + @Override + public void handleNamespace(String arg0, String arg1) throws RDFHandlerException { + m_writer.handleNamespace(arg0, arg1); + } + + @Override + public void handleStatement(Statement arg0) throws RDFHandlerException { + Value newObject = null, oldObject = arg0.getObject(); + + if (oldObject instanceof Literal) + return ; + else if (oldObject instanceof BNode) { + newObject = oldObject; + } + else if (oldObject instanceof URI) + newObject = new URIImpl(Normalizer.normalize(oldObject.toString(), Normalizer.Form.NFKC)); + else { + System.out.println("Object: " + oldObject.getClass()); + } + + String predicate = arg0.getPredicate().toString(); + if (m_properties.contains(predicate)) return ; + + Resource newSubject = null, oldSubject = arg0.getSubject(); + + if (oldSubject instanceof BNode) { + newSubject = oldSubject; + } + else if (oldSubject instanceof URI) { + newSubject = new URIImpl(Normalizer.normalize(oldSubject.toString(), Normalizer.Form.NFKC)); + } + else { + System.out.println("Subject: " + oldSubject.getClass()); + } + +// if (newObject.toString().contains("ns#type")) +// 
/**
 * Command-line utility that samples a line-oriented RDF dump and normalises the
 * characters of every sampled line.
 *
 * <p>Usage: {@code Normaliser <input file> <step>}. Every {@code step}-th line that
 * does not contain {@code '@'} is cleaned up and written to a sibling file whose name
 * is derived from the input file name.
 */
public class Normaliser {

    /** Encoding used for input and output; the data contains non-ASCII letters. */
    private static final String ENCODING = "UTF-8";

    /** Matches the combining marks that NFD decomposition splits off base letters. */
    private static final Pattern DIACRITICS = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the input file, {@code args[1]} the sampling step;
     *             with no arguments a built-in default pair is used
     * @throws IOException if the input cannot be read or the output cannot be written
     * @throws IllegalArgumentException if only one argument is supplied
     */
    public static void main(String[] args) throws IOException {
        if (args.length == 0) {
            args = new String[] {
                    "/home/yzhou/ontologies/npd/npd-data-dump-minus-datatype.ttl",
                    "1"
            };
        }
        if (args.length < 2)
            throw new IllegalArgumentException("usage: Normaliser <input file> <step>");

        int size = Integer.parseInt(args[1]);
        String fragment = outputName(args[0], args[1]);

        BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(args[0]), ENCODING));
        try {
            BufferedWriter writer = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(fragment), ENCODING));
            try {
//              simpleProcess(reader, writer, size);
                process(reader, writer, size);
            } finally {
                // process() closes on success; closing again is harmless and covers failures
                writer.close();
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Derives the output file name: the suffix is inserted before the extension of the
     * file name, or appended when the file name has no extension.
     */
    private static String outputName(String input, String step) {
        int dot = input.lastIndexOf('.');
        int separator = Math.max(input.lastIndexOf('/'), input.lastIndexOf('\\'));
        // only a dot inside the last path segment marks an extension
        if (dot > separator)
            return input.substring(0, dot) + "_new_fragment" + step + input.substring(dot);
        return input + "_fragment" + step;
    }

    /**
     * Copies every {@code size}-th line of {@code reader} to {@code writer} unchanged.
     * Nothing is written when {@code size} is not positive. The streams are left open.
     *
     * @throws IOException if reading or writing fails
     */
    public static void simpleProcess(BufferedReader reader, BufferedWriter writer, int size) throws IOException {
        String line;
        int index = 0;
        while ((line = reader.readLine()) != null) {
            if (++index == size) {
                index = 0;
                writer.write(line);
                writer.newLine();
            }
        }
    }

    /** Characters that are dropped: comma, parentheses, apostrophe, U+2018 left quote. */
    static final String illegalSymbols = ",()'\u2018";

    /**
     * Single characters (first column) and their replacement text (second column).
     * NOTE(review): some mappings look questionable (sharp s to "t", eth to "o",
     * thorn to "P"); they are kept as found - confirm against the data they were
     * written for.
     */
    static final String[][] replacedSymbols = new String[][] {
        {"\u00e6", "ae"},   // ae ligature
        {"\u00f8", "o"},    // o with stroke
        {"\u00df", "t"},    // sharp s
        {"\u0141", "L"},    // L with stroke
        {"\u0131", "i"},    // dotless i
        {"\u0142", "l"},    // l with stroke
        {"\u2013", "-"},    // en dash
        {"&", "and"},
        {"\u00f0", "o"},    // eth
        {"\u0259", "e"},    // schwa
        {"\u0110", "D"},    // D with stroke
        {"\u0127", "h"},    // h with stroke
//      {"%60", "_"},       // handled as a string replacement in process(String)
        {"\u0111", "d"},    // d with stroke
        {"\u00de", "P"}     // thorn
    };

    static Set<Character> symbols2remove;
    static Map<Character, String> symbols2replace;

    static {
        symbols2remove = new HashSet<Character>();
        for (int i = 0; i < illegalSymbols.length(); ++i)
            symbols2remove.add(illegalSymbols.charAt(i));

        symbols2replace = new HashMap<Character, String>();
        for (int i = 0; i < replacedSymbols.length; ++i)
            symbols2replace.put(replacedSymbols[i][0].charAt(0), replacedSymbols[i][1]);
    }

    /** Prefix that marks the start of a URL inside a line. */
    static final String urlSymbols = "http://";
    static final int urlSymbolLength = urlSymbols.length();

    /**
     * Samples and normalises the input. Lines containing {@code '@'} are skipped and do
     * not count; of the remaining lines every {@code size}-th one is cleaned by
     * {@link #process(String)} and {@link #deAccent(String)} and written followed by
     * {@code '.'} and a line break. Nothing is written when {@code size} is not positive.
     *
     * <p>Both streams are closed when the input has been consumed.
     *
     * @throws IOException if reading or writing fails
     */
    public static void process(BufferedReader reader, BufferedWriter writer, int size) throws IOException {
        int index = 0;
        String line;
        while ((line = reader.readLine()) != null) {
            if (line.contains("@"))
                continue;

            if (++index == size) {
                writer.write(deAccent(process(line)));
                writer.write('.');
                writer.newLine();
                index = 0;
            }
        }

        writer.close();
        reader.close();
    }

    /**
     * Cleans one line: {@code %60} becomes {@code _}, characters in
     * {@link #symbols2replace} are substituted, characters in {@link #symbols2remove}
     * are dropped, and {@code '.'} is kept only while scanning the part of a URL that
     * directly follows {@code http://}.
     */
    private static String process(String line) {
        line = line.replace("%60", "_");//.replace("__", "_");

        // number of leading characters of "http://" matched so far;
        // urlSymbolLength means "inside the host part of a URL"
        int inURL = 0;
        StringBuilder newLine = new StringBuilder(line.length());
        for (int i = 0; i < line.length(); ++i) {
            char ch = line.charAt(i);

            if (ch == '.') {
                if (inURL == urlSymbolLength)
                    newLine.append('.');
                continue;
            }

            if (inURL == urlSymbolLength) {
                if (ch == '/' || ch == '#' || ch == ')' || ch == '>') inURL = 0;
            }
            else if (ch == urlSymbols.charAt(inURL)) {
                ++inURL;
            }
            else inURL = 0;

            String replacement = symbols2replace.get(ch);
            if (replacement != null)
                newLine.append(replacement);
            else if (!symbols2remove.contains(ch))
                newLine.append(ch);
        }

        return newLine.toString();
    }

    /**
     * Removes accents: the string is decomposed (NFD) and all combining diacritical
     * marks are deleted.
     *
     * @param str text to clean, not {@code null}
     * @return {@code str} without combining diacritical marks
     */
    public static String deAccent(String str) {
        String nfdNormalizedString = Normalizer.normalize(str, Normalizer.Form.NFD);
        return DIACRITICS.matcher(nfdNormalizedString).replaceAll("");
    }

}
= 0; i < line.length(); ++i) { + ch = line.charAt(i); + + if (ch == '.') { + if (inURL == urlSymbolLength) + newLine.append('.'); + continue; + } + + if (inURL == urlSymbolLength) { + if (ch == '/' || ch == '#' || ch == ')' || ch == '>') inURL = 0; + } + else if (ch == urlSymbols.charAt(inURL)) { + ++inURL; + } + else inURL = 0; + + if ((str = symbols2replace.get(ch)) != null) + newLine.append(str); + else if (!symbols2remove.contains(ch)) + newLine.append(ch); + } + + return newLine.toString(); + } + + public static String deAccent(String str) { + String nfdNormalizedString = Normalizer.normalize(str, Normalizer.Form.NFD); + Pattern pattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+"); + String t = pattern.matcher(nfdNormalizedString).replaceAll(""); + return t; + } + + +} diff --git a/external/uk/ac/ox/cs/data/sample/DataSampling.java b/external/uk/ac/ox/cs/data/sample/DataSampling.java new file mode 100644 index 0000000..1a788e3 --- /dev/null +++ b/external/uk/ac/ox/cs/data/sample/DataSampling.java @@ -0,0 +1,320 @@ +package uk.ac.ox.cs.data.sample; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.*; +import java.util.Map.Entry; + +import org.openrdf.model.Resource; +import org.openrdf.model.Statement; +import org.openrdf.model.URI; +import org.openrdf.model.Value; +import org.openrdf.model.impl.StatementImpl; +import org.openrdf.model.impl.URIImpl; +import org.openrdf.rio.RDFHandler; +import org.openrdf.rio.RDFHandlerException; +import org.openrdf.rio.RDFParseException; +import org.openrdf.rio.RDFParser; +import org.openrdf.rio.ntriples.NTriplesParser; +import org.openrdf.rio.turtle.*; + +import uk.ac.ox.cs.pagoda.owl.OWLHelper; +import uk.ac.ox.cs.pagoda.util.Namespace; +import uk.ac.ox.cs.pagoda.util.Utility; + +public class DataSampling { + + File[] m_list; + RDFGraph m_graph; + double m_percentage; + Set excludeEntities = 
new HashSet(); + + public DataSampling(String prefix, String fileName, String excludeFile, double percentage, boolean inTurtle) { + if (excludeFile != null) { + try { + Scanner scanner = new Scanner(new File(excludeFile)); + while (scanner.hasNextLine()) + excludeEntities.add(OWLHelper.removeAngles(scanner.nextLine().trim())); + scanner.close(); + } catch (FileNotFoundException e1) { + // TODO Auto-generated catch block + e1.printStackTrace(); + } + } + excludeEntities.add("http://www.w3.org/2002/07/owl#imports"); + + File file = new File(fileName); + if (file.isDirectory()) m_list = file.listFiles(); + else m_list = new File[] {file}; + m_percentage = percentage; + + RDFParser parser = inTurtle ? new TurtleParser() : new NTriplesParser(); + + GraphRDFHandler handler = new GraphRDFHandler(excludeEntities); + parser.setRDFHandler(handler); + + FileInputStream istream; + try { + for (File tFile: m_list) { + parser.parse(istream = new FileInputStream(tFile), prefix); + istream.close(); + } + } catch (IOException e) { + e.printStackTrace(); + } catch (RDFParseException e) { + e.printStackTrace(); + } catch (RDFHandlerException e) { + e.printStackTrace(); + } + + m_graph = handler.getGraph(); + } + + public void sample(String outputFile, boolean multiStart) { + try { + FileOutputStream ostream = new FileOutputStream(outputFile); + TurtleWriter writer = new TurtleWriter(ostream); + writer.startRDF(); + + if (m_percentage < 100) { + Sampler sam = multiStart ? 
+ new RandomWalkMulti(m_graph, writer) : + new RandomWalk(m_graph, writer); + sam.setLimit((int) (m_graph.numberOfStatement / 100 * m_percentage)); + System.out.println("Statement limit: " + (m_graph.numberOfStatement / 100 * m_percentage)); + sam.sample(); + sam.dispose(); + } + else { + m_graph.visit(writer); + } + writer.endRDF(); + ostream.close(); + } catch (IOException e) { + e.printStackTrace(); + } catch (RDFHandlerException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + + public static void main(String[] args) { + sampleReactome(); +// sampleChEMBL(); + } + + static void sampleReactome() { +// double[] ps = {40, 70, 100}; + double[] ps = {0.25, 0.5, 0.75}; + for (double per: ps) { + DataSampling sampling = new DataSampling( + "http://www.biopax.org/release/biopax-level3.owl#", +// "/home/yzhou/krr-nas-share/Yujiao/ontologies/bio2rdf/reactome/data/data.ttl", + "/home/yzhou/krr-nas-share/Yujiao/ontologies/bio2rdf/reactome/data/simplified.nt", +// "/home/yzhou/krr-nas-share/Yujiao/ontologies/bio2rdf/reactome/graph sampling/reactome_sample_40.ttl", + "/home/yzhou/krr-nas-share/Yujiao/ontologies/bio2rdf/reactome/graph sampling/exclude", + per, + true); + sampling.sample("/home/yzhou/krr-nas-share/Yujiao/ontologies/bio2rdf/reactome/graph sampling/sample_test_" + per + ".ttl", true); +// sampling.sample("/home/yzhou/krr-nas-share/Yujiao/ontologies/bio2rdf/reactome/graph sampling/simplifed_sample_test_" + per + ".ttl", true); +// sampling.sample("output/sample_reactome_multi.ttl", true); + } + } + + static void sampleChEMBL() { + DataSampling sampling = new DataSampling( + "http://rdf.ebi.ac.uk/terms/chembl", + "/home/yzhou/RDFdata/ChEMBL/facts/chembl_kbfile.nt", + null, + 100, + false); + + sampling.sample("output/sample_chembl_multi.ttl", true); + sampling.sample("output/sample_chembl.ttl", false); + } + +} + +class RDFGraph { + + Map index = new HashMap(); + Map inverseIndex = new HashMap(); + MapToList labels = new MapToList(); + + 
MapToList edges = new MapToList(); + Set excludeEntities; + + int numberOfIndividuals = 0, numberOfProperties = 0; + + public RDFGraph(Set exclude) { + excludeEntities = exclude; + for (String str: excludeEntities) + System.out.println(str); + System.out.println("---------------"); + } + + public void visit(TurtleWriter writer) throws RDFHandlerException { + Integer key; + for (Entry> entry: labels.entrySet()) { + key = entry.getKey(); + for (Integer type: entry.getValue()) + writer.handleStatement(getStatement(key, type)); + } + + for (Entry> entry: edges.entrySet()) { + key = entry.getKey(); + if ((inverseIndex.get(key) instanceof URI) && + ((URI) inverseIndex.get(key)).toString().equals("http://www.reactome.org/biopax/46/879693#UnificationXref9")) + System.out.println("Here"); + + for (RDFEdge edge: entry.getValue()) + writer.handleStatement(getStatement(key, edge.m_label, edge.m_dst)); + } + } + + private int getID(Value v, boolean isIndividual) { + if (v.toString().contains("imports")) + System.out.println(v.toString()); + if (excludeEntities.contains(v.toString())) { + return 0; + } + + Integer id = index.get(v); + if (id == null) + if (isIndividual) { + index.put(v, id = ++numberOfIndividuals); + inverseIndex.put(id, v); + } + else { + index.put(v, id = --numberOfProperties); + inverseIndex.put(id, v); + } + return id; + } + + int numberOfStatement = 0; + int counter = 0; + + public void addTriple(Resource s, URI p, Value o) { + ++numberOfStatement; + if (numberOfStatement % 1000000 == 0) { + Utility.logInfo("No.of statements: " + numberOfStatement, "\tNo.of individuals: " + numberOfIndividuals, "\tNo.of predicates: " + (-numberOfProperties)); + } + + if (p.equals(rdftype)) { + int type = getID(o, false), i = getID(s, true); + if (i == 0) { +// System.out.println("<" + s + "> <" + p + "> <" + o + ">"); + return ; + } + labels.add(i, type); + } + else { + int i = getID(s, true), j = getID(o, true), prop = getID(p, false) ; + if (i == 0 || j == 0 || prop == 0) 
{ +// System.out.println("<" + s + "> <" + p + "> <" + o + ">"); + return ; + } + edges.add(i, new RDFEdge(prop, j)); + } + } + + URI rdftype = new URIImpl(Namespace.RDF_TYPE); + + public Statement getStatement(int... args) { + if (args.length == 2) + return new StatementImpl((Resource) inverseIndex.get(args[0]), rdftype, (Value) inverseIndex.get(args[1])); + else if (args.length == 3) + return new StatementImpl((Resource) inverseIndex.get(args[0]), (URI) inverseIndex.get(args[1]), (Value) inverseIndex.get(args[2])); + return null; + } + + public String getRawString(int id) { + return inverseIndex.get(id).toString(); + } + +} + +class MapToList { + + private Map> map = new HashMap>(); + + public void add(int key, T value) { + LinkedList list = map.get(key); + if (list == null) + map.put(key, list = new LinkedList()); + list.add(value); + } + + public Set>> entrySet() { + return map.entrySet(); + } + + public void shuffle() { + for (List list: map.values()) + Collections.shuffle(list); + } + + public LinkedList get(int key) { + return map.get(key); + } + +} + +class RDFEdge { + + int m_label, m_dst; + + public RDFEdge(int label, int dst) { + m_label = label; + m_dst = dst; + } + +} + +class GraphRDFHandler implements RDFHandler { + + RDFGraph m_graph; + Set m_exclude; + + public GraphRDFHandler(Set excludeEntities) { + m_exclude = excludeEntities; + } + + @Override + public void startRDF() throws RDFHandlerException { + m_graph = new RDFGraph(m_exclude); + } + + public RDFGraph getGraph() { + return m_graph; + } + + @Override + public void endRDF() throws RDFHandlerException { + // TODO Auto-generated method stub + + } + + @Override + public void handleNamespace(String prefix, String uri) + throws RDFHandlerException { + // TODO Auto-generated method stub + + } + + @Override + public void handleStatement(Statement st) throws RDFHandlerException { + m_graph.addTriple(st.getSubject(), st.getPredicate(), st.getObject()); + } + + @Override + public void 
handleComment(String comment) throws RDFHandlerException { + // TODO Auto-generated method stub + + } + +} \ No newline at end of file diff --git a/external/uk/ac/ox/cs/data/sample/RandomWalk.java b/external/uk/ac/ox/cs/data/sample/RandomWalk.java new file mode 100644 index 0000000..d9f5107 --- /dev/null +++ b/external/uk/ac/ox/cs/data/sample/RandomWalk.java @@ -0,0 +1,88 @@ +package uk.ac.ox.cs.data.sample; + +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Random; +import java.util.Set; +import java.util.Stack; + +import org.openrdf.rio.RDFHandlerException; +import org.openrdf.rio.turtle.TurtleWriter; + +import uk.ac.ox.cs.pagoda.util.Utility; + +public class RandomWalk extends Sampler { + + public RandomWalk(RDFGraph graph, TurtleWriter writer) { + super(graph, writer); + } + + protected Random rand = new Random(); + + protected int noOfStatements = 0, statementLimit = 0; + protected Set visited = new HashSet(); + + @Override + public void setLimit(int limit) { + statementLimit = limit; + } + + @Override + public void sample() throws RDFHandlerException { + int u, v, pick, index; + RDFEdge edge; + List edges; + Stack stack = new Stack(); + while (true) { + if (noOfStatements >= statementLimit) { + return ; + } + if (stack.isEmpty()) { + stack.add(v = rand.nextInt(m_graph.numberOfIndividuals)); + Utility.logInfo("A new start: " + m_graph.getRawString(v)); + visit(v); + } + u = stack.peek(); + if (rand.nextInt(100) < 15) { + stack.pop(); + continue; + } + if ((edges = m_graph.edges.get(u)) == null || edges.size() == 0) { + stack.clear(); + continue; + } + + index = 0; + pick = rand.nextInt(edges.size()); + for (Iterator iter = edges.iterator(); iter.hasNext(); ++index) { + edge = iter.next(); + if (index == pick) { + stack.add(v = edge.m_dst); + visit(v); + m_writer.handleStatement(m_graph.getStatement(u, edge.m_label, edge.m_dst)); + ++noOfStatements; + iter.remove(); + } + + } + } + } + + protected void visit(int 
node) throws RDFHandlerException { + if (visited.contains(node)) return ; + visited.add(node); + List list = m_graph.labels.get(node); + if (list == null) return ; + for (Iterator iter = list.iterator(); iter.hasNext(); ) + m_writer.handleStatement(m_graph.getStatement(node, iter.next())); + noOfStatements += list.size(); + } + + @Override + public void dispose() { + visited.clear(); + } + + +} diff --git a/external/uk/ac/ox/cs/data/sample/RandomWalkMulti.java b/external/uk/ac/ox/cs/data/sample/RandomWalkMulti.java new file mode 100644 index 0000000..592f249 --- /dev/null +++ b/external/uk/ac/ox/cs/data/sample/RandomWalkMulti.java @@ -0,0 +1,112 @@ +package uk.ac.ox.cs.data.sample; + +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Queue; +import java.util.Set; +import java.util.Stack; + +import org.openrdf.rio.RDFHandlerException; +import org.openrdf.rio.turtle.TurtleWriter; + +import uk.ac.ox.cs.pagoda.util.Utility; + + +public class RandomWalkMulti extends RandomWalk { + + public RandomWalkMulti(RDFGraph graph, TurtleWriter writer) { + super(graph, writer); + } + + Queue queue = new LinkedList(); + + @Override + public void sample() throws RDFHandlerException { + getStartNodes(); + + Utility.logInfo(queue.size()); + + int u, v, pick, index; + int individualLimit = statementLimit / queue.size(), currentLimit = 0; + RDFEdge edge; + List edges; + Stack stack = new Stack(); + while (true) { + if (noOfStatements >= statementLimit) { + System.out.println("The number of statements in the sampling: " + noOfStatements); + return ; + } + if (noOfStatements >= currentLimit) { + stack.clear(); + } + + if (stack.isEmpty()) { + if (queue.isEmpty()) + v = rand.nextInt(m_graph.numberOfIndividuals); + else { + v = queue.poll(); + currentLimit += individualLimit; + } + stack.add(v); +// Utility.logInfo(noOfStart + " new start: " + m_graph.getRawString(v)); + visit(v); + } + u = 
stack.peek(); + if (rand.nextInt(100) < 15) { + stack.pop(); + continue; + } + if ((edges = m_graph.edges.get(u)) == null || edges.size() == 0) { + stack.clear(); + continue; + } + + index = 0; + pick = rand.nextInt(edges.size()); + for (Iterator iter = edges.iterator(); iter.hasNext(); ++index) { + edge = iter.next(); + if (index == pick) { + stack.add(v = edge.m_dst); + visit(v); + m_writer.handleStatement(m_graph.getStatement(u, edge.m_label, edge.m_dst)); + ++noOfStatements; + iter.remove(); + } + + } + } + } + + private void getStartNodes() throws RDFHandlerException { + Set coveredConcepts = new HashSet(); + Integer concept; + + Iterator iter; + for (Map.Entry> entry: m_graph.labels.entrySet()) { + iter = entry.getValue().iterator(); + concept = null; + + while (iter.hasNext()) { + if (!(coveredConcepts.contains(concept = iter.next()))) { + break; + } + else concept = null; + + } + + if (concept == null) continue; + else { + queue.add(entry.getKey()); + coveredConcepts.add(concept); + while (iter.hasNext()) + coveredConcepts.add(iter.next()); + } + } + + } + + +} diff --git a/external/uk/ac/ox/cs/data/sample/Sampler.java b/external/uk/ac/ox/cs/data/sample/Sampler.java new file mode 100644 index 0000000..205b29b --- /dev/null +++ b/external/uk/ac/ox/cs/data/sample/Sampler.java @@ -0,0 +1,23 @@ +package uk.ac.ox.cs.data.sample; + +import org.openrdf.rio.RDFHandlerException; +import org.openrdf.rio.turtle.TurtleWriter; + +public abstract class Sampler { + + protected RDFGraph m_graph; + protected TurtleWriter m_writer; + + public Sampler(RDFGraph graph, TurtleWriter writer) { + m_graph = graph; + m_writer = writer; + } + + public abstract void setLimit(int limit); + + public abstract void sample() throws RDFHandlerException; + + public abstract void dispose(); + + +} -- cgit v1.2.3