aboutsummaryrefslogtreecommitdiff
path: root/external/uk/ac
diff options
context:
space:
mode:
authoryzhou <yzhou@krr-linux.cs.ox.ac.uk>2015-04-30 17:36:35 +0100
committeryzhou <yzhou@krr-linux.cs.ox.ac.uk>2015-04-30 17:36:35 +0100
commit0d8f240c9c0a64f2285324e5a517161e45c698fc (patch)
treef4b4f7078e3be02011b9812cd8791c657a135993 /external/uk/ac
parent68ae342b2a4923bc7b3f378c6a489f2355d85279 (diff)
downloadACQuA-0d8f240c9c0a64f2285324e5a517161e45c698fc.tar.gz
ACQuA-0d8f240c9c0a64f2285324e5a517161e45c698fc.zip
downgrade owl api and reorganised src files
Diffstat (limited to 'external/uk/ac')
-rw-r--r--external/uk/ac/ox/cs/data/AtomicQueryGenerator.java86
-rw-r--r--external/uk/ac/ox/cs/data/Comparator.java131
-rw-r--r--external/uk/ac/ox/cs/data/Fragment.java129
-rw-r--r--external/uk/ac/ox/cs/data/OntologyStatistics.java15
-rw-r--r--external/uk/ac/ox/cs/data/PrepareQueries4Hydrowl.java184
-rw-r--r--external/uk/ac/ox/cs/data/QueryFilter.java30
-rw-r--r--external/uk/ac/ox/cs/data/RemoveDataPropertyRange.java52
-rw-r--r--external/uk/ac/ox/cs/data/RemoveImportInTurtle.java77
-rw-r--r--external/uk/ac/ox/cs/data/WriteIntoTurtle.java69
-rw-r--r--external/uk/ac/ox/cs/data/WriteToNTriple.java57
-rw-r--r--external/uk/ac/ox/cs/data/datatype/DataPropertyEliminator.java56
-rw-r--r--external/uk/ac/ox/cs/data/datatype/DataToObject.java932
-rw-r--r--external/uk/ac/ox/cs/data/dbpedia/DataFilter.java68
-rw-r--r--external/uk/ac/ox/cs/data/dbpedia/DataFilterRDFHandler.java116
-rw-r--r--external/uk/ac/ox/cs/data/dbpedia/Normaliser.java155
-rw-r--r--external/uk/ac/ox/cs/data/sample/DataSampling.java320
-rw-r--r--external/uk/ac/ox/cs/data/sample/RandomWalk.java88
-rw-r--r--external/uk/ac/ox/cs/data/sample/RandomWalkMulti.java112
-rw-r--r--external/uk/ac/ox/cs/data/sample/Sampler.java23
19 files changed, 2700 insertions, 0 deletions
diff --git a/external/uk/ac/ox/cs/data/AtomicQueryGenerator.java b/external/uk/ac/ox/cs/data/AtomicQueryGenerator.java
new file mode 100644
index 0000000..d271e87
--- /dev/null
+++ b/external/uk/ac/ox/cs/data/AtomicQueryGenerator.java
@@ -0,0 +1,86 @@
1package uk.ac.ox.cs.data;
2
3import org.semanticweb.owlapi.model.OWLClass;
4import org.semanticweb.owlapi.model.OWLDataFactory;
5import org.semanticweb.owlapi.model.OWLObjectProperty;
6import org.semanticweb.owlapi.model.OWLOntology;
7import org.semanticweb.owlapi.model.OWLOntologyManager;
8
9import uk.ac.ox.cs.pagoda.owl.OWLHelper;
10import uk.ac.ox.cs.pagoda.tester.PagodaTester;
11import uk.ac.ox.cs.pagoda.util.Utility;
12
13public class AtomicQueryGenerator {
14
15 public static final String template = //"^[query@ID]" + Utility.LINE_SEPARATOR +
16 "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>" + Utility.LINE_SEPARATOR +
17 "SELECT ?X" + Utility.LINE_SEPARATOR +
18 "WHERE {" + Utility.LINE_SEPARATOR +
19 "?X rdf:type <@CLASS>" + Utility.LINE_SEPARATOR +
20 "}";
21
22 public static String outputFile = "output/atomic_fly.sparql";
23
24 public static void main(String[] args) throws Exception {
25 if (args.length == 0) {
26// args = new String[] { "/home/yzhou/backup/20141212/univ-bench-dl-queries.owl"};
27 args = new String[] { PagodaTester.onto_dir + "fly/fly-all-in-one_rolledUp.owl"};
28// args = new String[] { PagodaTester.onto_dir + "dbpedia/integratedOntology-all-in-one-minus-datatype.owl" };
29// args = new String[] { PagodaTester.onto_dir + "npd/npd-all-minus-datatype.owl" };
30// args = new String[] { PagodaTester.onto_dir + "bio2rdf/chembl/cco-noDPR.ttl" };
31// args = new String[] { PagodaTester.onto_dir + "bio2rdf/reactome/biopax-level3-processed.owl" };
32// args = new String[] { PagodaTester.onto_dir + "bio2rdf/uniprot/core-processed-noDis.owl" };
33 }
34
35// OWLOntology ontology = OWLHelper.getMergedOntology(args[0], null);
36// OWLHelper.correctDataTypeRangeAxioms(ontology);
37 OWLOntology ontology = OWLHelper.loadOntology(args[0]);
38
39 OWLOntologyManager manager = ontology.getOWLOntologyManager();
40 OWLDataFactory factory = manager.getOWLDataFactory();
41// manager.saveOntology(ontology, new FileOutputStream(args[0].replace(".owl", "_owlapi.owl")));
42
43 if (outputFile != null)
44 Utility.redirectCurrentOut(outputFile);
45
46 int queryID = 0;
47 for (OWLClass cls: ontology.getClassesInSignature(true)) {
48 if (cls.equals(factory.getOWLThing()) || cls.equals(factory.getOWLNothing()))
49 continue;
50 if (!cls.toStringID().contains("Query")) continue;
51 System.out.println("^[Query" + ++queryID + "]");
52 System.out.println(template.replace("@CLASS", cls.toStringID()));
53 System.out.println();
54 }
55
56 for (OWLOntology onto: ontology.getImportsClosure())
57 for (OWLObjectProperty prop: onto.getObjectPropertiesInSignature()) {
58// if (!prop.toStringID().contains("Query")) continue;
59 System.out.println("^[Query" + ++queryID + "]");
60 System.out.println("SELECT ?X ?Y");
61 System.out.println("WHERE {");
62 System.out.println("?X <" + prop.toStringID() + "> ?Y .");
63 System.out.println("}");
64 System.out.println();
65 }
66
67 String[] answerVars = new String[] {"?X", "?Y"};
68
69 for (OWLOntology onto: ontology.getImportsClosure())
70 for (OWLObjectProperty prop: onto.getObjectPropertiesInSignature()) {
71// if (!prop.toStringID().contains("Query")) continue;
72 for (int i = 0; i < answerVars.length; ++i) {
73 System.out.println("^[Query" + ++queryID + "]");
74 System.out.println("SELECT " + answerVars[i]);
75 System.out.println("WHERE {");
76 System.out.println("?X <" + prop.toStringID() + "> ?Y .");
77 System.out.println("}");
78 System.out.println();
79 }
80 }
81
82 if (outputFile != null)
83 Utility.closeCurrentOut();
84 }
85
86}
diff --git a/external/uk/ac/ox/cs/data/Comparator.java b/external/uk/ac/ox/cs/data/Comparator.java
new file mode 100644
index 0000000..5b61a81
--- /dev/null
+++ b/external/uk/ac/ox/cs/data/Comparator.java
@@ -0,0 +1,131 @@
1package uk.ac.ox.cs.data;
2
3import java.io.BufferedWriter;
4import java.io.File;
5import java.io.FileOutputStream;
6import java.io.IOException;
7import java.io.OutputStreamWriter;
8import java.util.Collection;
9import java.util.HashSet;
10import java.util.Scanner;
11import java.util.Set;
12
13import org.semanticweb.owlapi.model.OWLAxiom;
14import org.semanticweb.owlapi.model.OWLClassExpression;
15import org.semanticweb.owlapi.model.OWLDataFactory;
16import org.semanticweb.owlapi.model.OWLEquivalentClassesAxiom;
17import org.semanticweb.owlapi.model.OWLOntology;
18import uk.ac.ox.cs.pagoda.owl.OWLHelper;
19import uk.ac.ox.cs.pagoda.util.Utility;
20
21public class Comparator {
22
23 public static void main(String[] args) throws IOException {
24 compareFiles(args);
25 }
26
27 public static void compareFiles(String[] args) throws IOException {
28 String directory = "/users/yzhou/workspace/pagoda/";
29 String name1 = "abox1.txt", name2 = "abox2.txt";
30
31 args = (directory + name1 + " " +
32 directory + name2 + " " +
33 directory + "diff.dlog").split("\\ ");
34
35 Scanner s1 = new Scanner(new File(args[0])), s2 = new Scanner(new File(args[1]));
36 HashSet<String> h1 = new HashSet<String>(), h2 = new HashSet<String>();
37 while (s1.hasNextLine()) h1.add(s1.nextLine());
38 s1.close();
39 while (s2.hasNextLine()) h2.add(s2.nextLine().replace("an-minus.owl", "an.owl"));
40 s2.close();
41
42 BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(args[2])));
43
44 writer.write("Elements in " + name1 + ", but not in " + name2);
45 writer.newLine();
46 for (String line: h1)
47 if (!h2.contains(line)) {
48 writer.write(line);
49 writer.newLine();
50 }
51
52 writer.write("--------------------------------------------------------");
53 writer.newLine();
54
55 writer.write("Elements in " + name2 + ", but not in " + name1);
56 writer.newLine();
57 for (String line: h2)
58 if (!h1.contains(line)) {
59 writer.write(line);
60 writer.newLine();
61 }
62
63 writer.close();
64 }
65
66
67 public void compareOntologies(String[] args) throws IOException {
68 String directory = "/home/scratch/yzhou/ontologies/fly/auxiliary/datalog/";
69 String name1 = "eq/elho.owl", name2 = "noEQ/elho.owl";
70
71 args = (directory + name1 + " " +
72 directory + name2 + " " +
73 directory + "diff.owl").split("\\ ");
74
75 OWLOntology o1 = OWLHelper.loadOntology(args[0]);
76 OWLOntology o2 = OWLHelper.loadOntology(args[1]);
77
78 BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(args[2])));
79
80 writer.write("Elements in " + name1 + ", but not in " + name2);
81 writer.newLine();
82 writer.write(compareOntologies(o1, o2));
83
84 writer.write("--------------------------------------------------------");
85 writer.newLine();
86
87 writer.write("Elements in " + name2 + ", but not in " + name1);
88 writer.newLine();
89 writer.write(compareOntologies(o2, o1));
90
91 writer.close();
92 }
93
94 private static String compareOntologies(OWLOntology o1, OWLOntology o2) {
95 StringBuilder sb = new StringBuilder();
96
97 Set<String> axioms = new HashSet<String>();
98 OWLDataFactory factory1 = o1.getOWLOntologyManager().getOWLDataFactory();
99 OWLDataFactory factory2 = o2.getOWLOntologyManager().getOWLDataFactory();
100
101 for (OWLAxiom a: o2.getAxioms())
102 for (OWLAxiom axiom: process(a, factory2)){
103 axioms.add(axiom.toString());
104 }
105
106 for (OWLAxiom a: o1.getAxioms()) {
107 for (OWLAxiom axiom: process(a, factory1))
108 if (!axioms.contains(axiom.toString()))
109 sb.append(axiom.toString()).append(Utility.LINE_SEPARATOR);
110 }
111
112 return sb.toString();
113 }
114
115 private static Collection<OWLAxiom> process(OWLAxiom axiom, OWLDataFactory factory) {
116 Set<OWLAxiom> axioms = new HashSet<OWLAxiom>();
117 OWLEquivalentClassesAxiom equiv;
118 if (axiom instanceof OWLEquivalentClassesAxiom) {
119 equiv = (OWLEquivalentClassesAxiom) axiom;
120 for (OWLClassExpression exp1: equiv.getClassExpressions())
121 for (OWLClassExpression exp2: equiv.getClassExpressions())
122 if (!exp1.equals(exp2))
123 axioms.add(factory.getOWLSubClassOfAxiom(exp1, exp2));
124 }
125 else
126 axioms.add(axiom);
127
128 return axioms;
129 }
130
131}
diff --git a/external/uk/ac/ox/cs/data/Fragment.java b/external/uk/ac/ox/cs/data/Fragment.java
new file mode 100644
index 0000000..1038a33
--- /dev/null
+++ b/external/uk/ac/ox/cs/data/Fragment.java
@@ -0,0 +1,129 @@
1package uk.ac.ox.cs.data;
2
3import java.io.File;
4import java.io.FileInputStream;
5import java.io.FileNotFoundException;
6import java.io.FileOutputStream;
7import java.util.Random;
8
9import org.openrdf.model.Statement;
10import org.openrdf.rio.RDFHandler;
11import org.openrdf.rio.RDFHandlerException;
12import org.openrdf.rio.turtle.TurtleParser;
13import org.openrdf.rio.turtle.TurtleWriter;
14
15import uk.ac.ox.cs.pagoda.util.Utility;
16
17public class Fragment {
18
19 private TurtleWriter m_writer;
20 private FragmentRDFHandler m_handler;
21
22 public Fragment(int fragment, String outFile) {
23 try {
24 m_writer = new TurtleWriter(new FileOutputStream(outFile));
25 } catch (FileNotFoundException e) {
26 e.printStackTrace();
27 }
28 m_handler = new FragmentRDFHandler(fragment, m_writer);
29 }
30
31 public void process(String prefix, String fileName) {
32 FileInputStream istream;
33 try {
34 TurtleParser parser = new TurtleParser();
35 parser.setRDFHandler(m_handler);
36
37 File f = new File(fileName);
38 if (f.isDirectory())
39 for (String tFileName: f.list()) {
40 if (tFileName.endsWith(".ttl")) {
41 parser.parse(istream = new FileInputStream(fileName + Utility.FILE_SEPARATOR + tFileName), prefix);
42 istream.close();
43 }
44 }
45 else {
46 parser.parse(istream = new FileInputStream(fileName), prefix);
47 istream.close();
48 }
49 } catch (Exception e) {
50 e.printStackTrace();
51 Utility.logError("aoaoaoao ~~~~~");
52 return ;
53 }
54 Utility.logInfo("DONE");
55 }
56
57 public void dispose() {
58 try {
59 m_writer.endRDF();
60 } catch (RDFHandlerException e) {
61 e.printStackTrace();
62 }
63 }
64
65 public static void main(String[] args) {
66 /**
67 * for ChEMBL
68 */
69 Fragment f = new Fragment(100, "data_01.ttl");
70 f.process("http://rdf.ebi.ac.uk/terms/chembl#", "/media/krr-nas-share/Yujiao/ontologies/bio2rdf/chembl/data");
71
72 /**
73 * for Reactome
74 * "http://www.biopax.org/release/biopax-level3.owl#",
75 "/home/scratch/yzhou/ontologies/bio2rdf/reactome"
76 "/home/scratch/yzhou/ontologies/bio2rdf/reactome/biopaxrdf",
77 */
78
79// Fragment f = new Fragment(1000, "data_001.ttl");
80// f.process("http://www.biopax.org/release/biopax-level3.owl#", "/media/krr-nas-share/Yujiao/ontologies/bio2rdf/reactome/data.ttl");
81
82 f.dispose();
83 }
84
85}
86
87
88class FragmentRDFHandler implements RDFHandler {
89
90 int m_mod;
91 TurtleWriter m_writer;
92 Random m_rand = new Random();
93
94 public FragmentRDFHandler(int mod, TurtleWriter writer) {
95 m_mod = mod;
96 m_writer = writer;
97 }
98
99 @Override
100 public void endRDF() throws RDFHandlerException {
101 }
102
103 @Override
104 public void handleComment(String arg0) throws RDFHandlerException {
105 m_writer.handleComment(arg0);
106 Utility.logDebug("handling comment: " + arg0);
107 }
108
109 @Override
110 public void handleNamespace(String arg0, String arg1) throws RDFHandlerException {
111 m_writer.handleNamespace(arg0, arg1);
112 }
113
114 @Override
115 public void handleStatement(Statement arg0) throws RDFHandlerException {
116 if (m_rand.nextInt() % m_mod == 0)
117 m_writer.handleStatement(arg0);
118 }
119
120 boolean m_started = false;
121
122 @Override
123 public void startRDF() throws RDFHandlerException {
124 if (m_started) return ;
125 m_started = true;
126 m_writer.startRDF();
127 }
128
129} \ No newline at end of file
diff --git a/external/uk/ac/ox/cs/data/OntologyStatistics.java b/external/uk/ac/ox/cs/data/OntologyStatistics.java
new file mode 100644
index 0000000..de40dda
--- /dev/null
+++ b/external/uk/ac/ox/cs/data/OntologyStatistics.java
@@ -0,0 +1,15 @@
1package uk.ac.ox.cs.data;
2
3import org.semanticweb.owlapi.model.OWLOntology;
4import uk.ac.ox.cs.pagoda.owl.OWLHelper;
5
6public class OntologyStatistics {
7
8 public static void main(String[] args) {
9 args = ("/home/yzhou/ontologies/uobm/univ-bench-dl-minus.owl").split("\\ ");
10
11 OWLOntology onto = OWLHelper.loadOntology(args[0]);
12 System.out.println(onto.getTBoxAxioms(true).size() + onto.getRBoxAxioms(true).size());
13 }
14
15}
diff --git a/external/uk/ac/ox/cs/data/PrepareQueries4Hydrowl.java b/external/uk/ac/ox/cs/data/PrepareQueries4Hydrowl.java
new file mode 100644
index 0000000..10f1ac2
--- /dev/null
+++ b/external/uk/ac/ox/cs/data/PrepareQueries4Hydrowl.java
@@ -0,0 +1,184 @@
1package uk.ac.ox.cs.data;
2
3import java.io.*;
4import java.util.HashMap;
5import java.util.Map;
6
7import com.hp.hpl.jena.graph.Node;
8import com.hp.hpl.jena.graph.Node_URI;
9import com.hp.hpl.jena.graph.Node_Variable;
10import com.hp.hpl.jena.query.Query;
11import com.hp.hpl.jena.query.QueryFactory;
12import com.hp.hpl.jena.sparql.core.TriplePath;
13import com.hp.hpl.jena.sparql.core.Var;
14import com.hp.hpl.jena.sparql.syntax.Element;
15import com.hp.hpl.jena.sparql.syntax.ElementAssign;
16import com.hp.hpl.jena.sparql.syntax.ElementBind;
17import com.hp.hpl.jena.sparql.syntax.ElementData;
18import com.hp.hpl.jena.sparql.syntax.ElementDataset;
19import com.hp.hpl.jena.sparql.syntax.ElementExists;
20import com.hp.hpl.jena.sparql.syntax.ElementFilter;
21import com.hp.hpl.jena.sparql.syntax.ElementGroup;
22import com.hp.hpl.jena.sparql.syntax.ElementMinus;
23import com.hp.hpl.jena.sparql.syntax.ElementNamedGraph;
24import com.hp.hpl.jena.sparql.syntax.ElementNotExists;
25import com.hp.hpl.jena.sparql.syntax.ElementOptional;
26import com.hp.hpl.jena.sparql.syntax.ElementPathBlock;
27import com.hp.hpl.jena.sparql.syntax.ElementService;
28import com.hp.hpl.jena.sparql.syntax.ElementSubQuery;
29import com.hp.hpl.jena.sparql.syntax.ElementTriplesBlock;
30import com.hp.hpl.jena.sparql.syntax.ElementUnion;
31import com.hp.hpl.jena.sparql.syntax.ElementVisitor;
32
33import uk.ac.ox.cs.pagoda.query.QueryManager;
34import uk.ac.ox.cs.pagoda.util.Namespace;
35
36public class PrepareQueries4Hydrowl {
37
38 public static void main(String[] args) throws FileNotFoundException {
39 if (args.length == 0)
40// args = new String[] {"/media/krr-nas-share/Yujiao/ontologies/dbpedia/queries/atomic_ground.sparql"};
41 args = new String[] {"/home/yzhou/temp/ontologies/reactome/example.sparql"};
42// String fileName = args[0].substring(args[0].lastIndexOf(Utility.FILE_SEPARATOR) + 1);
43
44 PrintStream ps = new PrintStream(new File(args[0].replace(".sparql", "_hydrowl.sparql")));
45 if (ps != null) System.setOut(ps);
46
47 StringBuilder sb = new StringBuilder();
48 Map<String, Integer> vars = new HashMap<String, Integer>();
49 for (String text: QueryManager.collectQueryTexts(args[0])) {
50 Query query = QueryFactory.create(text);
51 for (Var var: query.getProjectVars())
52 sb.append(sb.length() == 0 ? "Q(?" : ",?").append(var.getName());
53 sb.append(") <- ");
54 ElementVisitor visitor = new HydrowlGeneratorVisitor(sb);
55 query.getQueryPattern().visit(visitor);
56 sb.setLength(sb.length() - 2);
57 System.out.println(sb);
58 sb.setLength(0);
59 vars.clear();
60 }
61
62 if (ps != null) ps.close();
63 }
64
65}
66
67class HydrowlGeneratorVisitor implements ElementVisitor {
68
69 StringBuilder m_text;
70
71 public HydrowlGeneratorVisitor(StringBuilder text) {
72 m_text = text;
73 }
74
75 @Override
76 public void visit(ElementTriplesBlock el) {
77 // TODO Auto-generated method stub
78
79 }
80
81 @Override
82 public void visit(ElementPathBlock el) {
83 // TODO Auto-generated method stub
84 for (TriplePath p: el.getPattern().getList()) {
85 if (p.getPredicate().getURI().equals(Namespace.RDF_TYPE) && !p.getObject().isVariable())
86 m_text.append(p.getObject().getURI()).append("(").append(getURI(p.getSubject())).append("), ");
87 else
88 m_text.append(p.getPredicate().getURI()).append("(").append(getURI(p.getSubject())).append(", ").append(getURI(p.getObject())).append("), ");
89 }
90 }
91
92 private String getURI(Node node) {
93 if (node instanceof Node_URI) return node.getURI();
94 if (node instanceof Node_Variable) return "?" + node.getName();
95 System.out.println("Unknown node: " + node);
96 return null;
97 }
98
99 @Override
100 public void visit(ElementFilter el) {
101 // TODO Auto-generated method stub
102
103 }
104
105 @Override
106 public void visit(ElementAssign el) {
107 // TODO Auto-generated method stub
108
109 }
110
111 @Override
112 public void visit(ElementBind el) {
113 // TODO Auto-generated method stub
114
115 }
116
117 @Override
118 public void visit(ElementUnion el) {
119 // TODO Auto-generated method stub
120
121 }
122
123 @Override
124 public void visit(ElementOptional el) {
125 // TODO Auto-generated method stub
126
127 }
128
129 @Override
130 public void visit(ElementGroup el) {
131 // TODO Auto-generated method stub
132 for (Element e: el.getElements())
133 e.visit(this);
134 }
135
136 @Override
137 public void visit(ElementDataset el) {
138 // TODO Auto-generated method stub
139
140 }
141
142 @Override
143 public void visit(ElementNamedGraph el) {
144 // TODO Auto-generated method stub
145
146 }
147
148 @Override
149 public void visit(ElementExists el) {
150 // TODO Auto-generated method stub
151
152 }
153
154 @Override
155 public void visit(ElementNotExists el) {
156 // TODO Auto-generated method stub
157
158 }
159
160 @Override
161 public void visit(ElementMinus el) {
162 // TODO Auto-generated method stub
163
164 }
165
166 @Override
167 public void visit(ElementService el) {
168 // TODO Auto-generated method stub
169
170 }
171
172 @Override
173 public void visit(ElementSubQuery el) {
174 // TODO Auto-generated method stub
175
176 }
177
178 @Override
179 public void visit(ElementData el) {
180 // TODO Auto-generated method stub
181
182 }
183
184} \ No newline at end of file
diff --git a/external/uk/ac/ox/cs/data/QueryFilter.java b/external/uk/ac/ox/cs/data/QueryFilter.java
new file mode 100644
index 0000000..36837d5
--- /dev/null
+++ b/external/uk/ac/ox/cs/data/QueryFilter.java
@@ -0,0 +1,30 @@
1package uk.ac.ox.cs.data;
2
3import java.io.File;
4import java.io.FileNotFoundException;
5import java.util.Scanner;
6
7import uk.ac.ox.cs.pagoda.query.QueryManager;
8
9public class QueryFilter {
10
11 public static void main(String[] args) throws FileNotFoundException {
12 args = new String[] {"/media/krr-nas-share/Yujiao/ontologies/npd/queries/atomic.sparql",
13 "/home/yzhou/java-workspace/test-share/results_new/npd/pagoda"};
14 Scanner answerReader = new Scanner(new File(args[1]));
15 int totalNumberOfQueries = 0;
16 String line, prefix = "The number of answer tuples: ";
17 int index = 0, length = prefix.length();
18 for (String query: QueryManager.collectQueryTexts(args[0])) {
19 while (!(line = answerReader.nextLine()).startsWith(prefix));
20 ++totalNumberOfQueries;
21// if (query.contains("?X ?Y")) continue;
22 if (line.charAt(length) == '0') continue;
23 System.out.println("^[Query" + ++index + "]");
24 System.out.println(query);
25 }
26 answerReader.close();
27 System.out.println("Total number of queries: " + totalNumberOfQueries);
28 }
29
30}
diff --git a/external/uk/ac/ox/cs/data/RemoveDataPropertyRange.java b/external/uk/ac/ox/cs/data/RemoveDataPropertyRange.java
new file mode 100644
index 0000000..acaa91b
--- /dev/null
+++ b/external/uk/ac/ox/cs/data/RemoveDataPropertyRange.java
@@ -0,0 +1,52 @@
1package uk.ac.ox.cs.data;
2
3import java.io.File;
4import java.io.FileOutputStream;
5import java.io.IOException;
6
7import org.semanticweb.owlapi.apibinding.OWLManager;
8import org.semanticweb.owlapi.model.OWLAxiom;
9import org.semanticweb.owlapi.model.OWLDataPropertyRangeAxiom;
10import org.semanticweb.owlapi.model.OWLException;
11import org.semanticweb.owlapi.model.OWLOntology;
12import org.semanticweb.owlapi.model.OWLOntologyManager;
13
14import uk.ac.ox.cs.pagoda.tester.PagodaTester;
15import uk.ac.ox.cs.pagoda.util.Utility;
16
17public class RemoveDataPropertyRange {
18
19 public static void process(String file) throws OWLException, IOException {
20 OWLOntologyManager originalManager = OWLManager.createOWLOntologyManager();
21 OWLOntologyManager manager = OWLManager.createOWLOntologyManager();
22
23 OWLOntology originalOntology = originalManager.loadOntologyFromOntologyDocument(new File(file));
24 OWLOntology ontology = manager.createOntology(originalOntology.getOntologyID().getOntologyIRI());
25
26 for (OWLOntology onto: originalOntology.getImportsClosure())
27 for (OWLAxiom axiom: onto.getAxioms()) {
28 if (!(axiom instanceof OWLDataPropertyRangeAxiom))
29 manager.addAxiom(ontology, axiom);
30 }
31 originalManager.removeOntology(originalOntology);
32
33 String extension = file.substring(file.lastIndexOf("."));
34 String fileName = file.substring(file.lastIndexOf(Utility.FILE_SEPARATOR) + 1);
35 String dest = fileName.replace(extension, "-noDPR.owl");
36 manager.saveOntology(ontology, new FileOutputStream(dest));
37 System.out.println("The processed ontology is saved in " + dest + " successfully.");
38 manager.removeOntology(ontology);
39 }
40
41 public static void main(String[] args) {
42 try {
43 process(PagodaTester.chembl_tbox);
44 } catch (OWLException e) {
45 e.printStackTrace();
46 } catch (IOException e) {
47 e.printStackTrace();
48 }
49
50 }
51
52}
diff --git a/external/uk/ac/ox/cs/data/RemoveImportInTurtle.java b/external/uk/ac/ox/cs/data/RemoveImportInTurtle.java
new file mode 100644
index 0000000..2c0fb00
--- /dev/null
+++ b/external/uk/ac/ox/cs/data/RemoveImportInTurtle.java
@@ -0,0 +1,77 @@
1package uk.ac.ox.cs.data;
2
3import java.io.File;
4import java.io.FileInputStream;
5import java.io.FileOutputStream;
6import java.io.IOException;
7
8import org.openrdf.model.Statement;
9import org.openrdf.rio.RDFHandler;
10import org.openrdf.rio.RDFHandlerException;
11import org.openrdf.rio.RDFParseException;
12import org.openrdf.rio.turtle.TurtleParser;
13import org.openrdf.rio.turtle.TurtleWriter;
14
15public class RemoveImportInTurtle {
16
17 public static void main(String[] args) throws RDFParseException, RDFHandlerException, IOException {
18 if (args.length == 0)
19 args = new String[] {
20// "/media/krr-nas-share/Yujiao/ontologies/lubm/data/lubm1.ttl",
21// "../trowl/lubm_trowl/lubm1.ttl",
22// "http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#"
23 "/media/krr-nas-share/Yujiao/ontologies/npd/data/npd-data-dump-minus-datatype-new.ttl",
24 "/users/yzhou/temp/npd.ttl",
25 "http://sws.ifi.uio.no/data/npd-v2/#"
26 };
27 TurtleParser parser = new TurtleParser();
28 TurtleWriter writer = new TurtleWriter(new FileOutputStream(new File(args[1])));
29 parser.setRDFHandler(new LocalRDFHandler(writer));
30 parser.parse(new FileInputStream(new File(args[0])), args[2]);
31 }
32
33}
34
35class LocalRDFHandler implements RDFHandler {
36
37 TurtleWriter m_writer;
38
39 public LocalRDFHandler(TurtleWriter writer) {
40 m_writer = writer;
41 }
42
43 @Override
44 public void startRDF() throws RDFHandlerException {
45 m_writer.startRDF();
46
47 }
48
49 @Override
50 public void endRDF() throws RDFHandlerException {
51 m_writer.endRDF();
52 }
53
54 @Override
55 public void handleNamespace(String prefix, String uri)
56 throws RDFHandlerException {
57 m_writer.handleNamespace(prefix, uri);
58
59 }
60
61 @Override
62 public void handleStatement(Statement st) throws RDFHandlerException {
63 if (st.getObject().toString().equals("http://www.w3.org/2002/07/owl#Ontology"))
64 return ;
65 if (st.getPredicate().toString().equals("http://www.w3.org/2002/07/owl#imports"))
66 return ;
67 m_writer.handleStatement(st);
68
69 }
70
71 @Override
72 public void handleComment(String comment) throws RDFHandlerException {
73 m_writer.handleComment(comment);
74
75 }
76
77} \ No newline at end of file
diff --git a/external/uk/ac/ox/cs/data/WriteIntoTurtle.java b/external/uk/ac/ox/cs/data/WriteIntoTurtle.java
new file mode 100644
index 0000000..b17e035
--- /dev/null
+++ b/external/uk/ac/ox/cs/data/WriteIntoTurtle.java
@@ -0,0 +1,69 @@
1package uk.ac.ox.cs.data;
2
3import org.semanticweb.simpleETL.SimpleETL;
4
5public class WriteIntoTurtle {
6
7 public void rewriteUOBM(int number) {
8 rewrite(
9 "http://semantics.crl.ibm.com/univ-bench-dl.owl#",
10 "/home/yzhou/krr-nas-share/Yujiao/ontologies/uobm/data/uobm" + number + "_owl",
11 "/home/yzhou/krr-nas-share/Yujiao/ontologies/uobm/data/uobm" + number + ".ttl"
12 );
13 }
14
15 public void rewriteUOBM15() {
16 rewriteUOBM(15);
17 }
18
19 public void rewriteUOBM300() {
20 rewriteUOBM(300);
21 }
22
23 public void testUOBM400() {
24 rewriteUOBM(400);
25 }
26
27 public void rewriteLUBM(int number) {
28 rewrite(
29 "http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#",
30 "/home/yzhou/krr-nas-share/Yujiao/ontologies/lubm/data/lubm" + number + "_owl",
31 "/home/yzhou/krr-nas-share/Yujiao/ontologies/lubm/data/lubm" + number + ".ttl"
32 );
33 }
34
35 public void testLUBM900() {
36 rewriteLUBM(900);
37 }
38
39 public static void main(String[] args) {
40// "http://identifiers.org/biomodels.vocabulary#",
41// "/home/yzhou/krr-nas-share/Yujiao/BioModels/sbml2rdfall",
42// "/users/yzhou/ontologies/biomodels");
43
44// "http://www.biopax.org/release/biopax-level3.owl#",
45// "/home/scratch/yzhou/ontologies/bio2rdf/reactome/biopaxrdf",
46// "/home/scratch/yzhou/ontologies/bio2rdf/reactome"
47
48 new WriteIntoTurtle().rewriteUOBM(20);
49
50// args = new String[] {
51// "http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#",
52// "/home/yzhou/krr-nas-share/Yujiao/ontologies/lubm/data/lubm400_owl",
53// "/home/yzhou/krr-nas-share/Yujiao/ontologies/lubm/data/lubm400.ttl"
54// };
55//
56// new WriteIntoTurtle().rewrite(args);
57 }
58
59 public void rewrite(String... args) {
60 SimpleETL rewriter = new SimpleETL(args[0], args[1], args[2]);
61
62 try {
63 rewriter.rewrite();
64 } catch (Exception e) {
65 e.printStackTrace();
66 }
67 }
68
69}
diff --git a/external/uk/ac/ox/cs/data/WriteToNTriple.java b/external/uk/ac/ox/cs/data/WriteToNTriple.java
new file mode 100644
index 0000000..27e69b9
--- /dev/null
+++ b/external/uk/ac/ox/cs/data/WriteToNTriple.java
@@ -0,0 +1,57 @@
1package uk.ac.ox.cs.data;
2
3import java.io.FileInputStream;
4import java.io.FileOutputStream;
5import java.io.IOException;
6
7import org.openrdf.model.Statement;
8import org.openrdf.rio.RDFHandler;
9import org.openrdf.rio.RDFHandlerException;
10import org.openrdf.rio.RDFParseException;
11import org.openrdf.rio.RDFParser;
12import org.openrdf.rio.RDFWriter;
13import org.openrdf.rio.ntriples.NTriplesWriter;
14import org.openrdf.rio.turtle.TurtleParser;
15
16
17public class WriteToNTriple {
18
19 public static void main(String... args) throws RDFParseException, RDFHandlerException, IOException {
20 if (args.length == 0)
21 args = new String[] {"/media/krr-nas-share/Yujiao/ontologies/bio2rdf/reactome/data/data.ttl",
22 "http://www.biopax.org/release/biopax-level3.owl#"};
23
24 RDFParser parser = new TurtleParser();
25 final RDFWriter writer = new NTriplesWriter(new FileOutputStream(args[0].replace(".ttl", ".nt")));
26
27 parser.setRDFHandler(new RDFHandler() {
28
29 @Override
30 public void startRDF() throws RDFHandlerException {
31 writer.startRDF();
32 }
33
34 @Override
35 public void handleStatement(Statement arg0) throws RDFHandlerException {
36 writer.handleStatement(arg0);
37 }
38
39 @Override
40 public void handleNamespace(String arg0, String arg1) throws RDFHandlerException {
41 writer.handleNamespace(arg0, arg1);
42 }
43
44 @Override
45 public void handleComment(String arg0) throws RDFHandlerException {
46 writer.handleComment(arg0);
47 }
48
49 @Override
50 public void endRDF() throws RDFHandlerException {
51 writer.endRDF();
52 }
53 });
54
55 parser.parse(new FileInputStream(args[0]), args[1]);
56 }
57}
diff --git a/external/uk/ac/ox/cs/data/datatype/DataPropertyEliminator.java b/external/uk/ac/ox/cs/data/datatype/DataPropertyEliminator.java
new file mode 100644
index 0000000..95765f3
--- /dev/null
+++ b/external/uk/ac/ox/cs/data/datatype/DataPropertyEliminator.java
@@ -0,0 +1,56 @@
1package uk.ac.ox.cs.data.datatype;
2
3import java.io.File;
4
5import org.semanticweb.owlapi.apibinding.OWLManager;
6import org.semanticweb.owlapi.model.IRI;
7import org.semanticweb.owlapi.model.OWLAxiom;
8import org.semanticweb.owlapi.model.OWLOntology;
9import org.semanticweb.owlapi.model.OWLOntologyCreationException;
10import org.semanticweb.owlapi.model.OWLOntologyManager;
11import org.semanticweb.owlapi.model.OWLOntologyStorageException;
12import uk.ac.ox.cs.pagoda.owl.OWLHelper;
13
14public class DataPropertyEliminator {
15
16 private static final String FLAG = "-minus-datatype";
17
18 public static void main(String[] args) {
19 // for NPD dataset
20// args = "/home/yzhou/ontologies/npd/npd-all.owl".split("\\ ");
21
22 args = "/home/yzhou/ontologies/dbpedia/integratedOntology-all-in-one.owl".split("\\ ");
23
24 String file = args[0];
25 String newFile = file.replace(".owl", FLAG + ".owl");
26
27 OWLOntologyManager manager = OWLManager.createOWLOntologyManager();
28 OWLOntology onto = OWLHelper.loadOntology(manager, file);
29 OWLOntology newOntology;
30
31 try {
32 if (onto.getOntologyID().getOntologyIRI() != null) {
33 String iri = onto.getOntologyID().getOntologyIRI().toString();
34 iri = iri.replace(".owl", FLAG + ".owl");
35 newOntology = manager.createOntology(IRI.create(iri));
36 }
37 else newOntology = manager.createOntology();
38
39 for (OWLOntology o: onto.getImportsClosure())
40 for (OWLAxiom axiom: o.getAxioms()) {
41 if (axiom.getDatatypesInSignature().isEmpty() && axiom.getDataPropertiesInSignature().isEmpty()) {
42 manager.addAxiom(newOntology, axiom);
43 }
44 }
45
46 manager.saveOntology(newOntology, IRI.create(new File(newFile)));
47 }
48 catch (OWLOntologyCreationException e) {
49 e.printStackTrace();
50 } catch (OWLOntologyStorageException e) {
51 e.printStackTrace();
52 }
53
54 }
55
56} \ No newline at end of file
diff --git a/external/uk/ac/ox/cs/data/datatype/DataToObject.java b/external/uk/ac/ox/cs/data/datatype/DataToObject.java
new file mode 100644
index 0000000..90794fd
--- /dev/null
+++ b/external/uk/ac/ox/cs/data/datatype/DataToObject.java
@@ -0,0 +1,932 @@
1package uk.ac.ox.cs.data.datatype;
2
3import java.io.File;
4import java.io.FileInputStream;
5import java.io.FileNotFoundException;
6import java.io.FileOutputStream;
7import java.io.IOException;
8import java.text.Normalizer;
9import java.util.HashSet;
10import java.util.Set;
11
12import org.openrdf.model.Resource;
13import org.openrdf.model.Statement;
14import org.openrdf.model.URI;
15import org.openrdf.model.Value;
16import org.openrdf.model.impl.StatementImpl;
17import org.openrdf.model.impl.URIImpl;
18import org.openrdf.rio.RDFHandler;
19import org.openrdf.rio.RDFHandlerException;
20import org.openrdf.rio.RDFParseException;
21import org.openrdf.rio.RDFWriter;
22import org.openrdf.rio.turtle.TurtleParser;
23import org.openrdf.rio.turtle.TurtleWriter;
24import org.semanticweb.owlapi.apibinding.OWLManager;
25import org.semanticweb.owlapi.model.IRI;
26import org.semanticweb.owlapi.model.OWLAnnotation;
27import org.semanticweb.owlapi.model.OWLAnnotationAssertionAxiom;
28import org.semanticweb.owlapi.model.OWLAnnotationProperty;
29import org.semanticweb.owlapi.model.OWLAnnotationPropertyDomainAxiom;
30import org.semanticweb.owlapi.model.OWLAnnotationPropertyRangeAxiom;
31import org.semanticweb.owlapi.model.OWLAnonymousIndividual;
32import org.semanticweb.owlapi.model.OWLAsymmetricObjectPropertyAxiom;
33import org.semanticweb.owlapi.model.OWLAxiom;
34import org.semanticweb.owlapi.model.OWLClass;
35import org.semanticweb.owlapi.model.OWLClassAssertionAxiom;
36import org.semanticweb.owlapi.model.OWLClassExpression;
37import org.semanticweb.owlapi.model.OWLDataAllValuesFrom;
38import org.semanticweb.owlapi.model.OWLDataComplementOf;
39import org.semanticweb.owlapi.model.OWLDataExactCardinality;
40import org.semanticweb.owlapi.model.OWLDataFactory;
41import org.semanticweb.owlapi.model.OWLDataHasValue;
42import org.semanticweb.owlapi.model.OWLDataIntersectionOf;
43import org.semanticweb.owlapi.model.OWLDataMaxCardinality;
44import org.semanticweb.owlapi.model.OWLDataMinCardinality;
45import org.semanticweb.owlapi.model.OWLDataOneOf;
46import org.semanticweb.owlapi.model.OWLDataProperty;
47import org.semanticweb.owlapi.model.OWLDataPropertyAssertionAxiom;
48import org.semanticweb.owlapi.model.OWLDataPropertyDomainAxiom;
49import org.semanticweb.owlapi.model.OWLDataPropertyExpression;
50import org.semanticweb.owlapi.model.OWLDataPropertyRangeAxiom;
51import org.semanticweb.owlapi.model.OWLDataRange;
52import org.semanticweb.owlapi.model.OWLDataSomeValuesFrom;
53import org.semanticweb.owlapi.model.OWLDataUnionOf;
54import org.semanticweb.owlapi.model.OWLDatatype;
55import org.semanticweb.owlapi.model.OWLDatatypeDefinitionAxiom;
56import org.semanticweb.owlapi.model.OWLDatatypeRestriction;
57import org.semanticweb.owlapi.model.OWLDeclarationAxiom;
58import org.semanticweb.owlapi.model.OWLDifferentIndividualsAxiom;
59import org.semanticweb.owlapi.model.OWLDisjointClassesAxiom;
60import org.semanticweb.owlapi.model.OWLDisjointDataPropertiesAxiom;
61import org.semanticweb.owlapi.model.OWLDisjointObjectPropertiesAxiom;
62import org.semanticweb.owlapi.model.OWLDisjointUnionAxiom;
63import org.semanticweb.owlapi.model.OWLEntity;
64import org.semanticweb.owlapi.model.OWLEquivalentClassesAxiom;
65import org.semanticweb.owlapi.model.OWLEquivalentDataPropertiesAxiom;
66import org.semanticweb.owlapi.model.OWLEquivalentObjectPropertiesAxiom;
67import org.semanticweb.owlapi.model.OWLFacetRestriction;
68import org.semanticweb.owlapi.model.OWLFunctionalDataPropertyAxiom;
69import org.semanticweb.owlapi.model.OWLFunctionalObjectPropertyAxiom;
70import org.semanticweb.owlapi.model.OWLHasKeyAxiom;
71import org.semanticweb.owlapi.model.OWLIndividual;
72import org.semanticweb.owlapi.model.OWLInverseFunctionalObjectPropertyAxiom;
73import org.semanticweb.owlapi.model.OWLInverseObjectPropertiesAxiom;
74import org.semanticweb.owlapi.model.OWLIrreflexiveObjectPropertyAxiom;
75import org.semanticweb.owlapi.model.OWLLiteral;
76import org.semanticweb.owlapi.model.OWLNamedIndividual;
77import org.semanticweb.owlapi.model.OWLNegativeDataPropertyAssertionAxiom;
78import org.semanticweb.owlapi.model.OWLNegativeObjectPropertyAssertionAxiom;
79import org.semanticweb.owlapi.model.OWLObjectAllValuesFrom;
80import org.semanticweb.owlapi.model.OWLObjectComplementOf;
81import org.semanticweb.owlapi.model.OWLObjectExactCardinality;
82import org.semanticweb.owlapi.model.OWLObjectHasSelf;
83import org.semanticweb.owlapi.model.OWLObjectHasValue;
84import org.semanticweb.owlapi.model.OWLObjectIntersectionOf;
85import org.semanticweb.owlapi.model.OWLObjectInverseOf;
86import org.semanticweb.owlapi.model.OWLObjectMaxCardinality;
87import org.semanticweb.owlapi.model.OWLObjectMinCardinality;
88import org.semanticweb.owlapi.model.OWLObjectOneOf;
89import org.semanticweb.owlapi.model.OWLObjectProperty;
90import org.semanticweb.owlapi.model.OWLObjectPropertyAssertionAxiom;
91import org.semanticweb.owlapi.model.OWLObjectPropertyDomainAxiom;
92import org.semanticweb.owlapi.model.OWLObjectPropertyExpression;
93import org.semanticweb.owlapi.model.OWLObjectPropertyRangeAxiom;
94import org.semanticweb.owlapi.model.OWLObjectSomeValuesFrom;
95import org.semanticweb.owlapi.model.OWLObjectUnionOf;
96import org.semanticweb.owlapi.model.OWLObjectVisitorEx;
97import org.semanticweb.owlapi.model.OWLOntology;
98import org.semanticweb.owlapi.model.OWLOntologyCreationException;
99import org.semanticweb.owlapi.model.OWLOntologyManager;
100import org.semanticweb.owlapi.model.OWLOntologyStorageException;
101import org.semanticweb.owlapi.model.OWLReflexiveObjectPropertyAxiom;
102import org.semanticweb.owlapi.model.OWLSameIndividualAxiom;
103import org.semanticweb.owlapi.model.OWLSubAnnotationPropertyOfAxiom;
104import org.semanticweb.owlapi.model.OWLSubClassOfAxiom;
105import org.semanticweb.owlapi.model.OWLSubDataPropertyOfAxiom;
106import org.semanticweb.owlapi.model.OWLSubObjectPropertyOfAxiom;
107import org.semanticweb.owlapi.model.OWLSubPropertyChainOfAxiom;
108import org.semanticweb.owlapi.model.OWLSymmetricObjectPropertyAxiom;
109import org.semanticweb.owlapi.model.OWLTransitiveObjectPropertyAxiom;
110import org.semanticweb.owlapi.model.SWRLBuiltInAtom;
111import org.semanticweb.owlapi.model.SWRLClassAtom;
112import org.semanticweb.owlapi.model.SWRLDataPropertyAtom;
113import org.semanticweb.owlapi.model.SWRLDataRangeAtom;
114import org.semanticweb.owlapi.model.SWRLDifferentIndividualsAtom;
115import org.semanticweb.owlapi.model.SWRLIndividualArgument;
116import org.semanticweb.owlapi.model.SWRLLiteralArgument;
117import org.semanticweb.owlapi.model.SWRLObjectPropertyAtom;
118import org.semanticweb.owlapi.model.SWRLRule;
119import org.semanticweb.owlapi.model.SWRLSameIndividualAtom;
120import org.semanticweb.owlapi.model.SWRLVariable;
121
122import uk.ac.ox.cs.data.dbpedia.DataFilterRDFHandler;
123import uk.ac.ox.cs.pagoda.owl.OWLHelper;
124import uk.ac.ox.cs.pagoda.util.Utility;
125
126public class DataToObject {
127
128 private static final String FLAG = "-replaced";
129 public static final String PREFIX_LITERAL = "http://www.datatypevalue.org#";
130
131 String m_ontoFile, m_dataFile;
132 String m_newOntoFile, m_newDataFile;
133
134 Set<String> m_dataProperties = new HashSet<String>();
135 String m_prefix;
136
137 public DataToObject(String prefix, String ontoFile, String dataFile) {
138 m_prefix = prefix;
139
140 m_ontoFile = ontoFile;
141 String ext = m_ontoFile.substring(m_ontoFile.lastIndexOf("."));
142 m_newOntoFile = m_ontoFile.replace(ext, FLAG + ext);
143
144 if (dataFile == null || dataFile.isEmpty())
145 m_dataFile = m_newDataFile = null;
146 else {
147 m_dataFile = dataFile;
148 m_newDataFile = m_dataFile.replace(".ttl", FLAG + ".ttl");
149 }
150 }
151
152 public static void main(String[] args) {
153 DataToObject p = new DataToObject(
154// "http://dbpedia.org/ontology/",
155// "/home/yzhou/ontologies/dbpedia/integratedOntology-all-in-one.owl",
156// "/home/yzhou/workspace/payQ/ontologies/dbpedia/dbpedia.ttl");
157
158 // for NPD dataset
159// "http://sws.ifi.uio.no/vocab/npd-all.owl",
160// "/home/yzhou/ontologies/npd/npd-all.owl",
161// "/home/yzhou/ontologies/npd/data/npd-data-dump-processed.ttl");
162
163 // for ChEmBL
164 "http://rdf.ebi.ac.uk/terms/chembl#",
165 "/home/scratch/yzhou/ontologies/bio2rdf/chembl/cco (copy).ttl",
166 null);
167
168 p.processOntology();
169 Utility.logInfo("Ontology Processing DONE.");
170
171 p.processData();
172 Utility.logInfo("Data Processing DONE."); }
173
174 public void setOutputOntologyFile(String file) {
175 m_newOntoFile = file;
176 }
177
178 public void setOutputDataFile(String file) {
179 m_newDataFile = file;
180 }
181
182 public String processData() {
183 if (m_dataFile == null)
184 return null;
185
186 TurtleParser parser = new TurtleParser();
187 TurtleWriter writer;
188 try {
189 writer = new TurtleWriter(new FileOutputStream(m_newDataFile));
190 } catch (FileNotFoundException e) {
191 e.printStackTrace();
192 new File(m_newDataFile).delete();
193 return null;
194 }
195
196 parser.setRDFHandler(new DataToObjectRDFHandler(writer, m_dataProperties));
197 try {
198 parser.parse(new FileInputStream(m_dataFile), m_prefix);
199 } catch (RDFParseException e) {
200 e.printStackTrace();
201 } catch (RDFHandlerException e) {
202 e.printStackTrace();
203 } catch (IOException e) {
204 e.printStackTrace();
205 }
206
207 return m_newDataFile;
208 }
209
210 public String processOntology() {
211 OWLOntologyManager manager = OWLManager.createOWLOntologyManager();
212 OWLOntology newOntology, oldOntology;
213 oldOntology = OWLHelper.loadOntology(manager, m_ontoFile);
214 for (OWLDataProperty property: oldOntology.getDataPropertiesInSignature())
215 m_dataProperties.add(property.toStringID());
216
217 DataToObjectVisitor visitor = new DataToObjectVisitor(manager);
218 newOntology = (OWLOntology) oldOntology.accept(visitor);
219
220 try {
221 manager.saveOntology(newOntology, IRI.create(new File(m_newOntoFile)));
222 } catch (OWLOntologyStorageException e) {
223 e.printStackTrace();
224 }
225
226 return m_newOntoFile;
227 }
228
229 protected class DataToObjectVisitor implements OWLObjectVisitorEx<Object> {
230
231 private final OWLOntologyManager m_manager;
232 private final OWLDataFactory m_factory;
233
234 public DataToObjectVisitor(OWLOntologyManager man) {
235 m_manager = man;
236 m_factory = man.getOWLDataFactory();
237 }
238
239 private void reportUnsupportedFeature() {
240 Utility.logError("Unsupported features");
241 }
242
243 @Override
244 public Object visit(OWLDataProperty property) {
245 return m_factory.getOWLObjectProperty(property.getIRI());
246 }
247
248 @Override
249 public Object visit(OWLObjectOneOf ce) {
250 return ce;
251 }
252
253 @Override
254 public Object visit(OWLDataHasValue node) {
255 return m_factory.getOWLObjectHasValue(
256 (OWLObjectPropertyExpression) node.getProperty().accept(this),
257 (OWLIndividual) node.getValue().accept(this)
258 );
259 }
260
261 @Override
262 public Object visit(OWLDataSomeValuesFrom node) {
263 OWLClassExpression exp = null;
264 try {
265 exp = m_factory.getOWLObjectSomeValuesFrom(
266 (OWLObjectPropertyExpression) node.getProperty().accept(this),
267 (OWLClassExpression) node.getFiller().accept(this)
268 );
269 return exp;
270 } catch (Exception e) {
271 e.printStackTrace();
272 }
273 return node;
274 }
275
276 @Override
277 public Object visit(OWLDataIntersectionOf node) {
278 Set<OWLClassExpression> exps = new HashSet<OWLClassExpression>();
279 for (OWLDataRange range: node.getOperands())
280 exps.add((OWLClassExpression) range.accept(this));
281
282 return m_factory.getOWLObjectIntersectionOf(exps);
283 }
284
285 @Override
286 public Object visit(OWLSubDataPropertyOfAxiom axiom) {
287 return m_factory.getOWLSubObjectPropertyOfAxiom(
288 (OWLObjectPropertyExpression) axiom.getSubProperty().accept(this),
289 (OWLObjectPropertyExpression) axiom.getSuperProperty().accept(this));
290 }
291
292 @Override
293 public Object visit(OWLEquivalentDataPropertiesAxiom axiom) {
294 Set<OWLObjectPropertyExpression> props = new HashSet<OWLObjectPropertyExpression>();
295 for (OWLDataPropertyExpression dataProperty: axiom.getProperties())
296 props.add((OWLObjectPropertyExpression) dataProperty.accept(this));
297 return m_factory.getOWLEquivalentObjectPropertiesAxiom(props);
298 }
299
300 @Override
301 public Object visit(OWLTransitiveObjectPropertyAxiom axiom) {
302 return axiom;
303 }
304
305 @Override
306 public Object visit(OWLReflexiveObjectPropertyAxiom axiom) {
307 return axiom;
308 }
309
310 @Override
311 public Object visit(OWLDataPropertyDomainAxiom axiom) {
312 return m_factory.getOWLObjectPropertyDomainAxiom(
313 (OWLObjectPropertyExpression) axiom.getProperty().accept(this),
314 (OWLClassExpression) axiom.getDomain().accept(this)
315 );
316 }
317
318 @Override
319 public Object visit(OWLDataPropertyRangeAxiom axiom) {
320 return m_factory.getOWLObjectPropertyRangeAxiom(
321 (OWLObjectPropertyExpression) axiom.getProperty().accept(this),
322 (OWLClassExpression) axiom.getRange().accept(this)
323 );
324 }
325
326 @Override
327 public Object visit(OWLDataPropertyAssertionAxiom axiom) {
328 return m_factory.getOWLObjectPropertyAssertionAxiom(
329 (OWLObjectPropertyExpression) axiom.getProperty().accept(this),
330 axiom.getSubject(),
331 (OWLIndividual) axiom.getObject().accept(this)
332 );
333 }
334
335 @Override
336 public Object visit(OWLNegativeDataPropertyAssertionAxiom axiom) {
337 return m_factory.getOWLNegativeObjectPropertyAssertionAxiom(
338 (OWLObjectPropertyExpression) axiom.getProperty().accept(this),
339 axiom.getSubject(),
340 (OWLIndividual) axiom.getObject().accept(this)
341 );
342 }
343
344 @Override
345 public Object visit(OWLNegativeObjectPropertyAssertionAxiom axiom) {
346 return axiom;
347 }
348
349 @Override
350 public Object visit(OWLFunctionalDataPropertyAxiom axiom) {
351 return m_factory.getOWLFunctionalObjectPropertyAxiom(
352 (OWLObjectPropertyExpression) axiom.getProperty().accept(this)
353 );
354 }
355
356 @Override
357 public Object visit(OWLHasKeyAxiom axiom) {
358 Set<OWLObjectPropertyExpression> props = new HashSet<OWLObjectPropertyExpression>(axiom.getObjectPropertyExpressions());
359 for (OWLDataPropertyExpression dataProperty: axiom.getDataPropertyExpressions())
360 props.add((OWLObjectPropertyExpression) dataProperty.accept(this));
361 return m_factory.getOWLHasKeyAxiom(
362 (OWLClassExpression) axiom.getClassExpression().accept(this),
363 props
364 );
365 }
366
367
368 @Override
369 public Object visit(OWLObjectHasSelf node) {
370 return node;
371 }
372
373
374 @Override
375 public Object visit(OWLDataOneOf node) {
376 Set<OWLIndividual> individuals = new HashSet<OWLIndividual>();
377 for (OWLLiteral literal: node.getValues())
378 individuals.add((OWLIndividual) literal.accept(this));
379 return m_factory.getOWLObjectOneOf(individuals);
380 }
381
382
383
384 @Override
385 public Object visit(OWLSubPropertyChainOfAxiom axiom) {
386 return axiom;
387 }
388
389 @Override
390 public Object visit(OWLOntology ontology) {
391 OWLOntology newOntology = null;
392 try {
393 if (ontology.getOntologyID().getOntologyIRI() != null) {
394 String ontologyIRI = ontology.getOntologyID().getOntologyIRI().toString();
395 if (ontologyIRI.contains(".owl"))
396 ontologyIRI = ontologyIRI.replace(".owl", FLAG + ".owl");
397 else
398 ontologyIRI += FLAG;
399
400 newOntology = m_manager.createOntology(IRI.create(ontologyIRI));
401 }
402 else newOntology = m_manager.createOntology();
403
404 for (OWLOntology onto: ontology.getImportsClosure())
405 for (OWLAxiom axiom: onto.getAxioms()) {
406 OWLAxiom newAxiom = (OWLAxiom) axiom.accept(this);
407 m_manager.addAxiom(newOntology, newAxiom);
408 }
409
410 } catch (OWLOntologyCreationException e) {
411 e.printStackTrace();
412 }
413
414 return newOntology;
415 }
416
417 @Override
418 public Object visit(OWLSubClassOfAxiom axiom) {
419 return m_factory.getOWLSubClassOfAxiom(
420 (OWLClassExpression) axiom.getSubClass().accept(this),
421 (OWLClassExpression) axiom.getSuperClass().accept(this)
422 );
423 }
424
425 @Override
426 public Object visit(OWLAsymmetricObjectPropertyAxiom axiom) {
427 return axiom;
428 }
429
430 @Override
431 public Object visit(OWLDisjointClassesAxiom axiom) {
432 Set<OWLClassExpression> exps = new HashSet<OWLClassExpression>();
433 for (OWLClassExpression exp: axiom.getClassExpressions())
434 exps.add((OWLClassExpression) exp.accept(this));
435 return m_factory.getOWLDisjointClassesAxiom(exps);
436 }
437
438 @Override
439 public Object visit(OWLObjectPropertyDomainAxiom axiom) {
440 return m_factory.getOWLObjectPropertyDomainAxiom(
441 axiom.getProperty(),
442 (OWLClassExpression) axiom.getDomain().accept(this)
443 );
444 }
445
446 @Override
447 public Object visit(OWLEquivalentObjectPropertiesAxiom axiom) {
448 return axiom;
449 }
450
451 @Override
452 public Object visit(OWLDifferentIndividualsAxiom axiom) {
453 return axiom;
454 }
455
456 @Override
457 public Object visit(OWLDisjointDataPropertiesAxiom axiom) {
458 Set<OWLObjectPropertyExpression> props = new HashSet<OWLObjectPropertyExpression>();
459 for (OWLDataPropertyExpression dataProperty: axiom.getProperties())
460 props.add((OWLObjectPropertyExpression) dataProperty.accept(this));
461 return m_factory.getOWLDisjointObjectPropertiesAxiom(props);
462 }
463
464 @Override
465 public Object visit(OWLDisjointObjectPropertiesAxiom axiom) {
466 return axiom;
467 }
468
469 @Override
470 public Object visit(OWLObjectPropertyRangeAxiom axiom) {
471 return axiom;
472 }
473
474 @Override
475 public Object visit(OWLObjectPropertyAssertionAxiom axiom) {
476 return axiom;
477 }
478
479 @Override
480 public Object visit(OWLFunctionalObjectPropertyAxiom axiom) {
481 return axiom;
482 }
483
484 @Override
485 public Object visit(OWLSubObjectPropertyOfAxiom axiom) {
486 return axiom;
487 }
488
489 @Override
490 public Object visit(OWLDisjointUnionAxiom axiom) {
491 Set<OWLClassExpression> exps = new HashSet<OWLClassExpression>();
492 for (OWLClassExpression exp: axiom.getClassExpressions())
493 exps.add((OWLClassExpression) exp.accept(this));
494 return m_factory.getOWLDisjointUnionAxiom((OWLClass) axiom.getOWLClass().accept(this), exps);
495 }
496
497 @Override
498 public Object visit(OWLDeclarationAxiom axiom) {
499 OWLEntity entity = axiom.getEntity();
500 if (entity instanceof OWLDataProperty)
501 return m_factory.getOWLDeclarationAxiom(m_factory.getOWLObjectProperty(entity.getIRI()));
502 else if (entity instanceof OWLDatatype)
503 return m_factory.getOWLDeclarationAxiom((OWLClass) entity.accept(this));
504 else
505 return axiom;
506 }
507
508 @Override
509 public Object visit(OWLAnnotationAssertionAxiom axiom) {
510 return axiom;
511 }
512
513 @Override
514 public Object visit(OWLSymmetricObjectPropertyAxiom axiom) {
515 return axiom;
516 }
517
518 @Override
519 public Object visit(OWLClassAssertionAxiom axiom) {
520 return m_factory.getOWLClassAssertionAxiom(
521 (OWLClassExpression) axiom.getClassExpression().accept(this),
522 axiom.getIndividual());
523 }
524
525 @Override
526 public Object visit(OWLEquivalentClassesAxiom axiom) {
527 Set<OWLClassExpression> exps = new HashSet<OWLClassExpression>();
528 for (OWLClassExpression exp: axiom.getClassExpressions())
529 exps.add((OWLClassExpression) exp.accept(this));
530 return m_factory.getOWLEquivalentClassesAxiom(exps);
531 }
532
533 @Override
534 public Object visit(OWLIrreflexiveObjectPropertyAxiom axiom) {
535 return axiom;
536 }
537
538 @Override
539 public Object visit(OWLInverseFunctionalObjectPropertyAxiom axiom) {
540 return axiom;
541 }
542
543 @Override
544 public Object visit(OWLSameIndividualAxiom axiom) {
545 return axiom;
546 }
547
548 @Override
549 public Object visit(OWLInverseObjectPropertiesAxiom axiom) {
550 return axiom;
551 }
552
553 @Override
554 public Object visit(OWLDatatypeDefinitionAxiom axiom) {
555 reportUnsupportedFeature();
556 return null;
557 }
558
559 @Override
560 public Object visit(SWRLRule rule) {
561 reportUnsupportedFeature();
562 return null;
563 }
564
565 @Override
566 public Object visit(OWLSubAnnotationPropertyOfAxiom axiom) {
567 return axiom;
568 }
569
570 @Override
571 public Object visit(OWLAnnotationPropertyDomainAxiom axiom) {
572 return axiom;
573 }
574
575 @Override
576 public Object visit(OWLAnnotationPropertyRangeAxiom axiom) {
577 return axiom;
578 }
579
580 @Override
581 public Object visit(OWLClass ce) {
582 return ce;
583 }
584
585 @Override
586 public Object visit(OWLObjectIntersectionOf ce) {
587 Set<OWLClassExpression> exps = new HashSet<OWLClassExpression>();
588 for (OWLClassExpression exp: ce.getOperands())
589 exps.add((OWLClassExpression) exp.accept(this));
590 return m_factory.getOWLObjectIntersectionOf(exps);
591 }
592
593 @Override
594 public Object visit(OWLObjectUnionOf ce) {
595 Set<OWLClassExpression> exps = new HashSet<OWLClassExpression>();
596 for (OWLClassExpression exp: ce.getOperands())
597 exps.add((OWLClassExpression) exp.accept(this));
598 return m_factory.getOWLObjectUnionOf(exps);
599 }
600
601 @Override
602 public Object visit(OWLObjectComplementOf ce) {
603 return m_factory.getOWLObjectComplementOf((OWLClassExpression) ce.getOperand().accept(this));
604 }
605
606 @Override
607 public Object visit(OWLObjectSomeValuesFrom ce) {
608 return m_factory.getOWLObjectSomeValuesFrom(ce.getProperty(), (OWLClassExpression) ce.getFiller().accept(this));
609 }
610
611 @Override
612 public Object visit(OWLObjectAllValuesFrom ce) {
613 return m_factory.getOWLObjectAllValuesFrom(ce.getProperty(), (OWLClassExpression) ce.getFiller().accept(this));
614 }
615
616 @Override
617 public Object visit(OWLObjectHasValue ce) {
618 return ce;
619 }
620
621 @Override
622 public Object visit(OWLObjectMinCardinality ce) {
623 if (ce.getFiller().equals(m_factory.getOWLThing()))
624 return ce;
625 else
626 return m_factory.getOWLObjectMinCardinality(
627 ce.getCardinality(),
628 ce.getProperty(),
629 (OWLClassExpression) ce.getFiller().accept(this)
630 );
631 }
632
633 @Override
634 public Object visit(OWLObjectExactCardinality ce) {
635 if (ce.getFiller().equals(m_factory.getOWLThing()))
636 return ce;
637 else
638 return m_factory.getOWLObjectExactCardinality(
639 ce.getCardinality(),
640 ce.getProperty(),
641 (OWLClassExpression) ce.getFiller().accept(this)
642 );
643 }
644
645 @Override
646 public Object visit(OWLObjectMaxCardinality ce) {
647 if (ce.getFiller().equals(m_factory.getOWLThing()))
648 return ce;
649 else
650 return m_factory.getOWLObjectMaxCardinality(
651 ce.getCardinality(),
652 ce.getProperty(),
653 (OWLClassExpression) ce.getFiller().accept(this)
654 );
655 }
656
657 @Override
658 public Object visit(OWLDataAllValuesFrom ce) {
659 return m_factory.getOWLObjectAllValuesFrom(
660 (OWLObjectPropertyExpression) ce.getProperty().accept(this),
661 (OWLClassExpression) ce.getFiller().accept(this)
662 );
663 }
664
665 @Override
666 public Object visit(OWLDataMinCardinality ce) {
667 if (ce.getFiller().equals(m_factory.getTopDatatype()))
668 return m_factory.getOWLObjectMinCardinality(
669 ce.getCardinality(),
670 (OWLObjectPropertyExpression) ce.getProperty().accept(this)
671 );
672 else
673 return m_factory.getOWLObjectMinCardinality(
674 ce.getCardinality(),
675 (OWLObjectPropertyExpression) ce.getProperty().accept(this),
676 (OWLClassExpression) ce.getFiller().accept(this)
677 );
678 }
679
680 @Override
681 public Object visit(OWLDataExactCardinality ce) {
682 if (ce.getFiller().equals(m_factory.getTopDatatype()))
683 return m_factory.getOWLObjectExactCardinality(
684 ce.getCardinality(),
685 (OWLObjectPropertyExpression) ce.getProperty().accept(this)
686 );
687 else
688 return m_factory.getOWLObjectExactCardinality(
689 ce.getCardinality(),
690 (OWLObjectPropertyExpression) ce.getProperty().accept(this),
691 (OWLClassExpression) ce.getFiller().accept(this)
692 );
693 }
694
695 @Override
696 public Object visit(OWLDataMaxCardinality ce) {
697 if (ce.getFiller().equals(m_factory.getTopDatatype()))
698 return m_factory.getOWLObjectMaxCardinality(
699 ce.getCardinality(),
700 (OWLObjectPropertyExpression) ce.getProperty().accept(this)
701 );
702 else
703 return m_factory.getOWLObjectMaxCardinality(
704 ce.getCardinality(),
705 (OWLObjectPropertyExpression) ce.getProperty().accept(this),
706 (OWLClassExpression) ce.getFiller().accept(this)
707 );
708 }
709
710 @Override
711 public Object visit(OWLDatatype node) {
712 return m_factory.getOWLClass(node.getIRI());
713 }
714
715 @Override
716 public Object visit(OWLDataComplementOf node) {
717 return m_factory.getOWLObjectComplementOf(
718 (OWLClassExpression) node.getDataRange().accept(this)
719 );
720 }
721
722 /* (non-Javadoc)
723 * @see org.semanticweb.owlapi.model.OWLDataVisitorEx#visit(org.semanticweb.owlapi.model.OWLDataUnionOf)
724 */
725 @Override
726 public Object visit(OWLDataUnionOf node) {
727 Set<OWLClassExpression> exps = new HashSet<OWLClassExpression>();
728 for (OWLDataRange range: node.getOperands())
729 exps.add((OWLClassExpression) range.accept(this));
730 return m_factory.getOWLObjectUnionOf(exps);
731 }
732
733 @Override
734 public Object visit(OWLDatatypeRestriction node) {
735 reportUnsupportedFeature();
736 return null;
737 }
738
739 @Override
740 public Object visit(OWLLiteral node) {
741 String name = PREFIX_LITERAL + node.getLiteral() + getTypeTag(node.getDatatype());
742 return m_factory.getOWLNamedIndividual(IRI.create(name));
743 }
744
745 private String getTypeTag(OWLDatatype datatype) {
746 if (datatype.isBoolean()) return "_boolean";
747 if (datatype.isDouble()) return "_double";
748 if (datatype.isFloat()) return "_float";
749 if (datatype.isInteger()) return "_integer";
750 if (datatype.isRDFPlainLiteral()) return "_plain";
751 if (datatype.isString()) return "_string";
752 return null;
753 }
754
755 @Override
756 public Object visit(OWLFacetRestriction node) {
757 reportUnsupportedFeature();
758 return null;
759 }
760
761 @Override
762 public Object visit(OWLObjectProperty property) {
763 return property;
764 }
765
766 @Override
767 public Object visit(OWLObjectInverseOf property) {
768 return property;
769 }
770
771 @Override
772 public Object visit(OWLNamedIndividual individual) {
773 return individual;
774 }
775
776 @Override
777 public Object visit(OWLAnnotationProperty property) {
778 return property;
779 }
780
781 @Override
782 public Object visit(OWLAnnotation node) {
783 return node;
784 }
785
786 @Override
787 public Object visit(IRI iri) {
788 return iri;
789 }
790
791 @Override
792 public Object visit(OWLAnonymousIndividual individual) {
793 return individual;
794 }
795
796 @Override
797 public Object visit(SWRLClassAtom node) {
798 reportUnsupportedFeature();
799 return null;
800 }
801
802 @Override
803 public Object visit(SWRLDataRangeAtom node) {
804 reportUnsupportedFeature();
805 return null;
806 }
807
808 @Override
809 public Object visit(SWRLObjectPropertyAtom node) {
810 reportUnsupportedFeature();
811 return null;
812 }
813
814 @Override
815 public Object visit(SWRLDataPropertyAtom node) {
816 reportUnsupportedFeature();
817 return null;
818 }
819
820 @Override
821 public Object visit(SWRLBuiltInAtom node) {
822 reportUnsupportedFeature();
823 return null;
824 }
825
826 @Override
827 public Object visit(SWRLVariable node) {
828 reportUnsupportedFeature();
829 return null;
830 }
831
832 @Override
833 public Object visit(SWRLIndividualArgument node) {
834 reportUnsupportedFeature();
835 return null;
836 }
837
838 @Override
839 public Object visit(SWRLLiteralArgument node) {
840 reportUnsupportedFeature();
841 return null;
842 }
843
844 @Override
845 public Object visit(SWRLSameIndividualAtom node) {
846 reportUnsupportedFeature();
847 return null;
848 }
849
850 @Override
851 public Object visit(SWRLDifferentIndividualsAtom node) {
852 reportUnsupportedFeature();
853 return null;
854 }
855 }
856
857 protected class DataToObjectRDFHandler implements RDFHandler {
858
859 RDFWriter m_writer;
860 Set<String> m_properties;
861 DataToObjectVisitor m_visitor;
862
863 public DataToObjectRDFHandler(TurtleWriter writer, Set<String> dataProperties) {
864 m_writer = writer;
865 m_properties = dataProperties;
866 }
867
868 @Override
869 public void endRDF() throws RDFHandlerException {
870 m_writer.endRDF();
871 }
872
873 @Override
874 public void handleComment(String arg0) throws RDFHandlerException {
875 m_writer.handleComment(arg0);
876 }
877
878 @Override
879 public void handleNamespace(String arg0, String arg1) throws RDFHandlerException {
880 m_writer.handleNamespace(arg0, arg1);
881 }
882
883 @Override
884 public void handleStatement(Statement arg0) throws RDFHandlerException {
885 URI predicate = arg0.getPredicate();
886 Resource subject = arg0.getSubject();
887 Value object = arg0.getObject();
888
889 if (subject instanceof URI) {
890 String newSubject = Normalizer.normalize(arg0.getSubject().toString(), Normalizer.Form.NFKC);
891 if (!isValidIRI(newSubject)) {
892 return ;
893 }
894 else subject = new URIImpl(newSubject);
895 }
896
897 if (m_properties.contains(predicate.toString()) || object.toString().contains("\"^^")) {
898 String newObject = Normalizer.normalize(getIndividual(object.toString()), Normalizer.Form.NFKC);
899 if (!isValidIRI(newObject)) {
900 return ;
901 }
902
903 m_writer.handleStatement(new StatementImpl(subject, predicate, new URIImpl(newObject)));
904 }
905 else
906 m_writer.handleStatement(arg0);
907 }
908
909 private boolean isValidIRI(String newSubject) {
910 org.apache.jena.iri.IRI iri;
911 try {
912 iri = DataFilterRDFHandler.iriFactory.construct(newSubject);
913 if (iri.hasViolation(true)) return false;
914 } catch (org.apache.jena.iri.IRIException e) {
915 return false;
916 }
917 return true;
918 }
919
920 private String getIndividual(String s) {
921 if (s.startsWith("_:")) return s;
922 int left = s.indexOf("\""), right = s.lastIndexOf("\"");
923 return PREFIX_LITERAL + s.substring(left + 1, right).replace(' ', '-');
924 }
925
926 @Override
927 public void startRDF() throws RDFHandlerException {
928 m_writer.startRDF();
929 }
930
931 }
932}
diff --git a/external/uk/ac/ox/cs/data/dbpedia/DataFilter.java b/external/uk/ac/ox/cs/data/dbpedia/DataFilter.java
new file mode 100644
index 0000000..dc2f3e0
--- /dev/null
+++ b/external/uk/ac/ox/cs/data/dbpedia/DataFilter.java
@@ -0,0 +1,68 @@
1package uk.ac.ox.cs.data.dbpedia;
2
3import java.io.FileInputStream;
4import java.io.FileNotFoundException;
5import java.io.FileOutputStream;
6import java.io.IOException;
7import java.util.HashSet;
8import java.util.Set;
9
10import org.openrdf.rio.RDFHandlerException;
11import org.openrdf.rio.RDFParseException;
12import org.openrdf.rio.turtle.TurtleParser;
13import org.openrdf.rio.turtle.TurtleWriter;
14import org.semanticweb.owlapi.model.OWLAnnotationProperty;
15import org.semanticweb.owlapi.model.OWLDataProperty;
16import org.semanticweb.owlapi.model.OWLOntology;
17import uk.ac.ox.cs.pagoda.owl.OWLHelper;
18
19public class DataFilter {
20
21 public static void main(String[] args) throws FileNotFoundException {
22 filteringDBPedia();
23 }
24
25 /**
26 * Filter out data property assertions and annotation property assertions in the data set.
27 *
28 * @throws FileNotFoundException
29 */
30 private static void filteringDBPedia() throws FileNotFoundException {
31 String[] args = (
32// "/home/yzhou/ontologies/npd/npd-all.owl " +
33// "/home/yzhou/ontologies/npd/data/npd-data-dump-processed.ttl " +
34// "/home/yzhou/ontologies/npd/data/npd-data-dump-minus-datatype-new.ttl " +
35// "http://sws.ifi.uio.no/vocab/npd-all.owl#"
36
37 "/media/RDFData/yzhou/dbpedia/integratedOntology.owl " +
38 "/media/RDFData/yzhou/dbpedia/data/dbpedia-processed.ttl " +
39 "/home/yzhou/ontologies/dbpedia/data/dbpedia-minus-datatype-new.ttl " +
40 "http://dbpedia.org/ontology/"
41 ).split("\\ ");
42
43
44 OWLOntology ontology = OWLHelper.loadOntology(args[0]);
45
46 Set<String> properties2ignore = new HashSet<String>();
47 for (OWLDataProperty prop: ontology.getDataPropertiesInSignature(true))
48 properties2ignore.add(prop.toStringID());
49 for (OWLAnnotationProperty prop: ontology.getAnnotationPropertiesInSignature())
50 properties2ignore.add(prop.toStringID());
51
52 TurtleParser parser = new TurtleParser();
53 TurtleWriter writer = new TurtleWriter(new FileOutputStream(args[2]));
54
55 parser.setRDFHandler(new DataFilterRDFHandler(writer, properties2ignore));
56 try {
57 parser.parse(new FileInputStream(args[1]), args[3]);
58 } catch (RDFParseException e) {
59 e.printStackTrace();
60 } catch (RDFHandlerException e) {
61 e.printStackTrace();
62 } catch (IOException e) {
63 e.printStackTrace();
64 }
65
66 }
67
68}
diff --git a/external/uk/ac/ox/cs/data/dbpedia/DataFilterRDFHandler.java b/external/uk/ac/ox/cs/data/dbpedia/DataFilterRDFHandler.java
new file mode 100644
index 0000000..6dbac91
--- /dev/null
+++ b/external/uk/ac/ox/cs/data/dbpedia/DataFilterRDFHandler.java
@@ -0,0 +1,116 @@
1package uk.ac.ox.cs.data.dbpedia;
2
3import java.text.Normalizer;
4import java.util.Set;
5
6import org.apache.jena.iri.IRI;
7import org.apache.jena.iri.IRIException;
8import org.apache.jena.iri.IRIFactory;
9
10import org.openrdf.model.BNode;
11import org.openrdf.model.Resource;
12import org.openrdf.model.URI;
13import org.openrdf.model.Value;
14import org.openrdf.model.Literal;
15import org.openrdf.model.Statement;
16import org.openrdf.model.impl.StatementImpl;
17import org.openrdf.model.impl.URIImpl;
18import org.openrdf.rio.RDFHandler;
19import org.openrdf.rio.RDFHandlerException;
20import org.openrdf.rio.RDFWriter;
21
22public class DataFilterRDFHandler implements RDFHandler {
23
24 public static IRIFactory iriFactory = IRIFactory.semanticWebImplementation();
25
26 RDFWriter m_writer;
27 Set<String> m_properties;
28
29 public DataFilterRDFHandler(RDFWriter writer, Set<String> properties2ignore) {
30 m_writer = writer;
31 m_properties = properties2ignore;
32 }
33
34 @Override
35 public void endRDF() throws RDFHandlerException {
36 m_writer.endRDF();
37 }
38
39 @Override
40 public void handleComment(String arg0) throws RDFHandlerException {
41 m_writer.handleComment(arg0);
42 }
43
44 @Override
45 public void handleNamespace(String arg0, String arg1) throws RDFHandlerException {
46 m_writer.handleNamespace(arg0, arg1);
47 }
48
49 @Override
50 public void handleStatement(Statement arg0) throws RDFHandlerException {
51 Value newObject = null, oldObject = arg0.getObject();
52
53 if (oldObject instanceof Literal)
54 return ;
55 else if (oldObject instanceof BNode) {
56 newObject = oldObject;
57 }
58 else if (oldObject instanceof URI)
59 newObject = new URIImpl(Normalizer.normalize(oldObject.toString(), Normalizer.Form.NFKC));
60 else {
61 System.out.println("Object: " + oldObject.getClass());
62 }
63
64 String predicate = arg0.getPredicate().toString();
65 if (m_properties.contains(predicate)) return ;
66
67 Resource newSubject = null, oldSubject = arg0.getSubject();
68
69 if (oldSubject instanceof BNode) {
70 newSubject = oldSubject;
71 }
72 else if (oldSubject instanceof URI) {
73 newSubject = new URIImpl(Normalizer.normalize(oldSubject.toString(), Normalizer.Form.NFKC));
74 }
75 else {
76 System.out.println("Subject: " + oldSubject.getClass());
77 }
78
79// if (newObject.toString().contains("ns#type"))
80// System.out.println(arg0);
81
82 if (newSubject == null || newObject == null) {
83 System.out.println(arg0);
84 return ;
85 }
86
87 IRI subjectIRI, objectIRI;
88 try {
89 if (newSubject instanceof URI){
90 subjectIRI = iriFactory.construct(newSubject.toString());
91 if (subjectIRI.hasViolation(true)) {
92 System.out.println(arg0);
93 return ;
94 }
95 }
96 if (newObject instanceof URI) {
97 objectIRI = iriFactory.construct(newObject.toString());
98 if (objectIRI.hasViolation(true)) {
99 System.out.println(arg0);
100 return ;
101 }
102 }
103
104 } catch (IRIException e) {
105 return ;
106 }
107
108 m_writer.handleStatement(new StatementImpl(newSubject, arg0.getPredicate(), newObject));
109 }
110
111 @Override
112 public void startRDF() throws RDFHandlerException {
113 m_writer.startRDF();
114 }
115
116}
diff --git a/external/uk/ac/ox/cs/data/dbpedia/Normaliser.java b/external/uk/ac/ox/cs/data/dbpedia/Normaliser.java
new file mode 100644
index 0000000..e025604
--- /dev/null
+++ b/external/uk/ac/ox/cs/data/dbpedia/Normaliser.java
@@ -0,0 +1,155 @@
1package uk.ac.ox.cs.data.dbpedia;
2
3import java.io.BufferedReader;
4import java.io.BufferedWriter;
5import java.io.FileInputStream;
6import java.io.FileOutputStream;
7import java.io.IOException;
8import java.io.InputStreamReader;
9import java.io.OutputStreamWriter;
10import java.text.Normalizer;
11import java.util.HashMap;
12import java.util.HashSet;
13import java.util.Map;
14import java.util.Set;
15import java.util.regex.Pattern;
16
/**
 * Line-based clean-up and sub-sampling of an RDF text file: every
 * {@code size}-th line that does not contain {@code '@'} is rewritten without
 * a fixed set of punctuation, special letters and diacritics.
 */
public class Normaliser {

    /**
     * Reads {@code args[0]}, keeps every {@code args[1]}-th line and writes the
     * normalised result next to the input file. When no arguments are given a
     * built-in default input with sample size 1 is used.
     *
     * @throws IOException              if reading or writing fails
     * @throws IllegalArgumentException if only one argument is supplied
     */
    public static void main(String[] args) throws IOException {
        if (args.length == 0) {
            args = new String[] {
                    "/home/yzhou/ontologies/npd/npd-data-dump-minus-datatype.ttl",
                    "1"
            };
        }
        if (args.length < 2)
            throw new IllegalArgumentException("usage: Normaliser <input file> <sample size>");

        // Parse the size before opening any file so bad input cannot leak a handle.
        int size = Integer.parseInt(args[1]), index;
        String fragment = args[0];

        if ((index = fragment.lastIndexOf(".")) != -1) {
            fragment = fragment.substring(0, index) + "_new_fragment" + args[1] + fragment.substring(index);
        }
        else fragment += "_fragment" + args[1];

        // Explicit UTF-8: the replacement tables below work on non-ASCII
        // characters, so decoding must not depend on the platform default.
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(args[0]), "UTF-8"));
        try {
            BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fragment), "UTF-8"));
            try {
//              simpleProcess(reader, writer, size);
                process(reader, writer, size);
            } finally {
                writer.close();
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Copies every {@code size}-th line unchanged from reader to writer.
     * Neither stream is closed or flushed here.
     */
    public static void simpleProcess(BufferedReader reader, BufferedWriter writer, int size) throws IOException {
        String line;
        int index = 0;
        while ((line = reader.readLine()) != null) {
            if (++index == size) {
                index = 0;
                writer.write(line);
                writer.newLine();
            }
        }
    }

    /** Characters that are deleted from every processed line. */
    static final String illegalSymbols = ",()'‘";

    /**
     * Single characters and their replacement text.
     * NOTE(review): sharp s is mapped to "t", which looks unusual - confirm it is intended.
     */
    static final String[][] replacedSymbols = new String[][] {
        {"æ", "ae"},
        {"ø", "o"},
        {"ß", "t"},
        {"Ł", "L"},
        {"ı", "i"},
        {"ł", "l"},
        {"–", "-"},
        {"&", "and"},
        {"ð", "o"},
        {"ə", "e"},
        {"Đ", "D"},
        {"ħ", "h"},
//      {"%60", "_"},
        {"đ", "d"},
        {"Þ", "P"}
    };

    static Set<Character> symbols2remove;
    static Map<Character, String> symbols2replace;

    static {
        symbols2remove = new HashSet<Character>();
        for (int i = 0; i < illegalSymbols.length(); ++i)
            symbols2remove.add(illegalSymbols.charAt(i));

        symbols2replace = new HashMap<Character, String>();
        for (int i = 0; i < replacedSymbols.length; ++i)
            symbols2replace.put(replacedSymbols[i][0].charAt(0), replacedSymbols[i][1]);
    }

    static final String urlSymbols = "http://";
    static final int urlSymbolLength = 7;

    /** Compiled once; matches the combining marks left behind by NFD decomposition. */
    private static final Pattern DIACRITICS = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");

    /**
     * Writes the normalised form of every {@code size}-th line that does not
     * contain {@code '@'}, each terminated by {@code '.'} and a line break.
     * Both streams are closed before this method returns, also on failure.
     */
    public static void process(BufferedReader reader, BufferedWriter writer, int size) throws IOException {
        try {
            int index = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                // Lines containing '@' are skipped and not counted.
                if (line.contains("@"))
                    continue;

                if (++index == size) {
                    writer.write(deAccent(process(line)));
                    // Dots outside a host name were stripped by process(String), so add the terminator back.
                    writer.write('.');
                    writer.newLine();
                    index = 0;
                }
            }
        } finally {
            try {
                writer.close();
            } finally {
                reader.close();
            }
        }
    }

    /**
     * Rewrites one line: "%60" becomes "_", characters from the removal set are
     * dropped, characters from the replacement table are substituted, and a
     * '.' is kept only between "http://" and the next '/', '#', ')' or '>'.
     */
    private static String process(String line) {
        line = line.replace("%60", "_");//.replace("__", "_");

        // Number of characters of "http://" matched so far; urlSymbolLength means "inside the host part".
        int inURL = 0;
        char ch;
        String str;
        StringBuilder newLine = new StringBuilder();
        for (int i = 0; i < line.length(); ++i) {
            ch = line.charAt(i);

            if (ch == '.') {
                if (inURL == urlSymbolLength)
                    newLine.append('.');
                continue;
            }

            if (inURL == urlSymbolLength) {
                if (ch == '/' || ch == '#' || ch == ')' || ch == '>') inURL = 0;
            }
            else if (ch == urlSymbols.charAt(inURL)) {
                ++inURL;
            }
            // On a mismatch the current character may itself start a new match ("hhttp://").
            else inURL = (ch == urlSymbols.charAt(0)) ? 1 : 0;

            if ((str = symbols2replace.get(ch)) != null)
                newLine.append(str);
            else if (!symbols2remove.contains(ch))
                newLine.append(ch);
        }

        return newLine.toString();
    }

    /** Removes combining diacritical marks after NFD decomposition. */
    public static String deAccent(String str) {
        String nfdNormalizedString = Normalizer.normalize(str, Normalizer.Form.NFD);
        return DIACRITICS.matcher(nfdNormalizedString).replaceAll("");
    }

}
diff --git a/external/uk/ac/ox/cs/data/sample/DataSampling.java b/external/uk/ac/ox/cs/data/sample/DataSampling.java
new file mode 100644
index 0000000..1a788e3
--- /dev/null
+++ b/external/uk/ac/ox/cs/data/sample/DataSampling.java
@@ -0,0 +1,320 @@
1package uk.ac.ox.cs.data.sample;
2
3import java.io.File;
4import java.io.FileInputStream;
5import java.io.FileNotFoundException;
6import java.io.FileOutputStream;
7import java.io.IOException;
8import java.util.*;
9import java.util.Map.Entry;
10
11import org.openrdf.model.Resource;
12import org.openrdf.model.Statement;
13import org.openrdf.model.URI;
14import org.openrdf.model.Value;
15import org.openrdf.model.impl.StatementImpl;
16import org.openrdf.model.impl.URIImpl;
17import org.openrdf.rio.RDFHandler;
18import org.openrdf.rio.RDFHandlerException;
19import org.openrdf.rio.RDFParseException;
20import org.openrdf.rio.RDFParser;
21import org.openrdf.rio.ntriples.NTriplesParser;
22import org.openrdf.rio.turtle.*;
23
24import uk.ac.ox.cs.pagoda.owl.OWLHelper;
25import uk.ac.ox.cs.pagoda.util.Namespace;
26import uk.ac.ox.cs.pagoda.util.Utility;
27
28public class DataSampling {
29
30 File[] m_list;
31 RDFGraph m_graph;
32 double m_percentage;
33 Set<String> excludeEntities = new HashSet<String>();
34
35 public DataSampling(String prefix, String fileName, String excludeFile, double percentage, boolean inTurtle) {
36 if (excludeFile != null) {
37 try {
38 Scanner scanner = new Scanner(new File(excludeFile));
39 while (scanner.hasNextLine())
40 excludeEntities.add(OWLHelper.removeAngles(scanner.nextLine().trim()));
41 scanner.close();
42 } catch (FileNotFoundException e1) {
43 // TODO Auto-generated catch block
44 e1.printStackTrace();
45 }
46 }
47 excludeEntities.add("http://www.w3.org/2002/07/owl#imports");
48
49 File file = new File(fileName);
50 if (file.isDirectory()) m_list = file.listFiles();
51 else m_list = new File[] {file};
52 m_percentage = percentage;
53
54 RDFParser parser = inTurtle ? new TurtleParser() : new NTriplesParser();
55
56 GraphRDFHandler handler = new GraphRDFHandler(excludeEntities);
57 parser.setRDFHandler(handler);
58
59 FileInputStream istream;
60 try {
61 for (File tFile: m_list) {
62 parser.parse(istream = new FileInputStream(tFile), prefix);
63 istream.close();
64 }
65 } catch (IOException e) {
66 e.printStackTrace();
67 } catch (RDFParseException e) {
68 e.printStackTrace();
69 } catch (RDFHandlerException e) {
70 e.printStackTrace();
71 }
72
73 m_graph = handler.getGraph();
74 }
75
76 public void sample(String outputFile, boolean multiStart) {
77 try {
78 FileOutputStream ostream = new FileOutputStream(outputFile);
79 TurtleWriter writer = new TurtleWriter(ostream);
80 writer.startRDF();
81
82 if (m_percentage < 100) {
83 Sampler sam = multiStart ?
84 new RandomWalkMulti(m_graph, writer) :
85 new RandomWalk(m_graph, writer);
86 sam.setLimit((int) (m_graph.numberOfStatement / 100 * m_percentage));
87 System.out.println("Statement limit: " + (m_graph.numberOfStatement / 100 * m_percentage));
88 sam.sample();
89 sam.dispose();
90 }
91 else {
92 m_graph.visit(writer);
93 }
94 writer.endRDF();
95 ostream.close();
96 } catch (IOException e) {
97 e.printStackTrace();
98 } catch (RDFHandlerException e) {
99 // TODO Auto-generated catch block
100 e.printStackTrace();
101 }
102 }
103
104 public static void main(String[] args) {
105 sampleReactome();
106// sampleChEMBL();
107 }
108
109 static void sampleReactome() {
110// double[] ps = {40, 70, 100};
111 double[] ps = {0.25, 0.5, 0.75};
112 for (double per: ps) {
113 DataSampling sampling = new DataSampling(
114 "http://www.biopax.org/release/biopax-level3.owl#",
115// "/home/yzhou/krr-nas-share/Yujiao/ontologies/bio2rdf/reactome/data/data.ttl",
116 "/home/yzhou/krr-nas-share/Yujiao/ontologies/bio2rdf/reactome/data/simplified.nt",
117// "/home/yzhou/krr-nas-share/Yujiao/ontologies/bio2rdf/reactome/graph sampling/reactome_sample_40.ttl",
118 "/home/yzhou/krr-nas-share/Yujiao/ontologies/bio2rdf/reactome/graph sampling/exclude",
119 per,
120 true);
121 sampling.sample("/home/yzhou/krr-nas-share/Yujiao/ontologies/bio2rdf/reactome/graph sampling/sample_test_" + per + ".ttl", true);
122// sampling.sample("/home/yzhou/krr-nas-share/Yujiao/ontologies/bio2rdf/reactome/graph sampling/simplifed_sample_test_" + per + ".ttl", true);
123// sampling.sample("output/sample_reactome_multi.ttl", true);
124 }
125 }
126
127 static void sampleChEMBL() {
128 DataSampling sampling = new DataSampling(
129 "http://rdf.ebi.ac.uk/terms/chembl",
130 "/home/yzhou/RDFdata/ChEMBL/facts/chembl_kbfile.nt",
131 null,
132 100,
133 false);
134
135 sampling.sample("output/sample_chembl_multi.ttl", true);
136 sampling.sample("output/sample_chembl.ttl", false);
137 }
138
139}
140
141class RDFGraph {
142
143 Map<Value, Integer> index = new HashMap<Value, Integer>();
144 Map<Integer, Value> inverseIndex = new HashMap<Integer, Value>();
145 MapToList<Integer> labels = new MapToList<Integer>();
146
147 MapToList<RDFEdge> edges = new MapToList<RDFEdge>();
148 Set<String> excludeEntities;
149
150 int numberOfIndividuals = 0, numberOfProperties = 0;
151
152 public RDFGraph(Set<String> exclude) {
153 excludeEntities = exclude;
154 for (String str: excludeEntities)
155 System.out.println(str);
156 System.out.println("---------------");
157 }
158
159 public void visit(TurtleWriter writer) throws RDFHandlerException {
160 Integer key;
161 for (Entry<Integer, LinkedList<Integer>> entry: labels.entrySet()) {
162 key = entry.getKey();
163 for (Integer type: entry.getValue())
164 writer.handleStatement(getStatement(key, type));
165 }
166
167 for (Entry<Integer, LinkedList<RDFEdge>> entry: edges.entrySet()) {
168 key = entry.getKey();
169 if ((inverseIndex.get(key) instanceof URI) &&
170 ((URI) inverseIndex.get(key)).toString().equals("http://www.reactome.org/biopax/46/879693#UnificationXref9"))
171 System.out.println("Here");
172
173 for (RDFEdge edge: entry.getValue())
174 writer.handleStatement(getStatement(key, edge.m_label, edge.m_dst));
175 }
176 }
177
178 private int getID(Value v, boolean isIndividual) {
179 if (v.toString().contains("imports"))
180 System.out.println(v.toString());
181 if (excludeEntities.contains(v.toString())) {
182 return 0;
183 }
184
185 Integer id = index.get(v);
186 if (id == null)
187 if (isIndividual) {
188 index.put(v, id = ++numberOfIndividuals);
189 inverseIndex.put(id, v);
190 }
191 else {
192 index.put(v, id = --numberOfProperties);
193 inverseIndex.put(id, v);
194 }
195 return id;
196 }
197
198 int numberOfStatement = 0;
199 int counter = 0;
200
201 public void addTriple(Resource s, URI p, Value o) {
202 ++numberOfStatement;
203 if (numberOfStatement % 1000000 == 0) {
204 Utility.logInfo("No.of statements: " + numberOfStatement, "\tNo.of individuals: " + numberOfIndividuals, "\tNo.of predicates: " + (-numberOfProperties));
205 }
206
207 if (p.equals(rdftype)) {
208 int type = getID(o, false), i = getID(s, true);
209 if (i == 0) {
210// System.out.println("<" + s + "> <" + p + "> <" + o + ">");
211 return ;
212 }
213 labels.add(i, type);
214 }
215 else {
216 int i = getID(s, true), j = getID(o, true), prop = getID(p, false) ;
217 if (i == 0 || j == 0 || prop == 0) {
218// System.out.println("<" + s + "> <" + p + "> <" + o + ">");
219 return ;
220 }
221 edges.add(i, new RDFEdge(prop, j));
222 }
223 }
224
225 URI rdftype = new URIImpl(Namespace.RDF_TYPE);
226
227 public Statement getStatement(int... args) {
228 if (args.length == 2)
229 return new StatementImpl((Resource) inverseIndex.get(args[0]), rdftype, (Value) inverseIndex.get(args[1]));
230 else if (args.length == 3)
231 return new StatementImpl((Resource) inverseIndex.get(args[0]), (URI) inverseIndex.get(args[1]), (Value) inverseIndex.get(args[2]));
232 return null;
233 }
234
235 public String getRawString(int id) {
236 return inverseIndex.get(id).toString();
237 }
238
239}
240
/**
 * Minimal multimap from integer keys to linked lists of values; a list is
 * created the first time a key is used.
 */
class MapToList<T> {

    private Map<Integer, LinkedList<T>> map = new HashMap<Integer, LinkedList<T>>();

    /** Appends the value to the list stored under the key. */
    public void add(int key, T value) {
        LinkedList<T> bucket = get(key);
        if (bucket == null) {
            bucket = new LinkedList<T>();
            map.put(key, bucket);
        }
        bucket.add(value);
    }

    /** Returns the live entry view of the underlying map. */
    public Set<Map.Entry<Integer, LinkedList<T>>> entrySet() {
        return map.entrySet();
    }

    /** Randomly reorders every list in place. */
    public void shuffle() {
        for (LinkedList<T> bucket : map.values()) {
            Collections.shuffle(bucket);
        }
    }

    /** Returns the list stored under the key, or {@code null} if the key was never used. */
    public LinkedList<T> get(int key) {
        return map.get(key);
    }

}
266
/** Outgoing edge of a graph node: the ID of its predicate and the ID of its target node. */
class RDFEdge {

    int m_label;
    int m_dst;

    /**
     * @param label ID of the predicate
     * @param dst   ID of the node the edge points to
     */
    public RDFEdge(int label, int dst) {
        this.m_label = label;
        this.m_dst = dst;
    }

}
277
278class GraphRDFHandler implements RDFHandler {
279
280 RDFGraph m_graph;
281 Set<String> m_exclude;
282
283 public GraphRDFHandler(Set<String> excludeEntities) {
284 m_exclude = excludeEntities;
285 }
286
287 @Override
288 public void startRDF() throws RDFHandlerException {
289 m_graph = new RDFGraph(m_exclude);
290 }
291
292 public RDFGraph getGraph() {
293 return m_graph;
294 }
295
296 @Override
297 public void endRDF() throws RDFHandlerException {
298 // TODO Auto-generated method stub
299
300 }
301
302 @Override
303 public void handleNamespace(String prefix, String uri)
304 throws RDFHandlerException {
305 // TODO Auto-generated method stub
306
307 }
308
309 @Override
310 public void handleStatement(Statement st) throws RDFHandlerException {
311 m_graph.addTriple(st.getSubject(), st.getPredicate(), st.getObject());
312 }
313
314 @Override
315 public void handleComment(String comment) throws RDFHandlerException {
316 // TODO Auto-generated method stub
317
318 }
319
320} \ No newline at end of file
diff --git a/external/uk/ac/ox/cs/data/sample/RandomWalk.java b/external/uk/ac/ox/cs/data/sample/RandomWalk.java
new file mode 100644
index 0000000..d9f5107
--- /dev/null
+++ b/external/uk/ac/ox/cs/data/sample/RandomWalk.java
@@ -0,0 +1,88 @@
1package uk.ac.ox.cs.data.sample;
2
3import java.util.HashSet;
4import java.util.Iterator;
5import java.util.List;
6import java.util.Random;
7import java.util.Set;
8import java.util.Stack;
9
10import org.openrdf.rio.RDFHandlerException;
11import org.openrdf.rio.turtle.TurtleWriter;
12
13import uk.ac.ox.cs.pagoda.util.Utility;
14
15public class RandomWalk extends Sampler {
16
17 public RandomWalk(RDFGraph graph, TurtleWriter writer) {
18 super(graph, writer);
19 }
20
21 protected Random rand = new Random();
22
23 protected int noOfStatements = 0, statementLimit = 0;
24 protected Set<Integer> visited = new HashSet<Integer>();
25
26 @Override
27 public void setLimit(int limit) {
28 statementLimit = limit;
29 }
30
31 @Override
32 public void sample() throws RDFHandlerException {
33 int u, v, pick, index;
34 RDFEdge edge;
35 List<RDFEdge> edges;
36 Stack<Integer> stack = new Stack<Integer>();
37 while (true) {
38 if (noOfStatements >= statementLimit) {
39 return ;
40 }
41 if (stack.isEmpty()) {
42 stack.add(v = rand.nextInt(m_graph.numberOfIndividuals));
43 Utility.logInfo("A new start: " + m_graph.getRawString(v));
44 visit(v);
45 }
46 u = stack.peek();
47 if (rand.nextInt(100) < 15) {
48 stack.pop();
49 continue;
50 }
51 if ((edges = m_graph.edges.get(u)) == null || edges.size() == 0) {
52 stack.clear();
53 continue;
54 }
55
56 index = 0;
57 pick = rand.nextInt(edges.size());
58 for (Iterator<RDFEdge> iter = edges.iterator(); iter.hasNext(); ++index) {
59 edge = iter.next();
60 if (index == pick) {
61 stack.add(v = edge.m_dst);
62 visit(v);
63 m_writer.handleStatement(m_graph.getStatement(u, edge.m_label, edge.m_dst));
64 ++noOfStatements;
65 iter.remove();
66 }
67
68 }
69 }
70 }
71
72 protected void visit(int node) throws RDFHandlerException {
73 if (visited.contains(node)) return ;
74 visited.add(node);
75 List<Integer> list = m_graph.labels.get(node);
76 if (list == null) return ;
77 for (Iterator<Integer> iter = list.iterator(); iter.hasNext(); )
78 m_writer.handleStatement(m_graph.getStatement(node, iter.next()));
79 noOfStatements += list.size();
80 }
81
82 @Override
83 public void dispose() {
84 visited.clear();
85 }
86
87
88}
diff --git a/external/uk/ac/ox/cs/data/sample/RandomWalkMulti.java b/external/uk/ac/ox/cs/data/sample/RandomWalkMulti.java
new file mode 100644
index 0000000..592f249
--- /dev/null
+++ b/external/uk/ac/ox/cs/data/sample/RandomWalkMulti.java
@@ -0,0 +1,112 @@
1package uk.ac.ox.cs.data.sample;
2
3import java.util.HashSet;
4import java.util.Iterator;
5import java.util.LinkedList;
6import java.util.List;
7import java.util.Map;
8import java.util.Queue;
9import java.util.Set;
10import java.util.Stack;
11
12import org.openrdf.rio.RDFHandlerException;
13import org.openrdf.rio.turtle.TurtleWriter;
14
15import uk.ac.ox.cs.pagoda.util.Utility;
16
17
18public class RandomWalkMulti extends RandomWalk {
19
20 public RandomWalkMulti(RDFGraph graph, TurtleWriter writer) {
21 super(graph, writer);
22 }
23
24 Queue<Integer> queue = new LinkedList<Integer>();
25
26 @Override
27 public void sample() throws RDFHandlerException {
28 getStartNodes();
29
30 Utility.logInfo(queue.size());
31
32 int u, v, pick, index;
33 int individualLimit = statementLimit / queue.size(), currentLimit = 0;
34 RDFEdge edge;
35 List<RDFEdge> edges;
36 Stack<Integer> stack = new Stack<Integer>();
37 while (true) {
38 if (noOfStatements >= statementLimit) {
39 System.out.println("The number of statements in the sampling: " + noOfStatements);
40 return ;
41 }
42 if (noOfStatements >= currentLimit) {
43 stack.clear();
44 }
45
46 if (stack.isEmpty()) {
47 if (queue.isEmpty())
48 v = rand.nextInt(m_graph.numberOfIndividuals);
49 else {
50 v = queue.poll();
51 currentLimit += individualLimit;
52 }
53 stack.add(v);
54// Utility.logInfo(noOfStart + " new start: " + m_graph.getRawString(v));
55 visit(v);
56 }
57 u = stack.peek();
58 if (rand.nextInt(100) < 15) {
59 stack.pop();
60 continue;
61 }
62 if ((edges = m_graph.edges.get(u)) == null || edges.size() == 0) {
63 stack.clear();
64 continue;
65 }
66
67 index = 0;
68 pick = rand.nextInt(edges.size());
69 for (Iterator<RDFEdge> iter = edges.iterator(); iter.hasNext(); ++index) {
70 edge = iter.next();
71 if (index == pick) {
72 stack.add(v = edge.m_dst);
73 visit(v);
74 m_writer.handleStatement(m_graph.getStatement(u, edge.m_label, edge.m_dst));
75 ++noOfStatements;
76 iter.remove();
77 }
78
79 }
80 }
81 }
82
83 private void getStartNodes() throws RDFHandlerException {
84 Set<Integer> coveredConcepts = new HashSet<Integer>();
85 Integer concept;
86
87 Iterator<Integer> iter;
88 for (Map.Entry<Integer, LinkedList<Integer>> entry: m_graph.labels.entrySet()) {
89 iter = entry.getValue().iterator();
90 concept = null;
91
92 while (iter.hasNext()) {
93 if (!(coveredConcepts.contains(concept = iter.next()))) {
94 break;
95 }
96 else concept = null;
97
98 }
99
100 if (concept == null) continue;
101 else {
102 queue.add(entry.getKey());
103 coveredConcepts.add(concept);
104 while (iter.hasNext())
105 coveredConcepts.add(iter.next());
106 }
107 }
108
109 }
110
111
112}
diff --git a/external/uk/ac/ox/cs/data/sample/Sampler.java b/external/uk/ac/ox/cs/data/sample/Sampler.java
new file mode 100644
index 0000000..205b29b
--- /dev/null
+++ b/external/uk/ac/ox/cs/data/sample/Sampler.java
@@ -0,0 +1,23 @@
1package uk.ac.ox.cs.data.sample;
2
3import org.openrdf.rio.RDFHandlerException;
4import org.openrdf.rio.turtle.TurtleWriter;
5
6public abstract class Sampler {
7
8 protected RDFGraph m_graph;
9 protected TurtleWriter m_writer;
10
11 public Sampler(RDFGraph graph, TurtleWriter writer) {
12 m_graph = graph;
13 m_writer = writer;
14 }
15
16 public abstract void setLimit(int limit);
17
18 public abstract void sample() throws RDFHandlerException;
19
20 public abstract void dispose();
21
22
23}