author     yzhou <yzhou@krr-linux.cs.ox.ac.uk>    2015-04-30 17:36:35 +0100
committer  yzhou <yzhou@krr-linux.cs.ox.ac.uk>    2015-04-30 17:36:35 +0100
commit     0d8f240c9c0a64f2285324e5a517161e45c698fc (patch)
tree       f4b4f7078e3be02011b9812cd8791c657a135993 /external/uk/ac/ox/cs/data/sample
parent     68ae342b2a4923bc7b3f378c6a489f2355d85279 (diff)
download   ACQuA-0d8f240c9c0a64f2285324e5a517161e45c698fc.tar.gz
           ACQuA-0d8f240c9c0a64f2285324e5a517161e45c698fc.zip
downgrade owl api and reorganised src files
Diffstat (limited to 'external/uk/ac/ox/cs/data/sample')
-rw-r--r--  external/uk/ac/ox/cs/data/sample/DataSampling.java      320
-rw-r--r--  external/uk/ac/ox/cs/data/sample/RandomWalk.java          88
-rw-r--r--  external/uk/ac/ox/cs/data/sample/RandomWalkMulti.java    112
-rw-r--r--  external/uk/ac/ox/cs/data/sample/Sampler.java             23
4 files changed, 543 insertions, 0 deletions
diff --git a/external/uk/ac/ox/cs/data/sample/DataSampling.java b/external/uk/ac/ox/cs/data/sample/DataSampling.java
new file mode 100644
index 0000000..1a788e3
--- /dev/null
+++ b/external/uk/ac/ox/cs/data/sample/DataSampling.java
@@ -0,0 +1,320 @@
package uk.ac.ox.cs.data.sample;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.*;
import java.util.Map.Entry;

import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.impl.StatementImpl;
import org.openrdf.model.impl.URIImpl;
import org.openrdf.rio.RDFHandler;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.RDFParseException;
import org.openrdf.rio.RDFParser;
import org.openrdf.rio.ntriples.NTriplesParser;
import org.openrdf.rio.turtle.*;

import uk.ac.ox.cs.pagoda.owl.OWLHelper;
import uk.ac.ox.cs.pagoda.util.Namespace;
import uk.ac.ox.cs.pagoda.util.Utility;

public class DataSampling {

    File[] m_list;
    RDFGraph m_graph;
    double m_percentage;
    Set<String> excludeEntities = new HashSet<String>();

    public DataSampling(String prefix, String fileName, String excludeFile, double percentage, boolean inTurtle) {
        if (excludeFile != null) {
            try {
                Scanner scanner = new Scanner(new File(excludeFile));
                while (scanner.hasNextLine())
                    excludeEntities.add(OWLHelper.removeAngles(scanner.nextLine().trim()));
                scanner.close();
            } catch (FileNotFoundException e1) {
                // TODO Auto-generated catch block
                e1.printStackTrace();
            }
        }
        excludeEntities.add("http://www.w3.org/2002/07/owl#imports");

        File file = new File(fileName);
        if (file.isDirectory()) m_list = file.listFiles();
        else m_list = new File[] {file};
        m_percentage = percentage;

        RDFParser parser = inTurtle ? new TurtleParser() : new NTriplesParser();

        GraphRDFHandler handler = new GraphRDFHandler(excludeEntities);
        parser.setRDFHandler(handler);

        FileInputStream istream;
        try {
            for (File tFile: m_list) {
                parser.parse(istream = new FileInputStream(tFile), prefix);
                istream.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (RDFParseException e) {
            e.printStackTrace();
        } catch (RDFHandlerException e) {
            e.printStackTrace();
        }

        m_graph = handler.getGraph();
    }

    public void sample(String outputFile, boolean multiStart) {
        try {
            FileOutputStream ostream = new FileOutputStream(outputFile);
            TurtleWriter writer = new TurtleWriter(ostream);
            writer.startRDF();

            if (m_percentage < 100) {
                Sampler sam = multiStart ?
                        new RandomWalkMulti(m_graph, writer) :
                        new RandomWalk(m_graph, writer);
                sam.setLimit((int) (m_graph.numberOfStatement / 100 * m_percentage));
                System.out.println("Statement limit: " + (m_graph.numberOfStatement / 100 * m_percentage));
                sam.sample();
                sam.dispose();
            }
            else {
                m_graph.visit(writer);
            }
            writer.endRDF();
            ostream.close();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (RDFHandlerException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        sampleReactome();
//        sampleChEMBL();
    }

    static void sampleReactome() {
//        double[] ps = {40, 70, 100};
        double[] ps = {0.25, 0.5, 0.75};
        for (double per: ps) {
            DataSampling sampling = new DataSampling(
                    "http://www.biopax.org/release/biopax-level3.owl#",
//                    "/home/yzhou/krr-nas-share/Yujiao/ontologies/bio2rdf/reactome/data/data.ttl",
                    "/home/yzhou/krr-nas-share/Yujiao/ontologies/bio2rdf/reactome/data/simplified.nt",
//                    "/home/yzhou/krr-nas-share/Yujiao/ontologies/bio2rdf/reactome/graph sampling/reactome_sample_40.ttl",
                    "/home/yzhou/krr-nas-share/Yujiao/ontologies/bio2rdf/reactome/graph sampling/exclude",
                    per,
                    true);
            sampling.sample("/home/yzhou/krr-nas-share/Yujiao/ontologies/bio2rdf/reactome/graph sampling/sample_test_" + per + ".ttl", true);
//            sampling.sample("/home/yzhou/krr-nas-share/Yujiao/ontologies/bio2rdf/reactome/graph sampling/simplifed_sample_test_" + per + ".ttl", true);
//            sampling.sample("output/sample_reactome_multi.ttl", true);
        }
    }

    static void sampleChEMBL() {
        DataSampling sampling = new DataSampling(
                "http://rdf.ebi.ac.uk/terms/chembl",
                "/home/yzhou/RDFdata/ChEMBL/facts/chembl_kbfile.nt",
                null,
                100,
                false);

        sampling.sample("output/sample_chembl_multi.ttl", true);
        sampling.sample("output/sample_chembl.ttl", false);
    }

}

class RDFGraph {

    Map<Value, Integer> index = new HashMap<Value, Integer>();
    Map<Integer, Value> inverseIndex = new HashMap<Integer, Value>();
    MapToList<Integer> labels = new MapToList<Integer>();

    MapToList<RDFEdge> edges = new MapToList<RDFEdge>();
    Set<String> excludeEntities;

    int numberOfIndividuals = 0, numberOfProperties = 0;

    public RDFGraph(Set<String> exclude) {
        excludeEntities = exclude;
        for (String str: excludeEntities)
            System.out.println(str);
        System.out.println("---------------");
    }

    public void visit(TurtleWriter writer) throws RDFHandlerException {
        Integer key;
        for (Entry<Integer, LinkedList<Integer>> entry: labels.entrySet()) {
            key = entry.getKey();
            for (Integer type: entry.getValue())
                writer.handleStatement(getStatement(key, type));
        }

        for (Entry<Integer, LinkedList<RDFEdge>> entry: edges.entrySet()) {
            key = entry.getKey();
            if ((inverseIndex.get(key) instanceof URI) &&
                    ((URI) inverseIndex.get(key)).toString().equals("http://www.reactome.org/biopax/46/879693#UnificationXref9"))
                System.out.println("Here");

            for (RDFEdge edge: entry.getValue())
                writer.handleStatement(getStatement(key, edge.m_label, edge.m_dst));
        }
    }

    private int getID(Value v, boolean isIndividual) {
        if (v.toString().contains("imports"))
            System.out.println(v.toString());
        if (excludeEntities.contains(v.toString())) {
            return 0;
        }

        Integer id = index.get(v);
        if (id == null)
            if (isIndividual) {
                index.put(v, id = ++numberOfIndividuals);
                inverseIndex.put(id, v);
            }
            else {
                index.put(v, id = --numberOfProperties);
                inverseIndex.put(id, v);
            }
        return id;
    }

    int numberOfStatement = 0;
    int counter = 0;

    public void addTriple(Resource s, URI p, Value o) {
        ++numberOfStatement;
        if (numberOfStatement % 1000000 == 0) {
            Utility.logInfo("No.of statements: " + numberOfStatement, "\tNo.of individuals: " + numberOfIndividuals, "\tNo.of predicates: " + (-numberOfProperties));
        }

        if (p.equals(rdftype)) {
            int type = getID(o, false), i = getID(s, true);
            if (i == 0) {
//                System.out.println("<" + s + "> <" + p + "> <" + o + ">");
                return ;
            }
            labels.add(i, type);
        }
        else {
            int i = getID(s, true), j = getID(o, true), prop = getID(p, false) ;
            if (i == 0 || j == 0 || prop == 0) {
//                System.out.println("<" + s + "> <" + p + "> <" + o + ">");
                return ;
            }
            edges.add(i, new RDFEdge(prop, j));
        }
    }

    URI rdftype = new URIImpl(Namespace.RDF_TYPE);

    public Statement getStatement(int... args) {
        if (args.length == 2)
            return new StatementImpl((Resource) inverseIndex.get(args[0]), rdftype, (Value) inverseIndex.get(args[1]));
        else if (args.length == 3)
            return new StatementImpl((Resource) inverseIndex.get(args[0]), (URI) inverseIndex.get(args[1]), (Value) inverseIndex.get(args[2]));
        return null;
    }

    public String getRawString(int id) {
        return inverseIndex.get(id).toString();
    }

}

class MapToList<T> {

    private Map<Integer, LinkedList<T>> map = new HashMap<Integer, LinkedList<T>>();

    public void add(int key, T value) {
        LinkedList<T> list = map.get(key);
        if (list == null)
            map.put(key, list = new LinkedList<T>());
        list.add(value);
    }

    public Set<Map.Entry<Integer, LinkedList<T>>> entrySet() {
        return map.entrySet();
    }

    public void shuffle() {
        for (List<T> list: map.values())
            Collections.shuffle(list);
    }

    public LinkedList<T> get(int key) {
        return map.get(key);
    }

}

class RDFEdge {

    int m_label, m_dst;

    public RDFEdge(int label, int dst) {
        m_label = label;
        m_dst = dst;
    }

}

class GraphRDFHandler implements RDFHandler {

    RDFGraph m_graph;
    Set<String> m_exclude;

    public GraphRDFHandler(Set<String> excludeEntities) {
        m_exclude = excludeEntities;
    }

    @Override
    public void startRDF() throws RDFHandlerException {
        m_graph = new RDFGraph(m_exclude);
    }

    public RDFGraph getGraph() {
        return m_graph;
    }

    @Override
    public void endRDF() throws RDFHandlerException {
        // TODO Auto-generated method stub

    }

    @Override
    public void handleNamespace(String prefix, String uri)
            throws RDFHandlerException {
        // TODO Auto-generated method stub

    }

    @Override
    public void handleStatement(Statement st) throws RDFHandlerException {
        m_graph.addTriple(st.getSubject(), st.getPredicate(), st.getObject());
    }

    @Override
    public void handleComment(String comment) throws RDFHandlerException {
        // TODO Auto-generated method stub

    }

}
\ No newline at end of file
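
[Editorial note, not part of the commit] The hard-coded Reactome and ChEMBL drivers in main() above show the intended call pattern of the new DataSampling entry point. As a hedged sketch, with a placeholder base URI and placeholder file paths (none of these exist in the commit):

    // Illustrative only: the base URI and paths below are hypothetical placeholders.
    DataSampling sampling = new DataSampling(
            "http://example.org/ontology#",  // base URI handed to the RDF parser
            "data/facts.nt",                 // a single data file or a directory of files
            null,                            // optional file listing entities to exclude
            0.5,                             // sampling percentage; values below 100 trigger the random walk
            false);                          // false = N-Triples input, true = Turtle
    sampling.sample("output/sample.ttl", true); // true selects the multi-start random walk
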
diff --git a/external/uk/ac/ox/cs/data/sample/RandomWalk.java b/external/uk/ac/ox/cs/data/sample/RandomWalk.java
new file mode 100644
index 0000000..d9f5107
--- /dev/null
+++ b/external/uk/ac/ox/cs/data/sample/RandomWalk.java
@@ -0,0 +1,88 @@
package uk.ac.ox.cs.data.sample;

import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.Stack;

import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.turtle.TurtleWriter;

import uk.ac.ox.cs.pagoda.util.Utility;

public class RandomWalk extends Sampler {

    public RandomWalk(RDFGraph graph, TurtleWriter writer) {
        super(graph, writer);
    }

    protected Random rand = new Random();

    protected int noOfStatements = 0, statementLimit = 0;
    protected Set<Integer> visited = new HashSet<Integer>();

    @Override
    public void setLimit(int limit) {
        statementLimit = limit;
    }

    @Override
    public void sample() throws RDFHandlerException {
        int u, v, pick, index;
        RDFEdge edge;
        List<RDFEdge> edges;
        Stack<Integer> stack = new Stack<Integer>();
        while (true) {
            if (noOfStatements >= statementLimit) {
                return ;
            }
            if (stack.isEmpty()) {
                stack.add(v = rand.nextInt(m_graph.numberOfIndividuals));
                Utility.logInfo("A new start: " + m_graph.getRawString(v));
                visit(v);
            }
            u = stack.peek();
            if (rand.nextInt(100) < 15) {
                stack.pop();
                continue;
            }
            if ((edges = m_graph.edges.get(u)) == null || edges.size() == 0) {
                stack.clear();
                continue;
            }

            index = 0;
            pick = rand.nextInt(edges.size());
            for (Iterator<RDFEdge> iter = edges.iterator(); iter.hasNext(); ++index) {
                edge = iter.next();
                if (index == pick) {
                    stack.add(v = edge.m_dst);
                    visit(v);
                    m_writer.handleStatement(m_graph.getStatement(u, edge.m_label, edge.m_dst));
                    ++noOfStatements;
                    iter.remove();
                }

            }
        }
    }

    protected void visit(int node) throws RDFHandlerException {
        if (visited.contains(node)) return ;
        visited.add(node);
        List<Integer> list = m_graph.labels.get(node);
        if (list == null) return ;
        for (Iterator<Integer> iter = list.iterator(); iter.hasNext(); )
            m_writer.handleStatement(m_graph.getStatement(node, iter.next()));
        noOfStatements += list.size();
    }

    @Override
    public void dispose() {
        visited.clear();
    }


}
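
[Editorial note, not part of the commit] RandomWalk is normally constructed inside DataSampling.sample(), but the Sampler contract can also be exercised directly. A minimal, hedged sketch, assuming `graph` is an RDFGraph already populated through GraphRDFHandler and `writer` is a TurtleWriter on which startRDF() has been called; the 10% budget is an arbitrary example value:

    // Hypothetical driver code, not part of this commit.
    Sampler sampler = new RandomWalk(graph, writer);
    sampler.setLimit(graph.numberOfStatement / 10); // keep roughly a tenth of the statements
    sampler.sample();   // walks the graph, emitting statements through the writer
    sampler.dispose();  // clears the visited-node set
    writer.endRDF();
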
diff --git a/external/uk/ac/ox/cs/data/sample/RandomWalkMulti.java b/external/uk/ac/ox/cs/data/sample/RandomWalkMulti.java
new file mode 100644
index 0000000..592f249
--- /dev/null
+++ b/external/uk/ac/ox/cs/data/sample/RandomWalkMulti.java
@@ -0,0 +1,112 @@
package uk.ac.ox.cs.data.sample;

import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.Stack;

import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.turtle.TurtleWriter;

import uk.ac.ox.cs.pagoda.util.Utility;


public class RandomWalkMulti extends RandomWalk {

    public RandomWalkMulti(RDFGraph graph, TurtleWriter writer) {
        super(graph, writer);
    }

    Queue<Integer> queue = new LinkedList<Integer>();

    @Override
    public void sample() throws RDFHandlerException {
        getStartNodes();

        Utility.logInfo(queue.size());

        int u, v, pick, index;
        int individualLimit = statementLimit / queue.size(), currentLimit = 0;
        RDFEdge edge;
        List<RDFEdge> edges;
        Stack<Integer> stack = new Stack<Integer>();
        while (true) {
            if (noOfStatements >= statementLimit) {
                System.out.println("The number of statements in the sampling: " + noOfStatements);
                return ;
            }
            if (noOfStatements >= currentLimit) {
                stack.clear();
            }

            if (stack.isEmpty()) {
                if (queue.isEmpty())
                    v = rand.nextInt(m_graph.numberOfIndividuals);
                else {
                    v = queue.poll();
                    currentLimit += individualLimit;
                }
                stack.add(v);
//                Utility.logInfo(noOfStart + " new start: " + m_graph.getRawString(v));
                visit(v);
            }
            u = stack.peek();
            if (rand.nextInt(100) < 15) {
                stack.pop();
                continue;
            }
            if ((edges = m_graph.edges.get(u)) == null || edges.size() == 0) {
                stack.clear();
                continue;
            }

            index = 0;
            pick = rand.nextInt(edges.size());
            for (Iterator<RDFEdge> iter = edges.iterator(); iter.hasNext(); ++index) {
                edge = iter.next();
                if (index == pick) {
                    stack.add(v = edge.m_dst);
                    visit(v);
                    m_writer.handleStatement(m_graph.getStatement(u, edge.m_label, edge.m_dst));
                    ++noOfStatements;
                    iter.remove();
                }

            }
        }
    }

    private void getStartNodes() throws RDFHandlerException {
        Set<Integer> coveredConcepts = new HashSet<Integer>();
        Integer concept;

        Iterator<Integer> iter;
        for (Map.Entry<Integer, LinkedList<Integer>> entry: m_graph.labels.entrySet()) {
            iter = entry.getValue().iterator();
            concept = null;

            while (iter.hasNext()) {
                if (!(coveredConcepts.contains(concept = iter.next()))) {
                    break;
                }
                else concept = null;

            }

            if (concept == null) continue;
            else {
                queue.add(entry.getKey());
                coveredConcepts.add(concept);
                while (iter.hasNext())
                    coveredConcepts.add(iter.next());
            }
        }

    }


}
diff --git a/external/uk/ac/ox/cs/data/sample/Sampler.java b/external/uk/ac/ox/cs/data/sample/Sampler.java
new file mode 100644
index 0000000..205b29b
--- /dev/null
+++ b/external/uk/ac/ox/cs/data/sample/Sampler.java
@@ -0,0 +1,23 @@
package uk.ac.ox.cs.data.sample;

import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.turtle.TurtleWriter;

public abstract class Sampler {

    protected RDFGraph m_graph;
    protected TurtleWriter m_writer;

    public Sampler(RDFGraph graph, TurtleWriter writer) {
        m_graph = graph;
        m_writer = writer;
    }

    public abstract void setLimit(int limit);

    public abstract void sample() throws RDFHandlerException;

    public abstract void dispose();


}
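
[Editorial note, not part of the commit] Sampler fixes only the constructor plus setLimit, sample, and dispose, so traversal strategies other than the two random walks can be slotted in. A hypothetical sketch of the simplest possible subclass, which ignores the limit and re-emits the whole graph via the existing RDFGraph.visit (class name and behaviour are illustrative only):

    package uk.ac.ox.cs.data.sample;

    import org.openrdf.rio.RDFHandlerException;
    import org.openrdf.rio.turtle.TurtleWriter;

    // Hypothetical example subclass: copies every statement, ignoring the limit.
    public class FullCopySampler extends Sampler {

        public FullCopySampler(RDFGraph graph, TurtleWriter writer) {
            super(graph, writer);
        }

        @Override
        public void setLimit(int limit) { /* no limit needed for a full copy */ }

        @Override
        public void sample() throws RDFHandlerException {
            m_graph.visit(m_writer); // reuse RDFGraph.visit(TurtleWriter) from this commit
        }

        @Override
        public void dispose() { }
    }
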