// Provenance (taken from the patch this file was extracted from):
//   From 0d8f240c9c0a64f2285324e5a517161e45c698fc Mon Sep 17 00:00:00 2001
//   From: yzhou
//   Date: Thu, 30 Apr 2015 17:36:35 +0100
//   Subject: downgrade owl api and reorganised src files
//   File: external/uk/ac/ox/cs/data/sample/RandomWalkMulti.java (new file, index 0000000..592f249)
package uk.ac.ox.cs.data.sample;

import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.Stack;

import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.turtle.TurtleWriter;

import uk.ac.ox.cs.pagoda.util.Utility;


/**
 * Random-walk sampler that starts its walks from several start nodes.
 *
 * <p>The start nodes are chosen by {@link #getStartNodes()} so that every
 * concept listed in {@code m_graph.labels} belongs to at least one start node.
 * The overall {@code statementLimit} is divided evenly between the start
 * nodes; once the start nodes are used up, further walks begin at randomly
 * chosen individuals.
 *
 * <p>NOTE(review): every emitted edge is removed from the graph's edge list,
 * so if the graph holds fewer edges than {@code statementLimit} the loop in
 * {@link #sample()} does not appear to have a way to terminate -- confirm
 * against the callers' choice of limit.
 */
public class RandomWalkMulti extends RandomWalk {

	/** Chance, in percent, of backtracking one step instead of following an edge. */
	private static final int BACKTRACK_PERCENT = 15;

	public RandomWalkMulti(RDFGraph graph, TurtleWriter writer) {
		super(graph, writer);
	}

	/** Start nodes still waiting to be walked from; filled by {@link #getStartNodes()}. */
	Queue<Integer> queue = new LinkedList<Integer>();

	/**
	 * Runs the sampling until {@code noOfStatements} reaches {@code statementLimit},
	 * writing every traversed edge to {@code m_writer}.
	 *
	 * @throws RDFHandlerException if the writer rejects a statement
	 */
	@Override
	public void sample() throws RDFHandlerException {
		getStartNodes();

		Utility.logInfo(queue.size());

		// Budget per start node. With no start nodes at all the original code
		// divided by zero; in that case give the whole budget to walks from
		// randomly chosen individuals instead.
		final int startCount = queue.size();
		final int individualLimit;
		int currentLimit;
		if (startCount == 0) {
			individualLimit = 0;
			currentLimit = statementLimit;
		} else {
			individualLimit = statementLimit / startCount;
			currentLimit = 0;
		}

		Stack<Integer> stack = new Stack<Integer>();
		while (true) {
			if (noOfStatements >= statementLimit) {
				System.out.println("The number of statements in the sampling: " + noOfStatements);
				return;
			}
			// The current start node has used up its share: abandon its walk.
			if (noOfStatements >= currentLimit) {
				stack.clear();
			}

			if (stack.isEmpty()) {
				int start;
				if (queue.isEmpty()) {
					start = rand.nextInt(m_graph.numberOfIndividuals);
				} else {
					start = queue.poll();
					currentLimit += individualLimit;
				}
				stack.push(start);
//				Utility.logInfo(noOfStart + " new start: " + m_graph.getRawString(v));
				visit(start);
			}

			int u = stack.peek();
			if (rand.nextInt(100) < BACKTRACK_PERCENT) {
				stack.pop();
				continue;
			}

			List<RDFEdge> edges = m_graph.edges.get(u);
			if (edges == null || edges.isEmpty()) {
				// Dead end: drop the whole walk and start a new one.
				stack.clear();
				continue;
			}

			// Follow one outgoing edge chosen uniformly at random, emit it and
			// remove it from the list so it cannot be emitted a second time.
			int pick = rand.nextInt(edges.size());
			int index = 0;
			for (Iterator<RDFEdge> iter = edges.iterator(); iter.hasNext(); ++index) {
				RDFEdge edge = iter.next();
				if (index != pick) {
					continue;
				}
				int v = edge.m_dst;
				stack.push(v);
				visit(v);
				m_writer.handleStatement(m_graph.getStatement(u, edge.m_label, edge.m_dst));
				++noOfStatements;
				iter.remove();
				break; // nothing after the chosen edge can match
			}
		}
	}

	/**
	 * Fills {@link #queue} with start nodes: an entry of {@code m_graph.labels}
	 * is queued when at least one of its concepts has not been covered by an
	 * earlier entry, and all of its concepts are then marked as covered.
	 *
	 * @throws RDFHandlerException declared for signature compatibility; nothing
	 *         in this method raises it
	 */
	private void getStartNodes() throws RDFHandlerException {
		Set<Integer> coveredConcepts = new HashSet<Integer>();

		// The wildcard keeps this independent of the concrete collection type
		// that the labels map uses for its values.
		for (Map.Entry<Integer, ? extends Iterable<Integer>> entry : m_graph.labels.entrySet()) {
			boolean coversNewConcept = false;
			for (Integer concept : entry.getValue()) {
				// Set.add returns true only for a concept not seen before.
				if (coveredConcepts.add(concept)) {
					coversNewConcept = true;
				}
			}
			if (coversNewConcept) {
				queue.add(entry.getKey());
			}
		}
	}

}