From 9ce65c5a963b03ee97fe9cb6c5aa65a3c04a80a8 Mon Sep 17 00:00:00 2001 From: yzhou Date: Tue, 21 Apr 2015 10:34:27 +0100 Subject: initial version --- test/uk/ac/ox/cs/data/sample/RandomWalk.java | 88 ++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 test/uk/ac/ox/cs/data/sample/RandomWalk.java (limited to 'test/uk/ac/ox/cs/data/sample/RandomWalk.java') diff --git a/test/uk/ac/ox/cs/data/sample/RandomWalk.java b/test/uk/ac/ox/cs/data/sample/RandomWalk.java new file mode 100644 index 0000000..d9f5107 --- /dev/null +++ b/test/uk/ac/ox/cs/data/sample/RandomWalk.java @@ -0,0 +1,88 @@ +package uk.ac.ox.cs.data.sample; + +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Random; +import java.util.Set; +import java.util.Stack; + +import org.openrdf.rio.RDFHandlerException; +import org.openrdf.rio.turtle.TurtleWriter; + +import uk.ac.ox.cs.pagoda.util.Utility; + +public class RandomWalk extends Sampler { + + public RandomWalk(RDFGraph graph, TurtleWriter writer) { + super(graph, writer); + } + + protected Random rand = new Random(); + + protected int noOfStatements = 0, statementLimit = 0; + protected Set visited = new HashSet(); + + @Override + public void setLimit(int limit) { + statementLimit = limit; + } + + @Override + public void sample() throws RDFHandlerException { + int u, v, pick, index; + RDFEdge edge; + List edges; + Stack stack = new Stack(); + while (true) { + if (noOfStatements >= statementLimit) { + return ; + } + if (stack.isEmpty()) { + stack.add(v = rand.nextInt(m_graph.numberOfIndividuals)); + Utility.logInfo("A new start: " + m_graph.getRawString(v)); + visit(v); + } + u = stack.peek(); + if (rand.nextInt(100) < 15) { + stack.pop(); + continue; + } + if ((edges = m_graph.edges.get(u)) == null || edges.size() == 0) { + stack.clear(); + continue; + } + + index = 0; + pick = rand.nextInt(edges.size()); + for (Iterator iter = edges.iterator(); iter.hasNext(); ++index) { + edge = iter.next(); + if (index == pick) { + stack.add(v = edge.m_dst); + visit(v); + m_writer.handleStatement(m_graph.getStatement(u, edge.m_label, edge.m_dst)); + ++noOfStatements; + iter.remove(); + } + + } + } + } + + protected void visit(int node) throws RDFHandlerException { + if (visited.contains(node)) return ; + visited.add(node); + List list = m_graph.labels.get(node); + if (list == null) return ; + for (Iterator iter = list.iterator(); iter.hasNext(); ) + m_writer.handleStatement(m_graph.getStatement(node, iter.next())); + noOfStatements += list.size(); + } + + @Override + public void dispose() { + visited.clear(); + } + + +} -- cgit v1.2.3