diff options
Diffstat (limited to 'test/uk/ac/ox/cs/data/sample/RandomWalkMulti.java')
| -rw-r--r-- | test/uk/ac/ox/cs/data/sample/RandomWalkMulti.java | 112 |
1 files changed, 112 insertions, 0 deletions
diff --git a/test/uk/ac/ox/cs/data/sample/RandomWalkMulti.java b/test/uk/ac/ox/cs/data/sample/RandomWalkMulti.java new file mode 100644 index 0000000..592f249 --- /dev/null +++ b/test/uk/ac/ox/cs/data/sample/RandomWalkMulti.java | |||
| @@ -0,0 +1,112 @@ | |||
| 1 | package uk.ac.ox.cs.data.sample; | ||
| 2 | |||
| 3 | import java.util.HashSet; | ||
| 4 | import java.util.Iterator; | ||
| 5 | import java.util.LinkedList; | ||
| 6 | import java.util.List; | ||
| 7 | import java.util.Map; | ||
| 8 | import java.util.Queue; | ||
| 9 | import java.util.Set; | ||
| 10 | import java.util.Stack; | ||
| 11 | |||
| 12 | import org.openrdf.rio.RDFHandlerException; | ||
| 13 | import org.openrdf.rio.turtle.TurtleWriter; | ||
| 14 | |||
| 15 | import uk.ac.ox.cs.pagoda.util.Utility; | ||
| 16 | |||
| 17 | |||
| 18 | public class RandomWalkMulti extends RandomWalk { | ||
| 19 | |||
| 20 | public RandomWalkMulti(RDFGraph graph, TurtleWriter writer) { | ||
| 21 | super(graph, writer); | ||
| 22 | } | ||
| 23 | |||
| 24 | Queue<Integer> queue = new LinkedList<Integer>(); | ||
| 25 | |||
| 26 | @Override | ||
| 27 | public void sample() throws RDFHandlerException { | ||
| 28 | getStartNodes(); | ||
| 29 | |||
| 30 | Utility.logInfo(queue.size()); | ||
| 31 | |||
| 32 | int u, v, pick, index; | ||
| 33 | int individualLimit = statementLimit / queue.size(), currentLimit = 0; | ||
| 34 | RDFEdge edge; | ||
| 35 | List<RDFEdge> edges; | ||
| 36 | Stack<Integer> stack = new Stack<Integer>(); | ||
| 37 | while (true) { | ||
| 38 | if (noOfStatements >= statementLimit) { | ||
| 39 | System.out.println("The number of statements in the sampling: " + noOfStatements); | ||
| 40 | return ; | ||
| 41 | } | ||
| 42 | if (noOfStatements >= currentLimit) { | ||
| 43 | stack.clear(); | ||
| 44 | } | ||
| 45 | |||
| 46 | if (stack.isEmpty()) { | ||
| 47 | if (queue.isEmpty()) | ||
| 48 | v = rand.nextInt(m_graph.numberOfIndividuals); | ||
| 49 | else { | ||
| 50 | v = queue.poll(); | ||
| 51 | currentLimit += individualLimit; | ||
| 52 | } | ||
| 53 | stack.add(v); | ||
| 54 | // Utility.logInfo(noOfStart + " new start: " + m_graph.getRawString(v)); | ||
| 55 | visit(v); | ||
| 56 | } | ||
| 57 | u = stack.peek(); | ||
| 58 | if (rand.nextInt(100) < 15) { | ||
| 59 | stack.pop(); | ||
| 60 | continue; | ||
| 61 | } | ||
| 62 | if ((edges = m_graph.edges.get(u)) == null || edges.size() == 0) { | ||
| 63 | stack.clear(); | ||
| 64 | continue; | ||
| 65 | } | ||
| 66 | |||
| 67 | index = 0; | ||
| 68 | pick = rand.nextInt(edges.size()); | ||
| 69 | for (Iterator<RDFEdge> iter = edges.iterator(); iter.hasNext(); ++index) { | ||
| 70 | edge = iter.next(); | ||
| 71 | if (index == pick) { | ||
| 72 | stack.add(v = edge.m_dst); | ||
| 73 | visit(v); | ||
| 74 | m_writer.handleStatement(m_graph.getStatement(u, edge.m_label, edge.m_dst)); | ||
| 75 | ++noOfStatements; | ||
| 76 | iter.remove(); | ||
| 77 | } | ||
| 78 | |||
| 79 | } | ||
| 80 | } | ||
| 81 | } | ||
| 82 | |||
| 83 | private void getStartNodes() throws RDFHandlerException { | ||
| 84 | Set<Integer> coveredConcepts = new HashSet<Integer>(); | ||
| 85 | Integer concept; | ||
| 86 | |||
| 87 | Iterator<Integer> iter; | ||
| 88 | for (Map.Entry<Integer, LinkedList<Integer>> entry: m_graph.labels.entrySet()) { | ||
| 89 | iter = entry.getValue().iterator(); | ||
| 90 | concept = null; | ||
| 91 | |||
| 92 | while (iter.hasNext()) { | ||
| 93 | if (!(coveredConcepts.contains(concept = iter.next()))) { | ||
| 94 | break; | ||
| 95 | } | ||
| 96 | else concept = null; | ||
| 97 | |||
| 98 | } | ||
| 99 | |||
| 100 | if (concept == null) continue; | ||
| 101 | else { | ||
| 102 | queue.add(entry.getKey()); | ||
| 103 | coveredConcepts.add(concept); | ||
| 104 | while (iter.hasNext()) | ||
| 105 | coveredConcepts.add(iter.next()); | ||
| 106 | } | ||
| 107 | } | ||
| 108 | |||
| 109 | } | ||
| 110 | |||
| 111 | |||
| 112 | } | ||
