1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
package uk.ac.ox.cs.data;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Random;
import org.openrdf.model.Statement;
import org.openrdf.rio.RDFHandler;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.turtle.TurtleParser;
import org.openrdf.rio.turtle.TurtleWriter;
import uk.ac.ox.cs.pagoda.util.Utility;
/**
 * Copies a random sample of the statements found in one or more Turtle files
 * into a single Turtle output file.
 *
 * <p>Usage: construct an instance, call {@link #process(String, String)} once
 * per input file or directory, then call {@link #dispose()} to finish the
 * output document and release the output file.
 */
public class Fragment {

    /** Stream backing {@link #m_writer}; owned by this object and closed in {@link #dispose()}. */
    private final FileOutputStream m_out;
    private final TurtleWriter m_writer;
    private final FragmentRDFHandler m_handler;

    /**
     * Opens the output file and prepares the sampling handler.
     *
     * @param fragment sampling modulus handed to {@link FragmentRDFHandler}; a statement is
     *                 kept when a random int is divisible by it, i.e. roughly one statement
     *                 in {@code fragment}
     * @param outFile  path of the Turtle file to write
     * @throws IllegalStateException if the output file cannot be opened for writing
     */
    public Fragment(int fragment, String outFile) {
        m_out = openOutput(outFile);
        m_writer = new TurtleWriter(m_out);
        m_handler = new FragmentRDFHandler(fragment, m_writer);
    }

    /**
     * Opens {@code outFile} for writing, failing fast instead of continuing with a
     * null writer (which would only surface later as a NullPointerException).
     */
    private static FileOutputStream openOutput(String outFile) {
        try {
            return new FileOutputStream(outFile);
        } catch (FileNotFoundException e) {
            throw new IllegalStateException("Cannot open output file " + outFile, e);
        }
    }

    /**
     * Parses a Turtle file, or every {@code .ttl} file directly inside a directory,
     * and feeds the statements to the sampling handler.
     *
     * <p>Any failure is reported (stack trace plus an error log entry) and ends the
     * call early; it is not propagated to the caller.
     *
     * @param prefix   passed to the parser as the second argument of {@code parse}
     *                 (the base URI of the document)
     * @param fileName path of a Turtle file or of a directory containing {@code .ttl} files
     */
    public void process(String prefix, String fileName) {
        try {
            TurtleParser parser = new TurtleParser();
            parser.setRDFHandler(m_handler);
            File input = new File(fileName);
            if (input.isDirectory()) {
                // File.list() returns null on an I/O error or if the directory vanished.
                String[] names = input.list();
                if (names == null) {
                    throw new IOException("Cannot list directory " + fileName);
                }
                for (String name : names) {
                    if (name.endsWith(".ttl")) {
                        parseFile(parser, new File(input, name), prefix);
                    }
                }
            } else {
                parseFile(parser, input, prefix);
            }
        } catch (Exception e) {
            e.printStackTrace();
            Utility.logError("Failed to process " + fileName + ": " + e);
            return;
        }
        Utility.logInfo("DONE");
    }

    /** Parses one file, closing its input stream even when parsing fails. */
    private static void parseFile(TurtleParser parser, File file, String prefix) throws Exception {
        FileInputStream in = new FileInputStream(file);
        try {
            parser.parse(in, prefix);
        } finally {
            in.close();
        }
    }

    /**
     * Ends the output document and closes the output file.
     *
     * <p>The file is closed even if ending the document fails. Checked failures from
     * either step are printed rather than propagated.
     */
    public void dispose() {
        try {
            m_writer.endRDF();
        } catch (RDFHandlerException e) {
            e.printStackTrace();
        } finally {
            try {
                m_out.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Samples the ChEMBL data set into {@code data_01.ttl}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // ChEMBL
        Fragment f = new Fragment(100, "data_01.ttl");
        try {
            f.process("http://rdf.ebi.ac.uk/terms/chembl#", "/media/krr-nas-share/Yujiao/ontologies/bio2rdf/chembl/data");
        } finally {
            f.dispose();
        }

        // Reactome alternative (other input locations used previously:
        // "/home/scratch/yzhou/ontologies/bio2rdf/reactome",
        // "/home/scratch/yzhou/ontologies/bio2rdf/reactome/biopaxrdf"):
        // Fragment f = new Fragment(1000, "data_001.ttl");
        // f.process("http://www.biopax.org/release/biopax-level3.owl#", "/media/krr-nas-share/Yujiao/ontologies/bio2rdf/reactome/data.ttl");
    }
}
/**
 * RDF handler that forwards namespaces and comments to a Turtle writer unchanged,
 * but forwards only a random subset of the statements it receives.
 */
class FragmentRDFHandler implements RDFHandler {

    /** Sampling modulus: a statement is forwarded when a random int is divisible by it. */
    int m_mod;
    /** Destination for everything this handler lets through. */
    TurtleWriter m_writer;
    /** Source of the random draws used to pick statements. */
    Random m_rand = new Random();
    /** Set once the wrapped writer has been started, so later calls do not start it again. */
    boolean m_started = false;

    /**
     * @param mod    sampling modulus used in {@link #handleStatement(Statement)}
     * @param writer writer receiving the sampled output
     */
    public FragmentRDFHandler(int mod, TurtleWriter writer) {
        m_writer = writer;
        m_mod = mod;
    }

    /** Starts the wrapped writer on the first call only; every later call does nothing. */
    @Override
    public void startRDF() throws RDFHandlerException {
        if (!m_started) {
            m_started = true;
            m_writer.startRDF();
        }
    }

    /** Does nothing: the wrapped writer is not ended by this handler. */
    @Override
    public void endRDF() throws RDFHandlerException {
    }

    /** Forwards the namespace declaration to the wrapped writer. */
    @Override
    public void handleNamespace(String nsPrefix, String nsUri) throws RDFHandlerException {
        m_writer.handleNamespace(nsPrefix, nsUri);
    }

    /** Forwards the comment to the wrapped writer, then records it in the debug log. */
    @Override
    public void handleComment(String comment) throws RDFHandlerException {
        m_writer.handleComment(comment);
        Utility.logDebug("handling comment: " + comment);
    }

    /** Forwards the statement only when a fresh random int is divisible by the modulus. */
    @Override
    public void handleStatement(Statement statement) throws RDFHandlerException {
        int draw = m_rand.nextInt();
        boolean selected = draw % m_mod == 0;
        if (selected) {
            m_writer.handleStatement(statement);
        }
    }
}
|