blob: 65c6df2a45a637b5da23d5dcf22574b92f0cbfd8 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
|
import sys
import re
import json
queryID_regex = '^\s*-+\s*[qQ]uery\s*(\d+)\s*-+\s*$'
def main(args):
records = []
query_found = text_found = variables_found = answers_found = False
cur_record = {}
answers = []
with open(args[0], 'r') as input_file:
for line in input_file:
if not query_found:
match = re.search(queryID_regex, line)
if not match:
continue
query_found = True
# print 'query found'
cur_record['queryID'] = int(match.group(1))
continue
if not text_found:
# print 'text found'
cur_record['queryText'] = line.strip()
text_found = True
continue
if not variables_found:
# print 'vars found'
cur_record['answerVariables'] = line.strip().split()
variables_found = True
continue
if not answers_found:
# print 'answers found'
answers_found = True
continue
if len(line.strip()) > 0:
answers.append(line.strip())
else:
cur_record['answers'] = answers
records.append(cur_record)
print '\rParsed ' + str(len(records)) + ' query records',
query_found = text_found = variables_found = answers_found = False
cur_record = {}
answers = []
print
with open(args[1], 'w') as output_file:
output_file.write(json.dumps(records, indent=2))
if __name__ == '__main__':
main(sys.argv[1:])
|