# Extracted from a repository web viewer; original page header:
#   path: root/scripts/PagodaOutputJsonConverter.py
#   blob: 65c6df2a45a637b5da23d5dcf22574b92f0cbfd8
import sys
import re
import json


# Pattern for a query header line: optional whitespace, one or more dashes,
# the word "query"/"Query", the numeric query ID (captured as group 1), one or
# more dashes, optional whitespace.  Example: "---- Query 12 ----".
# Written as a raw string so the backslash escapes reach the regex engine
# unchanged and do not trigger invalid-escape warnings on newer Pythons.
queryID_regex = r'^\s*-+\s*[qQ]uery\s*(\d+)\s*-+\s*$'


def main(args):
    """Convert a text dump of query results into a JSON file.

    The input file is scanned line by line for records laid out as:

    1. a header line matching ``queryID_regex`` (gives ``queryID``);
    2. one line holding the query text (gives ``queryText``);
    3. one line of whitespace-separated names (gives ``answerVariables``);
    4. one line that is skipped without being inspected;
    5. zero or more non-blank lines, each stored stripped in ``answers``;
    6. a blank line, or the end of the file, which closes the record.

    Lines that appear before a header line are ignored.  A record that is
    cut off by the end of the file before step 4 has been consumed is
    dropped, because it would lack some of the keys listed above.

    A progress counter is written to standard output while parsing.

    Args:
        args: Sequence whose first item is the path of the input text file
            and whose second item is the path of the JSON file to write.

    Raises:
        IndexError: If ``args`` holds fewer than two items.
        IOError/OSError: If either file cannot be opened.
    """
    # Compile once instead of looking the pattern up for every input line.
    header_pattern = re.compile(queryID_regex)

    records = []

    # Each flag marks one stage of the current record as already consumed.
    query_found = text_found = variables_found = answers_found = False
    cur_record = {}
    answers = []

    with open(args[0], 'r') as input_file:
        for line in input_file:
            stripped = line.strip()

            if not query_found:
                match = header_pattern.search(line)
                if match:
                    query_found = True
                    cur_record['queryID'] = int(match.group(1))
                continue
            if not text_found:
                cur_record['queryText'] = stripped
                text_found = True
                continue
            if not variables_found:
                cur_record['answerVariables'] = stripped.split()
                variables_found = True
                continue
            if not answers_found:
                # The line following the variables is deliberately skipped;
                # answers start on the line after it.
                answers_found = True
                continue

            if stripped:
                answers.append(stripped)
                continue

            # A blank line terminates the current record.
            cur_record['answers'] = answers
            records.append(cur_record)
            # sys.stdout.write works on both Python 2 and Python 3, unlike
            # the print statement; '\r' rewrites the same console line.
            sys.stdout.write(
                '\rParsed ' + str(len(records)) + ' query records')

            query_found = text_found = variables_found = answers_found = False
            cur_record = {}
            answers = []

    # The input may end without a trailing blank line; keep the record that
    # was still being collected instead of silently losing it.
    if answers_found:
        cur_record['answers'] = answers
        records.append(cur_record)
        sys.stdout.write('\rParsed ' + str(len(records)) + ' query records')
    sys.stdout.write('\n')

    with open(args[1], 'w') as output_file:
        output_file.write(json.dumps(records, indent=2))


if __name__ == '__main__':
    # main() reads the input path from position 0 and the output path from
    # position 1; report a usage hint instead of letting a missing argument
    # surface as an IndexError traceback.
    if len(sys.argv) < 3:
        sys.exit('usage: %s INPUT_FILE OUTPUT_JSON_FILE' % sys.argv[0])
    main(sys.argv[1:])