diff options
| author | RncLsn <rnc.lsn@gmail.com> | 2015-06-04 14:00:20 +0100 |
|---|---|---|
| committer | RncLsn <rnc.lsn@gmail.com> | 2015-06-04 14:00:20 +0100 |
| commit | b3ce74df783ebe665182dbd916a7288cff8bc127 (patch) | |
| tree | bf40a35615a3148f68e4d09fe72e693b7ec8b8fc /scripts | |
| parent | 8a5d02f50e96d531a867e79607e7d283a756a4ec (diff) | |
| download | ACQuA-b3ce74df783ebe665182dbd916a7288cff8bc127.tar.gz ACQuA-b3ce74df783ebe665182dbd916a7288cff8bc127.zip | |
SyGENiA query converter.
Diffstat (limited to 'scripts')
| -rw-r--r-- | scripts/SyGENiA_query_converter.py | 91 |
1 files changed, 91 insertions, 0 deletions
diff --git a/scripts/SyGENiA_query_converter.py b/scripts/SyGENiA_query_converter.py new file mode 100644 index 0000000..a40c05f --- /dev/null +++ b/scripts/SyGENiA_query_converter.py | |||
| @@ -0,0 +1,91 @@ | |||
| 1 | import sys | ||
| 2 | import os | ||
| 3 | from os.path import join | ||
| 4 | import re | ||
| 5 | import argparse | ||
| 6 | import random | ||
| 7 | |||
| 8 | |||
| 9 | # example query | ||
| 10 | # Q(?0)<-takesCourse(?0,?1), Course(?1) | ||
| 11 | |||
| 12 | # example namespace_pairs | ||
| 13 | # unibench:http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl | ||
| 14 | |||
| 15 | |||
# Maps the numeric variable index used in the FOL notation (the digits after
# '?') to the letter used for the SPARQL variable / blank node name.
var_map = {'0': 'x', '1': 'y', '2': 'z', '3': 'u', '4': 'v', '5': 'w'}

# Matches one atom such as ``takesCourse(?0,?1)``: group 'name' is the
# predicate, group 'vars' the comma-separated numeric variables.
# Raw string: '\(' and '\?' are regex escapes, not Python string escapes.
# Predicate names may also contain digits and underscores after the first
# character, so atoms such as ``Person2(?0)`` are not silently skipped.
atom_regex = r'(?P<name>[A-Za-z_][A-Za-z0-9_]*)\(((?P<vars>\?[0-9]+(,\?[0-9]+)*))\)'
| 18 | |||
| 19 | |||
def get_var(numeric_var, answer_vars, blank_pct=0):
    """Translate a numeric var (e.g. ?1) into a SPARQL term.

    The digits after the leading character are looked up in ``var_map``.
    The result is either a variable (e.g. ?y) or, with a probability of
    ``blank_pct`` percent, a blank node (e.g. _:y). A variable that occurs
    in ``answer_vars`` is never turned into a blank node.

    A missing entry in ``var_map`` raises KeyError.
    """

    # One random draw per call, taken before anything else is evaluated.
    draw = random.random()
    wants_blank = draw < (float(blank_pct) / 100)

    letter = var_map[numeric_var[1:]]
    named_var = '?' + letter

    if wants_blank and named_var not in answer_vars:
        return '_:' + letter
    return named_var
| 31 | |||
| 32 | |||
| 33 | |||
def parse_query(query, namespace_pair, query_id, blank_pct=0):
    """Translate a query from FOL notation to SPARQL.

    Args:
        query: query text of the form ``Q(?0)<-takesCourse(?0,?1), Course(?1)``.
        namespace_pair: ``<id>:<namespace>``, split at the first colon. The
            namespace is wrapped in ``<...>`` when it is not already, because
            a SPARQL PREFIX declaration requires a bracketed IRI.
        query_id: integer written into the ``^[query<N>]`` header line.
        blank_pct: percentage forwarded to ``get_var`` for replacing
            non-answer variables with blank nodes.

    Returns:
        The SPARQL text: header line, two PREFIX lines, SELECT, and a WHERE
        block with one triple pattern per body atom.

    Raises:
        ValueError: if ``namespace_pair`` contains no colon, or ``query`` does
            not contain exactly one ``<-``.
        KeyError: if a variable index is not present in ``var_map``.
        IOError: if a body atom has more than two arguments.
    """

    namespace_id, colon, namespace = namespace_pair.partition(':')
    if not colon:
        # Previously find() returned -1 here and produced a garbage prefix.
        raise ValueError('namespace must have the form <id>:<namespace>: %r'
                         % (namespace_pair,))
    if not namespace.startswith('<'):
        namespace = '<' + namespace + '>'

    head, body = query.split('<-')

    # Must be a real list: it is membership-tested once per body variable in
    # get_var and joined below. A Python 3 map() iterator would be exhausted
    # by the first membership test.
    answer_vars = ['?' + var_map[v.strip()[1:]]
                   for v in head.strip()[2:-1].split(',')]

    triples = []
    var_cache = {}  # numeric var -> chosen term, so a var is translated once
    for match in re.finditer(atom_regex, body):
        name = match.group('name')

        atom_vars = []
        for x in match.group('vars').split(','):
            if x not in var_cache:
                var_cache[x] = get_var(x, answer_vars, blank_pct)
            atom_vars.append(var_cache[x])

        if len(atom_vars) == 1:
            # Unary atom: class membership.
            triples.append((atom_vars[0], 'rdf:type', namespace_id + ':' + name))
        elif len(atom_vars) == 2:
            # Binary atom: property assertion.
            triples.append((atom_vars[0], namespace_id + ':' + name, atom_vars[1]))
        else:
            raise IOError('Predicated of arity > 2')

    header_lines = [
        '^[query%d]' % query_id,
        'PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>',
        'PREFIX ' + namespace_id + ': ' + namespace,
        'SELECT ' + ' '.join(answer_vars),
        'WHERE {',
    ]
    query_text = '\n'.join(header_lines) + '\n'

    if triples:
        # Patterns are separated by ' .'; the last one has no terminating dot
        # (optional in SPARQL). The layout matches the previous output.
        patterns = ['   ' + ' '.join(triple) for triple in triples]
        query_text += ' .\n'.join(patterns) + ' \n'

    # With no triples the WHERE block is simply empty; the opening brace is
    # no longer stripped.
    return query_text + '}'
| 71 | |||
| 72 | |||
if __name__ == '__main__':

    # Command line: [-b PCT] <id>:<namespace> <input-dir or input-file>
    parser = argparse.ArgumentParser(description='Convert queries from FOL notation to SPARQL.')
    parser.add_argument('-b', '--blank', default='0',
                        help='percentage of vars to be randomly replaced with blank nodes')
    parser.add_argument('namespace',
                        help='<id>:<namespace>, that is a colon-separated pair with an id and the namespace for all the individuals in the query')
    parser.add_argument('input', help='<input-dir> or <input-file>, that is an input directory or a single file')
    args = parser.parse_args()

    # The help text allows a single file as well as a directory.
    if os.path.isfile(args.input):
        input_paths = [args.input]
    else:
        # os.listdir order is arbitrary; sort so that query ids are
        # reproducible between runs.
        input_paths = [join(args.input, file_name)
                       for file_name in sorted(os.listdir(args.input))]

    query_id = 1
    for input_path in input_paths:
        # Sub-directories and other non-regular entries are skipped.
        if not os.path.isfile(input_path):
            continue
        with open(input_path, 'r') as in_file:
            query = in_file.read()
        parsed_query = parse_query(query, args.namespace, query_id, args.blank)
        query_id += 1
        # Call form with a single argument works on Python 2 and Python 3.
        print(parsed_query)
| 91 | |||
