SyGENiA query converter.

author: RncLsn <rnc.lsn@gmail.com> 2015-06-04 14:00:20 +0100
committer: RncLsn <rnc.lsn@gmail.com> 2015-06-04 14:00:20 +0100
commit: b3ce74df783ebe665182dbd916a7288cff8bc127 (patch)
tree: bf40a35615a3148f68e4d09fe72e693b7ec8b8fc /scripts
parent: 8a5d02f50e96d531a867e79607e7d283a756a4ec (diff)
download: ACQuA-b3ce74df783ebe665182dbd916a7288cff8bc127.tar.gz
ACQuA-b3ce74df783ebe665182dbd916a7288cff8bc127.zip
1 files changed, 91 insertions, 0 deletions
diff --git a/scripts/SyGENiA_query_converter.py b/scripts/SyGENiA_query_converter.py
new file mode 100644
index 0000000..a40c05f
--- /dev/null
+++ b/scripts/SyGENiA_query_converter.py
@@ -0,0 +1,91 @@
+import sys
+import os
+from os.path import join
+import re
+import argparse
+import random
+# example query
+# Q(?0)<-takesCourse(?0,?1), Course(?1)
+# example namespace_pairs
+# unibench:http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl
# Readable names for the first few numeric query variables; any other index
# falls back to a generated name (see _var_name).
var_map = {'0': 'x', '1': 'y', '2': 'z', '3': 'u', '4': 'v', '5': 'w'}

# A body atom: a predicate name followed by one or more comma-separated
# numeric variables in parentheses, e.g. takesCourse(?0,?1).  Written as a raw
# string so the backslash escapes reach the regex engine untouched.  Names may
# contain digits, underscores and hyphens; whitespace around the variables is
# tolerated.
atom_regex = (r'(?P<name>[A-Za-z_][A-Za-z0-9_\-]*)'
              r'\(\s*(?P<vars>\?[0-9]+(\s*,\s*\?[0-9]+)*)\s*\)')


def _var_name(numeric_var):
    """Return the bare name (no '?' or '_:' prefix) for a numeric var.

    '?0'..'?5' use the letters in var_map; any other numeric index N yields
    'vN', which cannot clash with the single-letter names.

    Raises KeyError if the token is not '?' followed by digits.
    """
    index = numeric_var.strip()[1:]
    if index in var_map:
        return var_map[index]
    if index.isdigit():
        return 'v' + index
    raise KeyError(numeric_var)


def get_var(numeric_var, answer_vars, blank_pct=0):
    """Given a numeric var (e.g. ?1),
       it returns a var (e.g. ?x) or a blank node (e.g. _:x).

    numeric_var -- token of the form '?<digits>'
    answer_vars -- container of already translated answer variables
                   (e.g. ['?x']); these are never turned into blank nodes
    blank_pct   -- percentage (number or numeric string, 0-100) giving the
                   probability of returning a blank node
    """
    # The random draw happens unconditionally so that a seeded generator is
    # consumed once per call regardless of the outcome.
    blank_flag = random.random() < (float(blank_pct) / 100)
    name = _var_name(numeric_var)
    var = '?' + name
    if blank_flag and var not in answer_vars:
        return '_:' + name
    return var


def parse_query(query, namespace_pair, query_id, blank_pct=0):
    """Translates a query from FOL notation to SPARQL

    query          -- text such as 'Q(?0)<-takesCourse(?0,?1), Course(?1)'
    namespace_pair -- '<id>:<namespace>'; split at the first colon
    query_id       -- integer used in the leading '^[query<N>]' marker line
    blank_pct      -- percentage of non-answer variables to replace,
                      at random, with blank nodes

    Returns the SPARQL text.  Unary atoms become rdf:type triples, binary
    atoms become property triples.

    Raises ValueError if the namespace pair has no colon or the query has no
    '<-', KeyError on a malformed variable, and IOError on an atom with more
    than two arguments.
    """
    namespace_id, colon, namespace = namespace_pair.partition(':')
    if not colon:
        raise ValueError('namespace must be <id>:<namespace>, got %r'
                         % namespace_pair)

    head, body = query.split('<-', 1)

    # Take whatever sits between the head's parentheses instead of assuming a
    # one-letter head predicate.
    head = head.strip()
    head_args = head[head.find('(') + 1:head.rfind(')')]
    # A real list (not a lazy map object): it is tested for membership for
    # every body variable and joined again for the SELECT clause.
    answer_vars = ['?' + _var_name(token) for token in head_args.split(',')]

    triples = []
    var_cache = {}  # numeric var -> translated var, so each var is decided once
    for match in re.finditer(atom_regex, body):
        name = match.group('name')
        atom_vars = []
        for token in match.group('vars').split(','):
            token = token.strip()
            if token not in var_cache:
                var_cache[token] = get_var(token, answer_vars, blank_pct)
            atom_vars.append(var_cache[token])

        if len(atom_vars) == 1:
            triples.append((atom_vars[0], 'rdf:type', namespace_id + ':' + name))
        elif len(atom_vars) == 2:
            triples.append((atom_vars[0], namespace_id + ':' + name, atom_vars[1]))
        else:
            raise IOError("Predicate '%s' of arity > 2" % name)

    header = [
        '^[query%d]' % query_id,
        'PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>',
        'PREFIX ' + namespace_id + ': ' + namespace,
        'SELECT ' + ' '.join(answer_vars),
        'WHERE {',
    ]
    query_text = '\n'.join(header) + '\n'
    if triples:
        # Triples are separated by ' .'; the last one carries no dot, only the
        # trailing space the converter has always emitted.
        triple_lines = ['  ' + ' '.join(triple) for triple in triples]
        query_text += ' .\n'.join(triple_lines) + ' \n'
    # With no triples the block is simply 'WHERE {' followed by '}'.
    query_text += '}'

    return query_text
if __name__ == '__main__':
    # Command-line entry point: convert every query file found at <input> and
    # write the SPARQL queries to standard output, each followed by a blank
    # line.
    parser = argparse.ArgumentParser(description='Convert queries from FOL notation to SPARQL.')
    parser.add_argument('-b', '--blank', default='0', type=float,
        help='percentage of vars to be randomly replaced with blank nodes')
    parser.add_argument('namespace',
        help='<id>:<namespace>, that is a colon-separated pair with an id and the namespace for all the individuals in the query')
    parser.add_argument('input', help='<input-dir> or <input-file>, that is an input directory or a single file')
    args = parser.parse_args()

    # The help text promises a directory or a single file, so handle both.
    if os.path.isdir(args.input):
        # os.listdir returns entries in arbitrary order; sort them so that the
        # query ids are reproducible between runs.
        input_paths = [join(args.input, entry) for entry in sorted(os.listdir(args.input))]
    elif os.path.isfile(args.input):
        input_paths = [args.input]
    else:
        parser.error('input is neither a directory nor a file: %s' % args.input)

    query_id = 1
    for input_path in input_paths:
        # Sub-directories and other non-regular entries are skipped.
        if not os.path.isfile(input_path):
            continue
        with open(input_path, 'r') as in_file:
            query = in_file.read()
        parsed_query = parse_query(query, args.namespace, query_id, args.blank)
        query_id += 1
        # Query text plus one empty line; sys.stdout.write behaves the same on
        # Python 2 and Python 3.
        sys.stdout.write(parsed_query + '\n\n')
author	RncLsn <rnc.lsn@gmail.com>	2015-06-04 14:00:20 +0100
committer	RncLsn <rnc.lsn@gmail.com>	2015-06-04 14:00:20 +0100
commit	b3ce74df783ebe665182dbd916a7288cff8bc127 (patch)
tree	bf40a35615a3148f68e4d09fe72e693b7ec8b8fc /scripts
parent	8a5d02f50e96d531a867e79607e7d283a756a4ec (diff)
download	ACQuA-b3ce74df783ebe665182dbd916a7288cff8bc127.tar.gz ACQuA-b3ce74df783ebe665182dbd916a7288cff8bc127.zip

diff --git a/scripts/SyGENiA_query_converter.py b/scripts/SyGENiA_query_converter.py new file mode 100644 index 0000000..a40c05f --- /dev/null +++ b/scripts/SyGENiA_query_converter.py
@@ -0,0 +1,91 @@
	1	import sys
	2	import os
	3	from os.path import join
	4	import re
	5	import argparse
	6	import random
	7
	8
	9	# example query
	10	# Q(?0)<-takesCourse(?0,?1), Course(?1)
	11
	12	# example namespace_pairs
	13	# unibench:http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl
	14
	15
	16	var_map = {'0': 'x', '1': 'y', '2': 'z', '3':'u', '4': 'v', '5': 'w'}
	17	atom_regex = '(?P<name>[A-Za-z]+)\(((?P<vars>\?[0-9]+(,\?[0-9]+)*))\)'
	18
	19
	20	def get_var(numeric_var, answer_vars, blank_pct=0):
	21	"""Given a numeric var (e.g. ?1),
	22	it returns a var (e.g. ?x) or a blank node (e.g. _:x).
	23	"""
	24
	25	blank_flag = random.random() < (float(blank_pct) / 100)
	26	var = '?' + var_map[numeric_var[1:]]
	27	if blank_flag and var not in answer_vars:
	28	return '_:' + var_map[numeric_var[1:]]
	29	else:
	30	return '?' + var_map[numeric_var[1:]]
	31
	32
	33
	34	def parse_query(query, namespace_pair, query_id, blank_pct=0):
	35	"""Translates a query from FOL notation to SPARQL"""
	36
	37	namespace_id = namespace_pair[:namespace_pair.find(':')]
	38	namespace = namespace_pair[namespace_pair.find(':') + 1:]
	39
	40	head, body = query.split('<-')
	41	answer_vars = map(lambda x: '?' + var_map[x[1:]], head.strip()[2:-1].split(','))
	42	body_atoms = map(lambda m: (m.group('name'), m.group('vars')), re.finditer(atom_regex, body))
	43
	44	triples = []
	45	var_cache = {}
	46	for name, atom_vars_str in body_atoms:
	47
	48	atom_vars = []
	49	for x in atom_vars_str.split(','):
	50	if x not in var_cache:
	51	var_cache[x] = get_var(x, answer_vars, blank_pct)
	52	atom_vars.append(var_cache[x])
	53
	54	if len(atom_vars) == 1:
	55	triples.append((atom_vars[0], 'rdf:type', namespace_id + ':' + name))
	56	elif len(atom_vars) == 2:
	57	triples.append((atom_vars[0], namespace_id + ':' + name, atom_vars[1]))
	58	else:
	59	raise IOError('Predicated of arity > 2')
	60
	61	query_text = '^[query%d]\n' % query_id
	62	query_text += 'PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n'
	63	query_text += 'PREFIX ' +namespace_id + ': ' + namespace + '\n'
	64	query_text += 'SELECT ' + ' '.join(answer_vars) + '\n'
	65	query_text += 'WHERE {\n'
	66	for triple in triples:
	67	query_text += ' ' + ' '.join(triple) + ' .\n'
	68	query_text = query_text[:-2] + '\n}'
	69
	70	return query_text
	71
	72
	73	if __name__ == '__main__':
	74
	75	parser = argparse.ArgumentParser(description='Convert queries from FOL notation to SPARQL.')
	76	parser.add_argument('-b', '--blank', default='0' ,
	77	help='percentage of vars to be randomly replaced with blank nodes')
	78	parser.add_argument('namespace',
	79	help='<id>:<namespace>, that is a colon-separated pair with an id and the namespace for all the individuals in the query')
	80	parser.add_argument('input', help='<input-dir> or <input-file>, that is an input directory or a single file')
	81	args = parser.parse_args()
	82
	83	query_id = 1
	84	for input_file in os.listdir(args.input):
	85	if os.path.isfile(join(args.input, input_file)):
	86	with open(join(args.input, input_file), 'r') as in_file:
	87	query = in_file.read()
	88	parsed_query = parse_query(query, args.namespace, query_id, args.blank)
	89	query_id += 1
	90	print parsed_query
	91	print