#!/usr/bin/env python3
import sys,re
# Heading prefixes that open a pronunciation section.  Both spellings are
# needed: "====Pronunciation" does not start with "===Pronunciation".
_PRONUNCIATION_HEADINGS = ("===Pronunciation", "====Pronunciation")

# NOTE(review): the entry-title marker was garbled in the previous revision
# (a string literal broken across two lines and a bare "(.*)" pattern).  It
# is reconstructed here as an XML-style <title>...</title> element, which is
# presumably what the input uses -- confirm against a real input file.
_TITLE_RE = re.compile(r"<title>(.*)</title>")
# A whole {{IPA...}} template, matched non-greedily up to the first "}}".
_IPA_RE = re.compile(r"(\{\{IPA.*?\}\})")
# The argument of an {{a|...}} template on the same line as the IPA template.
_ACCENT_RE = re.compile(r"\{\{a\|(.*?)\}\}")
# A lang=xx parameter inside the IPA template.
_LANG_RE = re.compile(r"lang=([a-z]*)")


def _record_ipa(sub_dict, key, ipa):
    """Store *ipa* under *key*; a repeated key widens the value to a set."""
    if key not in sub_dict:
        sub_dict[key] = ipa
        return
    previous = sub_dict[key]
    # The first value for a key is a plain string; later ones are merged
    # into a set so duplicates of the same template collapse.
    merged = set(previous) if isinstance(previous, set) else {previous}
    merged.add(ipa)
    sub_dict[key] = merged


def parse_pronunciations(lines):
    """Collect IPA templates from pronunciation sections, grouped by title.

    Args:
        lines: an iterable of text lines (an open file works).

    Returns:
        A dict mapping each title to a dict keyed by ``(lang, accent)``.
        ``lang`` comes from a ``lang=`` parameter inside the IPA template
        and defaults to ``"en"``; ``accent`` comes from an ``{{a|...}}``
        template on the same line and defaults to ``"NA"``.  Each value is
        the IPA template text, or a set of such texts when the same key
        occurred more than once.  Titles with no IPA template are omitted.
    """
    pn_dict = {}
    definition = ""
    sub_dict = {}
    in_pronunciation = False

    for line in lines:
        title_match = _TITLE_RE.search(line)
        if title_match:
            # A new entry starts: store what was gathered for the last one.
            if sub_dict:
                pn_dict[definition] = sub_dict
            sub_dict = {}
            definition = title_match.group(1)
            # A section left open at the end of the previous entry must
            # not carry over into this one.
            in_pronunciation = False
        elif line.startswith(_PRONUNCIATION_HEADINGS):
            in_pronunciation = True
        elif in_pronunciation and line.startswith("=="):
            # Any other heading closes the pronunciation section.
            in_pronunciation = False
        elif in_pronunciation:
            match_ipa = _IPA_RE.search(line)
            if not match_ipa:
                continue
            ipa = match_ipa.group(1)
            match_a = _ACCENT_RE.search(line)
            accent = match_a.group(1) if match_a else "NA"
            match_lang = _LANG_RE.search(ipa)
            lang = match_lang.group(1) if match_lang else "en"
            _record_ipa(sub_dict, (lang, accent), ipa)

    # The final entry has no following title line to trigger the flush.
    if sub_dict:
        pn_dict[definition] = sub_dict
    return pn_dict


def main():
    """Parse the file named by argv[1], or stdin, and print the result."""
    if len(sys.argv) > 1:
        # The context manager closes the file once parsing is done.
        with open(sys.argv[1]) as in_file:
            pn_dict = parse_pronunciations(in_file)
    else:
        pn_dict = parse_pronunciations(sys.stdin)
    print(pn_dict)


if __name__ == "__main__":
    main()