#!/usr/bin/env python3
# -*- coding: utf8 -*-
"""
Parse hunter arts name, name_jp, and description from wikia:
http://monsterhunter.wikia.com/wiki/MHX:_Hunter_Arts
Returns list of dict, e.g.:
[
{
"section": "Heavy Bowgun",
"description": "",
"name": "Acceleration Shower I",
"name_jp": "\\u30a2\\u30af\\u30bb\\u30eb\\u30b7\\u30e3\\u30ef\\u30fc I"
},
...
]
"""
import sys
import re
import json
from collections import defaultdict, OrderedDict
#
Lance
#Absolute Evasion 絶対回避
# | The hunter's body spins and evades attacks while retreating from the immediate area. Your weapon will always be sheathed after this technique.
SECTION_RE = re.compile('^([^<]*)')
NAME_RE = re.compile(
'^ | ([^<]*) (.*)')
def parse_wikia_hunter_arts(f):
section = None
data = []
skill = {}
while True:
line = f.readline()
if not line:
break
line = line.strip()
m = SECTION_RE.match(line)
if m:
section = m.group(1)
continue
m = NAME_RE.match(line)
if m:
skill["section"] = section
skill["name"] = m.group(1)
if skill["name"].endswith("II"):
# don't need to translate I-III multiple times, and
# descriptions are also the same
continue
skill["name_jp"] = m.group(2)
# next line is description
description = f.readline().strip().replace(" | ", "")
skill["description"] = description
data.append(skill)
skill = {}
return data
def _main():
with open(sys.argv[1]) as f:
data = parse_wikia_hunter_arts(f)
print(json.dumps(data, indent=2))
if __name__ == '__main__':
_main()
|