refactor layout, add jp sort pages, data update

This commit is contained in:
Bryce Allen
2015-12-03 20:21:00 -06:00
parent e5d4d8d244
commit 728730de2e
6 changed files with 1034 additions and 211 deletions

53
bin/parse-wikia-skills.py Executable file
View File

@@ -0,0 +1,53 @@
#!/usr/bin/env python
# -*- coding: utf8 -*-
"""
Parse skill tree names and Japanese names for Monster Hunter X from a saved
copy of the wikia skill list page:
http://monsterhunter.wikia.com/wiki/MHX:_Skill_List
Produces a list of dicts (printed as JSON), e.g.:
[
  {
    "name": "Ammo Saver",
    "name_jp": "弾薬節約",
  },
  ...
]
"""
import sys
import re
import json
from collections import defaultdict, OrderedDict
import requests
#<td rowspan="1" style="vertical-align: top; background-color: #ddeeee; font-size:12pt; border-bottom: 2px solid #000000;"><h3><span class="mw-headline" id="Ammo_Saver">Ammo Saver</span></h3>弾薬節約
# Matches a table cell that opens a skill tree row: group 1 is the English
# tree name (optionally wrapped in <b>), group 2 is the text that follows the
# closing heading tag (the Japanese name), up to the next tag or end of line.
TREE_RE = re.compile(r'^<td [^>]*><h[23]><span class="mw-headline" id="[^"]*">(?:<b>)?([^<]*)(?:</b>)?</span></h[23]>([^<]*)')


def parse_wikia_skill_trees(f):
    """Extract skill tree names from the lines of a wikia skill list page.

    :param f: any iterable yielding lines of HTML, e.g. an open file object
        or a list of strings. Each line is stripped of surrounding
        whitespace before being matched against ``TREE_RE``.
    :return: list of ``{"name": ..., "name_jp": ...}`` dicts in the order the
        trees first appear. Only the first row seen for a given ``name`` is
        kept; later rows with the same name are ignored.
    """
    data = []
    seen = set()
    # Iterating directly (instead of calling readline) lets callers pass any
    # iterable of lines, not just file objects.
    for line in f:
        m = TREE_RE.match(line.strip())
        if m is None:
            continue
        name = m.group(1)
        if name in seen:
            continue
        seen.add(name)
        data.append({"name": name, "name_jp": m.group(2)})
    return data
def _main():
    """Parse the HTML file named by the first CLI argument and print JSON.

    Writes the list returned by ``parse_wikia_skill_trees`` to stdout as
    JSON with a two-space indent. Exits with status 2 and a usage message
    on stderr when no file argument is given.
    """
    # Local import: io.open takes an encoding on both Python 2 and 3.
    import io

    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s <skill-list.html>\n" % sys.argv[0])
        sys.exit(2)

    # The page contains Japanese text; decode explicitly rather than relying
    # on the platform default encoding.
    with io.open(sys.argv[1], encoding="utf-8") as f:
        stree_list = parse_wikia_skill_trees(f)
    # Single-argument call form works as both a Python 2 print statement
    # and a Python 3 print function.
    print(json.dumps(stree_list, indent=2))


if __name__ == '__main__':
    _main()