You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							70 lines
						
					
					
						
							2.0 KiB
						
					
					
				
			
		
		
	
	
							70 lines
						
					
					
						
							2.0 KiB
						
					
					
				#!/usr/bin/env python3
 | 
						|
# -*- coding: utf8 -*-
 | 
						|
"""
 | 
						|
Parse hunter arts name, name_jp, and description from wikia:
 | 
						|
http://monsterhunter.wikia.com/wiki/MHX:_Hunter_Arts
 | 
						|
 | 
						|
Returns list of dict, e.g.:
 | 
						|
[
 | 
						|
  {
 | 
						|
    "section": "Heavy Bowgun",
 | 
						|
    "description": "",
 | 
						|
    "name": "Acceleration Shower I",
 | 
						|
    "name_jp": "\\u30a2\\u30af\\u30bb\\u30eb\\u30b7\\u30e3\\u30ef\\u30fc I"
 | 
						|
  },
 | 
						|
  ...
 | 
						|
]
 | 
						|
"""
 | 
						|
 | 
						|
import sys
 | 
						|
import re
 | 
						|
import json
 | 
						|
from collections import defaultdict, OrderedDict
 | 
						|
 | 
						|
 | 
						|
# Sample of the wikia HTML the patterns below are written against:
#<h3><span class="mw-headline" id="Lance">Lance</span></h3>
#<td style="vertical-align: top; background-color: #ddeeee; font-size:12pt;">Absolute Evasion<br />絶対回避
#</td><td>The hunter's body spins and evades attacks while retreating from the immediate area. Your weapon will always be sheathed after this technique.

# An <h2>/<h3> headline at the start of a line; group 1 is the heading text.
SECTION_RE = re.compile(
    r'^<h[23]><span class="mw-headline" id="[^"]*">([^<]*)</span></h[23]>')

# The first table cell of a hunter art row. Group 1 is the text before
# "<br />" (the English name in the sample above); group 2 is the rest of
# the line (the Japanese name in the sample above).
NAME_RE = re.compile(
    r'^<td style="vertical-align: top; background-color: #ddeeee; '
    r'font-size:12pt;">([^<]*)<br />(.*)')
 | 
						|
 | 
						|
def parse_wikia_hunter_arts(f):
    """
    Collect hunter art records from the lines of the wikia page.

    `f` is an open text file (any object whose `readline()` returns an
    empty string at end of input). Each line is stripped and matched
    against SECTION_RE and NAME_RE.

    Returns a list of dicts, in the order the rows were read, each with
    the keys "section", "name", "name_jp" and "description". "section" is
    the text of the most recent heading seen, or None if a row appears
    before any heading. Rows whose name ends in "II" (so both the II and
    III variants) are left out.
    """
    section = None
    data = []
    while True:
        line = f.readline()
        if not line:
            # readline() returns '' only at end of file
            break
        line = line.strip()

        m = SECTION_RE.match(line)
        if m:
            section = m.group(1)
            continue

        m = NAME_RE.match(line)
        if not m:
            continue

        name = m.group(1)
        if name.endswith("II"):
            # don't need to translate I-III multiple times, and
            # descriptions are also the same
            continue

        # The description is on the line right after the name cell; the
        # record is only built once every field is known, so a skipped
        # row never leaves partial state behind.
        description = f.readline().strip().replace("</td><td>", "")
        data.append({
            "section": section,
            "name": name,
            "name_jp": m.group(2),
            "description": description,
        })
    return data
 | 
						|
 | 
						|
 | 
						|
def _main():
    """
    Command-line entry point.

    Reads the file named by the first command-line argument, parses it
    with parse_wikia_hunter_arts and prints the result to stdout as JSON
    indented by two spaces. Exits with a usage message when no file
    argument is given.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: %s <hunter_arts_html_file>" % sys.argv[0])
    # Decode explicitly instead of relying on the locale default: the page
    # contains Japanese names.
    # NOTE(review): assumes the saved page is UTF-8 -- confirm.
    with open(sys.argv[1], encoding="utf-8") as f:
        data = parse_wikia_hunter_arts(f)
    print(json.dumps(data, indent=2))
 | 
						|
 | 
						|
 | 
						|
# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    _main()
 |