parent
							
								
									43c9e6c79c
								
							
						
					
					
						commit
						45cf2c8d5f
					
				@ -0,0 +1,90 @@
 | 
				
			||||
[
 | 
				
			||||
{
 | 
				
			||||
  "name": "Forest and Hills",
 | 
				
			||||
  "name_jp": "森丘"
 | 
				
			||||
},
 | 
				
			||||
{
 | 
				
			||||
  "name": "Swamp",
 | 
				
			||||
  "name_jp": "沼地"
 | 
				
			||||
},
 | 
				
			||||
{
 | 
				
			||||
  "name": "Snowy Mountains",
 | 
				
			||||
  "name_jp": "雪山"
 | 
				
			||||
},
 | 
				
			||||
{
 | 
				
			||||
  "name": "Volcano",
 | 
				
			||||
  "name_jp": "火山"
 | 
				
			||||
},
 | 
				
			||||
{
 | 
				
			||||
  "name": "Deserted Island",
 | 
				
			||||
  "name_jp": "孤島"
 | 
				
			||||
},
 | 
				
			||||
{
 | 
				
			||||
  "name": "Misty Peaks",
 | 
				
			||||
  "name_jp": "渓流"
 | 
				
			||||
},
 | 
				
			||||
{
 | 
				
			||||
  "name": "Sacred Mountain",
 | 
				
			||||
  "name_jp": "霊峰"
 | 
				
			||||
},
 | 
				
			||||
{
 | 
				
			||||
  "name": "Ancestral Steppe",
 | 
				
			||||
  "name_jp": "遺跡平原"
 | 
				
			||||
},
 | 
				
			||||
{
 | 
				
			||||
  "name": "Sunken Hollow",
 | 
				
			||||
  "name_jp": "地下洞窟"
 | 
				
			||||
},
 | 
				
			||||
{
 | 
				
			||||
  "name": "Primal Forest",
 | 
				
			||||
  "name_jp": "原生林"
 | 
				
			||||
},
 | 
				
			||||
{
 | 
				
			||||
  "name": "Frozen Seaway",
 | 
				
			||||
  "name_jp": "氷海"
 | 
				
			||||
},
 | 
				
			||||
{
 | 
				
			||||
  "name": "Volcanic Hollow",
 | 
				
			||||
  "name_jp": "地底火山"
 | 
				
			||||
},
 | 
				
			||||
{
 | 
				
			||||
  "name": "Sanctuary",
 | 
				
			||||
  "name_jp": "禁足地"
 | 
				
			||||
},
 | 
				
			||||
{
 | 
				
			||||
  "name": "Dunes",
 | 
				
			||||
  "name_jp": "旧砂漠"
 | 
				
			||||
},
 | 
				
			||||
{
 | 
				
			||||
  "name": "Ancient Forest",
 | 
				
			||||
  "name_jp": "古代森"
 | 
				
			||||
},
 | 
				
			||||
{
 | 
				
			||||
  "name": "Wyvern Graveyard",
 | 
				
			||||
  "name_jp": "竜ノ墓場"
 | 
				
			||||
},
 | 
				
			||||
{
 | 
				
			||||
  "name": "Tower 3",
 | 
				
			||||
  "name_jp": "塔の秘竟"
 | 
				
			||||
},
 | 
				
			||||
{
 | 
				
			||||
  "name": "Ingle Isle",
 | 
				
			||||
  "name_jp": "溶岩島"
 | 
				
			||||
},
 | 
				
			||||
{
 | 
				
			||||
  "name": "Polar Field",
 | 
				
			||||
  "name_jp": "極圏"
 | 
				
			||||
},
 | 
				
			||||
{
 | 
				
			||||
  "name": "Arena",
 | 
				
			||||
  "name_jp": "闘技場"
 | 
				
			||||
},
 | 
				
			||||
{
 | 
				
			||||
  "name": "Slayground",
 | 
				
			||||
  "name_jp": "立体闘技場"
 | 
				
			||||
},
 | 
				
			||||
{
 | 
				
			||||
  "name": "Moat Arena",
 | 
				
			||||
  "name_jp": "水上闘技場"
 | 
				
			||||
}
 | 
				
			||||
]
 | 
				
			||||
@ -0,0 +1,113 @@
 | 
				
			||||
#!/usr/bin/env python2
 | 
				
			||||
# vim: set fileencoding=utf8 :
 | 
				
			||||
 | 
				
			||||
import urllib
 | 
				
			||||
import os
 | 
				
			||||
import json
 | 
				
			||||
import sys
 | 
				
			||||
 | 
				
			||||
from lxml import etree
 | 
				
			||||
 | 
				
			||||
import _pathfix
 | 
				
			||||
 | 
				
			||||
_BASE_URL = "http://monsterhunter.wikia.com/wiki/"
 | 
				
			||||
 | 
				
			||||
_PAGE = "MHX:_Palico_Skills"
 | 
				
			||||
 | 
				
			||||
_CIRCLE = u"\u26ab"
 | 
				
			||||
 | 
				
			||||
 | 
				
			||||
def extract_arts_and_skills(tree):
 | 
				
			||||
    arts = []
 | 
				
			||||
    skills = []
 | 
				
			||||
    tables = tree.xpath(
 | 
				
			||||
        '//*[@id="mw-content-text"]/table[contains(@class, "linetable")]'
 | 
				
			||||
    )
 | 
				
			||||
    for table in tables:
 | 
				
			||||
        category = None
 | 
				
			||||
        fields = None
 | 
				
			||||
        rows = list(table)
 | 
				
			||||
        for row in rows:
 | 
				
			||||
            cols, is_header = _get_column_cells_texts(row)
 | 
				
			||||
            print is_header, cols
 | 
				
			||||
            continue
 | 
				
			||||
            if is_header:
 | 
				
			||||
                if len(cols) == 1:
 | 
				
			||||
                    category = cols[0]
 | 
				
			||||
                else:
 | 
				
			||||
                    fields = [_header_to_field_name(c) for c in cols]
 | 
				
			||||
            else:
 | 
				
			||||
                if fields[0].startswith("art_"):
 | 
				
			||||
                    if category == "Forte Specific (Unteachable)":
 | 
				
			||||
                        values = dict(name=cols[0],
 | 
				
			||||
                                      name_jp=cols[1],
 | 
				
			||||
                                      forte=cols[2],
 | 
				
			||||
                                      cost=int(cols[3]),
 | 
				
			||||
                                      unlock_requirement=None,
 | 
				
			||||
                                      teaching_requirement=None,
 | 
				
			||||
                                      description=cols[4],
 | 
				
			||||
                                      teachable=False)
 | 
				
			||||
                    elif category == "Forte Specific (Teachable)":
 | 
				
			||||
                        values = dict(name=cols[0],
 | 
				
			||||
                                      name_jp=cols[1],
 | 
				
			||||
                                      forte="%s %s" % (cols[2], cols[3]),
 | 
				
			||||
                                      cost=int(cols[4]),
 | 
				
			||||
                                      unlock_requirement=None,
 | 
				
			||||
                                      teaching_requirement=cols[5],
 | 
				
			||||
                                      description=cols[6],
 | 
				
			||||
                                      teachable=True)
 | 
				
			||||
                    else:
 | 
				
			||||
                        values = dict(name=cols[0],
 | 
				
			||||
                                      name_jp=cols[1],
 | 
				
			||||
                                      forte="All",
 | 
				
			||||
                                      cost=int(cols[2]),
 | 
				
			||||
                                      unlock_requirement=cols[3],
 | 
				
			||||
                                      teaching_requirement=None,
 | 
				
			||||
                                      description=cols[4],
 | 
				
			||||
                                      teachable=True)
 | 
				
			||||
                    arts.append(values)
 | 
				
			||||
                elif fields[0].startswith("skill_"):
 | 
				
			||||
                    values = dict(name=cols[0],
 | 
				
			||||
                                  name_jp=cols[1],
 | 
				
			||||
                                  req_level=cols[2],
 | 
				
			||||
                                  cost=cols[3].count(_CIRCLE),
 | 
				
			||||
                                  description=cols[4],
 | 
				
			||||
                                  category=category)
 | 
				
			||||
                    skills.append(values)
 | 
				
			||||
                else:
 | 
				
			||||
                    raise ValueError("Unknown table type: %r" % cols[0])
 | 
				
			||||
        #print rows[0].text, len(rows)
 | 
				
			||||
    return arts, skills
 | 
				
			||||
 | 
				
			||||
 | 
				
			||||
def _get_column_cells_texts(tr_element):
 | 
				
			||||
    is_header = True
 | 
				
			||||
    cells = tr_element.xpath("./th")
 | 
				
			||||
    if not cells:
 | 
				
			||||
        is_header = False
 | 
				
			||||
        cells = tr_element.xpath("./td")
 | 
				
			||||
    texts = []
 | 
				
			||||
    for cell in cells:
 | 
				
			||||
        texts = [t.strip() for t in cell.xpath("./text()")]
 | 
				
			||||
    return texts, is_header
 | 
				
			||||
 | 
				
			||||
 | 
				
			||||
def _header_to_field_name(s):
 | 
				
			||||
    return s.lower().replace(" ", "_").replace(".", "")
 | 
				
			||||
 | 
				
			||||
 | 
				
			||||
def _main():
 | 
				
			||||
    tmp_path = os.path.join(_pathfix.project_path, "tmp")
 | 
				
			||||
    fpath = os.path.join(tmp_path, "wikia-palico-skills.html")
 | 
				
			||||
    parser = etree.HTMLParser()
 | 
				
			||||
    urllib.urlretrieve(_BASE_URL + _PAGE, fpath)
 | 
				
			||||
    with open(fpath) as f:
 | 
				
			||||
        tree = etree.parse(f, parser)
 | 
				
			||||
        arts, skills = extract_arts_and_skills(tree)
 | 
				
			||||
    #print json.dumps(weapon_list, indent=2)
 | 
				
			||||
    print json.dumps(arts, indent=2)
 | 
				
			||||
    print json.dumps(skills, indent=2)
 | 
				
			||||
 | 
				
			||||
 | 
				
			||||
if __name__ == '__main__':
 | 
				
			||||
    _main()
 | 
				
			||||
					Loading…
					
					
				
		Reference in new issue