4u stars filter, static damage gen, rise updates
This commit is contained in:
268
scrapers/fextralife-weapons.py
Executable file
268
scrapers/fextralife-weapons.py
Executable file
@@ -0,0 +1,268 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os.path
|
||||
import sys
|
||||
import re
|
||||
import json
|
||||
import lxml.etree
|
||||
|
||||
import requests
|
||||
|
||||
#WTYPES = ["Great Sword", "Long Sword", "Sword and Shield", "Dual Blades", "Lance", "Gunlance", "Hammer"]
|
||||
# Weapon types to process; _main reads "<type>.html" from the input directory
# for each entry.
WTYPES = ["Great Sword", "Lance", "Hammer"]

# Captures the integer percentage of a style string that starts with
# "width: NN%;" (used with .match(), so it is anchored at the start).
WIDTH_RE = re.compile(r'width: *(\d+)%;')

# "<name> x<count>", optionally followed by " Points".
PART_RE = re.compile(r'(.*) x(\d+)( Points)?')

# Alternate form, e.g. "MR Bone 20 pts."
PART_RE_MR = re.compile(r'(.*) (\d+) +pts\.?')
|
||||
|
||||
|
||||
"""
|
||||
<div class="progress" style="max-width: 100%; min-width: 100px;">
|
||||
<div class="progress-bar danger-color-dark" style="width: 11%;">
|
||||
|
||||
</div>
|
||||
<div class="progress-bar warning-color-dark" style="width: 20%;">
|
||||
|
||||
</div>
|
||||
<div class="progress-bar warning-color" style="width: 12%;">
|
||||
|
||||
</div>
|
||||
<div class="progress-bar success-color" style="width: 0%;">
|
||||
|
||||
</div>
|
||||
<div class="progress-bar primary-color-dark" style="width: 0%;">
|
||||
|
||||
</div>
|
||||
<div class="progress-bar white" style="width: 0%;">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
"""
|
||||
def parse_sharpness(div):
    """Return the width percentages of the sharpness bar segments.

    Each direct child ``div`` of ``div`` whose ``style`` attribute starts
    with ``width: N%;`` contributes ``N`` (as an int), in document order.
    Children whose style does not match are skipped.
    """
    matches = (WIDTH_RE.match(bar.get("style")) for bar in div.xpath('div'))
    return [int(found.group(1)) for found in matches if found]
|
||||
|
||||
|
||||
def parse_rampage(td):
    """Return the link texts found under ``ul/li/a`` inside the cell."""
    skill_names = td.xpath('ul/li/a/text()')
    return skill_names
|
||||
|
||||
|
||||
def parse_crafting(td):
    """Parse a crafting cell into a ``{material name: amount}`` dict.

    Every ``ul/li`` item is read as an optional link (the material name)
    plus the first text node of the item (the amount and, for items without
    a link, also the name). Recognised shapes:

    * link + ``x<count>`` or ``<count>`` -> ``{link text: count}``; a text
      starting with ``+ `` appends ``+`` to the link text, and a trailing
      `` Points`` is moved from the amount to the name.
    * digits only, no link -> stored under ``'zenny'``.
    * ``<name> x<count>`` -> ``{name: count}``, or ``{name + ' Points': count}``
      when the text ends in `` Points``.
    * ``<name> <count> pts`` -> ``{name + ' Points': count}``.

    Returns an empty dict (after printing the cell) as soon as one item has
    no text node at all.
    """
    points_suffix = " Points"
    materials = {}
    for li in td.xpath('ul/li'):
        link_texts = li.xpath('a/text()')
        text_nodes = li.xpath('text()')
        if not text_nodes:
            print("Unknown format: ", lxml.etree.tostring(td))
            return {}

        litext = text_nodes[0].strip()
        litext = litext.rstrip('\xa0')
        litext = litext.rstrip('.')

        # The source data sometimes has a letter 'l' where the digit '1'
        # was meant (e.g. "xl"); normalise it before parsing numbers.
        if litext.endswith('l'):
            litext = litext[:-1] + '1'

        # "+ x2" after a link means the material name carries a '+' suffix.
        name_suffix = ''
        if litext.startswith('+ '):
            name_suffix = '+'
            litext = litext[2:]

        if litext.startswith('x'):
            litext = litext[1:]

        if link_texts:
            name = link_texts[0].strip() + name_suffix
            if litext.endswith(points_suffix):
                # Remove the exact suffix (rstrip() would strip a char set).
                litext = litext[:-len(points_suffix)]
                name += points_suffix
            try:
                materials[name] = clean_int(litext)
            except ValueError:
                print("WARN: failed parsing ", name, litext)
        elif litext.isdigit():
            materials['zenny'] = clean_int(litext)
        else:
            m = PART_RE.match(litext)
            if m:
                name = m.group(1)
                # group(3) is the optional " Points" suffix.
                if m.group(3):
                    name += points_suffix
                materials[name] = int(m.group(2))
            else:
                m = PART_RE_MR.match(litext)
                if m:
                    materials[m.group(1) + points_suffix] = int(m.group(2))
    return materials
|
||||
|
||||
|
||||
def clean_text(t):
    """Return ``t`` without surrounding whitespace or trailing NBSPs."""
    return t.strip().rstrip('\xa0')
|
||||
|
||||
|
||||
def clean_int(s):
    """Convert ``s`` to an int after ``clean_text``; empty text yields 0."""
    cleaned = clean_text(s)
    return int(cleaned) if cleaned else 0
|
||||
|
||||
|
||||
def parse_element(td):
    """Return ``dict(type=..., attack=...)`` for an element cell.

    The type is the first link text and the attack is the first direct text
    node parsed with ``clean_int``. If either is missing, both values are
    ``None``.
    """
    #pp("td", td)
    names = td.xpath('a/text()')
    amounts = td.xpath('./text()') if names else []
    if not amounts:
        return dict(type=None, attack=None)
    return dict(type=names[0], attack=clean_int(amounts[0].strip()))
|
||||
|
||||
|
||||
def parse_rarity(td):
    """Return the rarity number from the second word of the cell's first text.

    Falls back to 8 when the cell has no text or its first text node has
    fewer than two whitespace-separated words.
    """
    text_nodes = td.xpath('.//text()')
    if not text_nodes:
        return 8
    words = text_nodes[0].split()
    if len(words) <= 1:
        return 8
    return clean_int(words[1])
|
||||
|
||||
|
||||
def parse_slots(td):
    """Return slot levels from the ``gem_*`` images inside the cell.

    For every descendant ``img`` whose ``title`` starts with ``gem_``, the
    third ``_``-separated field of the title is converted to an int.
    """
    titles = (img.get("title") for img in td.xpath('.//img'))
    return [
        int(title.split("_")[2])
        for title in titles
        if title and title.startswith('gem_')
    ]
|
||||
|
||||
|
||||
def adjust_slots_rampage(data):
    """Split the trailing slot off into ``data['rampage_slot']`` in place.

    For rarity 8 and above, the last entry of ``data['slots']`` is moved to
    ``data['rampage_slot']``. For lower rarities, or when there are no slots
    to split (previously an IndexError), ``rampage_slot`` is set to 0 and
    ``slots`` is left unchanged.
    """
    slots = data['slots']
    if data['rarity'] >= 8 and slots:
        data['rampage_slot'] = slots[-1]
        data['slots'] = slots[:-1]
    else:
        data['rampage_slot'] = 0
|
||||
|
||||
|
||||
def _parse_shared_columns(cells):
    """Parse columns 0-4 (name cell, attack, element, affinity, defense)."""
    data = {}
    name = cells[0]
    data['name'] = name.xpath('a/text()')[0]
    data['slots'] = parse_slots(name)
    data['sharpness'] = parse_sharpness(name.xpath('div')[0])
    data['attack'] = clean_int(cells[1].text)
    element = parse_element(cells[2])
    data['element'] = element['type']
    data['element_attack'] = element['attack']
    data['element_2'] = None
    data['element_2_attack'] = None
    data['affinity'] = clean_int(cells[3].text.rstrip('%'))
    data['defense'] = clean_int(cells[4].text)
    return data


def _parse_trailing_columns(data, rarity_td, rampage_td, crafting_td):
    """Add rarity, rampage skills and crafting, then split off the rampage slot."""
    data['rarity'] = parse_rarity(rarity_td)
    data['rampage_skills'] = parse_rampage(rampage_td)
    data['crafting'] = parse_crafting(crafting_td)
    adjust_slots_rampage(data)
    return data


def gl_parse_tr(tr):
    """Parse a 10-column weapon row into a dict.

    Same as the 8-column layout handled by ``default_parse_tr`` plus two
    extra columns after defense: ``shot_type`` (raw cell text) and ``level``
    (second word of the cell text).
    NOTE(review): "gl" presumably stands for Gunlance - confirm.
    """
    cells = tr.xpath('td')
    data = _parse_shared_columns(cells)
    data['shot_type'] = cells[5].text
    data['level'] = clean_int(cells[6].text.split()[1])
    return _parse_trailing_columns(data, cells[7], cells[8], cells[9])


def default_parse_tr(tr):
    """Parse one weapon table row into a dict.

    Rows with exactly 10 cells are delegated to ``gl_parse_tr``; all other
    rows are read as the 8-column layout (name, attack, element, affinity,
    defense, rarity, rampage skills, crafting).
    """
    cells = tr.xpath('td')
    if len(cells) == 10:
        return gl_parse_tr(tr)
    data = _parse_shared_columns(cells)
    return _parse_trailing_columns(data, cells[5], cells[6], cells[7])
|
||||
|
||||
|
||||
|
||||
def parse_fextralife_weapons(text):
    """Parse a weapon list page and return one dict per table row.

    Uses the first table inside ``div#wiki-content-block`` and passes every
    ``tbody/tr`` row to ``default_parse_tr``.
    """
    root = lxml.etree.HTML(text)
    table = root.xpath('//div[@id="wiki-content-block"]//table')[0]
    return [default_parse_tr(row) for row in table.xpath('tbody/tr')]
|
||||
|
||||
|
||||
def pp(name, e):
    """Debug helper: print the tag and pretty-printed markup of ``e``.

    Lists are walked recursively, labelling each item ``name[index]``.
    """
    if not isinstance(e, list):
        print(name, e.tag)
        print(lxml.etree.tostring(e, pretty_print=True))
        return
    for index, item in enumerate(e):
        pp("{}[{}]".format(name, index), item)
|
||||
|
||||
|
||||
def _main():
    """Parse the saved page of every type in WTYPES and write one JSON list.

    ``sys.argv[1]`` is the directory holding "<type>.html" files and
    ``sys.argv[2]`` is the output JSON path. Each parsed weapon gets a
    ``wtype`` key naming the type it came from.
    """
    indir = sys.argv[1]
    outpath = sys.argv[2]

    all_weapons = []
    for wtype in WTYPES:
        print(wtype)
        with open(os.path.join(indir, wtype + ".html")) as page:
            html = page.read()
        for weapon in parse_fextralife_weapons(html):
            weapon["wtype"] = wtype
            all_weapons.append(weapon)

    with open(outpath, "w") as out:
        json.dump(all_weapons, out, indent=2)


if __name__ == '__main__':
    _main()
|
||||
159
scrapers/mhrice_monsters.py
Executable file
159
scrapers/mhrice_monsters.py
Executable file
@@ -0,0 +1,159 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import os.path
|
||||
import time
|
||||
import re
|
||||
import json
|
||||
import lxml.etree
|
||||
|
||||
import requests
|
||||
|
||||
# Maps hitzone table column headers to the keys used in the output data.
PART_HEADER_MAP = {
    "Slash": "Cut",
    "Impact": "Impact",
    "Shot": "Shot",
    "Fire": "Fire",
    "Water": "Water",
    "Ice": "Ice",
    "Thunder": "Thunder",
    "Dragon": "Dragon",
}
|
||||
|
||||
|
||||
def _td_part_id(td):
|
||||
s = td.xpath('.//text()')[0].strip()
|
||||
if s.startswith("["):
|
||||
s = s[1:2]
|
||||
return int(s)
|
||||
|
||||
|
||||
def _td_part_break(td):
|
||||
text = td.text or ""
|
||||
text = text.strip()
|
||||
if text:
|
||||
m = re.match(r"\(x(\d+)\) (\d+)", text)
|
||||
print(text, m, m.group(1), m.group(2))
|
||||
return dict(count=int(m.group(1)), damage=int(m.group(2)))
|
||||
return dict(count=0, damage=0)
|
||||
|
||||
def _td_part_sever(td):
|
||||
text = td.text or ""
|
||||
text = text.strip()
|
||||
if text:
|
||||
m = re.match(r"\((\w+)\) (\d+)", text)
|
||||
return dict(type=m.group(1), damage=int(m.group(2)))
|
||||
return dict(type="", damage=0)
|
||||
|
||||
|
||||
def get_monster_data(link):
    """Fetch a monster page and return its hitzone values per part name.

    ``link`` is a site-relative path appended to the mhrice base URL. The
    result maps each English part name to a dict keyed by the values of
    ``PART_HEADER_MAP`` (e.g. "Cut", "Fire") with integer hitzone values.
    Rows marked ``invalid`` or lacking an English name are skipped.

    Raises:
        requests.RequestException: on timeout or a non-success HTTP status.
        ValueError: if the page has no section whose heading starts with
            "hitzone".
    """
    hit_data = {}
    base = "https://mhrise.mhrice.info"
    url = base + link
    # Bound the wait and fail on HTTP errors rather than parsing an error page.
    result = requests.get(url, timeout=30)
    result.raise_for_status()
    root = lxml.etree.HTML(result.content)

    # Use the first table of the first "hitzone..." / "parts..." sections.
    hit_table = None
    parts_table = None
    for section in root.xpath("//section"):
        h2 = section.xpath('h2')
        if not h2 or not h2[0].text:
            continue
        heading = h2[0].text.lower()
        if hit_table is None and heading.startswith("hitzone"):
            hit_table = section.xpath('.//table')[0]
        elif parts_table is None and heading.startswith("parts"):
            parts_table = section.xpath('.//table')[0]
    if hit_table is None:
        raise ValueError("no hitzone table found at " + url)

    header_names = [th.text for th in hit_table.xpath('thead/tr/th')]
    part_id_name_map = {}
    for row in hit_table.xpath('tbody/tr'):
        if 'invalid' in row.attrib.get('class', ""):
            continue
        cols = dict(zip(header_names, row.xpath('td')))
        name_en_span = cols["Name"].xpath('.//span[@lang="en"]/span')
        if not name_en_span:
            continue
        name = name_en_span[0].text
        part_id_name_map[_td_part_id(cols["Part"])] = name
        hit_data[name] = {
            out_key: int(cols[header].text)
            for header, out_key in PART_HEADER_MAP.items()
        }

    return hit_data

    # NOTE(review): everything below is unreachable because of the return
    # above. It is kept as found; confirm whether break/sever data is meant
    # to be re-enabled before removing either the return or this code.

    # add break/sever data
    header_names = [th.text for th in parts_table.xpath('thead/tr/th')]
    breaks = []
    for row in parts_table.xpath('tbody/tr'):
        if 'invalid' in row.attrib.get('class', ""):
            continue
        cols = dict(zip(header_names, row.xpath('td')))
        part_id = _td_part_id(cols["Part"])
        part_name = part_id_name_map[part_id]
        hit_data[part_name]["_stagger"] = int(cols["Stagger"].text)
        part_break = (cols["Break"].text or "").strip()
        part_sever = (cols["Sever"].text or "").strip()
        hit_data[part_name]["_break"] = _td_part_break(cols["Break"])
        hit_data[part_name]["_sever"] = _td_part_sever(cols["Sever"])
        if part_break or part_sever:
            breaks.append(part_name)

    hit_data["_breaks"] = breaks
    return hit_data
|
||||
|
||||
|
||||
def pp(name, e):
    """Debug helper: print the tag and pretty-printed markup of ``e``.

    A list is expanded recursively, with each element labelled
    ``name[index]``.
    """
    if isinstance(e, list):
        for position, element in enumerate(e):
            pp("%s[%d]" % (name, position), element)
        return
    print(name, e.tag)
    print(lxml.etree.tostring(e, pretty_print=True))
|
||||
|
||||
|
||||
def get_monster_list():
    """Fetch the monster index page and return ``[{name, link}, ...]``.

    One entry is produced per ``li`` under ``ul#slist-monster``: ``name`` is
    the English display name and ``link`` is the ``href`` of the item's
    direct ``a`` child.

    Raises:
        requests.RequestException: on timeout or a non-success HTTP status.
    """
    # Bound the wait and fail on HTTP errors rather than parsing an error page.
    result = requests.get("https://mhrise.mhrice.info/monster.html", timeout=30)
    result.raise_for_status()
    root = lxml.etree.HTML(result.content)

    monsters = []
    for li in root.xpath('//ul[@id="slist-monster"]//li'):
        name = li.xpath('.//span[@lang="en"]/span')[0].text
        link = li.xpath('a')[0].attrib['href']
        monsters.append(dict(name=name, link=link))
    return monsters
|
||||
|
||||
|
||||
def _main():
    """Scrape the monster list and per-monster hitzones into ``sys.argv[1]``.

    Writes "monster_list.json" first, then "monster_hitboxes.json" keyed by
    monster name. A monster whose page fails to parse is reported and
    skipped; the loop pauses half a second after each monster.
    """
    outdir = sys.argv[1]

    monsters = get_monster_list()
    with open(os.path.join(outdir, "monster_list.json"), "w") as list_file:
        json.dump(monsters, list_file, indent=2)

    hitboxes = {}
    for monster in monsters:
        monster_name = monster["name"]
        print(monster_name)
        try:
            hitboxes[monster_name] = get_monster_data(monster["link"])
        except Exception as err:
            # Best effort: keep going so one bad page does not lose the rest.
            print("ERR: failed to parse hitzones for ", monster_name)
            print(repr(err), str(err))
        time.sleep(0.5)

    with open(os.path.join(outdir, "monster_hitboxes.json"), "w") as hit_file:
        json.dump(hitboxes, hit_file, indent=2)


if __name__ == '__main__':
    _main()
|
||||
352
scrapers/mhrice_weapons.py
Executable file
352
scrapers/mhrice_weapons.py
Executable file
@@ -0,0 +1,352 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import os.path
|
||||
import time
|
||||
import re
|
||||
import json
|
||||
from pprint import pprint
|
||||
from collections import defaultdict
|
||||
import lxml.etree
|
||||
|
||||
import requests
|
||||
|
||||
import _pathfix
|
||||
|
||||
from mhapi.util import WEAPON_TYPES
|
||||
|
||||
# Size of the ID range given to each weapon type: _main passes
# (itype+1) * MAX_PER_TYPE to get_weapon_list as id_offset, which is added to
# the numeric suffix of each weapon's page link.
MAX_PER_TYPE = 100000
|
||||
|
||||
def pp(name, e):
    """Debug helper: print the tag and pretty-printed markup of ``e``.

    When ``e`` is a list, each item is printed recursively under the label
    ``name[index]``.
    """
    if not isinstance(e, list):
        print(name, e.tag)
        print(lxml.etree.tostring(e, pretty_print=True))
    else:
        for idx, child in enumerate(e):
            pp(name + "[" + str(idx) + "]", child)
|
||||
|
||||
|
||||
def parse_sharpness(value_span):
    """Parse a sharpness bar into ``(values, values_plus)``.

    Each segment is a span such as
    ``<span class="mh-sharpness mh-sharpness-color-0" style="left:0%;width:47.5%;">``.
    A segment's value is twice its width percentage, truncated to an int;
    zero-width segments are ignored.

    * ``values`` is indexed by colour number; colours skipped by the page
      are filled with 0.
    * ``values_plus`` is built from segments carrying the
      ``mh-sharpness-half`` class: it starts as a copy of ``values`` and
      each half segment either extends the last entry (same colour as the
      previous segment) or is appended. It stays empty when there are no
      half segments.

    Spans without a colour class or a ``width`` declaration are skipped.
    Previously they reused the colour/width of the preceding span, or raised
    on the first span.
    """
    color_prefix = "mh-sharpness-color-"
    bar_span = value_span.xpath('.//span[@class="mh-sharpness-bar"]')[0]

    next_color = 0
    last_color_num = -1
    values = []
    values_plus = []
    for sharp_span in bar_span.xpath('.//span'):
        attr_style = sharp_span.attrib["style"]
        classes = sharp_span.attrib["class"].split()

        color_num = None
        for class_name in classes:
            if class_name.startswith(color_prefix):
                color_num = int(class_name[len(color_prefix):])
        half = "mh-sharpness-half" in classes

        value = None
        for declaration in attr_style.split(";"):
            prop, _, raw = declaration.partition(":")
            if prop.strip() == "width":
                value = int(2 * float(raw.strip().rstrip("%")))
                break

        if color_num is None or value is None:
            continue
        if value == 0:
            continue

        if half:
            if not values_plus:
                values_plus = list(values)
            if color_num == last_color_num:
                values_plus[-1] += value
            else:
                values_plus.append(value)
        else:
            # fill in missing colors, if any
            while next_color < color_num:
                values.append(0)
                next_color += 1
            values.append(value)
            next_color += 1
        last_color_num = color_num
    return values, values_plus
|
||||
|
||||
|
||||
def _map_element(e):
|
||||
if e == "Bomb":
|
||||
return "Blast"
|
||||
if e == "Paralyze":
|
||||
return "Paralysis"
|
||||
return e
|
||||
|
||||
|
||||
def _parse_slot_levels(value_span):
    """Return the slot levels shown by the slot icons inside ``value_span``."""
    # <img alt="A level-2 slot" class="mh-slot" src="/resources/slot_1.png">
    # <img alt="A level-4 slot" class="mh-slot-large" src="/resources/slot_3.png">
    levels = []
    for slot_img in value_span.xpath('.//span[@class="mh-slot-outer"]/img'):
        m = re.match(r".*/slot_(\d+)\.png", slot_img.attrib["src"])
        if m:
            # The icon number is one less than the level in the alt text.
            levels.append(int(m.group(1)) + 1)
    return levels


def get_weapon_details(wtype, name, link):
    """Fetch one weapon page and return its stats as a dict.

    ``wtype`` and ``name`` are copied into the result; ``link`` is a
    site-relative path appended to the mhrice base URL. Depending on which
    rows the page's key/value list contains, the result may include:
    ``rarity``, ``attack``, ``affinity``, ``defense``, ``element`` /
    ``element_attack``, ``element_2`` / ``element_2_attack``, ``slots``,
    ``rampage_slots``, ``sharpness`` / ``sharpness_plus``, ``phial`` /
    ``phial_value`` or ``bottle``, ``shelling_type`` / ``shelling_level`` or
    ``type``, ``bug_level``, and from the "Crafting" section
    ``creation_cost`` / ``create_components`` and ``upgrade_cost`` /
    ``upgrade_components``.

    Raises:
        requests.RequestException: on timeout or a non-success HTTP status.
    """
    data = dict(wtype=wtype, name=name)
    url = "https://mhrise.mhrice.info" + link
    # Bound the wait and fail on HTTP errors rather than parsing an error page.
    result = requests.get(url, timeout=30)
    result.raise_for_status()
    root = lxml.etree.HTML(result.content)

    # The rarity is the number after the last '-' of the icon's class.
    icon_div = root.xpath('//div[@class="mh-title-icon"]/div[@class="mh-colored-icon"]/div')[0]
    rarity_class = icon_div.attrib["class"]
    data["rarity"] = int(rarity_class.split("-")[-1])

    stat_div = root.xpath('//div[@class="mh-kvlist"]')[0]
    for kv in stat_div.xpath('.//p[@class="mh-kv"]'):
        spans = kv.xpath('span')
        key = spans[0].text.strip().lower()
        if key in ("attack", "affinity", "defense"):
            data[key] = int(spans[1].text.rstrip("%"))
        elif key == "element":
            value_spans = spans[1].xpath("span")
            value = value_spans[0].text.strip()
            if value:
                parts = value.split()
                if parts[0] == "None":
                    data["element"] = None
                    data["element_attack"] = None
                else:
                    data["element"] = _map_element(parts[0])
                    data["element_attack"] = int(parts[1])
            if len(value_spans) > 1:
                parts = value_spans[1].text.strip().split()
                data["element_2"] = _map_element(parts[0])
                data["element_2_attack"] = int(parts[1])
            else:
                data["element_2"] = None
                data["element_2_attack"] = None
        elif key == "slot":
            data["slots"] = _parse_slot_levels(spans[1])
        elif key == "rampage slot":
            data["rampage_slots"] = _parse_slot_levels(spans[1])
        elif key == "sharpness":
            sharp, sharp_plus = parse_sharpness(spans[1])
            data["sharpness"] = sharp
            data["sharpness_plus"] = sharp_plus
        elif key == "bottle":
            value = spans[1].text.strip()
            if wtype == "Charge Blade":
                key = "phial"
                if value == "Power":
                    value = "Impact"
                if value == "StrongElement":
                    value = "Element"
            if wtype == "Switch Axe":
                # Text is "<phial type> <number>".
                key = "phial"
                parts = value.split()
                value = parts[0]
                if value == "StrongElement":
                    value = "Element"
                if value == "DownStamina":
                    value = "Exhaust"
                phial_num = int(parts[1])
                if phial_num > 0:
                    data["phial_value"] = phial_num
            data[key] = value
        elif key == "type":
            parts = spans[1].text.strip().split()
            value = parts[0]
            if len(parts) > 1:
                data["shelling_level"] = int(parts[1])
            if wtype == "Gunlance":
                key = "shelling_type"
                if value == "Radial":
                    value = "Long"
                elif value == "Diffusion":
                    value = "Wide"
            data[key] = value
        elif key == "insect level":
            data["bug_level"] = int(spans[1].text.strip())

    # Forge / upgrade costs come from the table of the "Crafting" section.
    craft_table = None
    for section in root.xpath("//section"):
        h2 = section.xpath("h2/text()")
        if h2 and h2[0] == "Crafting":
            craft_table = section.xpath("div/table/tbody")[0]
            break
    if craft_table is not None:
        for row in craft_table.xpath("tr"):
            cells = row.findall("td")
            craft_type = cells[0].text.strip()
            if craft_type.startswith("Forge"):
                zenny, comps = get_components(cells)
                data["creation_cost"] = zenny
                data["create_components"] = comps
            elif craft_type.startswith("Upgrade"):
                zenny, comps = get_components(cells)
                data["upgrade_cost"] = zenny
                data["upgrade_components"] = comps

    return data
|
||||
|
||||
|
||||
def get_components(cells):
    """Return ``(zenny, components)`` for one crafting table row.

    ``cells[1]`` holds the cost. Unless the text of ``cells[2]`` is "-", it
    contributes one entry: the English name mapped to the integer at the
    start of the text following the cell's first span. Every ``ul/li`` of
    ``cells[3]`` contributes ``{English name: count}``, where the count is
    the item's leading text without its trailing "x".
    """
    zenny = int(cells[1].text)
    components = {}

    if cells[2].text != "-":
        category = cells[2].xpath('.//span[@lang="en"]/span')[0].text
        points_text = cells[2].xpath("span")[0].tail
        components[category] = int(points_text.split(" ")[0])

    for item in cells[3].xpath("ul/li"):
        quantity = int(item.text.strip().rstrip("x"))
        material = item.xpath('.//span[@lang="en"]/span')[0].text
        components[material] = quantity

    return (zenny, components)
|
||||
|
||||
|
||||
def get_rice_id(link):
    """Return the integer after the last "_" of the link's file name.

    Example: "/weapon/GreatSword_026.html" -> 26.
    """
    stem = os.path.splitext(os.path.basename(link))[0]
    _prefix, number = stem.rsplit("_", maxsplit=1)
    return int(number)
|
||||
|
||||
|
||||
def get_weapon_list(wtype, id_offset):
    """Fetch the weapon tree page for ``wtype`` and flatten it to a list.

    Each entry is a dict with ``name``, ``link``, ``_id`` (numeric link
    suffix plus ``id_offset``), ``parent_name`` (``None`` for a tree root)
    and ``final`` (True when the item has no nested items). A link is only
    reported once; later occurrences print a warning and are dropped.

    Returns an empty list when the page contains no weapon tree items.

    Raises:
        requests.RequestException: if the request times out or fails.
    """
    # Page names that differ from the lower-cased, underscored type name.
    page_names = {
        "Sword and Shield": "short_sword",
        "Hunting Horn": "horn",
        "Gunlance": "gun_lance",
        "Switch Axe": "slash_axe",
        "Charge Blade": "charge_axe",
    }
    ftype = page_names.get(wtype)
    if ftype is None:
        ftype = wtype.lower().replace(" ", "_")
    list_fname = ftype + ".html"

    # No status check here: the caller treats an empty list as "no weapons
    # of this type" and carries on. The timeout only prevents hanging.
    result = requests.get("https://mhrise.mhrice.info/weapon/" + list_fname,
                          timeout=30)
    root = lxml.etree.HTML(result.content)

    weapons = []
    seen = set()
    # NOTE(review): this selects nested items too, so every non-root weapon
    # is walked again and reported as a duplicate; `seen` keeps the result
    # correct. Confirm the page structure before narrowing the query.
    for li in root.xpath('//div[@class="mh-weapon-tree"]//li'):
        # Depth-first walk; each stack entry carries the parent's name.
        stack = [(li, None)]
        while stack:
            current_li, parent_name = stack.pop()

            a = current_li.xpath('a[@class="mh-icon-text"]')[0]
            sublists = current_li.xpath('ul/li')

            name = a.xpath('.//span[@lang="en"]/span')[0].text
            link = a.attrib['href']

            stack.extend((child, name) for child in sublists)

            if link in seen:
                print("WARN: Duplicate ", name, link)
                continue
            seen.add(link)

            weapons.append(dict(
                name=name,
                link=link,
                _id=get_rice_id(link) + id_offset,
                parent_name=parent_name,
                final=(len(sublists) == 0),
            ))

    return weapons
|
||||
|
||||
|
||||
def test_details():
    """Manual check: fetch a fixed set of weapon pages and print the results."""
    samples = (
        ("Great Sword", "Sinister Shadowblade+", "/weapon/GreatSword_403.html"),
        ("Great Sword", "Redwing Claymore I", "/weapon/GreatSword_068.html"),
        ("Great Sword", "Defender Great Sword I", "/weapon/GreatSword_132.html"),
        ("Great Sword", "Kamura Warrior Cleaver", "/weapon/GreatSword_300.html"),
        ("Dual Blades", "Blood Wind Skards+", "/weapon/DualBlades_319.html"),
        ("Switch Axe", "Arzuros Jubilax", "/weapon/SlashAxe_323.html"),
        ("Switch Axe", "Leave-Taker+", "/weapon/SlashAxe_307.html"),
        ("Insect Glaive", "Fine Kamura Glaive", "/weapon/InsectGlaive_302.html"),
    )
    for sample in samples:
        print(sample)
        details = get_weapon_details(*sample)
        pprint(details)
        print()
|
||||
|
||||
|
||||
def _main():
    """Build "weapon_list.json" in ``sys.argv[1]`` for every weapon type.

    Entries of an existing output file are reused (keyed by type and name,
    first occurrence wins) so that only weapons not seen before trigger a
    detail-page fetch, followed by a half-second pause. IDs and parent links
    are always taken from the freshly scraped weapon tree.
    """
    weapons_type_name_map = defaultdict(dict)
    weapons_data = []

    outdir = sys.argv[1]
    outfile = os.path.join(outdir, "weapon_list.json")
    if os.path.exists(outfile):
        print("Loading existing data from ", outfile)
        with open(outfile) as existing:
            old_data = json.load(existing)
        for entry in old_data:
            known = weapons_type_name_map[entry["wtype"]]
            if entry["name"] in known:
                print("Removing duplicate ", entry["wtype"], entry["name"])
            else:
                known[entry["name"]] = entry

    for type_number, wtype in enumerate(WEAPON_TYPES, start=1):
        known = weapons_type_name_map[wtype]
        weapons = get_weapon_list(wtype, type_number * MAX_PER_TYPE)
        if not weapons:
            print("WARN: no weapons of type", wtype)
            continue

        ids_by_name = {}
        for w in weapons:
            # always re-calculate IDs
            ids_by_name[w["name"]] = w["_id"]
            parent = w["parent_name"]
            w["parent_id"] = ids_by_name[parent] if parent else None

            data = known.get(w["name"])
            if data is None:
                print("ADD", wtype, w["_id"], w["name"], w["link"])
                data = get_weapon_details(wtype, w["name"], w["link"])
                data.update(w)
                weapons_data.append(data)
                time.sleep(0.5)
            else:
                print("UP ", wtype, w["_id"], w["name"], w["link"])
                data.update(w)
                weapons_data.append(data)

    with open(os.path.join(outdir, "weapon_list.json"), "w") as out:
        json.dump(weapons_data, out, indent=2)


if __name__ == '__main__':
    #test_details()
    _main()
|
||||
Reference in New Issue
Block a user