4u stars filter, static damage gen, rise updates

This commit is contained in:
Bryce Allen
2022-07-31 12:12:03 -04:00
parent 6b57d498b6
commit 228c594ca9
27 changed files with 161945 additions and 109 deletions

268
scrapers/fextralife-weapons.py Executable file
View File

@@ -0,0 +1,268 @@
#!/usr/bin/env python3
import os.path
import sys
import re
import json
import lxml.etree
import requests
# Weapon types to parse. _main() reads one "<weapon type>.html" file per entry
# from the input directory. The commented-out line is a longer list that is
# currently disabled.
#WTYPES = ["Great Sword", "Long Sword", "Sword and Shield", "Dual Blades", "Lance", "Gunlance", "Hammer"]
WTYPES = ["Great Sword", "Lance", "Hammer"]
# Captures the integer percentage of an inline style that starts with
# "width: N%;" (used with .match(), so the style must begin with it).
WIDTH_RE = re.compile(r'width: *(\d+)%;')
# Matches "<name> x<count>" with an optional trailing " Points" (group 3).
PART_RE = re.compile(r'(.*) x(\d+)( Points)?')
# Matches the alternative "<name> <count> pts." spelling, e.g.:
# MR Bone 20 pts.
PART_RE_MR = re.compile(r'(.*) (\d+) +pts\.?')
"""
<div class="progress" style="max-width: 100%; min-width: 100px;">
<div class="progress-bar danger-color-dark" style="width: 11%;">
&nbsp;
</div>
<div class="progress-bar warning-color-dark" style="width: 20%;">
&nbsp;
</div>
<div class="progress-bar warning-color" style="width: 12%;">
&nbsp;
</div>
<div class="progress-bar success-color" style="width: 0%;">
&nbsp;
</div>
<div class="progress-bar primary-color-dark" style="width: 0%;">
&nbsp;
</div>
<div class="progress-bar white" style="width: 0%;">
&nbsp;
</div>
</div>
"""
def parse_sharpness(div):
    """Read the sharpness segment widths from a progress-bar container.

    Args:
        div: Element whose direct ``div`` children are the bar segments.
            Each segment is expected to carry an inline style that starts
            with ``width: N%;``.

    Returns:
        List of the integer percentages, in document order. Segments with
        no style attribute, or whose style does not start with a width
        declaration, are skipped.
    """
    values = []
    for segment in div.xpath('div'):
        # .get() returns None when the attribute is missing, and re.match()
        # raises TypeError on None, so fall back to an empty style.
        style = segment.get("style") or ""
        m = WIDTH_RE.match(style)
        if m:
            values.append(int(m.group(1)))
    return values
def parse_rampage(td):
    """Return the link texts found in the cell's bullet list.

    The result of the XPath query ``ul/li/a/text()`` on *td* is handed back
    unchanged.
    """
    skill_names = td.xpath('ul/li/a/text()')
    return skill_names
def parse_crafting(td):
    """Parse a crafting-materials cell into a ``{name: quantity}`` dict.

    Each ``ul/li`` item of *td* is handled by one of three rules:

    * the item has a link (or a leading "+ " marker): the stripped link text
      is the material name, with "+" appended when the marker was present
      and " Points" appended when the quantity text ended with " Points";
    * the item text is digits only: stored under the key ``'zenny'``;
    * otherwise the text is matched against PART_RE, then PART_RE_MR; text
      matching neither is ignored.

    Returns:
        The materials dict, or an empty dict as soon as one item has no
        direct text node (a diagnostic line is printed in that case).
    """
    points_suffix = " Points"
    materials = {}
    for li in td.xpath('ul/li'):
        link_texts = li.xpath('a/text()')
        li_texts = li.xpath('text()')
        if not li_texts:
            print("Unknown format: ", lxml.etree.tostring(td))
            return {}
        litext = li_texts[0].strip()
        litext = litext.rstrip('\xa0')
        litext = litext.rstrip('.')
        if litext.endswith('l'):
            # A trailing lowercase "l" is treated as a mistyped digit "1".
            litext = litext[:-1] + '1'
        # Remember the "+ " marker so it can be attached to the material
        # name; adding it to the xpath result list would silently drop it.
        has_plus = litext.startswith('+ ')
        if has_plus:
            litext = litext[2:]
        if litext.startswith('x'):
            litext = litext[1:]
        if link_texts or has_plus:
            name = link_texts[0].strip() if link_texts else ''
            if has_plus:
                name += '+'
            if litext.endswith(points_suffix):
                # Slice the suffix off; str.rstrip() strips a character set.
                litext = litext[:-len(points_suffix)]
                name += points_suffix
            try:
                materials[name] = clean_int(litext)
            except ValueError:
                print("WARN: failed parsing ", name, litext)
                if litext == 'l':
                    materials[name] = 1
        elif litext.isdigit():
            materials['zenny'] = clean_int(litext)
        else:
            m = PART_RE.match(litext)
            if m:
                # Group 3 is the optional " Points" suffix; group 2 (the
                # count) is always present, so it cannot tell the two apart.
                if m.group(3):
                    key = m.group(1) + ' Points'
                else:
                    key = m.group(1)
                materials[key] = int(m.group(2))
            else:
                m = PART_RE_MR.match(litext)
                if m:
                    materials[m.group(1) + ' Points'] = int(m.group(2))
    return materials
def clean_text(t):
    """Strip surrounding whitespace, then any trailing non-breaking spaces."""
    return t.strip().rstrip('\xa0')
def clean_int(s):
    """Convert scraped cell text to an int, treating "no text" as 0.

    Args:
        s: Text to convert, or None (the ``.text`` of an element that has
            no text content).

    Returns:
        0 for None or for text that is empty after clean_text(); otherwise
        ``int()`` of the cleaned text.

    Raises:
        ValueError: if the cleaned text is not an integer literal.
    """
    # An empty cell has .text == None; handle it like the empty string
    # instead of failing inside clean_text().
    if s is None:
        return 0
    s = clean_text(s)
    if not s:
        return 0
    return int(s)
def parse_element(td):
    """Read the element type and element attack value from a table cell.

    The type is the first link text of *td* and the value is the cell's
    first direct text node. When either is missing, both result fields are
    None.

    Returns:
        dict with the keys ``type`` and ``attack``.
    """
    etype = td.xpath('a/text()')
    if not etype:
        return dict(type=None, attack=None)
    values = td.xpath('./text()')
    if not values:
        return dict(type=None, attack=None)
    return dict(type=etype[0], attack=clean_int(values[0].strip()))
def parse_rarity(td):
    """Return the rarity number from a cell, defaulting to 8.

    The second whitespace-separated word of the cell's first text node is
    converted with clean_int(). 8 is returned when the cell has no text or
    the first text node has fewer than two words.
    """
    text_nodes = td.xpath('.//text()')
    if not text_nodes:
        return 8
    words = text_nodes[0].split()
    if len(words) <= 1:
        return 8
    return clean_int(words[1])
def parse_slots(td):
    """Collect decoration slot levels from the images inside a cell.

    Only images whose ``title`` starts with ``gem_`` are used; the level is
    the third underscore-separated field of that title.

    Returns:
        List of int slot levels in document order.
    """
    titles = (img.get("title") for img in td.xpath('.//img'))
    return [
        int(title.split("_")[2])
        for title in titles
        if title and title.startswith('gem_')
    ]
def adjust_slots_rampage(data):
    """Split the rampage slot off the end of a weapon's slot list, in place.

    For weapons of rarity 8 or higher the last entry of ``data['slots']``
    is moved into ``data['rampage_slot']``. In every other case, including
    a rarity 8+ weapon with an empty slot list, ``data['rampage_slot']`` is
    set to 0 and the slot list is left untouched.

    Args:
        data: Weapon dict with ``rarity`` and ``slots`` keys; modified in
            place.
    """
    # The extra emptiness check avoids an IndexError on slots[-1] when a
    # high-rarity row has no slot images.
    if data['rarity'] >= 8 and data['slots']:
        data['rampage_slot'] = data['slots'][-1]
        data['slots'] = data['slots'][:-1]
    else:
        data['rampage_slot'] = 0
def gl_parse_tr(tr):
    """Parse a ten-column weapon row into a weapon dict.

    This layout has a shot type column and a level column between defense
    and rarity (presumably the Gunlance table; verify against the page).
    The rampage slot is split off the slot list before returning.
    """
    cells = tr.xpath('td')
    name_cell = cells[0]
    row = {
        'name': name_cell.xpath('a/text()')[0],
        'slots': parse_slots(name_cell),
        'sharpness': parse_sharpness(name_cell.xpath('div')[0]),
        'attack': clean_int(cells[1].text),
    }
    element = parse_element(cells[2])
    row.update({
        'element': element['type'],
        'element_attack': element['attack'],
        'element_2': None,
        'element_2_attack': None,
        'affinity': clean_int(cells[3].text.rstrip('%')),
        'defense': clean_int(cells[4].text),
        'shot_type': cells[5].text,
        'level': clean_int(cells[6].text.split()[1]),
        'rarity': parse_rarity(cells[7]),
        'rampage_skills': parse_rampage(cells[8]),
        'crafting': parse_crafting(cells[9]),
    })
    adjust_slots_rampage(row)
    return row
def default_parse_tr(tr):
    """Parse one weapon table row into a weapon dict.

    Rows with exactly ten cells are delegated to gl_parse_tr(). All other
    rows are read with the layout: name, attack, element, affinity,
    defense, rarity, rampage skills, crafting. The rampage slot is split
    off the slot list before returning.
    """
    cells = tr.xpath('td')
    if len(cells) == 10:
        return gl_parse_tr(tr)
    name_cell = cells[0]
    row = {
        'name': name_cell.xpath('a/text()')[0],
        'slots': parse_slots(name_cell),
        'sharpness': parse_sharpness(name_cell.xpath('div')[0]),
        'attack': clean_int(cells[1].text),
    }
    element = parse_element(cells[2])
    row.update({
        'element': element['type'],
        'element_attack': element['attack'],
        'element_2': None,
        'element_2_attack': None,
        'affinity': clean_int(cells[3].text.rstrip('%')),
        'defense': clean_int(cells[4].text),
        'rarity': parse_rarity(cells[5]),
        'rampage_skills': parse_rampage(cells[6]),
        'crafting': parse_crafting(cells[7]),
    })
    adjust_slots_rampage(row)
    return row
def parse_fextralife_weapons(text):
    """Parse a saved weapon-list page into a list of weapon dicts.

    Only the first table inside the ``wiki-content-block`` div is read;
    every ``tbody/tr`` row of it goes through default_parse_tr().
    """
    doc = lxml.etree.HTML(text)
    first_table = doc.xpath('//div[@id="wiki-content-block"]//table')[0]
    return [default_parse_tr(row) for row in first_table.xpath('tbody/tr')]
def pp(name, e):
    """Debug helper: print an element, or every element of a nested list.

    A single element is printed as its label and tag followed by its
    serialized markup. For a list, each member is printed recursively with
    ``[index]`` appended to the label.
    """
    if not isinstance(e, list):
        print(name, e.tag)
        print(lxml.etree.tostring(e, pretty_print=True))
        return
    for idx, child in enumerate(e):
        pp(name + "[" + str(idx) + "]", child)
def _main():
    """Parse the saved page of every weapon type and write one JSON file.

    Command line: ``<input dir> <output json path>``. For each entry of
    WTYPES the file ``<input dir>/<weapon type>.html`` is parsed and every
    resulting weapon is tagged with its ``wtype``.
    """
    indir, outpath = sys.argv[1], sys.argv[2]
    all_weapons = []
    for wtype in WTYPES:
        print(wtype)
        html_path = os.path.join(indir, wtype + ".html")
        with open(html_path) as infile:
            page = infile.read()
        parsed = parse_fextralife_weapons(page)
        for weapon in parsed:
            weapon["wtype"] = wtype
        all_weapons.extend(parsed)
    with open(outpath, "w") as outfile:
        json.dump(all_weapons, outfile, indent=2)


if __name__ == '__main__':
    _main()

159
scrapers/mhrice_monsters.py Executable file
View File

@@ -0,0 +1,159 @@
#!/usr/bin/env python3
import sys
import os.path
import time
import re
import json
import lxml.etree
import requests
# Maps hitzone-table column headers (keys) to the damage-type names used as
# keys in the per-part output of get_monster_data() (values). Only "Slash"
# is renamed, to "Cut".
PART_HEADER_MAP = dict(Slash="Cut",
                       Impact="Impact",
                       Shot="Shot",
                       Fire="Fire",
                       Water="Water",
                       Ice="Ice",
                       Thunder="Thunder",
                       Dragon="Dragon")
def _td_part_id(td):
    """Return the integer part id shown in a "Part" table cell.

    The cell's first text node is either a bare number or starts with the
    id in square brackets, e.g. ``[3]`` or ``[12]``.

    Raises:
        ValueError: if the extracted text is not an integer.
    """
    s = td.xpath('.//text()')[0].strip()
    if s.startswith("["):
        close = s.find("]")
        # Take everything up to the closing bracket so ids of ten and above
        # are not cut down to their first digit. Without a closing bracket,
        # fall back to the single character after "[".
        s = s[1:close] if close != -1 else s[1:2]
    return int(s)
def _td_part_break(td):
    """Parse a "Break" cell of the form ``(x<count>) <damage>``.

    Returns:
        dict with the int fields ``count`` and ``damage``; both are 0 when
        the cell has no text.

    Raises:
        ValueError: if the cell has text that does not start with the
            expected pattern.
    """
    text = (td.text or "").strip()
    if not text:
        return dict(count=0, damage=0)
    m = re.match(r"\(x(\d+)\) (\d+)", text)
    if m is None:
        # Report the offending text instead of failing with an
        # AttributeError on the missing match object.
        raise ValueError("unrecognized break cell text: %r" % text)
    return dict(count=int(m.group(1)), damage=int(m.group(2)))
def _td_part_sever(td):
    """Parse a "Sever" cell of the form ``(<type>) <damage>``.

    Returns:
        dict with ``type`` (str) and ``damage`` (int); an empty type and 0
        damage when the cell has no text.
    """
    text = (td.text or "").strip()
    if not text:
        return dict(type="", damage=0)
    m = re.match(r"\((\w+)\) (\d+)", text)
    return dict(type=m.group(1), damage=int(m.group(2)))
def get_monster_data(link):
    """Download one monster page and extract its hitzone table.

    Args:
        link: Site-relative path of the monster page; it is appended to
            "https://mhrise.mhrice.info".

    Returns:
        dict mapping each English part name to a dict of damage-type name
        (the values of PART_HEADER_MAP) -> int hitzone value.
    """
    hit_data = {}
    base = "https://mhrise.mhrice.info"
    url = base + link
    result = requests.get(url)
    root = lxml.etree.HTML(result.content)
    sections = root.xpath("//section")
    hit_table = None
    parts_table = None
    # Pick the first table of the first section whose <h2> text starts with
    # "hitzone", and likewise for "parts" (case-insensitive).
    for section in sections:
        h2 = section.xpath('h2')
        if h2 and h2[0].text:
            if hit_table is None and h2[0].text.lower().startswith("hitzone"):
                hit_table = section.xpath('.//table')[0]
            elif parts_table is None and h2[0].text.lower().startswith("parts"):
                parts_table = section.xpath('.//table')[0]
    #pp("hit_table", hit_table)
    #pp("tr", hit_table.xpath('thead/tr'))
    header_cells = hit_table.xpath('thead/tr/th')
    header_names = [th.text for th in header_cells]
    #print("names", header_names)
    rows = hit_table.xpath('tbody/tr')
    # part id -> English part name; read by the break/sever section below.
    part_id_name_map = {}
    for row in rows:
        # Skip rows whose class attribute contains "invalid".
        if 'invalid' in row.attrib.get('class', ""):
            continue
        #pp("tr", row)
        # Address the row's cells by their column header text.
        cols = dict(zip(header_names, row.xpath('td')))
        name_td = cols["Name"]
        #pp("name_td", name_td)
        name_en_span = name_td.xpath('.//span[@lang="en"]/span')
        if not name_en_span:
            # No English name for this row; nothing to key the data on.
            continue
        name = name_en_span[0].text
        #pp("part", cols["Part"].xpath('.//text()'))
        part_id = _td_part_id(cols["Part"])
        part_id_name_map[part_id] = name
        hit_data[name] = {}
        for k in PART_HEADER_MAP.keys():
            hit_data[name][PART_HEADER_MAP[k]] = int(cols[k].text)
    #print(hit_data)
    # NOTE(review): this unconditional return makes the break/sever section
    # below unreachable -- confirm whether that is intentional.
    return hit_data
    # add break/sever data
    header_cells = parts_table.xpath('thead/tr/th')
    header_names = [th.text for th in header_cells]
    #print(header_names)
    rows = parts_table.xpath('tbody/tr')
    # Names of parts that have any break or sever text.
    breaks = []
    for row in rows:
        if 'invalid' in row.attrib.get('class', ""):
            continue
        cols = dict(zip(header_names, row.xpath('td')))
        part_id = _td_part_id(cols["Part"])
        part_name = part_id_name_map[part_id]
        hit_data[part_name]["_stagger"] = int(cols["Stagger"].text)
        part_break = cols["Break"].text or ""
        part_sever = cols["Sever"].text or ""
        part_break = part_break.strip()
        part_sever = part_sever.strip()
        hit_data[part_name]["_break"] = _td_part_break(cols["Break"])
        hit_data[part_name]["_sever"] = _td_part_sever(cols["Sever"])
        if part_break or part_sever:
            breaks.append(part_name)
    hit_data["_breaks"] = breaks
    return hit_data
def pp(name, e):
    """Debug printer for an element or a (nested) list of elements.

    Lists are walked recursively, extending the label with ``[index]``.
    Anything else is printed as label and tag, then as serialized markup.
    """
    if isinstance(e, list):
        for idx, member in enumerate(e):
            label = name + ("[" + str(idx) + "]")
            pp(label, member)
        return
    print(name, e.tag)
    print(lxml.etree.tostring(e, pretty_print=True))
def get_monster_list():
    """Download the monster index page and list every monster on it.

    Returns:
        List of dicts with ``name`` (English name text) and ``link`` (the
        href of the entry's anchor), in page order.
    """
    response = requests.get("https://mhrise.mhrice.info/monster.html")
    doc = lxml.etree.HTML(response.content)
    return [
        dict(
            name=item.xpath('.//span[@lang="en"]/span')[0].text,
            link=item.xpath('a')[0].attrib['href'],
        )
        for item in doc.xpath('//ul[@id="slist-monster"]//li')
    ]
def _main():
    """Scrape the monster list and per-monster hitzones into JSON files.

    Command line: ``<output dir>``. Writes ``monster_list.json`` first,
    then ``monster_hitboxes.json``. A monster whose page fails to parse is
    reported and skipped. There is a half-second pause after each monster.
    """
    outdir = sys.argv[1]
    monsters = get_monster_list()
    list_path = os.path.join(outdir, "monster_list.json")
    with open(list_path, "w") as out:
        json.dump(monsters, out, indent=2)
    hitboxes = {}
    for monster in monsters:
        print(monster["name"])
        try:
            hitboxes[monster["name"]] = get_monster_data(monster["link"])
        except Exception as exc:
            print("ERR: failed to parse hitzones for ", monster["name"])
            print(repr(exc), str(exc))
        time.sleep(0.5)
    hitbox_path = os.path.join(outdir, "monster_hitboxes.json")
    with open(hitbox_path, "w") as out:
        json.dump(hitboxes, out, indent=2)


if __name__ == '__main__':
    _main()

352
scrapers/mhrice_weapons.py Executable file
View File

@@ -0,0 +1,352 @@
#!/usr/bin/env python3
import sys
import os.path
import time
import re
import json
from pprint import pprint
from collections import defaultdict
import lxml.etree
import requests
import _pathfix
from mhapi.util import WEAPON_TYPES
# Size of the ID range reserved per weapon type: _main() passes
# (type index + 1) * MAX_PER_TYPE to get_weapon_list() as the id offset.
MAX_PER_TYPE = 100000
def pp(name, e):
    """Print an element for debugging, recursing into lists of elements.

    Each list member is printed under the label ``name[index]``. A
    non-list value is printed as label and tag, then as serialized markup.
    """
    if not isinstance(e, list):
        print(name, e.tag)
        print(lxml.etree.tostring(e, pretty_print=True))
    else:
        for idx, item in enumerate(e):
            pp(name + f"[{idx}]", item)
def parse_sharpness(value_span):
    """Convert a sharpness bar into base and extended sharpness lists.

    Args:
        value_span: Element containing a ``span`` whose class is exactly
            "mh-sharpness-bar"; the spans inside it are the bar segments.

    Returns:
        Tuple ``(values, values_plus)``. ``values`` holds one int per color
        index for the segments without the "mh-sharpness-half" class, with
        0 filled in for skipped colors. ``values_plus`` is empty unless a
        "half" segment occurs; it then starts as a copy of ``values`` and
        is extended by the half segments. Each entry is
        ``int(2 * width percentage)``.
    """
    bar_span = value_span.xpath('.//span[@class="mh-sharpness-bar"]')[0]
    sharp_spans = bar_span.xpath('.//span')
    # i is the next color index expected in ``values``.
    i = 0
    last_color_num = -1
    values = []
    values_plus = []
    for sharp_span in sharp_spans:
        # <span class="mh-sharpness mh-sharpness-color-0" style="left:0%;width:47.5%;"></span>
        attr_style = sharp_span.attrib["style"]
        attr_class = sharp_span.attrib["class"]
        classes = attr_class.split()
        half = False
        for class_name in classes:
            if class_name.startswith("mh-sharpness-color-"):
                # Only the final character is read, so color indices are
                # taken to be single digits.
                color_num = int(class_name[-1])
            if class_name == "mh-sharpness-half":
                half = True
        styles = attr_style.split(";")
        for s in styles:
            s = s.strip()
            if not s:
                continue
            parts = s.split(":")
            if parts[0] == "width":
                # Width percentages are doubled and truncated to int.
                value = int(2*float(parts[1].rstrip("%")))
                break
        # NOTE(review): if a segment has no width declaration, ``value``
        # keeps the previous segment's value -- confirm this cannot occur.
        if value == 0:
            continue
        if half:
            # The first half segment seeds the extended list from the base.
            if not values_plus:
                values_plus = list(values)
            if color_num == last_color_num:
                # Same color as the previous segment: grow that entry.
                values_plus[-1] += value
            else:
                values_plus.append(value)
        else:
            # fill in missing colors, if any
            while i < color_num:
                values.append(0)
                i += 1
            values.append(value)
            i += 1
        last_color_num = color_num
    return values, values_plus
def _map_element(e):
    """Rename the element names "Bomb" and "Paralyze".

    "Bomb" becomes "Blast" and "Paralyze" becomes "Paralysis"; every other
    value is returned unchanged.
    """
    for site_name, mapped_name in (("Bomb", "Blast"), ("Paralyze", "Paralysis")):
        if e == site_name:
            return mapped_name
    return e
def get_weapon_details(wtype, name, link):
    """Download one weapon page and collect its stats and crafting data.

    Args:
        wtype: Weapon type name; selects the handling of the "bottle" and
            "type" rows ("Charge Blade", "Switch Axe", "Gunlance").
        name: Weapon name, stored unchanged in the result.
        link: Site-relative path of the weapon page; it is appended to
            "https://mhrise.mhrice.info".

    Returns:
        dict starting with ``wtype`` and ``name``, plus ``rarity`` and one
        or more keys per recognized row of the page's key/value list, plus
        cost and component keys when a "Crafting" section is present.
    """
    data = dict(wtype=wtype, name=name)
    url = "https://mhrise.mhrice.info" + link
    result = requests.get(url)
    root = lxml.etree.HTML(result.content)
    icon_div = root.xpath('//div[@class="mh-title-icon"]/div[@class="mh-colored-icon"]/div')[0]
    # Rarity is the number after the last "-" in the icon's class attribute.
    rarity_class = icon_div.attrib["class"]
    data["rarity"] = int(rarity_class.split("-")[-1])
    stat_div = root.xpath('//div[@class="mh-kvlist"]')[0]
    kvlist = stat_div.xpath('.//p[@class="mh-kv"]')
    # Each entry has a label span (spans[0]) and a value span (spans[1]).
    for kv in kvlist:
        spans = kv.xpath('span')
        key = spans[0].text.strip().lower()
        if key in set(["attack", "affinity", "defense"]):
            value = spans[1].text
            # A trailing percent sign, if any, is dropped before int().
            value = value.rstrip("%")
            data[key.lower()] = int(value)
        elif key == "element":
            value_spans = spans[1].xpath("span")
            value = value_spans[0].text.strip()
            if value:
                parts = value.split()
                if parts[0] == "None":
                    data["element"] = None
                    data["element_attack"] = None
                else:
                    data["element"] = _map_element(parts[0])
                    data["element_attack"] = int(parts[1])
            # A second value span, when present, holds a second element.
            if len(value_spans) > 1:
                value = value_spans[1].text.strip()
                parts = value.split()
                data["element_2"] = _map_element(parts[0])
                data["element_2_attack"] = int(parts[1])
            else:
                data["element_2"] = None
                data["element_2_attack"] = None
        elif key == "slot":
            # <img alt="A level-2 slot" class="mh-slot" src="/resources/slot_1.png">
            # <img alt="A level-4 slot" class="mh-slot-large" src="/resources/slot_3.png">
            slots = []
            value_span = spans[1]
            slot_imgs = value_span.xpath('.//span[@class="mh-slot-outer"]/img')
            for slot_img in slot_imgs:
                src = slot_img.attrib["src"]
                m = re.match(r".*/slot_(\d+)\.png", src)
                if m:
                    # The image number is zero-based; slot levels start at 1.
                    svalue = int(m.group(1)) + 1
                    slots.append(svalue)
            data["slots"] = slots
        elif key == "rampage slot":
            # Same image scheme as "slot", stored under "rampage_slots".
            slots = []
            value_span = spans[1]
            slot_imgs = value_span.xpath('.//span[@class="mh-slot-outer"]/img')
            for slot_img in slot_imgs:
                src = slot_img.attrib["src"]
                m = re.match(r".*/slot_(\d+).png", src)
                if m:
                    svalue = int(m.group(1)) + 1
                    slots.append(svalue)
            data["rampage_slots"] = slots
        elif key == "sharpness":
            value_span = spans[1]
            sharp, sharp_plus = parse_sharpness(value_span)
            data["sharpness"] = sharp
            data["sharpness_plus"] = sharp_plus
        elif key == "bottle":
            # Stored as "phial" for Charge Blade and Switch Axe, with some
            # values renamed; for any other type the key stays "bottle".
            value = spans[1].text.strip()
            if wtype == "Charge Blade":
                key = "phial"
                if value == "Power":
                    value = "Impact"
                if value == "StrongElement":
                    value = "Element"
            if wtype == "Switch Axe":
                key = "phial"
                # Switch Axe values are "<kind> <number>".
                parts = value.split()
                value = parts[0]
                if value == "StrongElement":
                    value = "Element"
                if value == "DownStamina":
                    value = "Exhaust"
                phial_num = int(parts[1])
                if phial_num > 0:
                    data["phial_value"] = phial_num
            data[key] = value
        elif key == "type":
            # "<kind>" optionally followed by a level number.
            value = spans[1].text.strip()
            parts = value.split()
            value = parts[0]
            if len(parts) > 1:
                level = int(parts[1])
                data["shelling_level"] = level
            if wtype == "Gunlance":
                key = "shelling_type"
                if value == "Radial":
                    value = "Long"
                elif value == "Diffusion":
                    value = "Wide"
            data[key] = value
        elif key == "insect level":
            value = spans[1].text.strip()
            data["bug_level"] = int(value)
    # Find the first section whose <h2> text is exactly "Crafting".
    sections = root.xpath("//section")
    craft_table = None
    for section in sections:
        h2 = section.xpath("h2/text()")
        if h2 and h2[0] == "Crafting":
            craft_table = section.xpath("div/table/tbody")[0]
            break
    if craft_table is not None:
        rows = craft_table.xpath("tr")
        for row in rows:
            cells = row.findall("td")
            # The first cell labels the row as a "Forge..." or "Upgrade..."
            # recipe; rows with any other label are ignored.
            craft_type = cells[0].text.strip()
            if craft_type.startswith("Forge"):
                zenny, comps = get_components(cells)
                data["creation_cost"] = zenny
                data["create_components"] = comps
            elif craft_type.startswith("Upgrade"):
                zenny, comps = get_components(cells)
                data["upgrade_cost"] = zenny
                data["upgrade_components"] = comps
    return data
def get_components(cells):
    """Read the cost and materials from one row of the crafting table.

    Args:
        cells: The row's cells. ``cells[1]`` holds the cost, ``cells[2]``
            an optional category requirement ("-" when absent) and
            ``cells[3]`` a bullet list of materials.

    Returns:
        Tuple ``(zenny, components)`` where ``components`` maps each
        English material or category name to its int amount.
    """
    cost = int(cells[1].text)
    category_cell = cells[2]
    materials_cell = cells[3]
    components = {}
    if category_cell.text != "-":
        category = category_cell.xpath('.//span[@lang="en"]/span')[0].text
        # The amount is the first space-separated token of the text that
        # follows the cell's first span.
        trailing_text = category_cell.xpath("span")[0].tail
        components[category] = int(trailing_text.split(" ")[0])
    for item in materials_cell.xpath("ul/li"):
        # Item text is a quantity with a trailing "x".
        quantity = int(item.text.strip().rstrip("x"))
        material = item.xpath('.//span[@lang="en"]/span')[0].text
        components[material] = quantity
    return (cost, components)
def get_rice_id(link):
    """Return the number at the end of a weapon page's file name.

    Example: ``/weapon/GreatSword_026.html`` gives ``26``.

    Raises:
        ValueError: if the file name has no underscore, or the part after
            the last underscore is not an integer.
    """
    stem = os.path.splitext(os.path.basename(link))[0]
    _prefix, number = stem.rsplit("_", maxsplit=1)
    return int(number)
def get_weapon_list(wtype, id_offset):
    """Download a weapon type's tree page and flatten it into a list.

    Args:
        wtype: Weapon type name. A few types use a special page name; all
            others are lowercased with spaces replaced by underscores.
        id_offset: Added to the number from each weapon's page file name
            to form its ``_id``.

    Returns:
        List of dicts with ``name``, ``link``, ``_id``, ``parent_name``
        (None for a traversal root) and ``final`` (True when the entry has
        no child entries). Each link is reported once; repeats print a
        warning and are skipped.
    """
    special_pages = {
        "Sword and Shield": "short_sword",
        "Hunting Horn": "horn",
        "Gunlance": "gun_lance",
        "Switch Axe": "slash_axe",
        "Charge Blade": "charge_axe",
    }
    if wtype in special_pages:
        page_stem = special_pages[wtype]
    else:
        page_stem = wtype.lower().replace(" ", "_")
    page_name = page_stem + ".html"
    response = requests.get("https://mhrise.mhrice.info/weapon/" + page_name)
    doc = lxml.etree.HTML(response.content)
    found = []
    visited_links = set()
    for top_li in doc.xpath('//div[@class="mh-weapon-tree"]//li'):
        # Depth-first walk; every pending entry pairs a list item with the
        # name of the weapon it branches from.
        pending = [(top_li, None)]
        while pending:
            node, parent_name = pending.pop()
            anchor = node.xpath('a[@class="mh-icon-text"]')[0]
            children = node.xpath('ul/li')
            name = anchor.xpath('.//span[@lang="en"]/span')[0].text
            link = anchor.attrib['href']
            pending.extend((child, name) for child in children)
            if link in visited_links:
                print("WARN: Duplicate ", name, link)
                continue
            visited_links.add(link)
            weapon_id = get_rice_id(link) + id_offset
            is_final = (len(children) == 0)
            found.append(dict(name=name, link=link, _id=weapon_id,
                              parent_name=parent_name, final=is_final))
    return found
def test_details():
    """Manual smoke test: fetch a fixed set of weapon pages and print them.

    Each case is a ``(weapon type, name, link)`` triple passed to
    get_weapon_details(); the parsed dict is pretty-printed.
    """
    cases = (
        ("Great Sword", "Sinister Shadowblade+", "/weapon/GreatSword_403.html"),
        ("Great Sword", "Redwing Claymore I", "/weapon/GreatSword_068.html"),
        ("Great Sword", "Defender Great Sword I", "/weapon/GreatSword_132.html"),
        ("Great Sword", "Kamura Warrior Cleaver", "/weapon/GreatSword_300.html"),
        ("Dual Blades", "Blood Wind Skards+", "/weapon/DualBlades_319.html"),
        ("Switch Axe", "Arzuros Jubilax", "/weapon/SlashAxe_323.html"),
        ("Switch Axe", "Leave-Taker+", "/weapon/SlashAxe_307.html"),
        ("Insect Glaive", "Fine Kamura Glaive", "/weapon/InsectGlaive_302.html"),
    )
    for case in cases:
        print(case)
        wtype, name, link = case
        details = get_weapon_details(wtype, name, link)
        pprint(details)
        print()
def _main():
    """Build ``weapon_list.json`` in the output directory.

    Command line: ``<output dir>``. Weapons already present in an existing
    ``weapon_list.json`` are reused and only refreshed with the tree data
    (name, link, ids, parent, final flag). Weapons not seen before are
    fetched from their detail page, with a half-second pause after each
    fetch.
    """
    # weapon type -> {weapon name -> previously saved weapon dict}
    weapons_type_name_map = defaultdict(dict)
    weapons_data = []
    outdir = sys.argv[1]
    outfile = os.path.join(outdir, "weapon_list.json")
    if os.path.exists(outfile):
        print("Loading existing data from ", outfile)
        with open(outfile) as f:
            old_data = json.load(f)
        for d in old_data:
            wtype_name_map = weapons_type_name_map[d["wtype"]]
            # Keep only the first saved entry per (type, name).
            if d["name"] in wtype_name_map:
                print("Removing duplicate ", d["wtype"], d["name"])
                continue
            wtype_name_map[d["name"]] = d
    for itype, wtype in enumerate(WEAPON_TYPES):
        wtype_name_map = weapons_type_name_map[wtype]
        # Each type gets its own ID range: offset (index + 1) * MAX_PER_TYPE.
        weapons = get_weapon_list(wtype, (itype+1) * MAX_PER_TYPE)
        if not weapons:
            print("WARN: no weapons of type", wtype)
            continue
        name_id_map = {}
        for w in weapons:
            # always re-calculate IDs
            name_id_map[w["name"]] = w["_id"]
            # The parent must already be in name_id_map, i.e. appear
            # earlier in the list than its children.
            if w["parent_name"]:
                w["parent_id"] = name_id_map[w["parent_name"]]
            else:
                w["parent_id"] = None
            data = wtype_name_map.get(w["name"])
            if data is not None:
                # Known weapon: refresh the tree fields, skip the download.
                print("UP ", wtype, w["_id"], w["name"], w["link"])
                data.update(w)
                weapons_data.append(data)
                continue
            print("ADD", wtype, w["_id"], w["name"], w["link"])
            data = get_weapon_details(wtype, w["name"], w["link"])
            data.update(w)
            weapons_data.append(data)
            time.sleep(0.5)
    with open(os.path.join(outdir, "weapon_list.json"), "w") as f:
        json.dump(weapons_data, f, indent=2)


if __name__ == '__main__':
    #test_details()
    _main()