#!/usr/bin/env python3
"""Scrape weapon data from mhrise.mhrice.info into ``weapon_list.json``.

Usage: pass the output directory as the first command line argument.  If the
directory already contains ``weapon_list.json``, weapons found there are
re-used (only their list-derived fields are refreshed) and detail pages are
fetched only for weapons not yet present.
"""

import sys
import os.path
import time
import re
import json
from pprint import pprint
from collections import defaultdict

import lxml.etree
import requests

import _pathfix

from mhapi.util import WEAPON_TYPES


MAX_PER_TYPE = 100000

BASE_URL = "https://mhrise.mhrice.info"

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 30

# Stat keys whose value is a plain integer (optionally followed by "%").
_INT_STATS = frozenset(["attack", "affinity", "defense"])

# Slot images are named slot_<n>.png; the stored slot level is n + 1.
_SLOT_IMG_RE = re.compile(r".*/slot_(\d+)\.png")

# Weapon types whose list page name is not simply the lower-cased type name
# with spaces replaced by underscores.
_LIST_PAGE_NAMES = {
    "Sword and Shield": "short_sword",
    "Hunting Horn": "horn",
    "Gunlance": "gun_lance",
    "Switch Axe": "slash_axe",
    "Charge Blade": "charge_axe",
}


def _fetch_html(url):
    """Download ``url`` and return the parsed lxml HTML root element.

    Raises ``requests.HTTPError`` on a non-success status so that an error
    page is not silently parsed as if it were weapon data.
    """
    result = requests.get(url, timeout=REQUEST_TIMEOUT)
    result.raise_for_status()
    return lxml.etree.HTML(result.content)


def pp(name, e):
    """Debug helper: print the tag and pretty-printed markup of ``e``.

    ``e`` may be a single element or a (possibly nested) list of elements;
    list members are labelled ``name[0]``, ``name[1]``, ...
    """
    if isinstance(e, list):
        for i, ei in enumerate(e):
            pp(name + "[" + str(i) + "]", ei)
    else:
        print(name, e.tag)
        print(lxml.etree.tostring(e, pretty_print=True))


def parse_sharpness(value_span):
    """Parse the sharpness bar found inside ``value_span``.

    Each span inside the ``mh-sharpness-bar`` element carries a colour number
    in a ``mh-sharpness-color-<n>`` class and a ``width:<x>%`` style; the
    stored value is ``int(2 * x)``.  Spans that also have the
    ``mh-sharpness-half`` class only contribute to the second list.

    Returns a tuple ``(values, values_plus)``:

    * ``values`` - one entry per colour, indexed by colour number, with
      zeros filled in for colours skipped before a later colour.
    * ``values_plus`` - empty when there are no "half" spans; otherwise a
      copy of ``values`` extended (or, for the same colour as the previous
      span, incremented) by the "half" spans.

    Spans with zero/missing width or without a colour class are ignored.
    """
    bar_span = value_span.xpath('.//span[@class="mh-sharpness-bar"]')[0]
    sharp_spans = bar_span.xpath('.//span')
    i = 0
    last_color_num = -1
    values = []
    values_plus = []
    for sharp_span in sharp_spans:
        # NOTE(review): the original example markup in this comment was lost;
        # each span is expected to have both "style" and "class" attributes.
        attr_style = sharp_span.attrib["style"]
        attr_class = sharp_span.attrib["class"]

        # Reset per span so nothing leaks over from the previous iteration.
        color_num = None
        half = False
        for class_name in attr_class.split():
            if class_name.startswith("mh-sharpness-color-"):
                color_num = int(class_name[-1])
            if class_name == "mh-sharpness-half":
                half = True

        value = 0
        for s in attr_style.split(";"):
            s = s.strip()
            if not s:
                continue
            parts = s.split(":")
            if parts[0] == "width":
                value = int(2 * float(parts[1].rstrip("%")))
                break

        if color_num is None or value == 0:
            continue

        if half:
            if not values_plus:
                values_plus = list(values)
            if color_num == last_color_num:
                values_plus[-1] += value
            else:
                values_plus.append(value)
        else:
            # fill in missing colors, if any
            while i < color_num:
                values.append(0)
                i += 1
            values.append(value)
            i += 1
        last_color_num = color_num
    return values, values_plus


def _map_element(e):
    """Translate the site's element/status names to the names stored here."""
    if e == "Bomb":
        return "Blast"
    if e == "Paralyze":
        return "Paralysis"
    return e


def _parse_slots(value_span):
    """Return the list of slot levels for the slot images in ``value_span``.

    Images whose ``src`` does not look like ``.../slot_<n>.png`` are ignored.
    """
    slots = []
    slot_imgs = value_span.xpath('.//span[@class="mh-slot-outer"]/img')
    for slot_img in slot_imgs:
        m = _SLOT_IMG_RE.match(slot_img.attrib["src"])
        if m:
            slots.append(int(m.group(1)) + 1)
    return slots


def get_weapon_details(wtype, name, link):
    """Fetch and parse the detail page of a single weapon.

    ``wtype`` is the weapon type name (e.g. "Great Sword"), ``name`` the
    weapon name and ``link`` the site-relative path of its detail page.

    Returns a dict that always contains ``wtype``, ``name`` and ``rarity``
    plus whichever stats the page lists (attack, element, slots, sharpness,
    phial, shelling, bug level, crafting costs and components).

    Raises ``requests.HTTPError`` if the page cannot be fetched, and
    ``IndexError``/``KeyError``/``ValueError`` if the page layout is not as
    expected.
    """
    data = dict(wtype=wtype, name=name)
    root = _fetch_html(BASE_URL + link)

    # The rarity is the trailing number of the icon's class name.
    icon_div = root.xpath(
        '//div[@class="mh-title-icon"]/div[@class="mh-colored-icon"]/div'
    )[0]
    rarity_class = icon_div.attrib["class"]
    data["rarity"] = int(rarity_class.split("-")[-1])

    stat_div = root.xpath('//div[@class="mh-kvlist"]')[0]
    kvlist = stat_div.xpath('.//p[@class="mh-kv"]')
    for kv in kvlist:
        spans = kv.xpath('span')
        key = spans[0].text.strip().lower()
        if key in _INT_STATS:
            value = spans[1].text
            value = value.rstrip("%")
            data[key] = int(value)
        elif key == "element":
            value_spans = spans[1].xpath("span")
            value = value_spans[0].text.strip()
            if value:
                parts = value.split()
                if parts[0] == "None":
                    data["element"] = None
                    data["element_attack"] = None
                else:
                    data["element"] = _map_element(parts[0])
                    data["element_attack"] = int(parts[1])
            if len(value_spans) > 1:
                value = value_spans[1].text.strip()
                parts = value.split()
                data["element_2"] = _map_element(parts[0])
                data["element_2_attack"] = int(parts[1])
            else:
                data["element_2"] = None
                data["element_2_attack"] = None
        elif key == "slot":
            # NOTE(review): the original example here read "A level-2 slot" /
            # "A level-4 slot" - presumably the images' alt text; confirm.
            data["slots"] = _parse_slots(spans[1])
        elif key == "rampage slot":
            data["rampage_slots"] = _parse_slots(spans[1])
        elif key == "sharpness":
            sharp, sharp_plus = parse_sharpness(spans[1])
            data["sharpness"] = sharp
            data["sharpness_plus"] = sharp_plus
        elif key == "bottle":
            value = spans[1].text.strip()
            if wtype == "Charge Blade":
                key = "phial"
                if value == "Power":
                    value = "Impact"
                if value == "StrongElement":
                    value = "Element"
            if wtype == "Switch Axe":
                # Switch Axe values have the form "<phial type> <number>".
                key = "phial"
                parts = value.split()
                value = parts[0]
                if value == "StrongElement":
                    value = "Element"
                if value == "DownStamina":
                    value = "Exhaust"
                phial_num = int(parts[1])
                if phial_num > 0:
                    data["phial_value"] = phial_num
            # For any other weapon type the value is stored under "bottle".
            data[key] = value
        elif key == "type":
            value = spans[1].text.strip()
            parts = value.split()
            value = parts[0]
            if len(parts) > 1:
                level = int(parts[1])
                data["shelling_level"] = level
            if wtype == "Gunlance":
                key = "shelling_type"
                if value == "Radial":
                    value = "Long"
                elif value == "Diffusion":
                    value = "Wide"
            data[key] = value
        elif key == "insect level":
            value = spans[1].text.strip()
            data["bug_level"] = int(value)

    # Locate the table body of the section headed "Crafting", if any.
    craft_table = None
    for section in root.xpath("//section"):
        h2 = section.xpath("h2/text()")
        if h2 and h2[0] == "Crafting":
            craft_table = section.xpath("div/table/tbody")[0]
            break

    if craft_table is not None:
        for row in craft_table.xpath("tr"):
            cells = row.findall("td")
            craft_type = cells[0].text.strip()
            if craft_type.startswith("Forge"):
                zenny, comps = get_components(cells)
                data["creation_cost"] = zenny
                data["create_components"] = comps
            elif craft_type.startswith("Upgrade"):
                zenny, comps = get_components(cells)
                data["upgrade_cost"] = zenny
                data["upgrade_components"] = comps
    return data


def get_components(cells):
    """Parse cost and materials from the ``td`` cells of a crafting row.

    ``cells[1]`` holds the zenny cost, ``cells[2]`` either "-" or a named
    material followed by a point count, and ``cells[3]`` a list of
    "<count>x <material>" items.

    Returns ``(zenny, components)`` where ``components`` maps material name
    to the required count (or points).
    """
    zenny = int(cells[1].text)
    cmat_text = cells[2].text
    components = {}
    if cmat_text != "-":
        cmat_name = cells[2].xpath('.//span[@lang="en"]/span')[0].text
        # The point count is the text following the first child span.
        cmat_points_string = cells[2].xpath("span")[0].tail
        cmat_points = int(cmat_points_string.split(" ")[0])
        components[cmat_name] = cmat_points
    li_mats = cells[3].xpath("ul/li")
    for li in li_mats:
        count = int(li.text.strip().rstrip("x"))
        name = li.xpath('.//span[@lang="en"]/span')[0].text
        components[name] = count
    return (zenny, components)


def get_rice_id(link):
    """Return the numeric suffix of a weapon page link as an int.

    Example: ``/weapon/GreatSword_026.html`` gives ``26``.
    """
    # /weapon/GreatSword_026.html
    fname_base, _ = os.path.splitext(os.path.basename(link))
    _, tail = fname_base.rsplit("_", maxsplit=1)
    return int(tail)


def get_weapon_list(wtype, id_offset):
    """Fetch the weapon tree page for ``wtype`` and flatten it into a list.

    Each returned dict has the keys ``name``, ``link``, ``_id`` (the number
    from the link plus ``id_offset``), ``parent_name`` (``None`` for a tree
    root) and ``final`` (``True`` when the weapon has no upgrades below it).
    A link is only included the first time it is encountered.
    """
    ftype = _LIST_PAGE_NAMES.get(wtype)
    if ftype is None:
        ftype = wtype.lower().replace(" ", "_")
    list_fname = ftype + ".html"
    root = _fetch_html(BASE_URL + "/weapon/" + list_fname)

    # NOTE(review): "//li" selects nested items as well as tree roots, so
    # nested items are visited again as loop roots and reported as
    # duplicates below; the resulting list is not affected by this.
    weapon_tree_li = root.xpath('//div[@class="mh-weapon-tree"]//li')
    weapons = []
    seen = set()
    for li in weapon_tree_li:
        # Depth-first walk; name_stack runs parallel to listack and holds
        # the parent weapon name for each pending list item.
        listack = [li]
        name_stack = [None]
        while listack:
            current_li = listack.pop()
            parent_name = name_stack.pop()
            a = current_li.xpath('a[@class="mh-icon-text"]')[0]
            sublists = current_li.xpath('ul/li')
            name = a.xpath('.//span[@lang="en"]/span')[0].text
            link = a.attrib['href']
            name_stack.extend([name] * len(sublists))
            listack.extend(sublists)
            if link in seen:
                print("WARN: Duplicate ", name, link)
                continue
            seen.add(link)
            id_ = get_rice_id(link) + id_offset
            final = (len(sublists) == 0)
            wdata = dict(name=name, link=link, _id=id_,
                         parent_name=parent_name, final=final)
            weapons.append(wdata)
    return weapons


def test_details():
    """Manual smoke test: fetch and print details for a few known weapons."""
    tests = [
        ("Great Sword", "Sinister Shadowblade+",
         "/weapon/GreatSword_403.html"),
        ("Great Sword", "Redwing Claymore I",
         "/weapon/GreatSword_068.html"),
        ("Great Sword", "Defender Great Sword I",
         "/weapon/GreatSword_132.html"),
        ("Great Sword", "Kamura Warrior Cleaver",
         "/weapon/GreatSword_300.html"),
        ("Dual Blades", "Blood Wind Skards+",
         "/weapon/DualBlades_319.html"),
        ("Switch Axe", "Arzuros Jubilax",
         "/weapon/SlashAxe_323.html"),
        ("Switch Axe", "Leave-Taker+",
         "/weapon/SlashAxe_307.html"),
        ("Insect Glaive", "Fine Kamura Glaive",
         "/weapon/InsectGlaive_302.html"),
    ]
    for t in tests:
        print(t)
        d = get_weapon_details(*t)
        pprint(d)
        print()


def _main():
    """Build ``weapon_list.json`` in the directory given as ``sys.argv[1]``.

    Existing entries in that file are kept and only updated with the fields
    from the weapon tree (IDs, parent, link, ...); detail pages are fetched,
    with a short pause between requests, only for weapons not yet present.
    """
    if len(sys.argv) < 2:
        sys.exit("Usage: %s OUTPUT_DIR" % sys.argv[0])

    weapons_type_name_map = defaultdict(dict)
    weapons_data = []
    outdir = sys.argv[1]
    outfile = os.path.join(outdir, "weapon_list.json")

    if os.path.exists(outfile):
        print("Loading existing data from ", outfile)
        with open(outfile, encoding="utf-8") as f:
            old_data = json.load(f)
        for d in old_data:
            wtype_name_map = weapons_type_name_map[d["wtype"]]
            if d["name"] in wtype_name_map:
                print("Removing duplicate ", d["wtype"], d["name"])
                continue
            wtype_name_map[d["name"]] = d

    for itype, wtype in enumerate(WEAPON_TYPES):
        wtype_name_map = weapons_type_name_map[wtype]
        # Each weapon type gets its own ID range of MAX_PER_TYPE entries.
        weapons = get_weapon_list(wtype, (itype + 1) * MAX_PER_TYPE)
        if not weapons:
            print("WARN: no weapons of type", wtype)
            continue
        name_id_map = {}
        for w in weapons:
            # always re-calculate IDs
            name_id_map[w["name"]] = w["_id"]
            if w["parent_name"]:
                w["parent_id"] = name_id_map[w["parent_name"]]
            else:
                w["parent_id"] = None
            data = wtype_name_map.get(w["name"])
            if data is not None:
                print("UP ", wtype, w["_id"], w["name"], w["link"])
                data.update(w)
                weapons_data.append(data)
                continue
            print("ADD", wtype, w["_id"], w["name"], w["link"])
            data = get_weapon_details(wtype, w["name"], w["link"])
            data.update(w)
            weapons_data.append(data)
            # Be polite to the server between detail page requests.
            time.sleep(0.5)

    with open(outfile, "w", encoding="utf-8") as f:
        json.dump(weapons_data, f, indent=2)


if __name__ == '__main__':
    #test_details()
    _main()