improve db check script
- Use difflib instead of python-Levenshtein to avoid external requirements. It catches everything except "Itiezu" for Khezu.
- Track misspellings separately from mismatches.
- Improve the output.
This commit is contained in:
@@ -1,14 +1,14 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
# Note: requires python-Levenshtein, available as package in debian and
|
This is a messy heuristic script for parsing monster names from quest and
|
||||||
# ubuntu
|
sub quest goals and comparing that with the monster_to_quest table in the db.
|
||||||
|
"""
|
||||||
|
|
||||||
import os.path
|
import os.path
|
||||||
import codecs
|
import codecs
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
|
import difflib
|
||||||
from Levenshtein import distance
|
|
||||||
|
|
||||||
import _pathfix
|
import _pathfix
|
||||||
|
|
||||||
@@ -129,14 +129,19 @@ def _parse_monster(name):
|
|||||||
return name
|
return name
|
||||||
|
|
||||||
|
|
||||||
def parse_goal_monster_names(goal):
|
def parse_goal_monster_names(goal, errors):
|
||||||
if goal == "None":
|
if goal == "None":
|
||||||
return []
|
return []
|
||||||
|
if "Severthe " in goal or "Hunta " in goal:
|
||||||
|
goal2 = goal.replace("Severthe ", "Sever the ")
|
||||||
|
goal2 = goal2.replace("Hunta ", "Hunt a ")
|
||||||
|
errors.append("Spelling: '%s' => '%s'" % (goal, goal2))
|
||||||
|
goal = goal2
|
||||||
if goal.startswith("Deliver ") or goal.startswith("Topple "):
|
if goal.startswith("Deliver ") or goal.startswith("Topple "):
|
||||||
# TODO: subquest, could parse the item and look up which monster
|
# TODO: subquest, could parse the item and look up which monster
|
||||||
# it's from
|
# it's from
|
||||||
return []
|
return []
|
||||||
if goal == "Supress its Frenzy (2x)":
|
if goal == "Suppress its Frenzy (2x)":
|
||||||
return []
|
return []
|
||||||
goal = lstrip(goal, "Hunt ")
|
goal = lstrip(goal, "Hunt ")
|
||||||
# typo in 253
|
# typo in 253
|
||||||
@@ -148,8 +153,6 @@ def parse_goal_monster_names(goal):
|
|||||||
# sub quests
|
# sub quests
|
||||||
goal = lstrip(goal, "Wound ")
|
goal = lstrip(goal, "Wound ")
|
||||||
goal = lstrip(goal, "Sever ")
|
goal = lstrip(goal, "Sever ")
|
||||||
# typo in 71
|
|
||||||
goal = lstrip(goal, "Severthe ")
|
|
||||||
goal = lstrip(goal, "Break ")
|
goal = lstrip(goal, "Break ")
|
||||||
goal = lstrip(goal, "Suppress ")
|
goal = lstrip(goal, "Suppress ")
|
||||||
|
|
||||||
@@ -160,8 +163,8 @@ def parse_goal_monster_names(goal):
|
|||||||
return [_parse_monster(p) for p in parts]
|
return [_parse_monster(p) for p in parts]
|
||||||
|
|
||||||
|
|
||||||
def get_goal_monsters(db, goal):
|
def get_goal_monsters(db, goal, errors):
|
||||||
names = parse_goal_monster_names(goal)
|
names = parse_goal_monster_names(goal, errors)
|
||||||
#print quest.goal, names
|
#print quest.goal, names
|
||||||
monsters = []
|
monsters = []
|
||||||
for name in names:
|
for name in names:
|
||||||
@@ -175,36 +178,38 @@ def get_goal_monsters(db, goal):
|
|||||||
name = name2
|
name = name2
|
||||||
if m is None:
|
if m is None:
|
||||||
name2 = fuzzy_find(name)
|
name2 = fuzzy_find(name)
|
||||||
|
if name2:
|
||||||
m = db.get_monster_by_name(name2)
|
m = db.get_monster_by_name(name2)
|
||||||
if m is not None:
|
if m is not None:
|
||||||
print "Fuzzy match: %s => %s" % (name, name2)
|
errors.append("Fuzzy match: %s => %s" % (name, name2))
|
||||||
name = name2
|
name = name2
|
||||||
if m is None:
|
if m is None:
|
||||||
print "ERROR: can't find monster '%s'" % name
|
errors.append("ERROR: can't find monster '%s'" % name)
|
||||||
continue
|
continue
|
||||||
monsters.append(QuestMonster(m["_id"], name))
|
monsters.append(QuestMonster(m["_id"], name))
|
||||||
return monsters
|
return monsters
|
||||||
|
|
||||||
|
|
||||||
def fuzzy_find(name, max_distance=3):
|
def fuzzy_find(name):
|
||||||
best = (None, 10000)
|
matches = difflib.get_close_matches(name, ALL_NAMES, 1)
|
||||||
for n in ALL_NAMES:
|
if matches:
|
||||||
d = distance(name, n)
|
return matches[0]
|
||||||
if d < best[1]:
|
|
||||||
best = (n, d)
|
|
||||||
if best[1] <= max_distance:
|
|
||||||
return best[0]
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def check_hunts(db, quest):
|
def check_hunts(db, quest):
|
||||||
|
print ">", quest.id, quest.name,
|
||||||
|
|
||||||
|
monsters_match = False
|
||||||
|
|
||||||
all_names = db.get_monster_names()
|
all_names = db.get_monster_names()
|
||||||
|
|
||||||
db_expected = set()
|
db_expected = set()
|
||||||
db_expected_unstable = set()
|
db_expected_unstable = set()
|
||||||
|
|
||||||
goal_expected = set(get_goal_monsters(db, quest.goal))
|
errors = []
|
||||||
sub_expected = set(get_goal_monsters(db, quest.sub_goal))
|
goal_expected = set(get_goal_monsters(db, quest.goal, errors))
|
||||||
|
sub_expected = set(get_goal_monsters(db, quest.sub_goal, errors))
|
||||||
|
|
||||||
monsters = db.get_quest_monsters(quest.id)
|
monsters = db.get_quest_monsters(quest.id)
|
||||||
for m in monsters:
|
for m in monsters:
|
||||||
@@ -216,7 +221,6 @@ def check_hunts(db, quest):
|
|||||||
db_expected.add(qm)
|
db_expected.add(qm)
|
||||||
if goal_expected != db_expected:
|
if goal_expected != db_expected:
|
||||||
missing = goal_expected - db_expected
|
missing = goal_expected - db_expected
|
||||||
skip = False
|
|
||||||
if (len(goal_expected) == 1 and len(db_expected) == 1):
|
if (len(goal_expected) == 1 and len(db_expected) == 1):
|
||||||
# handle subspecies and Apex - e.g. when the goal lists the
|
# handle subspecies and Apex - e.g. when the goal lists the
|
||||||
# bare name, but in the db it's listed as Apex NAME, assume
|
# bare name, but in the db it's listed as Apex NAME, assume
|
||||||
@@ -226,13 +230,33 @@ def check_hunts(db, quest):
|
|||||||
goal = next(iter(goal_expected))
|
goal = next(iter(goal_expected))
|
||||||
db = next(iter(db_expected))
|
db = next(iter(db_expected))
|
||||||
if goal[0] == db[0] - 1 and db[1].endswith(goal[1]):
|
if goal[0] == db[0] - 1 and db[1].endswith(goal[1]):
|
||||||
skip = True
|
monsters_match = True
|
||||||
if not skip:
|
else:
|
||||||
print ">", quest.id, quest.name
|
monsters_match = True
|
||||||
|
|
||||||
|
if monsters_match and not errors:
|
||||||
|
# useful for doing grep -v on output
|
||||||
|
print " *OK*"
|
||||||
|
elif monsters_match:
|
||||||
|
print " *MISSPELLING*"
|
||||||
|
print " goal:", quest.goal
|
||||||
|
print " sub:", quest.sub_goal
|
||||||
|
for err in errors:
|
||||||
|
print " ", err
|
||||||
|
else:
|
||||||
|
print " *MISMATCH*",
|
||||||
|
if errors:
|
||||||
|
print " *MISSPELLING*",
|
||||||
|
print
|
||||||
|
for err in errors:
|
||||||
|
print " ", err
|
||||||
print " goal:", quest.goal
|
print " goal:", quest.goal
|
||||||
print " sub:", quest.sub_goal
|
print " sub:", quest.sub_goal
|
||||||
print " parsed:", goal_expected
|
print " parsed:", goal_expected
|
||||||
if sub_expected and not sub_expected < goal_expected:
|
if sub_expected and not sub_expected < goal_expected:
|
||||||
|
# print if sub monster looks like it's not one of the
|
||||||
|
# main monsters. This will false positive when main quest
|
||||||
|
# is hunt all large monsters.
|
||||||
print " sub prsd:", sub_expected
|
print " sub prsd:", sub_expected
|
||||||
print " db:", db_expected
|
print " db:", db_expected
|
||||||
print " db unstb:", db_expected_unstable
|
print " db unstb:", db_expected_unstable
|
||||||
|
|||||||
Reference in New Issue
Block a user