Files
WebMonkeyBall/tools/dump_vanilla_conf.py
2026-01-31 12:32:37 -05:00

718 lines
23 KiB
Python

#!/usr/bin/env python3
"""
Script for generating default wsmod config from a vanilla game's files
warning: bad
"""
from pathlib import Path
import struct
from collections import namedtuple
import logging
import sys
import json
from typing import Dict, List, Optional, Set, Tuple
# Default location of the extracted game files; used as the default value of
# the --rom command-line option.
# NOTE(review): this is a machine-specific absolute path.
VANILLA_ROOT_PATH = Path(
"/mnt/c/Users/ComplexPlane/Documents/projects/romhack/smb2imm/files"
)
# One decoded course command record: two single-byte fields (opcode, type)
# followed by a 32-bit value, as unpacked with the ">BBxxI20x" struct format.
CourseCommand = namedtuple("CourseCommand", ["opcode", "type", "value"])
# One story-mode stage entry: two big-endian signed 16-bit fields, as unpacked
# with the ">hh" struct format.
SmStageInfo = namedtuple("SmStageInfo", ["stage_id", "difficulty"])
# CMD opcodes (CourseCommand.opcode)
CMD_IF = 0
CMD_THEN = 1
CMD_FLOOR = 2
CMD_COURSE_END = 3
# CMD_IF conditions (CourseCommand.type when opcode is CMD_IF).
# For IF_GOAL_TYPE the command value selects the goal: 0, 1 and 2 are treated
# as blue, green and red respectively by the course parser.
IF_FLOOR_CLEAR = 0
IF_GOAL_TYPE = 2
# CMD_THEN actions (CourseCommand.type when opcode is CMD_THEN)
THEN_JUMP_FLOOR = 0
THEN_END_COURSE = 2
# CMD_FLOOR value types (CourseCommand.type when opcode is CMD_FLOOR)
FLOOR_STAGE_ID = 0
FLOOR_TIME = 1
def get_theme_and_music_ids(stage_id, stage_id_to_theme_id, theme_id_to_music_id):
    """Look up the theme id and music id for a stage.

    Args:
        stage_id: Index into ``stage_id_to_theme_id``.
        stage_id_to_theme_id: Sequence mapping stage id -> theme id.
        theme_id_to_music_id: Sequence mapping theme id -> music id.

    Returns:
        A ``(theme_id, music_id)`` tuple. An out-of-range stage id yields
        theme 0 and an out-of-range theme id yields music 0; both cases log
        a warning instead of raising.
    """
    if 0 <= stage_id < len(stage_id_to_theme_id):
        # Theme ids above 42 are clamped down to 42.
        theme_id = min(stage_id_to_theme_id[stage_id], 42)
    else:
        logging.warning("Stage id %s out of range for theme map; using theme 0", stage_id)
        theme_id = 0

    if 0 <= theme_id < len(theme_id_to_music_id):
        music_id = theme_id_to_music_id[theme_id]
    else:
        logging.warning("Theme id %s out of range for music map; using 0", theme_id)
        music_id = 0

    return (theme_id, music_id)
def parse_cm_course(
    mainloop_buffer,
    stgname_lines,
    bonus_stage_ids,
    stage_id_to_theme_id_map,
    theme_id_to_music_id_map,
    start,
    count=None,
    max_cmds=1024,
    strict=True,
):
    """Decode one challenge-mode course command list into per-floor records.

    Commands are fixed-size 0x1C-byte records read from ``mainloop_buffer``
    beginning at byte offset ``start``. A ``CMD_FLOOR``/``FLOOR_STAGE_ID``
    command opens a new floor; the ``CMD_IF``/``CMD_THEN`` commands that
    follow describe the goal jumps for that floor; ``CMD_COURSE_END`` closes
    the last floor and ends the course.

    Args:
        mainloop_buffer: Bytes-like object holding the command table.
        stgname_lines: Stage names indexed by stage id; a missing index falls
            back to ``"Stage <id>"``.
        bonus_stage_ids: Container of stage ids flagged as bonus stages.
        stage_id_to_theme_id_map: Sequence mapping stage id -> theme id.
        theme_id_to_music_id_map: Sequence mapping theme id -> music id.
        start: Byte offset of the first command.
        count: Exact number of commands to read. When ``None``, commands are
            read until ``CMD_COURSE_END``, the end of the buffer, or
            ``max_cmds`` commands, whichever comes first.
        max_cmds: Upper bound on commands read when ``count`` is ``None``.
        strict: When true, malformed data raises ``SystemExit``; otherwise it
            raises ``ValueError``.

    Returns:
        A list of dicts, one per floor, with keys ``stage_id``, ``name``,
        ``theme_id``, ``music_id``, ``time_limit``, ``blue_goal_jump``,
        ``green_goal_jump``, ``red_goal_jump`` and ``is_bonus_stage``.

    Raises:
        SystemExit / ValueError: On an unknown opcode or type, a floor with
            no blue goal jump, or a list that never reaches
            ``CMD_COURSE_END`` (see ``strict``).
        struct.error: If ``count`` makes the read run past the buffer.
    """

    def raise_error(message: str):
        logging.error(message)
        if strict:
            raise SystemExit(message)
        raise ValueError(message)

    cmd_struct = struct.Struct(">BBxxI20x")
    course_cmd_size = cmd_struct.size  # 0x1C bytes per command record

    cmds: list[CourseCommand] = []
    if count is None:
        # Unknown length: stop at the end marker, the buffer end, or max_cmds.
        for i in range(max_cmds):
            cmd_off = start + i * course_cmd_size
            if cmd_off + course_cmd_size > len(mainloop_buffer):
                break
            course_cmd = CourseCommand._make(
                cmd_struct.unpack_from(mainloop_buffer, cmd_off)
            )
            cmds.append(course_cmd)
            if course_cmd.opcode == CMD_COURSE_END:
                break
    else:
        cmds = [
            CourseCommand._make(
                cmd_struct.unpack_from(mainloop_buffer, start + i * course_cmd_size)
            )
            for i in range(count)
        ]

    # Course commands to stage infos
    cm_stage_infos = []
    stage_id = 0
    stage_time = 60 * 60
    blue_jump = None
    green_jump = None
    red_jump = None
    last_goal_type = None
    first = True
    finished = False

    def emit_floor():
        """Validate the pending floor state and append its record."""
        if blue_jump is None:
            raise_error("Invalid blue goal jump")
        theme_id, music_id = get_theme_and_music_ids(
            stage_id, stage_id_to_theme_id_map, theme_id_to_music_id_map
        )
        stage_name = (
            stgname_lines[stage_id]
            if 0 <= stage_id < len(stgname_lines)
            else f"Stage {stage_id}"
        )
        cm_stage_infos.append(
            {
                "stage_id": stage_id,
                "name": stage_name,
                "theme_id": theme_id,
                "music_id": music_id,
                "time_limit": float(stage_time / 60),
                "blue_goal_jump": blue_jump,
                # Green/red goals default to the blue jump when unspecified.
                "green_goal_jump": green_jump if green_jump is not None else blue_jump,
                "red_goal_jump": red_jump if red_jump is not None else blue_jump,
                "is_bonus_stage": stage_id in bonus_stage_ids,
            }
        )

    for cmd in cmds:
        if cmd.opcode == CMD_FLOOR:
            if cmd.type == FLOOR_STAGE_ID:
                if not first:
                    # A new floor begins: flush the previous one, then reset
                    # the per-floor state to its defaults.
                    emit_floor()
                    stage_time = 60 * 60
                    blue_jump = None
                    green_jump = None
                    red_jump = None
                    last_goal_type = None
                stage_id = cmd.value
                first = False
            elif cmd.type == FLOOR_TIME:
                stage_time = cmd.value
            else:
                raise_error(f"Invalid CMD_FLOOR opcode type: {cmd.type}")
        elif cmd.opcode == CMD_IF:
            if cmd.type == IF_FLOOR_CLEAR:
                last_goal_type = None
            elif cmd.type == IF_GOAL_TYPE:
                last_goal_type = cmd.value
            else:
                raise_error(f"Invalid CMD_IF opcode type: {cmd.type}")
        elif cmd.opcode == CMD_THEN:
            if cmd.type == THEN_JUMP_FLOOR:
                if last_goal_type is None:
                    # Unconditional jump: only fills goals not yet assigned.
                    if blue_jump is None:
                        blue_jump = cmd.value
                    if green_jump is None:
                        green_jump = cmd.value
                    if red_jump is None:
                        red_jump = cmd.value
                elif last_goal_type == 0:
                    blue_jump = cmd.value
                elif last_goal_type == 1:
                    green_jump = cmd.value
                elif last_goal_type == 2:
                    red_jump = cmd.value
                else:
                    raise_error(f"Invalid last goal type: {last_goal_type}")
            elif cmd.type == THEN_END_COURSE:
                # Jumps are irrelevant, this is end of difficulty
                blue_jump = 1
                green_jump = 1
                red_jump = 1
            else:
                raise_error(f"Invalid CMD_THEN opcode type: {cmd.type}")
        elif cmd.opcode == CMD_COURSE_END:
            emit_floor()
            finished = True
            # Anything after the end marker is not part of this course; going
            # on would re-emit the floor that was just flushed.
            break
        else:
            raise_error(f"Invalid opcode: {cmd.opcode}")

    if not finished:
        raise_error("Course command list ended early")
    return cm_stage_infos
def annotate_cm_layout_dump(dump: str) -> str:
    """Append ``// <Course> <floor>`` comments to a challenge-layout JSON dump.

    A line containing a quoted course key (e.g. ``"beginner"``) switches the
    current course and restarts floor numbering at 1. Every later line that
    contains ``{`` gets the course label and the next floor number appended.
    On every line, ``60.0`` and ``30.0`` are rewritten as ``60.00`` and
    ``30.00``.

    Args:
        dump: Newline-separated JSON text.

    Returns:
        The annotated text, joined with ``"\\n"``.
    """
    # Checked in this order; the first key found in a line wins.
    course_labels = {
        '"beginner"': "Beginner",
        '"beginner_extra"': "Beginner Extra",
        '"advanced"': "Advanced",
        '"advanced_extra"': "Advanced Extra",
        '"expert"': "Expert",
        '"expert_extra"': "Expert Extra",
        '"master"': "Master",
        '"master_extra"': "Master Extra",
    }

    annotated: list[str] = []
    current_label = None
    next_floor = 1
    for raw in dump.split("\n"):
        detected = next(
            (label for key, label in course_labels.items() if key in raw),
            None,
        )
        if detected is not None:
            # New difficulty: restart floor numbering.
            current_label = detected
            next_floor = 1

        text = raw
        if current_label is not None and "{" in text:
            text = f"{text} // {current_label} {next_floor}"
            next_floor += 1

        text = text.replace("60.0", "60.00").replace("30.0", "30.00")
        annotated.append(text)
    return "\n".join(annotated)
def dump_storymode_world_layout(
    mainloop_buffer,
    stgname_lines,
    stage_id_to_theme_id_map,
    theme_id_to_music_id_map,
    start,
):
    """Decode the ten stage entries of one story-mode world.

    Each entry is 4 bytes (``">hh"``: stage id, difficulty), read from
    ``mainloop_buffer`` starting at byte offset ``start``.

    Args:
        mainloop_buffer: Bytes-like object holding the world table.
        stgname_lines: Stage names indexed by stage id; a missing index falls
            back to ``"Stage <id>"``.
        stage_id_to_theme_id_map: Sequence mapping stage id -> theme id.
        theme_id_to_music_id_map: Sequence mapping theme id -> music id.
        start: Byte offset of the world's first entry.

    Returns:
        A list of ten dicts with keys ``stage_id``, ``name``, ``theme_id``,
        ``music_id``, ``time_limit`` and ``difficulty``.
    """
    entry_size = 0x4
    # Decode every entry up front so a short buffer fails before any lookup.
    entries = [
        struct.unpack_from(">hh", mainloop_buffer, start + slot * entry_size)
        for slot in range(10)
    ]

    world_layout = []
    for stage_id, difficulty in entries:
        # Stage 30 is the only stage given the shorter time limit.
        raw_time = 60 * 30 if stage_id == 30 else 60 * 60
        theme_id, music_id = get_theme_and_music_ids(
            stage_id, stage_id_to_theme_id_map, theme_id_to_music_id_map
        )
        if 0 <= stage_id < len(stgname_lines):
            stage_name = stgname_lines[stage_id]
        else:
            stage_name = f"Stage {stage_id}"
        world_layout.append(
            {
                "stage_id": stage_id,
                "name": stage_name,
                "theme_id": theme_id,
                "music_id": music_id,
                "time_limit": float(raw_time / 60),
                "difficulty": difficulty,
            }
        )
    return world_layout
def annotate_story_layout_dump(dump: str) -> str:
    """Append ``// World N`` / ``// Stage N-M`` comments to a story JSON dump.

    Every line containing ``[`` advances the world counter and restarts stage
    numbering. The first such line counts as world 0 and is left unlabelled.
    Every line containing ``{`` is labelled as the next stage of the current
    world. On every line, ``60.0`` and ``30.0`` are rewritten as ``60.00``
    and ``30.00``.

    Args:
        dump: Newline-separated JSON text.

    Returns:
        The annotated text, joined with ``"\\n"``.
    """
    annotated: list[str] = []
    world_no = -1
    stage_no = 0
    for raw in dump.split("\n"):
        suffix = ""
        if "[" in raw:
            world_no += 1
            stage_no = 0
            if world_no >= 1:
                suffix += f" // World {world_no}"
        if "{" in raw:
            stage_no += 1
            suffix += f" // Stage {world_no}-{stage_no}"

        text = (raw + suffix).replace("60.0", "60.00").replace("30.0", "30.00")
        annotated.append(text)
    return "\n".join(annotated)
def list_stage_ids(stage_dir: Path) -> Set[int]:
    """Collect the stage numbers of ``STAGEnnn.lz`` files in a directory.

    Only names of exactly 11 characters (``STAGE`` + three characters +
    ``.lz``) are considered, and only when the three characters parse as an
    integer.

    Args:
        stage_dir: Directory to scan (non-recursive).

    Returns:
        The set of stage numbers found; empty if the directory is missing.
    """
    found: Set[int] = set()
    if stage_dir.exists():
        for candidate in stage_dir.glob("STAGE*.lz"):
            filename = candidate.name
            well_formed = (
                len(filename) == 11
                and filename.startswith("STAGE")
                and filename.endswith(".lz")
            )
            if not well_formed:
                continue
            try:
                found.add(int(filename[5:8]))
            except ValueError:
                # Non-numeric stage suffix: not a stage file we recognise.
                pass
    return found
def collect_stage_ids_from_cm(cm_layout: Dict[str, List[dict]]) -> List[int]:
    """Flatten a challenge-mode layout into a list of its stage ids.

    Args:
        cm_layout: Mapping of course name -> list of stage-entry dicts.

    Returns:
        Every integer ``stage_id`` found, in iteration order (duplicates
        kept). Values that are not lists, entries that are not dicts, and
        non-integer ids are skipped.
    """
    collected: List[int] = []
    for course_entries in cm_layout.values():
        if isinstance(course_entries, list):
            collected.extend(
                item["stage_id"]
                for item in course_entries
                if isinstance(item, dict) and isinstance(item.get("stage_id"), int)
            )
    return collected
def collect_stage_ids_from_story(worlds: List[List[dict]]) -> List[int]:
    """Flatten a story-mode layout into a list of its stage ids.

    Args:
        worlds: List of worlds, each a list of stage-entry dicts.

    Returns:
        Every integer ``stage_id`` found, in order (duplicates kept). Worlds
        that are not lists, entries that are not dicts, and non-integer ids
        are skipped.
    """
    collected: List[int] = []
    for world_entries in worlds:
        if isinstance(world_entries, list):
            collected.extend(
                item["stage_id"]
                for item in world_entries
                if isinstance(item, dict) and isinstance(item.get("stage_id"), int)
            )
    return collected
def validate_stage_ids(
    stage_ids: List[int],
    valid_ids: Set[int],
    label: str,
    named_ids: Optional[Set[int]] = None,
    min_named_ratio: float = 0.0,
) -> bool:
    """Sanity-check a list of stage ids against the known-valid set.

    Args:
        stage_ids: Stage ids to check.
        valid_ids: Ids considered valid.
        label: Human-readable name used in warning messages.
        named_ids: Optional set of ids that have a stage name.
        min_named_ratio: When positive (and ``named_ids`` is non-empty), the
            minimum fraction of ``stage_ids`` that must be in ``named_ids``.

    Returns:
        ``True`` when there is nothing to check (either collection empty),
        when all ids are valid and the named ratio is met, or when fewer than
        10% of the ids are invalid. ``False`` for any negative id, too many
        invalid ids, or a named ratio below the minimum.
    """
    if not (stage_ids and valid_ids):
        return True

    for sid in stage_ids:
        if sid < 0:
            logging.warning("%s contains negative stage ids", label)
            return False

    total = max(1, len(stage_ids))
    unknown = [sid for sid in stage_ids if sid not in valid_ids]
    if unknown:
        unknown_ratio = len(unknown) / total
        logging.warning(
            "%s has %d invalid stage ids (%.1f%%)", label, len(unknown), unknown_ratio * 100
        )
        # A small share of unknown ids is tolerated.
        return unknown_ratio < 0.1

    if named_ids and min_named_ratio > 0:
        named_ratio = sum(1 for sid in stage_ids if sid in named_ids) / total
        if named_ratio < min_named_ratio:
            logging.warning(
                "%s has low named stage ratio (%.1f%%)", label, named_ratio * 100
            )
            return False
    return True
def find_course_offsets(
    data: bytes,
    stage_ids: Set[int],
    named_stage_ids: Set[int],
    min_stages: int = 10,
    max_cmds: int = 512,
) -> List[Tuple[int, int]]:
    """Scan a buffer for byte ranges that look like course command lists.

    A candidate starts at a 4-byte-aligned offset whose first command is
    ``CMD_FLOOR``/``FLOOR_STAGE_ID``. It must consist only of recognised
    opcode/type pairs, reach ``CMD_COURSE_END`` within ``max_cmds`` commands,
    and contain at least ``min_stages`` stage-id commands. Candidates are
    scored (more known stage ids is better, longer lists are slightly
    penalised, named stages give a bonus) and the best non-overlapping ones
    are kept.

    Args:
        data: Buffer to scan.
        stage_ids: Stage ids known to exist.
        named_stage_ids: Stage ids that have a name; may be empty.
        min_stages: Minimum number of stage-id commands per candidate.
        max_cmds: Maximum number of commands examined per candidate.

    Returns:
        Up to eight ``(offset, command_count)`` tuples sorted by offset.
    """
    course_cmd_size = 0x1C
    candidates: List[Tuple[int, int, float]] = []
    for off in range(0, len(data) - course_cmd_size, 4):
        if data[off] != CMD_FLOOR or data[off + 1] != FLOOR_STAGE_ID:
            continue

        stage_count = 0
        valid_stage_count = 0
        named_count = 0
        cmd_count = 0
        finished = False
        for i in range(max_cmds):
            cmd_off = off + i * course_cmd_size
            if cmd_off + course_cmd_size > len(data):
                break
            opcode = data[cmd_off]
            cmd_type = data[cmd_off + 1]
            value = struct.unpack_from(">I", data, cmd_off + 4)[0]
            cmd_count += 1
            if opcode == CMD_FLOOR:
                if cmd_type == FLOOR_STAGE_ID:
                    stage_count += 1
                    if value in stage_ids:
                        valid_stage_count += 1
                    # Count named stages here, inside the course, so the
                    # ratio cannot be skewed by data after CMD_COURSE_END.
                    if value in named_stage_ids:
                        named_count += 1
                elif cmd_type != FLOOR_TIME:
                    break
            elif opcode == CMD_IF:
                if cmd_type not in (IF_FLOOR_CLEAR, IF_GOAL_TYPE):
                    break
            elif opcode == CMD_THEN:
                if cmd_type not in (THEN_JUMP_FLOOR, THEN_END_COURSE):
                    break
            elif opcode == CMD_COURSE_END:
                finished = True
                break
            else:
                break

        if not finished or stage_count < min_stages:
            continue

        ratio = valid_stage_count / max(1, stage_count)
        named_ratio = named_count / max(1, stage_count) if named_stage_ids else 0.0
        score = stage_count * ratio - cmd_count * 0.05 + named_ratio
        candidates.append((off, cmd_count, score))

    # Best score first; ties are broken by the lower offset.
    candidates.sort(key=lambda item: (-item[2], item[0]))

    selected: List[Tuple[int, int]] = []
    used_ranges: List[Tuple[int, int]] = []
    for off, cmd_count, _ in candidates:
        start = off
        end = off + cmd_count * course_cmd_size
        # Skip candidates overlapping a better-scored range already taken.
        if any(start < rng_end and end > rng_start for rng_start, rng_end in used_ranges):
            continue
        selected.append((off, cmd_count))
        used_ranges.append((start, end))
        if len(selected) >= 8:
            break

    selected.sort(key=lambda item: item[0])
    return selected
def find_story_block_offset(
    data: bytes,
    stage_ids: Set[int],
    named_stage_ids: Set[int],
) -> Optional[int]:
    """Scan a buffer for a contiguous story-mode table (10 worlds x 10 stages).

    Each entry is 4 bytes (``">hh"``: stage id, difficulty). A block matches
    when every stage id is in ``stage_ids``, every difficulty lies in 0..5,
    at least 20 distinct stage ids occur, and (when ``named_stage_ids`` is
    non-empty) at least 30% of the entries are named stages.

    Args:
        data: Buffer to scan at 4-byte-aligned offsets.
        stage_ids: Stage ids known to exist.
        named_stage_ids: Stage ids that have a name; may be empty.

    Returns:
        The byte offset of the first matching block, or ``None`` if there is
        none (including when the buffer is shorter than one block).
    """
    entry = struct.Struct(">hh")
    entry_size = entry.size  # 4 bytes
    world_count = 10
    stages_per_world = 10
    entry_count = world_count * stages_per_world
    block_size = entry_count * entry_size

    # "+ 1" makes the last candidate the block that ends exactly at the end
    # of the buffer; without it that position is never examined.
    for off in range(0, len(data) - block_size + 1, 4):
        unique_ids: Set[int] = set()
        named_count = 0
        for idx in range(entry_count):
            stage_id, difficulty = entry.unpack_from(data, off + idx * entry_size)
            if stage_id not in stage_ids or difficulty < 0 or difficulty > 5:
                break
            unique_ids.add(stage_id)
            if stage_id in named_stage_ids:
                named_count += 1
        else:
            # Reached only when no entry was rejected.
            if len(unique_ids) < 20:
                continue
            if named_stage_ids and named_count / max(1, entry_count) < 0.3:
                continue
            return off
    return None
def is_story_world_valid(
    data: bytes,
    offset: int,
    stage_ids: Set[int],
    named_stage_ids: Set[int],
) -> bool:
    """Check whether ten story-stage entries at ``offset`` look plausible.

    Each entry is 4 bytes (``">hh"``: stage id, difficulty). The world is
    accepted when every stage id is in ``stage_ids``, every difficulty lies
    in 0..5, at least 3 distinct stage ids occur, and (when
    ``named_stage_ids`` is non-empty) at least 30% of the entries are named.

    Args:
        data: Buffer containing the table.
        offset: Byte offset of the world's first entry.
        stage_ids: Stage ids known to exist.
        named_stage_ids: Stage ids that have a name; may be empty.

    Returns:
        ``True`` if the world passes all checks. ``False`` otherwise,
        including when the entries cannot be read because ``offset`` lies
        outside ``data``.
    """
    unique_ids: Set[int] = set()
    named_count = 0
    for idx in range(10):
        try:
            stage_id, difficulty = struct.unpack_from(">hh", data, offset + idx * 4)
        except struct.error:
            # Offset runs past the buffer: report "not valid" rather than
            # raising, so callers can fall back to scanning.
            return False
        if stage_id not in stage_ids:
            return False
        if difficulty < 0 or difficulty > 5:
            return False
        unique_ids.add(stage_id)
        if stage_id in named_stage_ids:
            named_count += 1
    if len(unique_ids) < 3:
        return False
    if named_stage_ids and named_count / 10 < 0.3:
        return False
    return True
def load_vanilla_course_data(
    rom_dir: Path,
    *,
    course_cmd_counts: Optional[Dict[str, int]] = None,
    world_offsets: Optional[List[int]] = None,
) -> dict:
    """Load challenge-mode and story-mode layouts from extracted game files.

    Reads ``mkb2.main_loop.rel`` and ``stgname/usa.str`` under ``rom_dir``.
    Hard-coded table offsets are tried first; if they do not yield plausible
    data and the ``stage`` directory lists any stage files, the buffer is
    scanned heuristically for the tables instead.

    Args:
        rom_dir: Directory containing the extracted files.
        course_cmd_counts: Optional mapping of course name -> exact number of
            commands to read for that course at its default offset.
        world_offsets: Optional byte offsets of the story worlds, replacing
            the built-in defaults.

    Returns:
        ``{"challenge": {course name: [stage dict, ...]}, "story": [[stage
        dict, ...], ...]}``. ``"story"`` is an empty list when no story table
        could be located.

    Raises:
        FileNotFoundError: If either required input file is missing.
        SystemExit: If no challenge course table could be located.
    """
    mainloop_path = rom_dir / "mkb2.main_loop.rel"
    stgname_path = rom_dir / "stgname" / "usa.str"
    if not mainloop_path.exists():
        raise FileNotFoundError(f"missing {mainloop_path}")
    if not stgname_path.exists():
        raise FileNotFoundError(f"missing {stgname_path}")

    mainloop_buffer = mainloop_path.read_bytes()
    stgname_lines = stgname_path.read_text(encoding="ascii", errors="ignore").splitlines()
    # Line index is the stage id; empty lines and "-" count as unnamed.
    named_stage_ids = {i for i, name in enumerate(stgname_lines) if name and name != "-"}

    # Fixed-offset lookup tables.
    # NOTE(review): these offsets presumably match one specific build of the
    # game file — confirm before using with other versions.
    bonus_stage_ids = struct.unpack_from(">9i", mainloop_buffer, 0x00176118)
    stage_id_to_theme_id_map = struct.unpack_from(">428B", mainloop_buffer, 0x00204E48)
    theme_id_to_music_id_map = struct.unpack_from(">43h", mainloop_buffer, 0x0016E738)
    stage_ids = list_stage_ids(rom_dir / "stage")

    # Parse challenge mode entries using default offsets first.
    counts = course_cmd_counts or {}
    default_course_offsets = [
        ("beginner", 0x002075B0),
        ("advanced", 0x00207914),
        ("expert", 0x00208634),
        ("beginner_extra", 0x00209CF4),
        ("advanced_extra", 0x0020A0C8),
        ("expert_extra", 0x0020A448),
        ("master", 0x0020A8E0),
        ("master_extra", 0x0020ACB4),
    ]
    cm_layout: Dict[str, List[dict]] = {}
    for name, offset in default_course_offsets:
        try:
            cm_layout[name] = parse_cm_course(
                mainloop_buffer,
                stgname_lines,
                bonus_stage_ids,
                stage_id_to_theme_id_map,
                theme_id_to_music_id_map,
                offset,
                counts.get(name),
                # Non-strict so malformed data raises ValueError. Strict mode
                # raises SystemExit, which "except Exception" does not catch,
                # so the fallback scan below could never run.
                strict=False,
            )
        except Exception:
            cm_layout = {}
            break

    if cm_layout:
        cm_ids = collect_stage_ids_from_cm(cm_layout)
        if not validate_stage_ids(cm_ids, stage_ids, "challenge courses", named_ids=named_stage_ids):
            cm_layout = {}

    if not cm_layout and stage_ids:
        logging.warning("Default course offsets invalid; scanning for course tables.")
        offsets = find_course_offsets(mainloop_buffer, stage_ids, named_stage_ids)
        order = [name for name, _ in default_course_offsets]
        # Found tables are assigned to course names in ascending offset order.
        for name, (offset, cmd_count) in zip(order, offsets):
            try:
                cm_layout[name] = parse_cm_course(
                    mainloop_buffer,
                    stgname_lines,
                    bonus_stage_ids,
                    stage_id_to_theme_id_map,
                    theme_id_to_music_id_map,
                    offset,
                    cmd_count,
                    strict=False,
                )
            except Exception:
                cm_layout = {}
                break

    if not cm_layout:
        raise SystemExit("Failed to locate challenge course tables.")

    if world_offsets is None:
        world_offsets = [
            0x0020b448,
            0x0020b470,
            0x0020b498,
            0x0020b4c0,
            0x0020b4e8,
            0x0020b510,
            0x0020b538,
            0x0020b560,
            0x0020b588,
            0x0020b5b0,
        ]

    worlds = []
    for offs in world_offsets:
        # One implausible world discards all worlds read from these offsets.
        if stage_ids and not is_story_world_valid(mainloop_buffer, offs, stage_ids, named_stage_ids):
            worlds = []
            break
        world = dump_storymode_world_layout(
            mainloop_buffer,
            stgname_lines,
            stage_id_to_theme_id_map,
            theme_id_to_music_id_map,
            offs,
        )
        worlds.append(world)

    if worlds:
        story_ids = collect_stage_ids_from_story(worlds)
        if not validate_stage_ids(
            story_ids,
            stage_ids,
            "story worlds",
            named_ids=named_stage_ids,
            min_named_ratio=0.3,
        ):
            worlds = []

    if not worlds and stage_ids:
        logging.warning("Default story offsets invalid; scanning for story table.")
        base_off = find_story_block_offset(mainloop_buffer, stage_ids, named_stage_ids)
        if base_off is not None:
            # Worlds are contiguous: 10 entries of 4 bytes = 0x28 bytes each.
            world_offsets = [base_off + i * 0x28 for i in range(10)]
            for offs in world_offsets:
                world = dump_storymode_world_layout(
                    mainloop_buffer,
                    stgname_lines,
                    stage_id_to_theme_id_map,
                    theme_id_to_music_id_map,
                    offs,
                )
                worlds.append(world)

    if not worlds:
        logging.warning("Story world data not found; output will omit story worlds.")

    return {
        "challenge": cm_layout,
        "story": worlds,
    }
def main() -> None:
    """Command-line entry point: print the annotated challenge-mode layout.

    Accepts ``--rom`` (path to the extracted game files, defaulting to
    ``VANILLA_ROOT_PATH``), loads the course data, and prints the challenge
    layout as annotated, 4-space-indented JSON.
    """
    import argparse

    cli = argparse.ArgumentParser(
        description="Dump vanilla challenge/story course data from extracted SMB2 files."
    )
    cli.add_argument(
        "--rom",
        type=Path,
        default=VANILLA_ROOT_PATH,
        help="Path to extracted ROM folder (containing mkb2.main_loop.rel)",
    )
    rom_dir = cli.parse_args().rom

    course_data = load_vanilla_course_data(rom_dir)

    challenge_json = json.dumps(course_data["challenge"], indent=4)
    print(annotate_cm_layout_dump(challenge_json))

    # NOTE(review): the story layout is rendered but never printed — confirm
    # whether it should be part of the output.
    story_json = json.dumps(course_data["story"], indent=4)
    _annotated_story = annotate_story_layout_dump(story_json)
    # print(_annotated_story)


if __name__ == "__main__":
    main()