Rewritten parser

This commit is contained in:
2020-12-26 10:45:04 +01:00
parent d938521ac6
commit 896a701097
4 changed files with 505 additions and 231 deletions
+50
View File
@@ -0,0 +1,50 @@
<?php
/**
 * File-based JSON cache that mirrors a source tree under a separate cache root.
 *
 * A source path below $root maps to the same relative path below $cache_root;
 * the cached payload is the JSON-encoded array handed to set_cache().
 */
class osu_cacher
{
    private $root;
    private $cache_root;

    /**
     * @param string $root       root folder of the source files
     * @param string $cache_root root folder under which cache files are stored
     */
    public function __construct(string $root, string $cache_root)
    {
        $this->root = $root;
        $this->cache_root = $cache_root;
    }

    /** Returns the source root given to the constructor. */
    public function get_root() : string
    {
        return $this->root;
    }

    /** Returns the cache root given to the constructor. */
    public function get_cache_root() : string
    {
        return $this->cache_root;
    }

    /**
     * Maps a source path to the location of its cache file.
     *
     * Only the leading root is swapped: a plain str_replace() would also
     * rewrite later occurrences of the root string inside folder or file names.
     * Paths that do not start with the root keep the previous str_replace()
     * mapping so existing callers see no change for them.
     */
    public function get_cached_path(string $path) : string
    {
        $root_length = strlen($this->root);

        if ($root_length > 0 && strncmp($path, $this->root, $root_length) === 0)
        {
            return $this->cache_root . (string) substr($path, $root_length);
        }

        return str_replace($this->root, $this->cache_root, $path);
    }

    /** Tells whether a cache file exists for the given source path. */
    public function is_cached(string $path) : bool
    {
        return file_exists($this->get_cached_path($path));
    }

    /**
     * Loads the cached array for a source path.
     *
     * @param string       $path source path
     * @param string|false $hash when not false, the cached entry must carry an
     *                           identical "hash" value, otherwise it is treated as stale
     * @return array|false the decoded cache entry, or false when there is no
     *                     usable entry (missing, unreadable, not a JSON array, hash mismatch)
     */
    public function get_cached(string $path, $hash=false)// : array|bool // see you again in php8
    {
        // was a call to an undefined global function is_cached()
        if (!$this->is_cached($path)) return false;

        $raw = file_get_contents($this->get_cached_path($path));
        if ($raw === false) return false; // unreadable cache file

        $json = json_decode($raw, true);
        if (!is_array($json)) return false; // corrupt or non-array cache content

        if ($hash !== false)
        {
            // strict string comparison: with a loose != two different md5 strings
            // of the form "0e<digits>" would compare equal as numbers
            if (!isset($json["hash"]) || (string) $json["hash"] !== (string) $hash) return false; // hash check failed
        }

        return $json;
    }

    /**
     * Alias of get_cached(); osu_parser::parse_osu_file_format() calls the
     * cacher under this name.
     */
    public function get_cache(string $path, $hash=false)// : array|bool // see you again in php8
    {
        return $this->get_cached($path, $hash);
    }

    /**
     * Stores $content as JSON in the cache file belonging to $path.
     *
     * Nothing is written when the cache location equals the source path
     * (the path is outside the root, writing would overwrite the source file),
     * when the content cannot be encoded, or when the target folder cannot be created.
     */
    public function set_cache(string $path, array $content) : void
    {
        $target = $this->get_cached_path($path);
        if ($target === $path) return; // never overwrite the source file

        $encoded = json_encode($content);
        if ($encoded === false) return; // do not store an empty/invalid entry

        // the mirrored folder structure may not exist yet
        $directory = dirname($target);
        if (!is_dir($directory) && !mkdir($directory, 0777, true) && !is_dir($directory)) return;

        file_put_contents($target, $encoded);
    }
}
+14 -25
View File
@@ -1,5 +1,6 @@
<?php <?php
require "libraries/osu_parser.php"; require_once "libraries/osu_parser.php";
require_once "libraries/osu_cacher.php";
class osu_library class osu_library
{ {
@@ -45,35 +46,21 @@ class osu_library
$difficulties = array(); $difficulties = array();
$osu_glob = glob(utils::globsafe($folder) . "/*.osu"); $osu_glob = glob(utils::globsafe($folder) . "/*.osu");
// if (count($osu_glob) < 1) return; // nothing to do here... if (count($osu_glob) < 1) return; // nothing to do here...
$osb_glob = glob(utils::globsafe($folder) . "/*.osb");
$glob = array_merge($osu_glob, $osb_glob);
foreach ($osu_glob as $osu_file) foreach ($glob as $file)
{ {
$diff = osu_parser::scan_parse_osu_file($osu_file); $difficulty = osu_parser::parse_osu_file_format($file);
$diff["key"] = basename($osu_file); $difficulty["key"] = basename($file);
$diff["path"] = $osu_file; $difficulty["path"] = $file;
$difficulties[basename($osu_file)] = $diff; $difficulties[basename($file)] = $difficulty;
}
foreach (glob(utils::globsafe($folder) . "/*.osb") as $osb_file)
{
$diff = osu_parser::scan_parse_osb_file($osb_file);
$diff["key"] = basename($osb_file);
$diff["path"] = $osb_file;
$difficulties[basename($osb_file)] = $diff;
} }
$temp = explode(" ", basename($folder))[0]; $temp = explode(" ", basename($folder))[0];
if (is_numeric($temp)) $set_id = is_numeric($temp) ? $temp : "";
{
$set_id = $temp;
}
else
{
$set_id = "";
}
$entry = array( $entry = array(
"key" => $key, "key" => $key,
@@ -82,7 +69,9 @@ class osu_library
"difficulties" => $difficulties, "difficulties" => $difficulties,
); );
if (!isset($this->db["library"])) $this->db["library"] = array(); // if (!isset($this->db["library"])) $this->db["library"] = array();
$this->db["library"] = array(); // init or reset
$this->db["library"][$key] = $entry; $this->db["library"][$key] = $entry;
} }
+263
View File
@@ -0,0 +1,263 @@
<?php
/**
 * Previous implementation of the .osu / .osb scanner.
 *
 * Both entry points read a whole file, walk it line by line and return a flat
 * summary array that also carries the parsing time ("process_time") and the
 * md5 of the file ("hash").
 */
class osu_old_parser
{
    /**
     * Parses a .osu beatmap file.
     *
     * Only the General, Metadata, Difficulty and Events sections are read.
     *
     * @param string $osu_file path of the .osu file
     * @return array summary with the keys format, title, artist, mapper, difficulty,
     *               tags, background, audio, video, storyboard, id, set_id,
     *               process_time and hash; [ "error..." ] when the first line is
     *               not an "osu file format" declaration
     */
    public static function scan_parse_osu_file(string $osu_file) : array
    {
        $time_start = microtime(true);

        // osu files can get big, but the whole file is loaded anyway
        $file = file($osu_file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        if ($file === false || !isset($file[0]) || stripos($file[0], "osu file format") === false) return [ "error..." ];

        // some files had "ZERO WIDTH NO-BREAK SPACE" characters in front of the
        // declaration, so the version is taken from whatever follows the marker
        $format = explode("osu file format ", $file[0])[1] ?? "";
        unset($file[0]); // no longer needed

        $osu = array("Format" => $format);
        $current_section = "UnofficialComments";
        $osu[$current_section] = array();
        $section_type = "unknown";
        $delimiter = "";
        $storyboard = array();
        $needed_sections = [ "General", "Metadata", "Difficulty", "Events" ];

        foreach ($file as $line)
        {
            // section header: "[Name]"
            if (strpos($line, "[") === 0 && strpos($line, "]") === (strlen($line)-1))
            {
                $current_section = str_replace([ "[", "]" ], "", $line);
                if (!isset($osu[$current_section]))
                {
                    $osu[$current_section] = array();
                }
                list($section_type, $delimiter) = self::section_layout($current_section);
                continue;
            }

            // only parse the ones needed
            if (!in_array($current_section, $needed_sections, true)) continue;

            // there were commented files that broke the script
            if (strpos($line, "//") === 0) continue;

            if ($section_type == "key-value pairs")
            {
                $delimiter_position = strpos($line, $delimiter);
                if ($delimiter_position === false) continue; // not a key-value line

                // the value starts after the delimiter; this used to add
                // strlen($delimiter_position), i.e. the digit count of the offset,
                // which cut the first character of e.g. "BeatmapSetID:123"
                $value = substr($line, $delimiter_position + strlen($delimiter));
                $osu[$current_section][substr($line, 0, $delimiter_position)] = $value;
            }
            else if ($section_type == "lists")
            {
                $list = explode($delimiter, $line);

                // group events by type and start time
                if ($current_section == "Events")
                {
                    // skip storyboard details lines (indented with spaces or underscores)
                    if (strpos($line, " ") === 0 || strpos($line, "_") === 0) continue;

                    $list[0] = self::convert_event_type($list[0]);

                    foreach (self::gather_storyboard_files($list) as $source_file)
                    {
                        $storyboard[] = $source_file;
                    }

                    $start_time = $list[1] ?? "";
                    $osu[$current_section][$list[0]][$start_time][] = $list;
                }
                else
                {
                    $osu[$current_section][] = $list;
                }
            }
            else
            {
                $osu[$current_section][] = $line; // just dump the unknown...
            }
        }

        unset($file); // free the raw lines

        $set_id = $osu["Metadata"]["BeatmapSetID"] ?? false;
        if ($set_id === false)
        {
            $set_id = self::set_id_from_folder($osu_file);
        }

        // background: first event of type 0 at start time 0, third field
        $background = str_replace("\\", "/", trim($osu["Events"][0][0][0][2] ?? "", "\""));
        $audio = str_replace("\\", "/", trim($osu["General"]["AudioFilename"] ?? "", "\""));
        // video: first event of type 1 at its earliest listed start time, third field
        $video = str_replace("\\", "/", trim($osu["Events"][1][array_key_first($osu["Events"][1] ?? array())][0][2] ?? "", "\""));
        $storyboard = array_unique($storyboard);

        $map_id = intval($osu["Metadata"]["BeatmapID"] ?? 0);
        if ($map_id < 1) $map_id = "";

        $return = array(
            "format" => $osu["Format"] ?? "",
            "title" => $osu["Metadata"]["Title"] ?? "",
            "artist" => $osu["Metadata"]["Artist"] ?? "",
            "mapper" => $osu["Metadata"]["Creator"] ?? "",
            "difficulty" => $osu["Metadata"]["Version"] ?? "",
            "tags" => $osu["Metadata"]["Tags"] ?? "",
            "background" => $background,
            "audio" => $audio,
            "video" => $video,
            "storyboard" => $storyboard,
            "id" => $map_id,
            "set_id" => $set_id,
        );

        $time_end = microtime(true);
        $return["process_time"] = $time_end - $time_start;
        $return["hash"] = hash_file("md5", $osu_file);

        return $return;
    }

    /**
     * Parses a .osb storyboard file and collects the files its events reference.
     *
     * @param string $osb_file path of the .osb file
     * @return array summary with the keys format ("storyboard"), storyboard,
     *               set_id, process_time and hash
     */
    public static function scan_parse_osb_file(string $osb_file) : array
    {
        $time_start = microtime(true);

        $file = file($osb_file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        if ($file === false) $file = array(); // unreadable file: nothing to scan

        // derived from the folder name only, so it is computed once up front;
        // it used to be assigned inside the loop and stayed undefined for
        // files without any line in the Events section
        $set_id = self::set_id_from_folder($osb_file);

        $storyboard = array();
        $current_section = "UnofficialComments";

        foreach ($file as $line)
        {
            if (strpos($line, "[") === 0 && strpos($line, "]") === (strlen($line)-1))
            {
                $current_section = str_replace([ "[", "]" ], "", $line);
                continue;
            }

            if (!($current_section == "Events")) continue; // skip rest

            $list = explode(",", $line);
            $list[0] = self::convert_event_type($list[0]);

            foreach (self::gather_storyboard_files($list) as $source_file)
            {
                $storyboard[] = $source_file;
            }
        }

        $storyboard = array_unique($storyboard);

        $return = array(
            "format" => "storyboard",
            "storyboard" => $storyboard,
            "set_id" => $set_id,
        );

        $time_end = microtime(true);
        $return["process_time"] = $time_end - $time_start;
        $return["hash"] = hash_file("md5", $osb_file);

        return $return;
    }

    /**
     * Returns array(section type, delimiter) for a section name.
     * The key-value sections do not share one delimiter, hence the table.
     */
    private static function section_layout(string $section) : array
    {
        switch ($section)
        {
            case "General":
            case "Editor":
                return array("key-value pairs", ": ");
            case "Metadata":
            case "Difficulty":
                return array("key-value pairs", ":"); // notice the missing space
            case "Colours":
                return array("key-value pairs", " : ");
            case "Events":
            case "TimingPoints":
            case "HitObjects":
                return array("lists", ","); // one list per line
        }

        return array("unknown", "");
    }

    /**
     * Replaces named event types with their numeric legacy notation.
     * event types: https://github.com/ppy/osu/blob/master/osu.Game/Beatmaps/Legacy/LegacyEventType.cs
     */
    private static function convert_event_type(string $input) : string
    {
        return str_replace(
            [ "Background", "Video", "Break", "Colour", "Sprite", "Sample", "Animation" ],
            [ "0", "1", "2", "3", "4", "5", "6" ],
            $input
        );
    }

    /**
     * Returns the files referenced by one event line whose type (field 0) is
     * already numeric: the single file of a sprite (4) or sample (5), or one
     * file per frame for an animation (6). Other types yield an empty array.
     */
    private static function gather_storyboard_files(array $list) : array
    {
        $event_type = $list[0] ?? "";
        $is_single = ($event_type == "5" || $event_type == "4");
        $is_animation = ($event_type == "6");

        if (!$is_single && !$is_animation) return array();
        if (!isset($list[3])) return array(); // malformed line without a file field

        // fix backslashes and strip the surrounding double quotes
        $source_file = trim(str_replace("\\", "/", $list[3]), "\"");

        if ($is_single) return array($source_file);

        // animation: the frame index goes between file name and extension
        $story_base = pathinfo($source_file);
        $ext = empty($story_base["extension"]) ? "" : "." . $story_base["extension"];
        // NOTE(review): pathinfo() reports "." for a file without a folder, so
        // such frames come out as "./name0.ext"; kept as it was - confirm whether
        // consumers expect that prefix
        $dir = empty($story_base["dirname"]) ? "" : $story_base["dirname"] . "/";

        $frames = intval($list[6] ?? 0);
        $source_files = array();
        for ($i = 0; $i < $frames; $i++)
        {
            $source_files[] = $dir . $story_base["filename"] . $i . $ext;
        }

        return $source_files;
    }

    /**
     * Derives the set id from the folder containing $path: the part of the
     * folder name before the first space when it is numeric, "" otherwise.
     */
    private static function set_id_from_folder(string $path) : string
    {
        $temp = explode(" ", basename(dirname($path)))[0];

        return is_numeric($temp) ? $temp : "";
    }
}
+178 -206
View File
@@ -1,263 +1,235 @@
<?php <?php
require_once "libraries/osu_cacher.php";
class osu_parser class osu_parser
{ {
public static function scan_parse_osu_file(string $osu_file) : array private $cacher;
public function __construct(osu_cacher $cacher)
{ {
$time_start = microtime(true); $this->cacher = $cacher;
$file = file($osu_file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); }
// osu files can get big, but why not load the full thing :3
if (stripos($file[0], "osu file format") === false) return [ "error..." ]; public static function convert_event_type(string $input) : string
{
// reconverting everything to the legacy notation
// event types enum from lazer:
// https://github.com/ppy/osu/blob/master/osu.Game/Beatmaps/Legacy/LegacyEventType.cs
return str_replace(
[ "Background", "Video", "Break", "Colour", "Sprite", "Sample", "Animation" ],
[ "0", "1", "2", "3", "4", "5", "6" ],
$input
);
}
// some files had "ZERO WIDTH NO-BREAK SPACE" characters... public static function reverse_event_type(string $input) : string
$format = explode("osu file format ", $file[0])[1]; {
unset($file[0]); // no longer needed return str_replace(
[ "0", "1", "2", "3", "4", "5", "6" ],
[ "Background", "Video", "Break", "Colour", "Sprite", "Sample", "Animation" ],
$input
);
}
$osu = array("Format" => $format); public function parse_osu_file_format(string $path, bool $skip_cache = false)// : array|bool // see you again in php8
$current_section = "UnofficialComments"; {
$osu[$current_section] = array(); if (!file_exists($path)) return false;
$storyboard = array();
if (!$skip_cache)
{
$cached = $this->cacher->get_cache($path, hash_file("md5", $path));
if ($cached !== false) return $cached;
}
$time_start = microtime(true); // measure parsing time
$file = file($path, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
$parsed = array();
// file format declaration
if (stripos($file[0], "osu file format ") === false)
{
// the ranker script had a bug where random "ZERO WIDTH NO-BREAK SPACE"
// characters were at beginning of the .osu files
$parsed["format"] = explode("osu file format ", $file[0])[1];
unset($file[0]); // no longer needed
}
else if (pathinfo($path, PATHINFO_EXTENSION) == "osb")
{
$parsed["format"] = "storyboard";
}
$current_section = false;
$needed_sections = [ "General", "Metadata", "Difficulty", "Events" ];
foreach ($file as $key => $line) foreach ($file as $key => $line)
{ {
if (strpos($line, "[") === 0 && strpos($line, "]") === (strlen($line)-1)) if (strpos($line, "[") === 0 && strpos($line, "]") === (strlen($line)-1))
{ {
$current_section = str_replace([ "[", "]" ], "", $line); list($current_section, $section_type, $delimiter) = self::parse_osu_file_section_header($line, $needed_sections);
if (!isset($osu[$current_section]))
{
$osu[$current_section] = array();
}
// peppy is retarded so i have to do this... continue; // this line has been analyzed
switch ($current_section) {
case "General":
case "Editor":
$section_type = "key-value pairs";
$delimiter = ": ";
break;
case "Metadata":
case "Difficulty":
$section_type = "key-value pairs";
$delimiter = ":"; // notice the missing space
break;
case "Colours":
$section_type = "key-value pairs";
$delimiter = " : "; // WHY WOULD YOU DO THIS IF YOU ALREADY HAVE TWO TYPES OF KEY-VALUE PAIRS???????????????????
break;
case "Events":
case "TimingPoints":
case "HitObjects":
$section_type = "lists"; // yes, listS because one list per line
$delimiter = ",";
break;
default:
$section_type = "unknown";
}
continue;
} }
if (strpos($line, "//") === 0 || // the editor puts hella lot of comments in the files
// only parse the ones needed $section_type === false)
switch ($current_section) {
case "General":
case "Metadata":
case "Difficulty":
case "Events":
$skip = false;
break;
default:
$skip = true;
}
if ($skip) continue;
if (strpos($line, "//") === 0) // there were commented files that broke my script
{ {
} }
else if ($section_type == "key-value pairs") else if ($section_type == "key-value pairs")
{ {
// init empty array
if (!isset($parsed[$current_section])) $parsed[$current_section] = array();
$delimiter_position = strpos($line, $delimiter); $delimiter_position = strpos($line, $delimiter);
$value = substr($line, $delimiter_position + strlen($delimiter_position)); $kv_key = substr($line, 0, $delimiter_position);
$osu[$current_section][substr($line, 0, $delimiter_position)] = $value; $kv_value = substr($line, $delimiter_position + strlen($delimiter_position));
// after some thinking, keeping the original names was a good idea
$parsed[$current_section][$kv_key] = $kv_value;
} }
else if ($section_type == "lists") else if ($section_type == "lists")
{ {
$list = explode($delimiter, $line); $list = explode($delimiter, $line);
if ($current_section == "Events") // saving the whole thing would take up too much space
// group events by type and start time
if ($current_section == "Events")
{ {
if (strpos($line, " ") === 0) continue; // skip storyboard details lines if (strpos($line, " ") === 0 || strpos($line, "_") === 0) continue; // skip storyboard details lines
// event types: https://github.com/ppy/osu/blob/master/osu.Game/Beatmaps/Legacy/LegacyEventType.cs list($event_type, $source_files) = self::gather_source_files($list);
$list[0] = str_replace(
[ "Background", "Video", "Break", "Colour", "Sprite", "Sample", "Animation" ],
[ "0", "1", "2", "3", "4", "5", "6" ],
$list[0]
);
if ($list[0] == "5" || $list[0] == "4") if ($event_type === false)
{ {
$storyboard[] = trim(str_replace("\\", "/", $list[3]), "\"");
} }
else if ($event_type == "Background")
if ($list[0] == "6")
{ {
$story_base = pathinfo(trim(str_replace("\\", "/", $list[3]), "\"")); $parsed["background"] = $source_files[0] ?? "";
if (empty($story_base["extension"])) $ext = ""; }
else $ext = "." . $story_base["extension"]; else if ($event_type == "Video")
if (empty($story_base["dirname"])) $dir = ""; {
else $dir = $story_base["dirname"] . "/"; $parsed["video"] = $source_files[0] ?? "";
}
else
{
// init empty array
if (!isset($parsed["storyboard"])) $parsed["storyboard"] = array();
for ($i = 0; $i < intval($list[6]); $i++) // add the elements to the storyboard
foreach ($source_files as $source_file)
{ {
$storyboard[] = $dir . $story_base["filename"] . $i . $ext; $parsed["storyboard"][] = $source_file;
} }
} }
if (!isset($osu[$current_section][$list[0]]))
{
$osu[$current_section][$list[0]] = array();
}
if (!isset($osu[$current_section][$list[0]][$list[1]]))
{
$osu[$current_section][$list[0]][$list[1]] = array();
}
$osu[$current_section][$list[0]][$list[1]][] = $list;
} }
else else
{ {
$osu[$current_section][] = $list; // init empty array
if (!isset($parsed[$current_section])) $parsed[$current_section] = array();
// just dump the non-events...
//
// at the point of writing this
// comment, this section will
// never get used...
$parsed[$current_section][] = $line;
} }
} }
else
{
$osu[$current_section][] = $line; // just dump the unknown...
}
} }
unset($file); // remove the memory leak unset($file); // remove the memory leak
if (!$skip_cache)
// return $osu;
$set_id = $osu["Metadata"]["BeatmapSetID"] ?? false;
if ($set_id === false)
{ {
$temp = explode(" ", basename(dirname($osu_file)))[0]; $this->cacher->set_cache($path, $parsed);
if (is_numeric($temp))
{
$set_id = $temp;
}
else
{
$set_id = "";
}
} }
$background = str_replace("\\", "/", trim($osu["Events"][0][0][0][2] ?? "", "\"")); return $parsed;
$audio = str_replace("\\", "/", trim($osu["General"]["AudioFilename"] ?? "", "\""));
$video = str_replace("\\", "/", trim($osu["Events"][1][array_key_first($osu["Events"][1] ?? array())][0][2] ?? "", "\""));
$storyboard = array_unique($storyboard);
$map_id = intval($osu["Metadata"]["BeatmapID"] ?? 0);
if ($map_id < 1) $map_id = "";
$return = array(
"format" => $osu["Format"] ?? "",
"title" => $osu["Metadata"]["Title"] ?? "",
"artist" => $osu["Metadata"]["Artist"] ?? "",
"mapper" => $osu["Metadata"]["Creator"] ?? "",
"difficulty" => $osu["Metadata"]["Version"] ?? "",
"tags" => $osu["Metadata"]["Tags"] ?? "",
"background" => $background,
"audio" => $audio,
"video" => $video,
"storyboard" => $storyboard,
"id" => $map_id,
"set_id" => $set_id,
);
$time_end = microtime(true);
$time = $time_end - $time_start;
$return["process_time"] = $time;
$return["hash"] = hash_file("md5", $osu_file);
return $return;
} }
public static function scan_parse_osb_file(string $osb_file) : array public static function gather_source_files(array $list) : array
{ {
$time_start = microtime(true); $list[0] = convert_event_type($list[0]);
$file = file($osb_file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); $event_type = $list[0];
$storyboard = array(); if (!in_array($event_type, [ "0", "1", "4", "5", "6" ]))
$current_section = "UnofficialComments";
foreach ($file as $key => $line)
{ {
if (strpos($line, "[") === 0 && strpos($line, "]") === (strlen($line)-1)) return array(false, false); // events that shouldn't be processed
{
$current_section = str_replace([ "[", "]" ], "", $line);
continue;
}
if (!($current_section == "Events")) continue; // skip rest
$list = explode(",", $line);
$list[0] = str_replace(
[ "Background", "Video", "Break", "Colour", "Sprite", "Sample", "Animation" ],
[ "0", "1", "2", "3", "4", "5", "6" ],
$list[0]
);
if ($list[0] == "5" || $list[0] == "4")
{
$storyboard[] = trim(str_replace("\\", "/", $list[3]), "\"");
}
if ($list[0] == "6")
{
$story_base = pathinfo(trim(str_replace("\\", "/", $list[3]), "\""));
if (empty($story_base["extension"])) $ext = "";
else $ext = "." . $story_base["extension"];
if (empty($story_base["dirname"])) $dir = "";
else $dir = $story_base["dirname"] . "/";
for ($i = 0; $i < intval($list[6]); $i++)
{
$storyboard[] = $dir . $story_base["filename"] . $i . $ext;
}
}
$temp = explode(" ", basename(dirname($osb_file)))[0];
if (is_numeric($temp))
{
$set_id = $temp;
}
else
{
$set_id = "";
}
} }
$storyboard = array_unique($storyboard); $source_file = false;
if ($event_type == "0" || $event_type == "1")
{
$source_file = $list[2];
}
else if ($event_type == "4" || $event_type == "5" || $event_type == "6")
{
$source_file = $list[3];
}
$return = array( // fix backslash and double quotes
"format" => "storyboard", if ($source_file !== false) $source_file = trim(str_replace("\\", "/", $source_file), "\"");
"storyboard" => $storyboard,
"set_id" => $set_id,
);
$time_end = microtime(true); if ($event_type == "6")
$time = $time_end - $time_start; {
$return["process_time"] = $time; $extension = pathinfo($source_file, PATHINFO_EXTENSION) ?? "";
$return["hash"] = hash_file("md5", $osb_file); $extension = !empty($extension) ? "." . $extension : ""; // r-append dot if set
return $return; $directory = pathinfo($source_file, PATHINFO_DIRNAME) ?? "";
$directory = !empty($directory) ? $directory. "/" : ""; // append slash if set
$filename = pathinfo($source_file, PATHINFO_FILENAME);
$frames = intval($list[6]);
$source_files = array();
for ($i = 0; $i < $frames; $i++) // fill the array
{
$source_files[] = $directory . $filename . $i . $extension;
}
}
else
{
$source_files = array($source_file); // pack the single-source resources into an array
}
return array(self::reverse_event_type($event_type), $source_files);
}
// works according to the osu file format v14 specifications
public static function parse_osu_file_section_header(string $line, array $needed_sections = array()) : array
{
$current_section = str_replace([ "[", "]" ], "", $line);
if (!in_array($current_section, $needed_sections))
{
// short circuit whitelist
return array(false, false, false);
}
// peppy is retarded so i have to do this...
$section_type = false;
$delimiter = false;
switch ($current_section)
{
case "General":
case "Editor":
$section_type = "key-value pairs";
$delimiter = ": ";
break;
case "Metadata":
case "Difficulty":
$section_type = "key-value pairs";
$delimiter = ":"; // notice the missing space
break;
case "Colours":
$section_type = "key-value pairs";
$delimiter = " : "; // WHY WOULD YOU DO THIS IF YOU ALREADY HAVE TWO TYPES OF KEY-VALUE PAIRS???????????????????
break;
case "Events":
case "TimingPoints":
case "HitObjects":
$section_type = "lists"; // yes, listS because one list per line
$delimiter = ",";
break;
}
return array($current_section, $section_type, $delimiter);
} }
} }