Files
comaps/coding/bwt.hpp
Konstantin Pastbin e3e4a1985a Organic Maps sources as of 02.04.2025 (fad26bbf22ac3da75e01e62aa01e5c8e11861005)
To expand with full Organic Maps and Maps.ME commits history run:
  git remote add om-historic [om-historic.git repo url]
  git fetch --tags om-historic
  git replace squashed-history historic-commits
2025-05-08 21:10:51 +07:00

61 lines
2.1 KiB
C++

#pragma once
#include <cstdint>
#include <string>
namespace coding
{
// Computes the Burrows-Wheeler transform of the byte string |s| of
// length |n| and stores the result in |r|. Note - the size of |r|
// must be |n|. Returns the index of the original string among all
// sorted rotations of |s|.
//
// *NOTE* in contrast to popular explanations of BWT, we do not append
// to |s| a trailing '$' that is less than any other character in |s|.
// The reason is that |s| can be an arbitrary byte string, with zero
// bytes inside, so implementing this trailing '$' is expensive and,
// actually, not needed.
//
// For example, if |s| is "abaaba", the canonical BWT is:
//
//   Sorted rotations:   canonical BWT:
//     $abaaba           a
//     a$abaab           b
//     aaba$ab           b
//     aba$aba           a
//   * abaaba$           $
//     ba$abaa           a
//     baaba$a           a
//
// where '*' denotes the original string.
//
// Our implementation sorts rotations as if there were an implicit
// '$' that is less than any other byte in |s|, but does not return
// this '$'. Therefore, the order of rotations is the same as above,
// without the first '$abaaba':
//
//   Sorted rotations:   our BWT:
//     aabaab            b
//     aabaab            b
//     abaaba            a
//   * abaaba            a
//     baabaa            a
//     baabaa            a
//
// where '*' denotes the index of the original string. As one can see,
// there are two 'abaaba' strings, but, as mentioned, rotations are
// sorted as if there were an implicit '$' at the end of the original
// string, which is what tells them apart. It's possible to get from
// "our BWT" to the "canonical BWT", see the code for details.
//
// Complexity: O(n) time and O(n) memory.
size_t BWT(size_t n, uint8_t const * s, uint8_t * r);
// Same transform for std::string input and output.
// NOTE(review): whether |r| is resized to |s.size()| by this overload
// or must be pre-sized by the caller is not visible from this header -
// confirm against the implementation.
size_t BWT(std::string const & s, std::string & r);
// Inverse Burrows-Wheeler transform.
//
// NOTE(review): presumably |s| is the transformed string of length
// |n|, |start| is the index returned by the matching BWT() call, and
// |r| (of size |n|) receives the original bytes - confirm against the
// implementation.
//
// Complexity: O(n) time and O(n) memory.
void RevBWT(size_t n, size_t start, uint8_t const * s, uint8_t * r);
// Same inverse transform for std::string input and output.
// NOTE(review): sizing requirements for |r| are not visible from this
// header - confirm against the implementation.
void RevBWT(size_t start, std::string const & s, std::string & r);
} // namespace coding