Mojibake
Try it online
You can try the library online by using the WASM-compiled version. It is a live demo that you can use to preview the API.
// Return the character for the given codepoint
bool mjb_codepoint_character(mjb_codepoint codepoint, mjb_character *character);
// Normalize a string to NFC/NFKC/NFD/NFKD form
bool mjb_normalize(const char *buffer, size_t size, mjb_encoding encoding, mjb_normalization form, mjb_result *result);
// Return the next character from a string
bool mjb_next_character(const char *buffer, size_t size, mjb_encoding encoding, mjb_next_character_fn fn);
// Check if a string is normalized to NFC/NFKC/NFD/NFKD form
mjb_quick_check_result mjb_string_is_normalized(const char *buffer, size_t size, mjb_encoding encoding, mjb_normalization form);
// Filter a string to remove invalid characters
bool mjb_string_filter(const char *buffer, size_t size, mjb_encoding encoding, mjb_encoding output_encoding, mjb_filter filters, mjb_result *result);
// Return the most probable encoding of the string
mjb_encoding mjb_string_encoding(const char *buffer, size_t size);
// Return true if the string is encoded in UTF-8
bool mjb_string_is_utf8(const char *buffer, size_t size);
// Return true if the string is encoded in UTF-16BE or UTF-16LE
bool mjb_string_is_utf16(const char *buffer, size_t size);
// Return true if the string is encoded in ASCII
bool mjb_string_is_ascii(const char *buffer, size_t size);
// Encode a codepoint to a string
unsigned int mjb_codepoint_encode(mjb_codepoint codepoint, char *buffer, size_t size, mjb_encoding encoding);
// Convert from an encoding to another
bool mjb_string_convert_encoding(const char *buffer, size_t size, mjb_encoding encoding, mjb_encoding output_encoding, mjb_result *result);
// Return the length of a string
size_t mjb_strnlen(const char *buffer, size_t max_length, mjb_encoding encoding);
// Compare two strings
int mjb_string_compare(const char *s1, size_t s1_length, mjb_encoding s1_encoding, const char *s2, size_t s2_length, mjb_encoding s2_encoding);
// Change string case
char *mjb_case(const char *buffer, size_t size, mjb_case_type type, mjb_encoding encoding);
// Return true if the codepoint is valid
bool mjb_codepoint_is_valid(mjb_codepoint codepoint);
// Return true if the codepoint is graphic
bool mjb_codepoint_is_graphic(mjb_codepoint codepoint);
// Return true if the codepoint is combining
bool mjb_codepoint_is_combining(mjb_codepoint codepoint);
// Return true if the codepoint is a Hangul syllable
bool mjb_codepoint_is_hangul_syllable(mjb_codepoint codepoint);
// Return true if the codepoint is a CJK ideograph
bool mjb_codepoint_is_cjk_ideograph(mjb_codepoint codepoint);
// Return true if the category is graphic
bool mjb_category_is_graphic(mjb_category category);
// Return true if the category is combining
bool mjb_category_is_combining(mjb_category category);
// Return the block of the codepoint
bool mjb_codepoint_block(mjb_codepoint codepoint, mjb_block_info *block);
// Return the lowercase version of the codepoint
mjb_codepoint mjb_codepoint_to_lowercase(mjb_codepoint codepoint);
// Return the uppercase version of the codepoint
mjb_codepoint mjb_codepoint_to_uppercase(mjb_codepoint codepoint);
// Return the titlecase version of the codepoint
mjb_codepoint mjb_codepoint_to_titlecase(mjb_codepoint codepoint);
// Unicode line break algorithm
mjb_line_break *mjb_break_line(const char *buffer, size_t size, mjb_encoding encoding, size_t *output_size);
// Word and grapheme cluster breaking
bool mjb_segmentation(const char *buffer, size_t size, mjb_encoding encoding);
// Return the plane of the codepoint
mjb_plane mjb_codepoint_plane(mjb_codepoint codepoint);
// Return true if the plane is valid
bool mjb_plane_is_valid(mjb_plane plane);
// Return the name of a plane, NULL if the plane specified is not valid
const char *mjb_plane_name(mjb_plane plane, bool abbreviation);
// Return the emoji properties
bool mjb_codepoint_emoji(mjb_codepoint codepoint, mjb_emoji_properties *emoji);
// Output the current library version (MJB_VERSION)
const char *mjb_version(void);
// Output the current library version number (MJB_VERSION_NUMBER)
unsigned int mjb_version_number(void);
// Output the currently supported Unicode version (MJB_UNICODE_VERSION)
const char *mjb_unicode_version(void);