note
	description: "UTF-8 encoding routines"
	library: "Gobo Eiffel Kernel Library"
	copyright: "Copyright (c) 2001-2018, Eric Bezault and others"
	license: "MIT License"
	date: "$Date: 2019-02-07 22:54:15 +0000 (Thu, 07 Feb 2019) $"
	revision: "$Revision: 102807 $"

class interface
	UC_UTF8_ROUTINES

		-- Contract view of a collection of helper routines for inspecting,
		-- measuring and producing UTF-8 byte sequences held in STRING_8 objects.
		--
		-- Every feature listed here carries the postcondition
		-- `instance_free: class', i.e. it is declared as a class
		-- (instance-free) feature and may be called without a target
		-- object, e.g. `{UC_UTF8_ROUTINES}.valid_utf8 (s)'.
		--
		-- NOTE(review): `Any_' and `Unicode' are used in the contracts below
		-- but are not declared in this view; their exact semantics should be
		-- confirmed against their own definitions.

create
	default_create

feature -- Status report

	valid_utf8 (a_string: STRING_8): BOOLEAN
			-- Are the bytes in `a_string' a valid UTF-8 encoding?
			--
			-- `a_string' must not be Void and must have the same type as
			-- the manifest string "" according to `Any_.same_types'
			-- (presumably this excludes descendants of STRING_8 --
			-- confirm against the definition of `Any_').
		require
			a_string_not_void: a_string /= Void
			a_string_is_string: Any_.same_types (a_string, "")
		ensure
			instance_free: class

	is_encoded_first_byte (a_byte: CHARACTER_8): BOOLEAN
			-- Is `a_byte' the first byte in UTF-8 encoding?
			-- (That is, can it start the encoding of a character?)
		ensure
			instance_free: class

	is_encoded_next_byte (a_byte: CHARACTER_8): BOOLEAN
			-- Is `a_byte' one of the next bytes in UTF-8 encoding?
			-- (That is, a byte following the first byte of a
			-- multi-byte sequence.)
		ensure
			instance_free: class

	is_encoded_second_byte (a_byte, a_first_byte: CHARACTER_8): BOOLEAN
			-- Is `a_byte' a valid second byte in UTF-8 encoding?
			--
			-- `a_first_byte' must itself satisfy `is_encoded_first_byte';
			-- presumably it is the first byte of the sequence in which
			-- `a_byte' appears second -- confirm against the implementation.
		require
			valid_first_byte: is_encoded_first_byte (a_first_byte)
		ensure
			instance_free: class

	is_endian_detection_character (a_first, a_second, a_third: CHARACTER_8): BOOLEAN
			-- Is the sequence `a_first', `a_second', `a_third'
			-- a UTF-8 Byte Order Mark (BOM)?
			--
			-- A True result guarantees that
			-- `is_endian_detection_character_start (a_first, a_second)'
			-- also holds.
		ensure
			instance_free: class
			result_start: Result implies is_endian_detection_character_start (a_first, a_second)

	is_endian_detection_character_start (a_first, a_second: CHARACTER_8): BOOLEAN
			-- Are `a_first' and `a_second' the start of a UTF-8 encoded
			-- Byte Order Mark (BOM)?
		ensure
			instance_free: class

feature -- Access

	encoded_first_value (a_byte: CHARACTER_8): INTEGER_32
			-- Value encoded in first byte `a_byte'
			--
			-- `a_byte' must satisfy `is_encoded_first_byte'.
			-- The result is in the range 0 .. 127.
		require
			is_encoded_first_byte: is_encoded_first_byte (a_byte)
		ensure
			instance_free: class
			value_positive: Result >= 0
			value_small_enough: Result < 128

	encoded_next_value (a_byte: CHARACTER_8): INTEGER_32
			-- Value encoded in `a_byte', one of the next bytes
			--
			-- `a_byte' must satisfy `is_encoded_next_byte'.
			-- The result is in the range 0 .. 63.
		require
			is_encoded_next_byte: is_encoded_next_byte (a_byte)
		ensure
			instance_free: class
			value_positive: Result >= 0
			value_small_enough: Result < 64

feature -- Measurement

	encoded_byte_count (a_byte: CHARACTER_8): INTEGER_32
			-- Number of bytes which were necessary to encode
			-- the unicode character whose first byte is `a_byte'
			--
			-- `a_byte' must satisfy `is_encoded_first_byte'.
			-- The result is in the range 1 .. 4.
		require
			is_encoded_first_byte: is_encoded_first_byte (a_byte)
		ensure
			instance_free: class
			encoded_byte_code_large_enough: Result >= 1
			encoded_byte_code_small_enough: Result <= 4

	substring_byte_count (a_string: READABLE_STRING_GENERAL; start_index, end_index: INTEGER_32): INTEGER_32
			-- Number of bytes needed to encode characters of
			-- `a_string' between `start_index' and `end_index'
			-- inclusive with the UTF-8 encoding
			--
			-- Indexes are 1-based and must lie within `a_string'.
			-- An empty interval (`start_index' = `end_index' + 1) is
			-- accepted by the precondition. The result is never negative.
		require
			a_string_not_void: a_string /= Void
			valid_start_index: 1 <= start_index
			valid_end_index: end_index <= a_string.count
			meaningful_interval: start_index <= end_index + 1
		ensure
			instance_free: class
			substring_byte_count_positive: Result >= 0

	code_byte_count (a_code: INTEGER_32): INTEGER_32
			-- Number of bytes needed to encode unicode character
			-- of code `a_code' with the UTF-8 encoding
			--
			-- `a_code' must satisfy `Unicode.valid_non_surrogate_code'.
			-- The result is in the range 1 .. 4.
		require
			valid_code: Unicode.valid_non_surrogate_code (a_code)
		ensure
			instance_free: class
			code_byte_count_large_enough: Result >= 1
			code_byte_count_small_enough: Result <= 4

	character_byte_count (c: CHARACTER_8): INTEGER_32
			-- Number of bytes needed to encode character
			-- `c' with the UTF-8 encoding
			--
			-- No precondition. The result is in the range 1 .. 4.
		ensure
			instance_free: class
			character_byte_count_large_enough: Result >= 1
			character_byte_count_small_enough: Result <= 4

feature -- Conversion

	to_utf8 (a_string: STRING_8): STRING_8
			-- New STRING made up of bytes corresponding to
			-- the UTF-8 representation of `a_string'
			--
			-- `a_string' must not be Void. The result is never Void,
			-- has the same type as the manifest string "" (per
			-- `Any_.same_types') and satisfies `valid_utf8'.
		require
			a_string_not_void: a_string /= Void
		ensure
			instance_free: class
			to_utf8_not_void: Result /= Void
			string_type: Any_.same_types (Result, "")
			valid_utf8: valid_utf8 (Result)

feature -- Element change

	append_code_to_utf8 (a_utf8: STRING_8; a_code: INTEGER_32)
			-- Add UTF-8 encoded character of code `a_code'
			-- at the end of `a_utf8'.
			--
			-- `a_utf8' is modified in place. It must not be Void, must
			-- have the same type as the manifest string "" (per
			-- `Any_.same_types') and must already satisfy `valid_utf8'.
			-- `a_code' must satisfy `Unicode.valid_non_surrogate_code'.
			-- On return `a_utf8' still satisfies `valid_utf8'.
		require
			a_utf8_not_void: a_utf8 /= Void
			a_utf8_is_string: Any_.same_types (a_utf8, "")
			a_utf8_valid: valid_utf8 (a_utf8)
			valid_code: Unicode.valid_non_surrogate_code (a_code)
		ensure
			instance_free: class
			a_utf8_valid: valid_utf8 (a_utf8)

end -- class UC_UTF8_ROUTINES
Generated by ISE EiffelStudio