note
	description: "UTF-8 encoding routines"
	library: "Gobo Eiffel Kernel Library"
	copyright: "Copyright (c) 2001-2018, Eric Bezault and others"
	license: "MIT License"
	date: "$Date: 2019-02-07 22:54:15 +0000 (Thu, 07 Feb 2019) $"
	revision: "$Revision: 102807 $"

class interface
	UC_UTF8_ROUTINES
		-- Routines for validating, decoding, measuring and producing
		-- UTF-8 byte sequences held in STRING_8 / CHARACTER_8 values.
		--
		-- Every feature below carries the postcondition
		-- `instance_free: class', i.e. it is declared as a class
		-- (instance-free) feature and does not depend on the state
		-- of any particular UC_UTF8_ROUTINES object.

create 
	default_create

feature -- Status report

	valid_utf8 (a_string: STRING_8): BOOLEAN
			-- Are the bytes in `a_string' a valid UTF-8 encoding?
			--
			-- `a_string' must be attached and must have the same type
			-- as the manifest string "" (see `a_string_is_string').
			-- NOTE(review): this presumably rules out descendants of
			-- STRING_8 whose items are not raw bytes; confirm against
			-- the implementation of `Any_.same_types'.
		require
			a_string_not_void: a_string /= Void
			a_string_is_string: Any_.same_types (a_string, "")
		ensure
			instance_free: class

	is_encoded_first_byte (a_byte: CHARACTER_8): BOOLEAN
			-- Is `a_byte' the first byte in the UTF-8 encoding
			-- of a character?
			-- Used as a precondition by `encoded_first_value',
			-- `encoded_byte_count' and `is_encoded_second_byte'.
		ensure
			instance_free: class

	is_encoded_next_byte (a_byte: CHARACTER_8): BOOLEAN
			-- Is `a_byte' one of the next bytes (i.e. a byte following
			-- the first one) in the UTF-8 encoding of a character?
			-- Used as a precondition by `encoded_next_value'.
		ensure
			instance_free: class

	is_encoded_second_byte (a_byte, a_first_byte: CHARACTER_8): BOOLEAN
			-- Is `a_byte' a valid second byte in UTF-8 encoding,
			-- given that the byte preceding it is `a_first_byte'?
			-- `a_first_byte' must itself satisfy `is_encoded_first_byte'.
		require
			valid_first_byte: is_encoded_first_byte (a_first_byte)
		ensure
			instance_free: class

	is_endian_detection_character (a_first, a_second, a_third: CHARACTER_8): BOOLEAN
			-- Is the three-byte sequence `a_first', `a_second', `a_third'
			-- a UTF-8 Byte Order Marker (BOM)?
			-- A True result guarantees that `a_first' and `a_second' are
			-- also accepted by `is_endian_detection_character_start'.
		ensure
			instance_free: class
			result_start: Result implies is_endian_detection_character_start (a_first, a_second)

	is_endian_detection_character_start (a_first, a_second: CHARACTER_8): BOOLEAN
			-- Are `a_first' and `a_second' the start (first two bytes)
			-- of a UTF-8 encoded Byte Order Marker (BOM)?
		ensure
			instance_free: class
	
feature -- Access

	encoded_first_value (a_byte: CHARACTER_8): INTEGER_32
			-- Value encoded in first byte `a_byte'
			-- `a_byte' must satisfy `is_encoded_first_byte'.
			-- Result is guaranteed to be in the range 0 .. 127.
		require
			is_encoded_first_byte: is_encoded_first_byte (a_byte)
		ensure
			instance_free: class
			value_positive: Result >= 0
			value_small_enough: Result < 128

	encoded_next_value (a_byte: CHARACTER_8): INTEGER_32
			-- Value encoded in `a_byte', one of the next bytes
			-- `a_byte' must satisfy `is_encoded_next_byte'.
			-- Result is guaranteed to be in the range 0 .. 63.
		require
			is_encoded_next_byte: is_encoded_next_byte (a_byte)
		ensure
			instance_free: class
			value_positive: Result >= 0
			value_small_enough: Result < 64
	
feature -- Measurement

	encoded_byte_count (a_byte: CHARACTER_8): INTEGER_32
			-- Number of bytes which were necessary to encode
			-- the unicode character whose first byte is `a_byte'
			-- `a_byte' must satisfy `is_encoded_first_byte'.
			-- Result is guaranteed to be in the range 1 .. 4.
		require
			is_encoded_first_byte: is_encoded_first_byte (a_byte)
		ensure
			instance_free: class
			encoded_byte_code_large_enough: Result >= 1
			encoded_byte_code_small_enough: Result <= 4

	substring_byte_count (a_string: READABLE_STRING_GENERAL; start_index, end_index: INTEGER_32): INTEGER_32
			-- Number of bytes needed to encode characters of
			-- `a_string' between `start_index' and `end_index'
			-- inclusive with the UTF-8 encoding
			--
			-- Indexes are 1-based and must lie within `a_string'.
			-- `start_index' = `end_index' + 1 is accepted by
			-- `meaningful_interval' and denotes an empty interval.
			-- NOTE(review): the result is presumably 0 for an empty
			-- interval; the contract only states Result >= 0.
		require
			a_string_not_void: a_string /= Void
			valid_start_index: 1 <= start_index
			valid_end_index: end_index <= a_string.count
			meaningful_interval: start_index <= end_index + 1
		ensure
			instance_free: class
			substring_byte_count_positive: Result >= 0

	code_byte_count (a_code: INTEGER_32): INTEGER_32
			-- Number of bytes needed to encode unicode character
			-- of code `a_code' with the UTF-8 encoding
			-- `a_code' must be accepted by
			-- `Unicode.valid_non_surrogate_code'.
			-- Result is guaranteed to be in the range 1 .. 4.
		require
			valid_code: Unicode.valid_non_surrogate_code (a_code)
		ensure
			instance_free: class
			code_byte_count_large_enough: Result >= 1
			code_byte_count_small_enough: Result <= 4

	character_byte_count (c: CHARACTER_8): INTEGER_32
			-- Number of bytes needed to encode character
			-- `c' with the UTF-8 encoding
			-- No precondition: any CHARACTER_8 is accepted.
			-- Result is guaranteed to be in the range 1 .. 4.
		ensure
			instance_free: class
			character_byte_count_large_enough: Result >= 1
			character_byte_count_small_enough: Result <= 4
	
feature -- Conversion

	to_utf8 (a_string: STRING_8): STRING_8
			-- New STRING made up of bytes corresponding to
			-- the UTF-8 representation of `a_string'
			--
			-- The result is attached, has the same type as the
			-- manifest string "" (see `string_type'), and satisfies
			-- `valid_utf8'.
		require
			a_string_not_void: a_string /= Void
		ensure
			instance_free: class
			to_utf8_not_void: Result /= Void
			string_type: Any_.same_types (Result, "")
			valid_utf8: valid_utf8 (Result)
	
feature -- Element change

	append_code_to_utf8 (a_utf8: STRING_8; a_code: INTEGER_32)
			-- Add UTF-8 encoded character of code `a_code'
			-- at the end of `a_utf8'.
			--
			-- `a_utf8' is modified in place. It must be attached, have
			-- the same type as the manifest string "", and already be
			-- valid UTF-8; it is still valid UTF-8 on exit.
			-- `a_code' must be accepted by
			-- `Unicode.valid_non_surrogate_code'.
		require
			a_utf8_not_void: a_utf8 /= Void
			a_utf8_is_string: Any_.same_types (a_utf8, "")
			a_utf8_valid: valid_utf8 (a_utf8)
			valid_code: Unicode.valid_non_surrogate_code (a_code)
		ensure
			instance_free: class
			a_utf8_valid: valid_utf8 (a_utf8)
	
end -- class UC_UTF8_ROUTINES

Generated by ISE EiffelStudio