utf_converter Contracts

Automatic generation produced by ISE Eiffel


Classes
Clusters
Cluster hierarchy
Chart
Relations
Text
Flat
Contracts
Go to: 
note
	description: "[
					Converter from/to UTF-8, UTF-16 and UTF-32 encodings.
		
					Handling of invalid encodings
					=============================
		
					Whenever a UTF-8 or UTF-16 sequence is decoded, the decoding routines also check
					that the sequence is valid. If it is not, it will replace the invalid unit (e.g. a byte
					for UTF-8 and a 2-byte for UTF-16 by the replacement character U+FFFD as described by
					variant #3 of the recommended practice for replacement character in Unicode (see
					http://www.unicode.org/review/pr-121.html for more details).
		
					However it means that you cannot roundtrip incorrectly encoded sequence back and forth
					between the encoded version and the decoded STRING_32 version. To allow roundtrip, an
					escaped representation of a bad encoded sequence has been introduced. It is adding a
					a fourth variant (which is a slight modification of variant #3) to the recommended
					practice where the replacement character is followed by the printed hexadecimal value
					of the invalid byte or the invalid 2-byte sequence.
					
					To provide an example (assuming that the Unicode character U+FFFD is represented as
					? textually):
					1 - on UNIX, any invalid UTF-8 byte sequence such as 0x8F 0x8F is encoded as the
					following Unicode sequence: U+FFFD U+0038 U+0046 U+FFFF U+0038 U+0046, and textually
					it looks like "?8F?8F".
					2 - on Windows, any invalid UTF-16 2-byte sequence such as 0xD800 0x0054 is encoded as the
					following Unicode sequence: U+FFFD U+0075 U+0044 U+0038 U+0030 U+0030 U+FFFD U+0035 U+0033,
					and textually it looks like "?uD800?54". The rule is that if the 2-byte sequence does not fit
					into 1 byte, it uses the letter `u' followed by the hexadecimal value of the 2-byte sequence,
					otherwise it simply uses the 1-byte hexadecimal representation.
	]"
	date: "$Date: 2014-04-29 14:26:38 -0700 (Tue, 29 Apr 2014) $"
	revision: "$Revision: 94920 $"

expanded class interface
	UTF_CONVERTER

create 
	default_create

feature -- Access

	Escape_character: CHARACTER_32 = '�'
			-- Unicode replacement character to escape invalid UTF-8 or UTF-16 encoding.
			-- UTF-8 encoding: 0xEF 0xBF 0xBD
			-- Binary UTF-8 encoding: 11101111 10111111 10111101
			-- UTF-16 encoding: 0xFFFD
	
feature -- Status report

	is_valid_utf_8_string_8 (s: READABLE_STRING_8): BOOLEAN
			-- Is `s' a valid UTF-8 Unicode sequence?

	is_valid_utf_16le_string_8 (s: READABLE_STRING_8): BOOLEAN
			-- Is `s' a valid UTF-16LE Unicode sequence?

	is_valid_utf_16_subpointer (p: MANAGED_POINTER; start_pos, end_pos: INTEGER_32; a_stop_at_null: BOOLEAN): BOOLEAN
			-- Is `p' a valid UTF-16 Unicode sequence between code unit `start_pos' and `end_pos'?
			-- If `a_stop_at_null' we stop checking after finding a null character.

	is_valid_utf_16 (s: SPECIAL [NATURAL_16]): BOOLEAN
			-- Is `s' a valid UTF-16 Unicode sequence?
	
feature -- Measurement

	utf_8_bytes_count (s: READABLE_STRING_GENERAL; start_pos, end_pos: INTEGER_32): INTEGER_32
			-- Number of bytes necessary to encode in UTF-8 `s.substring (start_pos, end_pos)'.
			-- Note that this feature can be used for both escaped and non-escaped string.
			-- In the case of escaped strings, the result will be possibly higher than really needed.
			-- It does not include the terminating null character.
		require
			start_position_big_enough: start_pos >= 1
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos <= s.count

	utf_16_characters_count_form_pointer (m: MANAGED_POINTER; start_pos, end_pos: INTEGER_32): INTEGER_32
			-- Number of characters of the UTF-16 encoded `m' starting at `start_pos' in `m' up to `end_pos - 1'.
			-- It does not include the terminating null character.
		require
			start_position_big_enough: start_pos >= 0
			end_position: start_pos <= end_pos + 2
			end_pos_small_enought: end_pos < m.count
			even_start_position: start_pos \\ 2 = 0
			even_end_position: end_pos \\ 2 = 0

	utf_16_bytes_count (s: READABLE_STRING_GENERAL; start_pos, end_pos: INTEGER_32): INTEGER_32
			-- Number of bytes necessary at the very least to encode in UTF-16 `s.substring (start_pos, end_pos)'.
			-- Note that this feature can be used for both escaped and non-escaped string.
			-- In the case of escaped strings, the result will be possibly higher than really needed.
			-- It does not include the terminating null character.
		require
			start_position_big_enough: start_pos >= 1
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos <= s.count

	utf_8_to_string_32_count (s: SPECIAL [CHARACTER_8]; start_pos, end_pos: INTEGER_32): INTEGER_32
			-- Count of characters corresponding to UTF-8 sequence `s'.
		require
			start_position_big_enough: start_pos >= 0
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos < s.count
	
feature -- UTF-32 to UTF-8

	string_32_to_utf_8_string_8 (s: READABLE_STRING_32): STRING_8
			-- UTF-8 sequence corresponding to `s'.
		ensure
			roundtrip: utf_8_string_8_to_string_32 (Result).same_string (s)

	string_32_into_utf_8_string_8 (s: READABLE_STRING_32; a_result: STRING_8)
			-- Copy the UTF-8 sequence corresponding to `s' appended into `a_result'.
		ensure
			roundtrip: utf_8_string_8_to_string_32 (a_result.substring (old a_result.count + 1, a_result.count)).same_string (s)

	utf_32_string_to_utf_8_string_8 (s: READABLE_STRING_GENERAL): STRING_8
			-- UTF-8 sequence corresponding to `s' interpreted as a UTF-32 sequence.
		ensure
			roundtrip: utf_8_string_8_to_string_32 (Result).same_string_general (s)

	utf_32_string_into_utf_8_string_8 (s: READABLE_STRING_GENERAL; a_result: STRING_8)
			-- Copy the UTF-8 sequence corresponding to `s' interpreted as a UTF-32 sequence
			-- appended into `a_result'.
		ensure
			roundtrip: utf_8_string_8_to_string_32 (a_result.substring (old a_result.count + 1, a_result.count)).same_string_general (s)

	escaped_utf_32_substring_into_utf_8_0_pointer (s: READABLE_STRING_GENERAL; start_pos, end_pos: INTEGER_32; p: MANAGED_POINTER; p_offset: INTEGER_32; a_new_upper: detachable CELL [INTEGER_32])
			-- Write UTF-8 sequence corresponding to `s', interpreted as a UTF-32 sequence that could
			-- be escaped, with terminating zero to address `p + p_offset' and update the size of `p' to the
			-- number of written bytes.
			-- If `a_new_upper' is provided, the upper index of `p' containing the zero-termination
			-- is written to `a_new_upper'.
			-- The sequence is zero-terminated.
			-- If `s' contains the Escape_character followed by either "HH" or "uHHHH" where H stands
			-- for an hexadecimal digit, then `s' has been escaped and will be converted to what is
			-- expected by the current platform.
			-- Otherwise it will be ignored and it will be left as is.
			-- See the note clause for the class for more details on the encoding.
		require
			start_position_big_enough: start_pos >= 1
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos <= s.count
			p_offset_non_negative: p_offset >= 0
		ensure
			roundtrip: a_new_upper /= Void implies utf_8_0_subpointer_to_escaped_string_32 (p, p_offset, a_new_upper.item - 1, False).same_string_general (s.substring (start_pos, end_pos))
			roundtrip: (a_new_upper = Void and then not s.substring (start_pos, end_pos).has ('%U'.to_character_32)) implies utf_8_0_subpointer_to_escaped_string_32 (p, p_offset, p.count, True).same_string_general (s.substring (start_pos, end_pos))

	escaped_utf_32_string_to_utf_8_string_8 (s: READABLE_STRING_GENERAL): STRING_8
			-- UTF-8 sequence corresponding to `s' interpreted as a UTF-32 sequence that could be escaped.
			-- If `s' contains the Escape_character followed by either "HH" or "uHHHH" where H stands
			-- for an hexadecimal digit, then `s' has been escaped and will be converted to what is
			-- expected by the current platform.
			-- Otherwise it will be ignored and it will be left as is.
			-- See the note clause for the class for more details on the encoding.
		ensure
			roundtrip: utf_8_string_8_to_escaped_string_32 (Result).same_string_general (s)

	escaped_utf_32_string_into_utf_8_string_8 (s: READABLE_STRING_GENERAL; a_result: STRING_8)
			-- Copy the UTF-8 sequence corresponding to `s' interpreted as a UTF-32 sequence that could
			-- be escaped appended into `a_result'.
			-- If `s' contains the Escape_character followed by either "HH" or "uHHHH" where H stands
			-- for an hexadecimal digit, then `s' has been escaped and will be converted to what is
			-- expected by the current platform.
			-- Otherwise it will be ignored and it will be left as is.
			-- See the note clause for the class for more details on the encoding.
		ensure
			roundtrip: utf_8_string_8_to_escaped_string_32 (a_result.substring (old a_result.count + 1, a_result.count)).same_string_general (s)

	string_32_into_utf_8_0_pointer (s: READABLE_STRING_32; p: MANAGED_POINTER; p_offset: INTEGER_32; a_new_upper: detachable CELL [INTEGER_32])
			-- Write UTF-8 sequence corresponding to `s' with terminating zero
			-- to address `p + p_offset' and update the size of `p' to the number of written bytes.
			-- If `a_new_upper' is provided, the upper index of `p' containing the zero-termination
			-- is written to `a_new_upper'.
			-- The sequence is zero-terminated.
		require
			p_offset_non_negative: p_offset >= 0
		ensure
			roundtrip: a_new_upper /= Void implies utf_8_0_subpointer_to_escaped_string_32 (p, p_offset, a_new_upper.item - 1, False).same_string (s)
			roundtrip: (a_new_upper = Void and then not s.has ('%U'.to_character_32)) implies utf_8_0_subpointer_to_escaped_string_32 (p, p_offset, p.count, True).same_string_general (s)

	utf_32_string_into_utf_8_0_pointer (s: READABLE_STRING_GENERAL; p: MANAGED_POINTER; p_offset: INTEGER_32; a_new_upper: detachable CELL [INTEGER_32])
			-- Write UTF-8 sequence corresponding to `s', interpreted as a UTF-32 sequence,
			-- with terminating zero to address `p + p_offset' and update the size of `p' to the
			-- number of written bytes.
			-- If `a_new_upper' is provided, the upper index of `p' containing the zero-termination
			-- is written to `a_new_upper'.
			-- The sequence is zero-terminated.
		require
			p_offset_non_negative: p_offset >= 0
		ensure
			roundtrip: a_new_upper /= Void implies utf_8_0_subpointer_to_escaped_string_32 (p, p_offset, a_new_upper.item - 1, False).same_string_general (s)
			roundtrip: (a_new_upper = Void and then not s.has ('%U'.to_character_32)) implies utf_8_0_subpointer_to_escaped_string_32 (p, p_offset, p.count, True).same_string_general (s)

	utf_32_string_to_utf_8 (s: READABLE_STRING_GENERAL): SPECIAL [NATURAL_8]
			-- UTF-8 sequence corresponding to `s', interpreted as a UTF-32 sequence.
			-- The sequence is not zero-terminated.
		ensure
			roundtrip: attached utf_32_string_to_utf_8_string_8 (s) as l_ref and then across
					Result as l_spec
				all
					l_spec.item.to_natural_32 = l_ref.code (l_spec.cursor_index)
				end

	utf_32_string_to_utf_8_0 (s: READABLE_STRING_GENERAL): SPECIAL [NATURAL_8]
			-- UTF-8 sequence corresponding to `s', interpreted as a UTF-32 sequence.
			-- The sequence is zero-terminated.
		ensure
			roundtrip: attached utf_32_string_to_utf_8_string_8 (s) as l_ref and then across
					Result as l_spec
				all
					l_spec.item.to_natural_32 = l_ref.code (l_spec.cursor_index)
				end
	
feature -- UTF-8 to UTF-32

	utf_8_0_pointer_to_escaped_string_32 (p: MANAGED_POINTER): STRING_32
			-- STRING_32 object corresponding to UTF-8 sequence `p' which is zero-terminated,
			-- where invalid UTF-8 sequences are escaped.
		ensure
			roundtrip: attached escaped_utf_32_string_to_utf_8_string_8 (Result) as l_str and then across
					l_str as l_char
				all
					l_char.item = p.read_natural_8 (l_char.cursor_index - 1).to_character_8
				end

	utf_8_0_pointer_into_escaped_string_32 (p: MANAGED_POINTER; a_result: STRING_32)
			-- Copy STRING_32 object corresponding to UTF-8 sequence `p' which is zero-terminated,
			-- where invalid UTF-8 sequences are escaped, appended into `a_result'.
		ensure
			roundtrip: attached escaped_utf_32_string_to_utf_8_string_8 (a_result.substring (old a_result.count + 1, a_result.count)) as l_str and then across
					l_str as l_char
				all
					l_char.item = p.read_natural_8 (l_char.cursor_index - 1).to_character_8
				end

	utf_8_0_subpointer_to_escaped_string_32 (p: MANAGED_POINTER; start_pos, end_pos: INTEGER_32; a_stop_at_null: BOOLEAN): STRING_32
			-- STRING_32 object corresponding to UTF-8 sequence `p' between indexes `start_pos' and
			-- `end_pos' or the first null character encountered if `a_stop_at_null', where invalid
			-- UTF-8 sequences are escaped.
		require
			start_position_big_enough: start_pos >= 0
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos < p.count
		ensure
			roundtrip: attached escaped_utf_32_string_to_utf_8_string_8 (Result) as l_str and then across
					l_str as l_char
				all
					l_char.item = p.read_natural_8 (start_pos + l_char.cursor_index - 1).to_character_8
				end

	utf_8_0_subpointer_into_escaped_string_32 (p: MANAGED_POINTER; start_pos, end_pos: INTEGER_32; a_stop_at_null: BOOLEAN; a_result: STRING_32)
			-- Copy STRING_32 object corresponding to UTF-8 sequence `p' between indexes `start_pos' and
			-- `end_pos' or the first null character encountered if `a_stop_at_null', where invalid
			-- UTF-8 sequences are escaped, appended into `a_result'.
		require
			start_position_big_enough: start_pos >= 0
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos < p.count
		ensure
			roundtrip: attached escaped_utf_32_string_to_utf_8_string_8 (a_result.substring (old a_result.count + 1, a_result.count)) as l_str and then across
					l_str as l_char
				all
					l_char.item = p.read_natural_8 (start_pos + l_char.cursor_index - 1).to_character_8
				end

	utf_8_string_8_to_string_32 (s: READABLE_STRING_8): STRING_32
			-- STRING_32 corresponding to UTF-8 sequence `s'.
		ensure
			roundtrip: is_valid_utf_8_string_8 (s) implies utf_32_string_to_utf_8_string_8 (Result).same_string (s)

	utf_8_string_8_into_string_32 (s: READABLE_STRING_8; a_result: STRING_32)
			-- Copy STRING_32 corresponding to UTF-8 sequence `s' appended into `a_result'.
		ensure
			roundtrip: is_valid_utf_8_string_8 (s) implies utf_32_string_to_utf_8_string_8 (a_result.substring (old a_result.count + 1, a_result.count)).same_string (s)

	utf_8_string_8_to_escaped_string_32 (s: READABLE_STRING_8): STRING_32
			-- STRING_32 corresponding to UTF-8 sequence `s', where invalid UTF-8 sequences are escaped.
		ensure
			roundtrip: escaped_utf_32_string_to_utf_8_string_8 (Result).same_string (s)

	utf_8_string_8_into_escaped_string_32 (s: READABLE_STRING_8; a_result: STRING_32)
			-- Copy STRING_32 corresponding to UTF-8 sequence `s', where invalid UTF-8 sequences are escaped,
			-- appended into `a_result'.
		ensure
			roundtrip: escaped_utf_32_string_to_utf_8_string_8 (a_result.substring (old a_result.count + 1, a_result.count)).same_string (s)
	
feature -- UTF-32 to UTF-16

	string_32_to_utf_16 (s: READABLE_STRING_32): SPECIAL [NATURAL_16]
			-- UTF-16 sequence corresponding to `s'.
			-- The sequence is not zero-terminated.
		ensure
			roundtrip: attached utf_32_string_to_utf_16le_string_8 (s) as l_ref and then across
					Result as l_spec
				all
					l_spec.item.to_natural_32 = (l_ref.code (l_spec.cursor_index * 2 - 1) | (l_ref.code (l_spec.cursor_index * 2) |<< 16))
				end

	utf_32_string_to_utf_16 (s: READABLE_STRING_GENERAL): SPECIAL [NATURAL_16]
			-- UTF-16 sequence corresponding to `s' interpreted as a UTF-32 sequence.
			-- The sequence is not zero-terminated.
		ensure
			roundtrip: attached utf_32_string_to_utf_16le_string_8 (s) as l_ref and then across
					Result as l_spec
				all
					l_spec.item.to_natural_32 = (l_ref.code (l_spec.cursor_index * 2 - 1) | (l_ref.code (l_spec.cursor_index * 2) |<< 8))
				end

	string_32_to_utf_16_0 (s: READABLE_STRING_32): SPECIAL [NATURAL_16]
			-- UTF-16 sequence corresponding to `s' with terminating zero.
		ensure
			roundtrip: attached utf_32_string_to_utf_16le_string_8 (s) as l_ref and then across
					Result.resized_area_with_default (0, Result.count - 1) as l_spec
				all
					l_spec.item.to_natural_32 = (l_ref.code (l_spec.cursor_index * 2 - 1) | (l_ref.code (l_spec.cursor_index * 2) |<< 8))
				end

	utf_32_string_to_utf_16_0 (s: READABLE_STRING_GENERAL): SPECIAL [NATURAL_16]
			-- UTF-16 sequence corresponding to `s', interpreted as a UTF-32 sequence,
			-- with terminating zero.
		ensure
			roundtrip: attached utf_32_string_to_utf_16le_string_8 (s) as l_ref and then across
					Result.resized_area_with_default (0, Result.count - 1) as l_spec
				all
					l_spec.item.to_natural_32 = (l_ref.code (l_spec.cursor_index * 2 - 1) | (l_ref.code (l_spec.cursor_index * 2) |<< 8))
				end

	string_32_into_utf_16_pointer (s: READABLE_STRING_32; p: MANAGED_POINTER; p_offset: INTEGER_32; a_new_upper: detachable CELL [INTEGER_32])
			-- Write UTF-16 sequence corresponding to `s' to address `p + p_offset'
			-- and update the size of `p' to the number of written bytes.
			-- If `a_new_upper' is provided, the upper index of `p' containing the zero-termination
			-- is written to `a_new_upper'.
			-- The sequence is not zero-terminated.
		require
			even_p_offset: (p_offset \\ 2) = 0
			p_offset_non_negative: p_offset >= 0
		ensure
			roundtrip: a_new_upper /= Void implies utf_16_0_subpointer_to_string_32 (p, p_offset // 2, (a_new_upper.item // 2) - 1, False).same_string (s)
			roundtrip: (a_new_upper = Void and then not s.has ('%U'.to_character_32)) implies utf_16_0_subpointer_to_string_32 (p, p_offset // 2, (p.count // 2) - 1, True).same_string (s)

	string_32_into_utf_16_0_pointer (s: READABLE_STRING_32; p: MANAGED_POINTER; p_offset: INTEGER_32; a_new_upper: detachable CELL [INTEGER_32])
			-- Write UTF-16 sequence corresponding to `s' with terminating zero
			-- to address `p + p_offset' and update the size of `p' to the number of written bytes.
			-- If `a_new_upper' is provided, the upper index of `p' containing the zero-termination
			-- is written to `a_new_upper'.
			-- The sequence is zero-terminated.
		require
			even_p_offset: (p_offset \\ 2) = 0
			p_offset_non_negative: p_offset >= 0
		ensure
			roundtrip: a_new_upper /= Void implies utf_16_0_subpointer_to_string_32 (p, p_offset // 2, (a_new_upper.item // 2) - 1, False).same_string (s)
			roundtrip: (a_new_upper = Void and then not s.has ('%U'.to_character_32)) implies utf_16_0_subpointer_to_string_32 (p, p_offset // 2, (p.count // 2) - 1, True).same_string (s)

	utf_32_substring_into_utf_16_pointer (s: READABLE_STRING_GENERAL; start_pos, end_pos: like {READABLE_STRING_32}.count; p: MANAGED_POINTER; p_offset: INTEGER_32; a_new_upper: detachable CELL [INTEGER_32])
			-- Write UTF-16 sequence corresponding to the substring of `s',
			-- interpreted as a UTF-32 sequence, starting at index `start_pos'
			-- and ending at index `end_pos' to address `p + p_offset' and update the
			-- size of `p' to the number of written bytes.
			-- If `a_new_upper' is provided, the upper index of `p' containing the zero-termination
			-- is written to `a_new_upper'.
			-- The sequence is not zero-terminated.
		require
			start_position_big_enough: start_pos >= 1
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos <= s.count
			even_p_offset: (p_offset \\ 2) = 0
			p_offset_non_negative: p_offset >= 0
		ensure
			p_count_may_increase: p.count >= old p.count
			roundtrip: a_new_upper /= Void implies utf_16_0_subpointer_to_string_32 (p, p_offset // 2, (a_new_upper.item // 2) - 1, False).same_string_general (s)
			roundtrip: (a_new_upper = Void and then not s.has ('%U'.to_character_32)) implies utf_16_0_subpointer_to_string_32 (p, p_offset // 2, (p.count // 2) - 1, True).same_string_general (s)

	utf_32_substring_into_utf_16_0_pointer (s: READABLE_STRING_GENERAL; start_pos, end_pos: like {READABLE_STRING_32}.count; p: MANAGED_POINTER; p_offset: INTEGER_32; a_new_upper: detachable CELL [INTEGER_32])
			-- Write UTF-16 sequence corresponding to the substring of `s',
			-- interpreted as a UTF-32 sequence, starting at index `start_pos'
			-- and ending at index `end_pos' to address `p + p_offset' and update the
			-- size of `p' to the number of written bytes.
			-- If `a_new_upper' is provided, the upper index of `p' containing the zero-termination
			-- is written to `a_new_upper'.
			-- The sequence is zero-terminated.
		require
			start_position_big_enough: start_pos >= 1
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos <= s.count
			even_p_offset: (p_offset \\ 2) = 0
			p_offset_non_negative: p_offset >= 0
		ensure
			p_count_may_increase: p.count >= old p.count
			roundtrip: a_new_upper /= Void implies utf_16_0_subpointer_to_string_32 (p, p_offset // 2, (a_new_upper.item // 2) - 1, False).same_string_general (s)
			roundtrip: (a_new_upper = Void and then not s.has ('%U'.to_character_32)) implies utf_16_0_subpointer_to_string_32 (p, p_offset // 2, (p.count // 2) - 1, True).same_string_general (s)

	utf_32_string_to_utf_16le_string_8 (s: READABLE_STRING_GENERAL): STRING_8
			-- UTF-16LE sequence corresponding to `s' interpreted as a UTF-32 sequence
		ensure
			roundtrip: utf_16le_string_8_to_string_32 (Result).same_string_general (s)

	utf_32_string_into_utf_16le_string_8 (s: READABLE_STRING_GENERAL; a_result: STRING_8)
			-- Copy UTF-16LE sequence corresponding to `s' interpreted as a UTF-32 sequence
			-- appended into `a_result'.
		ensure
			roundtrip: utf_16le_string_8_to_string_32 (a_result.substring (old a_result.count + 1, a_result.count)).same_string_general (s)

	escaped_utf_32_substring_into_utf_16_0_pointer (s: READABLE_STRING_GENERAL; start_pos, end_pos: like {READABLE_STRING_32}.count; p: MANAGED_POINTER; p_offset: INTEGER_32; a_new_upper: detachable CELL [INTEGER_32])
			-- Write UTF-16 sequence corresponding to the substring of `s',
			-- interpreted as a UTF-32 sequence, starting at index `start_pos'
			-- and ending at index `end_pos' to address `p + p_offset' and update the
			-- size of `p' to the number of written bytes.
			-- If `a_new_upper' is provided, the upper index of `p' containing the zero-termination
			-- is written to `a_new_upper'.
			-- The sequence is not zero-terminated.
		require
			start_position_big_enough: start_pos >= 1
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos <= s.count
			even_p_offset: (p_offset \\ 2) = 0
			p_offset_non_negative: p_offset >= 0
		ensure
			p_count_may_increase: p.count >= old p.count
			roundtrip: a_new_upper /= Void implies utf_16_0_subpointer_to_escaped_string_32 (p, p_offset // 2, (a_new_upper.item // 2) - 1, False).same_string_general (s.substring (start_pos, end_pos))
			roundtrip: (a_new_upper = Void and then not s.substring (start_pos, end_pos).has ('%U'.to_character_32)) implies utf_16_0_subpointer_to_escaped_string_32 (p, p_offset // 2, (p.count // 2) - 1, True).same_string_general (s.substring (start_pos, end_pos))

	escaped_utf_32_string_to_utf_16le_string_8 (s: READABLE_STRING_GENERAL): STRING_8
			-- UTF-16LE sequence corresponding to `s' interpreted as a UTF-32 sequence that could be escaped.
			-- If `s' contains the Escape_character followed by either "HH" or "uHHHH" where H stands
			-- for an hexadecimal digit, then `s' has been escaped and will be converted to what is
			-- expected by the current platform.
			-- Otherwise it will be ignored and it will be left as is.
			-- See the note clause for the class for more details on the encoding.
		ensure
			roundtrip: utf_16le_string_8_to_escaped_string_32 (Result).same_string_general (s)

	escaped_utf_32_string_into_utf_16le_string_8 (s: READABLE_STRING_GENERAL; a_result: STRING_8)
			-- Copy UTF-16LE sequence corresponding to `s' interpreted as a UTF-32 sequence that could be
			-- escaped appended into `a_result'.
			-- If `s' contains the Escape_character followed by either "HH" or "uHHHH" where H stands
			-- for an hexadecimal digit, then `s' has been escaped and will be converted to what is
			-- expected by the current platform.
			-- Otherwise it will be ignored and it will be left as is.
			-- See the note clause for the class for more details on the encoding.
		ensure
			roundtrip: utf_16le_string_8_to_escaped_string_32 (a_result.substring (old a_result.count + 1, a_result.count)).same_string_general (s)
	
feature -- UTF-16 to UTF-32

	utf_16_0_pointer_to_string_32 (p: MANAGED_POINTER): STRING_32
			-- STRING_32 object corresponding to UTF-16 sequence `p' which is zero-terminated.
		require
			minimum_size: p.count >= 2
			valid_count: p.count \\ 2 = 0
		ensure
			roundtrip: is_valid_utf_16_subpointer (p, 0, p.count // 2, True) implies across
					string_32_to_utf_16 (Result) as l_spec
				all
					l_spec.item = p.read_natural_16 (l_spec.cursor_index * 2)
				end

	utf_16_0_pointer_into_string_32 (p: MANAGED_POINTER; a_result: STRING_32)
			-- Copy STRING_32 object corresponding to UTF-16 sequence `p' which is zero-terminated
			-- appended into `a_result'.
		require
			minimum_size: p.count >= 2
			valid_count: p.count \\ 2 = 0
		ensure
			roundtrip: is_valid_utf_16_subpointer (p, 0, p.count // 2, True) implies across
					string_32_to_utf_16 (a_result.substring (old a_result.count + 1, a_result.count)) as l_spec
				all
					l_spec.item = p.read_natural_16 (l_spec.target_index * 2)
				end

	utf_16_0_subpointer_to_string_32 (p: MANAGED_POINTER; start_pos, end_pos: INTEGER_32; a_stop_at_null: BOOLEAN): STRING_32
			-- STRING_32 object corresponding to UTF-16 sequence `p' between code units `start_pos' and
			-- `end_pos' or the first null character encountered if `a_stop_at_null'.
		require
			minimum_size: p.count >= 2
			start_position_big_enough: start_pos >= 0
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos < p.count // 2
		ensure
			roundtrip: is_valid_utf_16_subpointer (p, start_pos, end_pos, a_stop_at_null) implies across
					string_32_to_utf_16 (Result) as l_spec
				all
					l_spec.item = p.read_natural_16 (l_spec.target_index * 2)
				end

	utf_16_0_subpointer_into_string_32 (p: MANAGED_POINTER; start_pos, end_pos: INTEGER_32; a_stop_at_null: BOOLEAN; a_result: STRING_32)
			-- Copy STRING_32 object corresponding to UTF-16 sequence `p' between code units `start_pos' and
			-- `end_pos' or the first null character encountered if `a_stop_at_null' appended into `a_result'.
		require
			minimum_size: p.count >= 2
			start_position_big_enough: start_pos >= 0
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos < p.count // 2
		ensure
			roundtrip: is_valid_utf_16_subpointer (p, start_pos, end_pos, a_stop_at_null) implies across
					string_32_to_utf_16 (a_result.substring (old a_result.count + 1, a_result.count)) as l_spec
				all
					l_spec.item = p.read_natural_16 (l_spec.target_index * 2)
				end

	utf_16_0_pointer_to_escaped_string_32 (p: MANAGED_POINTER): STRING_32
			-- STRING_32 object corresponding to UTF-16 sequence `p' which is zero-terminated,
			-- where invalid UTF-16LE sequences are escaped.
		require
			minimum_size: p.count >= 2
			valid_count: p.count \\ 2 = 0
		ensure
			roundtrip: attached escaped_utf_32_string_to_utf_16le_string_8 (Result) as l_utf and then across
					l_utf.new_cursor.incremented (1) as l_str
				all
					(l_utf.code (l_str.cursor_index) | (l_utf.code (l_str.cursor_index + 1) |<< 8)) = p.read_natural_16 (l_str.cursor_index - 1).to_natural_32
				end

	utf_16_0_pointer_into_escaped_string_32 (p: MANAGED_POINTER; a_result: STRING_32)
			-- Copy STRING_32 object corresponding to UTF-16 sequence `p' which is zero-terminated,
			-- where invalid UTF-16LE sequences are escaped, appended into `a_result'.
		require
			minimum_size: p.count >= 2
			valid_count: p.count \\ 2 = 0
		ensure
			roundtrip: attached escaped_utf_32_string_to_utf_16le_string_8 (a_result.substring (old a_result.count + 1, a_result.count)) as l_utf and then across
					l_utf.new_cursor.incremented (1) as l_str
				all
					(l_utf.code (l_str.cursor_index) | (l_utf.code (l_str.cursor_index + 1) |<< 8)) = p.read_natural_16 (l_str.cursor_index - 1).to_natural_32
				end

	utf_16_0_subpointer_to_escaped_string_32 (p: MANAGED_POINTER; start_pos, end_pos: INTEGER_32; a_stop_at_null: BOOLEAN): STRING_32
			-- STRING_32 object corresponding to UTF-16 sequence `p' between code units `start_pos' and
			-- `end_pos' or the first null character encountered if `a_stop_at_null', where invalid
			-- UTF-16LE sequences are escaped.
		require
			minimum_size: p.count >= 2
			start_position_big_enough: start_pos >= 0
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos < p.count // 2
		ensure
			roundtrip: attached escaped_utf_32_string_to_utf_16le_string_8 (Result) as l_utf and then across
					l_utf.new_cursor.incremented (1) as l_str
				all
					(l_utf.code (l_str.cursor_index) | (l_utf.code (l_str.cursor_index + 1) |<< 8)) = p.read_natural_16 (start_pos * 2 + l_str.cursor_index - 1).to_natural_32
				end

	utf_16_0_subpointer_into_escaped_string_32 (p: MANAGED_POINTER; start_pos, end_pos: INTEGER_32; a_stop_at_null: BOOLEAN; a_result: STRING_32)
			-- Copy STRING_32 object corresponding to UTF-16 sequence `p' between code units `start_pos' and
			-- `end_pos' or the first null character encountered if `a_stop_at_null', where invalid
			-- UTF-16LE sequences are escaped, appended into `a_result'.
		require
			minimum_size: p.count >= 2
			start_position_big_enough: start_pos >= 0
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos < p.count // 2
		ensure
			roundtrip: attached escaped_utf_32_string_to_utf_16le_string_8 (a_result.substring (old a_result.count + 1, a_result.count)) as l_utf and then across
					l_utf.new_cursor.incremented (1) as l_str
				all
					(l_utf.code (l_str.cursor_index) | (l_utf.code (l_str.cursor_index + 1) |<< 8)) = p.read_natural_16 (start_pos * 2 + l_str.cursor_index - 1).to_natural_32
				end

	utf_16_to_string_32 (s: SPECIAL [NATURAL_16]): STRING_32
			-- STRING_32 object corresponding to UTF-16 sequence `s'.
		ensure
			roundtrip: is_valid_utf_16 (s) implies string_32_to_utf_16 (Result).is_equal (s)

	utf_16_into_string_32 (s: SPECIAL [NATURAL_16]; a_result: STRING_32)
			-- Copy STRING_32 object corresponding to UTF-16 sequence `s'
			-- appended into `a_result'.
		ensure
			roundtrip: is_valid_utf_16 (s) implies string_32_to_utf_16 (a_result.substring (old a_result.count + 1, a_result.count)).is_equal (s)

	utf_16le_string_8_to_string_32 (s: READABLE_STRING_8): STRING_32
			-- STRING_32 object corresponding to UTF-16LE sequence `s'.
		ensure
			roundtrip: is_valid_utf_16le_string_8 (s) implies escaped_utf_32_string_to_utf_16le_string_8 (Result).same_string (s)

	utf_16le_string_8_into_string_32 (s: READABLE_STRING_8; a_result: STRING_32)
			-- Copy STRING_32 object corresponding to UTF-16LE sequence `s' appended into `a_result'.
		ensure
			roundtrip: is_valid_utf_16le_string_8 (s) implies escaped_utf_32_string_to_utf_16le_string_8 (a_result.substring (old a_result.count + 1, a_result.count)).same_string (s)

	utf_16le_string_8_to_escaped_string_32 (s: READABLE_STRING_8): STRING_32
			-- STRING_32 object corresponding to UTF-16LE sequence `s', where invalid UTF-16LE
			-- sequences are escaped.
		ensure
			roundtrip: escaped_utf_32_string_to_utf_16le_string_8 (Result).same_string (s)

	utf_16le_string_8_into_escaped_string_32 (s: READABLE_STRING_8; a_result: STRING_32)
			-- Copy STRING_32 object corresponding to UTF-16LE sequence `s', where invalid UTF-16LE
			-- sequences are escaped, appended into `a_result'.
		ensure
			roundtrip: escaped_utf_32_string_to_utf_16le_string_8 (a_result.substring (old a_result.count + 1, a_result.count)).same_string (s)
	
feature -- UTF-16 to UTF-8

	utf_16_to_utf_8_string_8 (s: SPECIAL [NATURAL_16]): STRING_8
			-- UTF-8 sequence corresponding to UTF-16 sequence `s'.
		ensure
			roundtrip: is_valid_utf_16 (s) implies string_32_to_utf_16 (utf_8_string_8_to_string_32 (Result)).is_equal (s)

	utf_16_into_utf_8_string_8 (s: SPECIAL [NATURAL_16]; a_result: STRING_8)
			-- Copy UTF-8 sequence corresponding to UTF-16 sequence `s' appended into `a_result'.
		ensure
			roundtrip: is_valid_utf_16 (s) implies string_32_to_utf_16 (utf_8_string_8_to_string_32 (a_result.substring (old a_result.count + 1, a_result.count))).is_equal (s)

	utf_16le_string_8_to_utf_8_string_8 (s: READABLE_STRING_8): STRING_8
			-- UTF-8 sequence corresponding to UTF-16LE sequence `s'.
		ensure
			roundtrip: is_valid_utf_16le_string_8 (s) implies utf_32_string_to_utf_16le_string_8 (utf_8_string_8_to_string_32 (Result)).same_string (s)

	utf_16le_string_8_into_utf_8_string_8 (s: READABLE_STRING_8; a_result: STRING_8)
			-- Copy UTF-8 sequence corresponding to UTF-16LE sequence `s' appended into `a_result'.
		require
			even_count: (s.count & 1) = 0
		ensure
			roundtrip: is_valid_utf_16le_string_8 (s) implies utf_32_string_to_utf_16le_string_8 (utf_8_string_8_to_string_32 (a_result.substring (old a_result.count + 1, a_result.count))).same_string (s)
	
feature -- UTF-8 to UTF-16

	utf_8_string_8_to_utf_16 (s: READABLE_STRING_8): SPECIAL [NATURAL_16]
			-- UTF-16 sequence corresponding to UTF-8 sequence `s'.
		ensure
			roundtrip: is_valid_utf_8_string_8 (s) implies utf_16_to_utf_8_string_8 (Result).same_string (s)

	utf_8_string_8_to_utf_16_0 (s: READABLE_STRING_8): SPECIAL [NATURAL_16]
			-- UTF-16 sequence corresponding to UTF-8 sequence `s' with terminating zero.
		ensure
			roundtrip: is_valid_utf_8_string_8 (s) implies utf_16_to_utf_8_string_8 (Result).same_string (s)
	
feature -- Byte Order Mark (BOM)

	Utf_8_bom_to_string_8: STRING_8 = "ï»¿"
			-- UTF-8 BOM sequence.

	Utf_16be_bom_to_string_8: STRING_8 = "þÿ"
			-- UTF-16BE BOM sequence.

	Utf_16le_bom_to_string_8: STRING_8 = "ÿþ"
			-- UTF-16LE BOM sequence.

	Utf_32be_bom_to_string_8: STRING_8 = "%U%Uþÿ"
			-- UTF-32BE BOM sequence.

	Utf_32le_bom_to_string_8: STRING_8 = "ÿþ%U%U"
			-- UTF-32LE BOM sequence.
	
note
	copyright: "Copyright (c) 1984-2014, Eiffel Software and others"
	license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)"
	source: "[
		Eiffel Software
		5949 Hollister Ave., Goleta, CA 93117 USA
		Telephone 805-685-1006, Fax 805-685-6869
		Website http://www.eiffel.com
		Customer support http://support.eiffel.com
	]"

end -- class UTF_CONVERTER

Classes
Clusters
Cluster hierarchy
Chart
Relations
Text
Flat
Contracts
Go to:
-- Generated by ISE Eiffel --
For more details: www.eiffel.com