note
	description: "UTF-32 encoding routines"
	library: "Gobo Eiffel Kernel Library"
	copyright: "Copyright (c) 2005-2018, Colin Adams and others"
	license: "MIT License"
	date: "$Date: 2019-02-07 22:54:15 +0000 (Thu, 07 Feb 2019) $"
	revision: "$Revision: 102807 $"

class interface
	UC_UTF32_ROUTINES

create 
	default_create
			-- Only creation procedure listed for this class; it takes no arguments.
			-- Process instances of classes with no creation clause.
			-- (Default: do nothing.)
			-- (from ANY)

feature -- Access
			-- Note: the `*_category' constants (values 0..29) and the
			-- `*_decomposition_mapping' constants (values 0..16) use overlapping
			-- integer ranges (e.g. both families define a 0 and a 15), so a bare
			-- INTEGER_32 does not tell which family it belongs to.

	Any_: KL_ANY_ROUTINES
			-- Routines that ought to be in class ANY
			-- (from KL_IMPORTED_ANY_ROUTINES)
		ensure -- from KL_IMPORTED_ANY_ROUTINES
			instance_free: class
			any_routines_not_void: Result /= Void

	Canonical_decomposition_mapping: INTEGER_32 = 0
			-- Decomposition mapping is canonical
			-- (from UC_UNICODE_CONSTANTS)

	Close_punctuation_category: INTEGER_32 = 15
			-- Close punctuation
			-- (from UC_UNICODE_CONSTANTS)

	code (first, second, third, fourth: INTEGER_32; least_endian: BOOLEAN): INTEGER_32
			-- Code point represented by the four bytes
			-- first, second, third and fourth.
			-- Each of the four arguments must satisfy is_byte.
			-- NOTE(review): least_endian presumably selects the byte order
			-- (True: first is the least significant byte); confirm against
			-- the implementation, which is not visible in this interface.
			-- The only guarantee stated here is a non-negative result; no
			-- upper bound such as Maximum_unicode_character_code is asserted.
		require
			first_is_byte: is_byte (first)
			second_is_byte: is_byte (second)
			third_is_byte: is_byte (third)
			fourth_is_byte: is_byte (fourth)
		ensure
			instance_free: class
			code_not_negative: Result >= 0

	Compatibility_decomposition_mapping: INTEGER_32 = 16
			-- Decomposition mapping for unspecified compatibility character
			-- (from UC_UNICODE_CONSTANTS)

	Connector_punctuation_category: INTEGER_32 = 12
			-- Connector punctuation
			-- (from UC_UNICODE_CONSTANTS)

	Control_other_category: INTEGER_32 = 26
			-- Control character
			-- (from UC_UNICODE_CONSTANTS)

	Currency_symbol_category: INTEGER_32 = 20
			-- Currency symbol
			-- (from UC_UNICODE_CONSTANTS)

	Dash_punctuation_category: INTEGER_32 = 13
			-- Dash punctuation
			-- (from UC_UNICODE_CONSTANTS)

	Decimal_digit_number_category: INTEGER_32 = 9
			-- Decimal digit number
			-- (from UC_UNICODE_CONSTANTS)

	Encircled_decomposition_mapping: INTEGER_32 = 7
			-- Decomposition mapping for encircled form
			-- (from UC_UNICODE_CONSTANTS)

	Enclosing_mark_category: INTEGER_32 = 8
			-- Enclosing mark
			-- (from UC_UNICODE_CONSTANTS)

	Final_decomposition_mapping: INTEGER_32 = 5
			-- Decomposition mapping for Arabic final presentation form
			-- (from UC_UNICODE_CONSTANTS)

	Final_quote_punctuation_category: INTEGER_32 = 17
			-- Final quote punctuation
			-- (from UC_UNICODE_CONSTANTS)

	Font_decomposition_mapping: INTEGER_32 = 1
			-- Decomposition mapping for font variant
			-- (from UC_UNICODE_CONSTANTS)

	Format_other_category: INTEGER_32 = 27
			-- Format character
			-- (from UC_UNICODE_CONSTANTS)

	Fraction_decomposition_mapping: INTEGER_32 = 15
			-- Decomposition mapping for vulgar fraction form
			-- (from UC_UNICODE_CONSTANTS)

	generating_type: TYPE [detachable UC_UTF32_ROUTINES]
			-- Type of current object
			-- (type of which it is a direct instance)
			-- (from ANY)
		ensure -- from ANY
			generating_type_not_void: Result /= Void

	generator: STRING_8
			-- Name of current object's generating class
			-- (base class of the type of which it is a direct instance)
			-- (from ANY)
		ensure -- from ANY
			generator_not_void: Result /= Void
			generator_not_empty: not Result.is_empty

	Initial_decomposition_mapping: INTEGER_32 = 3
			-- Decomposition mapping for Arabic initial presentation form
			-- (from UC_UNICODE_CONSTANTS)

	Initial_quote_punctuation_category: INTEGER_32 = 16
			-- Initial quote punctuation
			-- (from UC_UNICODE_CONSTANTS)

	Integer_: KL_INTEGER_ROUTINES
			-- Routines that ought to be in class INTEGER
			-- (from KL_IMPORTED_INTEGER_ROUTINES)
		ensure -- from KL_IMPORTED_INTEGER_ROUTINES
			instance_free: class
			integer_routines_not_void: Result /= Void

	Isolated_decomposition_mapping: INTEGER_32 = 6
			-- Decomposition mapping for Arabic isolated presentation form
			-- (from UC_UNICODE_CONSTANTS)

	Letter_number_category: INTEGER_32 = 10
			-- Letter number
			-- (from UC_UNICODE_CONSTANTS)

	Line_separator_category: INTEGER_32 = 24
			-- Line separator
			-- (from UC_UNICODE_CONSTANTS)

	Lowercase_letter_category: INTEGER_32 = 2
			-- Lower case letter
			-- (from UC_UNICODE_CONSTANTS)

	Math_symbol_category: INTEGER_32 = 19
			-- Mathematics symbol
			-- (from UC_UNICODE_CONSTANTS)

	Maximum_ascii_character: CHARACTER_8 = '%/127/'
			-- Largest ASCII character (the character with code 127)
			-- Conceptual postcondition, kept as a comment only:
			--		ensure
			--			definition: Result.code = Maximum_ascii_character_code
			--		end
			-- (from UC_UNICODE_CONSTANTS)

	Maximum_ascii_character_code: INTEGER_32 = 127
			-- Largest code for ASCII characters
			-- (2^7 - 1)
			-- Conceptual postcondition, kept as a comment only:
			--		ensure
			--			definition: Result = 127
			--			small_enough: Result <= Platform.Maximum_byte_code
			--		end
			-- (from UC_UNICODE_CONSTANTS)

	Maximum_bmp_character_code: INTEGER_32 = 65535
			-- Largest code for unicode characters in the
			-- Basic Multilingual Plane (0xFFFF)
			-- Conceptual postcondition, kept as a comment only:
			--		ensure
			--			definition: Result = 65535
			--		end
			-- (from UC_UNICODE_CONSTANTS)

	Maximum_unicode_character_code: INTEGER_32 = 1114111
			-- Largest code for unicode characters (0x10FFFF)
			-- Includes final two non-characters.
			-- Conceptual postcondition, kept as a comment only:
			--		ensure
			--			definition: Result = 1114111
			--		end
			-- (from UC_UNICODE_CONSTANTS)

	Maximum_unicode_surrogate_code: INTEGER_32 = 57343
			-- Highest unicode surrogate code-point (0xDFFF)
			-- Conceptual postcondition, kept as a comment only:
			--		ensure
			--			definition: Result = 57343
			--		end
			-- (from UC_UNICODE_CONSTANTS)

	Medial_decomposition_mapping: INTEGER_32 = 4
			-- Decomposition mapping for Arabic medial presentation form
			-- (from UC_UNICODE_CONSTANTS)

	Minimum_ascii_character: CHARACTER_8 = '%U'
			-- Smallest ASCII character (the null character, code 0)
			-- Conceptual postcondition, kept as a comment only:
			--		ensure
			--			definition: Result.code = Minimum_ascii_character_code
			--		end
			-- (from UC_UNICODE_CONSTANTS)

	Minimum_ascii_character_code: INTEGER_32 = 0
			-- Smallest code for ASCII characters
			-- Conceptual postcondition, kept as a comment only:
			--		ensure
			--			definition: Result = 0
			--		end
			-- (from UC_UNICODE_CONSTANTS)

	Minimum_unicode_character_code: INTEGER_32 = 0
			-- Smallest code for unicode characters
			-- Conceptual postcondition, kept as a comment only:
			--		ensure
			--			definition: Result = 0
			--		end
			-- (from UC_UNICODE_CONSTANTS)

	Minimum_unicode_surrogate_code: INTEGER_32 = 55296
			-- Lowest unicode surrogate code-point (0xD800)
			-- Conceptual postcondition, kept as a comment only:
			--		ensure
			--			definition: Result = 55296
			--		end
			-- (from UC_UNICODE_CONSTANTS)

	Modifier_letter_category: INTEGER_32 = 4
			-- Modifier letter
			-- (from UC_UNICODE_CONSTANTS)

	Modifier_symbol_category: INTEGER_32 = 21
			-- Modifier symbol
			-- (from UC_UNICODE_CONSTANTS)

	Narrow_decomposition_mapping: INTEGER_32 = 12
			-- Decomposition mapping for narrow (hankaku) compatibility character
			-- (from UC_UNICODE_CONSTANTS)

	No_break_decomposition_mapping: INTEGER_32 = 2
			-- Decomposition mapping for no-break variant
			-- (from UC_UNICODE_CONSTANTS)

	Non_spacing_mark_category: INTEGER_32 = 6
			-- Non-spacing mark
			-- (from UC_UNICODE_CONSTANTS)

	Open_punctuation_category: INTEGER_32 = 14
			-- Open punctuation
			-- (from UC_UNICODE_CONSTANTS)

	Other_letter_category: INTEGER_32 = 5
			-- Other letter
			-- (from UC_UNICODE_CONSTANTS)

	Other_number_category: INTEGER_32 = 11
			-- Other number
			-- (from UC_UNICODE_CONSTANTS)

	Other_punctuation_category: INTEGER_32 = 18
			-- Other punctuation
			-- (from UC_UNICODE_CONSTANTS)

	Other_symbol_category: INTEGER_32 = 22
			-- Other symbol
			-- (from UC_UNICODE_CONSTANTS)

	Paragraph_separator_category: INTEGER_32 = 25
			-- Paragraph separator
			-- (from UC_UNICODE_CONSTANTS)

	Platform: KL_PLATFORM
			-- Platform-dependent properties
			-- (from KL_SHARED_PLATFORM)
		ensure -- from KL_SHARED_PLATFORM
			instance_free: class
			platform_not_void: Result /= Void

	Private_other_category: INTEGER_32 = 29
			-- Private-use character
			-- (from UC_UNICODE_CONSTANTS)

	Small_decomposition_mapping: INTEGER_32 = 13
			-- Decomposition mapping for small variant form (CNS compatibility)
			-- (from UC_UNICODE_CONSTANTS)

	Space_separator_category: INTEGER_32 = 23
			-- Space separator
			-- (from UC_UNICODE_CONSTANTS)

	Spacing_combining_mark_category: INTEGER_32 = 7
			-- Spacing combining mark
			-- (from UC_UNICODE_CONSTANTS)

	Square_decomposition_mapping: INTEGER_32 = 14
			-- Decomposition mapping for CJK squared font variant
			-- (from UC_UNICODE_CONSTANTS)

	Subscript_decomposition_mapping: INTEGER_32 = 9
			-- Decomposition mapping for subscript form
			-- (from UC_UNICODE_CONSTANTS)

	Superscript_decomposition_mapping: INTEGER_32 = 8
			-- Decomposition mapping for superscript form
			-- (from UC_UNICODE_CONSTANTS)

	Surrogate_other_category: INTEGER_32 = 28
			-- Surrogate character
			-- (from UC_UNICODE_CONSTANTS)

	Titlecase_letter_category: INTEGER_32 = 3
			-- Title case letter
			-- (from UC_UNICODE_CONSTANTS)

	Unassigned_other_category: INTEGER_32 = 0
			-- Unassigned character
			-- (from UC_UNICODE_CONSTANTS)

	Unicode: UC_UNICODE_ROUTINES
			-- Unicode routines
			-- (from UC_IMPORTED_UNICODE_ROUTINES)
		ensure -- from UC_IMPORTED_UNICODE_ROUTINES
			instance_free: class
			unicode_not_void: Result /= Void

	Uppercase_letter_category: INTEGER_32 = 1
			-- Upper case letter
			-- (from UC_UNICODE_CONSTANTS)

	Vertical_decomposition_mapping: INTEGER_32 = 10
			-- Decomposition mapping for vertical layout presentation form
			-- (from UC_UNICODE_CONSTANTS)

	Wide_decomposition_mapping: INTEGER_32 = 11
			-- Decomposition mapping for wide (zenkaku) compatibility character
			-- (from UC_UNICODE_CONSTANTS)
	
feature -- Comparison
			-- All features in this clause are inherited from ANY.
			-- NOTE(review): `like arg #1' appears to be this interface view's
			-- rendering of a type anchored to the first argument (a); confirm
			-- against the EiffelStudio documentation.

	frozen deep_equal (a: detachable ANY; b: like arg #1): BOOLEAN
			-- Are a and b either both void
			-- or attached to isomorphic object structures?
			-- A True result is symmetric and, when a is attached,
			-- implies that b is attached and of the same type.
			-- (from ANY)
		ensure -- from ANY
			instance_free: class
			shallow_implies_deep: standard_equal (a, b) implies Result
			both_or_none_void: (a = Void) implies (Result = (b = Void))
			same_type: (Result and (a /= Void)) implies (b /= Void and then a.same_type (b))
			symmetric: Result implies deep_equal (b, a)

	frozen equal (a: detachable ANY; b: like arg #1): BOOLEAN
			-- Are a and b either both void or attached
			-- to objects considered equal?
			-- Equality of attached objects is decided by a.is_equal (b).
			-- (from ANY)
		ensure -- from ANY
			instance_free: class
			definition: Result = (a = Void and b = Void) or else ((a /= Void and b /= Void) and then a.is_equal (b))

	frozen is_deep_equal (other: UC_UTF32_ROUTINES): BOOLEAN
			-- Are Current and other attached to isomorphic object structures?
			-- other must not be Void.
			-- (from ANY)
		require -- from ANY
			other_not_void: other /= Void
		ensure -- from ANY
			shallow_implies_deep: standard_is_equal (other) implies Result
			same_type: Result implies same_type (other)
			symmetric: Result implies other.is_deep_equal (Current)

	is_equal (other: UC_UTF32_ROUTINES): BOOLEAN
			-- Is other attached to an object considered
			-- equal to current object?
			-- other must not be Void.
			-- Field-by-field equality (standard_is_equal) always implies True.
			-- (from ANY)
		require -- from ANY
			other_not_void: other /= Void
		ensure -- from ANY
			symmetric: Result implies other ~ Current
			consistent: standard_is_equal (other) implies Result

	frozen standard_equal (a: detachable ANY; b: like arg #1): BOOLEAN
			-- Are a and b either both void or attached to
			-- field-by-field identical objects of the same type?
			-- Always uses default object comparison criterion.
			-- Equality of attached objects is decided by a.standard_is_equal (b).
			-- (from ANY)
		ensure -- from ANY
			instance_free: class
			definition: Result = (a = Void and b = Void) or else ((a /= Void and b /= Void) and then a.standard_is_equal (b))

	frozen standard_is_equal (other: UC_UTF32_ROUTINES): BOOLEAN
			-- Is other attached to an object of the same type
			-- as current object, and field-by-field identical to it?
			-- other must not be Void.
			-- (from ANY)
		require -- from ANY
			other_not_void: other /= Void
		ensure -- from ANY
			same_type: Result implies same_type (other)
			symmetric: Result implies other.standard_is_equal (Current)
	
feature -- Status report

	conforms_to (other: ANY): BOOLEAN
			-- Does type of current object conform to type
			-- of other (as per Eiffel: The Language, chapter 13)?
			-- other must not be Void.
			-- (from ANY)
		require -- from ANY
			other_not_void: other /= Void

	same_type (other: ANY): BOOLEAN
			-- Is type of current object identical to type of other?
			-- Defined as conformance in both directions.
			-- other must not be Void.
			-- (from ANY)
		require -- from ANY
			other_not_void: other /= Void
		ensure -- from ANY
			definition: Result = (conforms_to (other) and other.conforms_to (Current))

	valid_utf32 (a_string: STRING_8): BOOLEAN
			-- Are the bytes in a_string a valid UTF-32 encoding?
			-- a_string has one byte per character.
			-- Default to big endian when no BOM.
			-- An empty string is always valid, and a valid string always
			-- has a length that is a multiple of four.
			-- NOTE(review): the a_string_is_string precondition presumably
			-- requires a_string to have exactly the type of a manifest string
			-- (not a descendant); confirm in KL_ANY_ROUTINES.same_types.
		require
			a_string_not_void: a_string /= Void
			a_string_is_string: Any_.same_types (a_string, "")
		ensure
			instance_free: class
			empty_is_true: a_string.count = 0 implies Result
			utf32_count_multiple_of_four: Result implies ((a_string.count \\ 4) = 0)
	
feature -- Duplication

	copy (other: UC_UTF32_ROUTINES)
			-- Update current object using fields of object attached
			-- to other, so as to yield equal objects.
			-- other must not be Void and must have the same type as Current.
			-- (from ANY)
		require -- from ANY
			other_not_void: other /= Void
			type_identity: same_type (other)
		ensure -- from ANY
			is_equal: Current ~ other

	frozen deep_copy (other: UC_UTF32_ROUTINES)
			-- Effect equivalent to that of:
			--		copy (other . deep_twin)
			-- other must not be Void.
			-- (from ANY)
		require -- from ANY
			other_not_void: other /= Void
		ensure -- from ANY
			deep_equal: deep_equal (Current, other)

	frozen deep_twin: UC_UTF32_ROUTINES
			-- New object structure recursively duplicated from Current.
			-- (from ANY)
		ensure -- from ANY
			deep_twin_not_void: Result /= Void
			deep_equal: deep_equal (Current, Result)

	frozen standard_copy (other: UC_UTF32_ROUTINES)
			-- Copy every field of other onto corresponding field
			-- of current object.
			-- other must not be Void and must have the same type as Current.
			-- (from ANY)
		require -- from ANY
			other_not_void: other /= Void
			type_identity: same_type (other)
		ensure -- from ANY
			is_standard_equal: standard_is_equal (other)

	frozen standard_twin: UC_UTF32_ROUTINES
			-- New object field-by-field identical to Current.
			-- Always uses default copying semantics.
			-- (from ANY)
		ensure -- from ANY
			standard_twin_not_void: Result /= Void
			equal: standard_equal (Result, Current)

	frozen twin: UC_UTF32_ROUTINES
			-- New object equal to Current
			-- twin calls copy; to change copying/twinning semantics, redefine copy.
			-- (from ANY)
		ensure -- from ANY
			twin_not_void: Result /= Void
			is_equal: Result ~ Current
	
feature -- Basic operations

	frozen default: detachable UC_UTF32_ROUTINES
			-- Default value of object's type
			-- The result type is detachable and no postcondition is
			-- stated, so the result may be Void.
			-- (from ANY)

	frozen default_pointer: POINTER
			-- Default value of type POINTER
			-- (Avoid the need to write p.default for
			-- some p of type POINTER.)
			-- (from ANY)
		ensure -- from ANY
			instance_free: class

	default_rescue
			-- Process exception for routines with no Rescue clause.
			-- (Default: do nothing.)
			-- (from ANY)

	frozen do_nothing
			-- Execute a null action.
			-- (from ANY)
		ensure -- from ANY
			instance_free: class
	
feature -- Endian-ness detection
			-- The constants Hex_fe, Hex_ff and Hex_100 used in the contracts
			-- below are not exported in this interface view; presumably they
			-- are 0xFE, 0xFF and 0x100 respectively (to be confirmed in the
			-- class text).

	Bom_be: STRING_8
			-- BOM in big-endian format
			-- Four characters whose codes are, in order:
			-- 0, 0, Hex_fe, Hex_ff.
		ensure
			instance_free: class
			bom_be_not_void: Result /= Void
			four_bytes: Result.count = 4
			first_byte: Result.item_code (1) = 0
			second_byte: Result.item_code (2) = 0
			third_byte: Result.item_code (3) = Hex_fe
			fourth_byte: Result.item_code (4) = Hex_ff

	Bom_le: STRING_8
			-- BOM in little-endian format
			-- Four characters whose codes are, in order:
			-- Hex_ff, Hex_fe, 0, 0 (the reverse of Bom_be).
		ensure
			instance_free: class
			bom_le_not_void: Result /= Void
			four_bytes: Result.count = 4
			first_byte: Result.item_code (1) = Hex_ff
			second_byte: Result.item_code (2) = Hex_fe
			third_byte: Result.item_code (3) = 0
			fourth_byte: Result.item_code (4) = 0

	is_byte (a: INTEGER_32): BOOLEAN
			-- Is a a byte, i.e. in the range 0 <= a < Hex_100?
		ensure
			instance_free: class
			definition: Result = (a >= 0 and a < Hex_100)

	is_endian_detection_character_least_first (first, second, third, fourth: INTEGER_32): BOOLEAN
			-- Do the four bytes represent the character
			-- 0xFEFF with first being the least significant byte?
			-- True exactly for the byte sequence Hex_ff, Hex_fe, 0, 0
			-- (the same sequence as Bom_le).
			-- Each of the four arguments must satisfy is_byte.
		require
			first_is_byte: is_byte (first)
			second_is_byte: is_byte (second)
			third_is_byte: is_byte (third)
			fourth_is_byte: is_byte (fourth)
		ensure
			instance_free: class
			definition: Result = (first = Hex_ff and second = Hex_fe and third = 0 and fourth = 0)

	is_endian_detection_character_most_first (first, second, third, fourth: INTEGER_32): BOOLEAN
			-- Do the four bytes represent the character
			-- 0xFEFF with first being the most significant byte?
			-- True exactly for the byte sequence 0, 0, Hex_fe, Hex_ff
			-- (the same sequence as Bom_be).
			-- Each of the four arguments must satisfy is_byte.
		require
			first_is_byte: is_byte (first)
			second_is_byte: is_byte (second)
			third_is_byte: is_byte (third)
			fourth_is_byte: is_byte (fourth)
		ensure
			instance_free: class
			definition: Result = (first = 0 and second = 0 and third = Hex_fe and fourth = Hex_ff)
	
feature -- Output

	Io: STD_FILES
			-- Handle to standard file setup
			-- (from ANY)
		ensure -- from ANY
			instance_free: class
			io_not_void: Result /= Void

	out: STRING_8
			-- New string containing terse printable representation
			-- of current object
			-- Not frozen, so descendants may redefine it.
			-- (from ANY)
		ensure -- from ANY
			out_not_void: Result /= Void

	print (o: detachable ANY)
			-- Write terse external representation of o
			-- on standard output.
			-- o is detachable, so Void is an accepted argument.
			-- (from ANY)
		ensure -- from ANY
			instance_free: class

	frozen tagged_out: STRING_8
			-- New string containing terse printable representation
			-- of current object
			-- Frozen, so unlike out it cannot be redefined.
			-- (from ANY)
		ensure -- from ANY
			tagged_out_not_void: Result /= Void
	
feature -- Platform

	Operating_environment: OPERATING_ENVIRONMENT
			-- Objects available from the operating system
			-- Never Void.
			-- (from ANY)
		ensure -- from ANY
			instance_free: class
			operating_environment_not_void: Result /= Void
	
invariant
		-- from ANY
		-- Every object is field-by-field equal to itself ...
	reflexive_equality: standard_is_equal (Current)
		-- ... and its type conforms to itself.
	reflexive_conformance: conforms_to (Current)

end -- class UC_UTF32_ROUTINES

Generated by ISE EiffelStudio