note description: "Unicode strings" remark: "[ Unless specified otherwise, STRING and CHARACTER are supposed to contain characters whose code follows the unicode character set. In other words characters whose code is between 128 and 255 should follow the ISO 8859-1 Latin-1 character set. ]" remark2: "[ By default UC_STRING is implemented using the UTF-8 encoding. Use UC_UTF*_STRING to specify the encoding explicitly. ]" library: "Gobo Eiffel Kernel Library" copyright: "Copyright (c) 2001-2018, Eric Bezault and others" license: "MIT License" date: "$Date: 2019-02-07 22:54:15 +0000 (Thu, 07 Feb 2019) $" revision: "$Revision: 102807 $" class interface UC_STRING create make, make_empty, make_from_string, make_from_utf8, make_filled, make_filled_code, make_filled_unicode, make_from_substring, make_from_utf16, make_from_utf16le, make_from_utf16be, make_from_string_general, make_from_substring_general feature -- Initialization make (suggested_capacity: INTEGER_32) -- Create empty string, or remove all characters from -- existing string. -- (Extended from ELKS 2001 STRING) ensure then byte_count_set: byte_count = 0 byte_capacity_set: byte_capacity >= suggested_capacity make_from_string (a_string: STRING_8) -- Initialize from the character sequence of a_string. -- (ELKS 2001 STRING) ensure then same_unicode: same_unicode_string (a_string) feature -- Access unicode_item (i: INTEGER_32): UC_CHARACTER -- Unicode character at index i; -- Return a new object at each call require valid_index: valid_index (i) ensure item_not_void: Result /= Void code_set: Result.code = item_code (i) item_code (i: INTEGER_32): INTEGER_32 -- Code of character at index i ensure then item_code_not_negative: Result >= 0 valid_item_code: Unicode.valid_code (Result) item (i: INTEGER_32): CHARACTER_8 -- Character at index i; -- '%U' if the unicode character at index -- i cannot fit into a CHARACTER -- (Extended from ELKS 2001 STRING) -- Note: Use item_code instead of this routine when Current -- can contain characters which may not fit into a CHARACTER. ensure then code_small_enough: item_code (i) <= Platform.Maximum_character_code implies Result.code = item_code (i) overflow: item_code (i) > Platform.Maximum_character_code implies Result = '%U' at alias "@" (i: INTEGER_32): CHARACTER_8 -- Character at index i -- (ELKS 2001 STRING) ensure then definition: Result = item (i) substring (start_index, end_index: INTEGER_32): like Current -- New object containing all characters -- from start_index to end_index inclusive -- (ELKS 2001 STRING) ensure then first_unicode_item: Result.count > 0 implies Result.item_code (1) = item_code (start_index) unicode_substring_index (other: READABLE_STRING_GENERAL; start_index: INTEGER_32): INTEGER_32 -- Index of first occurrence of other at or after start_index; -- 0 if none require other_not_void: other /= Void valid_start_index: start_index >= 1 and start_index <= count + 1 ensure valid_result: Result = 0 or else (start_index <= Result and Result <= count - other.count + 1) zero_if_absent: (Result = 0) = not substring (start_index, count).has_unicode_substring (other) at_this_index: Result >= start_index implies substring (Result, Result + other.count - 1).same_unicode_string (other) none_before: Result > start_index implies not substring (start_index, Result + other.count - 2).has_unicode_substring (other) substring_index (other: STRING_8; start_index: INTEGER_32): INTEGER_32 -- Index of first occurrence of other at or after start_index; -- 0 if none. other and Current are considered with their -- characters which do not fit in a CHARACTER replaced by a '%U' -- (Extended from ELKS 2001 STRING) string: STRING_8 -- New STRING having the same character sequence as Current -- where characters which do not fit in a CHARACTER are -- replaced by a '%U' -- (Extended from ELKS 2001 STRING) plus alias "+" (other: READABLE_STRING_8): like Current -- New object which is a clone of Current extended -- by the characters of other -- (ELKS 2001 STRING) ensure then final_unicode: Result.substring (count + 1, count + other.count).same_unicode_string (other) prefixed_string (other: STRING_8): like Current -- New object which is a clone of Current preceded -- by the characters of other require other_not_void: other /= Void ensure prefixed_string_not_void: Result /= Void prefixed_string_count: Result.count = other.count + count initial: Result.substring (1, other.count).same_unicode_string (other) final: Result.substring (other.count + 1, Result.count).is_equal (Current) index_of_unicode (c: UC_CHARACTER; start_index: INTEGER_32): INTEGER_32 -- Index of first occurrence of c at or after start_index; -- 0 if none require c_not_void: c /= Void valid_start_index: start_index >= 1 and start_index <= count + 1 ensure valid_result: Result = 0 or (start_index <= Result and Result <= count) zero_if_absent: (Result = 0) = not substring (start_index, count).has_unicode (c) found_if_present: substring (start_index, count).has_unicode (c) implies item_code (Result) = c.code none_before: substring (start_index, count).has_unicode (c) implies not substring (start_index, Result - 1).has_unicode (c) index_of_item_code (a_code: INTEGER_32; start_index: INTEGER_32): INTEGER_32 -- Index of first occurrence of unicode character with -- code a_code at or after start_index; 0 if none require valid_start_index: start_index >= 1 and start_index <= count + 1 valid_code: Unicode.valid_code (a_code) ensure valid_result: Result = 0 or (start_index <= Result and Result <= count) zero_if_absent: (Result = 0) = not substring (start_index, count).has_item_code (a_code) found_if_present: substring (start_index, count).has_item_code (a_code) implies item_code (Result) = a_code none_before: substring (start_index, count).has_item_code (a_code) implies not substring (start_index, Result - 1).has_item_code (a_code) index_of (c: CHARACTER_8; start_index: INTEGER_32): INTEGER_32 -- Index of first occurrence of character c -- at or after start_index; 0 if none -- (ELKS 2001 STRING) hash_code: INTEGER_32 -- Hash code -- (ELKS 2001 STRING) new_empty_string (suggested_capacity: INTEGER_32): like Current -- New empty string with same dynamic type as Current require non_negative_suggested_capacity: suggested_capacity >= 0 ensure new_string_not_void: Result /= Void same_type: Any_.same_types (Result, Current) new_string_empty: Result.count = 0 byte_count_set: Result.byte_count = 0 byte_capacity_set: Result.byte_capacity >= suggested_capacity feature -- Measurement unicode_occurrences (c: UC_CHARACTER): INTEGER_32 -- Number of times c appears in the string require c_not_void: c /= Void ensure zero_if_empty: count = 0 implies Result = 0 recurse_if_not_found_at_first_position: (count > 0 and then item_code (1) /= c.code) implies Result = substring (2, count).unicode_occurrences (c) recurse_if_found_at_first_position: (count > 0 and then item_code (1) = c.code) implies Result = 1 + substring (2, count).unicode_occurrences (c) code_occurrences (a_code: INTEGER_32): INTEGER_32 -- Number of times unicode character of code -- a_code appears in the string require valid_code: Unicode.valid_code (a_code) ensure zero_if_empty: count = 0 implies Result = 0 recurse_if_not_found_at_first_position: (count > 0 and then item_code (1) /= a_code) implies Result = substring (2, count).code_occurrences (a_code) recurse_if_found_at_first_position: (count > 0 and then item_code (1) = a_code) implies Result = 1 + substring (2, count).code_occurrences (a_code) occurrences (c: CHARACTER_8): INTEGER_32 -- Number of times character c appears in the string -- (ELKS 2001 STRING) count: INTEGER_32 -- Number of characters -- (ELKS 2001 STRING) byte_count: INTEGER_32 -- Number of bytes in internal storage byte_capacity: INTEGER_32 -- Maximum number of bytes that can be put in -- internal storage feature -- Status report has_unicode (c: UC_CHARACTER): BOOLEAN -- Does Current contain c? require c_not_void: c /= Void ensure false_if_empty: count = 0 implies not Result true_if_first: count > 0 and then item_code (1) = c.code implies Result recurse: (count > 0 and then item_code (1) /= c.code) implies (Result = substring (2, count).has_unicode (c)) has_item_code (a_code: INTEGER_32): BOOLEAN -- Does Current contain the unicode character of code a_code? require valid_code: Unicode.valid_code (a_code) ensure false_if_empty: count = 0 implies not Result true_if_first: count > 0 and then item_code (1) = a_code implies Result recurse: (count > 0 and then item_code (1) /= a_code) implies (Result = substring (2, count).has_item_code (a_code)) has (c: CHARACTER_8): BOOLEAN -- Does Current contain character c? -- (ELKS 2001 STRING) has_unicode_substring (other: READABLE_STRING_GENERAL): BOOLEAN -- Does Current contain other? require other_not_void: other /= Void ensure false_if_too_small: count < other.count implies not Result true_if_initial: (count >= other.count and then substring (1, other.count).same_unicode_string (other)) implies Result recurse: (count >= other.count and then not substring (1, other.count).same_unicode_string (other)) implies (Result = substring (2, count).has_unicode_substring (other)) has_substring (other: STRING_8): BOOLEAN -- Does Current contain other? -- other and Current are considered with their characters -- which do not fit in a CHARACTER replaced by a '%U'. -- (Extented from ELKS 2001 STRING) is_empty: BOOLEAN -- Is string empty? -- (ELKS 2001 STRING) is_ascii: BOOLEAN -- Does string contain only ASCII characters? ensure empty: count = 0 implies Result recurse: count > 0 implies Result = (unicode_item (1).is_ascii and substring (2, count).is_ascii) feature -- Comparison is_equal (other: like Current): BOOLEAN -- Is other attached to an object considered equal -- to current object? -- (Extended from ELKS 2001 STRING) ensure then unicode_definition: Result = (Any_.same_types (Current, other) and then count = other.count and then (count > 0 implies (item_code (1) = other.item_code (1) and substring (2, count).is_equal (other.substring (2, count))))) is_less alias "<" (other: like Current): BOOLEAN -- Is string lexicographically lower than other? -- (Extended from ELKS 2001 STRING, inherited from COMPARABLE) ensure then unicode_definition: Result = (count = 0 and other.count > 0 or count > 0 and then other.count > 0 and then (item_code (1) < other.item_code (1) or item_code (1) = other.item_code (1) and substring (2, count) < other.substring (2, other.count))) same_string (other: READABLE_STRING_8): BOOLEAN -- Do Current and other have the same character sequence? -- Current is considered with its characters which do not -- fit in a CHARACTER replaced by a '%U'. -- (Extended from ELKS 2001 STRING) same_string_general (other: READABLE_STRING_GENERAL): BOOLEAN -- Do Current and other have the same character sequence? -- Current is considered with its characters which do not -- fit in a CHARACTER replaced by a '%U'. -- (Extended from ELKS 2001 STRING) same_unicode_string (other: READABLE_STRING_GENERAL): BOOLEAN -- Do Current and other have the same unicode character sequence? require other_not_void: other /= Void ensure definition: Result = (count = other.count and then (count > 0 implies (code (1) = other.code (1) and (count >= 2 implies substring (2, count).same_unicode_string (other.substring (2, count)))))) three_way_comparison (other: like Current): INTEGER_32 -- If current object equal to other, 0; -- if smaller, -1; if greater, 1 -- (ELKS 2001 STRING, inherited from COMPARABLE) -- Note: there is a bug in the specification of the -- contracts of three_way_comparison inherited -- from COMPARABLE. This routine cannot satisfy -- its postconditions if other is not of the -- same type as Current because the postcondition -- uses is_equal and is_equal has a postcondition -- inherited from ANY which says if it returns true -- then other has the same type as Current. three_way_unicode_comparison (other: STRING_8): INTEGER_32 -- If current object equal to other, 0; -- if smaller, -1; if greater, 1 -- Note: there is a bug in the specification of the -- contracts of three_way_comparison inherited -- from COMPARABLE. This routine cannot satisfy -- its postconditions if other is not of the -- same type as Current because the postcondition -- uses is_equal and is_equal has a postcondition -- inherited from ANY which says if it returns true -- then other has the same type as Current. -- three_way_unicode_comparison solves this problem -- and make the comparison polymorphically safe by -- changing the signature from 'like Current' to -- 'STRING' and by using same_unicode_string instead -- of is_equal in its postcondition. require other_not_void: other /= Void ensure equal_zero: (Result = 0) = same_unicode_string (other) feature -- Element change put_unicode (c: UC_CHARACTER; i: INTEGER_32) -- Replace unicode character at index i by c. require valid_index: valid_index (i) c_not_void: c /= Void ensure stable_count: count = old count replaced: item_code (i) = c.code stable_before_i: substring (1, i - 1).is_equal (old substring (1, i - 1)) stable_after_i: substring (i + 1, count).is_equal (old substring (i + 1, count)) put_item_code (a_code: INTEGER_32; i: INTEGER_32) -- Replace unicode character at index i -- by unicode character of code a_code. require valid_index: valid_index (i) valid_item_code: Unicode.valid_code (a_code) ensure stable_count: count = old count replaced: item_code (i) = a_code stable_before_i: substring (1, i - 1).is_equal (old substring (1, i - 1)) stable_after_i: substring (i + 1, count).is_equal (old substring (i + 1, count)) put (c: CHARACTER_8; i: INTEGER_32) -- Replace unicode character at index i by character c. -- (ELKS 2001 STRING) ensure then unicode_replaced: item_code (i) = c.code prepend (s: READABLE_STRING_8) -- Prepend a copy of s at front. prepend_string (s: detachable STRING_8) -- Prepend a copy of s, if not void, at front. append_string_general (s: READABLE_STRING_GENERAL) -- Append a copy of s at end. append_unicode_character (c: UC_CHARACTER) -- Append unicode character c at end. require c_not_void: c /= Void ensure new_count: count = old count + 1 appended: item_code (count) = c.code stable_before: substring (1, count - 1).is_equal (old cloned_string) append_item_code (a_code: INTEGER_32) -- Append unicode character of code a_code at end. require valid_item_code: Unicode.valid_code (a_code) ensure new_count: count = old count + 1 appended: item_code (count) = a_code stable_before: substring (1, count - 1).is_equal (old cloned_string) append_character (c: CHARACTER_8) -- Append character c at end. -- (ELKS 2001 STRING) ensure then unicode_appended: item_code (count) = c.code append_string (s: detachable READABLE_STRING_8) -- Append a copy of s at end. put_string (a_string: STRING_8) -- Write a_string to output stream. append (a_string: READABLE_STRING_8) -- Append a copy of a_string at end. -- (ELKS 2001 STRING) gobo_append_substring (a_string: STRING_8; s, e: INTEGER_32) -- Append substring of a_string between indexes -- s and e at end of current string. require a_string_not_void: a_string /= Void s_large_enough: s >= 1 e_small_enough: e <= a_string.count valid_interval: s <= e + 1 ensure then appended: is_equal (old cloned_string + old a_string.substring (s, e)) put_substring (a_string: STRING_8; s, e: INTEGER_32) -- Write substring of a_string between indexes -- s and e to output stream. append_utf8 (s: STRING_8) -- Append UTF-8 encoded string s at end of current string. require s_not_void: s /= Void s_is_string: Any_.same_types (s, "") valid_utf8: Utf8.valid_utf8 (s) append_utf16 (s: STRING_8) -- Append UTF-16 encoded string s at end of current string. require s_not_void: s /= Void s_is_string: Any_.same_types (s, "") valid_utf16: Utf16.valid_utf16 (s) append_utf16be (s: STRING_8) -- Append UTF-16BE encoded string s at end of current string. require s_not_void: s /= Void s_is_string: Any_.same_types (s, "") valid_utf16be: Utf16.valid_utf16be (s) append_utf16le (s: STRING_8) -- Append UTF-16LE encoded string s at end of current string. require s_not_void: s /= Void s_is_string: Any_.same_types (s, "") valid_utf16le: Utf16.valid_utf16le (s) fill_with_unicode (c: UC_CHARACTER) -- Replace every character with unicode character c. require c_not_void: c /= Void ensure same_count: old count = count filled: unicode_occurrences (c) = count fill_with_code (a_code: INTEGER_32) -- Replace every character with unicode character of code a_code. require valid_code: Unicode.valid_code (a_code) ensure same_count: old count = count filled: code_occurrences (a_code) = count fill_with (c: CHARACTER_8) -- Replace every character with character c. -- (ELKS 2001 STRING) ensure then all_code: code_occurrences (c.code) = count insert_unicode_character (c: UC_CHARACTER; i: INTEGER_32) -- Insert unicode character c at index i, shifting -- characters between ranks i and count rightwards. require c_not_void: c /= Void valid_insertion_index: 1 <= i and i <= count + 1 ensure one_more_character: count = old count + 1 inserted: item_code (i) = c.code stable_before_i: substring (1, i - 1).is_equal (old substring (1, i - 1)) stable_after_i: substring (i + 1, count).is_equal (old substring (i, count)) insert_code (a_code: INTEGER_32; i: INTEGER_32) -- Insert unicode character of code a_code -- at index i, shifting characters between -- ranks i and count rightwards. require valid_code: Unicode.valid_code (a_code) valid_insertion_index: 1 <= i and i <= count + 1 ensure one_more_character: count = old count + 1 inserted: item_code (i) = a_code stable_before_i: substring (1, i - 1).is_equal (old substring (1, i - 1)) stable_after_i: substring (i + 1, count).is_equal (old substring (i, count)) insert_character (c: CHARACTER_8; i: INTEGER_32) -- Insert character c at index i, shifting -- characters between ranks i and count rightwards. -- (ELKS 2001 STRING) ensure then code_inserted: item_code (i) = c.code insert_string (a_string: READABLE_STRING_8; i: INTEGER_32) -- Insert a_string at index i, shifting characters between ranks -- i and count rightwards. -- (ELKS 2001 STRING) require else string_not_void: a_string /= Void valid_insertion_index: 1 <= i and i <= count + 1 replace_substring (a_string: STRING_8; start_index, end_index: INTEGER_32) -- Replace the substring from start_index to end_index, -- inclusive, with a_string. -- (ELKS 2001 STRING) replace_substring_by_string (a_string: STRING_8; start_index, end_index: INTEGER_32) -- Replace the substring from start_index to end_index, -- inclusive, with a_string. require a_string_not_void: a_string /= Void replace_substring_all (original, new: like Current) -- Replace every occurrence of original with new. feature -- Removal keep_head (n: INTEGER_32) -- Remove all the characters except for the first n; -- if n > count, do nothing. -- (ELKS 2001 STRING) keep_tail (n: INTEGER_32) -- Remove all the characters except for the last n; -- if n > count, do nothing. -- (ELKS 2001 STRING) remove_head (n: INTEGER_32) -- Remove the first n characters; -- if n > count, remove all. -- (ELKS 2001 STRING) remove_tail (n: INTEGER_32) -- Remove the last n characters; -- if n > count, remove all. -- (ELKS 2001 STRING) remove (i: INTEGER_32) -- Remove i-th character, shifting characters between -- ranks i + 1 and count leftwards. -- (ELKS 2001 STRING) remove_substring (start_index, end_index: INTEGER_32) -- Remove all characters from start_index -- to end_index inclusive. -- (ELKS 2001 STRING) wipe_out -- Remove all characters. -- (ELKS 2001 STRING) feature -- Duplication copy (other: like Current) -- Reinitialize by copying the characters of other. -- (This is also used by clone.) -- (ELKS 2001 STRING) cloned_string: like Current -- New object equal to Current ensure twin_not_void: Result /= Void is_equal: Result.is_equal (Current) feature -- Output out: STRING_8 -- New STRING containing terse printable representation -- of current object; Non-ascii characters are represented -- with the %/code/ convention. -- (ELKS 2001 STRING) debug_output: STRING_8 -- String that should be displayed in debugger to represent Current. feature -- Conversion as_lower: like Current -- New object with all letters in lower case -- (Extended from ELKS 2001 STRING) ensure then unicode_anchor: count > 0 implies Result.unicode_item (1).is_equal (unicode_item (1).as_lower) unicode_recurse: count > 1 implies Result.substring (2, count).is_equal (substring (2, count).as_lower) as_upper: like Current -- New object with all letters in upper case -- (Extended from ELKS 2001 STRING) ensure then unicode_anchor: count > 0 implies Result.unicode_item (1).is_equal (unicode_item (1).as_upper) unicode_recurse: count > 1 implies Result.substring (2, count).is_equal (substring (2, count).as_upper) to_lower -- Convert all letters to lower case. -- (ELKS 2001 STRING) to_upper -- Convert all letters to upper case. -- (ELKS 2001 STRING) to_utf8: STRING_8 -- New STRING made up of bytes corresponding to -- the UTF-8 representation of current string ensure to_utf8_not_void: Result /= Void string_type: Any_.same_types (Result, "") valid_utf8: Utf8.valid_utf8 (Result) to_utf16_be: STRING_8 -- New STRING made up of bytes corresponding to -- the UTF-16BE representation of current string ensure to_utf16_be_not_void: Result /= Void string_type: Any_.same_types (Result, "") valid_utf16: Utf16.valid_utf16 (Result) to_utf16_le: STRING_8 -- New STRING made up of bytes corresponding to -- the UTF-16LE representation of current string ensure to_utf16_le_not_void: Result /= Void string_type: Any_.same_types (Result, "") valid_utf16: Utf16.valid_utf16 (Utf16.Bom_le + Result) to_utf32_be: STRING_8 -- New STRING made up of bytes corresponding to -- the UTF-32BE representation of current string ensure to_utf32_be_not_void: Result /= Void string_type: Any_.same_types (Result, "") valid_utf32: Utf32.valid_utf32 (Result) to_utf32_le: STRING_8 -- New STRING made up of bytes corresponding to -- the UTF-32LE representation of current string ensure to_utf32_le_not_void: Result /= Void string_type: Any_.same_types (Result, "") valid_utf32: Utf32.valid_utf32 (Utf32.Bom_le + Result) as_string: STRING_8 -- STRING version of current string; -- Return the UTF8 representation if it is encoded -- with UTF8, the UTF16 representation if it is -- encoded with UTF16, etc. ensure as_string_not_void: Result /= Void string_type: Any_.same_types (Result, "") feature -- Output stream Name: STRING_8 = "UC_STRING" -- Name of output stream Eol: STRING_8 = "%N" -- Line separator is_open_write: BOOLEAN -- Can characters be written to output stream? flush -- Do nothing (operation does not apply to string). feature -- Traversal item_code_at_byte_index (i: INTEGER_32): INTEGER_32 -- Code of character at byte index i require i_large_enough: i >= 1 i_small_enough: i <= byte_count is_encoded_first_byte: is_encoded_first_byte (i) ensure valid_item_code: Unicode.valid_code (Result) character_item_at_byte_index (i: INTEGER_32): CHARACTER_8 -- Character at byte_index i; -- '%U' is the unicode character at byte index -- i cannot fit into a CHARACTER require i_large_enough: i >= 1 i_small_enough: i <= byte_count is_encoded_first_byte: is_encoded_first_byte (i) ensure code_small_enough: item_code_at_byte_index (i) <= Platform.Maximum_character_code implies Result.code = item_code_at_byte_index (i) overflow: item_code_at_byte_index (i) > Platform.Maximum_character_code implies Result = '%U' next_byte_index (i: INTEGER_32): INTEGER_32 -- Byte index of unicode character after character -- at byte index i; Return 'byte_count + 1' if -- character at byte index i is the last character -- in the string require i_large_enough: i >= 1 i_small_enough: i <= byte_count is_encoded_first_byte: is_encoded_first_byte (i) ensure next_byte_index_large_enough: Result > i next_byte_index_small_enough: Result <= byte_count + 1 shifted_byte_index (i: INTEGER_32; n: INTEGER_32): INTEGER_32 -- Byte index of unicode character n positions after -- character at byte index i; Return 'byte_count + 1' -- if no such character in the string require i_large_enough: i >= 1 i_small_enough: i <= byte_count is_encoded_first_byte: is_encoded_first_byte (i) n_positive: n >= 0 ensure next_byte_index_large_enough: Result >= i next_byte_index_small_enough: Result <= byte_count + 1 byte_index (i: INTEGER_32): INTEGER_32 -- Byte index of character at index i require valid_index: valid_index (i) ensure byte_index_large_enough: Result >= 1 byte_index_small_enough: Result <= byte_count is_encoded_first_byte: is_encoded_first_byte (Result) is_encoded_first_byte (i: INTEGER_32): BOOLEAN -- Is byte at index i the first byte of an encoded unicode character? require i_large_enough: i >= 1 i_small_enough: i <= byte_count feature -- Implementation current_string: STRING_8 -- Current string invariant non_negative_byte_count: byte_count >= 0 byte_count_small_enough: byte_count <= byte_capacity count_small_enough: count <= byte_count end -- class UC_STRING
Generated by ISE EiffelStudio