diff --git a/gpr/common.gpr b/gpr/common.gpr index 72dc35d..973ecb0 100644 --- a/gpr/common.gpr +++ b/gpr/common.gpr @@ -6,6 +6,7 @@ abstract project Common is "-gnato", "-O2", "-ggdb", + "-gnata", "-gnatW8", "-gnatyAabefikltx" ); diff --git a/source/library/encodings-unicode-utf_16-from_utf_32.adb b/source/library/encodings-unicode-utf_16-from_utf_32.adb index 57e1e04..8a97e49 100644 --- a/source/library/encodings-unicode-utf_16-from_utf_32.adb +++ b/source/library/encodings-unicode-utf_16-from_utf_32.adb @@ -18,11 +18,12 @@ package body Encodings.Unicode.UTF_16.From_UTF_32 is begin Source_Last := Source'First - 1; Target_Last := Target'First - 1; - while Source_Last < Source'Last and Target_Last < Target'Last loop + Main : while Target_Last < Target'Last loop if State.Surrogate in Low_Surrogate then Q := State.Surrogate; State.Surrogate := UTF_16_Character'Val (0); else + exit Main when Source_Last >= Source'Last; Source_Last := Source_Last + 1; Code := UTF_32_Character'Pos (Source (Source_Last)); if Code <= 16#FFFF# then -- BMP @@ -34,7 +35,7 @@ package body Encodings.Unicode.UTF_16.From_UTF_32 is end if; Target_Last := Target_Last + 1; Target (Target_Last) := Q; - end loop; + end loop Main; end Process; end Encodings.Unicode.UTF_16.From_UTF_32; diff --git a/source/library/encodings-unicode-utf_16.ads b/source/library/encodings-unicode-utf_16.ads index 78bb2b2..3f65def 100644 --- a/source/library/encodings-unicode-utf_16.ads +++ b/source/library/encodings-unicode-utf_16.ads @@ -15,9 +15,15 @@ private use Interfaces; function Make_High_Surrogate (C : Code_Point) return UTF_16_Character - is (UTF_16_Character'Val (Shift_Right (C - 16#010000#, 10) or 16#D800#)); + is (UTF_16_Character'Val (Shift_Right (C - 16#010000#, 10) or 16#D800#)) + with + Pre => C not in Basic_Multilingual_Plane, + Post => Make_High_Surrogate'Result in High_Surrogate; function Make_Low_Surrogate (C : Code_Point) return UTF_16_Character - is (UTF_16_Character'Val ((C and 16#03FF#) or 16#DC00#)); + is (UTF_16_Character'Val ((C and 16#03FF#) or 16#DC00#)) + with + Pre => C not in Basic_Multilingual_Plane, + Post => Make_Low_Surrogate'Result in Low_Surrogate; end Encodings.Unicode.UTF_16; diff --git a/source/library/encodings-unicode-utf_32.ads b/source/library/encodings-unicode-utf_32.ads index 5872edb..6c175f0 100644 --- a/source/library/encodings-unicode-utf_32.ads +++ b/source/library/encodings-unicode-utf_32.ads @@ -1,5 +1,7 @@ package Encodings.Unicode.UTF_32 is subtype UTF_32_Character is Wide_Wide_Character range Wide_Wide_Character'Val (0) .. Wide_Wide_Character'Val (Code_Point_Last); + subtype UTF_32_String is Wide_Wide_String; + end Encodings.Unicode.UTF_32; \ No newline at end of file diff --git a/source/library/encodings-unicode-utf_8-from_utf_32.adb b/source/library/encodings-unicode-utf_8-from_utf_32.adb index c2aac7d..e318065 100644 --- a/source/library/encodings-unicode-utf_8-from_utf_32.adb +++ b/source/library/encodings-unicode-utf_8-from_utf_32.adb @@ -19,11 +19,12 @@ package body Encodings.Unicode.UTF_8.From_UTF_32 is begin Source_Last := Source'First - 1; Target_Last := Target'First - 1; - while Source_Last < Source'Last and Target_Last < Target'Last loop + Main : while Target_Last < Target'Last loop if State.L > 0 then State.L := State.L - 1; Q := Make_Extension_Byte (State.Code, State.L); else + exit Main when Source_Last >= Source'Last; Source_Last := Source_Last + 1; C := Source (Source_Last); case UTF_32_Character'Pos (C) is @@ -47,7 +48,7 @@ package body Encodings.Unicode.UTF_8.From_UTF_32 is end if; Target_Last := Target_Last + 1; Target (Target_Last) := Q; - end loop; + end loop Main; end Process; end Encodings.Unicode.UTF_8.From_UTF_32; \ No newline at end of file diff --git a/source/library/encodings-unicode-utf_8.ads b/source/library/encodings-unicode-utf_8.ads index 136bd84..40f65d6 100644 --- a/source/library/encodings-unicode-utf_8.ads +++ b/source/library/encodings-unicode-utf_8.ads @@ -33,7 +33,10 @@ private P : Code_Point; R : Natural) -- Remaining bytes (index from the last) return Extension_Byte - is (UTF_8_Character'Val ((Interfaces.Shift_Right (P, 6 * R) and Extension_Byte_Mask) or Extension_Byte_Base)); + is (UTF_8_Character'Val ((Interfaces.Shift_Right (P, 6 * R) and Extension_Byte_Mask) + or Extension_Byte_Base)) + with + Pre => P not in 0 .. 16#7F# and R < 3; function Initial_Mask (L : Positive) return Code_Point is (Shift_Left (1, 6 - L) - 1); @@ -45,7 +48,10 @@ private P : Code_Point; L : Positive) -- Extension bytes return UTF_8_Character - is (UTF_8_Character'Val (Shift_Right (P, 6 * L) or Initial_Base (L))); + is (UTF_8_Character'Val (Shift_Right (P, 6 * L) or Initial_Base (L))) + with + Pre => P not in 0 .. 16#7F# and L < 4, + Post => Make_Initial_Byte'Result in Initial_2_Byte | Initial_3_Byte | Initial_4_Byte; -- Masking is not needed here end Encodings.Unicode.UTF_8; diff --git a/source/library/encodings-unicode.ads b/source/library/encodings-unicode.ads index 81442f7..df34104 100644 --- a/source/library/encodings-unicode.ads +++ b/source/library/encodings-unicode.ads @@ -10,6 +10,9 @@ package Encodings.Unicode with Pure is private subtype Code_Point is Interfaces.Unsigned_32 range 0 .. Code_Point_Last; + subtype Basic_Multilingual_Plane is Code_Point + range 0 .. 16#FFFF#; + subtype Sequence_Count is Integer range 0 .. 4; end Encodings.Unicode; diff --git a/source/test/encodings-unicode-converter_test.adb b/source/test/encodings-unicode-converter_test.adb index 945abee..731a582 100644 --- a/source/test/encodings-unicode-converter_test.adb +++ b/source/test/encodings-unicode-converter_test.adb @@ -113,12 +113,16 @@ package body Encodings.Unicode.Converter_Test is Hex'Pos (H (H'Last)) + 16 * UC (H (H'First .. H'Last - 1))); function U (H : Hex_String) return UTF_8.UTF_8_Character is (UTF_8.UTF_8_Character'Val (UC (H))); + function U (H : Hex_String) return UTF_16.UTF_16_Character is (UTF_16.UTF_16_Character'Val (UC (H))); + function U (H : Hex_String) return UTF_32.UTF_32_Character is (UTF_32.UTF_32_Character'Val (UC (H))); procedure Run_Test (T : in out Test) is begin Test_One (T, "", "", ""); Test_One (T, "abc", "abc", "abc"); - Test_One (T, U("D0") & U("A9"), "Щ", "Щ"); + Test_One (T, U ("D0") & U ("A9"), "Щ", "Щ"); + Test_One (T, U ("E2") & U ("89") & U ("A0"), "≠", "≠"); + Test_One (T, U ("F0") & U ("9F") & U ("90") & U ("B1"), U ("D83D") & U ("DC31"), "" & U ("01F431")); -- cat face end Run_Test; function Suite return Access_Test_Suite is