* conversion will try to fullfill Target when whole Source is read
This commit is contained in:
parent
5707762d70
commit
d37628b673
8 changed files with 33 additions and 9 deletions
|
@ -6,6 +6,7 @@ abstract project Common is
|
|||
"-gnato",
|
||||
"-O2",
|
||||
"-ggdb",
|
||||
"-gnata",
|
||||
"-gnatW8",
|
||||
"-gnatyAabefikltx"
|
||||
);
|
||||
|
|
|
@ -18,11 +18,12 @@ package body Encodings.Unicode.UTF_16.From_UTF_32 is
|
|||
begin
|
||||
Source_Last := Source'First - 1;
|
||||
Target_Last := Target'First - 1;
|
||||
while Source_Last < Source'Last and Target_Last < Target'Last loop
|
||||
Main : while Target_Last < Target'Last loop
|
||||
if State.Surrogate in Low_Surrogate then
|
||||
Q := State.Surrogate;
|
||||
State.Surrogate := UTF_16_Character'Val (0);
|
||||
else
|
||||
exit Main when Source_Last >= Source'Last;
|
||||
Source_Last := Source_Last + 1;
|
||||
Code := UTF_32_Character'Pos (Source (Source_Last));
|
||||
if Code <= 16#FFFF# then -- BMP
|
||||
|
@ -34,7 +35,7 @@ package body Encodings.Unicode.UTF_16.From_UTF_32 is
|
|||
end if;
|
||||
Target_Last := Target_Last + 1;
|
||||
Target (Target_Last) := Q;
|
||||
end loop;
|
||||
end loop Main;
|
||||
end Process;
|
||||
|
||||
end Encodings.Unicode.UTF_16.From_UTF_32;
|
||||
|
|
|
@ -15,9 +15,15 @@ private
|
|||
use Interfaces;
|
||||
|
||||
function Make_High_Surrogate (C : Code_Point) return UTF_16_Character
|
||||
is (UTF_16_Character'Val (Shift_Right (C - 16#010000#, 10) or 16#D800#));
|
||||
is (UTF_16_Character'Val (Shift_Right (C - 16#010000#, 10) or 16#D800#))
|
||||
with
|
||||
Pre => C not in Basic_Multilingual_Plane,
|
||||
Post => Make_High_Surrogate'Result in High_Surrogate;
|
||||
|
||||
function Make_Low_Surrogate (C : Code_Point) return UTF_16_Character
|
||||
is (UTF_16_Character'Val ((C and 16#03FF#) or 16#DC00#));
|
||||
is (UTF_16_Character'Val ((C and 16#03FF#) or 16#DC00#))
|
||||
with
|
||||
Pre => C not in Basic_Multilingual_Plane,
|
||||
Post => Make_Low_Surrogate'Result in Low_Surrogate;
|
||||
|
||||
end Encodings.Unicode.UTF_16;
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
package Encodings.Unicode.UTF_32 is
|
||||
subtype UTF_32_Character is Wide_Wide_Character
|
||||
range Wide_Wide_Character'Val (0) .. Wide_Wide_Character'Val (Code_Point_Last);
|
||||
|
||||
subtype UTF_32_String is Wide_Wide_String;
|
||||
|
||||
end Encodings.Unicode.UTF_32;
|
|
@ -19,11 +19,12 @@ package body Encodings.Unicode.UTF_8.From_UTF_32 is
|
|||
begin
|
||||
Source_Last := Source'First - 1;
|
||||
Target_Last := Target'First - 1;
|
||||
while Source_Last < Source'Last and Target_Last < Target'Last loop
|
||||
Main : while Target_Last < Target'Last loop
|
||||
if State.L > 0 then
|
||||
State.L := State.L - 1;
|
||||
Q := Make_Extension_Byte (State.Code, State.L);
|
||||
else
|
||||
exit Main when Source_Last >= Source'Last;
|
||||
Source_Last := Source_Last + 1;
|
||||
C := Source (Source_Last);
|
||||
case UTF_32_Character'Pos (C) is
|
||||
|
@ -47,7 +48,7 @@ package body Encodings.Unicode.UTF_8.From_UTF_32 is
|
|||
end if;
|
||||
Target_Last := Target_Last + 1;
|
||||
Target (Target_Last) := Q;
|
||||
end loop;
|
||||
end loop Main;
|
||||
end Process;
|
||||
|
||||
end Encodings.Unicode.UTF_8.From_UTF_32;
|
|
@ -33,7 +33,10 @@ private
|
|||
P : Code_Point;
|
||||
R : Natural) -- Remaining bytes (index from the last)
|
||||
return Extension_Byte
|
||||
is (UTF_8_Character'Val ((Interfaces.Shift_Right (P, 6 * R) and Extension_Byte_Mask) or Extension_Byte_Base));
|
||||
is (UTF_8_Character'Val ((Interfaces.Shift_Right (P, 6 * R) and Extension_Byte_Mask)
|
||||
or Extension_Byte_Base))
|
||||
with
|
||||
Pre => P not in 0 .. 16#7F# and R < 3;
|
||||
|
||||
function Initial_Mask (L : Positive) return Code_Point
|
||||
is (Shift_Left (1, 6 - L) - 1);
|
||||
|
@ -45,7 +48,10 @@ private
|
|||
P : Code_Point;
|
||||
L : Positive) -- Extension bytes
|
||||
return UTF_8_Character
|
||||
is (UTF_8_Character'Val (Shift_Right (P, 6 * L) or Initial_Base (L)));
|
||||
is (UTF_8_Character'Val (Shift_Right (P, 6 * L) or Initial_Base (L)))
|
||||
with
|
||||
Pre => P not in 0 .. 16#7F# and L < 4,
|
||||
Post => Make_Initial_Byte'Result in Initial_2_Byte | Initial_3_Byte | Initial_4_Byte;
|
||||
-- Masking is not needed here
|
||||
|
||||
end Encodings.Unicode.UTF_8;
|
||||
|
|
|
@ -10,6 +10,9 @@ package Encodings.Unicode with Pure is
|
|||
private
|
||||
subtype Code_Point is Interfaces.Unsigned_32 range 0 .. Code_Point_Last;
|
||||
|
||||
subtype Basic_Multilingual_Plane is Code_Point
|
||||
range 0 .. 16#FFFF#;
|
||||
|
||||
subtype Sequence_Count is Integer range 0 .. 4;
|
||||
|
||||
end Encodings.Unicode;
|
||||
|
|
|
@ -113,12 +113,16 @@ package body Encodings.Unicode.Converter_Test is
|
|||
Hex'Pos (H (H'Last)) + 16 * UC (H (H'First .. H'Last - 1)));
|
||||
|
||||
function U (H : Hex_String) return UTF_8.UTF_8_Character is (UTF_8.UTF_8_Character'Val (UC (H)));
|
||||
function U (H : Hex_String) return UTF_16.UTF_16_Character is (UTF_16.UTF_16_Character'Val (UC (H)));
|
||||
function U (H : Hex_String) return UTF_32.UTF_32_Character is (UTF_32.UTF_32_Character'Val (UC (H)));
|
||||
|
||||
procedure Run_Test (T : in out Test) is
|
||||
begin
|
||||
Test_One (T, "", "", "");
|
||||
Test_One (T, "abc", "abc", "abc");
|
||||
Test_One (T, U("D0") & U("A9"), "Щ", "Щ");
|
||||
Test_One (T, U ("D0") & U ("A9"), "Щ", "Щ");
|
||||
Test_One (T, U ("E2") & U ("89") & U ("A0"), "≠", "≠");
|
||||
Test_One (T, U ("F0") & U ("9F") & U ("90") & U ("B1"), U ("D83D") & U ("DC31"), "" & U ("01F431")); -- cat face
|
||||
end Run_Test;
|
||||
|
||||
function Suite return Access_Test_Suite is
|
||||
|
|
Loading…
Reference in a new issue