This commit is contained in:
Vovanium 2023-04-25 19:46:13 +03:00
parent aeb1895200
commit 5707762d70
9 changed files with 238 additions and 0 deletions

View file

@ -5,6 +5,7 @@ abstract project Common is
Default_Switches := (
"-gnato",
"-O2",
"-ggdb",
"-gnatW8",
"-gnatyAabefikltx"
);

View file

@ -0,0 +1,40 @@
package body Encodings.Unicode.UTF_16.From_UTF_32 is
procedure Reset (State : in out Converter) is
begin
State.Surrogate := UTF_16_Character'Val (0);
end Reset;
procedure Process (
State : in out Converter;
Source : in UTF_32_String;
Source_Last : out Natural;
Target : out UTF_16_String;
Target_Last : out Natural)
is
Code : Code_Point;
Q : UTF_16_Character;
use Interfaces;
begin
Source_Last := Source'First - 1;
Target_Last := Target'First - 1;
while Source_Last < Source'Last and Target_Last < Target'Last loop
if State.Surrogate in Low_Surrogate then
Q := State.Surrogate;
State.Surrogate := UTF_16_Character'Val (0);
else
Source_Last := Source_Last + 1;
Code := UTF_32_Character'Pos (Source (Source_Last));
if Code <= 16#FFFF# then -- BMP
Q := UTF_16_Character'Val (Code);
else
Q := Make_High_Surrogate (Code);
State.Surrogate := Make_Low_Surrogate (Code);
end if;
end if;
Target_Last := Target_Last + 1;
Target (Target_Last) := Q;
end loop;
end Process;
end Encodings.Unicode.UTF_16.From_UTF_32;

View file

@ -0,0 +1,26 @@
--
-- Converter from UTF-32 to UTF-16
--
with Encodings.Converters;
with Encodings.Unicode.UTF_32;
use Encodings.Unicode.UTF_32;
package Encodings.Unicode.UTF_16.From_UTF_32 is
type Converter is new Converters.Wide_Wide_to_Wide.Converter with private;
overriding procedure Reset (State : in out Converter);
overriding procedure Process (
State : in out Converter;
Source : in UTF_32_String;
Source_Last : out Natural;
Target : out UTF_16_String;
Target_Last : out Natural);
private
type Converter is new Converters.Wide_Wide_to_Wide.Converter with record
Surrogate : UTF_16_Character := UTF_16_Character'Val (0);
end record;
end Encodings.Unicode.UTF_16.From_UTF_32;

View file

@ -11,4 +11,13 @@ package Encodings.Unicode.UTF_16 is
Byte_Order_Mark : constant UTF_16_String := (
1 => UTF_16_Character'Val (Byte_Order_Mark_Code));
private
use Interfaces;
function Make_High_Surrogate (C : Code_Point) return UTF_16_Character
is (UTF_16_Character'Val (Shift_Right (C - 16#010000#, 10) or 16#D800#));
function Make_Low_Surrogate (C : Code_Point) return UTF_16_Character
is (UTF_16_Character'Val ((C and 16#03FF#) or 16#DC00#));
end Encodings.Unicode.UTF_16;

View file

@ -1,8 +1,11 @@
with Encodings.Unicode.Test;
package body Encodings.Test is
function Suite return Access_Test_Suite is
R : Access_Test_Suite := new Test_Suite;
begin
R.Add_Test (Encodings.Unicode.Test.Suite);
return R;
end Suite;

View file

@ -0,0 +1,131 @@
with AUnit.Simple_Test_Cases;
with AUnit.Assertions;
use AUnit.Assertions;
with Encodings.Converters;
with Encodings.Generic_Converters;
with Encodings.Unicode.UTF_8.From_UTF_32;
with Encodings.Unicode.UTF_16.From_UTF_32;
with Encodings.Unicode.UTF_32.From_UTF_8;
with Interfaces;
use Interfaces;
package body Encodings.Unicode.Converter_Test is
type Test is new AUnit.Simple_Test_Cases.Test_Case with record
Chunk_Size : Positive;
end record;
function Name (T : Test) return AUnit.Message_String;
procedure Run_Test (T : in out Test);
function Name (T : Test) return AUnit.Message_String is
begin
return AUnit.Format ("Encodings.Unicode.Converter_Test with Chunk_Size = " & Positive'Image (T.Chunk_Size));
end Name;
generic
type Character_Type is (<>);
type String_Type is array (Positive range <>) of Character_Type'Base;
procedure Gen_Assert_Eq (
A, B : String_Type;
Message : String := "");
procedure Gen_Assert_Eq (
A, B : String_Type;
Message : String := "")
is
begin
for I in 0 .. Natural'Min (A'Length, B'Length) - 1 loop
Assert (A (A'First + I) = B (B'First + I), "Not equal characters: "
& Integer'Image (Character_Type'Pos (A (A'First + I)))
& " and "
& Integer'Image (Character_Type'Pos (B (B'First + I)))
& " at offset " & Natural'Image (I)
& " " & Message);
end loop;
Assert (A'Length = B'Length, "Not equal lengths "
& Integer'Image (A'Length)
& " and "
& Integer'Image (A'Length)
& " " & Message);
end Gen_Assert_Eq;
procedure Assert_Eq is new Gen_Assert_Eq (UTF_8.UTF_8_Character, UTF_8.UTF_8_String);
procedure Assert_Eq is new Gen_Assert_Eq (UTF_16.UTF_16_Character, UTF_16.UTF_16_String);
procedure Assert_Eq is new Gen_Assert_Eq (UTF_32.UTF_32_Character, UTF_32.UTF_32_String);
generic
with package Base is new Encodings.Generic_Converters (<>);
with procedure Assert_Eq (A, B : Base.Target_String; Message : String) is <>;
procedure Gen_Test (
T : Test;
Conv : in out Base.Converter'Class;
Source : Base.Source_String;
Target : Base.Target_String);
procedure Gen_Test (
T : Test;
Conv : in out Base.Converter'Class;
Source : Base.Source_String;
Target : Base.Target_String)
is
B : Base.Target_String (Target'Range);
SI, TI : Natural;
begin
Conv.Process (Source, SI, B, TI);
Assert_Eq (B, Target, "Strings must match");
Assert (SI = Source'Last, "Whole buffer must be read");
Assert (TI = B'Last, "Whole buffer must be written");
end Gen_Test;
procedure Test_8_32 is new Gen_Test (Base => Converters.Narrow_to_Wide_Wide);
procedure Test_32_8 is new Gen_Test (Base => Converters.Wide_Wide_to_Narrow);
procedure Test_32_16 is new Gen_Test (Base => Converters.Wide_Wide_to_Wide);
procedure Test_One (
T : in out Test;
NS : UTF_8.UTF_8_String;
WS : UTF_16.UTF_16_String;
ZS : UTF_32.UTF_32_String)
is
Conv_8_32 : UTF_32.From_UTF_8.Converter;
Conv_32_8 : UTF_8.From_UTF_32.Converter;
Conv_32_16 : UTF_16.From_UTF_32.Converter;
begin
Test_8_32 (T, Conv_8_32, NS, ZS);
Test_32_8 (T, Conv_32_8, ZS, NS);
Test_32_16 (T, Conv_32_16, ZS, WS);
end Test_One;
type Hex is ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F');
type Hex_String is array (Positive range <>) of Hex;
function UC (H : Hex_String) return Code_Point;
function UC (H : Hex_String) return Code_Point
is (if H'Length = 0 then
0
else
Hex'Pos (H (H'Last)) + 16 * UC (H (H'First .. H'Last - 1)));
function U (H : Hex_String) return UTF_8.UTF_8_Character is (UTF_8.UTF_8_Character'Val (UC (H)));
procedure Run_Test (T : in out Test) is
begin
Test_One (T, "", "", "");
Test_One (T, "abc", "abc", "abc");
Test_One (T, U("D0") & U("A9"), "Щ", "Щ");
end Run_Test;
function Suite return Access_Test_Suite is
R : Access_Test_Suite := new Test_Suite;
begin
R.Add_Test (new Test'(AUnit.Simple_Test_Cases.Test_Case with Chunk_Size => 1_000));
return R;
end Suite;
end Encodings.Unicode.Converter_Test;

View file

@ -0,0 +1,8 @@
with AUnit.Test_Suites;
use AUnit.Test_Suites;
package Encodings.Unicode.Converter_Test is
function Suite return Access_Test_Suite;
end Encodings.Unicode.Converter_Test;

View file

@ -0,0 +1,12 @@
with Encodings.Unicode.Converter_Test;
package body Encodings.Unicode.Test is
function Suite return Access_Test_Suite is
R : Access_Test_Suite := new Test_Suite;
begin
R.Add_Test (Encodings.Unicode.Converter_Test.Suite);
return R;
end Suite;
end Encodings.Unicode.Test;

View file

@ -0,0 +1,8 @@
with AUnit.Test_Suites;
use AUnit.Test_Suites;
package Encodings.Unicode.Test is
function Suite return Access_Test_Suite;
end Encodings.Unicode.Test;