General update after import with style change and several new packages

This commit is contained in:
Vovanium 2021-09-25 00:00:31 +03:00
parent 6b22e4321a
commit ac0241cf85
22 changed files with 299 additions and 152 deletions

View file

@ -1,2 +1,4 @@
# Encodings
Support for various encodings and escaping schemes for Ada (and possibly another languages)
A library and command-line tools for encoding text.

View file

@ -8,7 +8,8 @@ project Encodings is
for Default_Switches("Ada") use (
"-gnato",
"-O2",
"-gnatW8"
"-gnatW8",
"-gnatyabcfikltx"
);
end Builder;
end Encodings;

View file

@ -0,0 +1,67 @@
with Encodings.Generic_Converters;
package Encodings.Converters is
package Character_to_Character_Converters is new Generic_Converters (
Source_Character => Character,
Target_Character => Character,
Source_String => String,
Target_String => String
);
package Character_to_Wide_Character_Converters is new Generic_Converters (
Source_Character => Character,
Target_Character => Wide_Character,
Source_String => String,
Target_String => Wide_String
);
package Character_to_Wide_Wide_Character_Converters is new Generic_Converters (
Source_Character => Character,
Target_Character => Wide_Wide_Character,
Source_String => String,
Target_String => Wide_Wide_String
);
package Wide_Character_to_Character_Converters is new Generic_Converters (
Source_Character => Wide_Character,
Target_Character => Character,
Source_String => Wide_String,
Target_String => String
);
package Wide_Character_to_Wide_Character_Converters is new Generic_Converters (
Source_Character => Wide_Character,
Target_Character => Wide_Character,
Source_String => Wide_String,
Target_String => Wide_String
);
package Wide_Character_to_Wide_Wide_Character_Converters is new Generic_Converters (
Source_Character => Wide_Character,
Target_Character => Wide_Wide_Character,
Source_String => Wide_String,
Target_String => Wide_Wide_String
);
package Wide_Wide_Character_to_Character_Converters is new Generic_Converters (
Source_Character => Wide_Wide_Character,
Target_Character => Character,
Source_String => Wide_Wide_String,
Target_String => String
);
package Wide_Wide_Character_to_Wide_Character_Converters is new Generic_Converters (
Source_Character => Wide_Wide_Character,
Target_Character => Wide_Character,
Source_String => Wide_Wide_String,
Target_String => Wide_String
);
package Wide_Wide_Character_to_Wide_Wide_Character_Converters is new Generic_Converters (
Source_Character => Wide_Wide_Character,
Target_Character => Wide_Wide_Character,
Source_String => Wide_Wide_String,
Target_String => Wide_Wide_String
);
package Character_Converters renames Character_to_Character_Converters;
end Encodings.Converters;

View file

@ -1,7 +1,7 @@
with Ada.Strings.Bounded;
package Encodings.Encoding_Lists is
Encoding_Name_Length_Maximum: constant := 16;
package Encoding_Name_Strings is new Ada.Strings.Bounded.Generic_Bounded_Length(
Encoding_Name_Length_Maximum : constant := 16;
package Encoding_Name_Strings is new Ada.Strings.Bounded.Generic_Bounded_Length (
Max => Encoding_Name_Length_Maximum
);
end Encodings.Encoding_Lists;

View file

@ -0,0 +1,28 @@
generic
type Source_Character is (<>);
type Target_Character is (<>);
type Source_String is array (Positive range <>) of Source_Character;
type Target_String is array (Positive range <>) of Target_Character;
package Encodings.Generic_Converters is
type Converter is interface;
procedure Reset (This : in out Converter) is null;
-- Reset the converter to initial state (if any)
procedure Convert (
State : in out Converter; -- Converter
Source : in Source_String; -- Character sequence to convert
Source_Last : out Natural; -- Last character consumed by converter
Target : out Target_String; -- Converted character sequence
Target_Last : out Natural -- Last character in converted string
) is abstract with
Pre'Class => Target'Length > 0,
Post'Class => Source_Last = Source'Last or Target_Last = Target'Last;
-- Procedure converts a (part of) string
-- Procedure must consume whole source string or
-- fill whole target string.
-- Source (Source_Last + 1 .. Source'Last)
-- Note: this is most basic conversion interface
-- It is not require memory allocation or
-- length calculation
end Encodings.Generic_Converters;

View file

@ -2,10 +2,10 @@ with Ada.Characters.Latin_1;
use Ada.Characters.Latin_1;
with Encodings.Line_Endings.Generic_Add_CR;
package Encodings.Line_Endings.Add_CR is new Generic_Add_CR(
Character_Type => Character,
String_Type => String,
package Encodings.Line_Endings.Add_CR is new Generic_Add_CR (
Character_Type => Character,
String_Type => String,
Carriage_Return => CR,
Line_Feed => LF,
Coder_Base => Coder_Base
Line_Feed => LF,
Coder_Base => Coder_Base
);

View file

@ -1,4 +1,3 @@
-- Adds carriage return (0Dh) before line feed (0Ah) if it does not present
package Encodings.Line_Endings.CR_LF is
pragma Pure;
-- Adds carriage return (0Dh) before line feed (0Ah) if it does not present
package Encodings.Line_Endings.CR_LF with Pure is
end Encodings.Line_Endings.CR_LF;

View file

@ -1,12 +1,12 @@
package body Encodings.Line_Endings.Generic_Add_CR is
procedure Convert(
This: in out Coder;
Source: in String_Type;
Source_Last: out Natural;
Target: out String_Type;
Target_Last: out Natural
procedure Convert (
This : in out Coder;
Source : in String_Type;
Source_Last : out Natural;
Target : out String_Type;
Target_Last : out Natural
) is
C: Character_Type;
C : Character_Type;
begin
Source_Last := Source'First - 1;
Target_Last := Target'First - 1;
@ -15,19 +15,19 @@ package body Encodings.Line_Endings.Generic_Add_CR is
return;
end if;
Target_Last := Target_Last + 1;
Target(Target_Last) := Line_Feed;
Target (Target_Last) := Line_Feed;
This.State := Initial;
end if;
while Source_Last < Source'Last and Target_Last < Target'Last loop
Source_Last := Source_Last + 1;
C := Source(Source_Last);
C := Source (Source_Last);
if C = Carriage_Return then
This.State := Have_CR;
elsif C = Line_Feed then
if This.State /= Have_CR then
Target_Last := Target_Last + 1;
Target(Target_Last) := Carriage_Return;
if(Target_Last >= Target'Last) then -- Buffer ends while outputtting CR-LF
Target (Target_Last) := Carriage_Return;
if Target_Last >= Target'Last then -- Buffer ends while outputtting CR-LF
This.State := Need_LF;
return;
end if;
@ -35,7 +35,7 @@ package body Encodings.Line_Endings.Generic_Add_CR is
end if;
end if;
Target_Last := Target_Last + 1;
Target(Target_Last) := C;
Target (Target_Last) := C;
end loop;
end Convert;
end Encodings.Line_Endings.Generic_Add_CR;

View file

@ -1,17 +1,17 @@
generic
generic
type Character_Type is (<>); -- Character, Wide_Character, Wide_Wide_Character (or whatever)
type String_Type is array(Positive range <>) of Character_Type;
Carriage_Return: in Character_Type; -- CR in the corresponding type
Line_Feed: in Character_Type; -- LF in the corresponding type
type Coder_Base is abstract tagged private; -- Type to derive
type String_Type is array (Positive range <>) of Character_Type;
Carriage_Return : in Character_Type; -- CR in the corresponding type
Line_Feed : in Character_Type; -- LF in the corresponding type
type Coder_Base is abstract tagged private; -- Type to derive
package Encodings.Line_Endings.Generic_Add_CR is
type Coder is new Coder_Base with private;
procedure Convert(
This: in out Coder; -- Coder state
Source: in String_Type; -- String to be converted
Source_Last: out Natural; -- Last index of source string read (length if string is starting at 1)
Target: out String_Type; -- Converted string
Target_Last: out Natural -- Last Index of destination string written
procedure Convert (
This : in out Coder; -- Coder state
Source : in String_Type; -- String to be converted
Source_Last : out Natural; -- Last index of source string read (length if string is starting at 1)
Target : out String_Type; -- Converted string
Target_Last : out Natural -- Last Index of destination string written
);
private
type Coder_State is (
@ -20,6 +20,6 @@ private
Need_LF
);
type Coder is new Coder_Base with record
State: Coder_State := Initial;
State : Coder_State := Initial;
end record;
end Encodings.Line_Endings.Generic_Add_CR;

View file

@ -1,36 +1,36 @@
with Ada.Assertions;
use Ada.Assertions;
package body Encodings.Line_Endings.Generic_Strip_CR is
procedure Convert(
This: in out Coder;
Source: in String_Type;
Source_Last: out Natural;
Target: out String_Type;
Target_Last: out Natural
procedure Convert (
This : in out Coder;
Source : in String_Type;
Source_Last : out Natural;
Target : out String_Type;
Target_Last : out Natural
) is
C: Character_Type;
C : Character_Type;
begin
Source_Last := Source'First - 1;
Target_Last := Target'First - 1;
while Source_Last < Source'Last loop
C := Source(Source_Last + 1);
C := Source (Source_Last + 1);
if This.Have_CR and C /= Line_Feed then -- emit CR not the part of CRLF sequence
if Target_Last < Target'Last then
Target_Last := Target_Last + 1;
Target(Target_Last) := Carriage_Return;
Target (Target_Last) := Carriage_Return;
else
return;
end if;
This.Have_CR := False;
end if;
if C = Carriage_Return then
Assert(This.Have_CR = False, "Have should be cleared before or if condition shoudn't be true");
Assert (This.Have_CR = False, "Have should be cleared before or if condition shoudn't be true");
This.Have_CR := True;
else
This.Have_CR := False;
if Target_Last < Target'Last then
Target_Last := Target_Last + 1;
Target(Target_Last) := C;
Target (Target_Last) := C;
else
return;
end if;

View file

@ -1,20 +1,20 @@
generic
generic
type Character_Type is (<>); -- Character, Wide_Character, Wide_Wide_Character (or whatever)
type String_Type is array(Positive range <>) of Character_Type;
Carriage_Return: in Character_Type; -- CR in the corresponding type
Line_Feed: in Character_Type; -- LF in the corresponding type
type Coder_Base is abstract tagged private; -- Type to derive
type String_Type is array (Positive range <>) of Character_Type;
Carriage_Return : in Character_Type; -- CR in the corresponding type
Line_Feed : in Character_Type; -- LF in the corresponding type
type Coder_Base is abstract tagged private; -- Type to derive
package Encodings.Line_Endings.Generic_Strip_CR is
type Coder is new Coder_Base with private;
procedure Convert(
This: in out Coder; -- Coder state
Source: in String_Type; -- String to be converted
Source_Last: out Natural; -- Last index of source string read (length if string is starting at 1)
Target: out String_Type; -- Converted string
Target_Last: out Natural -- Last Index of destination string written
procedure Convert (
This : in out Coder; -- Coder state
Source : in String_Type; -- String to be converted
Source_Last : out Natural; -- Last index of source string read (length if string is starting at 1)
Target : out String_Type; -- Converted string
Target_Last : out Natural -- Last Index of destination string written
);
private
type Coder is new Coder_Base with record
Have_CR: Boolean := False;
Have_CR : Boolean := False;
end record;
end Encodings.Line_Endings.Generic_Strip_CR;

View file

@ -2,7 +2,7 @@ with Ada.Characters.Latin_1;
use Ada.Characters.Latin_1;
with Encodings.Line_Endings.Generic_Strip_CR;
package Encodings.Line_Endings.Strip_CR is new Generic_Strip_CR(
package Encodings.Line_Endings.Strip_CR is new Generic_Strip_CR (
Character_Type => Character,
String_Type => String,
Carriage_Return => CR,

View file

@ -1,3 +1,2 @@
package Encodings.Line_Endings is
pragma Pure;
end Encodings.Line_Endings;
package Encodings.Line_Endings with Pure is
end Encodings.Line_Endings;

View file

@ -0,0 +1,14 @@
package Encodings.Unicode.UTF_16 is
subtype UTF_16_Character is Wide_Character;
subtype High_Surrogate is UTF_16_Character
range UTF_16_Character'Val (16#D800#) .. UTF_16_Character'Val (16#DBFF#);
subtype Low_Surrogate is UTF_16_Character
range UTF_16_Character'Val (16#DC00#) .. UTF_16_Character'Val (16#DFFF#);
subtype UTF_16_String is Wide_String;
Byte_Order_Mark : constant UTF_16_String := (
1 => UTF_16_Character'Val (Byte_Order_Mark_Code));
end Encodings.Unicode.UTF_16;

View file

@ -1,3 +1,5 @@
package Encodings.Unicode.UTF_32 is
subtype UTF_32_Character is Wide_Wide_Character
range Wide_Wide_Character'Val (0) .. Wide_Wide_Character'Val (Code_Point_Last);
subtype UTF_32_String is Wide_Wide_String;
end Encodings.Unicode.UTF_32;

View file

@ -0,0 +1,25 @@
package Encodings.Unicode.UTF_8 is
subtype UTF_8_Character
is Character range Character'Val (0) .. Character'Val (16#F4#);
subtype Single_Byte_Character
is UTF_8_Character range UTF_8_Character'Val (0) .. UTF_8_Character'Val (16#7F#);
subtype Extension_Byte
is UTF_8_Character range UTF_8_Character'Val (16#80#) .. UTF_8_Character'Val (16#B8#);
-- C0 .. C1 -- illegal, encoding non-shortest sequence for 00 .. 7F
subtype Two_Byte_Initial
is UTF_8_Character range UTF_8_Character'Val (16#C2#) .. UTF_8_Character'Val (16#DF#);
subtype Three_Byte_Initial
is UTF_8_Character range UTF_8_Character'Val (16#E0#) .. UTF_8_Character'Val (16#EF#);
subtype Four_Byte_Initial
is UTF_8_Character range UTF_8_Character'Val (16#F0#) .. UTF_8_Character'Val (16#F4#);
-- F5 .. F7 encoding code points out of range
-- F8 .. FD are deprecated multibyte sequences
-- FE .. FF illegal
subtype UTF_8_String is String;
Byte_Order_Mark : constant UTF_8_String := (
UTF_8_Character'Val (16#EF#),
UTF_8_Character'Val (16#BB#),
UTF_8_Character'Val (16#BF#));
end Encodings.Unicode.UTF_8;

View file

@ -1,3 +1,5 @@
package Encodings.Unicode is
Code_Point_Last: constant Wide_Wide_Character := Wide_Wide_Character'Val(16#10FFFF#);
end Encodings.Unicode;
package Encodings.Unicode with Pure is
Code_Point_Last : constant := 16#10FFFF#;
Byte_Order_Mark_Code : constant := 16#FEFF#;
end Encodings.Unicode;

View file

@ -1,40 +1,40 @@
package body Encodings.Utility.Generic_Sequence_Buffers is
procedure Write_Buffered(
Buffer: in out Sequence_Buffer;
Target: in out String_Type;
Target_Last: in out Natural
procedure Write_Buffered (
Buffer : in out Sequence_Buffer;
Target : in out String_Type;
Target_Last : in out Natural
) is
Remaining : Natural := Remaining_Length(Target, Target'Last - Target_Last);
Buffered : Natural := Length(Buffer);
Write_Length : Natural := Natural'Min(Remaining, Buffered);
Remaining : Natural := Remaining_Length (Target, Target'Last - Target_Last);
Buffered : Natural := Length (Buffer);
Write_Length : Natural := Natural'Min (Remaining, Buffered);
New_Target_Last : Natural := Target_Last + Write_Length;
New_Buffer_First : Positive := Buffer.First + Write_Length;
begin
Target(Target_Last + 1 .. New_Target_Last) := Buffer.Data(Buffer.First .. New_Buffer_First - 1);
Target (Target_Last + 1 .. New_Target_Last) := Buffer.Data (Buffer.First .. New_Buffer_First - 1);
Target_Last := New_Target_Last;
Buffer.First := New_Buffer_First;
end Write_Buffered;
procedure Set_Buffer(
Buffer: in out Sequence_Buffer;
Source: in String_Type
procedure Set_Buffer (
Buffer : in out Sequence_Buffer;
Source : in String_Type
) is
begin
Buffer.Data(1 .. Source'Length) := Source;
Buffer.Data (1 .. Source'Length) := Source;
Buffer.First := 1;
Buffer.Last := Source'Length;
end Set_Buffer;
procedure Write(
Buffer: in out Sequence_Buffer;
Source: in String_Type;
Target: in out String_Type;
Target_Last: in out Natural
procedure Write (
Buffer : in out Sequence_Buffer;
Source : in String_Type;
Target : in out String_Type;
Target_Last : in out Natural
) is
begin
Set_Buffer(Buffer, Source);
Write_Buffered(Buffer, Target, Target_Last);
Set_Buffer (Buffer, Source);
Write_Buffered (Buffer, Target, Target_Last);
end;

View file

@ -1,12 +1,12 @@
generic
type Character_Type is (<>);
type String_Type is array(Positive range <>) of Character_Type;
Max_Length: Positive;
type String_Type is array (Positive range <>) of Character_Type;
Max_Length : Positive;
package Encodings.Utility.Generic_Sequence_Buffers is
function Remaining_Length(
Data: String_Type;
Last: Natural
function Remaining_Length (
Data : String_Type;
Last : Natural
) return Natural is (
if Last >= Data'Last then 0 else Data'Last - Last
) with
@ -14,43 +14,43 @@ package Encodings.Utility.Generic_Sequence_Buffers is
Post => Remaining_Length'Result in 0 .. Data'Length;
type Sequence_Buffer is record
Data: String_Type(1 .. Max_Length);
First: Positive := 1; -- first position of buffered sequence
Last: Natural := 0; -- last position of buffered sequence
Data : String_Type (1 .. Max_Length);
First : Positive := 1; -- first position of buffered sequence
Last : Natural := 0; -- last position of buffered sequence
end record;
function Is_Empty(
Buffer: in Sequence_Buffer
function Is_Empty (
Buffer : in Sequence_Buffer
) return Boolean is (
Buffer.Last < Buffer.First
);
function Length(
Buffer: in Sequence_Buffer
function Length (
Buffer : in Sequence_Buffer
) return Natural is (
if Is_Empty(Buffer) then 0 else Buffer.Last - Buffer.First + 1
if Is_Empty (Buffer) then 0 else Buffer.Last - Buffer.First + 1
) with
Post => Length'Result in 0 .. Max_Length;
procedure Write_Buffered(
Buffer: in out Sequence_Buffer;
Target: in out String_Type;
Target_Last: in out Natural
procedure Write_Buffered (
Buffer : in out Sequence_Buffer;
Target : in out String_Type;
Target_Last : in out Natural
);
procedure Set_Buffer(
Buffer: in out Sequence_Buffer;
Source: in String_Type
procedure Set_Buffer (
Buffer : in out Sequence_Buffer;
Source : in String_Type
) with
Pre => Is_Empty(Buffer),
Post => Length(Buffer) = Source'Length;
Pre => Is_Empty (Buffer),
Post => Length (Buffer) = Source'Length;
procedure Write(
Buffer: in out Sequence_Buffer;
Source: in String_Type;
Target: in out String_Type;
Target_Last: in out Natural
procedure Write (
Buffer : in out Sequence_Buffer;
Source : in String_Type;
Target : in out String_Type;
Target_Last : in out Natural
) with
Pre => Is_Empty(Buffer);
Pre => Is_Empty (Buffer);
end Encodings.Utility.Generic_Sequence_Buffers;

View file

@ -5,42 +5,49 @@ use type Ada.Streams.Stream_Element_Offset;
package body Encodings.Utility is
--generic
-- type Element_Type is private;
-- type Index_Type is (<>);
-- type Array_Type is array(Index_Type range <>) of Element_Type;
procedure Read_Array(
Stream: in out Root_Stream_Type'Class;
Item: out Array_Type;
Last: out Index_Type'Base
-- generic
-- type Element_Type is private;
-- type Index_Type is (<>);
-- type Array_Type is array(Index_Type range <>) of Element_Type;
procedure Read_Array (
Stream : in out Root_Stream_Type'Class;
Item : out Array_Type;
Last : out Index_Type'Base
) is
Element_Length: constant Stream_Element_Offset := Element_Type'Stream_Size / Stream_Element'Size;
Buffer_Length: constant Stream_Element_Offset := Item'Length * Element_Length;
Buffer: Stream_Element_Array(1 .. Buffer_Length);
Buffer_I, Buffer_Last: Stream_Element_Offset;
Element_Count: Natural;
function Conversion is new Ada.Unchecked_Conversion(
Element_Length : constant Stream_Element_Offset := Element_Type'Stream_Size / Stream_Element'Size;
Buffer_Length : constant Stream_Element_Offset := Item'Length * Element_Length;
Buffer : Stream_Element_Array (1 .. Buffer_Length);
Buffer_I, Buffer_Last : Stream_Element_Offset;
Element_Count : Natural;
function Conversion is new Ada.Unchecked_Conversion (
Source => Stream_Element_Array,
Target => Element_Type
);
begin
Stream.Read(Buffer, Buffer_Last);
Element_Count := Natural(Buffer_Last / Element_Length);
Stream.Read (Buffer, Buffer_Last);
Element_Count := Natural (Buffer_Last / Element_Length);
Last := Item'First; -- + (Element_Count - 1);
Buffer_I := 1;
for I in 1..Element_Count loop
Item(Last) := Conversion(Buffer(Buffer_I .. Buffer_I + Element_Length - 1));
Last := Index_Type'Succ(Last);
for I in 1 .. Element_Count loop
Item (Last) := Conversion (Buffer (Buffer_I .. Buffer_I + Element_Length - 1));
Last := Index_Type'Succ (Last);
Buffer_I := Buffer_I + Element_Length;
end loop;
Last := Index_Type'Pred(Last);
Last := Index_Type'Pred (Last);
end;
-- Strange, GNAT cannot use generic instance for package subprogram
procedure Read_String(Stream: in out Root_Stream_Type'Class; Item: out String; Last: out Positive'Base) is
procedure Inst is new Read_Array(Element_Type => Character, Index_Type => Positive, Array_Type => String);
-- Strange, GNAT cannot use generic instance for package subprogram
procedure Read_String (
Stream : in out Root_Stream_Type'Class;
Item : out String;
Last : out Positive'Base
) is
procedure Inst is new Read_Array (
Element_Type => Character,
Index_Type => Positive,
Array_Type => String);
begin
Inst(Stream, Item, Last);
Inst (Stream, Item, Last);
end;
end Encodings.Utility;

View file

@ -1,20 +1,22 @@
with Ada.Streams;
use Ada.Streams;
-- Several utilities to ease encoded string processing
package Encodings.Utility is
pragma Pure;
-- Several utilities to ease encoded string processing
package Encodings.Utility with Pure is
-- Generic array version of Stream.Read returning last element written
-- Generic array version of Stream.Read returning last element written
generic
type Element_Type is (<>);
type Index_Type is (<>);
type Array_Type is array(Index_Type range <>) of Element_Type;
procedure Read_Array(
Stream: in out Root_Stream_Type'Class;
Item: out Array_Type;
Last: out Index_Type'Base
type Index_Type is (<>);
type Array_Type is array (Index_Type range <>) of Element_Type;
procedure Read_Array (
Stream : in out Root_Stream_Type'Class;
Item : out Array_Type;
Last : out Index_Type'Base
);
procedure Read_String(Stream: in out Root_Stream_Type'Class; Item: out String; Last: out Positive'Base);
procedure Read_String (
Stream : in out Root_Stream_Type'Class;
Item : out String;
Last : out Positive'Base);
end Encodings.Utility;

View file

@ -1,4 +1,3 @@
package Encodings is
pragma Pure;
package Encodings with Pure is
type Coder_Base is interface;
end Encodings;