Update dependencies

Use nix in CI.
Hoang Nguyen 2024-03-16 00:00:00 +07:00
parent f1768b14e1
commit 207e09fb51
Signed by: folliehiyuki
GPG Key ID: B0567C20730E9B11
44 changed files with 16544 additions and 2095 deletions


@ -7,40 +7,14 @@ default:
stages:
- build
- deploy
workflow:
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
variables:
SRC_DIR: out
build:site:
deploy:site:
stage: build
image: golang:1.21-alpine
before_script:
- echo "https://dl-cdn.alpinelinux.org/alpine/edge/testing" >> /etc/apk/repositories
- apk add --no-cache templ
- mkdir "$SRC_DIR"
- cp -r src/* "$SRC_DIR"/
image: nixos/nix:2.21.0
script:
- templ generate
- go run . -minify -gen "$SRC_DIR"
artifacts:
expire_in: 1 hour
paths:
- out/
deploy:cloudflare_pages:
stage: deploy
image:
name: node:21-alpine
entrypoint: ['']
script: |
npx wrangler pages deploy \
--project-name=folliehiyuki-cdn \
--branch="$CI_COMMIT_REF_NAME" \
"$SRC_DIR"/
dependencies:
- build:site
- nix build .
- nix run .#publish


@ -5,26 +5,11 @@
"systems": "systems"
},
"locked": {
"lastModified": 1705309234,
"narHash": "sha256-uNRRNRKmJyCRC/8y1RqBkqWBLM034y4qN7EprSdmgyA=",
"lastModified": 1710146030,
"narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "1ef2e671c3b0c19053962c07dbda38332dcebf26",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"flake-utils_2": {
"locked": {
"lastModified": 1667395993,
"narHash": "sha256-nuEHfE/LcWyuSWnS8t12N1wc105Qtau+/OdUAjtQ0rA=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "5aed5285a952e0b949eb3ba02c12fa4fcfef535f",
"rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
"type": "github"
},
"original": {
@ -40,11 +25,11 @@
]
},
"locked": {
"lastModified": 1703887061,
"narHash": "sha256-gGPa9qWNc6eCXT/+Z5/zMkyYOuRZqeFZBDbopNZQkuY=",
"lastModified": 1709087332,
"narHash": "sha256-HG2cCnktfHsKV0s4XW83gU3F57gaTljL9KNSuG6bnQs=",
"owner": "hercules-ci",
"repo": "gitignore.nix",
"rev": "43e1aa1308018f37118e34d3a9cb4f5e75dc11d5",
"rev": "637db329424fd7e46cf4185293b9cc8c88c95394",
"type": "github"
},
"original": {
@ -55,16 +40,16 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1706191920,
"narHash": "sha256-eLihrZAPZX0R6RyM5fYAWeKVNuQPYjAkCUBr+JNvtdE=",
"lastModified": 1710534455,
"narHash": "sha256-huQT4Xs0y4EeFKn2BTBVYgEwJSv8SDlm82uWgMnCMmI=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "ae5c332cbb5827f6b1f02572496b141021de335f",
"rev": "9af9c1c87ed3e3ed271934cb896e0cdd33dae212",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable",
"ref": "nixpkgs-unstable",
"repo": "nixpkgs",
"type": "github"
}
@ -73,8 +58,7 @@
"inputs": {
"flake-utils": "flake-utils",
"gitignore": "gitignore",
"nixpkgs": "nixpkgs",
"templ-src": "templ-src"
"nixpkgs": "nixpkgs"
}
},
"systems": {
@ -91,52 +75,6 @@
"repo": "default",
"type": "github"
}
},
"templ-src": {
"inputs": {
"gitignore": [
"gitignore"
],
"nixpkgs": [
"nixpkgs"
],
"xc": "xc"
},
"locked": {
"lastModified": 1706218036,
"narHash": "sha256-rSDcn6ZX9LMg16zh/FgaJkqG/bm8tf12iCU8t+E5ug4=",
"owner": "a-h",
"repo": "templ",
"rev": "c683be18add8e2f6465c9ab6044f7360cf50aed3",
"type": "github"
},
"original": {
"owner": "a-h",
"repo": "templ",
"type": "github"
}
},
"xc": {
"inputs": {
"flake-utils": "flake-utils_2",
"nixpkgs": [
"templ-src",
"nixpkgs"
]
},
"locked": {
"lastModified": 1703164129,
"narHash": "sha256-kCcCqqwvjN07H8FPG4tXsRVRcMqT8dUNt9pwW1kKAe8=",
"owner": "joerdav",
"repo": "xc",
"rev": "0655cccfcf036556aeaddfb8f45dc7e8dd1b3680",
"type": "github"
},
"original": {
"owner": "joerdav",
"repo": "xc",
"type": "github"
}
}
},
"root": "root",


@ -2,22 +2,16 @@
description = "folliehiyuki's stupid CDN";
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";
flake-utils.url = "github:numtide/flake-utils";
gitignore = {
url = "github:hercules-ci/gitignore.nix";
inputs.nixpkgs.follows = "nixpkgs";
};
templ-src = {
url = "github:a-h/templ";
inputs.nixpkgs.follows = "nixpkgs";
inputs.gitignore.follows = "gitignore";
};
};
outputs = { nixpkgs, templ-src, flake-utils, gitignore, ... }:
outputs = { nixpkgs, flake-utils, gitignore, ... }:
flake-utils.lib.eachSystem [
"x86_64-linux"
"x86_64-darwin"
@ -27,19 +21,14 @@
(system:
let
inherit (nixpkgs) lib;
pkgs = nixpkgs.legacyPackages."${system}" // templ-src.packages."${system}";
buildInputs = with pkgs; [ go templ ];
shellHook = ''
export GOTOOLCHAIN=local
export CGO_ENABLED=0
'';
pkgs = nixpkgs.legacyPackages."${system}";
tasks = with pkgs; {
gen = {
runtimeInputs = [ templ ];
script = "templ generate";
};
preview = {
runtimeInputs = [ go templ ];
script = ''
@ -47,17 +36,18 @@
go run . -preview result/
'';
};
publish = {
runtimeInputs = [ nodePackages.wrangler ];
# NOTE: need to login beforehand, or export CLOUDFLARE_API_TOKEN first
# NOTE: need to login beforehand, or export CLOUDFLARE_API_TOKEN first
publish = {
runtimeInputs = [ nodePackages.wrangler git ];
script = ''
wrangler pages deploy \
--project-name=folliehiyuki-cdn \
--branch=main \
--branch="$(git branch --show-current)" \
result/
'';
};
update-src = {
runtimeInputs = [ curl openssl unzip ];
script = builtins.readFile ./update-src.sh;
@ -78,11 +68,11 @@
packages = rec {
default = bundle;
bundle = with pkgs; stdenv.mkDerivation {
inherit buildInputs;
buildInputs = [ templ go ];
name = "cdn.folliehiyuki.com";
src = lib.cleanSource (gitignore.lib.gitignoreSource ./.);
configurePhase = ''
${shellHook}
export GOTOOLCHAIN=local
export HOME="$TMPDIR"
export GOCACHE="$PWD/go-cache"
export GOTMPDIR="$PWD"
@ -101,8 +91,10 @@
};
devShells.default = with pkgs; mkShell {
inherit shellHook;
packages = buildInputs ++ [ nodePackages.wrangler ];
packages = [ nodePackages.wrangler templ go ];
shellHook = ''
export GOTOOLCHAIN=local
'';
};
}
);

go.mod (10 lines changed)

@ -3,12 +3,12 @@ module cdn
go 1.21.6
require (
github.com/a-h/templ v0.2.543
github.com/charmbracelet/lipgloss v0.9.1
github.com/a-h/templ v0.2.598
github.com/charmbracelet/lipgloss v0.10.0
github.com/charmbracelet/log v0.3.1
github.com/dustin/go-humanize v1.0.1
github.com/labstack/echo/v4 v4.11.4
github.com/tdewolff/minify/v2 v2.20.15
github.com/tdewolff/minify/v2 v2.20.19
)
require (
@ -22,8 +22,8 @@ require (
github.com/mattn/go-runewidth v0.0.15 // indirect
github.com/muesli/reflow v0.3.0 // indirect
github.com/muesli/termenv v0.15.2 // indirect
github.com/rivo/uniseg v0.2.0 // indirect
github.com/tdewolff/parse/v2 v2.7.10 // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/tdewolff/parse/v2 v2.7.12 // indirect
github.com/valyala/bytebufferpool v1.0.0 // indirect
github.com/valyala/fasttemplate v1.2.2 // indirect
golang.org/x/crypto v0.17.0 // indirect

go.sum (19 lines changed)

@ -1,9 +1,9 @@
github.com/a-h/templ v0.2.543 h1:8YyLvyUtf0/IE2nIwZ62Z/m2o2NqwhnMynzOL78Lzbk=
github.com/a-h/templ v0.2.543/go.mod h1:jP908DQCwI08IrnTalhzSEH9WJqG/Q94+EODQcJGFUA=
github.com/a-h/templ v0.2.598 h1:6jMIHv6wQZvdPxTuv87erW4RqN/FPU0wk7ZHN5wVuuo=
github.com/a-h/templ v0.2.598/go.mod h1:SA7mtYwVEajbIXFRh3vKdYm/4FYyLQAtPH1+KxzGPA8=
github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
github.com/charmbracelet/lipgloss v0.9.1 h1:PNyd3jvaJbg4jRHKWXnCj1akQm4rh8dbEzN1p/u1KWg=
github.com/charmbracelet/lipgloss v0.9.1/go.mod h1:1mPmG4cxScwUQALAAnacHaigiiHB9Pmr+v1VEawJl6I=
github.com/charmbracelet/lipgloss v0.10.0 h1:KWeXFSexGcfahHX+54URiZGkBFazf70JNMtwg/AFW3s=
github.com/charmbracelet/lipgloss v0.10.0/go.mod h1:Wig9DSfvANsxqkRsqj6x87irdy123SR4dOXlKa91ciE=
github.com/charmbracelet/log v0.3.1 h1:TjuY4OBNbxmHWSwO3tosgqs5I3biyY8sQPny/eCMTYw=
github.com/charmbracelet/log v0.3.1/go.mod h1:OR4E1hutLsax3ZKpXbgUqPtTjQfrh1pG3zwHGWuuq8g=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
@ -37,14 +37,15 @@ github.com/muesli/termenv v0.15.2/go.mod h1:Epx+iuz8sNs7mNKhxzH4fWXGNpZwUaJKRS1n
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/tdewolff/minify/v2 v2.20.15 h1:yXLXcFGDZSFv+dwd+s06IFSPHC+Di01oWUCRxEQyOZ4=
github.com/tdewolff/minify/v2 v2.20.15/go.mod h1:gbDLC/MfU1krLUZAu00h4wxefgtpO5YNy41LesqFLXM=
github.com/tdewolff/parse/v2 v2.7.10 h1:itheTX7WBPxGI1eo+8HQ6QVGH+ubr3Lpv98YJVtqqRo=
github.com/tdewolff/parse/v2 v2.7.10/go.mod h1:3FbJWZp3XT9OWVN3Hmfp0p/a08v4h8J9W1aghka0soA=
github.com/tdewolff/minify/v2 v2.20.19 h1:tX0SR0LUrIqGoLjXnkIzRSIbKJ7PaNnSENLD4CyH6Xo=
github.com/tdewolff/minify/v2 v2.20.19/go.mod h1:ulkFoeAVWMLEyjuDz1ZIWOA31g5aWOawCFRp9R/MudM=
github.com/tdewolff/parse/v2 v2.7.12 h1:tgavkHc2ZDEQVKy1oWxwIyh5bP4F5fEh/JmBwPP/3LQ=
github.com/tdewolff/parse/v2 v2.7.12/go.mod h1:3FbJWZp3XT9OWVN3Hmfp0p/a08v4h8J9W1aghka0soA=
github.com/tdewolff/test v1.0.11-0.20231101010635-f1265d231d52/go.mod h1:6DAvZliBAAnD7rhVgwaM7DE5/d9NMOAJ09SqYqeK4QE=
github.com/tdewolff/test v1.0.11-0.20240106005702-7de5f7df4739 h1:IkjBCtQOOjIn03u/dMQK9g+Iw9ewps4mCl1nB8Sscbo=
github.com/tdewolff/test v1.0.11-0.20240106005702-7de5f7df4739/go.mod h1:XPuWBzvdUzhCuxWO1ojpXsyzsA5bFoS3tO/Q3kFuTG8=


@ -23,3 +23,6 @@ result
# Editors
## nvim
.null-ls*
# Go workspace.
go.work


@ -1 +1 @@
0.2.543
0.2.598


@ -28,6 +28,12 @@ cd cmd/templ
go build
```
### nix-update-gomod2nix
```sh
gomod2nix
```
### install-snapshot
Build and install current version.
@ -87,7 +93,7 @@ GOCOVERDIR=coverage/fmt ./coverage/templ-cover fmt .
GOCOVERDIR=coverage/generate ./coverage/templ-cover generate -include-version=false
GOCOVERDIR=coverage/version ./coverage/templ-cover version
# Run the unit tests.
go test -cover ./... -args -test.gocoverdir="$PWD/coverage/unit"
go test -cover ./... -coverpkg ./... -args -test.gocoverdir="$PWD/coverage/unit"
# Display the combined percentage.
go tool covdata percent -i=./coverage/fmt,./coverage/generate,./coverage/version,./coverage/unit
# Generate a text coverage profile for tooling to use.


@ -1,6 +1,24 @@
{
"nodes": {
"flake-utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1694529238,
"narHash": "sha256-zsNZZGTGnMOf9YpHKJqMSsa0dXbfmxeoJ7xHlrt+xmY=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "ff7b65b44d01cf9ba6a71320833626af21126384",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"flake-utils_2": {
"locked": {
"lastModified": 1667395993,
"narHash": "sha256-nuEHfE/LcWyuSWnS8t12N1wc105Qtau+/OdUAjtQ0rA=",
@ -35,18 +53,39 @@
"type": "github"
}
},
"gomod2nix": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1705314449,
"narHash": "sha256-yfQQ67dLejP0FLK76LKHbkzcQqNIrux6MFe32MMFGNQ=",
"owner": "nix-community",
"repo": "gomod2nix",
"rev": "30e3c3a9ec4ac8453282ca7f67fca9e1da12c3e6",
"type": "github"
},
"original": {
"owner": "nix-community",
"repo": "gomod2nix",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1694422566,
"narHash": "sha256-lHJ+A9esOz9vln/3CJG23FV6Wd2OoOFbDeEs4cMGMqc=",
"lastModified": 1701282334,
"narHash": "sha256-MxCVrXY6v4QmfTwIysjjaX0XUhqBbxTWWB4HXtDYsdk=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "3a2786eea085f040a66ecde1bc3ddc7099f6dbeb",
"rev": "057f9aecfb71c4437d2b27d3323df7f93c010b7e",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable",
"ref": "23.11",
"repo": "nixpkgs",
"type": "github"
}
@ -54,13 +93,29 @@
"root": {
"inputs": {
"gitignore": "gitignore",
"gomod2nix": "gomod2nix",
"nixpkgs": "nixpkgs",
"xc": "xc"
}
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
},
"xc": {
"inputs": {
"flake-utils": "flake-utils",
"flake-utils": "flake-utils_2",
"nixpkgs": [
"nixpkgs"
]


@ -2,7 +2,11 @@
description = "templ";
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs?ref=nixos-unstable";
nixpkgs.url = "github:NixOS/nixpkgs/23.11";
gomod2nix = {
url = "github:nix-community/gomod2nix";
inputs.nixpkgs.follows = "nixpkgs";
};
gitignore = {
url = "github:hercules-ci/gitignore.nix";
inputs.nixpkgs.follows = "nixpkgs";
@ -13,7 +17,7 @@
};
};
outputs = { self, nixpkgs, gitignore, xc }:
outputs = { self, nixpkgs, gomod2nix, gitignore, xc }:
let
allSystems = [
"x86_64-linux" # 64-bit Intel/AMD Linux
@ -27,35 +31,41 @@
});
in
{
packages = forAllSystems ({ pkgs, ... }: rec {
default = templ;
packages = forAllSystems ({ system, pkgs, ... }:
let
buildGoApplication = gomod2nix.legacyPackages.${system}.buildGoApplication;
in
rec {
default = templ;
templ = pkgs.buildGo121Module {
name = "templ";
src = gitignore.lib.gitignoreSource ./.;
subPackages = [ "cmd/templ" ];
vendorHash = "sha256-4tHofTnSNI/MBmrGdGsLNoXjxUC0+Gwp3PzzUwfUkQU=";
CGO_ENABLED = 0;
flags = [
"-trimpath"
];
ldflags = [
"-s"
"-w"
"-extldflags -static"
];
};
templ = buildGoApplication {
name = "templ";
src = gitignore.lib.gitignoreSource ./.;
go = pkgs.go_1_21;
# Must be added due to bug https://github.com/nix-community/gomod2nix/issues/120
pwd = ./.;
subPackages = [ "cmd/templ" ];
CGO_ENABLED = 0;
flags = [
"-trimpath"
];
ldflags = [
"-s"
"-w"
"-extldflags -static"
];
};
templ-docs = pkgs.buildNpmPackage {
name = "templ-docs";
src = gitignore.lib.gitignoreSource ./docs;
npmDepsHash = "sha256-i6clvSyHtQEGl2C/wcCXonl1W/Kxq7WPTYH46AhUvDM=";
installPhase = ''
mkdir -p $out/share
cp -r build/ $out/share/docs
'';
};
});
templ-docs = pkgs.buildNpmPackage {
name = "templ-docs";
src = gitignore.lib.gitignoreSource ./docs;
npmDepsHash = "sha256-i6clvSyHtQEGl2C/wcCXonl1W/Kxq7WPTYH46AhUvDM=";
installPhase = ''
mkdir -p $out/share
cp -r build/ $out/share/docs
'';
};
});
# `nix develop` provides a shell containing development tools.
devShell = forAllSystems ({ system, pkgs }:
@ -63,8 +73,10 @@
buildInputs = with pkgs; [
(golangci-lint.override { buildGoModule = buildGo121Module; })
go_1_21
gopls
goreleaser
nodejs
gomod2nix.legacyPackages.${system}.gomod2nix
xc.packages.${system}.xc
];
});

vendor/github.com/a-h/templ/gomod2nix.toml (generated, vendored, new file; 93 lines)

@ -0,0 +1,93 @@
schema = 3
[mod]
[mod."github.com/PuerkitoBio/goquery"]
version = "v1.8.1"
hash = "sha256-z2RaB8PVPEzSJdMUfkfNjT616yXWTjW2gkhNOh989ZU="
[mod."github.com/a-h/htmlformat"]
version = "v0.0.0-20231108124658-5bd994fe268e"
hash = "sha256-YSl9GsXhc0L2oKGZLwwjUtpe5W6ra6kk74zvQdsDCMU="
[mod."github.com/a-h/lexical"]
version = "v0.0.53"
hash = "sha256-2nBWXgbkvl2lzrQmfsuxbOFm6BKdC3BcsFaGgaskhDM="
[mod."github.com/a-h/parse"]
version = "v0.0.0-20240121214402-3caf7543159a"
hash = "sha256-ee/g6xwwhtF7vVt3griUSh96Kz4z0hM5/tpXxHW6PZk="
[mod."github.com/a-h/pathvars"]
version = "v0.0.12"
hash = "sha256-cpQS/ZKaox3oOgoQRfsDiW6Qi+i7M/0z/IvtEOt63Ng="
[mod."github.com/a-h/protocol"]
version = "v0.0.0-20230224160810-b4eec67c1c22"
hash = "sha256-aoxNdsVfyXtiKv3mkEqF63bBEHd6thX7tC/7a1c8rcQ="
[mod."github.com/andybalholm/cascadia"]
version = "v1.3.1"
hash = "sha256-M0u22DXSeXUaYtl1KoW1qWL46niFpycFkraCEQ/luYA="
[mod."github.com/cenkalti/backoff/v4"]
version = "v4.2.1"
hash = "sha256-CKogmPe0pCcAdpztzPwr24rLTJZfq8QVZ9AUduwAcoA="
[mod."github.com/cli/browser"]
version = "v1.2.0"
hash = "sha256-EWaXC2N2LtRFUxsA1PGA0vNGqiTlAolgBP4PlVKSyco="
[mod."github.com/fatih/color"]
version = "v1.16.0"
hash = "sha256-Aq/SM28aPJVzvapllQ64R/DM4aZ5CHPewcm/AUJPyJQ="
[mod."github.com/fsnotify/fsnotify"]
version = "v1.7.0"
hash = "sha256-MdT2rQyQHspPJcx6n9ozkLbsktIOJutOqDuKpNAtoZY="
[mod."github.com/google/go-cmp"]
version = "v0.6.0"
hash = "sha256-qgra5jze4iPGP0JSTVeY5qV5AvEnEu39LYAuUCIkMtg="
[mod."github.com/mattn/go-colorable"]
version = "v0.1.13"
hash = "sha256-qb3Qbo0CELGRIzvw7NVM1g/aayaz4Tguppk9MD2/OI8="
[mod."github.com/mattn/go-isatty"]
version = "v0.0.20"
hash = "sha256-qhw9hWtU5wnyFyuMbKx+7RB8ckQaFQ8D+8GKPkN3HHQ="
[mod."github.com/natefinch/atomic"]
version = "v1.0.1"
hash = "sha256-fbOVHCwRNI8PFjC4o0YXpKZO0JU2aWTfH5c7WXXKMHg="
[mod."github.com/pkg/errors"]
version = "v0.9.1"
hash = "sha256-mNfQtcrQmu3sNg/7IwiieKWOgFQOVVe2yXgKBpe/wZw="
[mod."github.com/rs/cors"]
version = "v1.8.3"
hash = "sha256-VgVB4HKAhPSjNg96mIEUN1bt5ZQng8Fi3ZABy3CDWQE="
[mod."github.com/segmentio/asm"]
version = "v1.2.0"
hash = "sha256-zbNuKxNrUDUc6IlmRQNuJQzVe5Ol/mqp7srDg9IMMqs="
[mod."github.com/segmentio/encoding"]
version = "v0.3.6"
hash = "sha256-XrxOwMPaY3tRdZkQr9Njw5hI1HeheGU3uI4DbH8bb7w="
[mod."github.com/stretchr/testify"]
version = "v1.8.4"
hash = "sha256-MoOmRzbz9QgiJ+OOBo5h5/LbilhJfRUryvzHJmXAWjo="
[mod."go.lsp.dev/jsonrpc2"]
version = "v0.10.0"
hash = "sha256-RbRsMYVBLR7ZDHHGMooycrkdbIauMXkQjVOGP7ggSgM="
[mod."go.lsp.dev/pkg"]
version = "v0.0.0-20210717090340-384b27a52fb2"
hash = "sha256-TxS0Iqe1wbIaFe7MWZJRQdgqhKE8i8CggaGSV9zU1Vg="
[mod."go.lsp.dev/uri"]
version = "v0.3.0"
hash = "sha256-jGP0N7Gf+bql5oJraUo33sXqWg7AKOTj0D8b4paV4dc="
[mod."go.uber.org/atomic"]
version = "v1.10.0"
hash = "sha256-E6UEDc1eh/cLUFd+J86cDesQ0B8wEv/DdaAVKb+x2t8="
[mod."go.uber.org/multierr"]
version = "v1.9.0"
hash = "sha256-tlDRooh/V4HDhZohsUrxot/Y6uVInVBtRWCZbj/tPds="
[mod."go.uber.org/zap"]
version = "v1.24.0"
hash = "sha256-yLzjFbMWnc5b033gcPLGP0KY1xWPJ3sjnUG/RndmC3o="
[mod."golang.org/x/mod"]
version = "v0.12.0"
hash = "sha256-M/oXnzm7odpJdQzEnG6W0pNYtl0uhOM/l7qgfGVpU2M="
[mod."golang.org/x/net"]
version = "v0.19.0"
hash = "sha256-3M5rKEvJx4cO/q+06cGjR5sxF5JpnUWY0+fQttrWdT4="
[mod."golang.org/x/sys"]
version = "v0.15.0"
hash = "sha256-n7TlABF6179RzGq3gctPDKDPRtDfnwPdjNCMm8ps2KY="
[mod."golang.org/x/tools"]
version = "v0.13.0"
hash = "sha256-OCgLOwia8fNHxfdogXVApf0/qK6jE2ukegOx7lkOzfo="


@ -220,7 +220,7 @@ pattern to the margin and padding shorthand functions.
lipgloss.NewStyle().
Border(lipgloss.ThickBorder(), true, false)
// Add a thick border to the right and bottom sides. Rules are set clockwise
// Add a double border to the top and left sides. Rules are set clockwise
// from top.
lipgloss.NewStyle().
Border(lipgloss.DoubleBorder(), true, false, false, true)
@ -461,7 +461,7 @@ fmt.Println(t)
![Table Example](https://github.com/charmbracelet/lipgloss/assets/42545625/6e4b70c4-f494-45da-a467-bdd27df30d5d)
For more on tables see [the docs](https://pkg.go.dev/github.com/charmbracelet/lipgloss?tab=doc).
For more on tables see [the docs](https://pkg.go.dev/github.com/charmbracelet/lipgloss?tab=doc) and [examples](https://github.com/charmbracelet/lipgloss/tree/master/examples/table).
***


@ -69,7 +69,7 @@ func alignTextVertical(str string, pos Position, height int, _ *termenv.Style) s
case Top:
return str + strings.Repeat("\n", height-strHeight)
case Center:
var topPadding, bottomPadding = (height - strHeight) / 2, (height - strHeight) / 2
topPadding, bottomPadding := (height-strHeight)/2, (height-strHeight)/2 //nolint:gomnd
if strHeight+topPadding+bottomPadding > height {
topPadding--
} else if strHeight+topPadding+bottomPadding < height {


@ -3,9 +3,9 @@ package lipgloss
import (
"strings"
"github.com/mattn/go-runewidth"
"github.com/muesli/reflow/ansi"
"github.com/muesli/termenv"
"github.com/rivo/uniseg"
)
// Border contains a series of values which comprise the various parts of a
@ -411,7 +411,7 @@ func (s Style) styleBorder(border string, fg, bg TerminalColor) string {
return border
}
var style = termenv.Style{}
style := termenv.Style{}
if fg != noColor {
style = style.Foreground(fg.color(s.r))
@ -423,13 +423,18 @@ func (s Style) styleBorder(border string, fg, bg TerminalColor) string {
return style.Styled(border)
}
func maxRuneWidth(str string) (width int) {
for _, r := range str {
w := runewidth.RuneWidth(r)
func maxRuneWidth(str string) int {
var width int
state := -1
for len(str) > 0 {
var w int
_, str, w, state = uniseg.FirstGraphemeClusterInString(str, state)
if w > width {
width = w
}
}
return width
}
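
The change above swaps runewidth.RuneWidth for uniseg grapheme-cluster iteration, so the widest element of a border edge is measured per user-perceived character rather than per rune. A minimal standalone sketch of the same loop against the public uniseg API (the `widestCluster` helper below is illustrative only, not lipgloss code):

```go
package main

import (
	"fmt"

	"github.com/rivo/uniseg"
)

// widestCluster mirrors the updated maxRuneWidth: it measures the widest
// grapheme cluster instead of the widest single rune.
func widestCluster(s string) int {
	width := 0
	state := -1
	for len(s) > 0 {
		var w int
		_, s, w, state = uniseg.FirstGraphemeClusterInString(s, state)
		if w > width {
			width = w
		}
	}
	return width
}

func main() {
	// "🇩🇪" is two runes but a single grapheme cluster of monospace width 2.
	fmt.Println(widestCluster("a🇩🇪b")) // 2
}
```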


@ -408,6 +408,12 @@ func (s Style) GetFrameSize() (x, y int) {
return s.GetHorizontalFrameSize(), s.GetVerticalFrameSize()
}
// GetTransform returns the transform set on the style. If no transform is set
// nil is returned.
func (s Style) GetTransform() func(string) string {
return s.getAsTransform(transformKey)
}
// Returns whether or not the given property is set.
func (s Style) isSet(k propKey) bool {
_, exists := s.rules[k]
@ -469,6 +475,17 @@ func (s Style) getBorderStyle() Border {
return noBorder
}
func (s Style) getAsTransform(k propKey) func(string) string {
v, ok := s.rules[k]
if !ok {
return nil
}
if fn, ok := v.(func(string) string); ok {
return fn
}
return nil
}
// Split a string into lines, additionally returning the size of the widest
// line.
func getLines(s string) (lines []string, widest int) {


@ -115,7 +115,7 @@ func (s Style) Height(i int) Style {
//
// With one argument, the position value is applied to the horizontal alignment.
//
// With two arguments, the value is applied to the vertical and horizontal
// With two arguments, the value is applied to the horizontal and vertical
// alignments, in that order.
func (s Style) Align(p ...Position) Style {
if len(p) > 0 {
@ -544,6 +544,18 @@ func (s Style) StrikethroughSpaces(v bool) Style {
return s
}
// Transform applies a given function to a string at render time, allowing for
// the string being rendered to be manipulated.
//
// Example:
//
// s := NewStyle().Transform(strings.ToUpper)
// fmt.Println(s.Render("raow!")) // "RAOW!"
func (s Style) Transform(fn func(string) string) Style {
s.set(transformKey, fn)
return s
}
// Renderer sets the renderer for the style. This is useful for changing the
// renderer for a style that is being used in a different context.
func (s Style) Renderer(r *Renderer) Style {


@ -73,6 +73,8 @@ const (
tabWidthKey
underlineSpacesKey
strikethroughSpacesKey
transformKey
)
// A set of properties.
@ -225,6 +227,8 @@ func (s Style) Render(strs ...string) string {
// Do we need to style spaces separately?
useSpaceStyler = underlineSpaces || strikethroughSpaces
transform = s.getAsTransform(transformKey)
)
if len(s.rules) == 0 {
@ -401,6 +405,10 @@ func (s Style) Render(strs ...string) string {
str = strings.Join(lines[:min(maxHeight, len(lines))], "\n")
}
if transform != nil {
return transform(str)
}
return str
}
@ -456,36 +464,23 @@ func (s Style) applyMargins(str string, inline bool) string {
// Apply left padding.
func padLeft(str string, n int, style *termenv.Style) string {
if n == 0 {
return str
}
sp := strings.Repeat(" ", n)
if style != nil {
sp = style.Styled(sp)
}
b := strings.Builder{}
l := strings.Split(str, "\n")
for i := range l {
b.WriteString(sp)
b.WriteString(l[i])
if i != len(l)-1 {
b.WriteRune('\n')
}
}
return b.String()
return pad(str, -n, style)
}
// Apply right padding.
func padRight(str string, n int, style *termenv.Style) string {
if n == 0 || str == "" {
return pad(str, n, style)
}
// pad adds padding to either the left or right side of a string.
// Positive values add to the right side while negative values
// add to the left side.
func pad(str string, n int, style *termenv.Style) string {
if n == 0 {
return str
}
sp := strings.Repeat(" ", n)
sp := strings.Repeat(" ", abs(n))
if style != nil {
sp = style.Styled(sp)
}
@ -494,8 +489,17 @@ func padRight(str string, n int, style *termenv.Style) string {
l := strings.Split(str, "\n")
for i := range l {
b.WriteString(l[i])
b.WriteString(sp)
switch {
// pad right
case n > 0:
b.WriteString(l[i])
b.WriteString(sp)
// pad left
default:
b.WriteString(sp)
b.WriteString(l[i])
}
if i != len(l)-1 {
b.WriteRune('\n')
}
@ -517,3 +521,11 @@ func min(a, b int) int {
}
return b
}
func abs(a int) int {
if a < 0 {
return -a
}
return a
}
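
The padLeft/padRight rewrite above is intended to be behavior-preserving: both now delegate to the signed pad helper. A hedged sketch exercising it through the public padding API (with no colors set, the padding is plain spaces):

```go
package main

import (
	"fmt"

	"github.com/charmbracelet/lipgloss"
)

func main() {
	// PaddingLeft/PaddingRight reach pad() with negative and positive
	// counts respectively.
	s := lipgloss.NewStyle().PaddingLeft(2).PaddingRight(1)
	fmt.Printf("%q\n", s.Render("hi")) // "  hi "
}
```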


@ -305,6 +305,12 @@ func (s Style) UnsetStrikethroughSpaces() Style {
return s
}
// UnsetTransform removes the value set by Transform.
func (s Style) UnsetTransform() Style {
delete(s.rules, transformKey)
return s
}
// UnsetString sets the underlying string value to the empty string.
func (s Style) UnsetString() Style {
s.value = ""
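
Taken together, the transform hooks added in this update (Transform and transformKey, GetTransform/getAsTransform, the render-time application in Render, and UnsetTransform) work as a unit. A minimal sketch assuming the vendored lipgloss v0.10.0 API:

```go
package main

import (
	"fmt"
	"strings"

	"github.com/charmbracelet/lipgloss"
)

func main() {
	// The transform function runs at render time, after styling and layout.
	style := lipgloss.NewStyle().Transform(strings.ToUpper)
	fmt.Println(style.Render("raow!")) // RAOW!

	// It can be read back...
	if fn := style.GetTransform(); fn != nil {
		fmt.Println(fn("inspect")) // INSPECT
	}

	// ...and removed again.
	style = style.UnsetTransform()
	fmt.Println(style.Render("raow!")) // raow!
}
```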


@ -1,15 +1,15 @@
# Unicode Text Segmentation for Go
[![Godoc Reference](https://img.shields.io/badge/godoc-reference-blue.svg)](https://godoc.org/github.com/rivo/uniseg)
[![Go Reference](https://pkg.go.dev/badge/github.com/rivo/uniseg.svg)](https://pkg.go.dev/github.com/rivo/uniseg)
[![Go Report](https://img.shields.io/badge/go%20report-A%2B-brightgreen.svg)](https://goreportcard.com/report/github.com/rivo/uniseg)
This Go package implements Unicode Text Segmentation according to [Unicode Standard Annex #29](http://unicode.org/reports/tr29/) (Unicode version 12.0.0).
At this point, only the determination of grapheme cluster boundaries is implemented.
This Go package implements Unicode Text Segmentation according to [Unicode Standard Annex #29](https://unicode.org/reports/tr29/), Unicode Line Breaking according to [Unicode Standard Annex #14](https://unicode.org/reports/tr14/) (Unicode version 15.0.0), and monospace font string width calculation similar to [wcwidth](https://man7.org/linux/man-pages/man3/wcwidth.3.html).
## Background
In Go, [strings are read-only slices of bytes](https://blog.golang.org/strings). They can be turned into Unicode code points using the `for` loop or by casting: `[]rune(str)`. However, multiple code points may be combined into one user-perceived character or what the Unicode specification calls "grapheme cluster". Here are some examples:
### Grapheme Clusters
In Go, [strings are read-only slices of bytes](https://go.dev/blog/strings). They can be turned into Unicode code points using the `for` loop or by casting: `[]rune(str)`. However, multiple code points may be combined into one user-perceived character or what the Unicode specification calls "grapheme cluster". Here are some examples:
|String|Bytes (UTF-8)|Code points (runes)|Grapheme clusters|
|-|-|-|-|
@ -17,7 +17,23 @@ In Go, [strings are read-only slices of bytes](https://blog.golang.org/strings).
|🏳️‍🌈|14 bytes: `f0 9f 8f b3 ef b8 8f e2 80 8d f0 9f 8c 88`|4 code points: `1f3f3 fe0f 200d 1f308`|1 cluster: `[1f3f3 fe0f 200d 1f308]`|
|🇩🇪|8 bytes: `f0 9f 87 a9 f0 9f 87 aa`|2 code points: `1f1e9 1f1ea`|1 cluster: `[1f1e9 1f1ea]`|
This package provides a tool to iterate over these grapheme clusters. This may be used to determine the number of user-perceived characters, to split strings in their intended places, or to extract individual characters which form a unit.
This package provides tools to iterate over these grapheme clusters. This may be used to determine the number of user-perceived characters, to split strings in their intended places, or to extract individual characters which form a unit.
### Word Boundaries
Word boundaries are used in a number of different contexts. The most familiar ones are selection (double-click mouse selection), cursor movement ("move to next word" control-arrow keys), and the dialog option "Whole Word Search" for search and replace. They are also used in database queries, to determine whether elements are within a certain number of words of one another. Searching may also use word boundaries in determining matching items. This package provides tools to determine word boundaries within strings.
### Sentence Boundaries
Sentence boundaries are often used for triple-click or some other method of selecting or iterating through blocks of text that are larger than single words. They are also used to determine whether words occur within the same sentence in database queries. This package provides tools to determine sentence boundaries within strings.
### Line Breaking
Line breaking, also known as word wrapping, is the process of breaking a section of text into lines such that it will fit in the available width of a page, window or other display area. This package provides tools to determine where a string may or may not be broken and where it must be broken (for example after newline characters).
### Monospace Width
Most terminals or text displays / text editors using a monospace font (for example source code editors) use a fixed width for each character. Some characters such as emojis or characters found in Asian and other languages may take up more than one character cell. This package provides tools to determine the number of cells a string will take up when displayed in a monospace font. See [here](https://pkg.go.dev/github.com/rivo/uniseg#hdr-Monospace_Width) for more information.
## Installation
@ -25,38 +41,97 @@ This package provides a tool to iterate over these grapheme clusters. This may b
go get github.com/rivo/uniseg
```
## Basic Example
## Examples
### Counting Characters in a String
```go
package uniseg
n := uniseg.GraphemeClusterCount("🇩🇪🏳️‍🌈")
fmt.Println(n)
// 2
```
import (
"fmt"
### Calculating the Monospace String Width
"github.com/rivo/uniseg"
)
```go
width := uniseg.StringWidth("🇩🇪🏳️‍🌈!")
fmt.Println(width)
// 5
```
func main() {
gr := uniseg.NewGraphemes("👍🏼!")
for gr.Next() {
fmt.Printf("%x ", gr.Runes())
}
// Output: [1f44d 1f3fc] [21]
### Using the [`Graphemes`](https://pkg.go.dev/github.com/rivo/uniseg#Graphemes) Class
This is the most convenient method of iterating over grapheme clusters:
```go
gr := uniseg.NewGraphemes("👍🏼!")
for gr.Next() {
fmt.Printf("%x ", gr.Runes())
}
// [1f44d 1f3fc] [21]
```
### Using the [`Step`](https://pkg.go.dev/github.com/rivo/uniseg#Step) or [`StepString`](https://pkg.go.dev/github.com/rivo/uniseg#StepString) Function
This avoids allocating a new `Graphemes` object but it requires the handling of states and boundaries:
```go
str := "🇩🇪🏳️‍🌈"
state := -1
var c string
for len(str) > 0 {
c, str, _, state = uniseg.StepString(str, state)
fmt.Printf("%x ", []rune(c))
}
// [1f1e9 1f1ea] [1f3f3 fe0f 200d 1f308]
```
### Advanced Examples
The [`Graphemes`](https://pkg.go.dev/github.com/rivo/uniseg#Graphemes) class offers the most convenient way to access all functionality of this package. But in some cases, it may be better to use the specialized functions directly. For example, if you're only interested in word segmentation, use [`FirstWord`](https://pkg.go.dev/github.com/rivo/uniseg#FirstWord) or [`FirstWordInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstWordInString):
```go
str := "Hello, world!"
state := -1
var c string
for len(str) > 0 {
c, str, state = uniseg.FirstWordInString(str, state)
fmt.Printf("(%s)\n", c)
}
// (Hello)
// (,)
// ( )
// (world)
// (!)
```
Similarly, use
- [`FirstGraphemeCluster`](https://pkg.go.dev/github.com/rivo/uniseg#FirstGraphemeCluster) or [`FirstGraphemeClusterInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstGraphemeClusterInString) for grapheme cluster determination only,
- [`FirstSentence`](https://pkg.go.dev/github.com/rivo/uniseg#FirstSentence) or [`FirstSentenceInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstSentenceInString) for sentence segmentation only, and
- [`FirstLineSegment`](https://pkg.go.dev/github.com/rivo/uniseg#FirstLineSegment) or [`FirstLineSegmentInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstLineSegmentInString) for line breaking / word wrapping (although using [`Step`](https://pkg.go.dev/github.com/rivo/uniseg#Step) or [`StepString`](https://pkg.go.dev/github.com/rivo/uniseg#StepString) is preferred as it will observe grapheme cluster boundaries).
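
For the line-breaking case in the last bullet, here is a short hedged sketch using [`FirstLineSegmentInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstLineSegmentInString), following the same state-machine pattern as the `FirstWordInString` example above:

```go
str := "First line.\nA second, longer sentence a word wrapper could break."
state := -1
var segment string
var mustBreak bool
for len(str) > 0 {
	segment, str, mustBreak, state = uniseg.FirstLineSegmentInString(str, state)
	fmt.Printf("%q (must break: %v)\n", segment, mustBreak)
}
// Each segment ends where a line break is allowed; mustBreak reports where a
// break is mandatory (here, after the "\n").
```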
If you're only interested in the width of characters, use [`FirstGraphemeCluster`](https://pkg.go.dev/github.com/rivo/uniseg#FirstGraphemeCluster) or [`FirstGraphemeClusterInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstGraphemeClusterInString). It is much faster than using [`Step`](https://pkg.go.dev/github.com/rivo/uniseg#Step), [`StepString`](https://pkg.go.dev/github.com/rivo/uniseg#StepString), or the [`Graphemes`](https://pkg.go.dev/github.com/rivo/uniseg#Graphemes) class because it does not include the logic for word / sentence / line boundaries.
Finally, if you need to reverse a string while preserving grapheme clusters, use [`ReverseString`](https://pkg.go.dev/github.com/rivo/uniseg#ReverseString):
```go
fmt.Println(uniseg.ReverseString("🇩🇪🏳️‍🌈"))
// 🏳️‍🌈🇩🇪
```
## Documentation
Refer to https://godoc.org/github.com/rivo/uniseg for the package's documentation.
Refer to https://pkg.go.dev/github.com/rivo/uniseg for the package's documentation.
## Dependencies
This package does not depend on any packages outside the standard library.
## Sponsor this Project
[Become a Sponsor on GitHub](https://github.com/sponsors/rivo?metadata_source=uniseg_readme) to support this project!
## Your Feedback
Add your issue here on GitHub. Feel free to get in touch if you have any questions.
## Version
Version tags will be introduced once Golang modules are official. Consider this version 0.1.
Add your issue here on GitHub, preferably before submitting any PR's. Feel free to get in touch if you have any questions.

vendor/github.com/rivo/uniseg/doc.go (generated, vendored; 108 lines changed)

@ -1,8 +1,108 @@
/*
Package uniseg implements Unicode Text Segmentation according to Unicode
Standard Annex #29 (http://unicode.org/reports/tr29/).
Package uniseg implements Unicode Text Segmentation, Unicode Line Breaking, and
string width calculation for monospace fonts. Unicode Text Segmentation conforms
to Unicode Standard Annex #29 (https://unicode.org/reports/tr29/) and Unicode
Line Breaking conforms to Unicode Standard Annex #14
(https://unicode.org/reports/tr14/).
At this point, only the determination of grapheme cluster boundaries is
implemented.
In short, using this package, you can split a string into grapheme clusters
(what people would usually refer to as a "character"), into words, and into
sentences. Or, in its simplest case, this package allows you to count the number
of characters in a string, especially when it contains complex characters such
as emojis, combining characters, or characters from Asian, Arabic, Hebrew, or
other languages. Additionally, you can use it to implement line breaking (or
"word wrapping"), that is, to determine where text can be broken over to the
next line when the width of the line is not big enough to fit the entire text.
Finally, you can use it to calculate the display width of a string for monospace
fonts.
# Getting Started
If you just want to count the number of characters in a string, you can use
[GraphemeClusterCount]. If you want to determine the display width of a string,
you can use [StringWidth]. If you want to iterate over a string, you can use
[Step], [StepString], or the [Graphemes] class (more convenient but less
performant). This will provide you with all information: grapheme clusters,
word boundaries, sentence boundaries, line breaks, and monospace character
widths. The specialized functions [FirstGraphemeCluster],
[FirstGraphemeClusterInString], [FirstWord], [FirstWordInString],
[FirstSentence], and [FirstSentenceInString] can be used if only one type of
information is needed.
# Grapheme Clusters
Consider the rainbow flag emoji: 🏳🌈. On most modern systems, it appears as one
character. But its string representation actually has 14 bytes, so counting
bytes (or using len("🏳️‍🌈")) will not work as expected. Counting runes won't,
either: The flag has 4 Unicode code points, thus 4 runes. The stdlib function
utf8.RuneCountInString("🏳️‍🌈") and len([]rune("🏳️‍🌈")) will both return 4.
The [GraphemeClusterCount] function will return 1 for the rainbow flag emoji.
The Graphemes class and a variety of functions in this package will allow you to
split strings into its grapheme clusters.
# Word Boundaries
Word boundaries are used in a number of different contexts. The most familiar
ones are selection (double-click mouse selection), cursor movement ("move to
next word" control-arrow keys), and the dialog option "Whole Word Search" for
search and replace. This package provides methods for determining word
boundaries.
# Sentence Boundaries
Sentence boundaries are often used for triple-click or some other method of
selecting or iterating through blocks of text that are larger than single words.
They are also used to determine whether words occur within the same sentence in
database queries. This package provides methods for determining sentence
boundaries.
# Line Breaking
Line breaking, also known as word wrapping, is the process of breaking a section
of text into lines such that it will fit in the available width of a page,
window or other display area. This package provides methods to determine the
positions in a string where a line must be broken, may be broken, or must not be
broken.
# Monospace Width
Monospace width, as referred to in this package, is the width of a string in a
monospace font. This is commonly used in terminal user interfaces or text
displays or editors that don't support proportional fonts. A width of 1
corresponds to a single character cell. The C function [wcswidth()] and its
implementation in other programming languages is in widespread use for the same
purpose. However, there is no standard for the calculation of such widths, and
this package differs from wcswidth() in a number of ways, presumably to generate
more visually pleasing results.
To start, we assume that every code point has a width of 1, with the following
exceptions:
- Code points with grapheme cluster break properties Control, CR, LF, Extend,
and ZWJ have a width of 0.
- U+2E3A, Two-Em Dash, has a width of 3.
- U+2E3B, Three-Em Dash, has a width of 4.
- Characters with the East-Asian Width properties "Fullwidth" (F) and "Wide"
(W) have a width of 2. (Properties "Ambiguous" (A) and "Neutral" (N) both
have a width of 1.)
- Code points with grapheme cluster break property Regional Indicator have a
width of 2.
- Code points with grapheme cluster break property Extended Pictographic have
a width of 2, unless their Emoji Presentation flag is "No", in which case
the width is 1.
For Hangul grapheme clusters composed of conjoining Jamo and for Regional
Indicators (flags), all code points except the first one have a width of 0. For
grapheme clusters starting with an Extended Pictographic, any additional code
point will force a total width of 2, except if the Variation Selector-15
(U+FE0E) is included, in which case the total width is always 1. Grapheme
clusters ending with Variation Selector-16 (U+FE0F) have a width of 2.
Note that whether these widths appear correct depends on your application's
render engine, to which extent it conforms to the Unicode Standard, and its
choice of font.
[wcswidth()]: https://man7.org/linux/man-pages/man3/wcswidth.3.html
*/
package uniseg
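
To make the monospace-width rules above concrete, a small hedged sketch using [StringWidth]; the expected values follow from the rules listed in this comment and from the README example earlier in this commit:

```go
package main

import (
	"fmt"

	"github.com/rivo/uniseg"
)

func main() {
	fmt.Println(uniseg.StringWidth("hello"))  // 5: one cell per code point
	fmt.Println(uniseg.StringWidth("\u2E3A")) // 3: Two-Em Dash
	fmt.Println(uniseg.StringWidth("🇩🇪"))     // 2: one Regional Indicator cluster
	fmt.Println(uniseg.StringWidth("🇩🇪🏳️‍🌈!")) // 5: as in the README example
}
```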

vendor/github.com/rivo/uniseg/eastasianwidth.go (generated, vendored, new file; 2588 lines)

File diff suppressed because it is too large

vendor/github.com/rivo/uniseg/emojipresentation.go (generated, vendored, new file; 295 lines)

@ -0,0 +1,295 @@
// Code generated via go generate from gen_properties.go. DO NOT EDIT.
package uniseg
// emojiPresentation are taken from
//
// and
// https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt
// ("Extended_Pictographic" only)
// on September 5, 2023. See https://www.unicode.org/license.html for the Unicode
// license agreement.
var emojiPresentation = [][3]int{
{0x231A, 0x231B, prEmojiPresentation}, // E0.6 [2] (⌚..⌛) watch..hourglass done
{0x23E9, 0x23EC, prEmojiPresentation}, // E0.6 [4] (⏩..⏬) fast-forward button..fast down button
{0x23F0, 0x23F0, prEmojiPresentation}, // E0.6 [1] (⏰) alarm clock
{0x23F3, 0x23F3, prEmojiPresentation}, // E0.6 [1] (⏳) hourglass not done
{0x25FD, 0x25FE, prEmojiPresentation}, // E0.6 [2] (◽..◾) white medium-small square..black medium-small square
{0x2614, 0x2615, prEmojiPresentation}, // E0.6 [2] (☔..☕) umbrella with rain drops..hot beverage
{0x2648, 0x2653, prEmojiPresentation}, // E0.6 [12] (♈..♓) Aries..Pisces
{0x267F, 0x267F, prEmojiPresentation}, // E0.6 [1] (♿) wheelchair symbol
{0x2693, 0x2693, prEmojiPresentation}, // E0.6 [1] (⚓) anchor
{0x26A1, 0x26A1, prEmojiPresentation}, // E0.6 [1] (⚡) high voltage
{0x26AA, 0x26AB, prEmojiPresentation}, // E0.6 [2] (⚪..⚫) white circle..black circle
{0x26BD, 0x26BE, prEmojiPresentation}, // E0.6 [2] (⚽..⚾) soccer ball..baseball
{0x26C4, 0x26C5, prEmojiPresentation}, // E0.6 [2] (⛄..⛅) snowman without snow..sun behind cloud
{0x26CE, 0x26CE, prEmojiPresentation}, // E0.6 [1] (⛎) Ophiuchus
{0x26D4, 0x26D4, prEmojiPresentation}, // E0.6 [1] (⛔) no entry
{0x26EA, 0x26EA, prEmojiPresentation}, // E0.6 [1] (⛪) church
{0x26F2, 0x26F3, prEmojiPresentation}, // E0.6 [2] (⛲..⛳) fountain..flag in hole
{0x26F5, 0x26F5, prEmojiPresentation}, // E0.6 [1] (⛵) sailboat
{0x26FA, 0x26FA, prEmojiPresentation}, // E0.6 [1] (⛺) tent
{0x26FD, 0x26FD, prEmojiPresentation}, // E0.6 [1] (⛽) fuel pump
{0x2705, 0x2705, prEmojiPresentation}, // E0.6 [1] (✅) check mark button
{0x270A, 0x270B, prEmojiPresentation}, // E0.6 [2] (✊..✋) raised fist..raised hand
{0x2728, 0x2728, prEmojiPresentation}, // E0.6 [1] (✨) sparkles
{0x274C, 0x274C, prEmojiPresentation}, // E0.6 [1] (❌) cross mark
{0x274E, 0x274E, prEmojiPresentation}, // E0.6 [1] (❎) cross mark button
{0x2753, 0x2755, prEmojiPresentation}, // E0.6 [3] (❓..❕) red question mark..white exclamation mark
{0x2757, 0x2757, prEmojiPresentation}, // E0.6 [1] (❗) red exclamation mark
{0x2795, 0x2797, prEmojiPresentation}, // E0.6 [3] (➕..➗) plus..divide
{0x27B0, 0x27B0, prEmojiPresentation}, // E0.6 [1] (➰) curly loop
{0x27BF, 0x27BF, prEmojiPresentation}, // E1.0 [1] (➿) double curly loop
{0x2B1B, 0x2B1C, prEmojiPresentation}, // E0.6 [2] (⬛..⬜) black large square..white large square
{0x2B50, 0x2B50, prEmojiPresentation}, // E0.6 [1] (⭐) star
{0x2B55, 0x2B55, prEmojiPresentation}, // E0.6 [1] (⭕) hollow red circle
{0x1F004, 0x1F004, prEmojiPresentation}, // E0.6 [1] (🀄) mahjong red dragon
{0x1F0CF, 0x1F0CF, prEmojiPresentation}, // E0.6 [1] (🃏) joker
{0x1F18E, 0x1F18E, prEmojiPresentation}, // E0.6 [1] (🆎) AB button (blood type)
{0x1F191, 0x1F19A, prEmojiPresentation}, // E0.6 [10] (🆑..🆚) CL button..VS button
{0x1F1E6, 0x1F1FF, prEmojiPresentation}, // E0.0 [26] (🇦..🇿) regional indicator symbol letter a..regional indicator symbol letter z
{0x1F201, 0x1F201, prEmojiPresentation}, // E0.6 [1] (🈁) Japanese “here” button
{0x1F21A, 0x1F21A, prEmojiPresentation}, // E0.6 [1] (🈚) Japanese “free of charge” button
{0x1F22F, 0x1F22F, prEmojiPresentation}, // E0.6 [1] (🈯) Japanese “reserved” button
{0x1F232, 0x1F236, prEmojiPresentation}, // E0.6 [5] (🈲..🈶) Japanese “prohibited” button..Japanese “not free of charge” button
{0x1F238, 0x1F23A, prEmojiPresentation}, // E0.6 [3] (🈸..🈺) Japanese “application” button..Japanese “open for business” button
{0x1F250, 0x1F251, prEmojiPresentation}, // E0.6 [2] (🉐..🉑) Japanese “bargain” button..Japanese “acceptable” button
{0x1F300, 0x1F30C, prEmojiPresentation}, // E0.6 [13] (🌀..🌌) cyclone..milky way
{0x1F30D, 0x1F30E, prEmojiPresentation}, // E0.7 [2] (🌍..🌎) globe showing Europe-Africa..globe showing Americas
{0x1F30F, 0x1F30F, prEmojiPresentation}, // E0.6 [1] (🌏) globe showing Asia-Australia
{0x1F310, 0x1F310, prEmojiPresentation}, // E1.0 [1] (🌐) globe with meridians
{0x1F311, 0x1F311, prEmojiPresentation}, // E0.6 [1] (🌑) new moon
{0x1F312, 0x1F312, prEmojiPresentation}, // E1.0 [1] (🌒) waxing crescent moon
{0x1F313, 0x1F315, prEmojiPresentation}, // E0.6 [3] (🌓..🌕) first quarter moon..full moon
{0x1F316, 0x1F318, prEmojiPresentation}, // E1.0 [3] (🌖..🌘) waning gibbous moon..waning crescent moon
{0x1F319, 0x1F319, prEmojiPresentation}, // E0.6 [1] (🌙) crescent moon
{0x1F31A, 0x1F31A, prEmojiPresentation}, // E1.0 [1] (🌚) new moon face
{0x1F31B, 0x1F31B, prEmojiPresentation}, // E0.6 [1] (🌛) first quarter moon face
{0x1F31C, 0x1F31C, prEmojiPresentation}, // E0.7 [1] (🌜) last quarter moon face
{0x1F31D, 0x1F31E, prEmojiPresentation}, // E1.0 [2] (🌝..🌞) full moon face..sun with face
{0x1F31F, 0x1F320, prEmojiPresentation}, // E0.6 [2] (🌟..🌠) glowing star..shooting star
{0x1F32D, 0x1F32F, prEmojiPresentation}, // E1.0 [3] (🌭..🌯) hot dog..burrito
{0x1F330, 0x1F331, prEmojiPresentation}, // E0.6 [2] (🌰..🌱) chestnut..seedling
{0x1F332, 0x1F333, prEmojiPresentation}, // E1.0 [2] (🌲..🌳) evergreen tree..deciduous tree
{0x1F334, 0x1F335, prEmojiPresentation}, // E0.6 [2] (🌴..🌵) palm tree..cactus
{0x1F337, 0x1F34A, prEmojiPresentation}, // E0.6 [20] (🌷..🍊) tulip..tangerine
{0x1F34B, 0x1F34B, prEmojiPresentation}, // E1.0 [1] (🍋) lemon
{0x1F34C, 0x1F34F, prEmojiPresentation}, // E0.6 [4] (🍌..🍏) banana..green apple
{0x1F350, 0x1F350, prEmojiPresentation}, // E1.0 [1] (🍐) pear
{0x1F351, 0x1F37B, prEmojiPresentation}, // E0.6 [43] (🍑..🍻) peach..clinking beer mugs
{0x1F37C, 0x1F37C, prEmojiPresentation}, // E1.0 [1] (🍼) baby bottle
{0x1F37E, 0x1F37F, prEmojiPresentation}, // E1.0 [2] (🍾..🍿) bottle with popping cork..popcorn
{0x1F380, 0x1F393, prEmojiPresentation}, // E0.6 [20] (🎀..🎓) ribbon..graduation cap
{0x1F3A0, 0x1F3C4, prEmojiPresentation}, // E0.6 [37] (🎠..🏄) carousel horse..person surfing
{0x1F3C5, 0x1F3C5, prEmojiPresentation}, // E1.0 [1] (🏅) sports medal
{0x1F3C6, 0x1F3C6, prEmojiPresentation}, // E0.6 [1] (🏆) trophy
{0x1F3C7, 0x1F3C7, prEmojiPresentation}, // E1.0 [1] (🏇) horse racing
{0x1F3C8, 0x1F3C8, prEmojiPresentation}, // E0.6 [1] (🏈) american football
{0x1F3C9, 0x1F3C9, prEmojiPresentation}, // E1.0 [1] (🏉) rugby football
{0x1F3CA, 0x1F3CA, prEmojiPresentation}, // E0.6 [1] (🏊) person swimming
{0x1F3CF, 0x1F3D3, prEmojiPresentation}, // E1.0 [5] (🏏..🏓) cricket game..ping pong
{0x1F3E0, 0x1F3E3, prEmojiPresentation}, // E0.6 [4] (🏠..🏣) house..Japanese post office
{0x1F3E4, 0x1F3E4, prEmojiPresentation}, // E1.0 [1] (🏤) post office
{0x1F3E5, 0x1F3F0, prEmojiPresentation}, // E0.6 [12] (🏥..🏰) hospital..castle
{0x1F3F4, 0x1F3F4, prEmojiPresentation}, // E1.0 [1] (🏴) black flag
{0x1F3F8, 0x1F407, prEmojiPresentation}, // E1.0 [16] (🏸..🐇) badminton..rabbit
{0x1F408, 0x1F408, prEmojiPresentation}, // E0.7 [1] (🐈) cat
{0x1F409, 0x1F40B, prEmojiPresentation}, // E1.0 [3] (🐉..🐋) dragon..whale
{0x1F40C, 0x1F40E, prEmojiPresentation}, // E0.6 [3] (🐌..🐎) snail..horse
{0x1F40F, 0x1F410, prEmojiPresentation}, // E1.0 [2] (🐏..🐐) ram..goat
{0x1F411, 0x1F412, prEmojiPresentation}, // E0.6 [2] (🐑..🐒) ewe..monkey
{0x1F413, 0x1F413, prEmojiPresentation}, // E1.0 [1] (🐓) rooster
{0x1F414, 0x1F414, prEmojiPresentation}, // E0.6 [1] (🐔) chicken
{0x1F415, 0x1F415, prEmojiPresentation}, // E0.7 [1] (🐕) dog
{0x1F416, 0x1F416, prEmojiPresentation}, // E1.0 [1] (🐖) pig
{0x1F417, 0x1F429, prEmojiPresentation}, // E0.6 [19] (🐗..🐩) boar..poodle
{0x1F42A, 0x1F42A, prEmojiPresentation}, // E1.0 [1] (🐪) camel
{0x1F42B, 0x1F43E, prEmojiPresentation}, // E0.6 [20] (🐫..🐾) two-hump camel..paw prints
{0x1F440, 0x1F440, prEmojiPresentation}, // E0.6 [1] (👀) eyes
{0x1F442, 0x1F464, prEmojiPresentation}, // E0.6 [35] (👂..👤) ear..bust in silhouette
{0x1F465, 0x1F465, prEmojiPresentation}, // E1.0 [1] (👥) busts in silhouette
{0x1F466, 0x1F46B, prEmojiPresentation}, // E0.6 [6] (👦..👫) boy..woman and man holding hands
{0x1F46C, 0x1F46D, prEmojiPresentation}, // E1.0 [2] (👬..👭) men holding hands..women holding hands
{0x1F46E, 0x1F4AC, prEmojiPresentation}, // E0.6 [63] (👮..💬) police officer..speech balloon
{0x1F4AD, 0x1F4AD, prEmojiPresentation}, // E1.0 [1] (💭) thought balloon
{0x1F4AE, 0x1F4B5, prEmojiPresentation}, // E0.6 [8] (💮..💵) white flower..dollar banknote
{0x1F4B6, 0x1F4B7, prEmojiPresentation}, // E1.0 [2] (💶..💷) euro banknote..pound banknote
{0x1F4B8, 0x1F4EB, prEmojiPresentation}, // E0.6 [52] (💸..📫) money with wings..closed mailbox with raised flag
{0x1F4EC, 0x1F4ED, prEmojiPresentation}, // E0.7 [2] (📬..📭) open mailbox with raised flag..open mailbox with lowered flag
{0x1F4EE, 0x1F4EE, prEmojiPresentation}, // E0.6 [1] (📮) postbox
{0x1F4EF, 0x1F4EF, prEmojiPresentation}, // E1.0 [1] (📯) postal horn
{0x1F4F0, 0x1F4F4, prEmojiPresentation}, // E0.6 [5] (📰..📴) newspaper..mobile phone off
{0x1F4F5, 0x1F4F5, prEmojiPresentation}, // E1.0 [1] (📵) no mobile phones
{0x1F4F6, 0x1F4F7, prEmojiPresentation}, // E0.6 [2] (📶..📷) antenna bars..camera
{0x1F4F8, 0x1F4F8, prEmojiPresentation}, // E1.0 [1] (📸) camera with flash
{0x1F4F9, 0x1F4FC, prEmojiPresentation}, // E0.6 [4] (📹..📼) video camera..videocassette
{0x1F4FF, 0x1F502, prEmojiPresentation}, // E1.0 [4] (📿..🔂) prayer beads..repeat single button
{0x1F503, 0x1F503, prEmojiPresentation}, // E0.6 [1] (🔃) clockwise vertical arrows
{0x1F504, 0x1F507, prEmojiPresentation}, // E1.0 [4] (🔄..🔇) counterclockwise arrows button..muted speaker
{0x1F508, 0x1F508, prEmojiPresentation}, // E0.7 [1] (🔈) speaker low volume
{0x1F509, 0x1F509, prEmojiPresentation}, // E1.0 [1] (🔉) speaker medium volume
{0x1F50A, 0x1F514, prEmojiPresentation}, // E0.6 [11] (🔊..🔔) speaker high volume..bell
{0x1F515, 0x1F515, prEmojiPresentation}, // E1.0 [1] (🔕) bell with slash
{0x1F516, 0x1F52B, prEmojiPresentation}, // E0.6 [22] (🔖..🔫) bookmark..water pistol
{0x1F52C, 0x1F52D, prEmojiPresentation}, // E1.0 [2] (🔬..🔭) microscope..telescope
{0x1F52E, 0x1F53D, prEmojiPresentation}, // E0.6 [16] (🔮..🔽) crystal ball..downwards button
{0x1F54B, 0x1F54E, prEmojiPresentation}, // E1.0 [4] (🕋..🕎) kaaba..menorah
{0x1F550, 0x1F55B, prEmojiPresentation}, // E0.6 [12] (🕐..🕛) one o'clock..twelve o'clock
{0x1F55C, 0x1F567, prEmojiPresentation}, // E0.7 [12] (🕜..🕧) one-thirty..twelve-thirty
{0x1F57A, 0x1F57A, prEmojiPresentation}, // E3.0 [1] (🕺) man dancing
{0x1F595, 0x1F596, prEmojiPresentation}, // E1.0 [2] (🖕..🖖) middle finger..vulcan salute
{0x1F5A4, 0x1F5A4, prEmojiPresentation}, // E3.0 [1] (🖤) black heart
{0x1F5FB, 0x1F5FF, prEmojiPresentation}, // E0.6 [5] (🗻..🗿) mount fuji..moai
{0x1F600, 0x1F600, prEmojiPresentation}, // E1.0 [1] (😀) grinning face
{0x1F601, 0x1F606, prEmojiPresentation}, // E0.6 [6] (😁..😆) beaming face with smiling eyes..grinning squinting face
{0x1F607, 0x1F608, prEmojiPresentation}, // E1.0 [2] (😇..😈) smiling face with halo..smiling face with horns
{0x1F609, 0x1F60D, prEmojiPresentation}, // E0.6 [5] (😉..😍) winking face..smiling face with heart-eyes
{0x1F60E, 0x1F60E, prEmojiPresentation}, // E1.0 [1] (😎) smiling face with sunglasses
{0x1F60F, 0x1F60F, prEmojiPresentation}, // E0.6 [1] (😏) smirking face
{0x1F610, 0x1F610, prEmojiPresentation}, // E0.7 [1] (😐) neutral face
{0x1F611, 0x1F611, prEmojiPresentation}, // E1.0 [1] (😑) expressionless face
{0x1F612, 0x1F614, prEmojiPresentation}, // E0.6 [3] (😒..😔) unamused face..pensive face
{0x1F615, 0x1F615, prEmojiPresentation}, // E1.0 [1] (😕) confused face
{0x1F616, 0x1F616, prEmojiPresentation}, // E0.6 [1] (😖) confounded face
{0x1F617, 0x1F617, prEmojiPresentation}, // E1.0 [1] (😗) kissing face
{0x1F618, 0x1F618, prEmojiPresentation}, // E0.6 [1] (😘) face blowing a kiss
{0x1F619, 0x1F619, prEmojiPresentation}, // E1.0 [1] (😙) kissing face with smiling eyes
{0x1F61A, 0x1F61A, prEmojiPresentation}, // E0.6 [1] (😚) kissing face with closed eyes
{0x1F61B, 0x1F61B, prEmojiPresentation}, // E1.0 [1] (😛) face with tongue
{0x1F61C, 0x1F61E, prEmojiPresentation}, // E0.6 [3] (😜..😞) winking face with tongue..disappointed face
{0x1F61F, 0x1F61F, prEmojiPresentation}, // E1.0 [1] (😟) worried face
{0x1F620, 0x1F625, prEmojiPresentation}, // E0.6 [6] (😠..😥) angry face..sad but relieved face
{0x1F626, 0x1F627, prEmojiPresentation}, // E1.0 [2] (😦..😧) frowning face with open mouth..anguished face
{0x1F628, 0x1F62B, prEmojiPresentation}, // E0.6 [4] (😨..😫) fearful face..tired face
{0x1F62C, 0x1F62C, prEmojiPresentation}, // E1.0 [1] (😬) grimacing face
{0x1F62D, 0x1F62D, prEmojiPresentation}, // E0.6 [1] (😭) loudly crying face
{0x1F62E, 0x1F62F, prEmojiPresentation}, // E1.0 [2] (😮..😯) face with open mouth..hushed face
{0x1F630, 0x1F633, prEmojiPresentation}, // E0.6 [4] (😰..😳) anxious face with sweat..flushed face
{0x1F634, 0x1F634, prEmojiPresentation}, // E1.0 [1] (😴) sleeping face
{0x1F635, 0x1F635, prEmojiPresentation}, // E0.6 [1] (😵) face with crossed-out eyes
{0x1F636, 0x1F636, prEmojiPresentation}, // E1.0 [1] (😶) face without mouth
{0x1F637, 0x1F640, prEmojiPresentation}, // E0.6 [10] (😷..🙀) face with medical mask..weary cat
{0x1F641, 0x1F644, prEmojiPresentation}, // E1.0 [4] (🙁..🙄) slightly frowning face..face with rolling eyes
{0x1F645, 0x1F64F, prEmojiPresentation}, // E0.6 [11] (🙅..🙏) person gesturing NO..folded hands
{0x1F680, 0x1F680, prEmojiPresentation}, // E0.6 [1] (🚀) rocket
{0x1F681, 0x1F682, prEmojiPresentation}, // E1.0 [2] (🚁..🚂) helicopter..locomotive
{0x1F683, 0x1F685, prEmojiPresentation}, // E0.6 [3] (🚃..🚅) railway car..bullet train
{0x1F686, 0x1F686, prEmojiPresentation}, // E1.0 [1] (🚆) train
{0x1F687, 0x1F687, prEmojiPresentation}, // E0.6 [1] (🚇) metro
{0x1F688, 0x1F688, prEmojiPresentation}, // E1.0 [1] (🚈) light rail
{0x1F689, 0x1F689, prEmojiPresentation}, // E0.6 [1] (🚉) station
{0x1F68A, 0x1F68B, prEmojiPresentation}, // E1.0 [2] (🚊..🚋) tram..tram car
{0x1F68C, 0x1F68C, prEmojiPresentation}, // E0.6 [1] (🚌) bus
{0x1F68D, 0x1F68D, prEmojiPresentation}, // E0.7 [1] (🚍) oncoming bus
{0x1F68E, 0x1F68E, prEmojiPresentation}, // E1.0 [1] (🚎) trolleybus
{0x1F68F, 0x1F68F, prEmojiPresentation}, // E0.6 [1] (🚏) bus stop
{0x1F690, 0x1F690, prEmojiPresentation}, // E1.0 [1] (🚐) minibus
{0x1F691, 0x1F693, prEmojiPresentation}, // E0.6 [3] (🚑..🚓) ambulance..police car
{0x1F694, 0x1F694, prEmojiPresentation}, // E0.7 [1] (🚔) oncoming police car
{0x1F695, 0x1F695, prEmojiPresentation}, // E0.6 [1] (🚕) taxi
{0x1F696, 0x1F696, prEmojiPresentation}, // E1.0 [1] (🚖) oncoming taxi
{0x1F697, 0x1F697, prEmojiPresentation}, // E0.6 [1] (🚗) automobile
{0x1F698, 0x1F698, prEmojiPresentation}, // E0.7 [1] (🚘) oncoming automobile
{0x1F699, 0x1F69A, prEmojiPresentation}, // E0.6 [2] (🚙..🚚) sport utility vehicle..delivery truck
{0x1F69B, 0x1F6A1, prEmojiPresentation}, // E1.0 [7] (🚛..🚡) articulated lorry..aerial tramway
{0x1F6A2, 0x1F6A2, prEmojiPresentation}, // E0.6 [1] (🚢) ship
{0x1F6A3, 0x1F6A3, prEmojiPresentation}, // E1.0 [1] (🚣) person rowing boat
{0x1F6A4, 0x1F6A5, prEmojiPresentation}, // E0.6 [2] (🚤..🚥) speedboat..horizontal traffic light
{0x1F6A6, 0x1F6A6, prEmojiPresentation}, // E1.0 [1] (🚦) vertical traffic light
{0x1F6A7, 0x1F6AD, prEmojiPresentation}, // E0.6 [7] (🚧..🚭) construction..no smoking
{0x1F6AE, 0x1F6B1, prEmojiPresentation}, // E1.0 [4] (🚮..🚱) litter in bin sign..non-potable water
{0x1F6B2, 0x1F6B2, prEmojiPresentation}, // E0.6 [1] (🚲) bicycle
{0x1F6B3, 0x1F6B5, prEmojiPresentation}, // E1.0 [3] (🚳..🚵) no bicycles..person mountain biking
{0x1F6B6, 0x1F6B6, prEmojiPresentation}, // E0.6 [1] (🚶) person walking
{0x1F6B7, 0x1F6B8, prEmojiPresentation}, // E1.0 [2] (🚷..🚸) no pedestrians..children crossing
{0x1F6B9, 0x1F6BE, prEmojiPresentation}, // E0.6 [6] (🚹..🚾) men's room..water closet
{0x1F6BF, 0x1F6BF, prEmojiPresentation}, // E1.0 [1] (🚿) shower
{0x1F6C0, 0x1F6C0, prEmojiPresentation}, // E0.6 [1] (🛀) person taking bath
{0x1F6C1, 0x1F6C5, prEmojiPresentation}, // E1.0 [5] (🛁..🛅) bathtub..left luggage
{0x1F6CC, 0x1F6CC, prEmojiPresentation}, // E1.0 [1] (🛌) person in bed
{0x1F6D0, 0x1F6D0, prEmojiPresentation}, // E1.0 [1] (🛐) place of worship
{0x1F6D1, 0x1F6D2, prEmojiPresentation}, // E3.0 [2] (🛑..🛒) stop sign..shopping cart
{0x1F6D5, 0x1F6D5, prEmojiPresentation}, // E12.0 [1] (🛕) hindu temple
{0x1F6D6, 0x1F6D7, prEmojiPresentation}, // E13.0 [2] (🛖..🛗) hut..elevator
{0x1F6DC, 0x1F6DC, prEmojiPresentation}, // E15.0 [1] (🛜) wireless
{0x1F6DD, 0x1F6DF, prEmojiPresentation}, // E14.0 [3] (🛝..🛟) playground slide..ring buoy
{0x1F6EB, 0x1F6EC, prEmojiPresentation}, // E1.0 [2] (🛫..🛬) airplane departure..airplane arrival
{0x1F6F4, 0x1F6F6, prEmojiPresentation}, // E3.0 [3] (🛴..🛶) kick scooter..canoe
{0x1F6F7, 0x1F6F8, prEmojiPresentation}, // E5.0 [2] (🛷..🛸) sled..flying saucer
{0x1F6F9, 0x1F6F9, prEmojiPresentation}, // E11.0 [1] (🛹) skateboard
{0x1F6FA, 0x1F6FA, prEmojiPresentation}, // E12.0 [1] (🛺) auto rickshaw
{0x1F6FB, 0x1F6FC, prEmojiPresentation}, // E13.0 [2] (🛻..🛼) pickup truck..roller skate
{0x1F7E0, 0x1F7EB, prEmojiPresentation}, // E12.0 [12] (🟠..🟫) orange circle..brown square
{0x1F7F0, 0x1F7F0, prEmojiPresentation}, // E14.0 [1] (🟰) heavy equals sign
{0x1F90C, 0x1F90C, prEmojiPresentation}, // E13.0 [1] (🤌) pinched fingers
{0x1F90D, 0x1F90F, prEmojiPresentation}, // E12.0 [3] (🤍..🤏) white heart..pinching hand
{0x1F910, 0x1F918, prEmojiPresentation}, // E1.0 [9] (🤐..🤘) zipper-mouth face..sign of the horns
{0x1F919, 0x1F91E, prEmojiPresentation}, // E3.0 [6] (🤙..🤞) call me hand..crossed fingers
{0x1F91F, 0x1F91F, prEmojiPresentation}, // E5.0 [1] (🤟) love-you gesture
{0x1F920, 0x1F927, prEmojiPresentation}, // E3.0 [8] (🤠..🤧) cowboy hat face..sneezing face
{0x1F928, 0x1F92F, prEmojiPresentation}, // E5.0 [8] (🤨..🤯) face with raised eyebrow..exploding head
{0x1F930, 0x1F930, prEmojiPresentation}, // E3.0 [1] (🤰) pregnant woman
{0x1F931, 0x1F932, prEmojiPresentation}, // E5.0 [2] (🤱..🤲) breast-feeding..palms up together
{0x1F933, 0x1F93A, prEmojiPresentation}, // E3.0 [8] (🤳..🤺) selfie..person fencing
{0x1F93C, 0x1F93E, prEmojiPresentation}, // E3.0 [3] (🤼..🤾) people wrestling..person playing handball
{0x1F93F, 0x1F93F, prEmojiPresentation}, // E12.0 [1] (🤿) diving mask
{0x1F940, 0x1F945, prEmojiPresentation}, // E3.0 [6] (🥀..🥅) wilted flower..goal net
{0x1F947, 0x1F94B, prEmojiPresentation}, // E3.0 [5] (🥇..🥋) 1st place medal..martial arts uniform
{0x1F94C, 0x1F94C, prEmojiPresentation}, // E5.0 [1] (🥌) curling stone
{0x1F94D, 0x1F94F, prEmojiPresentation}, // E11.0 [3] (🥍..🥏) lacrosse..flying disc
{0x1F950, 0x1F95E, prEmojiPresentation}, // E3.0 [15] (🥐..🥞) croissant..pancakes
{0x1F95F, 0x1F96B, prEmojiPresentation}, // E5.0 [13] (🥟..🥫) dumpling..canned food
{0x1F96C, 0x1F970, prEmojiPresentation}, // E11.0 [5] (🥬..🥰) leafy green..smiling face with hearts
{0x1F971, 0x1F971, prEmojiPresentation}, // E12.0 [1] (🥱) yawning face
{0x1F972, 0x1F972, prEmojiPresentation}, // E13.0 [1] (🥲) smiling face with tear
{0x1F973, 0x1F976, prEmojiPresentation}, // E11.0 [4] (🥳..🥶) partying face..cold face
{0x1F977, 0x1F978, prEmojiPresentation}, // E13.0 [2] (🥷..🥸) ninja..disguised face
{0x1F979, 0x1F979, prEmojiPresentation}, // E14.0 [1] (🥹) face holding back tears
{0x1F97A, 0x1F97A, prEmojiPresentation}, // E11.0 [1] (🥺) pleading face
{0x1F97B, 0x1F97B, prEmojiPresentation}, // E12.0 [1] (🥻) sari
{0x1F97C, 0x1F97F, prEmojiPresentation}, // E11.0 [4] (🥼..🥿) lab coat..flat shoe
{0x1F980, 0x1F984, prEmojiPresentation}, // E1.0 [5] (🦀..🦄) crab..unicorn
{0x1F985, 0x1F991, prEmojiPresentation}, // E3.0 [13] (🦅..🦑) eagle..squid
{0x1F992, 0x1F997, prEmojiPresentation}, // E5.0 [6] (🦒..🦗) giraffe..cricket
{0x1F998, 0x1F9A2, prEmojiPresentation}, // E11.0 [11] (🦘..🦢) kangaroo..swan
{0x1F9A3, 0x1F9A4, prEmojiPresentation}, // E13.0 [2] (🦣..🦤) mammoth..dodo
{0x1F9A5, 0x1F9AA, prEmojiPresentation}, // E12.0 [6] (🦥..🦪) sloth..oyster
{0x1F9AB, 0x1F9AD, prEmojiPresentation}, // E13.0 [3] (🦫..🦭) beaver..seal
{0x1F9AE, 0x1F9AF, prEmojiPresentation}, // E12.0 [2] (🦮..🦯) guide dog..white cane
{0x1F9B0, 0x1F9B9, prEmojiPresentation}, // E11.0 [10] (🦰..🦹) red hair..supervillain
{0x1F9BA, 0x1F9BF, prEmojiPresentation}, // E12.0 [6] (🦺..🦿) safety vest..mechanical leg
{0x1F9C0, 0x1F9C0, prEmojiPresentation}, // E1.0 [1] (🧀) cheese wedge
{0x1F9C1, 0x1F9C2, prEmojiPresentation}, // E11.0 [2] (🧁..🧂) cupcake..salt
{0x1F9C3, 0x1F9CA, prEmojiPresentation}, // E12.0 [8] (🧃..🧊) beverage box..ice
{0x1F9CB, 0x1F9CB, prEmojiPresentation}, // E13.0 [1] (🧋) bubble tea
{0x1F9CC, 0x1F9CC, prEmojiPresentation}, // E14.0 [1] (🧌) troll
{0x1F9CD, 0x1F9CF, prEmojiPresentation}, // E12.0 [3] (🧍..🧏) person standing..deaf person
{0x1F9D0, 0x1F9E6, prEmojiPresentation}, // E5.0 [23] (🧐..🧦) face with monocle..socks
{0x1F9E7, 0x1F9FF, prEmojiPresentation}, // E11.0 [25] (🧧..🧿) red envelope..nazar amulet
{0x1FA70, 0x1FA73, prEmojiPresentation}, // E12.0 [4] (🩰..🩳) ballet shoes..shorts
{0x1FA74, 0x1FA74, prEmojiPresentation}, // E13.0 [1] (🩴) thong sandal
{0x1FA75, 0x1FA77, prEmojiPresentation}, // E15.0 [3] (🩵..🩷) light blue heart..pink heart
{0x1FA78, 0x1FA7A, prEmojiPresentation}, // E12.0 [3] (🩸..🩺) drop of blood..stethoscope
{0x1FA7B, 0x1FA7C, prEmojiPresentation}, // E14.0 [2] (🩻..🩼) x-ray..crutch
{0x1FA80, 0x1FA82, prEmojiPresentation}, // E12.0 [3] (🪀..🪂) yo-yo..parachute
{0x1FA83, 0x1FA86, prEmojiPresentation}, // E13.0 [4] (🪃..🪆) boomerang..nesting dolls
{0x1FA87, 0x1FA88, prEmojiPresentation}, // E15.0 [2] (🪇..🪈) maracas..flute
{0x1FA90, 0x1FA95, prEmojiPresentation}, // E12.0 [6] (🪐..🪕) ringed planet..banjo
{0x1FA96, 0x1FAA8, prEmojiPresentation}, // E13.0 [19] (🪖..🪨) military helmet..rock
{0x1FAA9, 0x1FAAC, prEmojiPresentation}, // E14.0 [4] (🪩..🪬) mirror ball..hamsa
{0x1FAAD, 0x1FAAF, prEmojiPresentation}, // E15.0 [3] (🪭..🪯) folding hand fan..khanda
{0x1FAB0, 0x1FAB6, prEmojiPresentation}, // E13.0 [7] (🪰..🪶) fly..feather
{0x1FAB7, 0x1FABA, prEmojiPresentation}, // E14.0 [4] (🪷..🪺) lotus..nest with eggs
{0x1FABB, 0x1FABD, prEmojiPresentation}, // E15.0 [3] (🪻..🪽) hyacinth..wing
{0x1FABF, 0x1FABF, prEmojiPresentation}, // E15.0 [1] (🪿) goose
{0x1FAC0, 0x1FAC2, prEmojiPresentation}, // E13.0 [3] (🫀..🫂) anatomical heart..people hugging
{0x1FAC3, 0x1FAC5, prEmojiPresentation}, // E14.0 [3] (🫃..🫅) pregnant man..person with crown
{0x1FACE, 0x1FACF, prEmojiPresentation}, // E15.0 [2] (🫎..🫏) moose..donkey
{0x1FAD0, 0x1FAD6, prEmojiPresentation}, // E13.0 [7] (🫐..🫖) blueberries..teapot
{0x1FAD7, 0x1FAD9, prEmojiPresentation}, // E14.0 [3] (🫗..🫙) pouring liquid..jar
{0x1FADA, 0x1FADB, prEmojiPresentation}, // E15.0 [2] (🫚..🫛) ginger root..pea pod
{0x1FAE0, 0x1FAE7, prEmojiPresentation}, // E14.0 [8] (🫠..🫧) melting face..bubbles
{0x1FAE8, 0x1FAE8, prEmojiPresentation}, // E15.0 [1] (🫨) shaking face
{0x1FAF0, 0x1FAF6, prEmojiPresentation}, // E14.0 [7] (🫰..🫶) hand with index finger and thumb crossed..heart hands
{0x1FAF7, 0x1FAF8, prEmojiPresentation}, // E15.0 [2] (🫷..🫸) leftwards pushing hand..rightwards pushing hand
}

215
vendor/github.com/rivo/uniseg/gen_breaktest.go generated vendored Normal file

@ -0,0 +1,215 @@
//go:build generate
// This program generates a Go file containing a slice of test cases based on the
// Unicode Character Database auxiliary data files. The command line arguments
// are as follows:
//
// 1. The name of the Unicode data file (just the filename, without extension).
// 2. The name of the locally generated Go file.
// 3. The name of the slice containing the test cases.
// 4. The name of the generator, for logging purposes.
//
//go:generate go run gen_breaktest.go GraphemeBreakTest graphemebreak_test.go graphemeBreakTestCases graphemes
//go:generate go run gen_breaktest.go WordBreakTest wordbreak_test.go wordBreakTestCases words
//go:generate go run gen_breaktest.go SentenceBreakTest sentencebreak_test.go sentenceBreakTestCases sentences
//go:generate go run gen_breaktest.go LineBreakTest linebreak_test.go lineBreakTestCases lines
package main
import (
"bufio"
"bytes"
"errors"
"fmt"
"go/format"
"io/ioutil"
"log"
"net/http"
"os"
"time"
)
// We want to test against a specific version rather than the latest. When the
// package is upgraded to a new version, change these to generate new tests.
const (
testCaseURL = `https://www.unicode.org/Public/15.0.0/ucd/auxiliary/%s.txt`
)
func main() {
if len(os.Args) < 5 {
fmt.Println("Not enough arguments, see code for details")
os.Exit(1)
}
log.SetPrefix("gen_breaktest (" + os.Args[4] + "): ")
log.SetFlags(0)
// Read text of testcases and parse into Go source code.
src, err := parse(fmt.Sprintf(testCaseURL, os.Args[1]))
if err != nil {
log.Fatal(err)
}
// Format the Go code.
formatted, err := format.Source(src)
if err != nil {
log.Fatalln("gofmt:", err)
}
// Write it out.
log.Print("Writing to ", os.Args[2])
if err := ioutil.WriteFile(os.Args[2], formatted, 0644); err != nil {
log.Fatal(err)
}
}
// parse downloads the break test file at the given URL and parses its contents
// into Go source code representing the test cases.
func parse(url string) ([]byte, error) {
log.Printf("Parsing %s", url)
res, err := http.Get(url)
if err != nil {
return nil, err
}
body := res.Body
defer body.Close()
buf := new(bytes.Buffer)
buf.Grow(120 << 10)
buf.WriteString(`// Code generated via go generate from gen_breaktest.go. DO NOT EDIT.
package uniseg
// ` + os.Args[3] + ` are test cases taken from
// ` + url + `
// on ` + time.Now().Format("January 2, 2006") + `. See
// https://www.unicode.org/license.html for the Unicode license agreement.
var ` + os.Args[3] + ` = []testCase {
`)
sc := bufio.NewScanner(body)
num := 1
var line []byte
original := make([]byte, 0, 64)
expected := make([]byte, 0, 64)
for sc.Scan() {
num++
line = sc.Bytes()
if len(line) == 0 || line[0] == '#' {
continue
}
var comment []byte
if i := bytes.IndexByte(line, '#'); i >= 0 {
comment = bytes.TrimSpace(line[i+1:])
line = bytes.TrimSpace(line[:i])
}
original, expected, err := parseRuneSequence(line, original[:0], expected[:0])
if err != nil {
return nil, fmt.Errorf(`line %d: %v: %q`, num, err, line)
}
fmt.Fprintf(buf, "\t{original: \"%s\", expected: %s}, // %s\n", original, expected, comment)
}
if err := sc.Err(); err != nil {
return nil, err
}
// Check for final "# EOF", useful check if we're streaming via HTTP
if !bytes.Equal(line, []byte("# EOF")) {
return nil, fmt.Errorf(`line %d: expected "# EOF" as final line, got %q`, num, line)
}
buf.WriteString("}\n")
return buf.Bytes(), nil
}
// Used by parseRuneSequence to match input via bytes.HasPrefix.
var (
prefixBreak = []byte("÷ ")
prefixDontBreak = []byte("× ")
breakOk = []byte("÷")
breakNo = []byte("×")
)
// parseRuneSequence parses a rune + breaking opportunity sequence from b
// and appends the Go code for testcase.original to orig
// and appends the Go code for testcase.expected to exp.
// It returns the new orig and exp slices.
//
// E.g. for the input b="÷ 0020 × 0308 ÷ 1F1E6 ÷"
// it will append
//
// "\u0020\u0308\U0001F1E6"
//
// and "[][]rune{{0x0020,0x0308},{0x1F1E6},}"
// to orig and exp respectively.
//
// The formatting of exp is expected to be cleaned up by gofmt or format.Source.
// Note we explicitly require the sequence to start with ÷ and we implicitly
// require it to end with ÷.
func parseRuneSequence(b, orig, exp []byte) ([]byte, []byte, error) {
// Check for and remove first ÷ or ×.
if !bytes.HasPrefix(b, prefixBreak) && !bytes.HasPrefix(b, prefixDontBreak) {
return nil, nil, errors.New("expected ÷ or × as first character")
}
if bytes.HasPrefix(b, prefixBreak) {
b = b[len(prefixBreak):]
} else {
b = b[len(prefixDontBreak):]
}
boundary := true
exp = append(exp, "[][]rune{"...)
for len(b) > 0 {
if boundary {
exp = append(exp, '{')
}
exp = append(exp, "0x"...)
// Find end of hex digits.
var i int
for i = 0; i < len(b) && b[i] != ' '; i++ {
if d := b[i]; ('0' <= d && d <= '9') ||
('A' <= d && d <= 'F') ||
('a' <= d && d <= 'f') {
continue
}
return nil, nil, errors.New("bad hex digit")
}
switch i {
case 4:
orig = append(orig, "\\u"...)
case 5:
orig = append(orig, "\\U000"...)
default:
return nil, nil, errors.New("unsupport code point hex length")
}
orig = append(orig, b[:i]...)
exp = append(exp, b[:i]...)
b = b[i:]
// Check for space between hex and ÷ or ×.
if len(b) < 1 || b[0] != ' ' {
return nil, nil, errors.New("bad input")
}
b = b[1:]
// Check for next boundary.
switch {
case bytes.HasPrefix(b, breakOk):
boundary = true
b = b[len(breakOk):]
case bytes.HasPrefix(b, breakNo):
boundary = false
b = b[len(breakNo):]
default:
return nil, nil, errors.New("missing ÷ or ×")
}
if boundary {
exp = append(exp, '}')
}
exp = append(exp, ',')
if len(b) > 0 && b[0] == ' ' {
b = b[1:]
}
}
exp = append(exp, '}')
return orig, exp, nil
}
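For orientation, and based only on the template strings above and the example in parseRuneSequence's doc comment, a generated test file would contain entries roughly of this shape (an illustrative sketch, not actual generated output):

var graphemeBreakTestCases = []testCase{
	{original: "\u0020\u0308\U0001F1E6", expected: [][]rune{{0x0020, 0x0308}, {0x1F1E6}}}, // ÷ 0020 × 0308 ÷ 1F1E6 ÷
}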

261
vendor/github.com/rivo/uniseg/gen_properties.go generated vendored Normal file

@ -0,0 +1,261 @@
//go:build generate
// This program generates a Go property file from Unicode Character
// Database auxiliary data files. The command line arguments are as follows:
//
// 1. The name of the Unicode data file (just the filename, without extension).
// Can be "-" (to skip) if the emoji flag is included.
// 2. The name of the locally generated Go file.
// 3. The name of the slice mapping code points to properties.
// 4. The name of the generator, for logging purposes.
// 5. (Optional) Flags, comma-separated. The following flags are available:
// - "emojis=<property>": include the specified emoji properties (e.g.
// "Extended_Pictographic").
// - "gencat": include general category properties.
//
//go:generate go run gen_properties.go auxiliary/GraphemeBreakProperty graphemeproperties.go graphemeCodePoints graphemes emojis=Extended_Pictographic
//go:generate go run gen_properties.go auxiliary/WordBreakProperty wordproperties.go workBreakCodePoints words emojis=Extended_Pictographic
//go:generate go run gen_properties.go auxiliary/SentenceBreakProperty sentenceproperties.go sentenceBreakCodePoints sentences
//go:generate go run gen_properties.go LineBreak lineproperties.go lineBreakCodePoints lines gencat
//go:generate go run gen_properties.go EastAsianWidth eastasianwidth.go eastAsianWidth eastasianwidth
//go:generate go run gen_properties.go - emojipresentation.go emojiPresentation emojipresentation emojis=Emoji_Presentation
package main
import (
"bufio"
"bytes"
"errors"
"fmt"
"go/format"
"io/ioutil"
"log"
"net/http"
"os"
"regexp"
"sort"
"strconv"
"strings"
"time"
)
// We want to test against a specific version rather than the latest. When the
// package is upgraded to a new version, change these to generate new properties.
const (
propertyURL = `https://www.unicode.org/Public/15.0.0/ucd/%s.txt`
emojiURL = `https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt`
)
// The regular expression for a line containing a code point range property.
var propertyPattern = regexp.MustCompile(`^([0-9A-F]{4,6})(\.\.([0-9A-F]{4,6}))?\s*;\s*([A-Za-z0-9_]+)\s*#\s(.+)$`)
func main() {
if len(os.Args) < 5 {
fmt.Println("Not enough arguments, see code for details")
os.Exit(1)
}
log.SetPrefix("gen_properties (" + os.Args[4] + "): ")
log.SetFlags(0)
// Parse flags.
flags := make(map[string]string)
if len(os.Args) >= 6 {
for _, flag := range strings.Split(os.Args[5], ",") {
flagFields := strings.Split(flag, "=")
if len(flagFields) == 1 {
flags[flagFields[0]] = "yes"
} else {
flags[flagFields[0]] = flagFields[1]
}
}
}
// Parse the text file and generate Go source code from it.
_, includeGeneralCategory := flags["gencat"]
var mainURL string
if os.Args[1] != "-" {
mainURL = fmt.Sprintf(propertyURL, os.Args[1])
}
src, err := parse(mainURL, flags["emojis"], includeGeneralCategory)
if err != nil {
log.Fatal(err)
}
// Format the Go code.
formatted, err := format.Source([]byte(src))
if err != nil {
log.Fatal("gofmt:", err)
}
// Save it to the (local) target file.
log.Print("Writing to ", os.Args[2])
if err := ioutil.WriteFile(os.Args[2], formatted, 0644); err != nil {
log.Fatal(err)
}
}
// parse parses the Unicode Properties text files located at the given URLs and
// returns their equivalent Go source code to be used in the uniseg package. If
// "emojiProperty" is not an empty string, emoji code points for that emoji
// property (e.g. "Extended_Pictographic") will be included. In those cases, you
// may pass an empty "propertyURL" to skip parsing the main properties file. If
// "includeGeneralCategory" is true, the Unicode General Category property will
// be extracted from the comments and included in the output.
func parse(propertyURL, emojiProperty string, includeGeneralCategory bool) (string, error) {
if propertyURL == "" && emojiProperty == "" {
return "", errors.New("no properties to parse")
}
// Temporary buffer to hold properties.
var properties [][4]string
// Open the first URL.
if propertyURL != "" {
log.Printf("Parsing %s", propertyURL)
res, err := http.Get(propertyURL)
if err != nil {
return "", err
}
in1 := res.Body
defer in1.Close()
// Parse it.
scanner := bufio.NewScanner(in1)
num := 0
for scanner.Scan() {
num++
line := strings.TrimSpace(scanner.Text())
// Skip comments and empty lines.
if strings.HasPrefix(line, "#") || line == "" {
continue
}
// Everything else must be a code point range, a property and a comment.
from, to, property, comment, err := parseProperty(line)
if err != nil {
return "", fmt.Errorf("%s line %d: %v", os.Args[4], num, err)
}
properties = append(properties, [4]string{from, to, property, comment})
}
if err := scanner.Err(); err != nil {
return "", err
}
}
// Open the second URL.
if emojiProperty != "" {
log.Printf("Parsing %s", emojiURL)
res, err := http.Get(emojiURL)
if err != nil {
return "", err
}
in2 := res.Body
defer in2.Close()
// Parse it.
scanner := bufio.NewScanner(in2)
num := 0
for scanner.Scan() {
num++
line := scanner.Text()
// Skip comments, empty lines, and everything not containing
// "Extended_Pictographic".
if strings.HasPrefix(line, "#") || line == "" || !strings.Contains(line, emojiProperty) {
continue
}
// Everything else must be a code point range, a property and a comment.
from, to, property, comment, err := parseProperty(line)
if err != nil {
return "", fmt.Errorf("emojis line %d: %v", num, err)
}
properties = append(properties, [4]string{from, to, property, comment})
}
if err := scanner.Err(); err != nil {
return "", err
}
}
// Avoid overflow during binary search.
if len(properties) >= 1<<31 {
return "", errors.New("too many properties")
}
// Sort properties.
sort.Slice(properties, func(i, j int) bool {
left, _ := strconv.ParseUint(properties[i][0], 16, 64)
right, _ := strconv.ParseUint(properties[j][0], 16, 64)
return left < right
})
// Header.
var (
buf bytes.Buffer
emojiComment string
)
columns := 3
if includeGeneralCategory {
columns = 4
}
if emojiURL != "" {
emojiComment = `
// and
// ` + emojiURL + `
// ("Extended_Pictographic" only)`
}
buf.WriteString(`// Code generated via go generate from gen_properties.go. DO NOT EDIT.
package uniseg
// ` + os.Args[3] + ` are taken from
// ` + propertyURL + emojiComment + `
// on ` + time.Now().Format("January 2, 2006") + `. See https://www.unicode.org/license.html for the Unicode
// license agreement.
var ` + os.Args[3] + ` = [][` + strconv.Itoa(columns) + `]int{
`)
// Properties.
for _, prop := range properties {
if includeGeneralCategory {
generalCategory := "gc" + prop[3][:2]
if generalCategory == "gcL&" {
generalCategory = "gcLC"
}
prop[3] = prop[3][3:]
fmt.Fprintf(&buf, "{0x%s,0x%s,%s,%s}, // %s\n", prop[0], prop[1], translateProperty("pr", prop[2]), generalCategory, prop[3])
} else {
fmt.Fprintf(&buf, "{0x%s,0x%s,%s}, // %s\n", prop[0], prop[1], translateProperty("pr", prop[2]), prop[3])
}
}
// Tail.
buf.WriteString("}")
return buf.String(), nil
}
// parseProperty parses a line of the Unicode properties text file containing a
// property for a code point range and returns it along with its comment.
func parseProperty(line string) (from, to, property, comment string, err error) {
fields := propertyPattern.FindStringSubmatch(line)
if fields == nil {
err = errors.New("no property found")
return
}
from = fields[1]
to = fields[3]
if to == "" {
to = from
}
property = fields[4]
comment = fields[5]
return
}
// translateProperty translates a property name as used in the Unicode data file
// to a variable used in the Go code.
func translateProperty(prefix, property string) string {
return prefix + strings.ReplaceAll(property, "_", "")
}
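As a hedged illustration of how the pieces above fit together (not part of the upstream file): an emoji-data.txt line such as

1F6EB..1F6EC  ; Emoji_Presentation  # E1.0 [2] (🛫..🛬) airplane departure..airplane arrival

is matched by propertyPattern, its property name is translated by translateProperty("pr", "Emoji_Presentation") into "prEmojiPresentation", and parse emits the row (tidied by format.Source)

{0x1F6EB, 0x1F6EC, prEmojiPresentation}, // E1.0 [2] (🛫..🛬) airplane departure..airplane arrival

which matches the corresponding entry in the emojipresentation.go table shown earlier.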


@ -2,267 +2,330 @@ package uniseg
import "unicode/utf8"
// The states of the grapheme cluster parser.
const (
grAny = iota
grCR
grControlLF
grL
grLVV
grLVTT
grPrepend
grExtendedPictographic
grExtendedPictographicZWJ
grRIOdd
grRIEven
)
// The grapheme cluster parser's breaking instructions.
const (
grNoBoundary = iota
grBoundary
)
// The grapheme cluster parser's state transitions. Maps (state, property) to
// (new state, breaking instruction, rule number). The breaking instruction
// always refers to the boundary between the last and next code point.
// Graphemes implements an iterator over Unicode grapheme clusters, or
// user-perceived characters. While iterating, it also provides information
// about word boundaries, sentence boundaries, line breaks, and monospace
// character widths.
//
// This map is queried as follows:
// After constructing the class via [NewGraphemes] for a given string "str",
// [Graphemes.Next] is called for every grapheme cluster in a loop until it
// returns false. Inside the loop, information about the grapheme cluster as
// well as boundary information and character width is available via the various
// methods (see examples below).
//
// 1. Find specific state + specific property. Stop if found.
// 2. Find specific state + any property.
// 3. Find any state + specific property.
// 4. If only (2) or (3) (but not both) was found, stop.
// 5. If both (2) and (3) were found, use state and breaking instruction from
// the transition with the lower rule number, prefer (3) if rule numbers
// are equal. Stop.
// 6. Assume grAny and grBoundary.
var grTransitions = map[[2]int][3]int{
// GB5
{grAny, prCR}: {grCR, grBoundary, 50},
{grAny, prLF}: {grControlLF, grBoundary, 50},
{grAny, prControl}: {grControlLF, grBoundary, 50},
// GB4
{grCR, prAny}: {grAny, grBoundary, 40},
{grControlLF, prAny}: {grAny, grBoundary, 40},
// GB3.
{grCR, prLF}: {grAny, grNoBoundary, 30},
// GB6.
{grAny, prL}: {grL, grBoundary, 9990},
{grL, prL}: {grL, grNoBoundary, 60},
{grL, prV}: {grLVV, grNoBoundary, 60},
{grL, prLV}: {grLVV, grNoBoundary, 60},
{grL, prLVT}: {grLVTT, grNoBoundary, 60},
// GB7.
{grAny, prLV}: {grLVV, grBoundary, 9990},
{grAny, prV}: {grLVV, grBoundary, 9990},
{grLVV, prV}: {grLVV, grNoBoundary, 70},
{grLVV, prT}: {grLVTT, grNoBoundary, 70},
// GB8.
{grAny, prLVT}: {grLVTT, grBoundary, 9990},
{grAny, prT}: {grLVTT, grBoundary, 9990},
{grLVTT, prT}: {grLVTT, grNoBoundary, 80},
// GB9.
{grAny, prExtend}: {grAny, grNoBoundary, 90},
{grAny, prZWJ}: {grAny, grNoBoundary, 90},
// GB9a.
{grAny, prSpacingMark}: {grAny, grNoBoundary, 91},
// GB9b.
{grAny, prPreprend}: {grPrepend, grBoundary, 9990},
{grPrepend, prAny}: {grAny, grNoBoundary, 92},
// GB11.
{grAny, prExtendedPictographic}: {grExtendedPictographic, grBoundary, 9990},
{grExtendedPictographic, prExtend}: {grExtendedPictographic, grNoBoundary, 110},
{grExtendedPictographic, prZWJ}: {grExtendedPictographicZWJ, grNoBoundary, 110},
{grExtendedPictographicZWJ, prExtendedPictographic}: {grExtendedPictographic, grNoBoundary, 110},
// GB12 / GB13.
{grAny, prRegionalIndicator}: {grRIOdd, grBoundary, 9990},
{grRIOdd, prRegionalIndicator}: {grRIEven, grNoBoundary, 120},
{grRIEven, prRegionalIndicator}: {grRIOdd, grBoundary, 120},
}
// Graphemes implements an iterator over Unicode extended grapheme clusters,
// specified in the Unicode Standard Annex #29. Grapheme clusters correspond to
// "user-perceived characters". These characters often consist of multiple
// code points (e.g. the "woman kissing woman" emoji consists of 8 code points:
// woman + ZWJ + heavy black heart (2 code points) + ZWJ + kiss mark + ZWJ +
// woman) and the rules described in Annex #29 must be applied to group those
// code points into clusters perceived by the user as one character.
// This class basically wraps the [StepString] parser and provides a convenient
// interface to it. If you are only interested in some parts of this package's
// functionality, using the specialized functions starting with "First" is
// almost always faster.
type Graphemes struct {
// The code points over which this class iterates.
codePoints []rune
// The original string.
original string
// The (byte-based) indices of the code points into the original string plus
// len(original string). Thus, len(indices) = len(codePoints) + 1.
indices []int
// The remaining string to be parsed.
remaining string
// The current grapheme cluster to be returned. These are indices into
// codePoints/indices. If start == end, we either haven't started iterating
// yet (0) or the iteration has already completed (1).
start, end int
// The current grapheme cluster.
cluster string
// The index of the next code point to be parsed.
pos int
// The byte offset of the current grapheme cluster relative to the original
// string.
offset int
// The current state of the code point parser.
// The current boundary information of the [Step] parser.
boundaries int
// The current state of the [Step] parser.
state int
}
// NewGraphemes returns a new grapheme cluster iterator.
func NewGraphemes(s string) *Graphemes {
l := utf8.RuneCountInString(s)
codePoints := make([]rune, l)
indices := make([]int, l+1)
i := 0
for pos, r := range s {
codePoints[i] = r
indices[i] = pos
i++
func NewGraphemes(str string) *Graphemes {
return &Graphemes{
original: str,
remaining: str,
state: -1,
}
indices[l] = len(s)
g := &Graphemes{
codePoints: codePoints,
indices: indices,
}
g.Next() // Parse ahead.
return g
}
// Next advances the iterator by one grapheme cluster and returns false if no
// clusters are left. This function must be called before the first cluster is
// accessed.
func (g *Graphemes) Next() bool {
g.start = g.end
// The state transition gives us a boundary instruction BEFORE the next code
// point so we always need to stay ahead by one code point.
// Parse the next code point.
for g.pos <= len(g.codePoints) {
// GB2.
if g.pos == len(g.codePoints) {
g.end = g.pos
g.pos++
break
}
// Determine the property of the next character.
nextProperty := property(g.codePoints[g.pos])
g.pos++
// Find the applicable transition.
var boundary bool
transition, ok := grTransitions[[2]int{g.state, nextProperty}]
if ok {
// We have a specific transition. We'll use it.
g.state = transition[0]
boundary = transition[1] == grBoundary
} else {
// No specific transition found. Try the less specific ones.
transAnyProp, okAnyProp := grTransitions[[2]int{g.state, prAny}]
transAnyState, okAnyState := grTransitions[[2]int{grAny, nextProperty}]
if okAnyProp && okAnyState {
// Both apply. We'll use a mix (see comments for grTransitions).
g.state = transAnyState[0]
boundary = transAnyState[1] == grBoundary
if transAnyProp[2] < transAnyState[2] {
g.state = transAnyProp[0]
boundary = transAnyProp[1] == grBoundary
}
} else if okAnyProp {
// We only have a specific state.
g.state = transAnyProp[0]
boundary = transAnyProp[1] == grBoundary
// This branch will probably never be reached because okAnyState will
// always be true given the current transition map. But we keep it here
// for future modifications to the transition map where this may not be
// true anymore.
} else if okAnyState {
// We only have a specific property.
g.state = transAnyState[0]
boundary = transAnyState[1] == grBoundary
} else {
// No known transition. GB999: Any x Any.
g.state = grAny
boundary = true
}
}
// If we found a cluster boundary, let's stop here. The current cluster will
// be the one that just ended.
if g.pos-1 == 0 /* GB1 */ || boundary {
g.end = g.pos - 1
break
}
if len(g.remaining) == 0 {
// We're already past the end.
g.state = -2
g.cluster = ""
return false
}
return g.start != g.end
g.offset += len(g.cluster)
g.cluster, g.remaining, g.boundaries, g.state = StepString(g.remaining, g.state)
return true
}
// Runes returns a slice of runes (code points) which corresponds to the current
// grapheme cluster. If the iterator is already past the end or Next() has not
// yet been called, nil is returned.
// grapheme cluster. If the iterator is already past the end or [Graphemes.Next]
// has not yet been called, nil is returned.
func (g *Graphemes) Runes() []rune {
if g.start == g.end {
if g.state < 0 {
return nil
}
return g.codePoints[g.start:g.end]
return []rune(g.cluster)
}
// Str returns a substring of the original string which corresponds to the
// current grapheme cluster. If the iterator is already past the end or Next()
// has not yet been called, an empty string is returned.
// current grapheme cluster. If the iterator is already past the end or
// [Graphemes.Next] has not yet been called, an empty string is returned.
func (g *Graphemes) Str() string {
if g.start == g.end {
return ""
}
return string(g.codePoints[g.start:g.end])
return g.cluster
}
// Bytes returns a byte slice which corresponds to the current grapheme cluster.
// If the iterator is already past the end or Next() has not yet been called,
// nil is returned.
// If the iterator is already past the end or [Graphemes.Next] has not yet been
// called, nil is returned.
func (g *Graphemes) Bytes() []byte {
if g.start == g.end {
if g.state < 0 {
return nil
}
return []byte(string(g.codePoints[g.start:g.end]))
return []byte(g.cluster)
}
// Positions returns the interval of the current grapheme cluster as byte
// positions into the original string. The first returned value "from" indexes
// the first byte and the second returned value "to" indexes the first byte that
// is not included anymore, i.e. str[from:to] is the current grapheme cluster of
// the original string "str". If Next() has not yet been called, both values are
// 0. If the iterator is already past the end, both values are 1.
// the original string "str". If [Graphemes.Next] has not yet been called, both
// values are 0. If the iterator is already past the end, both values are 1.
func (g *Graphemes) Positions() (int, int) {
return g.indices[g.start], g.indices[g.end]
if g.state == -1 {
return 0, 0
} else if g.state == -2 {
return 1, 1
}
return g.offset, g.offset + len(g.cluster)
}
// IsWordBoundary returns true if a word ends after the current grapheme
// cluster.
func (g *Graphemes) IsWordBoundary() bool {
if g.state < 0 {
return true
}
return g.boundaries&MaskWord != 0
}
// IsSentenceBoundary returns true if a sentence ends after the current
// grapheme cluster.
func (g *Graphemes) IsSentenceBoundary() bool {
if g.state < 0 {
return true
}
return g.boundaries&MaskSentence != 0
}
// LineBreak returns whether the line can be broken after the current grapheme
// cluster. A value of [LineDontBreak] means the line may not be broken, a value
// of [LineMustBreak] means the line must be broken, and a value of
// [LineCanBreak] means the line may or may not be broken.
func (g *Graphemes) LineBreak() int {
if g.state == -1 {
return LineDontBreak
}
if g.state == -2 {
return LineMustBreak
}
return g.boundaries & MaskLine
}
// Width returns the monospace width of the current grapheme cluster.
func (g *Graphemes) Width() int {
if g.state < 0 {
return 0
}
return g.boundaries >> ShiftWidth
}
// Reset puts the iterator into its initial state such that the next call to
// Next() sets it to the first grapheme cluster again.
// [Graphemes.Next] sets it to the first grapheme cluster again.
func (g *Graphemes) Reset() {
g.start, g.end, g.pos, g.state = 0, 0, 0, grAny
g.Next() // Parse ahead again.
g.state = -1
g.offset = 0
g.cluster = ""
g.remaining = g.original
}
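// A minimal usage sketch of the iterator described above (illustrative only, not
// part of the upstream file), assuming this package is imported as "uniseg":
//
//	g := uniseg.NewGraphemes("🇩🇪!")
//	for g.Next() {
//		fmt.Printf("%q has width %d\n", g.Str(), g.Width())
//	}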
// GraphemeClusterCount returns the number of user-perceived characters
// (grapheme clusters) for the given string. To calculate this number, it
// iterates through the string using the Graphemes iterator.
// (grapheme clusters) for the given string.
func GraphemeClusterCount(s string) (n int) {
g := NewGraphemes(s)
for g.Next() {
state := -1
for len(s) > 0 {
_, s, _, state = FirstGraphemeClusterInString(s, state)
n++
}
return
}
// ReverseString reverses the given string while observing grapheme cluster
// boundaries.
func ReverseString(s string) string {
str := []byte(s)
reversed := make([]byte, len(str))
state := -1
index := len(str)
for len(str) > 0 {
var cluster []byte
cluster, str, _, state = FirstGraphemeCluster(str, state)
index -= len(cluster)
copy(reversed[index:], cluster)
if index <= len(str)/2 {
break
}
}
return string(reversed)
}
// The number of bits the grapheme property must be shifted to make place for
// grapheme states.
const shiftGraphemePropState = 4
// FirstGraphemeCluster returns the first grapheme cluster found in the given
// byte slice according to the rules of [Unicode Standard Annex #29, Grapheme
// Cluster Boundaries]. This function can be called continuously to extract all
// grapheme clusters from a byte slice, as illustrated in the example below.
//
// If you don't know the current state, for example when calling the function
// for the first time, you must pass -1. For consecutive calls, pass the state
// and rest slice returned by the previous call.
//
// The "rest" slice is the sub-slice of the original byte slice "b" starting
// after the last byte of the identified grapheme cluster. If the length of the
// "rest" slice is 0, the entire byte slice "b" has been processed. The
// "cluster" byte slice is the sub-slice of the input slice containing the
// identified grapheme cluster.
//
// The returned width is the width of the grapheme cluster for most monospace
// fonts where a value of 1 represents one character cell.
//
// Given an empty byte slice "b", the function returns nil values.
//
// While slightly less convenient than using the Graphemes class, this function
// has much better performance and makes no allocations. It lends itself well to
// large byte slices.
//
// [Unicode Standard Annex #29, Grapheme Cluster Boundaries]: http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries
func FirstGraphemeCluster(b []byte, state int) (cluster, rest []byte, width, newState int) {
// An empty byte slice returns nothing.
if len(b) == 0 {
return
}
// Extract the first rune.
r, length := utf8.DecodeRune(b)
if len(b) <= length { // If we're already past the end, there is nothing else to parse.
var prop int
if state < 0 {
prop = propertyGraphemes(r)
} else {
prop = state >> shiftGraphemePropState
}
return b, nil, runeWidth(r, prop), grAny | (prop << shiftGraphemePropState)
}
// If we don't know the state, determine it now.
var firstProp int
if state < 0 {
state, firstProp, _ = transitionGraphemeState(state, r)
} else {
firstProp = state >> shiftGraphemePropState
}
width += runeWidth(r, firstProp)
// Transition until we find a boundary.
for {
var (
prop int
boundary bool
)
r, l := utf8.DecodeRune(b[length:])
state, prop, boundary = transitionGraphemeState(state&maskGraphemeState, r)
if boundary {
return b[:length], b[length:], width, state | (prop << shiftGraphemePropState)
}
if firstProp == prExtendedPictographic {
if r == vs15 {
width = 1
} else if r == vs16 {
width = 2
}
} else if firstProp != prRegionalIndicator && firstProp != prL {
width += runeWidth(r, prop)
}
length += l
if len(b) <= length {
return b, nil, width, grAny | (prop << shiftGraphemePropState)
}
}
}
// FirstGraphemeClusterInString is like [FirstGraphemeCluster] but its input and
// outputs are strings.
func FirstGraphemeClusterInString(str string, state int) (cluster, rest string, width, newState int) {
// An empty string returns nothing.
if len(str) == 0 {
return
}
// Extract the first rune.
r, length := utf8.DecodeRuneInString(str)
if len(str) <= length { // If we're already past the end, there is nothing else to parse.
var prop int
if state < 0 {
prop = propertyGraphemes(r)
} else {
prop = state >> shiftGraphemePropState
}
return str, "", runeWidth(r, prop), grAny | (prop << shiftGraphemePropState)
}
// If we don't know the state, determine it now.
var firstProp int
if state < 0 {
state, firstProp, _ = transitionGraphemeState(state, r)
} else {
firstProp = state >> shiftGraphemePropState
}
width += runeWidth(r, firstProp)
// Transition until we find a boundary.
for {
var (
prop int
boundary bool
)
r, l := utf8.DecodeRuneInString(str[length:])
state, prop, boundary = transitionGraphemeState(state&maskGraphemeState, r)
if boundary {
return str[:length], str[length:], width, state | (prop << shiftGraphemePropState)
}
if firstProp == prExtendedPictographic {
if r == vs15 {
width = 1
} else if r == vs16 {
width = 2
}
} else if firstProp != prRegionalIndicator && firstProp != prL {
width += runeWidth(r, prop)
}
length += l
if len(str) <= length {
return str, "", width, grAny | (prop << shiftGraphemePropState)
}
}
}
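A minimal sketch of the call pattern the doc comment above alludes to (an assumed example, not taken from the upstream sources; it presumes the package is imported as "uniseg"):

state := -1
s := "Käse🏳️‍🌈"
for len(s) > 0 {
	var cluster string
	cluster, s, _, state = uniseg.FirstGraphemeClusterInString(s, state)
	fmt.Println(cluster)
}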

1915
vendor/github.com/rivo/uniseg/graphemeproperties.go generated vendored Normal file

File diff suppressed because it is too large

176
vendor/github.com/rivo/uniseg/graphemerules.go generated vendored Normal file

@ -0,0 +1,176 @@
package uniseg
// The states of the grapheme cluster parser.
const (
grAny = iota
grCR
grControlLF
grL
grLVV
grLVTT
grPrepend
grExtendedPictographic
grExtendedPictographicZWJ
grRIOdd
grRIEven
)
// The grapheme cluster parser's breaking instructions.
const (
grNoBoundary = iota
grBoundary
)
// grTransitions implements the grapheme cluster parser's state transitions.
// Maps state and property to a new state, a breaking instruction, and rule
// number. The breaking instruction always refers to the boundary between the
// last and next code point. Returns negative values if no transition is found.
//
// This function is used as follows:
//
// 1. Find specific state + specific property. Stop if found.
// 2. Find specific state + any property.
// 3. Find any state + specific property.
// 4. If only (2) or (3) (but not both) was found, stop.
// 5. If both (2) and (3) were found, use state from (3) and breaking instruction
// from the transition with the lower rule number, prefer (3) if rule numbers
// are equal. Stop.
// 6. Assume grAny and grBoundary.
//
// Unicode version 15.0.0.
func grTransitions(state, prop int) (newState int, newProp int, boundary int) {
// It turns out that using a big switch statement is much faster than using
// a map.
switch uint64(state) | uint64(prop)<<32 {
// GB5
case grAny | prCR<<32:
return grCR, grBoundary, 50
case grAny | prLF<<32:
return grControlLF, grBoundary, 50
case grAny | prControl<<32:
return grControlLF, grBoundary, 50
// GB4
case grCR | prAny<<32:
return grAny, grBoundary, 40
case grControlLF | prAny<<32:
return grAny, grBoundary, 40
// GB3
case grCR | prLF<<32:
return grControlLF, grNoBoundary, 30
// GB6
case grAny | prL<<32:
return grL, grBoundary, 9990
case grL | prL<<32:
return grL, grNoBoundary, 60
case grL | prV<<32:
return grLVV, grNoBoundary, 60
case grL | prLV<<32:
return grLVV, grNoBoundary, 60
case grL | prLVT<<32:
return grLVTT, grNoBoundary, 60
// GB7
case grAny | prLV<<32:
return grLVV, grBoundary, 9990
case grAny | prV<<32:
return grLVV, grBoundary, 9990
case grLVV | prV<<32:
return grLVV, grNoBoundary, 70
case grLVV | prT<<32:
return grLVTT, grNoBoundary, 70
// GB8
case grAny | prLVT<<32:
return grLVTT, grBoundary, 9990
case grAny | prT<<32:
return grLVTT, grBoundary, 9990
case grLVTT | prT<<32:
return grLVTT, grNoBoundary, 80
// GB9
case grAny | prExtend<<32:
return grAny, grNoBoundary, 90
case grAny | prZWJ<<32:
return grAny, grNoBoundary, 90
// GB9a
case grAny | prSpacingMark<<32:
return grAny, grNoBoundary, 91
// GB9b
case grAny | prPrepend<<32:
return grPrepend, grBoundary, 9990
case grPrepend | prAny<<32:
return grAny, grNoBoundary, 92
// GB11
case grAny | prExtendedPictographic<<32:
return grExtendedPictographic, grBoundary, 9990
case grExtendedPictographic | prExtend<<32:
return grExtendedPictographic, grNoBoundary, 110
case grExtendedPictographic | prZWJ<<32:
return grExtendedPictographicZWJ, grNoBoundary, 110
case grExtendedPictographicZWJ | prExtendedPictographic<<32:
return grExtendedPictographic, grNoBoundary, 110
// GB12 / GB13
case grAny | prRegionalIndicator<<32:
return grRIOdd, grBoundary, 9990
case grRIOdd | prRegionalIndicator<<32:
return grRIEven, grNoBoundary, 120
case grRIEven | prRegionalIndicator<<32:
return grRIOdd, grBoundary, 120
default:
return -1, -1, -1
}
}
// transitionGraphemeState determines the new state of the grapheme cluster
// parser given the current state and the next code point. It also returns the
// code point's grapheme property (the value mapped by the [graphemeCodePoints]
// table) and whether a cluster boundary was detected.
func transitionGraphemeState(state int, r rune) (newState, prop int, boundary bool) {
// Determine the property of the next character.
prop = propertyGraphemes(r)
// Find the applicable transition.
nextState, nextProp, _ := grTransitions(state, prop)
if nextState >= 0 {
// We have a specific transition. We'll use it.
return nextState, prop, nextProp == grBoundary
}
// No specific transition found. Try the less specific ones.
anyPropState, anyPropProp, anyPropRule := grTransitions(state, prAny)
anyStateState, anyStateProp, anyStateRule := grTransitions(grAny, prop)
if anyPropState >= 0 && anyStateState >= 0 {
// Both apply. We'll use a mix (see comments for grTransitions).
newState = anyStateState
boundary = anyStateProp == grBoundary
if anyPropRule < anyStateRule {
boundary = anyPropProp == grBoundary
}
return
}
if anyPropState >= 0 {
// We only have a specific state.
return anyPropState, prop, anyPropProp == grBoundary
// This branch will probably never be reached because the (grAny, prop) lookup
// above almost always finds a transition given the current table. But we keep
// it here for future modifications where this may not be true anymore.
}
if anyStateState >= 0 {
// We only have a specific property.
return anyStateState, prop, anyStateProp == grBoundary
}
// No known transition. GB999: Any ÷ Any.
return grAny, prop, true
}
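To make the lookup order above concrete, a hedged in-package worked example (illustrative only):

// U+0308 COMBINING DIAERESIS carries the Extend property.
newState, prop, boundary := transitionGraphemeState(grL, 0x0308)
// Steps 1 and 2 find no (grL, prExtend) and no (grL, prAny) case, so step 3 uses
// the (grAny, prExtend) transition (GB9): newState == grAny, boundary == false.
_, _, _ = newState, prop, boundary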

134
vendor/github.com/rivo/uniseg/line.go generated vendored Normal file

@ -0,0 +1,134 @@
package uniseg
import "unicode/utf8"
// FirstLineSegment returns the prefix of the given byte slice after which a
// decision to break the string over to the next line can or must be made,
// according to the rules of [Unicode Standard Annex #14]. This is used to
// implement line breaking.
//
// Line breaking, also known as word wrapping, is the process of breaking a
// section of text into lines such that it will fit in the available width of a
// page, window or other display area.
//
// The returned "segment" may not be broken into smaller parts, unless no other
// breaking opportunities present themselves, in which case you may break by
// grapheme clusters (using the [FirstGraphemeCluster] function to determine the
// grapheme clusters).
//
// The "mustBreak" flag indicates whether you MUST break the line after the
// given segment (true), for example after newline characters, or you MAY break
// the line after the given segment (false).
//
// This function can be called continuously to extract all non-breaking sub-sets
// from a byte slice, as illustrated in the example below.
//
// If you don't know the current state, for example when calling the function
// for the first time, you must pass -1. For consecutive calls, pass the state
// and rest slice returned by the previous call.
//
// The "rest" slice is the sub-slice of the original byte slice "b" starting
// after the last byte of the identified line segment. If the length of the
// "rest" slice is 0, the entire byte slice "b" has been processed. The
// "segment" byte slice is the sub-slice of the input slice containing the
// identified line segment.
//
// Given an empty byte slice "b", the function returns nil values.
//
// Note that in accordance with [UAX #14 LB3], the final segment will end with
// "mustBreak" set to true. You can choose to ignore this by checking if the
// length of the "rest" slice is 0 and calling [HasTrailingLineBreak] or
// [HasTrailingLineBreakInString] on the last rune.
//
// Note also that this algorithm may break within grapheme clusters. This is
// addressed in Section 8.2 Example 6 of UAX #14. To avoid this, you can use
// the [Step] function instead.
//
// [Unicode Standard Annex #14]: https://www.unicode.org/reports/tr14/
// [UAX #14 LB3]: https://www.unicode.org/reports/tr14/#Algorithm
func FirstLineSegment(b []byte, state int) (segment, rest []byte, mustBreak bool, newState int) {
// An empty byte slice returns nothing.
if len(b) == 0 {
return
}
// Extract the first rune.
r, length := utf8.DecodeRune(b)
if len(b) <= length { // If we're already past the end, there is nothing else to parse.
return b, nil, true, lbAny // LB3.
}
// If we don't know the state, determine it now.
if state < 0 {
state, _ = transitionLineBreakState(state, r, b[length:], "")
}
// Transition until we find a boundary.
var boundary int
for {
r, l := utf8.DecodeRune(b[length:])
state, boundary = transitionLineBreakState(state, r, b[length+l:], "")
if boundary != LineDontBreak {
return b[:length], b[length:], boundary == LineMustBreak, state
}
length += l
if len(b) <= length {
return b, nil, true, lbAny // LB3
}
}
}
// FirstLineSegmentInString is like [FirstLineSegment] but its input and outputs
// are strings.
func FirstLineSegmentInString(str string, state int) (segment, rest string, mustBreak bool, newState int) {
// An empty string returns nothing.
if len(str) == 0 {
return
}
// Extract the first rune.
r, length := utf8.DecodeRuneInString(str)
if len(str) <= length { // If we're already past the end, there is nothing else to parse.
return str, "", true, lbAny // LB3.
}
// If we don't know the state, determine it now.
if state < 0 {
state, _ = transitionLineBreakState(state, r, nil, str[length:])
}
// Transition until we find a boundary.
var boundary int
for {
r, l := utf8.DecodeRuneInString(str[length:])
state, boundary = transitionLineBreakState(state, r, nil, str[length+l:])
if boundary != LineDontBreak {
return str[:length], str[length:], boundary == LineMustBreak, state
}
length += l
if len(str) <= length {
return str, "", true, lbAny // LB3.
}
}
}
// HasTrailingLineBreak returns true if the last rune in the given byte slice is
// one of the hard line break code points defined in LB4 and LB5 of [UAX #14].
//
// [UAX #14]: https://www.unicode.org/reports/tr14/#Algorithm
func HasTrailingLineBreak(b []byte) bool {
r, _ := utf8.DecodeLastRune(b)
property, _ := propertyLineBreak(r)
return property == prBK || property == prCR || property == prLF || property == prNL
}
// HasTrailingLineBreakInString is like [HasTrailingLineBreak] but for a string.
func HasTrailingLineBreakInString(str string) bool {
r, _ := utf8.DecodeLastRuneInString(str)
property, _ := propertyLineBreak(r)
return property == prBK || property == prCR || property == prLF || property == prNL
}
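The "example below" referenced in the doc comments above can be pictured with a sketch along these lines (an assumption, not upstream code; it presumes the package is imported as "uniseg"):

state := -1
s := "First line.\nSecond line."
for len(s) > 0 {
	var segment string
	var mustBreak bool
	segment, s, mustBreak, state = uniseg.FirstLineSegmentInString(s, state)
	fmt.Printf("%q mustBreak=%v\n", segment, mustBreak)
}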

3554
vendor/github.com/rivo/uniseg/lineproperties.go generated vendored Normal file

File diff suppressed because it is too large

626
vendor/github.com/rivo/uniseg/linerules.go generated vendored Normal file

@ -0,0 +1,626 @@
package uniseg
import "unicode/utf8"
// The states of the line break parser.
const (
lbAny = iota
lbBK
lbCR
lbLF
lbNL
lbSP
lbZW
lbWJ
lbGL
lbBA
lbHY
lbCL
lbCP
lbEX
lbIS
lbSY
lbOP
lbQU
lbQUSP
lbNS
lbCLCPSP
lbB2
lbB2SP
lbCB
lbBB
lbLB21a
lbHL
lbAL
lbNU
lbPR
lbEB
lbIDEM
lbNUNU
lbNUSY
lbNUIS
lbNUCL
lbNUCP
lbPO
lbJL
lbJV
lbJT
lbH2
lbH3
lbOddRI
lbEvenRI
lbExtPicCn
lbZWJBit = 64
lbCPeaFWHBit = 128
)
// These constants define whether a given text may be broken into the next line.
// If the break is optional (LineCanBreak), you may choose to break or not based
// on your own criteria, for example, if the text has reached the available
// width.
const (
LineDontBreak = iota // You may not break the line here.
LineCanBreak // You may or may not break the line here.
LineMustBreak // You must break the line here.
)
// lbTransitions implements the line break parser's state transitions. It's
// analogous to [grTransitions]; see comments there for details.
//
// Unicode version 15.0.0.
func lbTransitions(state, prop int) (newState, lineBreak, rule int) {
switch uint64(state) | uint64(prop)<<32 {
// LB4.
case lbBK | prAny<<32:
return lbAny, LineMustBreak, 40
// LB5.
case lbCR | prLF<<32:
return lbLF, LineDontBreak, 50
case lbCR | prAny<<32:
return lbAny, LineMustBreak, 50
case lbLF | prAny<<32:
return lbAny, LineMustBreak, 50
case lbNL | prAny<<32:
return lbAny, LineMustBreak, 50
// LB6.
case lbAny | prBK<<32:
return lbBK, LineDontBreak, 60
case lbAny | prCR<<32:
return lbCR, LineDontBreak, 60
case lbAny | prLF<<32:
return lbLF, LineDontBreak, 60
case lbAny | prNL<<32:
return lbNL, LineDontBreak, 60
// LB7.
case lbAny | prSP<<32:
return lbSP, LineDontBreak, 70
case lbAny | prZW<<32:
return lbZW, LineDontBreak, 70
// LB8.
case lbZW | prSP<<32:
return lbZW, LineDontBreak, 70
case lbZW | prAny<<32:
return lbAny, LineCanBreak, 80
// LB11.
case lbAny | prWJ<<32:
return lbWJ, LineDontBreak, 110
case lbWJ | prAny<<32:
return lbAny, LineDontBreak, 110
// LB12.
case lbAny | prGL<<32:
return lbGL, LineCanBreak, 310
case lbGL | prAny<<32:
return lbAny, LineDontBreak, 120
// LB13 (simple transitions).
case lbAny | prCL<<32:
return lbCL, LineCanBreak, 310
case lbAny | prCP<<32:
return lbCP, LineCanBreak, 310
case lbAny | prEX<<32:
return lbEX, LineDontBreak, 130
case lbAny | prIS<<32:
return lbIS, LineCanBreak, 310
case lbAny | prSY<<32:
return lbSY, LineCanBreak, 310
// LB14.
case lbAny | prOP<<32:
return lbOP, LineCanBreak, 310
case lbOP | prSP<<32:
return lbOP, LineDontBreak, 70
case lbOP | prAny<<32:
return lbAny, LineDontBreak, 140
// LB15.
case lbQU | prSP<<32:
return lbQUSP, LineDontBreak, 70
case lbQU | prOP<<32:
return lbOP, LineDontBreak, 150
case lbQUSP | prOP<<32:
return lbOP, LineDontBreak, 150
// LB16.
case lbCL | prSP<<32:
return lbCLCPSP, LineDontBreak, 70
case lbNUCL | prSP<<32:
return lbCLCPSP, LineDontBreak, 70
case lbCP | prSP<<32:
return lbCLCPSP, LineDontBreak, 70
case lbNUCP | prSP<<32:
return lbCLCPSP, LineDontBreak, 70
case lbCL | prNS<<32:
return lbNS, LineDontBreak, 160
case lbNUCL | prNS<<32:
return lbNS, LineDontBreak, 160
case lbCP | prNS<<32:
return lbNS, LineDontBreak, 160
case lbNUCP | prNS<<32:
return lbNS, LineDontBreak, 160
case lbCLCPSP | prNS<<32:
return lbNS, LineDontBreak, 160
// LB17.
case lbAny | prB2<<32:
return lbB2, LineCanBreak, 310
case lbB2 | prSP<<32:
return lbB2SP, LineDontBreak, 70
case lbB2 | prB2<<32:
return lbB2, LineDontBreak, 170
case lbB2SP | prB2<<32:
return lbB2, LineDontBreak, 170
// LB18.
case lbSP | prAny<<32:
return lbAny, LineCanBreak, 180
case lbQUSP | prAny<<32:
return lbAny, LineCanBreak, 180
case lbCLCPSP | prAny<<32:
return lbAny, LineCanBreak, 180
case lbB2SP | prAny<<32:
return lbAny, LineCanBreak, 180
// LB19.
case lbAny | prQU<<32:
return lbQU, LineDontBreak, 190
case lbQU | prAny<<32:
return lbAny, LineDontBreak, 190
// LB20.
case lbAny | prCB<<32:
return lbCB, LineCanBreak, 200
case lbCB | prAny<<32:
return lbAny, LineCanBreak, 200
// LB21.
case lbAny | prBA<<32:
return lbBA, LineDontBreak, 210
case lbAny | prHY<<32:
return lbHY, LineDontBreak, 210
case lbAny | prNS<<32:
return lbNS, LineDontBreak, 210
case lbAny | prBB<<32:
return lbBB, LineCanBreak, 310
case lbBB | prAny<<32:
return lbAny, LineDontBreak, 210
// LB21a.
case lbAny | prHL<<32:
return lbHL, LineCanBreak, 310
case lbHL | prHY<<32:
return lbLB21a, LineDontBreak, 210
case lbHL | prBA<<32:
return lbLB21a, LineDontBreak, 210
case lbLB21a | prAny<<32:
return lbAny, LineDontBreak, 211
// LB21b.
case lbSY | prHL<<32:
return lbHL, LineDontBreak, 212
case lbNUSY | prHL<<32:
return lbHL, LineDontBreak, 212
// LB22.
case lbAny | prIN<<32:
return lbAny, LineDontBreak, 220
// LB23.
case lbAny | prAL<<32:
return lbAL, LineCanBreak, 310
case lbAny | prNU<<32:
return lbNU, LineCanBreak, 310
case lbAL | prNU<<32:
return lbNU, LineDontBreak, 230
case lbHL | prNU<<32:
return lbNU, LineDontBreak, 230
case lbNU | prAL<<32:
return lbAL, LineDontBreak, 230
case lbNU | prHL<<32:
return lbHL, LineDontBreak, 230
case lbNUNU | prAL<<32:
return lbAL, LineDontBreak, 230
case lbNUNU | prHL<<32:
return lbHL, LineDontBreak, 230
// LB23a.
case lbAny | prPR<<32:
return lbPR, LineCanBreak, 310
case lbAny | prID<<32:
return lbIDEM, LineCanBreak, 310
case lbAny | prEB<<32:
return lbEB, LineCanBreak, 310
case lbAny | prEM<<32:
return lbIDEM, LineCanBreak, 310
case lbPR | prID<<32:
return lbIDEM, LineDontBreak, 231
case lbPR | prEB<<32:
return lbEB, LineDontBreak, 231
case lbPR | prEM<<32:
return lbIDEM, LineDontBreak, 231
case lbIDEM | prPO<<32:
return lbPO, LineDontBreak, 231
case lbEB | prPO<<32:
return lbPO, LineDontBreak, 231
// LB24.
case lbAny | prPO<<32:
return lbPO, LineCanBreak, 310
case lbPR | prAL<<32:
return lbAL, LineDontBreak, 240
case lbPR | prHL<<32:
return lbHL, LineDontBreak, 240
case lbPO | prAL<<32:
return lbAL, LineDontBreak, 240
case lbPO | prHL<<32:
return lbHL, LineDontBreak, 240
case lbAL | prPR<<32:
return lbPR, LineDontBreak, 240
case lbAL | prPO<<32:
return lbPO, LineDontBreak, 240
case lbHL | prPR<<32:
return lbPR, LineDontBreak, 240
case lbHL | prPO<<32:
return lbPO, LineDontBreak, 240
// LB25 (simple transitions).
case lbPR | prNU<<32:
return lbNU, LineDontBreak, 250
case lbPO | prNU<<32:
return lbNU, LineDontBreak, 250
case lbOP | prNU<<32:
return lbNU, LineDontBreak, 250
case lbHY | prNU<<32:
return lbNU, LineDontBreak, 250
case lbNU | prNU<<32:
return lbNUNU, LineDontBreak, 250
case lbNU | prSY<<32:
return lbNUSY, LineDontBreak, 250
case lbNU | prIS<<32:
return lbNUIS, LineDontBreak, 250
case lbNUNU | prNU<<32:
return lbNUNU, LineDontBreak, 250
case lbNUNU | prSY<<32:
return lbNUSY, LineDontBreak, 250
case lbNUNU | prIS<<32:
return lbNUIS, LineDontBreak, 250
case lbNUSY | prNU<<32:
return lbNUNU, LineDontBreak, 250
case lbNUSY | prSY<<32:
return lbNUSY, LineDontBreak, 250
case lbNUSY | prIS<<32:
return lbNUIS, LineDontBreak, 250
case lbNUIS | prNU<<32:
return lbNUNU, LineDontBreak, 250
case lbNUIS | prSY<<32:
return lbNUSY, LineDontBreak, 250
case lbNUIS | prIS<<32:
return lbNUIS, LineDontBreak, 250
case lbNU | prCL<<32:
return lbNUCL, LineDontBreak, 250
case lbNU | prCP<<32:
return lbNUCP, LineDontBreak, 250
case lbNUNU | prCL<<32:
return lbNUCL, LineDontBreak, 250
case lbNUNU | prCP<<32:
return lbNUCP, LineDontBreak, 250
case lbNUSY | prCL<<32:
return lbNUCL, LineDontBreak, 250
case lbNUSY | prCP<<32:
return lbNUCP, LineDontBreak, 250
case lbNUIS | prCL<<32:
return lbNUCL, LineDontBreak, 250
case lbNUIS | prCP<<32:
return lbNUCP, LineDontBreak, 250
case lbNU | prPO<<32:
return lbPO, LineDontBreak, 250
case lbNUNU | prPO<<32:
return lbPO, LineDontBreak, 250
case lbNUSY | prPO<<32:
return lbPO, LineDontBreak, 250
case lbNUIS | prPO<<32:
return lbPO, LineDontBreak, 250
case lbNUCL | prPO<<32:
return lbPO, LineDontBreak, 250
case lbNUCP | prPO<<32:
return lbPO, LineDontBreak, 250
case lbNU | prPR<<32:
return lbPR, LineDontBreak, 250
case lbNUNU | prPR<<32:
return lbPR, LineDontBreak, 250
case lbNUSY | prPR<<32:
return lbPR, LineDontBreak, 250
case lbNUIS | prPR<<32:
return lbPR, LineDontBreak, 250
case lbNUCL | prPR<<32:
return lbPR, LineDontBreak, 250
case lbNUCP | prPR<<32:
return lbPR, LineDontBreak, 250
// LB26.
case lbAny | prJL<<32:
return lbJL, LineCanBreak, 310
case lbAny | prJV<<32:
return lbJV, LineCanBreak, 310
case lbAny | prJT<<32:
return lbJT, LineCanBreak, 310
case lbAny | prH2<<32:
return lbH2, LineCanBreak, 310
case lbAny | prH3<<32:
return lbH3, LineCanBreak, 310
case lbJL | prJL<<32:
return lbJL, LineDontBreak, 260
case lbJL | prJV<<32:
return lbJV, LineDontBreak, 260
case lbJL | prH2<<32:
return lbH2, LineDontBreak, 260
case lbJL | prH3<<32:
return lbH3, LineDontBreak, 260
case lbJV | prJV<<32:
return lbJV, LineDontBreak, 260
case lbJV | prJT<<32:
return lbJT, LineDontBreak, 260
case lbH2 | prJV<<32:
return lbJV, LineDontBreak, 260
case lbH2 | prJT<<32:
return lbJT, LineDontBreak, 260
case lbJT | prJT<<32:
return lbJT, LineDontBreak, 260
case lbH3 | prJT<<32:
return lbJT, LineDontBreak, 260
// LB27.
case lbJL | prPO<<32:
return lbPO, LineDontBreak, 270
case lbJV | prPO<<32:
return lbPO, LineDontBreak, 270
case lbJT | prPO<<32:
return lbPO, LineDontBreak, 270
case lbH2 | prPO<<32:
return lbPO, LineDontBreak, 270
case lbH3 | prPO<<32:
return lbPO, LineDontBreak, 270
case lbPR | prJL<<32:
return lbJL, LineDontBreak, 270
case lbPR | prJV<<32:
return lbJV, LineDontBreak, 270
case lbPR | prJT<<32:
return lbJT, LineDontBreak, 270
case lbPR | prH2<<32:
return lbH2, LineDontBreak, 270
case lbPR | prH3<<32:
return lbH3, LineDontBreak, 270
// LB28.
case lbAL | prAL<<32:
return lbAL, LineDontBreak, 280
case lbAL | prHL<<32:
return lbHL, LineDontBreak, 280
case lbHL | prAL<<32:
return lbAL, LineDontBreak, 280
case lbHL | prHL<<32:
return lbHL, LineDontBreak, 280
// LB29.
case lbIS | prAL<<32:
return lbAL, LineDontBreak, 290
case lbIS | prHL<<32:
return lbHL, LineDontBreak, 290
case lbNUIS | prAL<<32:
return lbAL, LineDontBreak, 290
case lbNUIS | prHL<<32:
return lbHL, LineDontBreak, 290
default:
return -1, -1, -1
}
}
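// Illustrative lookups (not part of the upstream file): lbTransitions(lbCR, prLF)
// returns (lbLF, LineDontBreak, 50), i.e. LB5 keeps CR and LF together, while
// lbTransitions(lbCR, prAL) returns (-1, -1, -1); the caller below then mixes the
// (lbCR, prAny) and (lbAny, prAL) transitions and takes the break decision
// (LineMustBreak) from the lower-numbered rule, LB5.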
// transitionLineBreakState determines the new state of the line break parser
// given the current state and the next code point. It also returns the type of
// line break: LineDontBreak, LineCanBreak, or LineMustBreak. If more than one
// code point is needed to determine the new state, the byte slice or the string
// starting after rune "r" can be used (whichever is not nil or empty) for
// further lookups.
func transitionLineBreakState(state int, r rune, b []byte, str string) (newState int, lineBreak int) {
// Determine the property of the next character.
nextProperty, generalCategory := propertyLineBreak(r)
// Prepare.
var forceNoBreak, isCPeaFWH bool
if state >= 0 && state&lbCPeaFWHBit != 0 {
isCPeaFWH = true // LB30: CP but ea is not F, W, or H.
state = state &^ lbCPeaFWHBit
}
if state >= 0 && state&lbZWJBit != 0 {
state = state &^ lbZWJBit // Extract zero-width joiner bit.
forceNoBreak = true // LB8a.
}
defer func() {
// Transition into LB30.
if newState == lbCP || newState == lbNUCP {
ea := propertyEastAsianWidth(r)
if ea != prF && ea != prW && ea != prH {
newState |= lbCPeaFWHBit
}
}
// Override break.
if forceNoBreak {
lineBreak = LineDontBreak
}
}()
// LB1.
if nextProperty == prAI || nextProperty == prSG || nextProperty == prXX {
nextProperty = prAL
} else if nextProperty == prSA {
if generalCategory == gcMn || generalCategory == gcMc {
nextProperty = prCM
} else {
nextProperty = prAL
}
} else if nextProperty == prCJ {
nextProperty = prNS
}
// Combining marks.
if nextProperty == prZWJ || nextProperty == prCM {
var bit int
if nextProperty == prZWJ {
bit = lbZWJBit
}
mustBreakState := state < 0 || state == lbBK || state == lbCR || state == lbLF || state == lbNL
if !mustBreakState && state != lbSP && state != lbZW && state != lbQUSP && state != lbCLCPSP && state != lbB2SP {
// LB9.
return state | bit, LineDontBreak
} else {
// LB10.
if mustBreakState {
return lbAL | bit, LineMustBreak
}
return lbAL | bit, LineCanBreak
}
}
// Find the applicable transition in the table.
var rule int
newState, lineBreak, rule = lbTransitions(state, nextProperty)
if newState < 0 {
// No specific transition found. Try the less specific ones.
anyPropProp, anyPropLineBreak, anyPropRule := lbTransitions(state, prAny)
anyStateProp, anyStateLineBreak, anyStateRule := lbTransitions(lbAny, nextProperty)
if anyPropProp >= 0 && anyStateProp >= 0 {
// Both apply. We'll use a mix (see comments for grTransitions).
newState, lineBreak, rule = anyStateProp, anyStateLineBreak, anyStateRule
if anyPropRule < anyStateRule {
lineBreak, rule = anyPropLineBreak, anyPropRule
}
} else if anyPropProp >= 0 {
// We only have a specific state.
newState, lineBreak, rule = anyPropProp, anyPropLineBreak, anyPropRule
// This branch will probably never be reached because a transition from the
// Any state exists for every property in the current transition set. But we
// keep it here for future modifications where this may not be true anymore.
} else if anyStateProp >= 0 {
// We only have a specific property.
newState, lineBreak, rule = anyStateProp, anyStateLineBreak, anyStateRule
} else {
// No known transition. LB31: ALL ÷ ALL.
newState, lineBreak, rule = lbAny, LineCanBreak, 310
}
}
// LB12a.
if rule > 121 &&
nextProperty == prGL &&
(state != lbSP && state != lbBA && state != lbHY && state != lbLB21a && state != lbQUSP && state != lbCLCPSP && state != lbB2SP) {
return lbGL, LineDontBreak
}
// LB13.
if rule > 130 && state != lbNU && state != lbNUNU {
switch nextProperty {
case prCL:
return lbCL, LineDontBreak
case prCP:
return lbCP, LineDontBreak
case prIS:
return lbIS, LineDontBreak
case prSY:
return lbSY, LineDontBreak
}
}
// LB25 (look ahead).
if rule > 250 &&
(state == lbPR || state == lbPO) &&
nextProperty == prOP || nextProperty == prHY {
var r rune
if b != nil { // Byte slice version.
r, _ = utf8.DecodeRune(b)
} else { // String version.
r, _ = utf8.DecodeRuneInString(str)
}
if r != utf8.RuneError {
pr, _ := propertyLineBreak(r)
if pr == prNU {
return lbNU, LineDontBreak
}
}
}
// LB30 (part one).
if rule > 300 {
if (state == lbAL || state == lbHL || state == lbNU || state == lbNUNU) && nextProperty == prOP {
ea := propertyEastAsianWidth(r)
if ea != prF && ea != prW && ea != prH {
return lbOP, LineDontBreak
}
} else if isCPeaFWH {
switch nextProperty {
case prAL:
return lbAL, LineDontBreak
case prHL:
return lbHL, LineDontBreak
case prNU:
return lbNU, LineDontBreak
}
}
}
// LB30a.
if newState == lbAny && nextProperty == prRI {
if state != lbOddRI && state != lbEvenRI { // Includes state == -1.
// Transition into the first RI.
return lbOddRI, lineBreak
}
if state == lbOddRI {
// Don't break pairs of Regional Indicators.
return lbEvenRI, LineDontBreak
}
return lbOddRI, lineBreak
}
// LB30b.
if rule > 302 {
if nextProperty == prEM {
if state == lbEB || state == lbExtPicCn {
return prAny, LineDontBreak
}
}
graphemeProperty := propertyGraphemes(r)
if graphemeProperty == prExtendedPictographic && generalCategory == gcCn {
return lbExtPicCn, LineCanBreak
}
}
return
}
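// The following helper is an illustrative sketch, not part of the upstream
// package: it shows how transitionLineBreakState can be driven rune by rune to
// collect line break opportunities for a string, mirroring the calling pattern
// of the exported First* functions. The name lineBreakOpportunities is
// hypothetical.
func lineBreakOpportunities(str string) (canBreak, mustBreak []int) {
	if len(str) == 0 {
		return
	}
	// The first rune only initializes the parser state.
	r, length := utf8.DecodeRuneInString(str)
	state, _ := transitionLineBreakState(-1, r, nil, str[length:])
	for length < len(str) {
		r, l := utf8.DecodeRuneInString(str[length:])
		var lineBreak int
		state, lineBreak = transitionLineBreakState(state, r, nil, str[length+l:])
		switch lineBreak {
		case LineCanBreak:
			canBreak = append(canBreak, length) // Optional break before this rune.
		case LineMustBreak:
			mustBreak = append(mustBreak, length) // Mandatory break before this rune.
		}
		length += l
	}
	// Per LB3, the end of the text is always a mandatory break as well; it is
	// not recorded here.
	return
}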

File diff suppressed because it is too large.

90
vendor/github.com/rivo/uniseg/sentence.go generated vendored Normal file
View File

@ -0,0 +1,90 @@
package uniseg
import "unicode/utf8"
// FirstSentence returns the first sentence found in the given byte slice
// according to the rules of [Unicode Standard Annex #29, Sentence Boundaries].
// This function can be called continuously to extract all sentences from a byte
// slice, as illustrated in the example below.
//
// If you don't know the current state, for example when calling the function
// for the first time, you must pass -1. For consecutive calls, pass the state
// and rest slice returned by the previous call.
//
// The "rest" slice is the sub-slice of the original byte slice "b" starting
// after the last byte of the identified sentence. If the length of the "rest"
// slice is 0, the entire byte slice "b" has been processed. The "sentence" byte
// slice is the sub-slice of the input slice containing the identified sentence.
//
// Given an empty byte slice "b", the function returns nil values.
//
// [Unicode Standard Annex #29, Sentence Boundaries]: http://unicode.org/reports/tr29/#Sentence_Boundaries
func FirstSentence(b []byte, state int) (sentence, rest []byte, newState int) {
// An empty byte slice returns nothing.
if len(b) == 0 {
return
}
// Extract the first rune.
r, length := utf8.DecodeRune(b)
if len(b) <= length { // If we're already past the end, there is nothing else to parse.
return b, nil, sbAny
}
// If we don't know the state, determine it now.
if state < 0 {
state, _ = transitionSentenceBreakState(state, r, b[length:], "")
}
// Transition until we find a boundary.
var boundary bool
for {
r, l := utf8.DecodeRune(b[length:])
state, boundary = transitionSentenceBreakState(state, r, b[length+l:], "")
if boundary {
return b[:length], b[length:], state
}
length += l
if len(b) <= length {
return b, nil, sbAny
}
}
}
// FirstSentenceInString is like [FirstSentence] but its input and outputs are
// strings.
func FirstSentenceInString(str string, state int) (sentence, rest string, newState int) {
// An empty string returns nothing.
if len(str) == 0 {
return
}
// Extract the first rune.
r, length := utf8.DecodeRuneInString(str)
if len(str) <= length { // If we're already past the end, there is nothing else to parse.
return str, "", sbAny
}
// If we don't know the state, determine it now.
if state < 0 {
state, _ = transitionSentenceBreakState(state, r, nil, str[length:])
}
// Transition until we find a boundary.
var boundary bool
for {
r, l := utf8.DecodeRuneInString(str[length:])
state, boundary = transitionSentenceBreakState(state, r, nil, str[length+l:])
if boundary {
return str[:length], str[length:], state
}
length += l
if len(str) <= length {
return str, "", sbAny
}
}
}
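// Illustrative sketch, not part of the upstream package: extracting every
// sentence from a string by calling FirstSentenceInString repeatedly, following
// the calling convention described above (pass -1 on the first call, then reuse
// the returned state and rest). The name allSentencesInString is hypothetical.
func allSentencesInString(str string) []string {
	var sentences []string
	state := -1
	for len(str) > 0 {
		var sentence string
		sentence, str, state = FirstSentenceInString(str, state)
		sentences = append(sentences, sentence)
	}
	return sentences
}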

2845
vendor/github.com/rivo/uniseg/sentenceproperties.go generated vendored Normal file

File diff suppressed because it is too large.

276
vendor/github.com/rivo/uniseg/sentencerules.go generated vendored Normal file
View File

@ -0,0 +1,276 @@
package uniseg
import "unicode/utf8"
// The states of the sentence break parser.
const (
sbAny = iota
sbCR
sbParaSep
sbATerm
sbUpper
sbLower
sbSB7
sbSB8Close
sbSB8Sp
sbSTerm
sbSB8aClose
sbSB8aSp
)
// sbTransitions implements the sentence break parser's state transitions. It's
// analogous to [grTransitions], see comments there for details.
//
// Unicode version 15.0.0.
func sbTransitions(state, prop int) (newState int, sentenceBreak bool, rule int) {
switch uint64(state) | uint64(prop)<<32 {
// SB3.
case sbAny | prCR<<32:
return sbCR, false, 9990
case sbCR | prLF<<32:
return sbParaSep, false, 30
// SB4.
case sbAny | prSep<<32:
return sbParaSep, false, 9990
case sbAny | prLF<<32:
return sbParaSep, false, 9990
case sbParaSep | prAny<<32:
return sbAny, true, 40
case sbCR | prAny<<32:
return sbAny, true, 40
// SB6.
case sbAny | prATerm<<32:
return sbATerm, false, 9990
case sbATerm | prNumeric<<32:
return sbAny, false, 60
case sbSB7 | prNumeric<<32:
return sbAny, false, 60 // Because ATerm also appears in SB7.
// SB7.
case sbAny | prUpper<<32:
return sbUpper, false, 9990
case sbAny | prLower<<32:
return sbLower, false, 9990
case sbUpper | prATerm<<32:
return sbSB7, false, 70
case sbLower | prATerm<<32:
return sbSB7, false, 70
case sbSB7 | prUpper<<32:
return sbUpper, false, 70
// SB8a.
case sbAny | prSTerm<<32:
return sbSTerm, false, 9990
case sbATerm | prSContinue<<32:
return sbAny, false, 81
case sbATerm | prATerm<<32:
return sbATerm, false, 81
case sbATerm | prSTerm<<32:
return sbSTerm, false, 81
case sbSB7 | prSContinue<<32:
return sbAny, false, 81
case sbSB7 | prATerm<<32:
return sbATerm, false, 81
case sbSB7 | prSTerm<<32:
return sbSTerm, false, 81
case sbSB8Close | prSContinue<<32:
return sbAny, false, 81
case sbSB8Close | prATerm<<32:
return sbATerm, false, 81
case sbSB8Close | prSTerm<<32:
return sbSTerm, false, 81
case sbSB8Sp | prSContinue<<32:
return sbAny, false, 81
case sbSB8Sp | prATerm<<32:
return sbATerm, false, 81
case sbSB8Sp | prSTerm<<32:
return sbSTerm, false, 81
case sbSTerm | prSContinue<<32:
return sbAny, false, 81
case sbSTerm | prATerm<<32:
return sbATerm, false, 81
case sbSTerm | prSTerm<<32:
return sbSTerm, false, 81
case sbSB8aClose | prSContinue<<32:
return sbAny, false, 81
case sbSB8aClose | prATerm<<32:
return sbATerm, false, 81
case sbSB8aClose | prSTerm<<32:
return sbSTerm, false, 81
case sbSB8aSp | prSContinue<<32:
return sbAny, false, 81
case sbSB8aSp | prATerm<<32:
return sbATerm, false, 81
case sbSB8aSp | prSTerm<<32:
return sbSTerm, false, 81
// SB9.
case sbATerm | prClose<<32:
return sbSB8Close, false, 90
case sbSB7 | prClose<<32:
return sbSB8Close, false, 90
case sbSB8Close | prClose<<32:
return sbSB8Close, false, 90
case sbATerm | prSp<<32:
return sbSB8Sp, false, 90
case sbSB7 | prSp<<32:
return sbSB8Sp, false, 90
case sbSB8Close | prSp<<32:
return sbSB8Sp, false, 90
case sbSTerm | prClose<<32:
return sbSB8aClose, false, 90
case sbSB8aClose | prClose<<32:
return sbSB8aClose, false, 90
case sbSTerm | prSp<<32:
return sbSB8aSp, false, 90
case sbSB8aClose | prSp<<32:
return sbSB8aSp, false, 90
case sbATerm | prSep<<32:
return sbParaSep, false, 90
case sbATerm | prCR<<32:
return sbParaSep, false, 90
case sbATerm | prLF<<32:
return sbParaSep, false, 90
case sbSB7 | prSep<<32:
return sbParaSep, false, 90
case sbSB7 | prCR<<32:
return sbParaSep, false, 90
case sbSB7 | prLF<<32:
return sbParaSep, false, 90
case sbSB8Close | prSep<<32:
return sbParaSep, false, 90
case sbSB8Close | prCR<<32:
return sbParaSep, false, 90
case sbSB8Close | prLF<<32:
return sbParaSep, false, 90
case sbSTerm | prSep<<32:
return sbParaSep, false, 90
case sbSTerm | prCR<<32:
return sbParaSep, false, 90
case sbSTerm | prLF<<32:
return sbParaSep, false, 90
case sbSB8aClose | prSep<<32:
return sbParaSep, false, 90
case sbSB8aClose | prCR<<32:
return sbParaSep, false, 90
case sbSB8aClose | prLF<<32:
return sbParaSep, false, 90
// SB10.
case sbSB8Sp | prSp<<32:
return sbSB8Sp, false, 100
case sbSB8aSp | prSp<<32:
return sbSB8aSp, false, 100
case sbSB8Sp | prSep<<32:
return sbParaSep, false, 100
case sbSB8Sp | prCR<<32:
return sbParaSep, false, 100
case sbSB8Sp | prLF<<32:
return sbParaSep, false, 100
// SB11.
case sbATerm | prAny<<32:
return sbAny, true, 110
case sbSB7 | prAny<<32:
return sbAny, true, 110
case sbSB8Close | prAny<<32:
return sbAny, true, 110
case sbSB8Sp | prAny<<32:
return sbAny, true, 110
case sbSTerm | prAny<<32:
return sbAny, true, 110
case sbSB8aClose | prAny<<32:
return sbAny, true, 110
case sbSB8aSp | prAny<<32:
return sbAny, true, 110
// We'll always break after ParaSep due to SB4.
default:
return -1, false, -1
}
}
// transitionSentenceBreakState determines the new state of the sentence break
// parser given the current state and the next code point. It also returns
// whether a sentence boundary was detected. If more than one code point is
// needed to determine the new state, the byte slice or the string starting
// after rune "r" can be used (whichever is not nil or empty) for further
// lookups.
func transitionSentenceBreakState(state int, r rune, b []byte, str string) (newState int, sentenceBreak bool) {
// Determine the property of the next character.
nextProperty := property(sentenceBreakCodePoints, r)
// SB5 (Replacing Ignore Rules).
if nextProperty == prExtend || nextProperty == prFormat {
if state == sbParaSep || state == sbCR {
return sbAny, true // Make sure we don't apply SB5 to SB3 or SB4.
}
if state < 0 {
return sbAny, true // SB1.
}
return state, false
}
// Find the applicable transition in the table.
var rule int
newState, sentenceBreak, rule = sbTransitions(state, nextProperty)
if newState < 0 {
// No specific transition found. Try the less specific ones.
anyPropState, anyPropProp, anyPropRule := sbTransitions(state, prAny)
anyStateState, anyStateProp, anyStateRule := sbTransitions(sbAny, nextProperty)
if anyPropState >= 0 && anyStateState >= 0 {
// Both apply. We'll use a mix (see comments for grTransitions).
newState, sentenceBreak, rule = anyStateState, anyStateProp, anyStateRule
if anyPropRule < anyStateRule {
sentenceBreak, rule = anyPropProp, anyPropRule
}
} else if anyPropState >= 0 {
// We only have a specific state.
newState, sentenceBreak, rule = anyPropState, anyPropProp, anyPropRule
// This branch will probably never be reached because a transition from the
// Any state exists for every property in the current transition set. But we
// keep it here for future modifications where this may not be true anymore.
} else if anyStateState >= 0 {
// We only have a specific property.
newState, sentenceBreak, rule = anyStateState, anyStateProp, anyStateRule
} else {
// No known transition. SB999: Any × Any.
newState, sentenceBreak, rule = sbAny, false, 9990
}
}
// SB8.
if rule > 80 && (state == sbATerm || state == sbSB8Close || state == sbSB8Sp || state == sbSB7) {
// Check the right side of the rule.
var length int
for nextProperty != prOLetter &&
nextProperty != prUpper &&
nextProperty != prLower &&
nextProperty != prSep &&
nextProperty != prCR &&
nextProperty != prLF &&
nextProperty != prATerm &&
nextProperty != prSTerm {
// Move on to the next rune.
if b != nil { // Byte slice version.
r, length = utf8.DecodeRune(b)
b = b[length:]
} else { // String version.
r, length = utf8.DecodeRuneInString(str)
str = str[length:]
}
if r == utf8.RuneError {
break
}
nextProperty = property(sentenceBreakCodePoints, r)
}
if nextProperty == prLower {
return sbLower, false
}
}
return
}
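// Illustrative sketch, not part of the upstream package: driving
// transitionSentenceBreakState directly over a string to count how many
// sentence boundaries occur before the end of the text, mirroring the loop in
// FirstSentenceInString. The name countSentenceBoundaries is hypothetical.
func countSentenceBoundaries(str string) (boundaries int) {
	if len(str) == 0 {
		return
	}
	// The first rune only initializes the parser state (SB1).
	r, length := utf8.DecodeRuneInString(str)
	state, _ := transitionSentenceBreakState(-1, r, nil, str[length:])
	for length < len(str) {
		r, l := utf8.DecodeRuneInString(str[length:])
		var boundary bool
		state, boundary = transitionSentenceBreakState(state, r, nil, str[length+l:])
		if boundary {
			boundaries++ // A new sentence starts before this rune.
		}
		length += l
	}
	return
}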

242
vendor/github.com/rivo/uniseg/step.go generated vendored Normal file
View File

@ -0,0 +1,242 @@
package uniseg
import "unicode/utf8"
// The bit masks used to extract boundary information returned by [Step].
const (
MaskLine = 3
MaskWord = 4
MaskSentence = 8
)
// The number of bits to shift the boundary information returned by [Step] to
// obtain the monospace width of the grapheme cluster.
const ShiftWidth = 4
// The bit positions by which boundary flags are shifted by the [Step] function.
// These must correspond to the Mask constants.
const (
shiftWord = 2
shiftSentence = 3
// The width's shift is the exported ShiftWidth above. No mask as these are always the remaining bits.
)
// The bit positions by which states are shifted by the [Step] function. These
// values must ensure state values defined for each of the boundary algorithms
// don't overlap (and that they all still fit in a single int). These must
// correspond to the Mask constants.
const (
shiftWordState = 4
shiftSentenceState = 9
shiftLineState = 13
shiftPropState = 21 // No mask as these are always the remaining bits.
)
// The bit mask used to extract the state returned by the [Step] function, after
// shifting. These values must correspond to the shift constants.
const (
maskGraphemeState = 0xf
maskWordState = 0x1f
maskSentenceState = 0xf
maskLineState = 0xff
)
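// Illustrative sketch, not part of the upstream package: unpacking the combined
// state returned by [Step] into its per-algorithm components with the shift and
// mask constants above. This mirrors what Step does internally when it is
// handed a non-negative state. The name unpackState is hypothetical.
func unpackState(state int) (graphemeState, wordState, sentenceState, lineState, prop int) {
	graphemeState = state & maskGraphemeState
	wordState = (state >> shiftWordState) & maskWordState
	sentenceState = (state >> shiftSentenceState) & maskSentenceState
	lineState = (state >> shiftLineState) & maskLineState
	prop = state >> shiftPropState // No mask: the property occupies the remaining bits.
	return
}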
// Step returns the first grapheme cluster (user-perceived character) found in
// the given byte slice. It also returns information about the boundary between
// that grapheme cluster and the one following it as well as the monospace width
// of the grapheme cluster. There are three types of boundary information: word
// boundaries, sentence boundaries, and line breaks. This function is therefore
// a combination of [FirstGraphemeCluster], [FirstWord], [FirstSentence], and
// [FirstLineSegment].
//
// The "boundaries" return value can be evaluated as follows:
//
// - boundaries&MaskWord != 0: The boundary is a word boundary.
// - boundaries&MaskWord == 0: The boundary is not a word boundary.
// - boundaries&MaskSentence != 0: The boundary is a sentence boundary.
// - boundaries&MaskSentence == 0: The boundary is not a sentence boundary.
// - boundaries&MaskLine == LineDontBreak: You must not break the line at the
// boundary.
// - boundaries&MaskLine == LineMustBreak: You must break the line at the
// boundary.
// - boundaries&MaskLine == LineCanBreak: You may or may not break the line at
// the boundary.
// - boundaries >> ShiftWidth: The width of the grapheme cluster for most
// monospace fonts where a value of 1 represents one character cell.
//
// This function can be called continuously to extract all grapheme clusters
// from a byte slice, as illustrated in the examples below.
//
// If you don't know which state to pass, for example when calling the function
// for the first time, you must pass -1. For consecutive calls, pass the state
// and rest slice returned by the previous call.
//
// The "rest" slice is the sub-slice of the original byte slice "b" starting
// after the last byte of the identified grapheme cluster. If the length of the
// "rest" slice is 0, the entire byte slice "b" has been processed. The
// "cluster" byte slice is the sub-slice of the input slice containing the
// first identified grapheme cluster.
//
// Given an empty byte slice "b", the function returns nil values.
//
// While slightly less convenient than using the Graphemes class, this function
// has much better performance and makes no allocations. It lends itself well to
// large byte slices.
//
// Note that in accordance with [UAX #14 LB3], the final segment will end with
// a mandatory line break (boundaries&MaskLine == LineMustBreak). You can choose
// to ignore this by checking if the length of the "rest" slice is 0 and calling
// [HasTrailingLineBreak] or [HasTrailingLineBreakInString] on the last rune.
//
// [UAX #14 LB3]: https://www.unicode.org/reports/tr14/#Algorithm
func Step(b []byte, state int) (cluster, rest []byte, boundaries int, newState int) {
// An empty byte slice returns nothing.
if len(b) == 0 {
return
}
// Extract the first rune.
r, length := utf8.DecodeRune(b)
if len(b) <= length { // If we're already past the end, there is nothing else to parse.
var prop int
if state < 0 {
prop = propertyGraphemes(r)
} else {
prop = state >> shiftPropState
}
return b, nil, LineMustBreak | (1 << shiftWord) | (1 << shiftSentence) | (runeWidth(r, prop) << ShiftWidth), grAny | (wbAny << shiftWordState) | (sbAny << shiftSentenceState) | (lbAny << shiftLineState) | (prop << shiftPropState)
}
// If we don't know the state, determine it now.
var graphemeState, wordState, sentenceState, lineState, firstProp int
remainder := b[length:]
if state < 0 {
graphemeState, firstProp, _ = transitionGraphemeState(state, r)
wordState, _ = transitionWordBreakState(state, r, remainder, "")
sentenceState, _ = transitionSentenceBreakState(state, r, remainder, "")
lineState, _ = transitionLineBreakState(state, r, remainder, "")
} else {
graphemeState = state & maskGraphemeState
wordState = (state >> shiftWordState) & maskWordState
sentenceState = (state >> shiftSentenceState) & maskSentenceState
lineState = (state >> shiftLineState) & maskLineState
firstProp = state >> shiftPropState
}
// Transition until we find a grapheme cluster boundary.
width := runeWidth(r, firstProp)
for {
var (
graphemeBoundary, wordBoundary, sentenceBoundary bool
lineBreak, prop int
)
r, l := utf8.DecodeRune(remainder)
remainder = b[length+l:]
graphemeState, prop, graphemeBoundary = transitionGraphemeState(graphemeState, r)
wordState, wordBoundary = transitionWordBreakState(wordState, r, remainder, "")
sentenceState, sentenceBoundary = transitionSentenceBreakState(sentenceState, r, remainder, "")
lineState, lineBreak = transitionLineBreakState(lineState, r, remainder, "")
if graphemeBoundary {
boundary := lineBreak | (width << ShiftWidth)
if wordBoundary {
boundary |= 1 << shiftWord
}
if sentenceBoundary {
boundary |= 1 << shiftSentence
}
return b[:length], b[length:], boundary, graphemeState | (wordState << shiftWordState) | (sentenceState << shiftSentenceState) | (lineState << shiftLineState) | (prop << shiftPropState)
}
if firstProp == prExtendedPictographic {
if r == vs15 {
width = 1
} else if r == vs16 {
width = 2
}
} else if firstProp != prRegionalIndicator && firstProp != prL {
width += runeWidth(r, prop)
}
length += l
if len(b) <= length {
return b, nil, LineMustBreak | (1 << shiftWord) | (1 << shiftSentence) | (width << ShiftWidth), grAny | (wbAny << shiftWordState) | (sbAny << shiftSentenceState) | (lbAny << shiftLineState) | (prop << shiftPropState)
}
}
}
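// Illustrative sketch, not part of the upstream package: decoding the
// "boundaries" value returned by [Step] with the mask and shift constants
// documented above. The name decodeBoundaries is hypothetical.
func decodeBoundaries(boundaries int) (wordBoundary, sentenceBoundary, mustBreakLine, canBreakLine bool, width int) {
	wordBoundary = boundaries&MaskWord != 0
	sentenceBoundary = boundaries&MaskSentence != 0
	mustBreakLine = boundaries&MaskLine == LineMustBreak
	canBreakLine = boundaries&MaskLine == LineCanBreak
	width = boundaries >> ShiftWidth // Monospace width of the grapheme cluster.
	return
}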
// StepString is like [Step] but its input and outputs are strings.
func StepString(str string, state int) (cluster, rest string, boundaries int, newState int) {
// An empty string returns nothing.
if len(str) == 0 {
return
}
// Extract the first rune.
r, length := utf8.DecodeRuneInString(str)
if len(str) <= length { // If we're already past the end, there is nothing else to parse.
prop := propertyGraphemes(r)
return str, "", LineMustBreak | (1 << shiftWord) | (1 << shiftSentence) | (runeWidth(r, prop) << ShiftWidth), grAny | (wbAny << shiftWordState) | (sbAny << shiftSentenceState) | (lbAny << shiftLineState)
}
// If we don't know the state, determine it now.
var graphemeState, wordState, sentenceState, lineState, firstProp int
remainder := str[length:]
if state < 0 {
graphemeState, firstProp, _ = transitionGraphemeState(state, r)
wordState, _ = transitionWordBreakState(state, r, nil, remainder)
sentenceState, _ = transitionSentenceBreakState(state, r, nil, remainder)
lineState, _ = transitionLineBreakState(state, r, nil, remainder)
} else {
graphemeState = state & maskGraphemeState
wordState = (state >> shiftWordState) & maskWordState
sentenceState = (state >> shiftSentenceState) & maskSentenceState
lineState = (state >> shiftLineState) & maskLineState
firstProp = state >> shiftPropState
}
// Transition until we find a grapheme cluster boundary.
width := runeWidth(r, firstProp)
for {
var (
graphemeBoundary, wordBoundary, sentenceBoundary bool
lineBreak, prop int
)
r, l := utf8.DecodeRuneInString(remainder)
remainder = str[length+l:]
graphemeState, prop, graphemeBoundary = transitionGraphemeState(graphemeState, r)
wordState, wordBoundary = transitionWordBreakState(wordState, r, nil, remainder)
sentenceState, sentenceBoundary = transitionSentenceBreakState(sentenceState, r, nil, remainder)
lineState, lineBreak = transitionLineBreakState(lineState, r, nil, remainder)
if graphemeBoundary {
boundary := lineBreak | (width << ShiftWidth)
if wordBoundary {
boundary |= 1 << shiftWord
}
if sentenceBoundary {
boundary |= 1 << shiftSentence
}
return str[:length], str[length:], boundary, graphemeState | (wordState << shiftWordState) | (sentenceState << shiftSentenceState) | (lineState << shiftLineState) | (prop << shiftPropState)
}
if firstProp == prExtendedPictographic {
if r == vs15 {
width = 1
} else if r == vs16 {
width = 2
}
} else if firstProp != prRegionalIndicator && firstProp != prL {
width += runeWidth(r, prop)
}
length += l
if len(str) <= length {
return str, "", LineMustBreak | (1 << shiftWord) | (1 << shiftSentence) | (width << ShiftWidth), grAny | (wbAny << shiftWordState) | (sbAny << shiftSentenceState) | (lbAny << shiftLineState) | (prop << shiftPropState)
}
}
}

61
vendor/github.com/rivo/uniseg/width.go generated vendored Normal file
View File

@ -0,0 +1,61 @@
package uniseg
// EastAsianAmbiguousWidth specifies the monospace width for East Asian
// characters classified as Ambiguous. The default is 1 but some rare fonts
// render them with a width of 2.
var EastAsianAmbiguousWidth = 1
// runeWidth returns the monospace width for the given rune. The provided
// grapheme property is a value mapped by the [graphemeCodePoints] table.
//
// Every rune has a width of 1, except for runes with the following properties
// (evaluated in this order):
//
// - Control, CR, LF, Extend, ZWJ: Width of 0
// - \u2e3a, TWO-EM DASH: Width of 3
// - \u2e3b, THREE-EM DASH: Width of 4
// - East-Asian width Fullwidth and Wide: Width of 2 (Ambiguous and Neutral
// have a width of 1)
// - Regional Indicator: Width of 2
// - Extended Pictographic: Width of 2, unless Emoji Presentation is "No".
func runeWidth(r rune, graphemeProperty int) int {
switch graphemeProperty {
case prControl, prCR, prLF, prExtend, prZWJ:
return 0
case prRegionalIndicator:
return 2
case prExtendedPictographic:
if property(emojiPresentation, r) == prEmojiPresentation {
return 2
}
return 1
}
switch r {
case 0x2e3a:
return 3
case 0x2e3b:
return 4
}
switch propertyEastAsianWidth(r) {
case prW, prF:
return 2
case prA:
return EastAsianAmbiguousWidth
}
return 1
}
// StringWidth returns the monospace width for the given string, that is, the
// number of same-size cells to be occupied by the string.
func StringWidth(s string) (width int) {
state := -1
for len(s) > 0 {
var w int
_, s, w, state = FirstGraphemeClusterInString(s, state)
width += w
}
return
}
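// Illustrative usage, not part of the upstream package. Assuming the package's
// East Asian width tables classify CJK ideographs as Wide (width 2), StringWidth
// behaves as follows; the name stringWidthExamples is hypothetical.
func stringWidthExamples() (ascii, cjk int) {
	ascii = StringWidth("hi") // 2: two narrow runes, one cell each.
	cjk = StringWidth("世界")  // 4: two wide runes, two cells each.
	return
}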

89
vendor/github.com/rivo/uniseg/word.go generated vendored Normal file
View File

@ -0,0 +1,89 @@
package uniseg
import "unicode/utf8"
// FirstWord returns the first word found in the given byte slice according to
// the rules of [Unicode Standard Annex #29, Word Boundaries]. This function can
// be called continuously to extract all words from a byte slice, as illustrated
// in the example below.
//
// If you don't know the current state, for example when calling the function
// for the first time, you must pass -1. For consecutive calls, pass the state
// and rest slice returned by the previous call.
//
// The "rest" slice is the sub-slice of the original byte slice "b" starting
// after the last byte of the identified word. If the length of the "rest" slice
// is 0, the entire byte slice "b" has been processed. The "word" byte slice is
// the sub-slice of the input slice containing the identified word.
//
// Given an empty byte slice "b", the function returns nil values.
//
// [Unicode Standard Annex #29, Word Boundaries]: http://unicode.org/reports/tr29/#Word_Boundaries
func FirstWord(b []byte, state int) (word, rest []byte, newState int) {
// An empty byte slice returns nothing.
if len(b) == 0 {
return
}
// Extract the first rune.
r, length := utf8.DecodeRune(b)
if len(b) <= length { // If we're already past the end, there is nothing else to parse.
return b, nil, wbAny
}
// If we don't know the state, determine it now.
if state < 0 {
state, _ = transitionWordBreakState(state, r, b[length:], "")
}
// Transition until we find a boundary.
var boundary bool
for {
r, l := utf8.DecodeRune(b[length:])
state, boundary = transitionWordBreakState(state, r, b[length+l:], "")
if boundary {
return b[:length], b[length:], state
}
length += l
if len(b) <= length {
return b, nil, wbAny
}
}
}
// FirstWordInString is like [FirstWord] but its input and outputs are strings.
func FirstWordInString(str string, state int) (word, rest string, newState int) {
// An empty string returns nothing.
if len(str) == 0 {
return
}
// Extract the first rune.
r, length := utf8.DecodeRuneInString(str)
if len(str) <= length { // If we're already past the end, there is nothing else to parse.
return str, "", wbAny
}
// If we don't know the state, determine it now.
if state < 0 {
state, _ = transitionWordBreakState(state, r, nil, str[length:])
}
// Transition until we find a boundary.
var boundary bool
for {
r, l := utf8.DecodeRuneInString(str[length:])
state, boundary = transitionWordBreakState(state, r, nil, str[length+l:])
if boundary {
return str[:length], str[length:], state
}
length += l
if len(str) <= length {
return str, "", wbAny
}
}
}
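// Illustrative sketch, not part of the upstream package: collecting every word
// segment from a string with FirstWordInString, following the calling
// convention described above. Note that UAX #29 word segmentation also yields
// whitespace and punctuation runs as separate segments. The name
// allWordsInString is hypothetical.
func allWordsInString(str string) []string {
	var words []string
	state := -1
	for len(str) > 0 {
		var word string
		word, str, state = FirstWordInString(str, state)
		words = append(words, word)
	}
	return words
}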

1883
vendor/github.com/rivo/uniseg/wordproperties.go generated vendored Normal file

File diff suppressed because it is too large.

282
vendor/github.com/rivo/uniseg/wordrules.go generated vendored Normal file
View File

@ -0,0 +1,282 @@
package uniseg
import "unicode/utf8"
// The states of the word break parser.
const (
wbAny = iota
wbCR
wbLF
wbNewline
wbWSegSpace
wbHebrewLetter
wbALetter
wbWB7
wbWB7c
wbNumeric
wbWB11
wbKatakana
wbExtendNumLet
wbOddRI
wbEvenRI
wbZWJBit = 16 // This bit is set for any states followed by at least one zero-width joiner (see WB4 and WB3c).
)
// wbTransitions implements the word break parser's state transitions. It's
// analogous to [grTransitions], see comments there for details.
//
// Unicode version 15.0.0.
func wbTransitions(state, prop int) (newState int, wordBreak bool, rule int) {
switch uint64(state) | uint64(prop)<<32 {
// WB3b.
case wbAny | prNewline<<32:
return wbNewline, true, 32
case wbAny | prCR<<32:
return wbCR, true, 32
case wbAny | prLF<<32:
return wbLF, true, 32
// WB3a.
case wbNewline | prAny<<32:
return wbAny, true, 31
case wbCR | prAny<<32:
return wbAny, true, 31
case wbLF | prAny<<32:
return wbAny, true, 31
// WB3.
case wbCR | prLF<<32:
return wbLF, false, 30
// WB3d.
case wbAny | prWSegSpace<<32:
return wbWSegSpace, true, 9990
case wbWSegSpace | prWSegSpace<<32:
return wbWSegSpace, false, 34
// WB5.
case wbAny | prALetter<<32:
return wbALetter, true, 9990
case wbAny | prHebrewLetter<<32:
return wbHebrewLetter, true, 9990
case wbALetter | prALetter<<32:
return wbALetter, false, 50
case wbALetter | prHebrewLetter<<32:
return wbHebrewLetter, false, 50
case wbHebrewLetter | prALetter<<32:
return wbALetter, false, 50
case wbHebrewLetter | prHebrewLetter<<32:
return wbHebrewLetter, false, 50
// WB7. Transitions to wbWB7 handled by transitionWordBreakState().
case wbWB7 | prALetter<<32:
return wbALetter, false, 70
case wbWB7 | prHebrewLetter<<32:
return wbHebrewLetter, false, 70
// WB7a.
case wbHebrewLetter | prSingleQuote<<32:
return wbAny, false, 71
// WB7c. Transitions to wbWB7c handled by transitionWordBreakState().
case wbWB7c | prHebrewLetter<<32:
return wbHebrewLetter, false, 73
// WB8.
case wbAny | prNumeric<<32:
return wbNumeric, true, 9990
case wbNumeric | prNumeric<<32:
return wbNumeric, false, 80
// WB9.
case wbALetter | prNumeric<<32:
return wbNumeric, false, 90
case wbHebrewLetter | prNumeric<<32:
return wbNumeric, false, 90
// WB10.
case wbNumeric | prALetter<<32:
return wbALetter, false, 100
case wbNumeric | prHebrewLetter<<32:
return wbHebrewLetter, false, 100
// WB11. Transitions to wbWB11 handled by transitionWordBreakState().
case wbWB11 | prNumeric<<32:
return wbNumeric, false, 110
// WB13.
case wbAny | prKatakana<<32:
return wbKatakana, true, 9990
case wbKatakana | prKatakana<<32:
return wbKatakana, false, 130
// WB13a.
case wbAny | prExtendNumLet<<32:
return wbExtendNumLet, true, 9990
case wbALetter | prExtendNumLet<<32:
return wbExtendNumLet, false, 131
case wbHebrewLetter | prExtendNumLet<<32:
return wbExtendNumLet, false, 131
case wbNumeric | prExtendNumLet<<32:
return wbExtendNumLet, false, 131
case wbKatakana | prExtendNumLet<<32:
return wbExtendNumLet, false, 131
case wbExtendNumLet | prExtendNumLet<<32:
return wbExtendNumLet, false, 131
// WB13b.
case wbExtendNumLet | prALetter<<32:
return wbALetter, false, 132
case wbExtendNumLet | prHebrewLetter<<32:
return wbHebrewLetter, false, 132
case wbExtendNumLet | prNumeric<<32:
return wbNumeric, false, 132
case wbExtendNumLet | prKatakana<<32:
return wbKatakana, false, 132
default:
return -1, false, -1
}
}
// transitionWordBreakState determines the new state of the word break parser
// given the current state and the next code point. It also returns whether a
// word boundary was detected. If more than one code point is needed to
// determine the new state, the byte slice or the string starting after rune "r"
// can be used (whichever is not nil or empty) for further lookups.
func transitionWordBreakState(state int, r rune, b []byte, str string) (newState int, wordBreak bool) {
// Determine the property of the next character.
nextProperty := property(workBreakCodePoints, r)
// "Replacing Ignore Rules".
if nextProperty == prZWJ {
// WB4 (for zero-width joiners).
if state == wbNewline || state == wbCR || state == wbLF {
return wbAny | wbZWJBit, true // Make sure we don't apply WB4 to WB3a.
}
if state < 0 {
return wbAny | wbZWJBit, false
}
return state | wbZWJBit, false
} else if nextProperty == prExtend || nextProperty == prFormat {
// WB4 (for Extend and Format).
if state == wbNewline || state == wbCR || state == wbLF {
return wbAny, true // Make sure we don't apply WB4 to WB3a.
}
if state == wbWSegSpace || state == wbAny|wbZWJBit {
return wbAny, false // We don't break but this is also not WB3d or WB3c.
}
if state < 0 {
return wbAny, false
}
return state, false
} else if nextProperty == prExtendedPictographic && state >= 0 && state&wbZWJBit != 0 {
// WB3c.
return wbAny, false
}
if state >= 0 {
state = state &^ wbZWJBit
}
// Find the applicable transition in the table.
var rule int
newState, wordBreak, rule = wbTransitions(state, nextProperty)
if newState < 0 {
// No specific transition found. Try the less specific ones.
anyPropState, anyPropWordBreak, anyPropRule := wbTransitions(state, prAny)
anyStateState, anyStateWordBreak, anyStateRule := wbTransitions(wbAny, nextProperty)
if anyPropState >= 0 && anyStateState >= 0 {
// Both apply. We'll use a mix (see comments for grTransitions).
newState, wordBreak, rule = anyStateState, anyStateWordBreak, anyStateRule
if anyPropRule < anyStateRule {
wordBreak, rule = anyPropWordBreak, anyPropRule
}
} else if anyPropState >= 0 {
// We only have a specific state.
newState, wordBreak, rule = anyPropState, anyPropWordBreak, anyPropRule
// This branch will probably never be reached because a transition from the
// Any state exists for every property in the current transition set. But we
// keep it here for future modifications where this may not be true anymore.
} else if anyStateState >= 0 {
// We only have a specific property.
newState, wordBreak, rule = anyStateState, anyStateWordBreak, anyStateRule
} else {
// No known transition. WB999: Any ÷ Any.
newState, wordBreak, rule = wbAny, true, 9990
}
}
// For those rules that need to look up runes further in the string, we
// determine the property after nextProperty, skipping over Format, Extend,
// and ZWJ (according to WB4). It's -1 if not needed or if such a rune cannot
// be determined (because the text ends or the rune is faulty).
farProperty := -1
if rule > 60 &&
(state == wbALetter || state == wbHebrewLetter || state == wbNumeric) &&
(nextProperty == prMidLetter || nextProperty == prMidNumLet || nextProperty == prSingleQuote || // WB6.
nextProperty == prDoubleQuote || // WB7b.
nextProperty == prMidNum) { // WB12.
for {
var (
r rune
length int
)
if b != nil { // Byte slice version.
r, length = utf8.DecodeRune(b)
b = b[length:]
} else { // String version.
r, length = utf8.DecodeRuneInString(str)
str = str[length:]
}
if r == utf8.RuneError {
break
}
prop := property(workBreakCodePoints, r)
if prop == prExtend || prop == prFormat || prop == prZWJ {
continue
}
farProperty = prop
break
}
}
// WB6.
if rule > 60 &&
(state == wbALetter || state == wbHebrewLetter) &&
(nextProperty == prMidLetter || nextProperty == prMidNumLet || nextProperty == prSingleQuote) &&
(farProperty == prALetter || farProperty == prHebrewLetter) {
return wbWB7, false
}
// WB7b.
if rule > 72 &&
state == wbHebrewLetter &&
nextProperty == prDoubleQuote &&
farProperty == prHebrewLetter {
return wbWB7c, false
}
// WB12.
if rule > 120 &&
state == wbNumeric &&
(nextProperty == prMidNum || nextProperty == prMidNumLet || nextProperty == prSingleQuote) &&
farProperty == prNumeric {
return wbWB11, false
}
// WB15 and WB16.
if newState == wbAny && nextProperty == prRegionalIndicator {
if state != wbOddRI && state != wbEvenRI { // Includes state == -1.
// Transition into the first RI.
return wbOddRI, true
}
if state == wbOddRI {
// Don't break pairs of Regional Indicators.
return wbEvenRI, false
}
return wbOddRI, true // We can break after a pair.
}
return
}

View File

@ -14,7 +14,7 @@
*Did you know that the shortest valid piece of HTML5 is `<!doctype html><title>x</title>`? See for yourself at the [W3C Validator](http://validator.w3.org/)!*
Minify is a minifier package written in [Go][1]. It provides HTML5, CSS3, JS, JSON, SVG and XML minifiers and an interface to implement any other minifier. Minification is the process of removing bytes from a file (such as whitespace) without changing its output and therefore shrinking its size and speeding up transmission over the internet and possibly parsing. The implemented minifiers are designed for high performance.
Minify is a minifier package written in [Go][1]. It provides HTML5, CSS3, JS, JSON, SVG and XML minifiers and an interface to implement any other minifier. Minification is the process of removing bytes from a file (such as whitespace) without changing its output and therefore shrinking its size and speeding up transmission over the internet and possibly parsing. The implemented minifiers are designed for high performance (see https://github.com/privatenumber/minification-benchmarks where this library is (one of) the fastest JS minifiers).
The core functionality associates mimetypes with minification functions, allowing embedded resources (like CSS or JS within HTML files) to be minified as well. Users can add new implementations that are triggered based on a mimetype (or pattern), or redirect to an external command (like ClosureCompiler, UglifyCSS, ...).

View File

@ -81,14 +81,21 @@ func (t Token) String() string {
if len(t.Args) == 0 {
return t.TokenType.String() + "(" + string(t.Data) + ")"
}
return fmt.Sprint(t.Args)
sb := strings.Builder{}
sb.Write(t.Data)
for _, arg := range t.Args {
sb.WriteString(arg.String())
}
sb.WriteByte(')')
return sb.String()
}
// Equal returns true if both tokens are equal.
func (t Token) Equal(t2 Token) bool {
if t.TokenType == t2.TokenType && bytes.Equal(t.Data, t2.Data) && len(t.Args) == len(t2.Args) {
for i := 0; i < len(t.Args); i++ {
if t.Args[i].TokenType != t2.Args[i].TokenType || !bytes.Equal(t.Args[i].Data, t2.Args[i].Data) {
if !t.Args[i].Equal(t2.Args[i]) {
return false
}
}
@ -405,7 +412,7 @@ func (c *cssMinifier) minifyDeclaration(property []byte, components []css.Token)
}
values = c.minifyTokens(prop, 0, values)
if len(values) > 0 {
if 0 < len(values) {
values = c.minifyProperty(prop, values)
}
c.writeDeclaration(values, important)
@ -1127,7 +1134,7 @@ func (c *cssMinifier) minifyProperty(prop Hash, values []Token) []Token {
} else {
values[i] = minifyColor(values[i])
}
if 0 < i && sameValues && !bytes.Equal(values[0].Data, values[i].Data) {
if 0 < i && sameValues && !values[0].Equal(values[i]) {
sameValues = false
}
}

View File

@ -504,7 +504,7 @@ func (o *Minifier) Minify(m *minify.M, w io.Writer, r io.Reader, _ map[string]st
if 0 < len(attr.Data) && (attr.Data[len(attr.Data)-1] == '\'' || attr.Data[len(attr.Data)-1] == '"') {
quote = attr.Data[len(attr.Data)-1]
}
val = html.EscapeAttrVal(&attrByteBuffer, val, quote, o.KeepQuotes, isXML)
val = html.EscapeAttrVal(&attrByteBuffer, val, quote, o.KeepQuotes || isXML)
w.Write(val)
}
}

View File

@ -6,7 +6,7 @@ var (
)
// EscapeAttrVal returns the escaped attribute value bytes with quotes. Either single or double quotes are used, whichever is shorter. If there are no quotes present in the value and the value is in HTML (not XML), it will return the value without quotes.
func EscapeAttrVal(buf *[]byte, b []byte, origQuote byte, mustQuote, isXML bool) []byte {
func EscapeAttrVal(buf *[]byte, b []byte, origQuote byte, mustQuote bool) []byte {
singles := 0
doubles := 0
unquoted := true
@ -20,9 +20,9 @@ func EscapeAttrVal(buf *[]byte, b []byte, origQuote byte, mustQuote, isXML bool)
}
}
}
if unquoted && (!mustQuote || origQuote == 0) && !isXML {
if unquoted && (!mustQuote || origQuote == 0) {
return b
} else if singles == 0 && origQuote == '\'' && !isXML || doubles == 0 && origQuote == '"' {
} else if singles == 0 && origQuote == '\'' || doubles == 0 && origQuote == '"' {
if len(b)+2 > cap(*buf) {
*buf = make([]byte, 0, len(b)+2)
}
@ -36,14 +36,10 @@ func EscapeAttrVal(buf *[]byte, b []byte, origQuote byte, mustQuote, isXML bool)
n := len(b) + 2
var quote byte
var escapedQuote []byte
if singles >= doubles || isXML {
if singles > doubles || singles == doubles && origQuote != '\'' {
n += doubles * 4
quote = '"'
escapedQuote = doubleQuoteEntityBytes
if singles == doubles && origQuote == '\'' && !isXML {
quote = '\''
escapedQuote = singleQuoteEntityBytes
}
} else {
n += singles * 4
quote = '\''

16
vendor/modules.txt vendored
View File

@ -1,12 +1,12 @@
# github.com/a-h/templ v0.2.543
## explicit; go 1.20
# github.com/a-h/templ v0.2.598
## explicit; go 1.21
github.com/a-h/templ
github.com/a-h/templ/safehtml
# github.com/aymanbagabas/go-osc52/v2 v2.0.1
## explicit; go 1.16
github.com/aymanbagabas/go-osc52/v2
# github.com/charmbracelet/lipgloss v0.9.1
## explicit; go 1.17
# github.com/charmbracelet/lipgloss v0.10.0
## explicit; go 1.18
github.com/charmbracelet/lipgloss
# github.com/charmbracelet/log v0.3.1
## explicit; go 1.19
@ -50,15 +50,15 @@ github.com/muesli/reflow/wrap
# github.com/muesli/termenv v0.15.2
## explicit; go 1.17
github.com/muesli/termenv
# github.com/rivo/uniseg v0.2.0
## explicit; go 1.12
# github.com/rivo/uniseg v0.4.7
## explicit; go 1.18
github.com/rivo/uniseg
# github.com/tdewolff/minify/v2 v2.20.15
# github.com/tdewolff/minify/v2 v2.20.19
## explicit; go 1.18
github.com/tdewolff/minify/v2
github.com/tdewolff/minify/v2/css
github.com/tdewolff/minify/v2/html
# github.com/tdewolff/parse/v2 v2.7.10
# github.com/tdewolff/parse/v2 v2.7.12
## explicit; go 1.13
github.com/tdewolff/parse/v2
github.com/tdewolff/parse/v2/buffer