#!/pro/bin/perl
use strict;
use warnings;
binmode STDOUT, ":utf8";
use Getopt::Long qw(:config nopermute bundling);
my @opt_m;
my $opt_v = 0;
my $opt_f = 0;
my $opt_d = 0;
my $opt_k = 0; # Show key combo, compose key
my $opt_h = 0;
GetOptions (
"m:s" => \@opt_m, # show map(s)
"v:1" => \$opt_v,
"f" => \$opt_f,
"k|c" => \$opt_k,
"d" => \$opt_d, # Randomly diacritify
"h" => \$opt_h, # Also show HTML entity if available
) or die "usage: uchar [-v] [-m base[:count] [ -m base[:count] ] ... | char ... | -f char\n";
use HTML::Entities;
use PROCURA::Diac 4.14;
use charnames ":alias" => ":pro";
use Encode qw(encode decode);
my %compose;
if (open my $cf, "< /usr/X11R6/lib/X11/locale/$ENV{LANG}/Compose") {
while (<$cf>) {
m/^\s*(.*?)\s*:\s*(?:".*?"\s+)[Uu]([0-9A-Fa-f]+)/ or next;
$compose{sprintf "%04x", hex $2} = $1;
}
close $cf;
}
my %xlat = (
":)" => "\N{WHITE SMILING FACE}",
":(" => "\N{WHITE FROWNING FACE}",
"->" => "\N{WHITE RIGHT POINTING INDEX}",
"<-" => "\N{WHITE LEFT POINTING INDEX}",
phone => "\N{WHITE TELEPHONE}",
death => "\N{SKULL AND CROSSBONES}",
euro => "\N{EURO SIGN}",
);
@opt_m == 1 && !$opt_m[0] and
@opt_m = qw( 00a0:df 2000:3f 20a0:1f 2140:1f 2190:1f 21c0:1f 2630:1f );
sub Names ()
{
do "unicore/Name.pl";
} # Names
my (%name, %cp, $n);
for (split m/\n/ => Names ()) {
s/\s+$//;
my ($cp, $cp2, $name) = split m/\t/, $_, 3;
$name =~ m/[a-z]/ and next; # Non-character
($cp, $cp2) = map { hex "0$_" } ($cp, $cp2);
$name{$cp} = $name;
$cp{$name} //= $cp;
}
if ($opt_f) {
my $found = 0;
foreach my $w (['\b', '\b'], ['\b', ''], ['', '']) {
my $pat = join ".*", map { "$w->[0]$_$w->[1]" } map { split m/_/ } @ARGV;
$pat = qr{$pat}i;
foreach my $name (sort grep m/$pat/ => keys %cp) {
my $cp = $cp{$name};
my $c = chr $cp;
my $pro = DiacLookup ("utf8", $c);
$name =~ m/^COMBINING / and $c = " $c";
if ($opt_h) {
my $chr_h = encode_entities ($c);
$chr_h eq $c and $chr_h = "";
$chr_h =~ s/^&// and chop $chr_h;
printf "%06x %s %-7s %-10s %s\n", $cp, $c, $chr_h,
$pro && $pro->[1] ? $pro->[2] : "", $name;
}
else {
printf "%06x %s %-15s %s\n", $cp, $c,
$pro && $pro->[1] ? $pro->[2] : "", $name;
if ($opt_k) {
my $h = sprintf "%04x", $cp;
exists $compose{$h} and print "\t$compose{$h}\n";
}
}
$found++;
}
$found and last;
}
exit;
}
if ($opt_d) {
my %ll;
my %fcp = map { $_ => 1 } 0x20 .. 0x7f;
if (my $font = ( grep m{^ (?: xterm ) \* (?: vt100 \* )? font: \s* (.*) }ix =>
sort `xrdb -query` )[-1] ) {
$font =~ s/^\S+:\s+(\S.*\S)\s*/$1/;
local @ARGV = ("xlsfonts -lll -fn '$font' |");
while (<>) {
my ($cp, $m) = m/^\s+0x\w+\s+\((\d+)\)((?:\s+\d+)+)\s+0x\w+/ or next;
$m =~ m/[1-9]/ and $fcp{$cp}++;
}
}
for (keys %cp) {
m{^LATIN (SMALL|CAPITAL) LETTER (.) WITH (.*)} or next;
my $cp = $cp{$_};
exists $fcp{$cp} or next; # Not in this font
my $bc = $1 eq "SMALL" ? lc $2 : $2;
push @{$ll{$bc}}, $cp;
}
foreach (unpack "U*", decode "UTF-8", join " ", @ARGV) {
my $c = chr $_;
if ($c =~ m/[A-Za-z]/) {
exists $ll{$c} and
$c = chr $ll{$c}[int rand scalar @{$ll{$c}}];
}
print $c;
}
print "\n";
exit;
}
if (@opt_m) {
@opt_m == 1 and push @opt_m, @ARGV;
@opt_m == 1 && $opt_m[0] =~ m/^(0|all|\*)$/ and @opt_m = ("a0:5f", map { sprintf "%x", 0x100 * $_ } 1..0x2e);
for (@opt_m) {
my ($base, $count) =
map { m/^0?x?([\da-f]+)$/i ? hex $1 : 0 }
split m/:/, "$_:7f";
$count += $base;
print " 0123456789abcdef 0123456789abcdef\n";
while ($base <= $count) {
printf "0x%04x:\t", $base;
print chr ($base + $_) for 0 .. 15;
print " ";
print chr ($base + $_) for 16 .. 31;
print "\n";
$base += 32;
}
print "\n";
}
exit;
}
my $c;
if ($opt_v) {
@ARGV = map { chr $_ } unpack "U*", decode "UTF-8", join " ", @ARGV;
}
for (@ARGV) {
exists $xlat{$_} and $_ = $xlat{$_}, next;
s/^(?:0?x)?([a-f\d]+)$/chr hex $1/e and next;
$c = DiacLookup ("utf8", $_) and $c->[1] and $_ = $c->[3], next;
$c = DiacLookup ("utf8", $_."_IDX") and $c->[1] and $_ = $c->[3], next;
$c = charnames::vianame ($_) and $_ = chr $c, next;
$c = charnames::vianame (uc $_) and $_ = chr $c;
}
if ($opt_v) {
$_ .= " \\N{".charnames::viacode (ord ($_))."} " for @ARGV;
}
print join "", @ARGV, "\n";