mirror of
https://github.com/perlbot/perlbuut
synced 2025-06-07 17:15:40 -04:00
62 lines
1.5 KiB
Perl
62 lines
1.5 KiB
Perl
# ------------------------
|
|
# Originally by Mauke at: http://mauke.ath.cx/stuff/perl/unip
|
|
# ------------------------
|
|
|
|
use Unicode::UCD 'charinfo';
|
|
use Encode qw/decode encode_utf8/;
|
|
use utf8;
|
|
use strict;
|
|
|
|
sub speng {
|
|
my $x = shift;
|
|
|
|
$x =~ /^0[0-7]+\z/ and return oct $x;
|
|
|
|
$x =~ /^(?:[Uu]\+?|0[Xx])([[:xdigit:]]+)\z/ || (
|
|
length($x) > 1 && $x =~ /^([[:xdigit:]]*[A-Fa-f][[:xdigit:]]*)\z/
|
|
) and return hex $1;
|
|
|
|
$x =~ /^[0-9]+\z/ and return $x;
|
|
|
|
return map ord, split //, $x
|
|
}
|
|
|
|
sub unip {
|
|
my @pieces = @_;
|
|
my (@out, @err);
|
|
for (@pieces) {
|
|
my $chr = chr;
|
|
my $utf8 = join ' ', unpack '(H2)*', encode_utf8 $chr;
|
|
my $x;
|
|
unless ($x = charinfo $_) {
|
|
push @err, sprintf "U+%X (%s): no match found", $_, $utf8;
|
|
next;
|
|
}
|
|
my $comb = "";
|
|
$comb = " " if $x->{name} =~ /^COMBINING /; # add a space for combining chars
|
|
$chr = chr(0x2400 + ord($chr)) if ord($chr) < 0x20; # lower control chars
|
|
$chr = "\x{2421}" if ord($chr) == 0x7f; # DEL
|
|
push @out, "U+$x->{code} ($utf8): $x->{name} [$comb$chr]";
|
|
}
|
|
|
|
\@err, \@out
|
|
}
|
|
|
|
# ------------------------
|
|
no warnings 'void';
|
|
|
|
sub {
|
|
my( $said, $pm ) = @_;
|
|
|
|
utf8::decode( $said->{body} );
|
|
|
|
my ($err, $out) = unip map speng($_), split " ", $said->{body};
|
|
|
|
utf8::upgrade( $_ ) for @$err, @$out;
|
|
|
|
print "Error: @$err\n" if @$err;
|
|
print "@$out\n";
|
|
}
|
|
|
|
__DATA__
|
|
unicode U+2301; returns the unicode character and associated information given either a unicode character or one of the various ways you can specify a code point.
|