1
0
Fork 0
mirror of https://github.com/perlbot/perlbuut synced 2025-06-07 16:05:40 -04:00
perlbuut/plugins/unicode.pm

62 lines
1.5 KiB
Perl

# ------------------------
# Originally by Mauke at: http://mauke.ath.cx/stuff/perl/unip
# ------------------------
use Unicode::UCD 'charinfo';
use Encode qw/decode encode_utf8/;
use utf8;
use strict;
sub speng {
my $x = shift;
$x =~ /^0[0-7]+\z/ and return oct $x;
$x =~ /^(?:[Uu]\+?|0[Xx])([[:xdigit:]]+)\z/ || (
length($x) > 1 && $x =~ /^([[:xdigit:]]*[A-Fa-f][[:xdigit:]]*)\z/
) and return hex $1;
$x =~ /^[0-9]+\z/ and return $x;
return map ord, split //, $x
}
sub unip {
my @pieces = @_;
my (@out, @err);
for (@pieces) {
my $chr = chr;
my $utf8 = join ' ', unpack '(H2)*', encode_utf8 $chr;
my $x;
unless ($x = charinfo $_) {
push @err, sprintf "U+%X (%s): no match found", $_, $utf8;
next;
}
my $comb = "";
$comb = " " if $x->{name} =~ /^COMBINING /; # add a space for combining chars
$chr = chr(0x2400 + ord($chr)) if ord($chr) < 0x20; # lower control chars
$chr = "\x{2421}" if ord($chr) == 0x7f; # DEL
push @out, "U+$x->{code} ($utf8): $x->{name} [$comb$chr]";
}
\@err, \@out
}
# ------------------------
no warnings 'void';
sub {
my( $said, $pm ) = @_;
utf8::decode( $said->{body} );
my ($err, $out) = unip map speng($_), split " ", $said->{body};
utf8::upgrade( $_ ) for @$err, @$out;
print "Error: @$err\n" if @$err;
print "@$out\n";
}
__DATA__
unicode U+2301; returns the unicode character and associated information given either a unicode character or one of the various ways you can specify a code point.