mirror of
https://github.com/perlbot/perlbuut
synced 2025-06-07 16:25:41 -04:00
66 lines
1.8 KiB
Perl
66 lines
1.8 KiB
Perl
use LWP::UserAgent;
|
|
use HTML::TreeBuilder::XPath;
|
|
|
|
# Adds a string_join function to XML::XPathEngine::Function so that it can be
# used from XPath expressions evaluated later in this file (presumably spelled
# string-join(...) inside the expression -- the name mapping lives in
# XML::XPathEngine, not here).
package XML::XPathEngine::Function {

    # Join the string values of the leading arguments with the last argument.
    #
    # Parameters:
    #   $self   - invocant (unused)
    #   $node   - context node handed in by the caller (unused)
    #   @params - one or more value objects followed by the separator object;
    #             the separator must provide string_value
    #
    # Returns a new XML::XPathEngine::Literal holding the joined string.
    # Dies with "string-join: Too few parameters\n" when fewer than two
    # parameters are supplied.
    sub string_join {
        my $self = shift;
        my ($node, @params) = @_;

        die "string-join: Too few parameters\n" if @params < 2;

        my $separator = pop(@params)->string_value;

        # Objects offering string_values contribute one string per contained
        # item; anything else (for example a literal) contributes its single
        # string_value instead of dying on a missing method.
        my @pieces = map {
            $_->can('string_values') ? $_->string_values : $_->string_value
        } @params;

        return XML::XPathEngine::Literal->new( join($separator, @pieces) );
    }
}
|
|
|
|
|
|
# Plugin body: fetch a web page and print text extracted from it with XPath.
#
# Parameters:
#   $said - hash reference; only $said->{body} is used. Going by the help text
#           under __DATA__, it holds a URL optionally followed by an XPath
#           expression. The URL is stripped out of $said->{body} in place.
#
# Everything is reported through print:
#   - body contains no token at all -> "That doesn't look like a url.."
#   - the fetch did not succeed     -> "Couldn't fetch [...] you failure"
#   - remaining body is ".*"        -> the decoded page content, verbatim
#   - remaining body is an XPath    -> its values, or an error/no-match note
#   - remaining body is empty       -> //title, ': ', //body
# Values are whitespace-squashed and printed separated by ', '.
#
# NOTE(review): the URL comes straight from $said->{body}; nothing in this sub
# restricts which hosts may be fetched.
sub {
    my( $said ) = @_;

    my $ua = LWP::UserAgent->new( agent => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko, really Perlbot) Chrome/45.0.2454.85 Safari/537.36" );
    my $url;

    # Take the URL off the body: an explicit http(s) URL wins, otherwise the
    # first whitespace-delimited token is treated as a host and given http://.
    if( $said->{body} =~ s{(https?://\S+)\s*}{} ) {
        $url = $1;
    }
    elsif( $said->{body} =~ s/(\S+)\s*// ) {
        $url = "http://$1";
    }
    else {
        print "That doesn't look like a url..";
        return;
    }

    my $resp = $ua->get( $url );

    # get() hands back a response object even when the request fails, so the
    # object's truthiness says nothing; ask it whether the fetch succeeded.
    if( not $resp or not $resp->is_success ) {
        print "Couldn't fetch [$url] you failure";
        return;
    }

    # decoded_content yields undef when the body cannot be decoded; fall back
    # to the raw content rather than handing undef to the parser.
    my $content = $resp->decoded_content // $resp->content;

    my $document = HTML::TreeBuilder::XPath->new_from_content( $content );
    if (!$document) {
        print "Could not parsinate that page!";
        return;    # nothing to query; do not call methods on a false value
    }

    # Only the XPath (if any) is left in the body at this point.
    my $xpath = $said->{body};
    my @text;

    if ($xpath =~ /^\s*\.\*\s*$/) {
        # ".*" means: dump the page as-is; @text stays empty.
        print $content;
    }
    elsif ($xpath) {
        my @found = eval { $document->findvalues( $xpath ) };
        my $error = $@;    # capture immediately, $@ is a clobberable global

        if ($error) {
            @text = "Your Xpath fails: $error";
        }
        elsif (!@found) {
            @text = "Your Xpath didn't match anything";
        }
        else {
            @text = @found;
        }
    }
    else {
        # No XPath given: default to title and body. The stale $@ that used
        # to lead this list is dropped -- no eval runs on this path, so it
        # could only add an empty field or unrelated error text.
        @text = (
            $document->findvalues( '//title' ),
            ': ',
            $document->findvalues( '//body' ),
        );
    }

    # Only plain strings are kept from here on, so the tree can be released.
    $document->delete;

    # Collapse whitespace runs and trim the ends of each value. "$value"
    # stringifies a copy so the entries in @text are never modified.
    my @squashed = map {
        my $value = "$_";
        $value =~ s/\s+/ /g;
        $value =~ s/^ +//;
        $value =~ s/ +$//;
        $value;
    } @text;

    print join(', ', @squashed)
}
|
|
|
|
__DATA__
|
|
get http://url/ //xpath - get page from interents, extract the xpath, show it to people. (Xpath defaults to '//title' + '//body' ) spaces squashed too
|