1
0
Fork 0
mirror of https://github.com/perlbot/perlbuut synced 2025-06-07 16:55:42 -04:00

Metaphone search is working

This commit is contained in:
Ryan Voots 2016-12-24 01:45:40 -08:00
parent 0b5ffc3035
commit 26c2bfe8ae
3 changed files with 39 additions and 4 deletions

27
bin/generate_metaphones.pl Executable file
View file

@ -0,0 +1,27 @@
#!/usr/bin/env perl
use strict;
use warnings;
use Data::Dumper;
use DBI;
use Text::Metaphone;
my $dbh = DBI->connect(
"dbi:SQLite:dbname=var/factoids.db",
"",
"",
{ RaiseError => 1, PrintError => 0 }
);
my $fsth = $dbh->prepare('SELECT * FROM factoid;');
my $isth = $dbh->prepare('UPDATE factoid SET metaphone = ? WHERE factoid_id = ?');
$fsth->execute();
while (my $row = $fsth->fetchrow_hashref()) {
my $orig_sub = $row->{original_subject};
my $metaphone = Metaphone($orig_sub);
print "$orig_sub => $metaphone\n";
$isth->execute($metaphone, $row->{factoid_id});
}

View file

@ -88,3 +88,6 @@ requires 'Return::MultiLevel' => 0;
requires 'Try::Tiny::ByClass' => 0; requires 'Try::Tiny::ByClass' => 0;
requires 'IPC::Run' => 0; requires 'IPC::Run' => 0;
requires 'Text::Metaphone' => 0; requires 'Text::Metaphone' => 0;
requires 'DBD::SQLite::BundledExtensions' => 0;
requires 'Text::Levenshtein' => 0;

View file

@ -1,6 +1,7 @@
package Bot::BB3::Plugin::Factoids; package Bot::BB3::Plugin::Factoids;
use DBI; use DBI;
use DBD::SQLite; use DBD::SQLite;
use DBD::SQLite::BundledExtensions;
use POE::Component::IRC::Common qw/l_irc/; use POE::Component::IRC::Common qw/l_irc/;
use Text::Metaphone; use Text::Metaphone;
use strict; use strict;
@ -66,6 +67,8 @@ sub dbh {
{ RaiseError => 1, PrintError => 0 } { RaiseError => 1, PrintError => 0 }
); );
DBD::SQLite::BundledExtensions->load_spellfix($dbh);
return $dbh; return $dbh;
} }
@ -628,7 +631,7 @@ sub basic_get_fact {
my $metaphone = Metaphone( _clean_subject($subject, 1) ); my $metaphone = Metaphone( _clean_subject($subject, 1) );
my $matches = $self->_metaphone_matches( $metaphone ); my $matches = $self->_metaphone_matches( $metaphone, $subject );
push @{$said->{metaphone_matches}}, @$matches; push @{$said->{metaphone_matches}}, @$matches;
@ -642,18 +645,20 @@ sub basic_get_fact {
} }
sub _metaphone_matches { sub _metaphone_matches {
my( $self, $metaphone ) = @_; my( $self, $metaphone, $subject ) = @_;
my $dbh = $self->dbh; my $dbh = $self->dbh;
# TODO this needs to be rewritten to do an edit distance based on the metaphone columns, rather than a direct comparison # TODO this needs to be rewritten to do an edit distance based on the metaphone columns, rather than a direct comparison
#XXX HACK WARNING: not really a hack, but something to document, the inner query here seems to work fine on sqlite, but i suspect on other databases it might need an ORDER BY factoid_id clause to enforce that it picks the last entry in the database #XXX HACK WARNING: not really a hack, but something to document, the inner query here seems to work fine on sqlite, but i suspect on other databases it might need an ORDER BY factoid_id clause to enforce that it picks the last entry in the database
my $rows = $dbh->selectall_arrayref( my $rows = $dbh->selectall_arrayref(
"SELECT * FROM (SELECT factoid_id,subject,predicate FROM factoid WHERE metaphone = ? GROUP BY original_subject) as subquery WHERE NOT (predicate = ' ') LIMIT 10", "SELECT f.factoid_id, f.subject, f.predicate, f.metaphone, spellfix1_editdist(f.metaphone, ?) AS score FROM (SELECT max(factoid_id) AS factoid_id FROM factoid GROUP BY subject) as subquery JOIN factoid AS f USING (factoid_id) WHERE NOT (f.predicate = ' ') AND f.predicate IS NOT NULL AND length(f.metaphone) > 1 ORDER BY score ASC LIMIT 10;",
undef, undef,
$metaphone $metaphone
); );
return [ map $_->[1], grep $_->[2] =~ /\S/, @$rows ]; use Text::Levenshtein qw/distance/; # only import it in this scope
return [ map {$_->[0]} sort {$a->[1] <=> $b->[1]} map {[$_->[1], distance($subject, $_->[1])]} grep {$_->[2] =~ /\S/} @$rows ];
} }
no warnings 'void'; no warnings 'void';