mirror of
https://github.com/perlbot/perlbuut
synced 2025-06-07 10:35:41 -04:00
Suggestions now work; search is still not namespace-aware
This commit is contained in:
parent
e798537174
commit
487f5e9f27
3 changed files with 110 additions and 61 deletions
|
@ -13,17 +13,19 @@ factoid_lookup_order (depth, namespace, server, alias_namespace, alias_server, p
|
||||||
UNION ALL
|
UNION ALL
|
||||||
SELECT 0, '', '', NULL, NULL, NULL, NULL, false, '', '' WHERE NOT EXISTS (table factoid_lookup_order_inner)
|
SELECT 0, '', '', NULL, NULL, NULL, NULL, false, '', '' WHERE NOT EXISTS (table factoid_lookup_order_inner)
|
||||||
),
|
),
|
||||||
get_latest_factoid (depth, factoid_id, subject, copula, predicate, author, modified_time, compose_macro, protected, original_subject, deleted, server, namespace) AS (
|
get_factoid_trigram (depth, factoid_id, subject, copula, predicate, author, modified_time, compose_macro, protected, original_subject, deleted, server, namespace, similarity) AS (
|
||||||
SELECT DISTINCT ON(lo.depth) lo.depth, factoid_id, subject, copula, predicate, author, modified_time, compose_macro, protected, original_subject, f.deleted, f.server, f.namespace
|
SELECT DISTINCT ON (lo.depth, original_subject) lo.depth, factoid_id, subject,
|
||||||
|
copula, predicate, author, modified_time, compose_macro, protected,
|
||||||
|
original_subject, f.deleted, f.server, f.namespace,
|
||||||
|
(difference(original_subject, 'hillss') ::float + similarity('hillss', original_subject)) / greatest(length('hillss'), length(original_subject))-- PLACEHOLDER TARGET
|
||||||
FROM factoid f
|
FROM factoid f
|
||||||
INNER JOIN factoid_lookup_order lo
|
INNER JOIN factoid_lookup_order lo
|
||||||
ON f.generated_server = lo.gen_server
|
ON f.generated_server = lo.gen_server
|
||||||
AND f.generated_namespace = lo.gen_namespace
|
AND f.generated_namespace = lo.gen_namespace
|
||||||
WHERE original_subject = 'hello' -- PLACEHOLDER TARGET
|
WHERE difference(original_subject, 'hillss') ::float + similarity('hillss', original_subject) > 0.01 -- PLACEHOLDER TARGET
|
||||||
ORDER BY depth ASC, factoid_id DESC
|
ORDER BY depth ASC, original_subject ASC, factoid_id DESC
|
||||||
)
|
)
|
||||||
SELECT * FROM get_latest_factoid WHERE NOT deleted ORDER BY depth ASC, factoid_id DESC LIMIT 1;
|
SELECT DISTINCT ON (similarity, original_subject) similarity, factoid_id, original_subject FROM get_factoid_trigram WHERE NOT deleted ORDER BY similarity DESC, original_subject, depth, factoid_id DESC LIMIT 10;
|
||||||
--SELECT * FROM factoid_lookup_order;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -75,8 +75,6 @@ sub dbh($self) {
|
||||||
my $dbh = $self->{dbh} =
|
my $dbh = $self->{dbh} =
|
||||||
DBI->connect("dbi:Pg:dbname=$dbname;host=192.168.32.1", $dbuser, $dbpass, { RaiseError => 1, PrintError => 0 });
|
DBI->connect("dbi:Pg:dbname=$dbname;host=192.168.32.1", $dbuser, $dbpass, { RaiseError => 1, PrintError => 0 });
|
||||||
|
|
||||||
# DBD::SQLite::BundledExtensions->load_spellfix($dbh);
|
|
||||||
|
|
||||||
return $dbh;
|
return $dbh;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -91,8 +89,8 @@ sub get_namespace($self, $said) {
|
||||||
sub get_alias_namespace($self, $said) {
|
sub get_alias_namespace($self, $said) {
|
||||||
my $conf = $self->get_conf_for_channel($said);
|
my $conf = $self->get_conf_for_channel($said);
|
||||||
|
|
||||||
my $server = $conf->{alias_server} // $conf->{server};
|
my $server = $conf->{alias_server};
|
||||||
my $namespace = $conf->{alias_namespace} // $conf->{namespace};
|
my $namespace = $conf->{alias_namespace};
|
||||||
|
|
||||||
return ($server, $namespace);
|
return ($server, $namespace);
|
||||||
}
|
}
|
||||||
|
@ -188,10 +186,7 @@ sub sub_command ($self, $said, $pm) {
|
||||||
|
|
||||||
my $fact_string; # used to capture return values
|
my $fact_string; # used to capture return values
|
||||||
|
|
||||||
warn "Checking: $subject\n";
|
|
||||||
|
|
||||||
if (!$call_only && $subject =~ s/^\s*($commands_re)\s+//) {
|
if (!$call_only && $subject =~ s/^\s*($commands_re)\s+//) {
|
||||||
warn "COMMAND RE $1: $subject, $said->{name}\n";
|
|
||||||
$fact_string =
|
$fact_string =
|
||||||
$commandhash{$1}->($self, $subject, $said->{name}, $said);
|
$commandhash{$1}->($self, $subject, $said->{name}, $said);
|
||||||
} elsif (($subject =~ m{\w\s*=~\s*s /.+ / .* /[gi]*\s*$}ix)
|
} elsif (($subject =~ m{\w\s*=~\s*s /.+ / .* /[gi]*\s*$}ix)
|
||||||
|
@ -210,12 +205,9 @@ sub sub_command ($self, $said, $pm) {
|
||||||
$fact_string = "@ret" if ($ret[0] =~ /^insuff/i);
|
$fact_string = "@ret" if ($ret[0] =~ /^insuff/i);
|
||||||
$fact_string = "Stored @ret";
|
$fact_string = "Stored @ret";
|
||||||
} else {
|
} else {
|
||||||
warn "INSIDE FACT HANDLE: $subject, $said->{name}, $call_only\n";
|
|
||||||
$fact_string = $self->get_fact($pm, $said, $subject, $said->{name}, $call_only);
|
$fact_string = $self->get_fact($pm, $said, $subject, $said->{name}, $call_only);
|
||||||
}
|
}
|
||||||
|
|
||||||
warn "got fact: $fact_string\n";
|
|
||||||
|
|
||||||
if (defined $fact_string) {
|
if (defined $fact_string) {
|
||||||
return ('handled', $fact_string);
|
return ('handled', $fact_string);
|
||||||
} else {
|
} else {
|
||||||
|
@ -313,7 +305,7 @@ sub store_factoid ($self, $said) {
|
||||||
if ($subject =~ s/^\s*\@?macro\b\s*//) {$compose_macro = 1;}
|
if ($subject =~ s/^\s*\@?macro\b\s*//) {$compose_macro = 1;}
|
||||||
elsif ($subject =~ s/^\s*\@?func\b\s*//) {$compose_macro = 2;}
|
elsif ($subject =~ s/^\s*\@?func\b\s*//) {$compose_macro = 2;}
|
||||||
elsif ($predicate =~ s/^\s*also\s+//) {
|
elsif ($predicate =~ s/^\s*also\s+//) {
|
||||||
my $fact = $self->_db_get_fact(_clean_subject($subject), $author, $server, $namespace);
|
my $fact = $self->_db_get_fact(_clean_subject($subject), 0, $server, $namespace);
|
||||||
|
|
||||||
$predicate = $fact->{predicate} . " | " . $predicate;
|
$predicate = $fact->{predicate} . " | " . $predicate;
|
||||||
}
|
}
|
||||||
|
@ -372,7 +364,7 @@ sub get_fact_protect ($self, $subject, $name, $said) {
|
||||||
return "Insufficient permissions for protecting factoid [$subject]"
|
return "Insufficient permissions for protecting factoid [$subject]"
|
||||||
if (!$self->_db_check_perm($subject, $said));
|
if (!$self->_db_check_perm($subject, $said));
|
||||||
|
|
||||||
my $fact = $self->_db_get_fact(_clean_subject($subject), $name, $server, $namespace);
|
my $fact = $self->_db_get_fact(_clean_subject($subject), 0, $server, $namespace);
|
||||||
|
|
||||||
if (defined($fact->{predicate})) {
|
if (defined($fact->{predicate})) {
|
||||||
$self->_insert_factoid($name, $subject, $fact->{copula}, $fact->{predicate}, $fact->{compose_macro}, 1, $aliasserver, $aliasnamespace);
|
$self->_insert_factoid($name, $subject, $fact->{copula}, $fact->{predicate}, $fact->{compose_macro}, 1, $aliasserver, $aliasnamespace);
|
||||||
|
@ -393,7 +385,7 @@ sub get_fact_unprotect ($self, $subject, $name, $said) {
|
||||||
return "Insufficient permissions for unprotecting factoid [$subject]"
|
return "Insufficient permissions for unprotecting factoid [$subject]"
|
||||||
if (!$self->_db_check_perm($subject, $said));
|
if (!$self->_db_check_perm($subject, $said));
|
||||||
|
|
||||||
my $fact = $self->_db_get_fact(_clean_subject($subject), $name, $server, $namespace);
|
my $fact = $self->_db_get_fact(_clean_subject($subject), 0, $server, $namespace);
|
||||||
|
|
||||||
if (defined($fact->{predicate})) {
|
if (defined($fact->{predicate})) {
|
||||||
$self->_insert_factoid($name, $subject, $fact->{copula}, $fact->{predicate}, $fact->{compose_macro}, 0, $aliasserver, $aliasnamespace);
|
$self->_insert_factoid($name, $subject, $fact->{copula}, $fact->{predicate}, $fact->{compose_macro}, 0, $aliasserver, $aliasnamespace);
|
||||||
|
@ -419,39 +411,80 @@ sub get_fact_forget ($self, $subject, $name, $said) {
|
||||||
return "Forgot $subject";
|
return "Forgot $subject";
|
||||||
}
|
}
|
||||||
|
|
||||||
sub _fact_literal_format($r) {
|
sub _fact_literal_format($r, $aliasserver, $aliasnamespace) {
|
||||||
|
$aliasserver ||= "*";
|
||||||
|
$aliasnamespace ||= "##NULL";
|
||||||
# TODO make this express the parent namespace if present
|
# TODO make this express the parent namespace if present
|
||||||
# <server:namespace>
|
# <server:namespace>
|
||||||
($r->{protected} ? "P:" : "") . ("", "macro ", "func ")[$r->{compose_macro}] . "$r->{subject} $r->{copula} $r->{predicate}";
|
#
|
||||||
|
|
||||||
|
(($aliasserver eq $r->{server} && $aliasnamespace eq $r->{namespace}) ? "" : sprintf("<%s:%s> ", $r->{generated_server}||"*", $r->{generated_namespace}||"##NULL"))
|
||||||
|
. ($r->{deleted} ? "[REDACTED]" :
|
||||||
|
(
|
||||||
|
($r->{protected} ? "P:" : "")
|
||||||
|
. ("", "macro ", "func ")[$r->{compose_macro}]
|
||||||
|
. "$r->{subject} $r->{copula} $r->{predicate}"
|
||||||
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
sub get_fact_revisions ($self, $subject, $name, $said) {
|
sub get_fact_revisions ($self, $subject, $name, $said) {
|
||||||
my $dbh = $self->dbh;
|
my $dbh = $self->dbh;
|
||||||
|
|
||||||
my ($server, $namespace) = $self->get_namespace($said);
|
my ($server, $namespace) = $self->get_namespace($said);
|
||||||
|
my ($aliasserver, $aliasnamespace) = $self->get_alias_namespace($said);
|
||||||
|
|
||||||
# TODO this query needs to be rewritten
|
# TODO this query should use the deleted flag to figure out
|
||||||
my $revisions = $dbh->selectall_arrayref(
|
# which depth lookup should be valid at any given time
|
||||||
"SELECT factoid_id, subject, copula, predicate, author, compose_macro, protected, server, namespace
|
# but that's a much more complicated query i don't want to make
|
||||||
FROM factoid
|
# maybe just do it in perl later
|
||||||
WHERE original_subject = ?
|
my $revisions = $dbh->selectall_arrayref("
|
||||||
ORDER BY modified_time DESC
|
WITH RECURSIVE factoid_lookup_order_inner (depth, namespace, server, alias_namespace, alias_server, parent_namespace, parent_server, recursive, gen_server, gen_namespace) AS (
|
||||||
|
SELECT 0 AS depth, namespace, server, alias_namespace, alias_server, parent_namespace, parent_server, recursive, generated_server, generated_namespace
|
||||||
|
FROM factoid_config
|
||||||
|
WHERE namespace = ? AND server = ?
|
||||||
|
UNION ALL
|
||||||
|
SELECT p.depth+1 AS depth, m.namespace, m.server, m.alias_namespace, m.alias_server, m.parent_namespace, m.parent_server, m.recursive, m.generated_server, m.generated_namespace
|
||||||
|
FROM factoid_config m
|
||||||
|
INNER JOIN factoid_lookup_order_inner p
|
||||||
|
ON m.namespace = p.parent_namespace AND m.server = p.parent_server AND p.recursive
|
||||||
|
),
|
||||||
|
factoid_lookup_order (depth, namespace, server, alias_namespace, alias_server, parent_namespace, parent_server, recursive, gen_server, gen_namespace) AS (
|
||||||
|
SELECT * FROM factoid_lookup_order_inner
|
||||||
|
UNION ALL
|
||||||
|
SELECT 0, '', '', NULL, NULL, NULL, NULL, false, '', '' WHERE NOT EXISTS (table factoid_lookup_order_inner)
|
||||||
|
),
|
||||||
|
get_latest_factoid (depth, factoid_id, subject, copula, predicate, author, modified_time, compose_macro, protected, original_subject, deleted, server, namespace) AS (
|
||||||
|
SELECT lo.depth, factoid_id, subject, copula, predicate, author, modified_time, compose_macro, protected, original_subject, f.deleted, f.server, f.namespace
|
||||||
|
FROM factoid f
|
||||||
|
INNER JOIN factoid_lookup_order lo
|
||||||
|
ON f.generated_server = lo.gen_server
|
||||||
|
AND f.generated_namespace = lo.gen_namespace
|
||||||
|
WHERE original_subject = ?
|
||||||
|
ORDER BY depth ASC, factoid_id DESC
|
||||||
|
)
|
||||||
|
SELECT * FROM get_latest_factoid ORDER BY factoid_id DESC;
|
||||||
", # newest revision first
|
", # newest revision first
|
||||||
{ Slice => {} },
|
{ Slice => {} },
|
||||||
|
$namespace, $server,
|
||||||
_clean_subject($subject),
|
_clean_subject($subject),
|
||||||
);
|
);
|
||||||
|
|
||||||
my $ret_string = join " ", map {"[$_->{factoid_id} by $_->{author}: " . _fact_literal_format($_) . "]";} @$revisions;
|
my $ret_string = join " \n", map {"[$_->{factoid_id} by $_->{author}: " . _fact_literal_format($_, $aliasserver, $aliasnamespace) . "]";} @$revisions;
|
||||||
|
|
||||||
return $ret_string;
|
return $ret_string;
|
||||||
}
|
}
|
||||||
|
|
||||||
sub get_fact_literal ($self, $subject, $name, $said) {
|
sub get_fact_literal ($self, $subject, $name, $said) {
|
||||||
my ($server, $namespace) = $self->get_namespace($said);
|
my ($server, $namespace) = $self->get_namespace($said);
|
||||||
my $fact = $self->_db_get_fact(_clean_subject($subject), $name, $server, $namespace);
|
my ($aliasserver, $aliasnamespace) = $self->get_alias_namespace($said);
|
||||||
|
|
||||||
return _fact_literal_format($fact);
|
print STDERR "literal parse: $subject, $name, $server, $namespace\n";
|
||||||
|
my $fact = $self->_db_get_fact(_clean_subject($subject), 0, $server, $namespace);
|
||||||
|
print STDERR "literal fact: ".Dumper($fact)."\n";
|
||||||
|
|
||||||
|
my $formatted = _fact_literal_format($fact, $aliasserver, $aliasnamespace);
|
||||||
|
print STDERR "formatted: $formatted\n";
|
||||||
|
return $formatted;
|
||||||
}
|
}
|
||||||
|
|
||||||
sub _fact_substitute ($self, $pred, $match, $subst, $flags) {
|
sub _fact_substitute ($self, $pred, $match, $subst, $flags) {
|
||||||
|
@ -503,7 +536,7 @@ sub get_fact_substitute ($self, $subject, $name, $said) {
|
||||||
my ($subject, $match, $subst, $flags) = ($1, $2, $3, $4);
|
my ($subject, $match, $subst, $flags) = ($1, $2, $3, $4);
|
||||||
|
|
||||||
# TODO does this need to be done via the ->get_fact() instead now?
|
# TODO does this need to be done via the ->get_fact() instead now?
|
||||||
my $fact = $self->_db_get_fact(_clean_subject($subject), $name, $server, $namespace);
|
my $fact = $self->_db_get_fact(_clean_subject($subject), 0, $server, $namespace);
|
||||||
|
|
||||||
if ($fact && $fact->{predicate} =~ /\S/) { #we've got a fact to operate on
|
if ($fact && $fact->{predicate} =~ /\S/) { #we've got a fact to operate on
|
||||||
if ($match !~ /(?:\(\?\??\{)/) { #ok, match has checked out to be "safe", this will likely be extended later
|
if ($match !~ /(?:\(\?\??\{)/) { #ok, match has checked out to be "safe", this will likely be extended later
|
||||||
|
@ -570,8 +603,6 @@ sub get_fact_learn ($self, $body, $name, $said, $subject=undef, $predicate=undef
|
||||||
my ($aliasserver, $aliasnamespace) = $self->get_alias_namespace($said);
|
my ($aliasserver, $aliasnamespace) = $self->get_alias_namespace($said);
|
||||||
my ($server, $namespace) = $self->get_namespace($said);
|
my ($server, $namespace) = $self->get_namespace($said);
|
||||||
|
|
||||||
print STDERR Dumper($said, $body, $name, $subject, $predicate);
|
|
||||||
|
|
||||||
return if ($said->{nolearn});
|
return if ($said->{nolearn});
|
||||||
|
|
||||||
$body =~ s/^\s*learn\s+//;
|
$body =~ s/^\s*learn\s+//;
|
||||||
|
@ -581,12 +612,10 @@ sub get_fact_learn ($self, $body, $name, $said, $subject=undef, $predicate=undef
|
||||||
($subject, $copula, $predicate) = $body =~ /^\s*(.*?)\s+(as|$COPULA_RE)\s+(.*)\s*$/ig;
|
($subject, $copula, $predicate) = $body =~ /^\s*(.*?)\s+(as|$COPULA_RE)\s+(.*)\s*$/ig;
|
||||||
}
|
}
|
||||||
|
|
||||||
print STDERR "trying to check perms\n";
|
|
||||||
#XXX check permissions here
|
#XXX check permissions here
|
||||||
return "Insufficient permissions for changing protected factoid [$subject]"
|
return "Insufficient permissions for changing protected factoid [$subject]"
|
||||||
if (!$self->_db_check_perm($subject, $said));
|
if (!$self->_db_check_perm($subject, $said));
|
||||||
|
|
||||||
print STDERR "Trying to set\n";
|
|
||||||
#my @ret = $self->store_factoid( $name, $said->{body} );
|
#my @ret = $self->store_factoid( $name, $said->{body} );
|
||||||
$self->_insert_factoid($name, $subject, $copula, $predicate, 0, $self->_db_get_protect($subject, $server, $namespace), $aliasserver, $aliasnamespace);
|
$self->_insert_factoid($name, $subject, $copula, $predicate, 0, $self->_db_get_protect($subject, $server, $namespace), $aliasserver, $aliasnamespace);
|
||||||
|
|
||||||
|
@ -753,8 +782,6 @@ SELECT * FROM get_latest_factoid WHERE NOT deleted ORDER BY depth ASC, factoid_i
|
||||||
$subj,
|
$subj,
|
||||||
);
|
);
|
||||||
|
|
||||||
warn Dumper("fact is:", $fact);
|
|
||||||
|
|
||||||
if ($func && (!$fact->{compose_macro})) {
|
if ($func && (!$fact->{compose_macro})) {
|
||||||
return undef;
|
return undef;
|
||||||
} else {
|
} else {
|
||||||
|
@ -773,8 +800,6 @@ sub basic_get_fact ($self, $pm, $said, $subject, $name, $call_only) {
|
||||||
$fact = $self->_db_get_fact($key, 0, $server, $namespace);
|
$fact = $self->_db_get_fact($key, 0, $server, $namespace);
|
||||||
}
|
}
|
||||||
|
|
||||||
warn "fact is: $fact\n";
|
|
||||||
|
|
||||||
# Attempt to determine if our subject matches a previously defined
|
# Attempt to determine if our subject matches a previously defined
|
||||||
# 'macro' or 'func' type factoid.
|
# 'macro' or 'func' type factoid.
|
||||||
# I suspect it won't match two word function names now.
|
# I suspect it won't match two word function names now.
|
||||||
|
@ -816,11 +841,10 @@ sub basic_get_fact ($self, $pm, $said, $subject, $name, $call_only) {
|
||||||
return $self->basic_get_fact($pm, $said, $newsubject, $name, $call_only);
|
return $self->basic_get_fact($pm, $said, $newsubject, $name, $call_only);
|
||||||
}
|
}
|
||||||
|
|
||||||
my $metaphone = Metaphone(_clean_subject($subject));
|
print STDERR "Got to here\n";
|
||||||
|
my $matches = $self->get_suggestions($key, $server, $namespace);
|
||||||
|
|
||||||
my $matches = $self->_metaphone_matches($metaphone, $subject, $server, $namespace);
|
push @{ $said->{suggestion_matches} }, @$matches;
|
||||||
|
|
||||||
push @{ $said->{metaphone_matches} }, @$matches;
|
|
||||||
|
|
||||||
if (($matches and @$matches) && (!$said->{backdressed})) {
|
if (($matches and @$matches) && (!$said->{backdressed})) {
|
||||||
return "No factoid found. Did you mean one of these: " . join " ", map "[$_]", @$matches;
|
return "No factoid found. Did you mean one of these: " . join " ", map "[$_]", @$matches;
|
||||||
|
@ -830,28 +854,50 @@ sub basic_get_fact ($self, $pm, $said, $subject, $name, $call_only) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sub _metaphone_matches($self, $metaphone, $subject, $server, $namespace) {
|
sub get_suggestions($self, $subject, $server, $namespace) {
|
||||||
my $dbh = $self->dbh;
|
my $dbh = $self->dbh;
|
||||||
|
|
||||||
return [];
|
print STDERR "Running search for $subject\n";
|
||||||
|
my $threshold = 0.2;
|
||||||
|
|
||||||
# TODO this should be using the trigram stuff once it's ready
|
# TODO this should be using the trigram stuff once it's ready
|
||||||
my $rows = $dbh->selectall_arrayref(
|
my $rows = $dbh->selectall_arrayref("
|
||||||
"SELECT f.factoid_id, f.subject, f.predicate, f.metaphone, spellfix1_editdist(f.metaphone, ?) AS score FROM (SELECT max(factoid_id) AS factoid_id FROM factoid GROUP BY original_subject) as subquery JOIN factoid AS f USING (factoid_id) WHERE NOT (f.predicate = ' ' OR f.predicate = '') AND f.predicate IS NOT NULL AND length(f.metaphone) > 1 AND score < 200 ORDER BY score ASC;",
|
WITH RECURSIVE factoid_lookup_order_inner (depth, namespace, server, alias_namespace, alias_server, parent_namespace, parent_server, recursive, gen_server, gen_namespace) AS (
|
||||||
undef, $metaphone
|
SELECT 0 AS depth, namespace, server, alias_namespace, alias_server, parent_namespace, parent_server, recursive, generated_server, generated_namespace
|
||||||
|
FROM factoid_config
|
||||||
|
WHERE namespace = ? AND server = ?
|
||||||
|
UNION ALL
|
||||||
|
SELECT p.depth+1 AS depth, m.namespace, m.server, m.alias_namespace, m.alias_server, m.parent_namespace, m.parent_server, m.recursive, m.generated_server, m.generated_namespace
|
||||||
|
FROM factoid_config m
|
||||||
|
INNER JOIN factoid_lookup_order_inner p
|
||||||
|
ON m.namespace = p.parent_namespace AND m.server = p.parent_server AND p.recursive
|
||||||
|
),
|
||||||
|
factoid_lookup_order (depth, namespace, server, alias_namespace, alias_server, parent_namespace, parent_server, recursive, gen_server, gen_namespace) AS (
|
||||||
|
SELECT * FROM factoid_lookup_order_inner
|
||||||
|
UNION ALL
|
||||||
|
SELECT 0, '', '', NULL, NULL, NULL, NULL, false, '', '' WHERE NOT EXISTS (table factoid_lookup_order_inner)
|
||||||
|
),
|
||||||
|
get_factoid_trigram (depth, factoid_id, subject, copula, predicate, author, modified_time, compose_macro, protected, original_subject, deleted, server, namespace, similarity) AS (
|
||||||
|
SELECT DISTINCT ON (lo.depth, original_subject) lo.depth, factoid_id, subject,
|
||||||
|
copula, predicate, author, modified_time, compose_macro, protected,
|
||||||
|
original_subject, f.deleted, f.server, f.namespace,
|
||||||
|
(difference(original_subject, ?) ::float + similarity(?, original_subject)) / greatest(length(?), length(original_subject))
|
||||||
|
FROM factoid f
|
||||||
|
INNER JOIN factoid_lookup_order lo
|
||||||
|
ON f.generated_server = lo.gen_server
|
||||||
|
AND f.generated_namespace = lo.gen_namespace
|
||||||
|
WHERE (difference(original_subject, ?) ::float + similarity(?, original_subject)) / greatest(length(?), length(original_subject)) > ?
|
||||||
|
ORDER BY depth ASC, original_subject ASC, factoid_id DESC
|
||||||
|
)
|
||||||
|
SELECT DISTINCT ON (similarity, original_subject) similarity, factoid_id, original_subject FROM get_factoid_trigram WHERE NOT deleted ORDER BY similarity DESC, original_subject, depth, factoid_id DESC LIMIT 10
|
||||||
|
", undef,
|
||||||
|
$namespace, $server,
|
||||||
|
$subject, $subject, $subject, $subject, $subject, $subject, $threshold
|
||||||
);
|
);
|
||||||
|
|
||||||
|
print STDERR Dumper($rows);
|
||||||
|
|
||||||
use Text::Levenshtein qw/distance/; # only import it in this scope
|
return [grep {$_} map {$_->[2]} @$rows ];
|
||||||
|
|
||||||
my $threshold = int(max(4, min(10, 4 + length($subject) / 7)));
|
|
||||||
my @sorted =
|
|
||||||
map {$_->[0]}
|
|
||||||
sort {$a->[1] <=> $b->[1]}
|
|
||||||
grep {$_->[1] < $threshold}
|
|
||||||
map {[$_->[1], distance($subject, $_->[1])]}
|
|
||||||
grep {$_->[2] =~ /\S/} @$rows;
|
|
||||||
|
|
||||||
return [grep {$_} @sorted[0 .. 9]];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
no warnings 'void';
|
no warnings 'void';
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
CREATE EXTENSION IF NOT EXISTS pg_trgm;
|
CREATE EXTENSION IF NOT EXISTS pg_trgm;
|
||||||
|
CREATE EXTENSION IF NOT EXISTS fuzzystrmatch;
|
||||||
|
|
||||||
BEGIN;
|
BEGIN;
|
||||||
DROP TABLE IF EXISTS public.factoid;
|
DROP TABLE IF EXISTS public.factoid;
|
||||||
|
|
Loading…
Add table
Reference in a new issue