
The test suite now passes without segfaults or anything

Max Maischein 2020-09-04 15:52:29 +02:00
parent 1ad93cacc0
commit e521f92bf6
5 changed files with 226 additions and 43 deletions


@@ -314,6 +314,31 @@ register_fts3_perl_tokenizer(dbh)
OUTPUT:
RETVAL
static int
register_fts5_perl_tokenizer(dbh)
SV *dbh
ALIAS:
DBD::SQLite::db::sqlite_register_fts5_perl_tokenizer = 1
CODE:
RETVAL = sqlite_db_register_fts5_perl_tokenizer(aTHX_ dbh);
OUTPUT:
RETVAL
static int
fts5_xToken(pCtx,tflags,svToken,iStart,iEnd)
SV *pCtx
int tflags
SV *svToken
STRLEN iStart
STRLEN iEnd
ALIAS:
DBD::SQLite::db::fts5_xToken = 1
CODE:
dTHX;
RETVAL = perl_fts5_xToken(aTHX_ pCtx,tflags,svToken,iStart,iEnd);
OUTPUT:
RETVAL
HV*
db_status(dbh, reset = 0)
SV* dbh
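
The two XS entry points added above surface in Perl as a database-handle method (via the ALIAS lines) plus a callback helper that tokenizer coderefs call for every token they emit. A minimal sketch of the Perl-side calls, assuming $dbh is an open DBD::SQLite handle built with FTS5 support; the variable names in the second call are illustrative, the real arguments come from the tokenizer closure shown in the new t/67_fts5.t below:

    # register the perl tokenizer bridge on this connection
    $dbh->sqlite_register_fts5_perl_tokenizer();

    # inside a tokenizer coderef, report one token back to SQLite:
    DBD::SQLite::db::fts5_xToken($ctx, $tflags, $token, $byte_start, $byte_end);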


@@ -182,6 +182,8 @@ HV* sqlite_db_table_column_metadata(pTHX_ SV *dbh, SV *dbname, SV *tablename, SV
HV* _sqlite_db_status(pTHX_ SV *dbh, int reset);
SV* sqlite_db_filename(pTHX_ SV *dbh);
int sqlite_db_register_fts3_perl_tokenizer(pTHX_ SV *dbh);
int sqlite_db_register_fts5_perl_tokenizer(pTHX_ SV *dbh);
int perl_fts5_xToken(pTHX_ SV* pCtx, int tflags, SV* svToken, int iStart, int iEnd );
HV* _sqlite_status(int reset);
HV* _sqlite_st_status(pTHX_ SV *sth, int reset);
int sqlite_db_create_module(pTHX_ SV *dbh, const char *name, const char *perl_class);


@@ -17,7 +17,7 @@ typedef struct perl_fts3_tokenizer_cursor {
} perl_fts3_tokenizer_cursor;
typedef struct perl_Fts5Tokenizer {
Fts5Tokenizer base;
/* Fts5Tokenizer base; -- this is an empty struct, so we omit it entirely */
SV *coderef; /* the perl tokenizer is a coderef that takes
** a string and some parameters and
** in turn calls the xToken() function
@@ -25,6 +25,23 @@ typedef struct perl_Fts5Tokenizer {
*/
} perl_Fts5Tokenizer;
/* This is the structure where we store the information between calls
* from Perl and callbacks to SQLite. We could instead pass these values
* as opaque arguments to Perl and back, but this reduces the number of
* opaque values handled by Perl to a single such value.
*/
typedef struct perl_cb_ctx {
void * Ctx;
int (*xToken)(
void *pCtx, /* Copy of 2nd argument to xTokenize() */
int tflags, /* Mask of FTS5_TOKEN_* flags */
const char *pToken, /* Pointer to buffer containing token */
int nToken, /* Size of token in bytes */
int iStart, /* Byte offset of token within input text */
int iEnd /* Byte offset of end of token within input text */
);
} perl_cb_ctx;
/*
** Create a new tokenizer instance.
** Will be called whenever a FTS3 table is created with
@@ -338,7 +355,6 @@ static int perl_fts5_tokenizer_Create(
int n_retval;
SV *retval;
perl_Fts5Tokenizer *t;
if (!nArg) {
return SQLITE_ERROR;
}
@@ -346,7 +362,6 @@ static int perl_fts5_tokenizer_Create(
t = (perl_Fts5Tokenizer *) sqlite3_malloc(sizeof(*t));
if( t==NULL ) return SQLITE_NOMEM;
memset(t, 0, sizeof(*t));
ENTER;
SAVETMPS;
@@ -358,11 +373,12 @@ static int perl_fts5_tokenizer_Create(
/* store a copy of the returned coderef into the tokenizer structure */
if (n_retval != 1) {
warn("tokenizer_Create returned %d arguments", n_retval);
warn("tokenizer_Create returned %d arguments, expected a single coderef", n_retval);
}
retval = POPs;
t->coderef = newSVsv(retval);
*ppOut = &t->base;
/* *ppOut = &t->base; */ /* Fts5Tokenizer is empty and gcc complains about that */
*ppOut = (Fts5Tokenizer *) t;
PUTBACK;
FREETMPS;
@@ -405,18 +421,38 @@ static int perl_fts5_tokenizer_Tokenize(
int n_retval;
char *token;
char *byteOffset;
STRLEN n_a; /* this is required for older perls < 5.8.8 */
I32 hop;
dTHX;
dSP;
/* The implicit assumption here is that our callback will only be
* invoked from a stack frame below this frame!
*/
perl_cb_ctx ctx;
SV* ctxP;
SV* text;
STRLEN n_a; /* this is required for older perls < 5.8.8 */
I32 hop;
ENTER;
SAVETMPS;
/* call the Perl tokenizer, and pass it our token callback */
PUSHMARK(SP);
PUTBACK;
ctx.Ctx = pCtx;
ctx.xToken = xToken;
ctxP = newSVpvn((const char *const)&ctx, sizeof(ctx));
text = newSVpvn(pText, nText);
// We pass three arguments: the callback context, the text, and the flags
//EXTEND(SP, 2);
XPUSHs(sv_2mortal(ctxP));
XPUSHs(sv_2mortal(text));
XPUSHs(sv_2mortal(newSViv(flags)));
// We need to properly wrap this so it is callable from Perl...
// ... without needing actual local storage or a global variable...
// XXX Wrap the "found token" callback, and pass it to the user
// Then, restructure the data if it is UTF-8
@@ -471,11 +507,10 @@ static int perl_fts5_tokenizer_Tokenize(
//
// result = SQLITE_OK;
//
PUTBACK;
n_retval = call_sv(c->coderef, G_ARRAY);
SPAGAIN;
PUTBACK;
FREETMPS;
LEAVE;
@@ -483,6 +518,20 @@ static int perl_fts5_tokenizer_Tokenize(
return result;
}
int perl_fts5_xToken(pTHX_
SV* pCtx,
int tflags, /* Mask of FTS5_TOKEN_* flags */
SV* svToken, /* Pointer to buffer containing token */
int iStart, /* Byte offset of token within input text */
int iEnd /* Byte offset of end of token within input text */
) {
const char* chrToken = SvPV_nolen(svToken);
STRLEN nToken = strlen(chrToken);
perl_cb_ctx * p = (perl_cb_ctx *)SvPV_nolen( pCtx );
return p->xToken(p->Ctx,tflags,chrToken,nToken,iStart,iEnd);
}
/*
** The set of routines that implement the perl FTS5 tokenizer
*/
@@ -533,8 +582,7 @@ int sqlite_db_register_fts5_perl_tokenizer(pTHX_ SV *dbh)
fts5_api *pFts5Api = sqlite_fetch_fts5_api(aTHX_ dbh);
fts5_tokenizer *p = &perl_fts5_tokenizer_Module;
// pFts5Api->xCreateTokenizer(pFts5Api,...);
rc = pFts5Api->xCreateTokenizer(pFts5Api, "perl", 0, p, 0);
return 0;
return rc;
}
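
For context, the contract this glue code expects on the Perl side is roughly: the registered coderef acts as a tokenizer factory, and the closure it returns is handed the packed perl_cb_ctx scalar, the text to split, and the FTS5 flag mask; it must report each token together with its offsets through fts5_xToken. A hedged sketch of such a closure (the package and sub names are illustrative; the committed t/67_fts5.t below uses a locale-aware variant of the same pattern):

    use strict;
    use warnings;

    package My::Tokenizer;                 # hypothetical package name

    sub simple_tokenizer {                 # tokenizer factory
        return sub {
            my ($ctx, $string, $tflags) = @_;
            while ($string =~ /\w+/g) {
                my ($start, $end) = ($-[0], $+[0]);            # offsets of the match
                my $token = substr($string, $start, $end - $start);
                # hand the token, with its offsets, back to the C callback
                DBD::SQLite::db::fts5_xToken($ctx, 0, $token, $start, $end);
            }
        };
    }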


@@ -143,10 +143,12 @@ sub connect {
$dbh->sqlite_collation_needed( \&install_collation );
$dbh->sqlite_create_function( "REGEXP", 2, \&regexp );
$dbh->sqlite_register_fts3_perl_tokenizer();
$dbh->sqlite_register_fts5_perl_tokenizer();
} else {
$dbh->func( \&install_collation, "collation_needed" );
$dbh->func( "REGEXP", 2, \&regexp, "create_function" );
$dbh->func( "register_fts3_perl_tokenizer" );
$dbh->func( "register_fts5_perl_tokenizer" );
}
# HACK: Since PrintWarn = 0 doesn't seem to actually prevent warnings
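
With the registration wired into connect(), a virtual table can select the Perl tokenizer by name and point it at a tokenizer factory, as the new t/67_fts5.t exercises. A hedged usage sketch (table and package names are illustrative):

    $dbh->do(q{
        CREATE VIRTUAL TABLE my_fts
        USING fts5(content, tokenize="perl 'My::Tokenizer::simple_tokenizer'")
    });
    my $rows = $dbh->selectcol_arrayref(
        "SELECT rowid FROM my_fts WHERE content MATCH ?", undef, "some query");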

t/67_fts5.t (new file)

@@ -0,0 +1,106 @@
use strict;
use warnings;
no if $] >= 5.022, "warnings", "locale";
use lib "t/lib";
use SQLiteTest;
use Test::More;
#use if -d ".git", "Test::FailWarnings";
use DBD::SQLite;
my @texts = ("il était une bergère",
"qui gardait ses moutons",
"elle fit un fromage",
"du lait de ses moutons");
my @tests = (
# query => expected results
["bergère" => 0 ],
["berg*" => 0 ],
["foobar" ],
["moutons" => 1, 3 ],
['"qui gardait"' => 1 ],
["moutons NOT lait" => 1 ],
["il était" => 0 ],
["(il OR elle) AND un*" => 0, 2 ],
);
BEGIN {
requires_unicode_support();
if (!has_fts()) {
plan skip_all => 'FTS is disabled for this DBD::SQLite';
}
if ($DBD::SQLite::sqlite_version_number >= 3011000 and $DBD::SQLite::sqlite_version_number < 3012000 and !has_compile_option('ENABLE_FTS5_TOKENIZER')) {
plan skip_all => 'FTS5 tokenizer is disabled for this DBD::SQLite';
}
}
# Perl may spit a warning on locale
# use Test::NoWarnings;
BEGIN {
# Sadly perl for windows (and probably sqlite, too) may hang
# if the system locale doesn't support european languages.
# en-us should be a safe default. if it doesn't work, use 'C'.
if ( $^O eq 'MSWin32') {
use POSIX 'locale_h';
setlocale(LC_COLLATE, 'en-us');
}
}
use locale;
sub locale_tokenizer { # see also: Search::Tokenizer
return sub {
my( $ctx, $string, $tokenizer_context_flags ) = @_;
my $regex = qr/\w+/;
#my $term_index = 0;
#
while( $string =~ /$regex/g) {
my ($start, $end) = ($-[0], $+[0]);
my $term = substr($string, $start, my $len = $end-$start);
my $flags = 0; # SQLITE_FTS5_TOKEN;
DBD::SQLite::db::fts5_xToken($ctx,$flags,$term,$start,$end);
};
};
}
use DBD::SQLite;
for my $use_unicode (0, 1) {
# connect
my $dbh = connect_ok( RaiseError => 1, sqlite_unicode => $use_unicode );
for my $fts (qw/fts5/) {
# create fts table
$dbh->do(<<"") or die DBI::errstr;
CREATE VIRTUAL TABLE try_$fts
USING $fts(content, tokenize="perl 'main::locale_tokenizer'")
# populate it
my $insert_sth = $dbh->prepare(<<"") or die DBI::errstr;
INSERT INTO try_$fts(content) VALUES(?)
my @doc_ids;
for (my $i = 0; $i < @texts; $i++) {
$insert_sth->execute($texts[$i]);
$doc_ids[$i] = $dbh->last_insert_id("", "", "", "");
}
# queries
SKIP: {
my $sql = "SELECT rowid FROM try_$fts WHERE content MATCH ?";
for my $t (@tests) {
my ($query, @expected) = @$t;
@expected = map {$doc_ids[$_]} @expected;
my $results = $dbh->selectcol_arrayref($sql, undef, $query);
is_deeply($results, \@expected, "$query ($fts, unicode=$use_unicode)");
}
}
}
}
done_testing;