mirror of
https://github.com/DBD-SQLite/DBD-SQLite
synced 2025-06-07 06:08:38 -04:00
The test suite now passes without segfaults or anything
This commit is contained in:
parent
1ad93cacc0
commit
e521f92bf6
5 changed files with 226 additions and 43 deletions
25
SQLite.xs
25
SQLite.xs
|
@ -314,6 +314,31 @@ register_fts3_perl_tokenizer(dbh)
|
|||
OUTPUT:
|
||||
RETVAL
|
||||
|
||||
static int
|
||||
register_fts5_perl_tokenizer(dbh)
|
||||
SV *dbh
|
||||
ALIAS:
|
||||
DBD::SQLite::db::sqlite_register_fts5_perl_tokenizer = 1
|
||||
CODE:
|
||||
RETVAL = sqlite_db_register_fts5_perl_tokenizer(aTHX_ dbh);
|
||||
OUTPUT:
|
||||
RETVAL
|
||||
|
||||
static int
|
||||
fts5_xToken(pCtx,tflags,svToken,iStart,iEnd)
|
||||
SV *pCtx
|
||||
int tflags
|
||||
SV *svToken
|
||||
STRLEN iStart
|
||||
STRLEN iEnd
|
||||
ALIAS:
|
||||
DBD::SQLite::db::fts5_xToken = 1
|
||||
CODE:
|
||||
dTHX;
|
||||
RETVAL = perl_fts5_xToken(aTHX_ pCtx,tflags,svToken,iStart,iEnd);
|
||||
OUTPUT:
|
||||
RETVAL
|
||||
|
||||
HV*
|
||||
db_status(dbh, reset = 0)
|
||||
SV* dbh
|
||||
|
|
2
dbdimp.h
2
dbdimp.h
|
@ -182,6 +182,8 @@ HV* sqlite_db_table_column_metadata(pTHX_ SV *dbh, SV *dbname, SV *tablename, SV
|
|||
HV* _sqlite_db_status(pTHX_ SV *dbh, int reset);
|
||||
SV* sqlite_db_filename(pTHX_ SV *dbh);
|
||||
int sqlite_db_register_fts3_perl_tokenizer(pTHX_ SV *dbh);
|
||||
int sqlite_db_register_fts5_perl_tokenizer(pTHX_ SV *dbh);
|
||||
int perl_fts5_xToken(pTHX_ SV* pCtx, int tflags, SV* svToken, int iStart, int iEnd );
|
||||
HV* _sqlite_status(int reset);
|
||||
HV* _sqlite_st_status(pTHX_ SV *sth, int reset);
|
||||
int sqlite_db_create_module(pTHX_ SV *dbh, const char *name, const char *perl_class);
|
||||
|
|
|
@ -17,7 +17,7 @@ typedef struct perl_fts3_tokenizer_cursor {
|
|||
} perl_fts3_tokenizer_cursor;
|
||||
|
||||
typedef struct perl_Fts5Tokenizer {
|
||||
Fts5Tokenizer base;
|
||||
/* Fts5Tokenizer base; /* this is an empty struct, so we omit it entirely */
|
||||
SV *coderef; /* the perl tokenizer is a coderef that takes
|
||||
** a string and and some parameters and
|
||||
** in turn calls the xToken() function
|
||||
|
@ -25,6 +25,23 @@ typedef struct perl_Fts5Tokenizer {
|
|||
*/
|
||||
} perl_Fts5Tokenizer;
|
||||
|
||||
/* This is the structure where we store the information between calls
|
||||
* from Perl and callbacks to SQLite. We could instead pass these values
|
||||
* as opaque arguments to Perl and back, but this reduces the number of
|
||||
* opaque values handled by Perl to a single such value.
|
||||
*/
|
||||
typedef struct perl_cb_ctx {
|
||||
void * Ctx;
|
||||
int (*xToken)(
|
||||
void *pCtx, /* Copy of 2nd argument to xTokenize() */
|
||||
int tflags, /* Mask of FTS5_TOKEN_* flags */
|
||||
const char *pToken, /* Pointer to buffer containing token */
|
||||
int nToken, /* Size of token in bytes */
|
||||
int iStart, /* Byte offset of token within input text */
|
||||
int iEnd /* Byte offset of end of token within input text */
|
||||
);
|
||||
} perl_cb_ctx;
|
||||
|
||||
/*
|
||||
** Create a new tokenizer instance.
|
||||
** Will be called whenever a FTS3 table is created with
|
||||
|
@ -338,7 +355,6 @@ static int perl_fts5_tokenizer_Create(
|
|||
int n_retval;
|
||||
SV *retval;
|
||||
perl_Fts5Tokenizer *t;
|
||||
|
||||
if (!nArg) {
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
|
@ -346,7 +362,6 @@ static int perl_fts5_tokenizer_Create(
|
|||
t = (perl_Fts5Tokenizer *) sqlite3_malloc(sizeof(*t));
|
||||
if( t==NULL ) return SQLITE_NOMEM;
|
||||
memset(t, 0, sizeof(*t));
|
||||
|
||||
ENTER;
|
||||
SAVETMPS;
|
||||
|
||||
|
@ -358,11 +373,12 @@ static int perl_fts5_tokenizer_Create(
|
|||
|
||||
/* store a copy of the returned coderef into the tokenizer structure */
|
||||
if (n_retval != 1) {
|
||||
warn("tokenizer_Create returned %d arguments", n_retval);
|
||||
warn("tokenizer_Create returned %d arguments, expected a single coderef", n_retval);
|
||||
}
|
||||
retval = POPs;
|
||||
t->coderef = newSVsv(retval);
|
||||
*ppOut = &t->base;
|
||||
/* *ppOut = &t->base; */ /* Fts5Tokenizer is empty and gcc complains about that */
|
||||
*ppOut = (Fts5Tokenizer *) t;
|
||||
|
||||
PUTBACK;
|
||||
FREETMPS;
|
||||
|
@ -405,18 +421,38 @@ static int perl_fts5_tokenizer_Tokenize(
|
|||
int n_retval;
|
||||
char *token;
|
||||
char *byteOffset;
|
||||
STRLEN n_a; /* this is required for older perls < 5.8.8 */
|
||||
I32 hop;
|
||||
|
||||
dTHX;
|
||||
dSP;
|
||||
|
||||
/* The implicit assumption here is that our callback will only be
|
||||
* invoked from a stack frame below this frame!
|
||||
*/
|
||||
perl_cb_ctx ctx;
|
||||
SV* ctxP;
|
||||
SV* text;
|
||||
|
||||
STRLEN n_a; /* this is required for older perls < 5.8.8 */
|
||||
I32 hop;
|
||||
|
||||
ENTER;
|
||||
SAVETMPS;
|
||||
|
||||
/* call the Perl tokenizer, and pass it our token callback */
|
||||
PUSHMARK(SP);
|
||||
PUTBACK;
|
||||
|
||||
ctx.Ctx = pCtx;
|
||||
ctx.xToken = xToken;
|
||||
ctxP = newSVpvn((const char *const)&ctx, sizeof(ctx));
|
||||
|
||||
text = newSVpvn(pText, nText);
|
||||
|
||||
// We pass four arguments
|
||||
//EXTEND(SP, 2);
|
||||
XPUSHs(sv_2mortal(ctxP));
|
||||
XPUSHs(sv_2mortal(text));
|
||||
XPUSHs(sv_2mortal(newSViv(flags)));
|
||||
// We need to properly wrap this so it is callable from Perl...
|
||||
// ... without needing actual local storage or a global variable...
|
||||
|
||||
// XXX Wrap the "found token" callback, and pass it to the user
|
||||
// Then, restructure the data if it is UTF-8
|
||||
|
@ -471,11 +507,10 @@ static int perl_fts5_tokenizer_Tokenize(
|
|||
//
|
||||
// result = SQLITE_OK;
|
||||
//
|
||||
|
||||
PUTBACK;
|
||||
n_retval = call_sv(c->coderef, G_ARRAY);
|
||||
SPAGAIN;
|
||||
|
||||
|
||||
PUTBACK;
|
||||
FREETMPS;
|
||||
LEAVE;
|
||||
|
@ -483,6 +518,20 @@ static int perl_fts5_tokenizer_Tokenize(
|
|||
return result;
|
||||
}
|
||||
|
||||
int perl_fts5_xToken(pTHX_
|
||||
SV* pCtx,
|
||||
int tflags, /* Mask of FTS5_TOKEN_* flags */
|
||||
SV* svToken, /* Pointer to buffer containing token */
|
||||
int iStart, /* Byte offset of token within input text */
|
||||
int iEnd /* Byte offset of end of token within input text */
|
||||
) {
|
||||
const char* chrToken = SvPV_nolen(svToken);
|
||||
STRLEN nToken = strlen(chrToken);
|
||||
perl_cb_ctx * p = (perl_cb_ctx *)SvPV_nolen( pCtx );
|
||||
return p->xToken(p->Ctx,tflags,chrToken,nToken,iStart,iEnd);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** The set of routines that implement the perl FTS5 tokenizer
|
||||
*/
|
||||
|
@ -533,8 +582,7 @@ int sqlite_db_register_fts5_perl_tokenizer(pTHX_ SV *dbh)
|
|||
fts5_api *pFts5Api = sqlite_fetch_fts5_api(aTHX_ dbh);
|
||||
fts5_tokenizer *p = &perl_fts5_tokenizer_Module;
|
||||
|
||||
// pFts5Api->xCreateTokenizer(pFts5Api,...);
|
||||
rc = pFts5Api->xCreateTokenizer(pFts5Api, "perl", 0, p, 0);
|
||||
|
||||
|
||||
return 0;
|
||||
return rc;
|
||||
}
|
||||
|
|
|
@ -143,10 +143,12 @@ sub connect {
|
|||
$dbh->sqlite_collation_needed( \&install_collation );
|
||||
$dbh->sqlite_create_function( "REGEXP", 2, \®exp );
|
||||
$dbh->sqlite_register_fts3_perl_tokenizer();
|
||||
$dbh->sqlite_register_fts5_perl_tokenizer();
|
||||
} else {
|
||||
$dbh->func( \&install_collation, "collation_needed" );
|
||||
$dbh->func( "REGEXP", 2, \®exp, "create_function" );
|
||||
$dbh->func( "register_fts3_perl_tokenizer" );
|
||||
$dbh->func( "register_fts5_perl_tokenizer" );
|
||||
}
|
||||
|
||||
# HACK: Since PrintWarn = 0 doesn't seem to actually prevent warnings
|
||||
|
@ -1223,7 +1225,7 @@ store natively as a BLOB use the following code:
|
|||
|
||||
use DBI qw(:sql_types);
|
||||
my $dbh = DBI->connect("dbi:SQLite:dbfile","","");
|
||||
|
||||
|
||||
my $blob = `cat foo.jpg`;
|
||||
my $sth = $dbh->prepare("INSERT INTO mytable VALUES (1, ?)");
|
||||
$sth->bind_param(1, $blob, SQL_BLOB);
|
||||
|
@ -1235,7 +1237,7 @@ And then retrieval just works:
|
|||
$sth->execute();
|
||||
my $row = $sth->fetch;
|
||||
my $blobo = $row->[1];
|
||||
|
||||
|
||||
# now $blobo == $blob
|
||||
|
||||
=head2 Functions And Bind Parameters
|
||||
|
@ -1264,7 +1266,7 @@ As shown above in the C<BLOB> section, you can always use
|
|||
C<bind_param()> to tell the type of a bind value.
|
||||
|
||||
use DBI qw(:sql_types); # Don't forget this
|
||||
|
||||
|
||||
my $sth = $dbh->prepare(q{
|
||||
SELECT bar FROM foo GROUP BY bar HAVING count(*) > ?;
|
||||
});
|
||||
|
@ -1454,13 +1456,13 @@ statement. To end it, call C<commit/rollback> methods, or issue
|
|||
the corresponding statements.
|
||||
|
||||
$dbh->{AutoCommit} = 1;
|
||||
|
||||
|
||||
$dbh->begin_work; # or $dbh->do('BEGIN TRANSACTION');
|
||||
|
||||
|
||||
# $dbh->{AutoCommit} is turned off temporarily during a transaction;
|
||||
|
||||
|
||||
$dbh->commit; # or $dbh->do('COMMIT');
|
||||
|
||||
|
||||
# $dbh->{AutoCommit} is turned on again;
|
||||
|
||||
=item When the AutoCommit flag is off
|
||||
|
@ -1474,15 +1476,15 @@ You can commit or roll it back freely. Another transaction will
|
|||
automatically begin if you execute another statement.
|
||||
|
||||
$dbh->{AutoCommit} = 0;
|
||||
|
||||
|
||||
# $dbh->do('BEGIN TRANSACTION') is not necessary, but possible
|
||||
|
||||
|
||||
...
|
||||
|
||||
|
||||
$dbh->commit; # or $dbh->do('COMMIT');
|
||||
|
||||
|
||||
# $dbh->{AutoCommit} stays intact;
|
||||
|
||||
|
||||
$dbh->{AutoCommit} = 1; # ends the transactional mode
|
||||
|
||||
=back
|
||||
|
@ -2090,38 +2092,38 @@ Here is a simple aggregate function which returns the variance
|
|||
(example adapted from pysqlite):
|
||||
|
||||
package variance;
|
||||
|
||||
|
||||
sub new { bless [], shift; }
|
||||
|
||||
|
||||
sub step {
|
||||
my ( $self, $value ) = @_;
|
||||
|
||||
|
||||
push @$self, $value;
|
||||
}
|
||||
|
||||
|
||||
sub finalize {
|
||||
my $self = $_[0];
|
||||
|
||||
|
||||
my $n = @$self;
|
||||
|
||||
|
||||
# Variance is NULL unless there is more than one row
|
||||
return undef unless $n || $n == 1;
|
||||
|
||||
|
||||
my $mu = 0;
|
||||
foreach my $v ( @$self ) {
|
||||
$mu += $v;
|
||||
}
|
||||
$mu /= $n;
|
||||
|
||||
|
||||
my $sigma = 0;
|
||||
foreach my $v ( @$self ) {
|
||||
$sigma += ($v - $mu)**2;
|
||||
}
|
||||
$sigma = $sigma / ($n - 1);
|
||||
|
||||
|
||||
return $sigma;
|
||||
}
|
||||
|
||||
|
||||
$dbh->sqlite_create_aggregate( "variance", 1, 'variance' );
|
||||
|
||||
The aggregate function can then be used as:
|
||||
|
@ -2390,13 +2392,13 @@ You may also pass 0 as an argument to reset the status.
|
|||
You can change how the connected database should behave like this:
|
||||
|
||||
use DBD::SQLite::Constants qw/:database_connection_configuration_options/;
|
||||
|
||||
|
||||
my $dbh = DBI->connect('dbi:SQLite::memory:');
|
||||
|
||||
# This disables language features that allow ordinary SQL
|
||||
# to deliberately corrupt the database file
|
||||
$dbh->sqlite_db_config( SQLITE_DBCONFIG_DEFENSIVE, 1 );
|
||||
|
||||
|
||||
# This disables two-arg version of fts3_tokenizer.
|
||||
$dbh->sqlite_db_config( SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER, 0 );
|
||||
|
||||
|
@ -2693,16 +2695,16 @@ then query which buildings overlap or are contained within a specified region:
|
|||
SELECT id FROM city_buildings
|
||||
WHERE minLong >= ? AND maxLong <= ?
|
||||
AND minLat >= ? AND maxLat <= ?
|
||||
|
||||
|
||||
# ... and those that overlap query coordinates
|
||||
my $overlap_sql = <<"";
|
||||
SELECT id FROM city_buildings
|
||||
WHERE maxLong >= ? AND minLong <= ?
|
||||
AND maxLat >= ? AND minLat <= ?
|
||||
|
||||
|
||||
my $contained = $dbh->selectcol_arrayref($contained_sql,undef,
|
||||
$minLong, $maxLong, $minLat, $maxLat);
|
||||
|
||||
|
||||
my $overlapping = $dbh->selectcol_arrayref($overlap_sql,undef,
|
||||
$minLong, $maxLong, $minLat, $maxLat);
|
||||
|
||||
|
@ -2750,10 +2752,10 @@ header like this:
|
|||
|
||||
use File::ShareDir 'dist_dir';
|
||||
use File::Spec::Functions 'catfile';
|
||||
|
||||
|
||||
# the whole sqlite3.h header
|
||||
my $sqlite3_h = catfile(dist_dir('DBD-SQLite'), 'sqlite3.h');
|
||||
|
||||
|
||||
# or only a particular header, amalgamated in sqlite3.c
|
||||
my $what_i_want = 'parse.h';
|
||||
my $sqlite3_c = catfile(dist_dir('DBD-SQLite'), 'sqlite3.c');
|
||||
|
|
106
t/67_fts5.t
Normal file
106
t/67_fts5.t
Normal file
|
@ -0,0 +1,106 @@
|
|||
use strict;
|
||||
use warnings;
|
||||
no if $] >= 5.022, "warnings", "locale";
|
||||
use lib "t/lib";
|
||||
use SQLiteTest;
|
||||
use Test::More;
|
||||
#use if -d ".git", "Test::FailWarnings";
|
||||
use DBD::SQLite;
|
||||
|
||||
my @texts = ("il était une bergère",
|
||||
"qui gardait ses moutons",
|
||||
"elle fit un fromage",
|
||||
"du lait de ses moutons");
|
||||
|
||||
my @tests = (
|
||||
# query => expected results
|
||||
["bergère" => 0 ],
|
||||
["berg*" => 0 ],
|
||||
["foobar" ],
|
||||
["moutons" => 1, 3 ],
|
||||
['"qui gardait"' => 1 ],
|
||||
["moutons NOT lait" => 1 ],
|
||||
["il était" => 0 ],
|
||||
["(il OR elle) AND un*" => 0, 2 ],
|
||||
);
|
||||
|
||||
BEGIN {
|
||||
requires_unicode_support();
|
||||
|
||||
if (!has_fts()) {
|
||||
plan skip_all => 'FTS is disabled for this DBD::SQLite';
|
||||
}
|
||||
if ($DBD::SQLite::sqlite_version_number >= 3011000 and $DBD::SQLite::sqlite_version_number < 3012000 and !has_compile_option('ENABLE_FTS5_TOKENIZER')) {
|
||||
plan skip_all => 'FTS5 tokenizer is disabled for this DBD::SQLite';
|
||||
}
|
||||
}
|
||||
|
||||
# Perl may spit a warning on locale
|
||||
# use Test::NoWarnings;
|
||||
|
||||
BEGIN {
|
||||
# Sadly perl for windows (and probably sqlite, too) may hang
|
||||
# if the system locale doesn't support european languages.
|
||||
# en-us should be a safe default. if it doesn't work, use 'C'.
|
||||
if ( $^O eq 'MSWin32') {
|
||||
use POSIX 'locale_h';
|
||||
setlocale(LC_COLLATE, 'en-us');
|
||||
}
|
||||
}
|
||||
|
||||
use locale;
|
||||
|
||||
sub locale_tokenizer { # see also: Search::Tokenizer
|
||||
return sub {
|
||||
my( $ctx, $string, $tokenizer_context_flags ) = @_;
|
||||
my $regex = qr/\w+/;
|
||||
#my $term_index = 0;
|
||||
#
|
||||
while( $string =~ /$regex/g) {
|
||||
my ($start, $end) = ($-[0], $+[0]);
|
||||
my $term = substr($string, $start, my $len = $end-$start);
|
||||
my $flags = 0; # SQLITE_FTS5_TOKEN;
|
||||
DBD::SQLite::db::fts5_xToken($ctx,$flags,$term,$start,$end);
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
use DBD::SQLite;
|
||||
|
||||
for my $use_unicode (0, 1) {
|
||||
|
||||
# connect
|
||||
my $dbh = connect_ok( RaiseError => 1, sqlite_unicode => $use_unicode );
|
||||
|
||||
for my $fts (qw/fts5/) {
|
||||
|
||||
# create fts table
|
||||
$dbh->do(<<"") or die DBI::errstr;
|
||||
CREATE VIRTUAL TABLE try_$fts
|
||||
USING $fts(content, tokenize="perl 'main::locale_tokenizer'")
|
||||
|
||||
# populate it
|
||||
my $insert_sth = $dbh->prepare(<<"") or die DBI::errstr;
|
||||
INSERT INTO try_$fts(content) VALUES(?)
|
||||
|
||||
my @doc_ids;
|
||||
for (my $i = 0; $i < @texts; $i++) {
|
||||
$insert_sth->execute($texts[$i]);
|
||||
$doc_ids[$i] = $dbh->last_insert_id("", "", "", "");
|
||||
}
|
||||
|
||||
# queries
|
||||
SKIP: {
|
||||
my $sql = "SELECT rowid FROM try_$fts WHERE content MATCH ?";
|
||||
|
||||
for my $t (@tests) {
|
||||
my ($query, @expected) = @$t;
|
||||
@expected = map {$doc_ids[$_]} @expected;
|
||||
my $results = $dbh->selectcol_arrayref($sql, undef, $query);
|
||||
is_deeply($results, \@expected, "$query ($fts, unicode=$use_unicode)");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
done_testing;
|
Loading…
Add table
Reference in a new issue