1
0
Fork 0
mirror of https://github.com/DBD-SQLite/DBD-SQLite synced 2025-06-07 14:19:10 -04:00
This commit is contained in:
Max Maischein 2025-02-21 20:10:55 +00:00 committed by GitHub
commit 66614ed0fe
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 1160 additions and 76 deletions

View file

@ -3,9 +3,11 @@ Changes
constants.inc constants.inc
dbdimp.c dbdimp.c
dbdimp.h dbdimp.h
dbdimp_tokenizer.inc dbdimp_fts3_tokenizer.inc
dbdimp_fts5_tokenizer.inc
dbdimp_virtual_table.inc dbdimp_virtual_table.inc
fts3_tokenizer.h fts3_tokenizer.h
fts5.h
inc/Test/FailWarnings.pm inc/Test/FailWarnings.pm
lib/DBD/SQLite.pm lib/DBD/SQLite.pm
lib/DBD/SQLite/Constants.pm lib/DBD/SQLite/Constants.pm

View file

@ -402,10 +402,10 @@ WriteMakefile(
), ),
OBJECT => ( $sqlite_local OBJECT => ( $sqlite_local
? '$(O_FILES)' ? '$(O_FILES)'
: 'SQLite.o dbdimp.o' : 'SQLite.o dbdimp_fts3.o dbdimp_fts5.o'
), ),
depend => { depend => {
'dbdimp.o' => 'dbdimp_tokenizer.inc dbdimp_virtual_table.inc', 'dbdimp.o' => 'dbdimp_fts3_tokenizer.inc dbdimp_fts5_tokenizer.inc dbdimp_virtual_table.inc',
}, },
clean => { clean => {
FILES => 'SQLite.xsi config.h tv.log *.old', FILES => 'SQLite.xsi config.h tv.log *.old',

View file

@ -314,6 +314,31 @@ register_fts3_perl_tokenizer(dbh)
OUTPUT: OUTPUT:
RETVAL RETVAL
static int
register_fts5_perl_tokenizer(dbh)
SV *dbh
ALIAS:
DBD::SQLite::db::sqlite_register_fts5_perl_tokenizer = 1
CODE:
RETVAL = sqlite_db_register_fts5_perl_tokenizer(aTHX_ dbh);
OUTPUT:
RETVAL
static int
fts5_xToken(pCtx,tflags,svToken,iStart,iEnd)
SV *pCtx
int tflags
SV *svToken
STRLEN iStart
STRLEN iEnd
ALIAS:
DBD::SQLite::db::fts5_xToken = 1
CODE:
dTHX;
RETVAL = perl_fts5_xToken(aTHX_ pCtx,tflags,svToken,iStart,iEnd);
OUTPUT:
RETVAL
HV* HV*
db_status(dbh, reset = 0) db_status(dbh, reset = 0)
SV* dbh SV* dbh

View file

@ -20,5 +20,6 @@
#include "sqlite3.h" #include "sqlite3.h"
#include "fts3_tokenizer.h" #include "fts3_tokenizer.h"
#include "fts5.h"
#endif #endif

View file

@ -1293,6 +1293,7 @@ _const_flags_for_file_open_operations()
SQLITE_OPEN_READONLY = SQLITE_OPEN_READONLY SQLITE_OPEN_READONLY = SQLITE_OPEN_READONLY
SQLITE_OPEN_READWRITE = SQLITE_OPEN_READWRITE SQLITE_OPEN_READWRITE = SQLITE_OPEN_READWRITE
SQLITE_OPEN_CREATE = SQLITE_OPEN_CREATE SQLITE_OPEN_CREATE = SQLITE_OPEN_CREATE
SQLITE_OPEN_SUPER_JOURNAL = SQLITE_OPEN_SUPER_JOURNAL
SQLITE_OPEN_NOMUTEX = SQLITE_OPEN_NOMUTEX SQLITE_OPEN_NOMUTEX = SQLITE_OPEN_NOMUTEX
CODE: CODE:
RETVAL = ix; RETVAL = ix;
@ -1471,6 +1472,19 @@ _const_flags_for_file_open_operations_3037000_zero()
#if SQLITE_VERSION_NUMBER >= 3008003 #if SQLITE_VERSION_NUMBER >= 3008003
IV
_const_fts5_tokenizer()
ALIAS:
FTS5_TOKENIZE_QUERY = FTS5_TOKENIZE_QUERY
FTS5_TOKENIZE_PREFIX = FTS5_TOKENIZE_PREFIX
FTS5_TOKENIZE_DOCUMENT = FTS5_TOKENIZE_DOCUMENT
FTS5_TOKENIZE_AUX = FTS5_TOKENIZE_AUX
FTS5_TOKEN_COLOCATED = FTS5_TOKEN_COLOCATED
CODE:
RETVAL = ix;
OUTPUT:
RETVAL
IV IV
_const_function_flags_3008003() _const_function_flags_3008003()
ALIAS: ALIAS:
@ -1820,6 +1834,7 @@ _const__flags_for_file_open_operations()
OPEN_READONLY = SQLITE_OPEN_READONLY OPEN_READONLY = SQLITE_OPEN_READONLY
OPEN_READWRITE = SQLITE_OPEN_READWRITE OPEN_READWRITE = SQLITE_OPEN_READWRITE
OPEN_CREATE = SQLITE_OPEN_CREATE OPEN_CREATE = SQLITE_OPEN_CREATE
OPEN_SUPER_JOURNAL = SQLITE_OPEN_SUPER_JOURNAL
OPEN_NOMUTEX = SQLITE_OPEN_NOMUTEX OPEN_NOMUTEX = SQLITE_OPEN_NOMUTEX
CODE: CODE:
RETVAL = ix; RETVAL = ix;

View file

@ -2990,7 +2990,8 @@ sqlite_db_txn_state(pTHX_ SV *dbh, SV *schema)
#endif #endif
} }
#include "dbdimp_tokenizer.inc" #include "dbdimp_fts3_tokenizer.inc"
#include "dbdimp_fts5_tokenizer.inc"
#include "dbdimp_virtual_table.inc" #include "dbdimp_virtual_table.inc"
/* end */ /* end */

View file

@ -182,6 +182,8 @@ HV* sqlite_db_table_column_metadata(pTHX_ SV *dbh, SV *dbname, SV *tablename, SV
HV* _sqlite_db_status(pTHX_ SV *dbh, int reset); HV* _sqlite_db_status(pTHX_ SV *dbh, int reset);
SV* sqlite_db_filename(pTHX_ SV *dbh); SV* sqlite_db_filename(pTHX_ SV *dbh);
int sqlite_db_register_fts3_perl_tokenizer(pTHX_ SV *dbh); int sqlite_db_register_fts3_perl_tokenizer(pTHX_ SV *dbh);
int sqlite_db_register_fts5_perl_tokenizer(pTHX_ SV *dbh);
int perl_fts5_xToken(pTHX_ SV* pCtx, int tflags, SV* svToken, int iStart, int iEnd );
HV* _sqlite_status(int reset); HV* _sqlite_status(int reset);
HV* _sqlite_st_status(pTHX_ SV *sth, int reset); HV* _sqlite_st_status(pTHX_ SV *sth, int reset);
int sqlite_db_create_module(pTHX_ SV *dbh, const char *name, const char *perl_class); int sqlite_db_create_module(pTHX_ SV *dbh, const char *name, const char *perl_class);

View file

@ -1,10 +1,10 @@
typedef struct perl_tokenizer { typedef struct perl_fts3_tokenizer {
sqlite3_tokenizer base; sqlite3_tokenizer base;
SV *coderef; /* the perl tokenizer is a coderef that takes SV *coderef; /* the perl tokenizer is a coderef that takes
a string and returns a cursor coderef */ a string and returns a cursor coderef */
} perl_tokenizer; } perl_fts3_tokenizer;
typedef struct perl_tokenizer_cursor { typedef struct perl_fts3_tokenizer_cursor {
sqlite3_tokenizer_cursor base; sqlite3_tokenizer_cursor base;
SV *coderef; /* ref to the closure that returns terms */ SV *coderef; /* ref to the closure that returns terms */
char *pToken; /* storage for a copy of the last token */ char *pToken; /* storage for a copy of the last token */
@ -14,7 +14,24 @@ typedef struct perl_tokenizer_cursor {
const char *pInput; /* input we are tokenizing */ const char *pInput; /* input we are tokenizing */
const char *currentByte; /* pointer into pInput */ const char *currentByte; /* pointer into pInput */
int currentChar; /* char position corresponding to currentByte */ int currentChar; /* char position corresponding to currentByte */
} perl_tokenizer_cursor; } perl_fts3_tokenizer_cursor;
/* This is the structure where we store the information between calls
* from Perl and callbacks to SQLite. We could instead pass these values
* as opaque arguments to Perl and back, but this reduces the number of
* opaque values handled by Perl to a single such value.
*/
typedef struct perl_cb_ctx {
void * Ctx;
int (*xToken)(
void *pCtx, /* Copy of 2nd argument to xTokenize() */
int tflags, /* Mask of FTS5_TOKEN_* flags */
const char *pToken, /* Pointer to buffer containing token */
int nToken, /* Size of token in bytes */
int iStart, /* Byte offset of token within input text */
int iEnd /* Byte offset of end of token within input text */
);
} perl_cb_ctx;
/* /*
** Create a new tokenizer instance. ** Create a new tokenizer instance.
@ -22,7 +39,7 @@ typedef struct perl_tokenizer_cursor {
** CREATE .. USING fts3( ... , tokenize=perl qualified::function::name) ** CREATE .. USING fts3( ... , tokenize=perl qualified::function::name)
** where qualified::function::name is a fully qualified perl function ** where qualified::function::name is a fully qualified perl function
*/ */
static int perl_tokenizer_Create( static int perl_fts3_tokenizer_Create(
int argc, const char * const *argv, int argc, const char * const *argv,
sqlite3_tokenizer **ppTokenizer sqlite3_tokenizer **ppTokenizer
){ ){
@ -30,13 +47,13 @@ static int perl_tokenizer_Create(
dSP; dSP;
int n_retval; int n_retval;
SV *retval; SV *retval;
perl_tokenizer *t; perl_fts3_tokenizer *t;
if (!argc) { if (!argc) {
return SQLITE_ERROR; return SQLITE_ERROR;
} }
t = (perl_tokenizer *) sqlite3_malloc(sizeof(*t)); t = (perl_fts3_tokenizer *) sqlite3_malloc(sizeof(*t));
if( t==NULL ) return SQLITE_NOMEM; if( t==NULL ) return SQLITE_NOMEM;
memset(t, 0, sizeof(*t)); memset(t, 0, sizeof(*t));
@ -67,9 +84,9 @@ static int perl_tokenizer_Create(
/* /*
** Destroy a tokenizer ** Destroy a tokenizer
*/ */
static int perl_tokenizer_Destroy(sqlite3_tokenizer *pTokenizer){ static int perl_fts3_tokenizer_Destroy(sqlite3_tokenizer *pTokenizer){
dTHX; dTHX;
perl_tokenizer *t = (perl_tokenizer *) pTokenizer; perl_fts3_tokenizer *t = (perl_fts3_tokenizer *) pTokenizer;
sv_free(t->coderef); sv_free(t->coderef);
sqlite3_free(t); sqlite3_free(t);
return SQLITE_OK; return SQLITE_OK;
@ -82,7 +99,7 @@ static int perl_tokenizer_Destroy(sqlite3_tokenizer *pTokenizer){
** This is passed to the tokenizer instance, which then returns a ** This is passed to the tokenizer instance, which then returns a
** closure implementing the cursor (so the cursor is again a coderef). ** closure implementing the cursor (so the cursor is again a coderef).
*/ */
static int perl_tokenizer_Open( static int perl_fts3_tokenizer_Open(
sqlite3_tokenizer *pTokenizer, /* Tokenizer object */ sqlite3_tokenizer *pTokenizer, /* Tokenizer object */
const char *pInput, int nBytes, /* Input buffer */ const char *pInput, int nBytes, /* Input buffer */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */ sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */
@ -118,11 +135,11 @@ static int perl_tokenizer_Open(
DBD_SQLITE_UTF8_DECODE_IF_NEEDED(perl_string, MY_CXT.last_dbh_string_mode); DBD_SQLITE_UTF8_DECODE_IF_NEEDED(perl_string, MY_CXT.last_dbh_string_mode);
perl_tokenizer *t = (perl_tokenizer *)pTokenizer; perl_fts3_tokenizer *t = (perl_fts3_tokenizer *)pTokenizer;
/* allocate and initialize the cursor struct */ /* allocate and initialize the cursor struct */
perl_tokenizer_cursor *c; perl_fts3_tokenizer_cursor *c;
c = (perl_tokenizer_cursor *) sqlite3_malloc(sizeof(*c)); c = (perl_fts3_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));
memset(c, 0, sizeof(*c)); memset(c, 0, sizeof(*c));
*ppCursor = &c->base; *ppCursor = &c->base;
@ -158,10 +175,10 @@ static int perl_tokenizer_Open(
/* /*
** Close a tokenization cursor previously opened by a call to ** Close a tokenization cursor previously opened by a call to
** perl_tokenizer_Open() above. ** perl_fts3_tokenizer_Open() above.
*/ */
static int perl_tokenizer_Close(sqlite3_tokenizer_cursor *pCursor){ static int perl_fts3_tokenizer_Close(sqlite3_tokenizer_cursor *pCursor){
perl_tokenizer_cursor *c = (perl_tokenizer_cursor *) pCursor; perl_fts3_tokenizer_cursor *c = (perl_fts3_tokenizer_cursor *) pCursor;
dTHX; dTHX;
sv_free(c->coderef); sv_free(c->coderef);
@ -172,9 +189,9 @@ static int perl_tokenizer_Close(sqlite3_tokenizer_cursor *pCursor){
/* /*
** Extract the next token from a tokenization cursor. The cursor must ** Extract the next token from a tokenization cursor. The cursor must
** have been opened by a prior call to perl_tokenizer_Open(). ** have been opened by a prior call to perl_fts3_tokenizer_Open().
*/ */
static int perl_tokenizer_Next( static int perl_fts3_tokenizer_Next(
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by perl_tokenizer_Open */ sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by perl_tokenizer_Open */
const char **ppToken, /* OUT: Normalized text for token */ const char **ppToken, /* OUT: Normalized text for token */
int *pnBytes, /* OUT: Number of bytes in normalized text */ int *pnBytes, /* OUT: Number of bytes in normalized text */
@ -182,7 +199,7 @@ static int perl_tokenizer_Next(
int *piEndOffset, /* Ending offset of token. IN : char offset; OUT : byte offset */ int *piEndOffset, /* Ending offset of token. IN : char offset; OUT : byte offset */
int *piPosition /* OUT: Number of tokens returned before this one */ int *piPosition /* OUT: Number of tokens returned before this one */
){ ){
perl_tokenizer_cursor *c = (perl_tokenizer_cursor *) pCursor; perl_fts3_tokenizer_cursor *c = (perl_fts3_tokenizer_cursor *) pCursor;
int result; int result;
int n_retval; int n_retval;
char *token; char *token;
@ -270,13 +287,13 @@ static int perl_tokenizer_Next(
/* /*
** The set of routines that implement the perl tokenizer ** The set of routines that implement the perl tokenizer
*/ */
sqlite3_tokenizer_module perl_tokenizer_Module = { sqlite3_tokenizer_module perl_fts3_tokenizer_Module = {
0, 0,
perl_tokenizer_Create, perl_fts3_tokenizer_Create,
perl_tokenizer_Destroy, perl_fts3_tokenizer_Destroy,
perl_tokenizer_Open, perl_fts3_tokenizer_Open,
perl_tokenizer_Close, perl_fts3_tokenizer_Close,
perl_tokenizer_Next perl_fts3_tokenizer_Next
}; };
/* /*
@ -289,7 +306,7 @@ int sqlite_db_register_fts3_perl_tokenizer(pTHX_ SV *dbh)
int rc; int rc;
sqlite3_stmt *pStmt; sqlite3_stmt *pStmt;
const char zSql[] = "SELECT fts3_tokenizer(?, ?)"; const char zSql[] = "SELECT fts3_tokenizer(?, ?)";
sqlite3_tokenizer_module *p = &perl_tokenizer_Module; sqlite3_tokenizer_module *p = &perl_fts3_tokenizer_Module;
if (!DBIc_ACTIVE(imp_dbh)) { if (!DBIc_ACTIVE(imp_dbh)) {
sqlite_error(dbh, -2, "attempt to register fts3 tokenizer on inactive database handle"); sqlite_error(dbh, -2, "attempt to register fts3 tokenizer on inactive database handle");

253
dbdimp_fts5_tokenizer.inc Normal file
View file

@ -0,0 +1,253 @@
/* Per-instance state for the perl FTS5 tokenizer.  Allocated by
 * perl_fts5_tokenizer_Create() and released by perl_fts5_tokenizer_Delete().
 */
typedef struct perl_Fts5Tokenizer {
    /* Fts5Tokenizer base; */ /* this is an empty struct, so we omit it entirely */
    SV *coderef;              /* the perl tokenizer is a coderef that takes
                              ** a string and some parameters and
                              ** in turn calls the xToken() function
                              ** passed to it
                              */
} perl_Fts5Tokenizer;
/*
** Create a new tokenizer instance.
** Will be called whenever a FTS5 table is created with
** CREATE .. USING fts5( ... , tokenize=perl qualified::function::name)
** where qualified::function::name is a fully qualified perl function
*/
static int perl_fts5_tokenizer_Create(
    void* pCtx, const char **azArg, int nArg, Fts5Tokenizer **ppOut
){
    /* pCtx (the third argument given to xCreateTokenizer()) is unused here. */
    dTHX;
    dSP;
    int n_retval;
    SV *retval;
    perl_Fts5Tokenizer *t;

    /* azArg[0] must name the Perl factory function; without it we cannot
     * build a tokenizer instance. */
    if (!nArg) {
        return SQLITE_ERROR;
    }

    t = (perl_Fts5Tokenizer *) sqlite3_malloc(sizeof(*t));
    if( t==NULL ) return SQLITE_NOMEM;
    memset(t, 0, sizeof(*t));

    ENTER;
    SAVETMPS;

    /* call the qualified::function::name (no arguments, scalar context);
     * it is expected to return the coderef that will do the tokenizing */
    PUSHMARK(SP);
    PUTBACK;
    n_retval = call_pv(azArg[0], G_SCALAR);
    SPAGAIN;

    /* store a copy of the returned coderef into the tokenizer structure */
    if (n_retval != 1) {
        /* NOTE(review): we only warn here and still pop one value below;
         * a non-coderef return is not detected at this point. */
        warn("tokenizer_Create returned %d arguments, expected a single coderef", n_retval);
    }
    retval = POPs;
    t->coderef = newSVsv(retval);
    /* *ppOut = &t->base; */ /* Fts5Tokenizer is empty and gcc complains about that */
    *ppOut = (Fts5Tokenizer *) t;

    PUTBACK;
    FREETMPS;
    LEAVE;

    return SQLITE_OK;
}
/*
** Destroy a tokenizer
*/
/*
** xDelete callback: dispose of a tokenizer instance previously allocated
** by perl_fts5_tokenizer_Create().
*/
static void perl_fts5_tokenizer_Delete(Fts5Tokenizer *pTokenizer){
    dTHX;
    perl_Fts5Tokenizer *instance = (perl_Fts5Tokenizer *) pTokenizer;

    /* drop our reference to the Perl coderef, then free the struct itself */
    sv_free(instance->coderef);
    sqlite3_free(instance);
}
/*
** This does a tokenizing run over the string. Found tokens (and synonyms)
** are stored by calling xToken()
*/
/*
** xTokenize callback: run the Perl tokenizer over one chunk of text.
** The stored coderef is invoked with an opaque callback context, the
** input text and the FTS5_TOKENIZE_* flags; the Perl code is expected to
** report each token it finds by calling fts5_xToken() (see
** perl_fts5_xToken()) with that same context.
*/
static int perl_fts5_tokenizer_Tokenize(
    Fts5Tokenizer* tokenizer,
    void *pCtx,
    int flags,            /* Mask of FTS5_TOKENIZE_* flags */
    const char *pText, int nText,
    int (*xToken)(
        void *pCtx,         /* Copy of 2nd argument to xTokenize() */
        int tflags,         /* Mask of FTS5_TOKEN_* flags */
        const char *pToken, /* Pointer to buffer containing token */
        int nToken,         /* Size of token in bytes */
        int iStart,         /* Byte offset of token within input text */
        int iEnd            /* Byte offset of end of token within input text */
    )
){
    perl_Fts5Tokenizer *c = (perl_Fts5Tokenizer *) tokenizer;
    /* NOTE(review): token, byteOffset, n_a and hop are referenced only by
     * the commented-out draft code below; they are currently unused. */
    char *token;
    char *byteOffset;
    dTHX;
    dSP;

    /* newSVpvn() will create a copy of this buffer, but ideally we would
     * directly write into the PV part of that copied buffer instead
     */
    perl_cb_ctx ctx;
    SV* ctxP;
    SV* text;

    STRLEN n_a; /* this is required for older perls < 5.8.8 */
    I32 hop;

    ENTER;
    SAVETMPS;

    /* call the Perl tokenizer, and pass it our token callback */
    PUSHMARK(SP);

    /* Bundle SQLite's callback and its context pointer into a single
     * opaque Perl string scalar (newSVpvn() copies the struct bytes).
     * The pointers inside are only valid for the duration of this
     * xTokenize() call, so the coderef must not retain ctxP. */
    ctx.Ctx = pCtx;
    ctx.xToken = xToken;
    ctxP = newSVpvn((const char *const)&ctx, sizeof(ctx));
    text = newSVpvn(pText, nText);

    /* We pass three arguments: the callback context, the text to
     * tokenize, and the FTS5_TOKENIZE_* flags */
    //EXTEND(SP, 2);
    XPUSHs(sv_2mortal(ctxP));
    XPUSHs(sv_2mortal(text));
    XPUSHs(sv_2mortal(newSViv(flags)));

    // We need to properly wrap this so it is callable from Perl...
    // ... without needing actual local storage or a global variable...
    // XXX Wrap the "found token" callback, and pass it to the user
    // Then, restructure the data if it is UTF-8
    // First, do all of this in Perl so it is easier to debug

    ///* if we get back an empty list, there is no more token */
    //if (n_retval == 0) {
    //    result = SQLITE_DONE;
    //}
    ///* otherwise, get token details from the return list */
    //else {
    //    if (n_retval != 5) {
    //        warn("tokenizer cursor returned %d arguments", n_retval);
    //    }
    //    *piPosition = POPi;
    //    *piEndOffset = POPi;
    //    *piStartOffset = POPi;
    //    *pnBytes = POPi;
    //    token = POPpx;
    //
    //    if (c->pInput) { /* if working with utf8 data */
    //
    //        /* recompute *pnBytes in bytes, not in chars */
    //        *pnBytes = strlen(token);
    //
    //        /* recompute start/end offsets in bytes, not in chars */
    //        hop = *piStartOffset - c->lastCharOffset;
    //        byteOffset = (char*)utf8_hop((U8*)c->lastByteOffset, hop);
    //        hop = *piEndOffset - *piStartOffset;
    //        *piStartOffset = byteOffset - c->pInput;
    //        byteOffset = (char*)utf8_hop((U8*)byteOffset, hop);
    //        *piEndOffset = byteOffset - c->pInput;
    //
    //        /* remember where we are for next round */
    //        c->lastCharOffset = *piEndOffset,
    //        c->lastByteOffset = byteOffset;
    //    }
    //
    //    /* make sure we have enough storage for copying the token */
    //    if (*pnBytes > c->nTokenAllocated ){
    //        char *pNew;
    //        c->nTokenAllocated = *pnBytes + 20;
    //        pNew = sqlite3_realloc(c->pToken, c->nTokenAllocated);
    //        if( !pNew ) return SQLITE_NOMEM;
    //        c->pToken = pNew;
    //    }
    //
    //    /* need to copy the token into the C cursor before perl frees that
    //       memory */
    //    memcpy(c->pToken, token, *pnBytes);
    //    *ppToken = c->pToken;
    //
    //    result = SQLITE_OK;
    //
    PUTBACK;
    /* NOTE(review): the coderef's return value is ignored and exceptions
     * (die) are not trapped or propagated; SQLITE_OK is always returned. */
    call_sv(c->coderef, G_VOID);
    SPAGAIN;

    PUTBACK;
    FREETMPS;
    LEAVE;

    return SQLITE_OK;
}
/*
** Perl-visible helper (exposed as DBD::SQLite::db::fts5_xToken): report
** one token found by the Perl tokenizer back to SQLite.
**
** pCtx must be the opaque context scalar that perl_fts5_tokenizer_Tokenize()
** passed to the coderef; it holds a perl_cb_ctx struct by value.  svToken
** is the token text; iStart/iEnd are byte offsets of the token within the
** input text.  Returns the result of SQLite's xToken() callback
** (SQLITE_OK, or an error code to abort tokenization).
*/
int perl_fts5_xToken(pTHX_
    SV* pCtx,
    int tflags,   /* Mask of FTS5_TOKEN_* flags */
    SV* svToken,  /* Pointer to buffer containing token */
    int iStart,   /* Byte offset of token within input text */
    int iEnd      /* Byte offset of end of token within input text */
) {
    STRLEN nToken;
    STRLEN nCtx;
    const char* chrToken = SvPV(svToken, nToken);
    perl_cb_ctx * p = (perl_cb_ctx *)SvPV( pCtx, nCtx );

    /* Guard against being handed an SV that is not the context value
     * created in perl_fts5_tokenizer_Tokenize(); blindly reinterpreting
     * an arbitrary string as perl_cb_ctx would crash the process. */
    if (nCtx != sizeof(*p)) {
        warn("fts5_xToken: invalid tokenizer context (%lu bytes, expected %lu)",
             (unsigned long)nCtx, (unsigned long)sizeof(*p));
        return SQLITE_ERROR;
    }

    return p->xToken(p->Ctx, tflags, chrToken, (int)nToken, iStart, iEnd);
}
/*
** The set of routines that implement the perl FTS5 tokenizer
*/
fts5_tokenizer perl_fts5_tokenizer_Module = {
    perl_fts5_tokenizer_Create,   /* xCreate */
    perl_fts5_tokenizer_Delete,   /* xDelete */
    perl_fts5_tokenizer_Tokenize  /* xTokenize */
};
/*
** Fetch the FTS5 API pointers
*/
/*
** Fetch the FTS5 API pointer from the database connection via the
** documented "SELECT fts5(?)" / sqlite3_bind_pointer() mechanism
** (sqlite3_bind_pointer requires SQLite >= 3.20.0 -- TODO confirm the
** minimum SQLite version this module supports).
**
** Returns the fts5_api pointer, or NULL if the handle is inactive, the
** statement cannot be prepared, or FTS5 is not available in this build.
*/
fts5_api* sqlite_fetch_fts5_api(pTHX_ SV *dbh)
{
    D_imp_dbh(dbh);

    int rc;
    sqlite3_stmt *pStmt;
    const char zSql[] = "SELECT fts5(?)";
    fts5_api *pFts5Api = 0;

    if (!DBIc_ACTIVE(imp_dbh)) {
        /* message fixed: this function fetches the API pointer, it does
         * not register a tokenizer */
        sqlite_error(dbh, -2, "attempt to fetch fts5 API on inactive database handle");
        return NULL;  /* was FALSE: this function returns a pointer */
    }

    rc = sqlite3_prepare_v2(imp_dbh->db, zSql, -1, &pStmt, 0);
    if( rc!=SQLITE_OK ){
        return NULL;
    }
    rc = sqlite3_bind_pointer(pStmt, 1, (void*)&pFts5Api, "fts5_api_ptr", NULL);
    if( rc==SQLITE_OK ){
        /* If FTS5 is unavailable or the step fails, pFts5Api stays NULL. */
        sqlite3_step(pStmt);
    }
    sqlite3_finalize(pStmt);
    return pFts5Api;
}
/*
** Register the perl tokenizer with FTS5
*/
/*
** Register the perl tokenizer with FTS5 under the name "perl", so that
** tables can be created with tokenize="perl Some::Factory::function".
** Returns an SQLite result code.
*/
int sqlite_db_register_fts5_perl_tokenizer(pTHX_ SV *dbh)
{
    D_imp_dbh(dbh);
    int rc;
    fts5_api *pFts5Api = sqlite_fetch_fts5_api(aTHX_ dbh);
    fts5_tokenizer *p = &perl_fts5_tokenizer_Module;

    /* sqlite_fetch_fts5_api() returns NULL when the handle is inactive or
     * SQLite was built without FTS5; the original code dereferenced the
     * NULL pointer in that case. */
    if( pFts5Api==NULL ){
        sqlite_error(dbh, -2, "fts5 API not available; cannot register fts5 tokenizer");
        return SQLITE_ERROR;
    }

    rc = pFts5Api->xCreateTokenizer(pFts5Api, "perl", 0, p, 0);
    return rc;
}

580
fts5.h Normal file
View file

@ -0,0 +1,580 @@
/*
** 2014 May 31
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** Interfaces to extend FTS5. Using the interfaces defined in this file,
** FTS5 may be extended with:
**
** * custom tokenizers, and
** * custom auxiliary functions.
*/
#ifndef _FTS5_H
#define _FTS5_H
#include "sqlite3.h"
#ifdef __cplusplus
extern "C" {
#endif
/*************************************************************************
** CUSTOM AUXILIARY FUNCTIONS
**
** Virtual table implementations may overload SQL functions by implementing
** the sqlite3_module.xFindFunction() method.
*/
typedef struct Fts5ExtensionApi Fts5ExtensionApi;
typedef struct Fts5Context Fts5Context;
typedef struct Fts5PhraseIter Fts5PhraseIter;
/*
* Wrap fts5_xToken in a callback that takes an array of arrayrefs (?)
* ... instead of the user calling fts5_xToken themselves
* /
typedef void (*fts5_extension_function)(
const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
Fts5Context *pFts, /* First arg to pass to pApi functions */
sqlite3_context *pCtx, /* Context for returning result/error */
int nVal, /* Number of values in apVal[] array */
sqlite3_value **apVal /* Array of trailing arguments */
);
/* Opaque iterator state used by xPhraseFirst()/xPhraseNext() and the
 * xPhraseFirstColumn()/xPhraseNextColumn() APIs.  Applications should not
 * modify these fields directly. */
struct Fts5PhraseIter {
  const unsigned char *a;
  const unsigned char *b;
};
/*
** EXTENSION API FUNCTIONS
**
** xUserData(pFts):
** Return a copy of the context pointer the extension function was
** registered with.
**
** xColumnTotalSize(pFts, iCol, pnToken):
** If parameter iCol is less than zero, set output variable *pnToken
** to the total number of tokens in the FTS5 table. Or, if iCol is
** non-negative but less than the number of columns in the table, return
** the total number of tokens in column iCol, considering all rows in
** the FTS5 table.
**
** If parameter iCol is greater than or equal to the number of columns
** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
** an OOM condition or IO error), an appropriate SQLite error code is
** returned.
**
** xColumnCount(pFts):
** Return the number of columns in the table.
**
** xColumnSize(pFts, iCol, pnToken):
** If parameter iCol is less than zero, set output variable *pnToken
** to the total number of tokens in the current row. Or, if iCol is
** non-negative but less than the number of columns in the table, set
** *pnToken to the number of tokens in column iCol of the current row.
**
** If parameter iCol is greater than or equal to the number of columns
** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
** an OOM condition or IO error), an appropriate SQLite error code is
** returned.
**
** This function may be quite inefficient if used with an FTS5 table
** created with the "columnsize=0" option.
**
** xColumnText:
** This function attempts to retrieve the text of column iCol of the
** current document. If successful, (*pz) is set to point to a buffer
** containing the text in utf-8 encoding, (*pn) is set to the size in bytes
** (not characters) of the buffer and SQLITE_OK is returned. Otherwise,
** if an error occurs, an SQLite error code is returned and the final values
** of (*pz) and (*pn) are undefined.
**
** xPhraseCount:
** Returns the number of phrases in the current query expression.
**
** xPhraseSize:
** Returns the number of tokens in phrase iPhrase of the query. Phrases
** are numbered starting from zero.
**
** xInstCount:
** Set *pnInst to the total number of occurrences of all phrases within
** the query within the current row. Return SQLITE_OK if successful, or
** an error code (i.e. SQLITE_NOMEM) if an error occurs.
**
** This API can be quite slow if used with an FTS5 table created with the
** "detail=none" or "detail=column" option. If the FTS5 table is created
** with either "detail=none" or "detail=column" and "content=" option
** (i.e. if it is a contentless table), then this API always returns 0.
**
** xInst:
** Query for the details of phrase match iIdx within the current row.
** Phrase matches are numbered starting from zero, so the iIdx argument
** should be greater than or equal to zero and smaller than the value
** output by xInstCount().
**
** Usually, output parameter *piPhrase is set to the phrase number, *piCol
** to the column in which it occurs and *piOff the token offset of the
** first token of the phrase. Returns SQLITE_OK if successful, or an error
** code (i.e. SQLITE_NOMEM) if an error occurs.
**
** This API can be quite slow if used with an FTS5 table created with the
** "detail=none" or "detail=column" option.
**
** xRowid:
** Returns the rowid of the current row.
**
** xTokenize:
** Tokenize text using the tokenizer belonging to the FTS5 table.
**
** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback):
** This API function is used to query the FTS table for phrase iPhrase
** of the current query. Specifically, a query equivalent to:
**
** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid
**
** with $p set to a phrase equivalent to the phrase iPhrase of the
** current query is executed. Any column filter that applies to
** phrase iPhrase of the current query is included in $p. For each
** row visited, the callback function passed as the fourth argument
** is invoked. The context and API objects passed to the callback
** function may be used to access the properties of each matched row.
** Invoking Api.xUserData() returns a copy of the pointer passed as
** the third argument to pUserData.
**
** If the callback function returns any value other than SQLITE_OK, the
** query is abandoned and the xQueryPhrase function returns immediately.
** If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK.
** Otherwise, the error code is propagated upwards.
**
** If the query runs to completion without incident, SQLITE_OK is returned.
** Or, if some error occurs before the query completes or is aborted by
** the callback, an SQLite error code is returned.
**
**
** xSetAuxdata(pFts5, pAux, xDelete)
**
** Save the pointer passed as the second argument as the extension function's
** "auxiliary data". The pointer may then be retrieved by the current or any
** future invocation of the same fts5 extension function made as part of
** the same MATCH query using the xGetAuxdata() API.
**
** Each extension function is allocated a single auxiliary data slot for
** each FTS query (MATCH expression). If the extension function is invoked
** more than once for a single FTS query, then all invocations share a
** single auxiliary data context.
**
** If there is already an auxiliary data pointer when this function is
** invoked, then it is replaced by the new pointer. If an xDelete callback
** was specified along with the original pointer, it is invoked at this
** point.
**
** The xDelete callback, if one is specified, is also invoked on the
** auxiliary data pointer after the FTS5 query has finished.
**
** If an error (e.g. an OOM condition) occurs within this function,
** the auxiliary data is set to NULL and an error code returned. If the
** xDelete parameter was not NULL, it is invoked on the auxiliary data
** pointer before returning.
**
**
** xGetAuxdata(pFts5, bClear)
**
** Returns the current auxiliary data pointer for the fts5 extension
** function. See the xSetAuxdata() method for details.
**
** If the bClear argument is non-zero, then the auxiliary data is cleared
** (set to NULL) before this function returns. In this case the xDelete,
** if any, is not invoked.
**
**
** xRowCount(pFts5, pnRow)
**
** This function is used to retrieve the total number of rows in the table.
** In other words, the same value that would be returned by:
**
** SELECT count(*) FROM ftstable;
**
** xPhraseFirst()
** This function is used, along with type Fts5PhraseIter and the xPhraseNext
** method, to iterate through all instances of a single query phrase within
** the current row. This is the same information as is accessible via the
** xInstCount/xInst APIs. While the xInstCount/xInst APIs are more convenient
** to use, this API may be faster under some circumstances. To iterate
** through instances of phrase iPhrase, use the following code:
**
** Fts5PhraseIter iter;
** int iCol, iOff;
** for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff);
** iCol>=0;
** pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
** ){
** // An instance of phrase iPhrase at offset iOff of column iCol
** }
**
** The Fts5PhraseIter structure is defined above. Applications should not
** modify this structure directly - it should only be used as shown above
** with the xPhraseFirst() and xPhraseNext() API methods (and by
** xPhraseFirstColumn() and xPhraseNextColumn() as illustrated below).
**
** This API can be quite slow if used with an FTS5 table created with the
** "detail=none" or "detail=column" option. If the FTS5 table is created
** with either "detail=none" or "detail=column" and "content=" option
** (i.e. if it is a contentless table), then this API always iterates
** through an empty set (all calls to xPhraseFirst() set iCol to -1).
**
** xPhraseNext()
** See xPhraseFirst above.
**
** xPhraseFirstColumn()
** This function and xPhraseNextColumn() are similar to the xPhraseFirst()
** and xPhraseNext() APIs described above. The difference is that instead
** of iterating through all instances of a phrase in the current row, these
** APIs are used to iterate through the set of columns in the current row
** that contain one or more instances of a specified phrase. For example:
**
** Fts5PhraseIter iter;
** int iCol;
** for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol);
** iCol>=0;
** pApi->xPhraseNextColumn(pFts, &iter, &iCol)
** ){
** // Column iCol contains at least one instance of phrase iPhrase
** }
**
** This API can be quite slow if used with an FTS5 table created with the
** "detail=none" option. If the FTS5 table is created with either
** "detail=none" "content=" option (i.e. if it is a contentless table),
** then this API always iterates through an empty set (all calls to
** xPhraseFirstColumn() set iCol to -1).
**
** The information accessed using this API and its companion
** xPhraseFirstColumn() may also be obtained using xPhraseFirst/xPhraseNext
** (or xInst/xInstCount). The chief advantage of this API is that it is
** significantly more efficient than those alternatives when used with
** "detail=column" tables.
**
** xPhraseNextColumn()
** See xPhraseFirstColumn above.
*/
/* Extension API vtable passed to fts5 auxiliary functions; the semantics
 * of each method are described in the large comment above.
 * NOTE(review): this layout must match the fts5.h shipped with the SQLite
 * version being built against -- confirm when upgrading SQLite. */
struct Fts5ExtensionApi {
  int iVersion; /* Currently always set to 3 */

  void *(*xUserData)(Fts5Context*);

  int (*xColumnCount)(Fts5Context*);
  int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow);
  int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken);

  int (*xTokenize)(Fts5Context*,
    const char *pText, int nText, /* Text to tokenize */
    void *pCtx,                   /* Context passed to xToken() */
    int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
  );

  int (*xPhraseCount)(Fts5Context*);
  int (*xPhraseSize)(Fts5Context*, int iPhrase);

  int (*xInstCount)(Fts5Context*, int *pnInst);
  int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff);

  sqlite3_int64 (*xRowid)(Fts5Context*);
  int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn);
  int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken);

  int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData,
    int(*)(const Fts5ExtensionApi*,Fts5Context*,void*)
  );
  int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*));
  void *(*xGetAuxdata)(Fts5Context*, int bClear);

  int (*xPhraseFirst)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*, int*);
  void (*xPhraseNext)(Fts5Context*, Fts5PhraseIter*, int *piCol, int *piOff);

  int (*xPhraseFirstColumn)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*);
  void (*xPhraseNextColumn)(Fts5Context*, Fts5PhraseIter*, int *piCol);
};
/*
** CUSTOM AUXILIARY FUNCTIONS
*************************************************************************/
/*************************************************************************
** CUSTOM TOKENIZERS
**
** Applications may also register custom tokenizer types. A tokenizer
** is registered by providing fts5 with a populated instance of the
** following structure. All structure methods must be defined, setting
** any member of the fts5_tokenizer struct to NULL leads to undefined
** behaviour. The structure methods are expected to function as follows:
**
** xCreate:
** This function is used to allocate and initialize a tokenizer instance.
** A tokenizer instance is required to actually tokenize text.
**
** The first argument passed to this function is a copy of the (void*)
** pointer provided by the application when the fts5_tokenizer object
** was registered with FTS5 (the third argument to xCreateTokenizer()).
** The second and third arguments are an array of nul-terminated strings
** containing the tokenizer arguments, if any, specified following the
** tokenizer name as part of the CREATE VIRTUAL TABLE statement used
** to create the FTS5 table.
**
** The final argument is an output variable. If successful, (*ppOut)
** should be set to point to the new tokenizer handle and SQLITE_OK
** returned. If an error occurs, some value other than SQLITE_OK should
** be returned. In this case, fts5 assumes that the final value of *ppOut
** is undefined.
**
** xDelete:
** This function is invoked to delete a tokenizer handle previously
** allocated using xCreate(). Fts5 guarantees that this function will
** be invoked exactly once for each successful call to xCreate().
**
** xTokenize:
** This function is expected to tokenize the nText byte string indicated
** by argument pText. pText may or may not be nul-terminated. The first
** argument passed to this function is a pointer to an Fts5Tokenizer object
** returned by an earlier call to xCreate().
**
** The second argument indicates the reason that FTS5 is requesting
** tokenization of the supplied text. This is always one of the following
** four values:
**
** <ul><li> <b>FTS5_TOKENIZE_DOCUMENT</b> - A document is being inserted into
** or removed from the FTS table. The tokenizer is being invoked to
** determine the set of tokens to add to (or delete from) the
** FTS index.
**
** <li> <b>FTS5_TOKENIZE_QUERY</b> - A MATCH query is being executed
** against the FTS index. The tokenizer is being called to tokenize
** a bareword or quoted string specified as part of the query.
**
** <li> <b>(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)</b> - Same as
** FTS5_TOKENIZE_QUERY, except that the bareword or quoted string is
** followed by a "*" character, indicating that the last token
** returned by the tokenizer will be treated as a token prefix.
**
** <li> <b>FTS5_TOKENIZE_AUX</b> - The tokenizer is being invoked to
** satisfy an fts5_api.xTokenize() request made by an auxiliary
** function. Or an fts5_api.xColumnSize() request made by the same
** on a columnsize=0 database.
** </ul>
**
** For each token in the input string, the supplied callback xToken() must
** be invoked. The first argument to it should be a copy of the pointer
** passed as the second argument to xTokenize(). The third and fourth
** arguments are a pointer to a buffer containing the token text, and the
** size of the token in bytes. The 4th and 5th arguments are the byte offsets
** of the first byte of and first byte immediately following the text from
** which the token is derived within the input.
**
** The second argument passed to the xToken() callback ("tflags") should
** normally be set to 0. The exception is if the tokenizer supports
** synonyms. In this case see the discussion below for details.
**
** FTS5 assumes the xToken() callback is invoked for each token in the
** order that they occur within the input text.
**
** If an xToken() callback returns any value other than SQLITE_OK, then
** the tokenization should be abandoned and the xTokenize() method should
** immediately return a copy of the xToken() return value. Or, if the
** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally,
** if an error occurs with the xTokenize() implementation itself, it
** may abandon the tokenization and return any error code other than
** SQLITE_OK or SQLITE_DONE.
**
** SYNONYM SUPPORT
**
** Custom tokenizers may also support synonyms. Consider a case in which a
** user wishes to query for a phrase such as "first place". Using the
** built-in tokenizers, the FTS5 query 'first + place' will match instances
** of "first place" within the document set, but not alternative forms
** such as "1st place". In some applications, it would be better to match
** all instances of "first place" or "1st place" regardless of which form
** the user specified in the MATCH query text.
**
** There are several ways to approach this in FTS5:
**
** <ol><li> By mapping all synonyms to a single token. In this case, using
** the above example, this means that the tokenizer returns the
** same token for inputs "first" and "1st". Say that token is in
** fact "first", so that when the user inserts the document "I won
** 1st place" entries are added to the index for tokens "i", "won",
** "first" and "place". If the user then queries for '1st + place',
** the tokenizer substitutes "first" for "1st" and the query works
** as expected.
**
** <li> By querying the index for all synonyms of each query term
** separately. In this case, when tokenizing query text, the
** tokenizer may provide multiple synonyms for a single term
** within the document. FTS5 then queries the index for each
** synonym individually. For example, faced with the query:
**
** <codeblock>
** ... MATCH 'first place'</codeblock>
**
** the tokenizer offers both "1st" and "first" as synonyms for the
** first token in the MATCH query and FTS5 effectively runs a query
** similar to:
**
** <codeblock>
** ... MATCH '(first OR 1st) place'</codeblock>
**
** except that, for the purposes of auxiliary functions, the query
** still appears to contain just two phrases - "(first OR 1st)"
** being treated as a single phrase.
**
** <li> By adding multiple synonyms for a single term to the FTS index.
** Using this method, when tokenizing document text, the tokenizer
** provides multiple synonyms for each token. So that when a
** document such as "I won first place" is tokenized, entries are
** added to the FTS index for "i", "won", "first", "1st" and
** "place".
**
** This way, even if the tokenizer does not provide synonyms
** when tokenizing query text (it should not - to do so would be
** inefficient), it doesn't matter if the user queries for
** 'first + place' or '1st + place', as there are entries in the
** FTS index corresponding to both forms of the first token.
** </ol>
**
** Whether it is parsing document or query text, any call to xToken that
** specifies a <i>tflags</i> argument with the FTS5_TOKEN_COLOCATED bit
** is considered to supply a synonym for the previous token. For example,
** when parsing the document "I won first place", a tokenizer that supports
** synonyms would call xToken() 5 times, as follows:
**
** <codeblock>
** xToken(pCtx, 0, "i", 1, 0, 1);
** xToken(pCtx, 0, "won", 3, 2, 5);
** xToken(pCtx, 0, "first", 5, 6, 11);
** xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3, 6, 11);
** xToken(pCtx, 0, "place", 5, 12, 17);
**</codeblock>
**
** It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time
** xToken() is called. Multiple synonyms may be specified for a single token
** by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence.
** There is no limit to the number of synonyms that may be provided for a
** single token.
**
** In many cases, method (1) above is the best approach. It does not add
** extra data to the FTS index or require FTS5 to query for multiple terms,
** so it is efficient in terms of disk space and query speed. However, it
** does not support prefix queries very well. If, as suggested above, the
** token "first" is substituted for "1st" by the tokenizer, then the query:
**
** <codeblock>
** ... MATCH '1s*'</codeblock>
**
** will not match documents that contain the token "1st" (as the tokenizer
** will probably not map "1s" to any prefix of "first").
**
** For full prefix support, method (3) may be preferred. In this case,
** because the index contains entries for both "first" and "1st", prefix
** queries such as 'fi*' or '1s*' will match correctly. However, because
** extra entries are added to the FTS index, this method uses more space
** within the database.
**
** Method (2) offers a midpoint between (1) and (3). Using this method,
** a query such as '1s*' will match documents that contain the literal
** token "1st", but not "first" (assuming the tokenizer is not able to
** provide synonyms for prefixes). However, a non-prefix query like '1st'
** will match against "1st" and "first". This method does not require
** extra disk space, as no extra entries are added to the FTS index.
** On the other hand, it may require more CPU cycles to run MATCH queries,
** as separate queries of the FTS index are required for each synonym.
**
** When using methods (2) or (3), it is important that the tokenizer only
** provide synonyms when tokenizing document text (method (2)) or query
** text (method (3)), not both. Doing so will not cause any errors, but is
** inefficient.
*/
/*
** A custom tokenizer implementation.  See the "CUSTOM TOKENIZERS"
** commentary above for the full contract of each method.  In short:
** xCreate() allocates and initializes a tokenizer instance, xDelete()
** frees it (exactly once per successful xCreate()), and xTokenize()
** must invoke the supplied xToken() callback once per token -- plus
** once per synonym, with FTS5_TOKEN_COLOCATED set in tflags.
** All three methods must be non-NULL.
*/
typedef struct Fts5Tokenizer Fts5Tokenizer;
typedef struct fts5_tokenizer fts5_tokenizer;
struct fts5_tokenizer {
  int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);
  void (*xDelete)(Fts5Tokenizer*);
  int (*xTokenize)(Fts5Tokenizer*,
      void *pCtx,
      int flags,            /* Mask of FTS5_TOKENIZE_* flags */
      const char *pText, int nText,
      int (*xToken)(
        void *pCtx,         /* Copy of 2nd argument to xTokenize() */
        int tflags,         /* Mask of FTS5_TOKEN_* flags */
        const char *pToken, /* Pointer to buffer containing token */
        int nToken,         /* Size of token in bytes */
        int iStart,         /* Byte offset of token within input text */
        int iEnd            /* Byte offset of end of token within input text */
      )
  );
};
/* Flags that may be passed as the third argument to xTokenize().  Per the
** commentary above, the value is always one of the three base reasons
** (QUERY, DOCUMENT, AUX), optionally with PREFIX OR-ed onto QUERY. */
#define FTS5_TOKENIZE_QUERY     0x0001
#define FTS5_TOKENIZE_PREFIX    0x0002  /* Only seen together with ..._QUERY */
#define FTS5_TOKENIZE_DOCUMENT  0x0004
#define FTS5_TOKENIZE_AUX       0x0008

/* Flags that may be passed by the tokenizer implementation back to FTS5
** as the third argument to the supplied xToken callback. */
#define FTS5_TOKEN_COLOCATED    0x0001      /* Same position as prev. token */
/*
** END OF CUSTOM TOKENIZERS
*************************************************************************/
/*************************************************************************
** FTS5 EXTENSION REGISTRATION API
*/
/*
** fts5_api
**
** The FTS5 extension-registration object.  Applications use the methods
** below to register custom tokenizers and auxiliary functions with FTS5.
** This is a versioned ABI struct: iVersion identifies the layout.
*/
typedef struct fts5_api fts5_api;
struct fts5_api {
  int iVersion;                   /* Currently always set to 2 */

  /* Create a new tokenizer.  pContext is passed back as the first
  ** argument to pTokenizer->xCreate(); xDestroy (may be NULL -- TODO
  ** confirm against full header) releases pContext. */
  int (*xCreateTokenizer)(
    fts5_api *pApi,
    const char *zName,
    void *pContext,
    fts5_tokenizer *pTokenizer,
    void (*xDestroy)(void*)
  );

  /* Find an existing tokenizer, filling in *pTokenizer and *ppContext. */
  int (*xFindTokenizer)(
    fts5_api *pApi,
    const char *zName,
    void **ppContext,
    fts5_tokenizer *pTokenizer
  );

  /* Create a new auxiliary function */
  int (*xCreateFunction)(
    fts5_api *pApi,
    const char *zName,
    void *pContext,
    fts5_extension_function xFunction,
    void (*xDestroy)(void*)
  );
};
/*
** END OF REGISTRATION API
*************************************************************************/
#ifdef __cplusplus
} /* end of the 'extern "C"' block */
#endif
#endif /* _FTS5_H */

View file

@ -51,6 +51,7 @@ sub driver {
DBD::SQLite::db->install_method('sqlite_enable_load_extension'); DBD::SQLite::db->install_method('sqlite_enable_load_extension');
DBD::SQLite::db->install_method('sqlite_load_extension'); DBD::SQLite::db->install_method('sqlite_load_extension');
DBD::SQLite::db->install_method('sqlite_register_fts3_perl_tokenizer'); DBD::SQLite::db->install_method('sqlite_register_fts3_perl_tokenizer');
DBD::SQLite::db->install_method('sqlite_register_fts5_perl_tokenizer');
DBD::SQLite::db->install_method('sqlite_trace', { O => 0x0004 }); DBD::SQLite::db->install_method('sqlite_trace', { O => 0x0004 });
DBD::SQLite::db->install_method('sqlite_profile', { O => 0x0004 }); DBD::SQLite::db->install_method('sqlite_profile', { O => 0x0004 });
DBD::SQLite::db->install_method('sqlite_table_column_metadata', { O => 0x0004 }); DBD::SQLite::db->install_method('sqlite_table_column_metadata', { O => 0x0004 });
@ -142,10 +143,12 @@ sub connect {
$dbh->sqlite_collation_needed( \&install_collation ); $dbh->sqlite_collation_needed( \&install_collation );
$dbh->sqlite_create_function( "REGEXP", 2, \&regexp ); $dbh->sqlite_create_function( "REGEXP", 2, \&regexp );
$dbh->sqlite_register_fts3_perl_tokenizer(); $dbh->sqlite_register_fts3_perl_tokenizer();
$dbh->sqlite_register_fts5_perl_tokenizer();
} else { } else {
$dbh->func( \&install_collation, "collation_needed" ); $dbh->func( \&install_collation, "collation_needed" );
$dbh->func( "REGEXP", 2, \&regexp, "create_function" ); $dbh->func( "REGEXP", 2, \&regexp, "create_function" );
$dbh->func( "register_fts3_perl_tokenizer" ); $dbh->func( "register_fts3_perl_tokenizer" );
$dbh->func( "register_fts5_perl_tokenizer" );
} }
# HACK: Since PrintWarn = 0 doesn't seem to actually prevent warnings # HACK: Since PrintWarn = 0 doesn't seem to actually prevent warnings
@ -551,7 +554,7 @@ my @FOREIGN_KEY_INFO_ODBC = (
# Maybe we could add an option so that the user can choose which field # Maybe we could add an option so that the user can choose which field
# names will be returned; the DBI spec is not very clear about ODBC vs. CLI. # names will be returned; the DBI spec is not very clear about ODBC vs. CLI.
my @FOREIGN_KEY_INFO_SQL_CLI = qw( my @FOREIGN_KEY_INFO_SQL_CLI = qw(
UK_TABLE_CAT UK_TABLE_CAT
UK_TABLE_SCHEM UK_TABLE_SCHEM
UK_TABLE_NAME UK_TABLE_NAME
UK_COLUMN_NAME UK_COLUMN_NAME
@ -765,7 +768,7 @@ sub statistics_info {
TABLE_CAT => undef, TABLE_CAT => undef,
TABLE_SCHEM => $db->{name}, TABLE_SCHEM => $db->{name},
TABLE_NAME => $tbname, TABLE_NAME => $tbname,
NON_UNIQUE => $row->{unique} ? 0 : 1, NON_UNIQUE => $row->{unique} ? 0 : 1,
INDEX_QUALIFIER => undef, INDEX_QUALIFIER => undef,
INDEX_NAME => $row->{name}, INDEX_NAME => $row->{name},
TYPE => 'btree', # see https://www.sqlite.org/version3.html esp. "Traditional B-trees are still used for indices" TYPE => 'btree', # see https://www.sqlite.org/version3.html esp. "Traditional B-trees are still used for indices"
@ -1222,7 +1225,7 @@ store natively as a BLOB use the following code:
use DBI qw(:sql_types); use DBI qw(:sql_types);
my $dbh = DBI->connect("dbi:SQLite:dbfile","",""); my $dbh = DBI->connect("dbi:SQLite:dbfile","","");
my $blob = `cat foo.jpg`; my $blob = `cat foo.jpg`;
my $sth = $dbh->prepare("INSERT INTO mytable VALUES (1, ?)"); my $sth = $dbh->prepare("INSERT INTO mytable VALUES (1, ?)");
$sth->bind_param(1, $blob, SQL_BLOB); $sth->bind_param(1, $blob, SQL_BLOB);
@ -1234,7 +1237,7 @@ And then retrieval just works:
$sth->execute(); $sth->execute();
my $row = $sth->fetch; my $row = $sth->fetch;
my $blobo = $row->[1]; my $blobo = $row->[1];
# now $blobo == $blob # now $blobo == $blob
=head2 Functions And Bind Parameters =head2 Functions And Bind Parameters
@ -1263,7 +1266,7 @@ As shown above in the C<BLOB> section, you can always use
C<bind_param()> to tell the type of a bind value. C<bind_param()> to tell the type of a bind value.
use DBI qw(:sql_types); # Don't forget this use DBI qw(:sql_types); # Don't forget this
my $sth = $dbh->prepare(q{ my $sth = $dbh->prepare(q{
SELECT bar FROM foo GROUP BY bar HAVING count(*) > ?; SELECT bar FROM foo GROUP BY bar HAVING count(*) > ?;
}); });
@ -1334,7 +1337,7 @@ bind values with no explicit type.
SQLite supports several placeholder expressions, including C<?> SQLite supports several placeholder expressions, including C<?>
and C<:AAAA>. Consult the L<DBI> and SQLite documentation for and C<:AAAA>. Consult the L<DBI> and SQLite documentation for
details. details.
L<https://www.sqlite.org/lang_expr.html#varparam> L<https://www.sqlite.org/lang_expr.html#varparam>
@ -1345,7 +1348,7 @@ named) placeholders to avoid confusion.
my $sth = $dbh->prepare( my $sth = $dbh->prepare(
'update TABLE set a=?1 where b=?2 and a IS NOT ?1' 'update TABLE set a=?1 where b=?2 and a IS NOT ?1'
); );
$sth->execute(1, 2); $sth->execute(1, 2);
=head2 Pragma =head2 Pragma
@ -1453,13 +1456,13 @@ statement. To end it, call C<commit/rollback> methods, or issue
the corresponding statements. the corresponding statements.
$dbh->{AutoCommit} = 1; $dbh->{AutoCommit} = 1;
$dbh->begin_work; # or $dbh->do('BEGIN TRANSACTION'); $dbh->begin_work; # or $dbh->do('BEGIN TRANSACTION');
# $dbh->{AutoCommit} is turned off temporarily during a transaction; # $dbh->{AutoCommit} is turned off temporarily during a transaction;
$dbh->commit; # or $dbh->do('COMMIT'); $dbh->commit; # or $dbh->do('COMMIT');
# $dbh->{AutoCommit} is turned on again; # $dbh->{AutoCommit} is turned on again;
=item When the AutoCommit flag is off =item When the AutoCommit flag is off
@ -1473,15 +1476,15 @@ You can commit or roll it back freely. Another transaction will
automatically begin if you execute another statement. automatically begin if you execute another statement.
$dbh->{AutoCommit} = 0; $dbh->{AutoCommit} = 0;
# $dbh->do('BEGIN TRANSACTION') is not necessary, but possible # $dbh->do('BEGIN TRANSACTION') is not necessary, but possible
... ...
$dbh->commit; # or $dbh->do('COMMIT'); $dbh->commit; # or $dbh->do('COMMIT');
# $dbh->{AutoCommit} stays intact; # $dbh->{AutoCommit} stays intact;
$dbh->{AutoCommit} = 1; # ends the transactional mode $dbh->{AutoCommit} = 1; # ends the transactional mode
=back =back
@ -1520,7 +1523,7 @@ As the L<DBI> doc says, you almost certainly do B<not> need to
call L<DBI/finish> method if you fetch all rows (probably in a loop). call L<DBI/finish> method if you fetch all rows (probably in a loop).
However, there are several exceptions to this rule, and rolling-back However, there are several exceptions to this rule, and rolling-back
of an unfinished C<SELECT> statement is one of such exceptional of an unfinished C<SELECT> statement is one of such exceptional
cases. cases.
SQLite prohibits C<ROLLBACK> of unfinished C<SELECT> statements in SQLite prohibits C<ROLLBACK> of unfinished C<SELECT> statements in
a transaction (See L<http://sqlite.org/lang_transaction.html> for a transaction (See L<http://sqlite.org/lang_transaction.html> for
@ -1550,7 +1553,7 @@ statements (a C<dump>) to a statement handle (via C<prepare> or C<do>),
L<DBD::SQLite> only processes the first statement, and discards the L<DBD::SQLite> only processes the first statement, and discards the
rest. rest.
If you need to process multiple statements at a time, set If you need to process multiple statements at a time, set
a C<sqlite_allow_multiple_statements> attribute of a database handle a C<sqlite_allow_multiple_statements> attribute of a database handle
to true when you connect to a database, and C<do> method takes care to true when you connect to a database, and C<do> method takes care
of the rest (since 1.30_01, and without creating DBI's statement of the rest (since 1.30_01, and without creating DBI's statement
@ -1784,7 +1787,7 @@ keys of temporary tables).
undef, $fk_schema, $fk_table); undef, $fk_schema, $fk_table);
Returns information about foreign key constraints, as specified in Returns information about foreign key constraints, as specified in
L<DBI/foreign_key_info>, but with some limitations : L<DBI/foreign_key_info>, but with some limitations :
=over =over
@ -1849,7 +1852,7 @@ a C<PRAGMA> command; see L</"Foreign keys"> earlier in this manual.
$unique_only, $quick); $unique_only, $quick);
Returns information about a table and it's indexes, as specified in Returns information about a table and it's indexes, as specified in
L<DBI/statistics_info>, but with some limitations : L<DBI/statistics_info>, but with some limitations :
=over =over
@ -2089,38 +2092,38 @@ Here is a simple aggregate function which returns the variance
(example adapted from pysqlite): (example adapted from pysqlite):
package variance; package variance;
sub new { bless [], shift; } sub new { bless [], shift; }
sub step { sub step {
my ( $self, $value ) = @_; my ( $self, $value ) = @_;
push @$self, $value; push @$self, $value;
} }
sub finalize { sub finalize {
my $self = $_[0]; my $self = $_[0];
my $n = @$self; my $n = @$self;
# Variance is NULL unless there is more than one row # Variance is NULL unless there is more than one row
return undef unless $n || $n == 1; return undef unless $n || $n == 1;
my $mu = 0; my $mu = 0;
foreach my $v ( @$self ) { foreach my $v ( @$self ) {
$mu += $v; $mu += $v;
} }
$mu /= $n; $mu /= $n;
my $sigma = 0; my $sigma = 0;
foreach my $v ( @$self ) { foreach my $v ( @$self ) {
$sigma += ($v - $mu)**2; $sigma += ($v - $mu)**2;
} }
$sigma = $sigma / ($n - 1); $sigma = $sigma / ($n - 1);
return $sigma; return $sigma;
} }
$dbh->sqlite_create_aggregate( "variance", 1, 'variance' ); $dbh->sqlite_create_aggregate( "variance", 1, 'variance' );
The aggregate function can then be used as: The aggregate function can then be used as:
@ -2389,13 +2392,13 @@ You may also pass 0 as an argument to reset the status.
You can change how the connected database should behave like this: You can change how the connected database should behave like this:
use DBD::SQLite::Constants qw/:database_connection_configuration_options/; use DBD::SQLite::Constants qw/:database_connection_configuration_options/;
my $dbh = DBI->connect('dbi:SQLite::memory:'); my $dbh = DBI->connect('dbi:SQLite::memory:');
# This disables language features that allow ordinary SQL # This disables language features that allow ordinary SQL
# to deliberately corrupt the database file # to deliberately corrupt the database file
$dbh->sqlite_db_config( SQLITE_DBCONFIG_DEFENSIVE, 1 ); $dbh->sqlite_db_config( SQLITE_DBCONFIG_DEFENSIVE, 1 );
# This disables two-arg version of fts3_tokenizer. # This disables two-arg version of fts3_tokenizer.
$dbh->sqlite_db_config( SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER, 0 ); $dbh->sqlite_db_config( SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER, 0 );
@ -2417,7 +2420,7 @@ Virtual tables are explained in L<DBD::SQLite::VirtualTable>.
Sets a new run-time limit for the category, and returns the current limit. Sets a new run-time limit for the category, and returns the current limit.
If the new value is a negative number (or omitted), the limit is unchanged If the new value is a negative number (or omitted), the limit is unchanged
and just returns the current limit. Category ids (SQLITE_LIMIT_LENGTH, and just returns the current limit. Category ids (SQLITE_LIMIT_LENGTH,
SQLITE_LIMIT_VARIABLE_NUMBER, etc) can be imported from DBD::SQLite::Constants. SQLITE_LIMIT_VARIABLE_NUMBER, etc) can be imported from DBD::SQLite::Constants.
=head2 $dbh->sqlite_get_autocommit() =head2 $dbh->sqlite_get_autocommit()
@ -2692,18 +2695,18 @@ then query which buildings overlap or are contained within a specified region:
SELECT id FROM city_buildings SELECT id FROM city_buildings
WHERE minLong >= ? AND maxLong <= ? WHERE minLong >= ? AND maxLong <= ?
AND minLat >= ? AND maxLat <= ? AND minLat >= ? AND maxLat <= ?
# ... and those that overlap query coordinates # ... and those that overlap query coordinates
my $overlap_sql = <<""; my $overlap_sql = <<"";
SELECT id FROM city_buildings SELECT id FROM city_buildings
WHERE maxLong >= ? AND minLong <= ? WHERE maxLong >= ? AND minLong <= ?
AND maxLat >= ? AND minLat <= ? AND maxLat >= ? AND minLat <= ?
my $contained = $dbh->selectcol_arrayref($contained_sql,undef, my $contained = $dbh->selectcol_arrayref($contained_sql,undef,
$minLong, $maxLong, $minLat, $maxLat); $minLong, $maxLong, $minLat, $maxLat);
my $overlapping = $dbh->selectcol_arrayref($overlap_sql,undef, my $overlapping = $dbh->selectcol_arrayref($overlap_sql,undef,
$minLong, $maxLong, $minLat, $maxLat); $minLong, $maxLong, $minLat, $maxLat);
For more detail, please see the SQLite R-Tree page For more detail, please see the SQLite R-Tree page
(L<https://www.sqlite.org/rtree.html>). Note that custom R-Tree (L<https://www.sqlite.org/rtree.html>). Note that custom R-Tree
@ -2723,7 +2726,7 @@ virtual tables. These can have many interesting uses
for joining regular DBMS data with some other kind of data within your for joining regular DBMS data with some other kind of data within your
Perl programs. Bundled with the present distribution are : Perl programs. Bundled with the present distribution are :
=over =over
=item * =item *
@ -2749,10 +2752,10 @@ header like this:
use File::ShareDir 'dist_dir'; use File::ShareDir 'dist_dir';
use File::Spec::Functions 'catfile'; use File::Spec::Functions 'catfile';
# the whole sqlite3.h header # the whole sqlite3.h header
my $sqlite3_h = catfile(dist_dir('DBD-SQLite'), 'sqlite3.h'); my $sqlite3_h = catfile(dist_dir('DBD-SQLite'), 'sqlite3.h');
# or only a particular header, amalgamated in sqlite3.c # or only a particular header, amalgamated in sqlite3.c
my $what_i_want = 'parse.h'; my $what_i_want = 'parse.h';
my $sqlite3_c = catfile(dist_dir('DBD-SQLite'), 'sqlite3.c'); my $sqlite3_c = catfile(dist_dir('DBD-SQLite'), 'sqlite3.c');

View file

@ -190,6 +190,15 @@ our @EXPORT_OK = (
SQLITE_OPEN_URI SQLITE_OPEN_URI
/, /,
# fts5_tokenizer
qw/
FTS5_TOKENIZE_AUX
FTS5_TOKENIZE_DOCUMENT
FTS5_TOKENIZE_PREFIX
FTS5_TOKENIZE_QUERY
FTS5_TOKEN_COLOCATED
/,
# function_flags # function_flags
qw/ qw/
SQLITE_DETERMINISTIC SQLITE_DETERMINISTIC
@ -357,6 +366,11 @@ our %EXPORT_TAGS = (
SQLITE_ERROR_SNAPSHOT SQLITE_ERROR_SNAPSHOT
SQLITE_FLOAT SQLITE_FLOAT
SQLITE_FORMAT SQLITE_FORMAT
FTS5_TOKENIZE_AUX
FTS5_TOKENIZE_DOCUMENT
FTS5_TOKENIZE_PREFIX
FTS5_TOKENIZE_QUERY
FTS5_TOKEN_COLOCATED
SQLITE_FULL SQLITE_FULL
SQLITE_FUNCTION SQLITE_FUNCTION
SQLITE_IGNORE SQLITE_IGNORE
@ -650,6 +664,14 @@ our %EXPORT_TAGS = (
SQLITE_OPEN_URI SQLITE_OPEN_URI
/], /],
fts5_tokenizer => [qw/
FTS5_TOKENIZE_AUX
FTS5_TOKENIZE_DOCUMENT
FTS5_TOKENIZE_PREFIX
FTS5_TOKENIZE_QUERY
FTS5_TOKEN_COLOCATED
/],
function_flags => [qw/ function_flags => [qw/
SQLITE_DETERMINISTIC SQLITE_DETERMINISTIC
SQLITE_DIRECTONLY SQLITE_DIRECTONLY
@ -736,7 +758,7 @@ DBD::SQLite::Constants - common SQLite constants
=head1 DESCRIPTION =head1 DESCRIPTION
You can import necessary SQLite constants from this module. Available tags are C<all>, C<allowed_return_values_from_sqlite3_txn_state>, C<authorizer_action_codes>, C<authorizer_return_codes>, C<version> (C<compile_time_library_version_numbers>), C<database_connection_configuration_options>, C<dbd_sqlite_string_mode>, C<extended_result_codes>, C<file_open> (C<flags_for_file_open_operations>), C<function_flags>, C<datatypes> (C<fundamental_datatypes>), C<result_codes>, C<run_time_limit_categories>. See L<http://sqlite.org/c3ref/constlist.html> for the complete list of constants. You can import necessary SQLite constants from this module. Available tags are C<all>, C<allowed_return_values_from_sqlite3_txn_state>, C<authorizer_action_codes>, C<authorizer_return_codes>, C<version> (C<compile_time_library_version_numbers>), C<database_connection_configuration_options>, C<dbd_sqlite_string_mode>, C<extended_result_codes>, C<file_open> (C<flags_for_file_open_operations>), C<fts5_tokenizer>, C<function_flags>, C<datatypes> (C<fundamental_datatypes>), C<result_codes>, C<run_time_limit_categories>. See L<http://sqlite.org/c3ref/constlist.html> for the complete list of constants.
This module does not export anything by default. This module does not export anything by default.
@ -1078,6 +1100,8 @@ This module does not export anything by default.
=item SQLITE_OPEN_CREATE =item SQLITE_OPEN_CREATE
=item SQLITE_OPEN_SUPER_JOURNAL
=item SQLITE_OPEN_NOMUTEX =item SQLITE_OPEN_NOMUTEX
=item SQLITE_OPEN_FULLMUTEX =item SQLITE_OPEN_FULLMUTEX
@ -1098,6 +1122,22 @@ This module does not export anything by default.
=back =back
=head2 fts5_tokenizer
=over 4
=item FTS5_TOKENIZE_QUERY
=item FTS5_TOKENIZE_PREFIX
=item FTS5_TOKENIZE_DOCUMENT
=item FTS5_TOKENIZE_AUX
=item FTS5_TOKEN_COLOCATED
=back
=head2 function_flags =head2 function_flags
=over 4 =over 4

110
t/67_fts5.t Normal file
View file

@ -0,0 +1,110 @@
use strict;
use warnings;
no if $] >= 5.022, "warnings", "locale";   # silence locale warnings on 5.22+
use lib "t/lib";
use SQLiteTest;
use Test::More;
#use if -d ".git", "Test::FailWarnings";
use DBD::SQLite;
use utf8; # our source code is UTF-8 encoded

# Sample documents inserted into the FTS5 table.  The accented French
# text deliberately exercises the tokenizer under both unicode settings.
my @texts = ("il était une bergère",
             "qui gardait ses moutons",
             "elle fit un fromage",
             "du lait de ses moutons",
             "anrechenbare quellensteuer hier");

# Each entry is [ MATCH query => indexes into @texts expected to match ].
# An entry with no expected indexes (e.g. "foobar") must return no rows.
my @tests = (
 # query                  => expected results
 ["bergère"              => 0    ],
 ["berg*"                => 0    ],
 ["foobar"                       ],
 ["moutons"              => 1, 3 ],
 ['"qui gardait"'        => 1    ],
 ["moutons NOT lait"     => 1    ],
 ["il était"             => 0    ],
 ["(il OR elle) AND un*" => 0, 2 ],
 ["anrechenbare"         => 4    ],
);
# Skip the whole file early when this build cannot run it at all.
BEGIN {
    requires_unicode_support();
    if (!has_fts()) {
        plan skip_all => 'FTS is disabled for this DBD::SQLite';
    }
    # NOTE(review): this mirrors the FTS3 tokenizer guard for the 3.11.x
    # window -- confirm the same compile option/versions apply to FTS5.
    if ($DBD::SQLite::sqlite_version_number >= 3011000 and $DBD::SQLite::sqlite_version_number < 3012000 and !has_compile_option('ENABLE_FTS5_TOKENIZER')) {
        plan skip_all => 'FTS5 tokenizer is disabled for this DBD::SQLite';
    }
}

# Perl may spit a warning on locale
# use Test::NoWarnings;

BEGIN {
    # Sadly perl for windows (and probably sqlite, too) may hang
    # if the system locale doesn't support european languages.
    # en-us should be a safe default. if it doesn't work, use 'C'.
    if ( $^O eq 'MSWin32') {
        use POSIX 'locale_h';
        setlocale(LC_COLLATE, 'en-us');
    }
}

use DBD::SQLite::Constants ':fts5_tokenizer';
use locale;   # make \w in the tokenizer below locale-aware
# Build an FTS5 tokenizer closure (in the spirit of Search::Tokenizer).
# DBD::SQLite invokes the returned coderef for every string FTS5 wants
# tokenized; each token found is reported back via fts5_xToken() together
# with its byte offsets within the input.
sub locale_tokenizer {
    return sub {
        my ($ctx, $text, $fts5_flags) = @_;

        # \w is locale-aware here because of "use locale" above.
        while ($text =~ /(\w+)/g) {
            my $token = $1;
            my $end   = pos($text);
            my $start = $end - length($token);

            # tflags is 0: plain tokens only, no FTS5_TOKEN_COLOCATED
            # synonyms are emitted by this tokenizer.
            DBD::SQLite::db::fts5_xToken($ctx, 0, $token, $start, $end);
        }
    };
}
# Run the whole suite twice: once with raw bytes, once with
# sqlite_unicode enabled.
# NOTE(review): the diff rendering stripped blank lines; the empty lines
# below are required, as they terminate the <<"" heredocs.
for my $use_unicode (0, 1) {

    # connect
    my $dbh = connect_ok( RaiseError => 1, sqlite_unicode => $use_unicode );

    for my $fts (qw/fts5/) {

        # create fts table, wired to the Perl tokenizer defined above
        $dbh->do(<<"") or die DBI::errstr;
            CREATE VIRTUAL TABLE try_$fts
                 USING $fts(content, tokenize="perl 'main::locale_tokenizer'")

        # populate it
        my $insert_sth = $dbh->prepare(<<"") or die DBI::errstr;
            INSERT INTO try_$fts(content) VALUES(?)

        # remember the rowid of each inserted document so @tests indexes
        # can be translated into expected rowids
        my @doc_ids;
        for (my $i = 0; $i < @texts; $i++) {
            $insert_sth->execute($texts[$i]);
            $doc_ids[$i] = $dbh->last_insert_id("", "", "", "");
        }

        # queries
    SKIP: {
            my $sql = "SELECT rowid FROM try_$fts WHERE content MATCH ?";
            for my $t (@tests) {
                my ($query, @expected) = @$t;
                @expected = map {$doc_ids[$_]} @expected;
                my $results = $dbh->selectcol_arrayref($sql, undef, $query);
                is_deeply($results, \@expected, "$query ($fts, unicode=$use_unicode)");
            }
        }
    }
}

done_testing;

View file

@ -143,6 +143,11 @@ my %since = (
STMTSTATUS_RUN => '3020000', STMTSTATUS_RUN => '3020000',
STMTSTATUS_MEMUSED => '3020000', STMTSTATUS_MEMUSED => '3020000',
DBCONFIG_ENABLE_QPSG => '3020000', DBCONFIG_ENABLE_QPSG => '3020000',
SQLITE_FTS5_TOKEN => '3020000',
FTS5_TOKENIZE_QUERY => '3020000',
FTS5_TOKENIZE_PREFIX => '3020000',
FTS5_TOKENIZE_DOCUMENT => '3020000',
FTS5_TOKENIZE_AUX => '3020000',
IOERR_BEGIN_ATOMIC => '3021000', IOERR_BEGIN_ATOMIC => '3021000',
IOERR_COMMIT_ATOMIC => '3021000', IOERR_COMMIT_ATOMIC => '3021000',
IOERR_ROLLBACK_ATOMIC => '3021000', IOERR_ROLLBACK_ATOMIC => '3021000',
@ -313,6 +318,17 @@ sub extract_constants {
} }
unshift @{$constants{_authorizer_return_codes}}, 'OK'; unshift @{$constants{_authorizer_return_codes}}, 'OK';
# Fudge in the FTS5 constants, as these don't follow the common pattern
$constants{fts5_tokenizer} ||= [];
push @{$constants{fts5_tokenizer}},
'FTS5_TOKENIZE_QUERY',
'FTS5_TOKENIZE_PREFIX',
'FTS5_TOKENIZE_DOCUMENT',
'FTS5_TOKENIZE_AUX',
'FTS5_TOKEN_COLOCATED'
;
%constants; %constants;
} }
@ -335,7 +351,7 @@ sub srcdir {
sub download_url { sub download_url {
my $version = shift; my $version = shift;
my $year = $version->year; my $year = $version->year;
join '', join '',
"http://www.sqlite.org/", "http://www.sqlite.org/",
($version->year ? $version->year."/" : ""), ($version->year ? $version->year."/" : ""),
"sqlite-".($version->archive_type)."-$version".$version->extension; "sqlite-".($version->archive_type)."-$version".$version->extension;

View file

@ -26,6 +26,7 @@ my @dbd_sqlite_constants = (
); );
my %constants = extract_constants(); my %constants = extract_constants();
write_inc(%constants); write_inc(%constants);
write_pm(%constants); write_pm(%constants);
@ -86,9 +87,18 @@ _const_$tag()
END END
for my $name (@$list) { for my $name (@$list) {
my $prefix = $tag =~ /^_/ ? "" : "SQLITE_"; my $prefix;
my $prefix2 = "SQLITE_";
if( $tag =~ /^_/ ) {
$prefix = "";
} elsif( $tag =~ /^fts5_/ ) {
$prefix = "";
$prefix2 = "";
} else {
$prefix = "SQLITE_";
};
print $fh <<"END"; print $fh <<"END";
$prefix$name = SQLITE_$name $prefix$name = $prefix2$name
END END
} }
@ -111,7 +121,16 @@ END
my $ix = 1; my $ix = 1;
for my $name (@{$constants{$tag}}) { for my $name (@{$constants{$tag}}) {
my $prefix = $tag =~ /^_/ ? "" : "SQLITE_"; my $prefix;
my $prefix2 = "SQLITE_";
if( $tag =~ /^_/ ) {
$prefix = "";
} elsif( $tag =~ /^fts5_/ ) {
$prefix = "";
$prefix2 = "";
} else {
$prefix = "SQLITE_";
};
print $fh <<"END"; print $fh <<"END";
$prefix$name = $ix $prefix$name = $ix
END END
@ -166,7 +185,7 @@ END
print $fh <<"END"; print $fh <<"END";
# $tag # $tag
qw/ qw/
@{[join "\n", map {" SQLITE_$_"} sort @{$constants{$tag}}]} @{[join "\n", map {/^FTS5_/ ? " $_" : " SQLITE_$_"} sort @{$constants{$tag}}]}
/, /,
END END