mirror of
https://github.com/DBD-SQLite/DBD-SQLite
synced 2025-06-08 22:58:17 -04:00
moved fts3 things to the end of the dbdimp.c as they are private methods
This commit is contained in:
parent
6e64382593
commit
019e78ec3a
1 changed files with 296 additions and 320 deletions
616
dbdimp.c
616
dbdimp.c
|
@ -18,8 +18,8 @@ DBISTATE_DECLARE;
|
|||
#define croak_if_db_is_null() if (!imp_dbh->db) croak("imp_dbh->db is NULL at line %d in %s", __LINE__, __FILE__)
|
||||
#define croak_if_stmt_is_null() if (!imp_sth->stmt) croak("imp_sth->stmt is NULL at line %d in %s", __LINE__, __FILE__)
|
||||
#else
|
||||
#define croak_if_db_is_null()
|
||||
#define croak_if_stmt_is_null()
|
||||
#define croak_if_db_is_null()
|
||||
#define croak_if_stmt_is_null()
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -497,309 +497,6 @@ sqlite_db_last_insert_id(SV *dbh, imp_dbh_t *imp_dbh, SV *catalog, SV *schema, S
|
|||
return sv_2mortal(newSViv((IV)sqlite3_last_insert_rowid(imp_dbh->db)));
|
||||
}
|
||||
|
||||
/* ======================================================================
|
||||
* EXPERIMENTAL bindings for FTS3 TOKENIZERS
|
||||
* ====================================================================== */
|
||||
|
||||
typedef struct perl_tokenizer {
|
||||
sqlite3_tokenizer base;
|
||||
SV *coderef; /* the perl tokenizer is a coderef that takes
|
||||
a string and returns a cursor coderef */
|
||||
} perl_tokenizer;
|
||||
|
||||
typedef struct perl_tokenizer_cursor {
|
||||
sqlite3_tokenizer_cursor base;
|
||||
SV *coderef; /* ref to the closure that returns terms */
|
||||
char *pToken; /* storage for a copy of the last token */
|
||||
int nTokenAllocated; /* space allocated to pToken buffer */
|
||||
|
||||
/* members below are only used if the input string is in utf8 */
|
||||
const char *pInput; /* input we are tokenizing */
|
||||
const char *lastByteOffset; /* offset into pInput */
|
||||
int lastCharOffset; /* char offset corresponding to lastByteOffset */
|
||||
|
||||
} perl_tokenizer_cursor;
|
||||
|
||||
|
||||
/*
|
||||
** Create a new tokenizer instance.
|
||||
** Will be called whenever a FTS3 table is created with
|
||||
** CREATE .. USING fts3( ... , tokenize=perl qualified::function::name)
|
||||
** where qualified::function::name is a fully qualified perl function
|
||||
*/
|
||||
static int perl_tokenizer_Create(
|
||||
int argc, const char * const *argv,
|
||||
sqlite3_tokenizer **ppTokenizer
|
||||
){
|
||||
dTHX;
|
||||
dSP;
|
||||
int n_retval;
|
||||
SV *retval;
|
||||
|
||||
perl_tokenizer *t;
|
||||
t = (perl_tokenizer *) sqlite3_malloc(sizeof(*t));
|
||||
if( t==NULL ) return SQLITE_NOMEM;
|
||||
memset(t, 0, sizeof(*t));
|
||||
|
||||
ENTER;
|
||||
SAVETMPS;
|
||||
|
||||
/* call the qualified::function::name */
|
||||
PUSHMARK(SP);
|
||||
PUTBACK;
|
||||
n_retval = call_pv(argv[0], G_SCALAR);
|
||||
SPAGAIN;
|
||||
|
||||
/* store a copy of the returned coderef into the tokenizer structure */
|
||||
if (n_retval != 1) {
|
||||
warn("tokenizer_Create returned %d arguments", n_retval);
|
||||
}
|
||||
retval = POPs;
|
||||
t->coderef = newSVsv(retval);
|
||||
*ppTokenizer = &t->base;
|
||||
|
||||
PUTBACK;
|
||||
FREETMPS;
|
||||
LEAVE;
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Destroy a tokenizer
|
||||
*/
|
||||
static int perl_tokenizer_Destroy(sqlite3_tokenizer *pTokenizer){
|
||||
dTHX;
|
||||
perl_tokenizer *t = (perl_tokenizer *) pTokenizer;
|
||||
sv_free(t->coderef);
|
||||
sqlite3_free(t);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Prepare to begin tokenizing a particular string. The input
|
||||
** string to be tokenized is supposed to be pInput[0..nBytes-1] ..
|
||||
** except that nBytes passed by fts3 is -1 (don't know why) !
|
||||
** This is passed to the tokenizer instance, which then returns a
|
||||
** closure implementing the cursor (so the cursor is again a coderef).
|
||||
*/
|
||||
static int perl_tokenizer_Open(
|
||||
sqlite3_tokenizer *pTokenizer, /* Tokenizer object */
|
||||
const char *pInput, int nBytes, /* Input buffer */
|
||||
sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */
|
||||
){
|
||||
dTHX;
|
||||
dSP;
|
||||
U32 flags;
|
||||
SV *perl_string;
|
||||
int n_retval;
|
||||
|
||||
perl_tokenizer *t = (perl_tokenizer *)pTokenizer;
|
||||
|
||||
/* allocate and initialize the cursor struct */
|
||||
perl_tokenizer_cursor *c;
|
||||
c = (perl_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));
|
||||
memset(c, 0, sizeof(*c));
|
||||
*ppCursor = &c->base;
|
||||
|
||||
/* flags for creating the Perl SV containing the input string */
|
||||
flags = SVs_TEMP; /* will call sv_2mortal */
|
||||
|
||||
/* special handling if working with utf8 strings */
|
||||
if (last_executed_dbh->unicode) { /* global var ... no better way ! */
|
||||
|
||||
/* data to keep track of byte offsets */
|
||||
c->lastByteOffset = c->pInput = pInput;
|
||||
c->lastCharOffset = 0;
|
||||
|
||||
/* string passed to Perl needs to be flagged as utf8 */
|
||||
flags |= SVf_UTF8;
|
||||
}
|
||||
|
||||
ENTER;
|
||||
SAVETMPS;
|
||||
|
||||
/* build a Perl copy of the input string */
|
||||
if (nBytes < 0) { /* we get -1 from fts3. Don't know why ! */
|
||||
nBytes = strlen(pInput);
|
||||
}
|
||||
perl_string = newSVpvn_flags(pInput, nBytes, flags);
|
||||
|
||||
/* call the tokenizer coderef */
|
||||
PUSHMARK(SP);
|
||||
XPUSHs(perl_string);
|
||||
PUTBACK;
|
||||
n_retval = call_sv(t->coderef, G_SCALAR);
|
||||
SPAGAIN;
|
||||
|
||||
/* store the cursor coderef returned by the tokenizer */
|
||||
if (n_retval != 1) {
|
||||
warn("tokenizer returned %d arguments", n_retval);
|
||||
}
|
||||
c->coderef = newSVsv(POPs);
|
||||
|
||||
PUTBACK;
|
||||
FREETMPS;
|
||||
LEAVE;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Close a tokenization cursor previously opened by a call to
|
||||
** perl_tokenizer_Open() above.
|
||||
*/
|
||||
static int perl_tokenizer_Close(sqlite3_tokenizer_cursor *pCursor){
|
||||
perl_tokenizer_cursor *c = (perl_tokenizer_cursor *) pCursor;
|
||||
|
||||
dTHX;
|
||||
sv_free(c->coderef);
|
||||
sqlite3_free(c);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Extract the next token from a tokenization cursor. The cursor must
|
||||
** have been opened by a prior call to perl_tokenizer_Open().
|
||||
*/
|
||||
static int perl_tokenizer_Next(
|
||||
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by perl_tokenizer_Open */
|
||||
const char **ppToken, /* OUT: *ppToken is the token text */
|
||||
int *pnBytes, /* OUT: Number of bytes in token */
|
||||
int *piStartOffset, /* OUT: Starting offset of token */
|
||||
int *piEndOffset, /* OUT: Ending offset of token */
|
||||
int *piPosition /* OUT: Position integer of token */
|
||||
){
|
||||
perl_tokenizer_cursor *c = (perl_tokenizer_cursor *) pCursor;
|
||||
int result;
|
||||
int n_retval;
|
||||
STRLEN n_a;
|
||||
char *token;
|
||||
char *byteOffset;
|
||||
I32 hop;
|
||||
|
||||
dTHX;
|
||||
dSP;
|
||||
|
||||
ENTER;
|
||||
SAVETMPS;
|
||||
|
||||
/* call the cursor */
|
||||
PUSHMARK(SP);
|
||||
PUTBACK;
|
||||
n_retval = call_sv(c->coderef, G_ARRAY);
|
||||
SPAGAIN;
|
||||
|
||||
/* if we get back an empty list, there is no more token */
|
||||
if (n_retval == 0) {
|
||||
result = SQLITE_DONE;
|
||||
}
|
||||
/* otherwise, get token details from the return list */
|
||||
else {
|
||||
if (n_retval != 5) {
|
||||
warn("tokenizer cursor returned %d arguments", n_retval);
|
||||
}
|
||||
*piPosition = POPi;
|
||||
*piEndOffset = POPi;
|
||||
*piStartOffset = POPi;
|
||||
*pnBytes = POPi;
|
||||
token = POPpx;
|
||||
|
||||
if (c->pInput) { /* if working with utf8 data */
|
||||
|
||||
/* recompute *pnBytes in bytes, not in chars */
|
||||
*pnBytes = strlen(token);
|
||||
|
||||
/* recompute start/end offsets in bytes, not in chars */
|
||||
hop = *piStartOffset - c->lastCharOffset;
|
||||
byteOffset = utf8_hop(c->lastByteOffset, hop);
|
||||
hop = *piEndOffset - *piStartOffset;
|
||||
*piStartOffset = byteOffset - c->pInput;
|
||||
byteOffset = utf8_hop(byteOffset, hop);
|
||||
*piEndOffset = byteOffset - c->pInput;
|
||||
/* remember where we are for next round */
|
||||
c->lastCharOffset = *piEndOffset,
|
||||
c->lastByteOffset = byteOffset;
|
||||
}
|
||||
|
||||
/* make sure we have enough storage for copying the token */
|
||||
if (*pnBytes > c->nTokenAllocated ){
|
||||
char *pNew;
|
||||
c->nTokenAllocated = *pnBytes + 20;
|
||||
pNew = sqlite3_realloc(c->pToken, c->nTokenAllocated);
|
||||
if( !pNew ) return SQLITE_NOMEM;
|
||||
c->pToken = pNew;
|
||||
}
|
||||
|
||||
/* need to copy the token into the C cursor before perl frees that
|
||||
memory */
|
||||
memcpy(c->pToken, token, *pnBytes);
|
||||
*ppToken = c->pToken;
|
||||
|
||||
result = SQLITE_OK;
|
||||
}
|
||||
|
||||
PUTBACK;
|
||||
FREETMPS;
|
||||
LEAVE;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** The set of routines that implement the perl tokenizer
|
||||
*/
|
||||
sqlite3_tokenizer_module perl_tokenizer_Module = {
|
||||
0,
|
||||
perl_tokenizer_Create,
|
||||
perl_tokenizer_Destroy,
|
||||
perl_tokenizer_Open,
|
||||
perl_tokenizer_Close,
|
||||
perl_tokenizer_Next
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
** Register the perl tokenizer with FTS3
|
||||
*/
|
||||
int sqlite_db_register_fts3_perl_tokenizer(pTHX_ SV *dbh)
|
||||
{
|
||||
D_imp_dbh(dbh);
|
||||
|
||||
int rc;
|
||||
sqlite3_stmt *pStmt;
|
||||
const char zSql[] = "SELECT fts3_tokenizer(?, ?)";
|
||||
sqlite3_tokenizer_module *p = &perl_tokenizer_Module;
|
||||
|
||||
rc = sqlite3_prepare_v2(imp_dbh->db, zSql, -1, &pStmt, 0);
|
||||
if( rc!=SQLITE_OK ){
|
||||
return rc;
|
||||
}
|
||||
|
||||
sqlite3_bind_text(pStmt, 1, "perl", -1, SQLITE_STATIC);
|
||||
sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
|
||||
sqlite3_step(pStmt);
|
||||
|
||||
return sqlite3_finalize(pStmt);
|
||||
}
|
||||
|
||||
|
||||
/* ======================================================================
|
||||
* END # EXPERIMENTAL bindings for FTS3 TOKENIZERS
|
||||
* ====================================================================== */
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
int
|
||||
sqlite_st_prepare(SV *sth, imp_sth_t *imp_sth, char *statement, SV *attribs)
|
||||
{
|
||||
|
@ -1526,7 +1223,7 @@ sqlite_db_create_function(pTHX_ SV *dbh, const char *name, int argc, SV *func)
|
|||
rc = sqlite3_create_function( imp_dbh->db, name, argc, SQLITE_UTF8,
|
||||
func_sv,
|
||||
imp_dbh->unicode ? sqlite_db_func_dispatcher_unicode
|
||||
: sqlite_db_func_dispatcher_no_unicode,
|
||||
: sqlite_db_func_dispatcher_no_unicode,
|
||||
NULL, NULL );
|
||||
if ( rc != SQLITE_OK ) {
|
||||
sqlite_error(dbh, rc, form("sqlite_create_function failed with error %s", sqlite3_errmsg(imp_dbh->db)));
|
||||
|
@ -1585,7 +1282,7 @@ sqlite_db_aggr_new_dispatcher(pTHX_ sqlite3_context *context, aggrInfo *aggr_inf
|
|||
} else if ( count != 1 ) {
|
||||
int i;
|
||||
|
||||
aggr_info->err = newSVpvf("new() should return one value, got %d",
|
||||
aggr_info->err = newSVpvf("new() should return one value, got %d",
|
||||
count );
|
||||
/* Clear the stack */
|
||||
for ( i=0; i < count; i++ ) {
|
||||
|
@ -1629,7 +1326,7 @@ sqlite_db_aggr_step_dispatcher(sqlite3_context *context,
|
|||
sqlite_db_aggr_new_dispatcher(aTHX_ context, aggr);
|
||||
}
|
||||
|
||||
if ( aggr->err || !aggr->aggr_inst )
|
||||
if ( aggr->err || !aggr->aggr_inst )
|
||||
goto cleanup;
|
||||
|
||||
PUSHMARK(SP);
|
||||
|
@ -1694,7 +1391,7 @@ sqlite_db_aggr_finalize_dispatcher( sqlite3_context *context )
|
|||
aggr->aggr_inst = NULL;
|
||||
aggr->err = NULL;
|
||||
sqlite_db_aggr_new_dispatcher(aTHX_ context, aggr);
|
||||
}
|
||||
}
|
||||
|
||||
if ( ! aggr->err && aggr->aggr_inst ) {
|
||||
PUSHMARK(SP);
|
||||
|
@ -1755,7 +1452,7 @@ sqlite_db_create_aggregate(pTHX_ SV *dbh, const char *name, int argc, SV *aggr_p
|
|||
rc = sqlite3_create_function( imp_dbh->db, name, argc, SQLITE_UTF8,
|
||||
aggr_pkg_copy,
|
||||
NULL,
|
||||
sqlite_db_aggr_step_dispatcher,
|
||||
sqlite_db_aggr_step_dispatcher,
|
||||
sqlite_db_aggr_finalize_dispatcher
|
||||
);
|
||||
|
||||
|
@ -1858,10 +1555,10 @@ sqlite_db_create_collation(pTHX_ SV *dbh, const char *name, SV *func)
|
|||
av_push( imp_dbh->functions, func_sv );
|
||||
|
||||
/* Register the func within sqlite3 */
|
||||
rv = sqlite3_create_collation(
|
||||
rv = sqlite3_create_collation(
|
||||
imp_dbh->db, name, SQLITE_UTF8,
|
||||
func_sv,
|
||||
imp_dbh->unicode ? sqlite_db_collation_dispatcher_utf8
|
||||
func_sv,
|
||||
imp_dbh->unicode ? sqlite_db_collation_dispatcher_utf8
|
||||
: sqlite_db_collation_dispatcher
|
||||
);
|
||||
|
||||
|
@ -1911,7 +1608,7 @@ sqlite_db_collation_needed(pTHX_ SV *dbh, SV *callback)
|
|||
sv_setsv(imp_dbh->collation_needed_callback, callback);
|
||||
|
||||
/* Register the func within sqlite3 */
|
||||
(void) sqlite3_collation_needed( imp_dbh->db,
|
||||
(void) sqlite3_collation_needed( imp_dbh->db,
|
||||
(void*) (SvOK(callback) ? dbh : NULL),
|
||||
sqlite_db_collation_needed_dispatcher );
|
||||
}
|
||||
|
@ -1960,7 +1657,7 @@ sqlite_db_progress_handler(pTHX_ SV *dbh, int n_opcodes, SV *handler)
|
|||
av_push( imp_dbh->functions, handler_sv );
|
||||
|
||||
/* Register the func within sqlite3 */
|
||||
sqlite3_progress_handler( imp_dbh->db, n_opcodes,
|
||||
sqlite3_progress_handler( imp_dbh->db, n_opcodes,
|
||||
sqlite_db_generic_callback_dispatcher,
|
||||
handler_sv );
|
||||
}
|
||||
|
@ -1986,7 +1683,7 @@ sqlite_db_commit_hook(pTHX_ SV *dbh, SV *hook)
|
|||
av_push( imp_dbh->functions, hook_sv );
|
||||
|
||||
/* Register the hook within sqlite3 */
|
||||
retval = sqlite3_commit_hook( imp_dbh->db,
|
||||
retval = sqlite3_commit_hook( imp_dbh->db,
|
||||
sqlite_db_generic_callback_dispatcher,
|
||||
hook_sv );
|
||||
}
|
||||
|
@ -2013,7 +1710,7 @@ sqlite_db_rollback_hook(pTHX_ SV *dbh, SV *hook)
|
|||
av_push( imp_dbh->functions, hook_sv );
|
||||
|
||||
/* Register the hook within sqlite3 */
|
||||
retval = sqlite3_rollback_hook( imp_dbh->db,
|
||||
retval = sqlite3_rollback_hook( imp_dbh->db,
|
||||
(void(*)(void *))
|
||||
sqlite_db_generic_callback_dispatcher,
|
||||
hook_sv );
|
||||
|
@ -2023,7 +1720,7 @@ sqlite_db_rollback_hook(pTHX_ SV *dbh, SV *hook)
|
|||
}
|
||||
|
||||
void
|
||||
sqlite_db_update_dispatcher( void *callback, int op,
|
||||
sqlite_db_update_dispatcher( void *callback, int op,
|
||||
char const *database, char const *table,
|
||||
sqlite3_int64 rowid )
|
||||
{
|
||||
|
@ -2067,7 +1764,7 @@ sqlite_db_update_hook(pTHX_ SV *dbh, SV *hook)
|
|||
av_push( imp_dbh->functions, hook_sv );
|
||||
|
||||
/* Register the hook within sqlite3 */
|
||||
retval = sqlite3_update_hook( imp_dbh->db,
|
||||
retval = sqlite3_update_hook( imp_dbh->db,
|
||||
sqlite_db_update_dispatcher,
|
||||
hook_sv );
|
||||
}
|
||||
|
@ -2138,7 +1835,7 @@ sqlite_db_set_authorizer(pTHX_ SV *dbh, SV *authorizer)
|
|||
av_push( imp_dbh->functions, authorizer_sv );
|
||||
|
||||
/* Register the hook within sqlite3 */
|
||||
retval = sqlite3_set_authorizer( imp_dbh->db,
|
||||
retval = sqlite3_set_authorizer( imp_dbh->db,
|
||||
sqlite_db_authorizer_dispatcher,
|
||||
authorizer_sv );
|
||||
}
|
||||
|
@ -2232,4 +1929,283 @@ sqlite_db_backup_to_file(pTHX_ SV *dbh, char *filename)
|
|||
#endif
|
||||
}
|
||||
|
||||
typedef struct perl_tokenizer {
|
||||
sqlite3_tokenizer base;
|
||||
SV *coderef; /* the perl tokenizer is a coderef that takes
|
||||
a string and returns a cursor coderef */
|
||||
} perl_tokenizer;
|
||||
|
||||
typedef struct perl_tokenizer_cursor {
|
||||
sqlite3_tokenizer_cursor base;
|
||||
SV *coderef; /* ref to the closure that returns terms */
|
||||
char *pToken; /* storage for a copy of the last token */
|
||||
int nTokenAllocated; /* space allocated to pToken buffer */
|
||||
|
||||
/* members below are only used if the input string is in utf8 */
|
||||
const char *pInput; /* input we are tokenizing */
|
||||
const char *lastByteOffset; /* offset into pInput */
|
||||
int lastCharOffset; /* char offset corresponding to lastByteOffset */
|
||||
} perl_tokenizer_cursor;
|
||||
|
||||
/*
|
||||
** Create a new tokenizer instance.
|
||||
** Will be called whenever a FTS3 table is created with
|
||||
** CREATE .. USING fts3( ... , tokenize=perl qualified::function::name)
|
||||
** where qualified::function::name is a fully qualified perl function
|
||||
*/
|
||||
static int perl_tokenizer_Create(
|
||||
int argc, const char * const *argv,
|
||||
sqlite3_tokenizer **ppTokenizer
|
||||
){
|
||||
dTHX;
|
||||
dSP;
|
||||
int n_retval;
|
||||
SV *retval;
|
||||
|
||||
perl_tokenizer *t;
|
||||
t = (perl_tokenizer *) sqlite3_malloc(sizeof(*t));
|
||||
if( t==NULL ) return SQLITE_NOMEM;
|
||||
memset(t, 0, sizeof(*t));
|
||||
|
||||
ENTER;
|
||||
SAVETMPS;
|
||||
|
||||
/* call the qualified::function::name */
|
||||
PUSHMARK(SP);
|
||||
PUTBACK;
|
||||
n_retval = call_pv(argv[0], G_SCALAR);
|
||||
SPAGAIN;
|
||||
|
||||
/* store a copy of the returned coderef into the tokenizer structure */
|
||||
if (n_retval != 1) {
|
||||
warn("tokenizer_Create returned %d arguments", n_retval);
|
||||
}
|
||||
retval = POPs;
|
||||
t->coderef = newSVsv(retval);
|
||||
*ppTokenizer = &t->base;
|
||||
|
||||
PUTBACK;
|
||||
FREETMPS;
|
||||
LEAVE;
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Destroy a tokenizer
|
||||
*/
|
||||
static int perl_tokenizer_Destroy(sqlite3_tokenizer *pTokenizer){
|
||||
dTHX;
|
||||
perl_tokenizer *t = (perl_tokenizer *) pTokenizer;
|
||||
sv_free(t->coderef);
|
||||
sqlite3_free(t);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Prepare to begin tokenizing a particular string. The input
|
||||
** string to be tokenized is supposed to be pInput[0..nBytes-1] ..
|
||||
** except that nBytes passed by fts3 is -1 (don't know why) !
|
||||
** This is passed to the tokenizer instance, which then returns a
|
||||
** closure implementing the cursor (so the cursor is again a coderef).
|
||||
*/
|
||||
static int perl_tokenizer_Open(
|
||||
sqlite3_tokenizer *pTokenizer, /* Tokenizer object */
|
||||
const char *pInput, int nBytes, /* Input buffer */
|
||||
sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */
|
||||
){
|
||||
dTHX;
|
||||
dSP;
|
||||
U32 flags;
|
||||
SV *perl_string;
|
||||
int n_retval;
|
||||
|
||||
perl_tokenizer *t = (perl_tokenizer *)pTokenizer;
|
||||
|
||||
/* allocate and initialize the cursor struct */
|
||||
perl_tokenizer_cursor *c;
|
||||
c = (perl_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));
|
||||
memset(c, 0, sizeof(*c));
|
||||
*ppCursor = &c->base;
|
||||
|
||||
/* flags for creating the Perl SV containing the input string */
|
||||
flags = SVs_TEMP; /* will call sv_2mortal */
|
||||
|
||||
/* special handling if working with utf8 strings */
|
||||
if (last_executed_dbh->unicode) { /* global var ... no better way ! */
|
||||
|
||||
/* data to keep track of byte offsets */
|
||||
c->lastByteOffset = c->pInput = pInput;
|
||||
c->lastCharOffset = 0;
|
||||
|
||||
/* string passed to Perl needs to be flagged as utf8 */
|
||||
flags |= SVf_UTF8;
|
||||
}
|
||||
|
||||
ENTER;
|
||||
SAVETMPS;
|
||||
|
||||
/* build a Perl copy of the input string */
|
||||
if (nBytes < 0) { /* we get -1 from fts3. Don't know why ! */
|
||||
nBytes = strlen(pInput);
|
||||
}
|
||||
perl_string = newSVpvn_flags(pInput, nBytes, flags);
|
||||
|
||||
/* call the tokenizer coderef */
|
||||
PUSHMARK(SP);
|
||||
XPUSHs(perl_string);
|
||||
PUTBACK;
|
||||
n_retval = call_sv(t->coderef, G_SCALAR);
|
||||
SPAGAIN;
|
||||
|
||||
/* store the cursor coderef returned by the tokenizer */
|
||||
if (n_retval != 1) {
|
||||
warn("tokenizer returned %d arguments", n_retval);
|
||||
}
|
||||
c->coderef = newSVsv(POPs);
|
||||
|
||||
PUTBACK;
|
||||
FREETMPS;
|
||||
LEAVE;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Close a tokenization cursor previously opened by a call to
|
||||
** perl_tokenizer_Open() above.
|
||||
*/
|
||||
static int perl_tokenizer_Close(sqlite3_tokenizer_cursor *pCursor){
|
||||
perl_tokenizer_cursor *c = (perl_tokenizer_cursor *) pCursor;
|
||||
|
||||
dTHX;
|
||||
sv_free(c->coderef);
|
||||
sqlite3_free(c);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Extract the next token from a tokenization cursor. The cursor must
|
||||
** have been opened by a prior call to perl_tokenizer_Open().
|
||||
*/
|
||||
static int perl_tokenizer_Next(
|
||||
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by perl_tokenizer_Open */
|
||||
const char **ppToken, /* OUT: *ppToken is the token text */
|
||||
int *pnBytes, /* OUT: Number of bytes in token */
|
||||
int *piStartOffset, /* OUT: Starting offset of token */
|
||||
int *piEndOffset, /* OUT: Ending offset of token */
|
||||
int *piPosition /* OUT: Position integer of token */
|
||||
){
|
||||
perl_tokenizer_cursor *c = (perl_tokenizer_cursor *) pCursor;
|
||||
int result;
|
||||
int n_retval;
|
||||
STRLEN n_a;
|
||||
char *token;
|
||||
char *byteOffset;
|
||||
I32 hop;
|
||||
|
||||
dTHX;
|
||||
dSP;
|
||||
|
||||
ENTER;
|
||||
SAVETMPS;
|
||||
|
||||
/* call the cursor */
|
||||
PUSHMARK(SP);
|
||||
PUTBACK;
|
||||
n_retval = call_sv(c->coderef, G_ARRAY);
|
||||
SPAGAIN;
|
||||
|
||||
/* if we get back an empty list, there is no more token */
|
||||
if (n_retval == 0) {
|
||||
result = SQLITE_DONE;
|
||||
}
|
||||
/* otherwise, get token details from the return list */
|
||||
else {
|
||||
if (n_retval != 5) {
|
||||
warn("tokenizer cursor returned %d arguments", n_retval);
|
||||
}
|
||||
*piPosition = POPi;
|
||||
*piEndOffset = POPi;
|
||||
*piStartOffset = POPi;
|
||||
*pnBytes = POPi;
|
||||
token = POPpx;
|
||||
|
||||
if (c->pInput) { /* if working with utf8 data */
|
||||
|
||||
/* recompute *pnBytes in bytes, not in chars */
|
||||
*pnBytes = strlen(token);
|
||||
|
||||
/* recompute start/end offsets in bytes, not in chars */
|
||||
hop = *piStartOffset - c->lastCharOffset;
|
||||
byteOffset = utf8_hop(c->lastByteOffset, hop);
|
||||
hop = *piEndOffset - *piStartOffset;
|
||||
*piStartOffset = byteOffset - c->pInput;
|
||||
byteOffset = utf8_hop(byteOffset, hop);
|
||||
*piEndOffset = byteOffset - c->pInput;
|
||||
|
||||
/* remember where we are for next round */
|
||||
c->lastCharOffset = *piEndOffset,
|
||||
c->lastByteOffset = byteOffset;
|
||||
}
|
||||
|
||||
/* make sure we have enough storage for copying the token */
|
||||
if (*pnBytes > c->nTokenAllocated ){
|
||||
char *pNew;
|
||||
c->nTokenAllocated = *pnBytes + 20;
|
||||
pNew = sqlite3_realloc(c->pToken, c->nTokenAllocated);
|
||||
if( !pNew ) return SQLITE_NOMEM;
|
||||
c->pToken = pNew;
|
||||
}
|
||||
|
||||
/* need to copy the token into the C cursor before perl frees that
|
||||
memory */
|
||||
memcpy(c->pToken, token, *pnBytes);
|
||||
*ppToken = c->pToken;
|
||||
|
||||
result = SQLITE_OK;
|
||||
}
|
||||
|
||||
PUTBACK;
|
||||
FREETMPS;
|
||||
LEAVE;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
** The set of routines that implement the perl tokenizer
|
||||
*/
|
||||
sqlite3_tokenizer_module perl_tokenizer_Module = {
|
||||
0,
|
||||
perl_tokenizer_Create,
|
||||
perl_tokenizer_Destroy,
|
||||
perl_tokenizer_Open,
|
||||
perl_tokenizer_Close,
|
||||
perl_tokenizer_Next
|
||||
};
|
||||
|
||||
/*
|
||||
** Register the perl tokenizer with FTS3
|
||||
*/
|
||||
int sqlite_db_register_fts3_perl_tokenizer(pTHX_ SV *dbh)
|
||||
{
|
||||
D_imp_dbh(dbh);
|
||||
|
||||
int rc;
|
||||
sqlite3_stmt *pStmt;
|
||||
const char zSql[] = "SELECT fts3_tokenizer(?, ?)";
|
||||
sqlite3_tokenizer_module *p = &perl_tokenizer_Module;
|
||||
|
||||
rc = sqlite3_prepare_v2(imp_dbh->db, zSql, -1, &pStmt, 0);
|
||||
if( rc!=SQLITE_OK ){
|
||||
return rc;
|
||||
}
|
||||
|
||||
sqlite3_bind_text(pStmt, 1, "perl", -1, SQLITE_STATIC);
|
||||
sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
|
||||
sqlite3_step(pStmt);
|
||||
|
||||
return sqlite3_finalize(pStmt);
|
||||
}
|
||||
|
||||
/* end */
|
||||
|
|
Loading…
Add table
Reference in a new issue