diff --git a/dbdimp.c b/dbdimp.c index 7d141c1..ca83cce 100644 --- a/dbdimp.c +++ b/dbdimp.c @@ -18,8 +18,8 @@ DBISTATE_DECLARE; #define croak_if_db_is_null() if (!imp_dbh->db) croak("imp_dbh->db is NULL at line %d in %s", __LINE__, __FILE__) #define croak_if_stmt_is_null() if (!imp_sth->stmt) croak("imp_sth->stmt is NULL at line %d in %s", __LINE__, __FILE__) #else - #define croak_if_db_is_null() - #define croak_if_stmt_is_null() + #define croak_if_db_is_null() + #define croak_if_stmt_is_null() #endif @@ -497,309 +497,6 @@ sqlite_db_last_insert_id(SV *dbh, imp_dbh_t *imp_dbh, SV *catalog, SV *schema, S return sv_2mortal(newSViv((IV)sqlite3_last_insert_rowid(imp_dbh->db))); } -/* ====================================================================== - * EXPERIMENTAL bindings for FTS3 TOKENIZERS - * ====================================================================== */ - -typedef struct perl_tokenizer { - sqlite3_tokenizer base; - SV *coderef; /* the perl tokenizer is a coderef that takes - a string and returns a cursor coderef */ -} perl_tokenizer; - -typedef struct perl_tokenizer_cursor { - sqlite3_tokenizer_cursor base; - SV *coderef; /* ref to the closure that returns terms */ - char *pToken; /* storage for a copy of the last token */ - int nTokenAllocated; /* space allocated to pToken buffer */ - - /* members below are only used if the input string is in utf8 */ - const char *pInput; /* input we are tokenizing */ - const char *lastByteOffset; /* offset into pInput */ - int lastCharOffset; /* char offset corresponding to lastByteOffset */ - -} perl_tokenizer_cursor; - - -/* -** Create a new tokenizer instance. -** Will be called whenever a FTS3 table is created with -** CREATE .. USING fts3( ... , tokenize=perl qualified::function::name) -** where qualified::function::name is a fully qualified perl function -*/ -static int perl_tokenizer_Create( - int argc, const char * const *argv, - sqlite3_tokenizer **ppTokenizer -){ - dTHX; - dSP; - int n_retval; - SV *retval; - - perl_tokenizer *t; - t = (perl_tokenizer *) sqlite3_malloc(sizeof(*t)); - if( t==NULL ) return SQLITE_NOMEM; - memset(t, 0, sizeof(*t)); - - ENTER; - SAVETMPS; - - /* call the qualified::function::name */ - PUSHMARK(SP); - PUTBACK; - n_retval = call_pv(argv[0], G_SCALAR); - SPAGAIN; - - /* store a copy of the returned coderef into the tokenizer structure */ - if (n_retval != 1) { - warn("tokenizer_Create returned %d arguments", n_retval); - } - retval = POPs; - t->coderef = newSVsv(retval); - *ppTokenizer = &t->base; - - PUTBACK; - FREETMPS; - LEAVE; - - return SQLITE_OK; -} - - -/* -** Destroy a tokenizer -*/ -static int perl_tokenizer_Destroy(sqlite3_tokenizer *pTokenizer){ - dTHX; - perl_tokenizer *t = (perl_tokenizer *) pTokenizer; - sv_free(t->coderef); - sqlite3_free(t); - return SQLITE_OK; -} - - -/* -** Prepare to begin tokenizing a particular string. The input -** string to be tokenized is supposed to be pInput[0..nBytes-1] .. -** except that nBytes passed by fts3 is -1 (don't know why) ! -** This is passed to the tokenizer instance, which then returns a -** closure implementing the cursor (so the cursor is again a coderef). -*/ -static int perl_tokenizer_Open( - sqlite3_tokenizer *pTokenizer, /* Tokenizer object */ - const char *pInput, int nBytes, /* Input buffer */ - sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */ -){ - dTHX; - dSP; - U32 flags; - SV *perl_string; - int n_retval; - - perl_tokenizer *t = (perl_tokenizer *)pTokenizer; - - /* allocate and initialize the cursor struct */ - perl_tokenizer_cursor *c; - c = (perl_tokenizer_cursor *) sqlite3_malloc(sizeof(*c)); - memset(c, 0, sizeof(*c)); - *ppCursor = &c->base; - - /* flags for creating the Perl SV containing the input string */ - flags = SVs_TEMP; /* will call sv_2mortal */ - - /* special handling if working with utf8 strings */ - if (last_executed_dbh->unicode) { /* global var ... no better way ! */ - - /* data to keep track of byte offsets */ - c->lastByteOffset = c->pInput = pInput; - c->lastCharOffset = 0; - - /* string passed to Perl needs to be flagged as utf8 */ - flags |= SVf_UTF8; - } - - ENTER; - SAVETMPS; - - /* build a Perl copy of the input string */ - if (nBytes < 0) { /* we get -1 from fts3. Don't know why ! */ - nBytes = strlen(pInput); - } - perl_string = newSVpvn_flags(pInput, nBytes, flags); - - /* call the tokenizer coderef */ - PUSHMARK(SP); - XPUSHs(perl_string); - PUTBACK; - n_retval = call_sv(t->coderef, G_SCALAR); - SPAGAIN; - - /* store the cursor coderef returned by the tokenizer */ - if (n_retval != 1) { - warn("tokenizer returned %d arguments", n_retval); - } - c->coderef = newSVsv(POPs); - - PUTBACK; - FREETMPS; - LEAVE; - return SQLITE_OK; -} - -/* -** Close a tokenization cursor previously opened by a call to -** perl_tokenizer_Open() above. -*/ -static int perl_tokenizer_Close(sqlite3_tokenizer_cursor *pCursor){ - perl_tokenizer_cursor *c = (perl_tokenizer_cursor *) pCursor; - - dTHX; - sv_free(c->coderef); - sqlite3_free(c); - return SQLITE_OK; -} - -/* -** Extract the next token from a tokenization cursor. The cursor must -** have been opened by a prior call to perl_tokenizer_Open(). -*/ -static int perl_tokenizer_Next( - sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by perl_tokenizer_Open */ - const char **ppToken, /* OUT: *ppToken is the token text */ - int *pnBytes, /* OUT: Number of bytes in token */ - int *piStartOffset, /* OUT: Starting offset of token */ - int *piEndOffset, /* OUT: Ending offset of token */ - int *piPosition /* OUT: Position integer of token */ -){ - perl_tokenizer_cursor *c = (perl_tokenizer_cursor *) pCursor; - int result; - int n_retval; - STRLEN n_a; - char *token; - char *byteOffset; - I32 hop; - - dTHX; - dSP; - - ENTER; - SAVETMPS; - - /* call the cursor */ - PUSHMARK(SP); - PUTBACK; - n_retval = call_sv(c->coderef, G_ARRAY); - SPAGAIN; - - /* if we get back an empty list, there is no more token */ - if (n_retval == 0) { - result = SQLITE_DONE; - } - /* otherwise, get token details from the return list */ - else { - if (n_retval != 5) { - warn("tokenizer cursor returned %d arguments", n_retval); - } - *piPosition = POPi; - *piEndOffset = POPi; - *piStartOffset = POPi; - *pnBytes = POPi; - token = POPpx; - - if (c->pInput) { /* if working with utf8 data */ - - /* recompute *pnBytes in bytes, not in chars */ - *pnBytes = strlen(token); - - /* recompute start/end offsets in bytes, not in chars */ - hop = *piStartOffset - c->lastCharOffset; - byteOffset = utf8_hop(c->lastByteOffset, hop); - hop = *piEndOffset - *piStartOffset; - *piStartOffset = byteOffset - c->pInput; - byteOffset = utf8_hop(byteOffset, hop); - *piEndOffset = byteOffset - c->pInput; - /* remember where we are for next round */ - c->lastCharOffset = *piEndOffset, - c->lastByteOffset = byteOffset; - } - - /* make sure we have enough storage for copying the token */ - if (*pnBytes > c->nTokenAllocated ){ - char *pNew; - c->nTokenAllocated = *pnBytes + 20; - pNew = sqlite3_realloc(c->pToken, c->nTokenAllocated); - if( !pNew ) return SQLITE_NOMEM; - c->pToken = pNew; - } - - /* need to copy the token into the C cursor before perl frees that - memory */ - memcpy(c->pToken, token, *pnBytes); - *ppToken = c->pToken; - - result = SQLITE_OK; - } - - PUTBACK; - FREETMPS; - LEAVE; - - return result; -} - - -/* -** The set of routines that implement the perl tokenizer -*/ -sqlite3_tokenizer_module perl_tokenizer_Module = { - 0, - perl_tokenizer_Create, - perl_tokenizer_Destroy, - perl_tokenizer_Open, - perl_tokenizer_Close, - perl_tokenizer_Next -}; - - -/* -** Register the perl tokenizer with FTS3 -*/ -int sqlite_db_register_fts3_perl_tokenizer(pTHX_ SV *dbh) -{ - D_imp_dbh(dbh); - - int rc; - sqlite3_stmt *pStmt; - const char zSql[] = "SELECT fts3_tokenizer(?, ?)"; - sqlite3_tokenizer_module *p = &perl_tokenizer_Module; - - rc = sqlite3_prepare_v2(imp_dbh->db, zSql, -1, &pStmt, 0); - if( rc!=SQLITE_OK ){ - return rc; - } - - sqlite3_bind_text(pStmt, 1, "perl", -1, SQLITE_STATIC); - sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC); - sqlite3_step(pStmt); - - return sqlite3_finalize(pStmt); -} - - -/* ====================================================================== - * END # EXPERIMENTAL bindings for FTS3 TOKENIZERS - * ====================================================================== */ - - - - - - - - - - - int sqlite_st_prepare(SV *sth, imp_sth_t *imp_sth, char *statement, SV *attribs) { @@ -1526,7 +1223,7 @@ sqlite_db_create_function(pTHX_ SV *dbh, const char *name, int argc, SV *func) rc = sqlite3_create_function( imp_dbh->db, name, argc, SQLITE_UTF8, func_sv, imp_dbh->unicode ? sqlite_db_func_dispatcher_unicode - : sqlite_db_func_dispatcher_no_unicode, + : sqlite_db_func_dispatcher_no_unicode, NULL, NULL ); if ( rc != SQLITE_OK ) { sqlite_error(dbh, rc, form("sqlite_create_function failed with error %s", sqlite3_errmsg(imp_dbh->db))); @@ -1585,7 +1282,7 @@ sqlite_db_aggr_new_dispatcher(pTHX_ sqlite3_context *context, aggrInfo *aggr_inf } else if ( count != 1 ) { int i; - aggr_info->err = newSVpvf("new() should return one value, got %d", + aggr_info->err = newSVpvf("new() should return one value, got %d", count ); /* Clear the stack */ for ( i=0; i < count; i++ ) { @@ -1629,7 +1326,7 @@ sqlite_db_aggr_step_dispatcher(sqlite3_context *context, sqlite_db_aggr_new_dispatcher(aTHX_ context, aggr); } - if ( aggr->err || !aggr->aggr_inst ) + if ( aggr->err || !aggr->aggr_inst ) goto cleanup; PUSHMARK(SP); @@ -1694,7 +1391,7 @@ sqlite_db_aggr_finalize_dispatcher( sqlite3_context *context ) aggr->aggr_inst = NULL; aggr->err = NULL; sqlite_db_aggr_new_dispatcher(aTHX_ context, aggr); - } + } if ( ! aggr->err && aggr->aggr_inst ) { PUSHMARK(SP); @@ -1755,7 +1452,7 @@ sqlite_db_create_aggregate(pTHX_ SV *dbh, const char *name, int argc, SV *aggr_p rc = sqlite3_create_function( imp_dbh->db, name, argc, SQLITE_UTF8, aggr_pkg_copy, NULL, - sqlite_db_aggr_step_dispatcher, + sqlite_db_aggr_step_dispatcher, sqlite_db_aggr_finalize_dispatcher ); @@ -1858,10 +1555,10 @@ sqlite_db_create_collation(pTHX_ SV *dbh, const char *name, SV *func) av_push( imp_dbh->functions, func_sv ); /* Register the func within sqlite3 */ - rv = sqlite3_create_collation( + rv = sqlite3_create_collation( imp_dbh->db, name, SQLITE_UTF8, - func_sv, - imp_dbh->unicode ? sqlite_db_collation_dispatcher_utf8 + func_sv, + imp_dbh->unicode ? sqlite_db_collation_dispatcher_utf8 : sqlite_db_collation_dispatcher ); @@ -1911,7 +1608,7 @@ sqlite_db_collation_needed(pTHX_ SV *dbh, SV *callback) sv_setsv(imp_dbh->collation_needed_callback, callback); /* Register the func within sqlite3 */ - (void) sqlite3_collation_needed( imp_dbh->db, + (void) sqlite3_collation_needed( imp_dbh->db, (void*) (SvOK(callback) ? dbh : NULL), sqlite_db_collation_needed_dispatcher ); } @@ -1960,7 +1657,7 @@ sqlite_db_progress_handler(pTHX_ SV *dbh, int n_opcodes, SV *handler) av_push( imp_dbh->functions, handler_sv ); /* Register the func within sqlite3 */ - sqlite3_progress_handler( imp_dbh->db, n_opcodes, + sqlite3_progress_handler( imp_dbh->db, n_opcodes, sqlite_db_generic_callback_dispatcher, handler_sv ); } @@ -1986,7 +1683,7 @@ sqlite_db_commit_hook(pTHX_ SV *dbh, SV *hook) av_push( imp_dbh->functions, hook_sv ); /* Register the hook within sqlite3 */ - retval = sqlite3_commit_hook( imp_dbh->db, + retval = sqlite3_commit_hook( imp_dbh->db, sqlite_db_generic_callback_dispatcher, hook_sv ); } @@ -2013,7 +1710,7 @@ sqlite_db_rollback_hook(pTHX_ SV *dbh, SV *hook) av_push( imp_dbh->functions, hook_sv ); /* Register the hook within sqlite3 */ - retval = sqlite3_rollback_hook( imp_dbh->db, + retval = sqlite3_rollback_hook( imp_dbh->db, (void(*)(void *)) sqlite_db_generic_callback_dispatcher, hook_sv ); @@ -2023,7 +1720,7 @@ sqlite_db_rollback_hook(pTHX_ SV *dbh, SV *hook) } void -sqlite_db_update_dispatcher( void *callback, int op, +sqlite_db_update_dispatcher( void *callback, int op, char const *database, char const *table, sqlite3_int64 rowid ) { @@ -2067,7 +1764,7 @@ sqlite_db_update_hook(pTHX_ SV *dbh, SV *hook) av_push( imp_dbh->functions, hook_sv ); /* Register the hook within sqlite3 */ - retval = sqlite3_update_hook( imp_dbh->db, + retval = sqlite3_update_hook( imp_dbh->db, sqlite_db_update_dispatcher, hook_sv ); } @@ -2138,7 +1835,7 @@ sqlite_db_set_authorizer(pTHX_ SV *dbh, SV *authorizer) av_push( imp_dbh->functions, authorizer_sv ); /* Register the hook within sqlite3 */ - retval = sqlite3_set_authorizer( imp_dbh->db, + retval = sqlite3_set_authorizer( imp_dbh->db, sqlite_db_authorizer_dispatcher, authorizer_sv ); } @@ -2232,4 +1929,283 @@ sqlite_db_backup_to_file(pTHX_ SV *dbh, char *filename) #endif } +typedef struct perl_tokenizer { + sqlite3_tokenizer base; + SV *coderef; /* the perl tokenizer is a coderef that takes + a string and returns a cursor coderef */ +} perl_tokenizer; + +typedef struct perl_tokenizer_cursor { + sqlite3_tokenizer_cursor base; + SV *coderef; /* ref to the closure that returns terms */ + char *pToken; /* storage for a copy of the last token */ + int nTokenAllocated; /* space allocated to pToken buffer */ + + /* members below are only used if the input string is in utf8 */ + const char *pInput; /* input we are tokenizing */ + const char *lastByteOffset; /* offset into pInput */ + int lastCharOffset; /* char offset corresponding to lastByteOffset */ +} perl_tokenizer_cursor; + +/* +** Create a new tokenizer instance. +** Will be called whenever a FTS3 table is created with +** CREATE .. USING fts3( ... , tokenize=perl qualified::function::name) +** where qualified::function::name is a fully qualified perl function +*/ +static int perl_tokenizer_Create( + int argc, const char * const *argv, + sqlite3_tokenizer **ppTokenizer +){ + dTHX; + dSP; + int n_retval; + SV *retval; + + perl_tokenizer *t; + t = (perl_tokenizer *) sqlite3_malloc(sizeof(*t)); + if( t==NULL ) return SQLITE_NOMEM; + memset(t, 0, sizeof(*t)); + + ENTER; + SAVETMPS; + + /* call the qualified::function::name */ + PUSHMARK(SP); + PUTBACK; + n_retval = call_pv(argv[0], G_SCALAR); + SPAGAIN; + + /* store a copy of the returned coderef into the tokenizer structure */ + if (n_retval != 1) { + warn("tokenizer_Create returned %d arguments", n_retval); + } + retval = POPs; + t->coderef = newSVsv(retval); + *ppTokenizer = &t->base; + + PUTBACK; + FREETMPS; + LEAVE; + + return SQLITE_OK; +} + +/* +** Destroy a tokenizer +*/ +static int perl_tokenizer_Destroy(sqlite3_tokenizer *pTokenizer){ + dTHX; + perl_tokenizer *t = (perl_tokenizer *) pTokenizer; + sv_free(t->coderef); + sqlite3_free(t); + return SQLITE_OK; +} + +/* +** Prepare to begin tokenizing a particular string. The input +** string to be tokenized is supposed to be pInput[0..nBytes-1] .. +** except that nBytes passed by fts3 is -1 (don't know why) ! +** This is passed to the tokenizer instance, which then returns a +** closure implementing the cursor (so the cursor is again a coderef). +*/ +static int perl_tokenizer_Open( + sqlite3_tokenizer *pTokenizer, /* Tokenizer object */ + const char *pInput, int nBytes, /* Input buffer */ + sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */ +){ + dTHX; + dSP; + U32 flags; + SV *perl_string; + int n_retval; + + perl_tokenizer *t = (perl_tokenizer *)pTokenizer; + + /* allocate and initialize the cursor struct */ + perl_tokenizer_cursor *c; + c = (perl_tokenizer_cursor *) sqlite3_malloc(sizeof(*c)); + memset(c, 0, sizeof(*c)); + *ppCursor = &c->base; + + /* flags for creating the Perl SV containing the input string */ + flags = SVs_TEMP; /* will call sv_2mortal */ + + /* special handling if working with utf8 strings */ + if (last_executed_dbh->unicode) { /* global var ... no better way ! */ + + /* data to keep track of byte offsets */ + c->lastByteOffset = c->pInput = pInput; + c->lastCharOffset = 0; + + /* string passed to Perl needs to be flagged as utf8 */ + flags |= SVf_UTF8; + } + + ENTER; + SAVETMPS; + + /* build a Perl copy of the input string */ + if (nBytes < 0) { /* we get -1 from fts3. Don't know why ! */ + nBytes = strlen(pInput); + } + perl_string = newSVpvn_flags(pInput, nBytes, flags); + + /* call the tokenizer coderef */ + PUSHMARK(SP); + XPUSHs(perl_string); + PUTBACK; + n_retval = call_sv(t->coderef, G_SCALAR); + SPAGAIN; + + /* store the cursor coderef returned by the tokenizer */ + if (n_retval != 1) { + warn("tokenizer returned %d arguments", n_retval); + } + c->coderef = newSVsv(POPs); + + PUTBACK; + FREETMPS; + LEAVE; + return SQLITE_OK; +} + +/* +** Close a tokenization cursor previously opened by a call to +** perl_tokenizer_Open() above. +*/ +static int perl_tokenizer_Close(sqlite3_tokenizer_cursor *pCursor){ + perl_tokenizer_cursor *c = (perl_tokenizer_cursor *) pCursor; + + dTHX; + sv_free(c->coderef); + sqlite3_free(c); + return SQLITE_OK; +} + +/* +** Extract the next token from a tokenization cursor. The cursor must +** have been opened by a prior call to perl_tokenizer_Open(). +*/ +static int perl_tokenizer_Next( + sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by perl_tokenizer_Open */ + const char **ppToken, /* OUT: *ppToken is the token text */ + int *pnBytes, /* OUT: Number of bytes in token */ + int *piStartOffset, /* OUT: Starting offset of token */ + int *piEndOffset, /* OUT: Ending offset of token */ + int *piPosition /* OUT: Position integer of token */ +){ + perl_tokenizer_cursor *c = (perl_tokenizer_cursor *) pCursor; + int result; + int n_retval; + STRLEN n_a; + char *token; + char *byteOffset; + I32 hop; + + dTHX; + dSP; + + ENTER; + SAVETMPS; + + /* call the cursor */ + PUSHMARK(SP); + PUTBACK; + n_retval = call_sv(c->coderef, G_ARRAY); + SPAGAIN; + + /* if we get back an empty list, there is no more token */ + if (n_retval == 0) { + result = SQLITE_DONE; + } + /* otherwise, get token details from the return list */ + else { + if (n_retval != 5) { + warn("tokenizer cursor returned %d arguments", n_retval); + } + *piPosition = POPi; + *piEndOffset = POPi; + *piStartOffset = POPi; + *pnBytes = POPi; + token = POPpx; + + if (c->pInput) { /* if working with utf8 data */ + + /* recompute *pnBytes in bytes, not in chars */ + *pnBytes = strlen(token); + + /* recompute start/end offsets in bytes, not in chars */ + hop = *piStartOffset - c->lastCharOffset; + byteOffset = utf8_hop(c->lastByteOffset, hop); + hop = *piEndOffset - *piStartOffset; + *piStartOffset = byteOffset - c->pInput; + byteOffset = utf8_hop(byteOffset, hop); + *piEndOffset = byteOffset - c->pInput; + + /* remember where we are for next round */ + c->lastCharOffset = *piEndOffset, + c->lastByteOffset = byteOffset; + } + + /* make sure we have enough storage for copying the token */ + if (*pnBytes > c->nTokenAllocated ){ + char *pNew; + c->nTokenAllocated = *pnBytes + 20; + pNew = sqlite3_realloc(c->pToken, c->nTokenAllocated); + if( !pNew ) return SQLITE_NOMEM; + c->pToken = pNew; + } + + /* need to copy the token into the C cursor before perl frees that + memory */ + memcpy(c->pToken, token, *pnBytes); + *ppToken = c->pToken; + + result = SQLITE_OK; + } + + PUTBACK; + FREETMPS; + LEAVE; + + return result; +} + +/* +** The set of routines that implement the perl tokenizer +*/ +sqlite3_tokenizer_module perl_tokenizer_Module = { + 0, + perl_tokenizer_Create, + perl_tokenizer_Destroy, + perl_tokenizer_Open, + perl_tokenizer_Close, + perl_tokenizer_Next +}; + +/* +** Register the perl tokenizer with FTS3 +*/ +int sqlite_db_register_fts3_perl_tokenizer(pTHX_ SV *dbh) +{ + D_imp_dbh(dbh); + + int rc; + sqlite3_stmt *pStmt; + const char zSql[] = "SELECT fts3_tokenizer(?, ?)"; + sqlite3_tokenizer_module *p = &perl_tokenizer_Module; + + rc = sqlite3_prepare_v2(imp_dbh->db, zSql, -1, &pStmt, 0); + if( rc!=SQLITE_OK ){ + return rc; + } + + sqlite3_bind_text(pStmt, 1, "perl", -1, SQLITE_STATIC); + sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC); + sqlite3_step(pStmt); + + return sqlite3_finalize(pStmt); +} + /* end */