From 1ad93cacc0a596c9784d5baafa403af473de9d70 Mon Sep 17 00:00:00 2001 From: Max Maischein Date: Sun, 30 Aug 2020 07:59:53 +0200 Subject: [PATCH] Fix all type errors --- dbdimp_tokenizer.inc | 175 ++++++++++++++++++++++--------------------- lib/DBD/SQLite.pm | 1 + 2 files changed, 89 insertions(+), 87 deletions(-) diff --git a/dbdimp_tokenizer.inc b/dbdimp_tokenizer.inc index 6020ec5..a6ee4bd 100644 --- a/dbdimp_tokenizer.inc +++ b/dbdimp_tokenizer.inc @@ -16,23 +16,14 @@ typedef struct perl_fts3_tokenizer_cursor { int currentChar; /* char position corresponding to currentByte */ } perl_fts3_tokenizer_cursor; -typedef struct perl_fts5_tokenizer { - fts5_tokenizer base; +typedef struct perl_Fts5Tokenizer { + Fts5Tokenizer base; SV *coderef; /* the perl tokenizer is a coderef that takes - a string and returns a cursor coderef */ -} perl_fts5_tokenizer; - -typedef struct perl_fts5_tokenizer_cursor { - // fts5_tokenizer_cursor base; - SV *coderef; /* ref to the closure that returns terms */ - char *pToken; /* storage for a copy of the last token */ - int nTokenAllocated; /* space allocated to pToken buffer */ - - /* members below are only used if the input string is in utf8 */ - const char *pInput; /* input we are tokenizing */ - const char *lastByteOffset; /* offset into pInput */ - int lastCharOffset; /* char offset corresponding to lastByteOffset */ -} perl_fts5_tokenizer_cursor; + ** a string and some parameters and + ** in turn calls the xToken() function + ** passed to it + */ +} perl_Fts5Tokenizer; /* ** Create a new tokenizer instance. 
@@ -340,20 +331,19 @@ int sqlite_db_register_fts3_perl_tokenizer(pTHX_ SV *dbh) ** where qualified::function::name is a fully qualified perl function */ static int perl_fts5_tokenizer_Create( - int argc, const char * const *argv, - sqlite3_tokenizer **ppTokenizer + void* pCtx, const char **azArg, int nArg, Fts5Tokenizer **ppOut ){ dTHX; dSP; int n_retval; SV *retval; - perl_fts3_tokenizer *t; + perl_Fts5Tokenizer *t; - if (!argc) { + if (!nArg) { return SQLITE_ERROR; } - t = (perl_fts3_tokenizer *) sqlite3_malloc(sizeof(*t)); + t = (perl_Fts5Tokenizer *) sqlite3_malloc(sizeof(*t)); if( t==NULL ) return SQLITE_NOMEM; memset(t, 0, sizeof(*t)); @@ -363,7 +353,7 @@ static int perl_fts5_tokenizer_Create( /* call the qualified::function::name */ PUSHMARK(SP); PUTBACK; - n_retval = call_pv(argv[0], G_SCALAR); + n_retval = call_pv(azArg[0], G_SCALAR); SPAGAIN; /* store a copy of the returned coderef into the tokenizer structure */ @@ -372,7 +362,7 @@ static int perl_fts5_tokenizer_Create( } retval = POPs; t->coderef = newSVsv(retval); - *ppTokenizer = &t->base; + *ppOut = &t->base; PUTBACK; FREETMPS; @@ -384,27 +374,33 @@ static int perl_fts5_tokenizer_Create( /* ** Destroy a tokenizer */ -static int perl_fts5_tokenizer_Delete(Fts5Tokenizer *pTokenizer){ +static void perl_fts5_tokenizer_Delete(Fts5Tokenizer *pTokenizer){ dTHX; - perl_fts5_tokenizer *t = (perl_fts5_tokenizer *) pTokenizer; + perl_Fts5Tokenizer *t = (perl_Fts5Tokenizer *) pTokenizer; sv_free(t->coderef); sqlite3_free(t); - return SQLITE_OK; + return; } /* -** Extract the next token from a tokenization cursor. The cursor must -** have been opened by a prior call to perl_fts3_tokenizer_Open(). +** This does a tokenizing run over the string. 
Found tokens (and synonyms) +** are stored by calling xToken() */ static int perl_fts5_tokenizer_Tokenize( - sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by perl_fts3_tokenizer_Open */ - const char **ppToken, /* OUT: *ppToken is the token text */ - int *pnBytes, /* OUT: Number of bytes in token */ - int *piStartOffset, /* OUT: Starting offset of token */ - int *piEndOffset, /* OUT: Ending offset of token */ - int *piPosition /* OUT: Position integer of token */ + Fts5Tokenizer* tokenizer, + void *pCtx, + int flags, /* Mask of FTS5_TOKENIZE_* flags */ + const char *pText, int nText, + int (*xToken)( + void *pCtx, /* Copy of 2nd argument to xTokenize() */ + int tflags, /* Mask of FTS5_TOKEN_* flags */ + const char *pToken, /* Pointer to buffer containing token */ + int nToken, /* Size of token in bytes */ + int iStart, /* Byte offset of token within input text */ + int iEnd /* Byte offset of end of token within input text */ + ) ){ - perl_fts3_tokenizer_cursor *c = (perl_fts3_tokenizer_cursor *) pCursor; + perl_Fts5Tokenizer *c = (perl_Fts5Tokenizer *) tokenizer; int result; int n_retval; char *token; @@ -418,61 +414,67 @@ static int perl_fts5_tokenizer_Tokenize( ENTER; SAVETMPS; - /* call the cursor */ + /* call the Perl tokenizer, and pass it our token callback */ PUSHMARK(SP); PUTBACK; + + // XXX Wrap the "found token" callback, and pass it to the user + // Then, restructure the data if it is UTF-8 + // First, do all of this in Perl so it is easier to debug + + ///* if we get back an empty list, there is no more token */ + //if (n_retval == 0) { + // result = SQLITE_DONE; + //} + ///* otherwise, get token details from the return list */ + //else { + // if (n_retval != 5) { + // warn("tokenizer cursor returned %d arguments", n_retval); + // } + // *piPosition = POPi; + // *piEndOffset = POPi; + // *piStartOffset = POPi; + // *pnBytes = POPi; + // token = POPpx; + // + // if (c->pInput) { /* if working with utf8 data */ + // + // /* recompute *pnBytes in 
bytes, not in chars */ + // *pnBytes = strlen(token); + // + // /* recompute start/end offsets in bytes, not in chars */ + // hop = *piStartOffset - c->lastCharOffset; + // byteOffset = (char*)utf8_hop((U8*)c->lastByteOffset, hop); + // hop = *piEndOffset - *piStartOffset; + // *piStartOffset = byteOffset - c->pInput; + // byteOffset = (char*)utf8_hop((U8*)byteOffset, hop); + // *piEndOffset = byteOffset - c->pInput; + // + // /* remember where we are for next round */ + // c->lastCharOffset = *piEndOffset, + // c->lastByteOffset = byteOffset; + // } + // + // /* make sure we have enough storage for copying the token */ + // if (*pnBytes > c->nTokenAllocated ){ + // char *pNew; + // c->nTokenAllocated = *pnBytes + 20; + // pNew = sqlite3_realloc(c->pToken, c->nTokenAllocated); + // if( !pNew ) return SQLITE_NOMEM; + // c->pToken = pNew; + // } + // + // /* need to copy the token into the C cursor before perl frees that + // memory */ + // memcpy(c->pToken, token, *pnBytes); + // *ppToken = c->pToken; + // + // result = SQLITE_OK; + // + n_retval = call_sv(c->coderef, G_ARRAY); SPAGAIN; - /* if we get back an empty list, there is no more token */ - if (n_retval == 0) { - result = SQLITE_DONE; - } - /* otherwise, get token details from the return list */ - else { - if (n_retval != 5) { - warn("tokenizer cursor returned %d arguments", n_retval); - } - *piPosition = POPi; - *piEndOffset = POPi; - *piStartOffset = POPi; - *pnBytes = POPi; - token = POPpx; - - if (c->pInput) { /* if working with utf8 data */ - - /* recompute *pnBytes in bytes, not in chars */ - *pnBytes = strlen(token); - - /* recompute start/end offsets in bytes, not in chars */ - hop = *piStartOffset - c->lastCharOffset; - byteOffset = (char*)utf8_hop((U8*)c->lastByteOffset, hop); - hop = *piEndOffset - *piStartOffset; - *piStartOffset = byteOffset - c->pInput; - byteOffset = (char*)utf8_hop((U8*)byteOffset, hop); - *piEndOffset = byteOffset - c->pInput; - - /* remember where we are for next round */ - 
c->lastCharOffset = *piEndOffset, - c->lastByteOffset = byteOffset; - } - - /* make sure we have enough storage for copying the token */ - if (*pnBytes > c->nTokenAllocated ){ - char *pNew; - c->nTokenAllocated = *pnBytes + 20; - pNew = sqlite3_realloc(c->pToken, c->nTokenAllocated); - if( !pNew ) return SQLITE_NOMEM; - c->pToken = pNew; - } - - /* need to copy the token into the C cursor before perl frees that - memory */ - memcpy(c->pToken, token, *pnBytes); - *ppToken = c->pToken; - - result = SQLITE_OK; - } PUTBACK; FREETMPS; @@ -485,7 +487,6 @@ static int perl_fts5_tokenizer_Tokenize( ** The set of routines that implement the perl FTS5 tokenizer */ fts5_tokenizer perl_fts5_tokenizer_Module = { - 0, perl_fts5_tokenizer_Create, perl_fts5_tokenizer_Delete, perl_fts5_tokenizer_Tokenize @@ -530,7 +531,7 @@ int sqlite_db_register_fts5_perl_tokenizer(pTHX_ SV *dbh) int rc; fts5_api *pFts5Api = sqlite_fetch_fts5_api(aTHX_ dbh); - sqlite3_tokenizer_module *p = &perl_fts5_tokenizer_Module; + fts5_tokenizer *p = &perl_fts5_tokenizer_Module; // pFts5Api->xCreateTokenizer(pFts5Api,...); diff --git a/lib/DBD/SQLite.pm b/lib/DBD/SQLite.pm index fd14348..634b4ec 100644 --- a/lib/DBD/SQLite.pm +++ b/lib/DBD/SQLite.pm @@ -51,6 +51,7 @@ sub driver { DBD::SQLite::db->install_method('sqlite_enable_load_extension'); DBD::SQLite::db->install_method('sqlite_load_extension'); DBD::SQLite::db->install_method('sqlite_register_fts3_perl_tokenizer'); + DBD::SQLite::db->install_method('sqlite_register_fts5_perl_tokenizer'); DBD::SQLite::db->install_method('sqlite_trace', { O => 0x0004 }); DBD::SQLite::db->install_method('sqlite_profile', { O => 0x0004 }); DBD::SQLite::db->install_method('sqlite_table_column_metadata', { O => 0x0004 });