45 const char * start_ =
nullptr;
46 const char * next_ =
nullptr;
47 const char * end_ =
nullptr;
48 bool is_inited_ =
false;
/*
 * true_word_char(ctype, character): evaluates to non-zero when `ctype` has at
 * least one of the _MY_U, _MY_L or _MY_NMR bits set, or when `character` is
 * the underscore '_'; otherwise evaluates to 0.
 * `&` binds tighter than `||`, so the bit-mask test forms the left operand of
 * the `||` as a whole.
 * NOTE(review): _MY_U / _MY_L / _MY_NMR are presumably the upper-case,
 * lower-case and numeral character-class flags -- confirm against the charset
 * header that defines them.
 */
60#define true_word_char(ctype, character) ((ctype) & (_MY_U | _MY_L | _MY_NMR) || (character) == '_')
85 OBP_LOG_WARN(
"init twice. ret=%d, param=%p, this=%p", ret, param,
this);
88 ||
nullptr == fulltext
91 OBP_LOG_WARN(
"invalid arguments, ret=%d, param=%p", ret, param);
96 end_ = start_ + ft_length;
120 OBP_LOG_WARN(
"space ft parser isn't initialized. ret=%d, is_inited=%d", ret, is_inited_);
122 const char *start = start_;
123 const char *next = next_;
124 const char *end = end_;
129 mbl =
obp_charset_ctype(cs, &ctype, (
unsigned char *)next, (
unsigned char *)end);
133 next += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
142 mbl =
obp_charset_ctype(cs, &ctype, (
unsigned char *)next, (
unsigned char *)end);
147 next += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
151 word_len = next - start;
166 OBP_LOG_TRACE(
"next word. start=%p, next=%p, end=%p", start_, next_, end_);
180 ret = parser->
init(param);
204 if (word ==
nullptr || word_len ==
nullptr || char_cnt ==
nullptr || word_freq ==
nullptr) {
208 ret = parser->
get_next_token((
const char *&)(*word), *word_len, *char_cnt, *word_freq);
216 if (flag ==
nullptr) {
251 "This is an example ftparser.");
int init(ObPluginDatum param)
ObSpaceFTParser()=default
virtual ~ObSpaceFTParser()
int get_next_token(const char *&word, int64_t &word_len, int64_t &char_len, int64_t &word_freq)
int ftparser_scan_end(ObPluginFTParserParamPtr param)
int ftparser_next_token(ObPluginFTParserParamPtr param, char **word, int64_t *word_len, int64_t *char_cnt, int64_t *word_freq)
int plugin_init(ObPluginParamPtr plugin)
plugin init function
int ftparser_get_add_word_flag(uint64_t *flag)
int ftparser_scan_begin(ObPluginFTParserParamPtr param)
OBP_PUBLIC_API ObPluginCharsetInfoPtr obp_ftparser_charset_info(ObPluginFTParserParamPtr param)
OBP_PUBLIC_API void obp_ftparser_set_user_data(ObPluginFTParserParamPtr param, ObPluginDatum user_data)
set user data
OBP_PUBLIC_API ObPluginDatum obp_ftparser_user_data(ObPluginFTParserParamPtr param)
The user data of fulltext parameter
#define OBP_REGISTER_FTPARSER(param, name, descriptor, description)
OBP_PUBLIC_API const char * obp_ftparser_fulltext(ObPluginFTParserParamPtr param)
The fulltext is the text that you should split into tokens. @NOTE the fulltext is not terminated by '\0'.
OBP_PUBLIC_API int64_t obp_ftparser_fulltext_length(ObPluginFTParserParamPtr param)
get the charsetinfo object from param
ObPluginDatum ObPluginFTParserParamPtr
full text parser add word flag
@ OBP_FTPARSER_AWF_STOPWORD
convert characters from uppercase to lowercase.
@ OBP_FTPARSER_AWF_GROUPBY_WORD
@ OBP_FTPARSER_AWF_MIN_MAX_WORD
filter by stop word table.
@ OBP_FTPARSER_AWF_CASEDOWN
distinct and word aggregation
#define OBP_LOG_INFO(fmt, args...)
#define OBP_LOG_WARN(fmt, args...)
const int OBP_SUCCESS
this is the adaptor errno of oceanbase errno
OBP_PUBLIC_API int obp_charset_ctype(ObPluginCharsetInfoPtr cs, int *ctype, const unsigned char *str, const unsigned char *end)
Get the ctype of the char
const int OBP_INVALID_ARGUMENT
#define OBP_LOG_TRACE(fmt, args...)
ObPluginDatum ObPluginCharsetInfoPtr
const int OBP_PLUGIN_ERROR
#define OBP_AUTHOR_OCEANBASE
#define OBP_MAKE_VERSION(major, minor, patch)
#define OBP_LICENSE_MULAN_PSL_V2
ObPluginDatum ObPluginParamPtr
@NOTE all API should be declared as C interface
#define OBP_DECLARE_PLUGIN(name)
this is used to define a plugin
void * ObPluginDatum
Used for param type
#define true_word_char(ctype, character)
fulltext parser descriptor interface for domain index, splitting a document into many tokens....