OceanBase Plugin Development Kit
OceanBase Plugin Development Kit
载入中...
搜索中...
未找到
ob_plugin_ftparser.h
浏览该文件的文档.
1/*
2 * Copyright (c) 2023 OceanBase
3 * OceanBase is licensed under Mulan PubL v2.
4 * You can use this software according to the terms and conditions of the Mulan PubL v2.
5 * You may obtain a copy of Mulan PubL v2 at:
6 * http://license.coscl.org.cn/MulanPubL-2.0
7 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
8 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
9 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
10 * See the Mulan PubL v2 for more details.
11 */
12
13#pragma once
14
15#include "oceanbase/ob_plugin.h"
17
/* Full-text parser interface version 0.1.0, built with OBP_MAKE_VERSION(0, 1, 0). */
28#define OBP_FTPARSER_INTERFACE_VERSION OBP_MAKE_VERSION(0, 1, 0)
29
/* Full-text parser interface version 0.2.0, built with OBP_MAKE_VERSION(0, 2, 0). */
30#define OBP_FTPARSER_INTERFACE_VERSION_0_2_0 OBP_MAKE_VERSION(0, 2, 0)
31
/*
 * The interface version treated as current: an alias for the 0.2.0 version.
 * OBP_REGISTER_FTPARSER passes this value as the `version` argument when it
 * registers a parser.
 */
36#define OBP_FTPARSER_INTERFACE_VERSION_CURRENT OBP_FTPARSER_INTERFACE_VERSION_0_2_0
37
38#ifdef __cplusplus
39extern "C" {
40#endif
41
44
62
68{
70 int (*init)(ObPluginParamPtr param);
71
73 int (*deinit)(ObPluginParamPtr param);
74
75 int (*scan_begin)(ObPluginFTParserParamPtr param);
76 int (*scan_end)(ObPluginFTParserParamPtr param);
81 int (*next_token)(ObPluginFTParserParamPtr param, char **word, int64_t *word_len, int64_t *char_cnt, int64_t *word_freq);
82
84 int (*get_add_word_flag)(uint64_t *flag);
85
95 int (*check_if_charset_supported)(ObPluginCharsetInfoPtr cs);
96};
97
100
107
110
113
118
124
130
141 const char *name,
142 ObPluginVersion version,
143 ObPluginFTParser *ftparser,
144 int64_t ftparser_sizeof,
145 const char *description);
146
/*
 * Register a full-text parser plugin.
 *
 * Expands to a call to obp_register_plugin_ftparser() with the interface
 * version fixed to OBP_FTPARSER_INTERFACE_VERSION_CURRENT.
 *
 *   param       - forwarded unchanged as the first argument.
 *   name        - forwarded unchanged as the plugin name.
 *   descriptor  - the parser descriptor object itself, not a pointer to it:
 *                 the macro passes `&descriptor` and
 *                 `(int64_t)sizeof(descriptor)`, so it must be an addressable
 *                 object whose size is the descriptor size.
 *   description - forwarded unchanged as the last argument.
 *
 * NOTE(review): the expansion already ends with ';'. Writing
 * `OBP_REGISTER_FTPARSER(...);` therefore produces an extra empty statement,
 * and the macro cannot be used where an expression is required (for example
 * directly as an `if` condition or a function argument).
 * NOTE(review): macro arguments are not parenthesized in the expansion, so
 * pass simple identifiers or expressions that are safe without extra
 * parentheses.
 */
147#define OBP_REGISTER_FTPARSER(param, name, descriptor, description) \
148 obp_register_plugin_ftparser(param, \
149 name, \
150 OBP_FTPARSER_INTERFACE_VERSION_CURRENT, \
151 &descriptor, \
152 (int64_t)sizeof(descriptor), \
153 description);
154
155#ifdef __cplusplus
156} // extern "C"
157#endif
158
OBP_PUBLIC_API ObPluginCharsetInfoPtr obp_ftparser_charset_info(ObPluginFTParserParamPtr param)
OBP_PUBLIC_API void obp_ftparser_set_user_data(ObPluginFTParserParamPtr param, ObPluginDatum user_data)
set user data
OBP_PUBLIC_API ObPluginDatum obp_ftparser_user_data(ObPluginFTParserParamPtr param)
The user data of fulltext parameter
OBP_PUBLIC_API uint64_t obp_ftparser_parser_version(ObPluginFTParserParamPtr param)
ObPluginFTPaserAddWordFlag
OBP_PUBLIC_API const char * obp_ftparser_fulltext(ObPluginFTParserParamPtr param)
The fulltext is the text that you should split into tokens. @NOTE the fulltext is not terminated by '\0'.
OBP_PUBLIC_API int obp_register_plugin_ftparser(ObPluginParamPtr param, const char *name, ObPluginVersion version, ObPluginFTParser *ftparser, int64_t ftparser_sizeof, const char *description)
register fulltext parser plugin @NOTE use OBP_REGISTER_FTPARSER instead
OBP_PUBLIC_API int64_t obp_ftparser_fulltext_length(ObPluginFTParserParamPtr param)
get the charsetinfo object from param
ObPluginDatum ObPluginFTParserParamPtr
full text parser add word flag
OBP_PUBLIC_API ObPluginParamPtr obp_ftparser_plugin_param(ObPluginFTParserParamPtr param)
get the plugin parameter through fulltext parser parameter
@ OBP_FTPARSER_AWF_NONE
filter words that are less than a minimum or greater than a maximum word length.
@ OBP_FTPARSER_AWF_STOPWORD
convert characters from uppercase to lowercase.
@ OBP_FTPARSER_AWF_GROUPBY_WORD
@ OBP_FTPARSER_AWF_MIN_MAX_WORD
filter by stop word table.
@ OBP_FTPARSER_AWF_CASEDOWN
distinct and word aggregation
ObPluginDatum ObPluginCharsetInfoPtr
uint64_t ObPluginVersion
The version type
ObPluginDatum ObPluginParamPtr
@NOTE all APIs should be declared with a C interface
#define OBP_PUBLIC_API
void * ObPluginDatum
Used for param type
Full-text parser descriptor interface for the domain index, used to split a document into many tokens....