MiniOB 1
MiniOB is one mini database, helping developers to learn how database works.
载入中...
搜索中...
未找到
codec.h
1/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved.
2miniob is licensed under Mulan PSL v2.
3You can use this software according to the terms and conditions of the Mulan PSL v2.
4You may obtain a copy of Mulan PSL v2 at:
5 http://license.coscl.org.cn/MulanPSL2
6THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
7EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
8MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
9See the Mulan PSL v2 for more details. */
10
11#pragma once
12
13#include <cmath>
14#include "common/lang/span.h"
15#include "common/lang/string.h"
16#include "common/lang/vector.h"
17#include "common/log/log.h"
18#include "common/sys/rc.h"
19#include "common/value.h"
20#include <cstring>
21#include <algorithm>
22
23using byte_t = unsigned char;
24using bytes = vector<byte_t>;
25using float64_t = double_t;
26
27// reference: https://github.com/code0xff/orderedcodepp
29{
30public:
31 static const byte_t term[];
32 static const byte_t lit00[];
33 static const byte_t litff[];
34 static const byte_t inf[];
35 static const byte_t msb[];
36
37 static const byte_t increasing = 0x00;
38 static const byte_t decreasing = 0xff;
39
40 struct infinity
41 {
42 bool operator==(const infinity &i) const { return true; }
43
44 bool operator==(infinity &&i) { return true; }
45 };
46
47 template <typename T>
48 struct decr
49 {
50 T val;
51
52 bool operator==(const decr<T> &o) const { return val == o.val; }
53
54 bool operator==(decr<T> o) { return val == o.val; }
55 };
56
58 {
59 string s;
60 bool inf;
61 };
62
63 struct trailing_string : string
64 {};
65
66 static void invert(span<byte_t> &s)
67 {
68 std::for_each(s.begin(), s.end(), [](byte_t &c) { c ^= 0xff; });
69 }
70
71 static RC append(bytes &s, uint64_t x)
72 {
73 vector<byte_t> buf(9);
74 auto i = 8;
75 for (; x > 0; x >>= 8) {
76 buf[i--] = static_cast<byte_t>(x);
77 }
78 buf[i] = static_cast<byte_t>(8 - i);
79 s.insert(s.end(), buf.begin() + i, buf.end());
80 return RC::SUCCESS;
81 }
82
83 static RC append(bytes &s, int64_t x)
84 {
85 if (x >= -64 && x < 64) {
86 s.insert(s.end(), static_cast<byte_t>(x ^ 0x80));
87 return RC::SUCCESS;
88 }
89 bool neg = x < 0;
90 if (neg) {
91 x = ~x;
92 }
93 auto n = 1;
94 bytes buf(10);
95 auto i = 9;
96 for (; x > 0; x >>= 8) {
97 buf[i--] = static_cast<byte_t>(x);
98 n++;
99 }
100 bool lfb = n > 7;
101 if (lfb) {
102 n -= 7;
103 }
104 if (buf[i + 1] < 1 << (8 - n)) {
105 n--;
106 i++;
107 }
108 buf[i] |= msb[n];
109 if (lfb) {
110 buf[--i] = 0xff;
111 }
112 if (neg) {
113 span<byte_t> sp(&buf[i], buf.size() - i);
114 invert(sp);
115 }
116 s.insert(s.end(), buf.begin() + i, buf.end());
117 return RC::SUCCESS;
118 }
119
120 static RC append(bytes &s, float64_t x)
121 {
122 RC rc = RC::SUCCESS;
123 if (std::isnan(x)) {
124 LOG_WARN("append: NaN");
125 return RC::INVALID_ARGUMENT;
126 }
127 uint64_t b;
128 memcpy(&b, &x, sizeof(x));
129 auto i = int64_t(b);
130 if (i < 0) {
131 i = std::numeric_limits<int64_t>::min() - i;
132 }
133 if (OB_FAIL(append(s, i))) {
134 LOG_WARN("append: append failed, i=%ld, x=%lf", i, x);
135 return rc;
136 }
137 return rc;
138 }
139
140 static RC append(bytes &s, const string &x)
141 {
142 auto l = x.begin();
143 for (auto c = x.begin(); c < x.end(); c++) {
144 switch (byte_t(*c)) {
145 case 0x00:
146 s.insert(s.end(), l, c);
147 s.insert(s.end(), &lit00[0], &lit00[0] + 2);
148 l = c + 1;
149 break;
150 case 0xff:
151 s.insert(s.end(), l, c);
152 s.insert(s.end(), &litff[0], &litff[0] + 2);
153 l = c + 1;
154 }
155 }
156 s.insert(s.end(), l, x.end());
157 s.insert(s.end(), &term[0], &term[0] + 2);
158 return RC::SUCCESS;
159 }
160
161 static RC append(bytes &s, const trailing_string &x)
162 {
163 s.insert(s.end(), x.begin(), x.end());
164 return RC::SUCCESS;
165 }
166
167 static RC append(bytes &s, const infinity &_)
168 {
169 s.insert(s.end(), &inf[0], &inf[0] + 2);
170 return RC::SUCCESS;
171 }
172
173 static RC append(bytes &s, const string_or_infinity &x)
174 {
175 RC rc = RC::SUCCESS;
176 if (x.inf) {
177 if (!x.s.empty()) {
178 LOG_WARN("orderedcode: string_or_infinity has non-zero string and non-zero infinity");
179 return RC::INVALID_ARGUMENT;
180 }
181 if (OB_FAIL(append(s, infinity{}))) {
182 LOG_WARN("orderedcode: append infinity failed");
183 return rc;
184 }
185 } else {
186 if (OB_FAIL(append(s, x.s))) {
187 LOG_WARN("orderedcode: append string failed");
188 return rc;
189 }
190 }
191 return rc;
192 }
193
194 static RC parse(span<byte_t> &s, byte_t dir, int64_t &dst)
195 {
196 if (s.empty()) {
197 LOG_WARN("orderedcode: corrupt input");
198 return RC::INVALID_ARGUMENT;
199 }
200 byte_t c = s[0] ^ dir;
201 if (c >= 0x40 && c < 0xc0) {
202 dst = int64_t(int8_t(c ^ 0x80));
203 s = s.subspan(1);
204 return RC::SUCCESS;
205 }
206 bool neg = (c & 0x80) == 0;
207 if (neg) {
208 c = ~c;
209 dir = ~dir;
210 }
211 size_t n = 0;
212 if (c == 0xff) {
213 if (s.size() == 1) {
214 LOG_WARN("orderedcode: corrupt input");
215 return RC::INVALID_ARGUMENT;
216 }
217 s = s.subspan(1);
218 c = s[0] ^ dir;
219 if (c > 0xc0) {
220 LOG_WARN("orderedcode: corrupt input");
221 return RC::INVALID_ARGUMENT;
222 }
223 n = 7;
224 }
225 for (byte_t mask = 0x80; (c & mask) != 0; mask >>= 1) {
226 c &= ~mask;
227 n++;
228 }
229 if (s.size() < n) {
230 LOG_WARN("orderedcode: corrupt input");
231 return RC::INVALID_ARGUMENT;
232 }
233 int64_t x = c;
234 for (size_t i = 1; i < n; i++) {
235 c = s[i] ^ dir;
236 x = x << 8 | c;
237 }
238 if (neg) {
239 x = ~x;
240 }
241 dst = x;
242 s = s.subspan(n);
243 return RC::SUCCESS;
244 }
245
246 static RC parse(span<byte_t> &s, byte_t dir, uint64_t &dst)
247 {
248 RC rc = RC::SUCCESS;
249 if (s.empty()) {
250 LOG_WARN("orderedcode: corrupt input");
251 return RC::INVALID_ARGUMENT;
252 }
253 byte_t n = s[0] ^ dir;
254 if (n > 8 || (int)s.size() < (1 + n)) {
255 LOG_WARN("orderedcode: corrupt input");
256 return RC::INVALID_ARGUMENT;
257 }
258 uint64_t x = 0;
259 for (size_t i = 0; i < n; i++) {
260 x = x << 8 | (s[1 + i] ^ dir);
261 }
262 dst = x;
263 s = s.subspan(1 + n);
264 return rc;
265 }
266
267 static RC parse(span<byte_t> &s, byte_t dir, infinity &_)
268 {
269 RC rc = RC::SUCCESS;
270 if (s.size() < 2) {
271 LOG_WARN("orderedcode: corrupt input");
272 return RC::INVALID_ARGUMENT;
273 }
274 if ((s[0] ^ dir) != inf[0] || (s[1] ^ dir) != inf[1]) {
275 LOG_WARN("orderedcode: corrupt input");
276 return RC::INVALID_ARGUMENT;
277 }
278 s = s.subspan(2);
279 return rc;
280 }
281
282 static RC parse(span<byte_t> &s, byte_t dir, string &dst)
283 {
284 bytes buf;
285 for (auto l = 0, i = 0; i < (int)s.size();) {
286 switch (s[i] ^ dir) {
287 case 0x00:
288 if (i + 1 >= (int)s.size()) {
289 LOG_WARN("orderedcode: corrupt input");
290 return RC::INVALID_ARGUMENT;
291 }
292 switch (s[i + 1] ^ dir) {
293 case 0x01:
294 dst.clear();
295 if (l == 0 && dir == increasing) {
296 dst.insert(dst.end(), s.begin(), s.begin() + i);
297 s = s.subspan(i + 2);
298 return RC::SUCCESS;
299 }
300 buf.insert(buf.end(), s.begin() + l, s.begin() + i);
301 if (dir == decreasing) {
302 span<byte_t> sp(buf);
303 invert(sp);
304 }
305 dst.insert(dst.end(), buf.begin(), buf.end());
306 s = s.subspan(i + 2);
307 return RC::SUCCESS;
308 case 0xff:
309 buf.insert(buf.end(), s.begin() + l, s.begin() + i);
310 buf.insert(buf.end(), static_cast<byte_t>(0x00 ^ dir));
311 i += 2;
312 l = i;
313 break;
314 default: LOG_WARN("orderedcode: corrupt input"); return RC::INVALID_ARGUMENT;
315 }
316 break;
317 case 0xff:
318 if (i + 1 >= (int)s.size() || ((s[i + 1] ^ dir) != 0x00)) {
319 LOG_WARN("orderedcode: corrupt input");
320 return RC::INVALID_ARGUMENT;
321 }
322 buf.insert(buf.end(), s.begin() + l, s.begin() + i);
323 buf.insert(buf.end(), static_cast<byte_t>(0xff ^ dir));
324 i += 2;
325 l = i;
326 break;
327 default: i++;
328 }
329 }
330 LOG_WARN("orderedcode: corrupt input");
331 return RC::INVALID_ARGUMENT;
332 }
333
334 static RC parse(span<byte_t> &s, byte_t dir, float64_t &dst)
335 {
336 RC rc = RC::SUCCESS;
337 int64_t i = 0;
338 parse(s, dir, i);
339 if (i < 0) {
340 i = ((int64_t)-1 << 63) - i;
341 }
342 memcpy(&dst, &i, sizeof(i));
343 if (std::isnan(dst)) {
344 rc = RC::INVALID_ARGUMENT;
345 }
346 return rc;
347 }
348
349 static RC parse(span<byte_t> &s, byte_t dir, string_or_infinity &dst)
350 {
351 RC rc = RC::SUCCESS;
352 try {
353 infinity _;
354 rc = parse(s, dir, _);
355 dst.inf = true;
356 return rc;
357 } catch (...) {
358 rc = parse(s, dir, dst.s);
359 return rc;
360 }
361 }
362
363 static RC parse(span<byte_t> &s, byte_t dir, trailing_string &dst)
364 {
365 dst.clear();
366 if (dir == increasing) {
367 dst.insert(dst.end(), s.begin(), s.end());
368 } else {
369 invert(s);
370 dst.insert(dst.end(), s.begin(), s.end());
371 }
372 return RC::SUCCESS;
373 }
374};
375
376class Codec
377{
378public:
379 static RC encode_without_rid(int64_t table_id, bytes &encoded_key)
380 {
381 RC rc = RC::SUCCESS;
382 if (OB_FAIL(OrderedCode::append(encoded_key, table_prefix))) {
383 LOG_WARN("append failed");
384 } else if (OB_FAIL(OrderedCode::append(encoded_key, table_id))) {
385 LOG_WARN("append failed");
386 }
387 return rc;
388 }
389 static RC encode(int64_t table_id, uint64_t rid, bytes &encoded_key)
390 {
391 RC rc = RC::SUCCESS;
392 if (OB_FAIL(OrderedCode::append(encoded_key, table_prefix))) {
393 LOG_WARN("append failed");
394 } else if (OB_FAIL(OrderedCode::append(encoded_key, table_id))) {
395 LOG_WARN("append failed");
396 } else if (OB_FAIL(OrderedCode::append(encoded_key, rowkey_prefix))) {
397 LOG_WARN("append failed");
398 } else if (OB_FAIL(OrderedCode::append(encoded_key, rid))) {
399 LOG_WARN("append failed");
400 }
401 return rc;
402 }
403
404 static RC encode_table_prefix(int64_t table_id, bytes &encoded_key)
405 {
406 RC rc = RC::SUCCESS;
407 if (OB_FAIL(OrderedCode::append(encoded_key, table_prefix))) {
408 LOG_WARN("append failed");
409 } else if (OB_FAIL(OrderedCode::append(encoded_key, table_id))) {
410 LOG_WARN("append failed");
411 } else if (OB_FAIL(OrderedCode::append(encoded_key, rowkey_prefix))) {
412 LOG_WARN("append failed");
413 }
414 return rc;
415 }
416
417 static RC encode_value(const Value &val, bytes &dst)
418 {
419 RC rc = RC::SUCCESS;
420 switch (val.attr_type()) {
421 case AttrType::INTS:
422 if (OB_FAIL(OrderedCode::append(dst, (int64_t)val.get_int()))) {
423 LOG_WARN("append failed");
424 }
425 break;
426 case AttrType::FLOATS:
427 if (OB_FAIL(OrderedCode::append(dst, (double)val.get_float()))) {
428 LOG_WARN("append failed");
429 }
430 break;
431 case AttrType::CHARS:
432 if (OB_FAIL(OrderedCode::append(dst, val.get_string()))) {
433 LOG_WARN("append failed");
434 }
435 break;
436 default: return RC::INVALID_ARGUMENT;
437 }
438 return rc;
439 }
440
441 static RC encode_int(int64_t val, bytes &dst)
442 {
443 RC rc = RC::SUCCESS;
444 if (OB_FAIL(OrderedCode::append(dst, val))) {
445 LOG_WARN("append failed");
446 }
447 return rc;
448 }
449
450 static RC decode(bytes &encoded_key, int64_t &table_id)
451 {
452 RC rc = RC::SUCCESS;
453 span<byte_t> sp(encoded_key);
454 string table_prefix;
455 if (OB_FAIL(OrderedCode::parse(sp, OrderedCode::increasing, table_prefix))) {
456 LOG_WARN("parse failed");
457 return rc;
458 } else if (OB_FAIL(OrderedCode::parse(sp, OrderedCode::increasing, table_id))) {
459 LOG_WARN("parse failed");
460 return rc;
461 }
462 return rc;
463 }
464
465 static constexpr const char *table_prefix = "t";
466 static constexpr const char *rowkey_prefix = "r";
467};
468
469// template<typename T>
470// void append(bytes& s, decr<T> d) {
471// size_t n = s.size();
472// append(s, d.val);
473// span<byte_t> sp(&s[n], s.size() - n);
474// invert(sp);
475// }
476
477// template<typename It, typename... Its>
478// void append(bytes& s, It it, Its... its) {
479// append(s, it);
480// append(s, its...);
481// }
482
483// template<typename T>
484// RC parse(span<byte_t>& s, decr<T>& dst) {
485// return parse(s, decreasing, dst.val);
486// }
487
488// template<typename It>
489// RC parse(span<byte_t>& s, It& it) {
490// return parse(s, increasing, it);
491// }
492
493// template<typename It, typename... Its>
494// RC parse(span<byte_t>& s, It& it, Its&... its) {
495// RC rc = RC::SUCCESS;
496// if (OB_FAIL(parse(s, it))) {
497// LOG_WARN("parse failed");
498// return rc;
499// }
500// if (OB_FAIL(parse(s, its...))) {
501// LOG_WARN("parse failed");
502// return rc;
503// }
504// return rc;
505// }
Definition: codec.h:377
Definition: codec.h:29
属性的值
Definition: value.h:30
int get_int() const
Definition: value.cpp:234
Definition: codec.h:49
Definition: codec.h:41
Definition: codec.h:58
Definition: codec.h:64