diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000000000000000000000000000000000000..0d0b4dad7ce90ef4d73db0ab149378d8d24533ef --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,2 @@ +[build] +rustflags='--cfg CORE_DEBUG="true"' \ No newline at end of file diff --git a/1.bin b/1.bin deleted file mode 100644 index ae41623ec551be8d83267e117d57ff87f5865988..0000000000000000000000000000000000000000 Binary files a/1.bin and /dev/null differ diff --git a/Cargo.toml b/Cargo.toml index 663baf70a491415bd5d55f7d82e56a9ecc5ef241..8b4c9c3adce6834ada5f9fcad08271867a1107fa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,8 +10,29 @@ path = "src/lib.rs" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +rand = "0.8.5" tokio = "1.24.1" +jemallocator = "0.5" +jemalloc-sys = {version = "0.5", features = ["stats"]} + +[dev-dependencies] +criterion = {version = "0.4.0", features = ["html_reports"]} +crc32fast = "1.3.2" +skiplist = "0.4.0" [profile.dev] + [profile.release] + +[[bench]] +name = "crc_bench" +harness = false + +[[bench]] +name = "skiplist_bench" +harness = false + +[[bench]] +name = "u32_shift" +harness = false \ No newline at end of file diff --git a/README.md b/README.md index c31a5a8f45bee2d97f083203d49430375b6c3e1b..c45a6e31b7a094974aab9f79c930d4cae43d05b7 100644 --- a/README.md +++ b/README.md @@ -25,16 +25,21 @@ LSM tree 是许多 KV型或日志型数据库所依赖的核心实现,例如Bi ## 使用说明 -1. xxxx +1. 编译参数 + CORE_DEBUG 默认开启,打印调试信息 -## 参与贡献 +在构建正式版本时,用户可以用 RUSTFLAGS 环境变量覆盖以上编译参数。 +eg: +```bash +RUSTFLAGS='--cfg CORE_DEBUG="false"' cargo build --release +``` -1. Fork 本仓库 -2. 新建 feat/1.0.0_util_xxx 分支 -3. 提交代码 -4. 新建 Pull Request +#### 参与贡献 -## TODO +1. Fork 本仓库 +2. 新建 feat/1.0.0_util_xxx 分支 +3. 提交代码 +4. 新建 Pull Request [TODO和分工](doc/TODOList.md) @@ -50,7 +55,7 @@ LSM tree 是许多 KV型或日志型数据库所依赖的核心实现,例如Bi |-------------------------------|-----------------|------| | Arena (Memory Management) | wangboo | 100% | | bloom | fengyang | 100% | -| Cache | colagy | 10% | +| Cache | colagy | 30% | | Coding (Primitive Type SerDe) | colagy | 100% | | Comparator | fengyang | 100% | | CRC | wangboo、lxd5866 | 100% | @@ -60,37 +65,51 @@ LSM tree 是许多 KV型或日志型数据库所依赖的核心实现,例如Bi | Histgram | kazeseiriou | 100% | | loging | | | | MutexLock | kazeseiriou | 100% | -| Random | colagy | | +| Random | colagy | 100% | | Status | fengyang | 100% | | Slice | wangboo | 100% | -### 1.1.0 -1.1.0 版本, 完成基础零部件 - -| 功能模块 | 完成人 | 进度 | -|----------------------------------------------------------------------------------|--------------|-----| -| util.Options(ReadOptions, WriteOptions) | kazeseiriou | | -| util.ENV(WritableFile, SequentialFile, RandomAccessFile, FileLock) | lxd5866 | | -| util.Logger | peach | | -| table.Block, BlockBuilder, FilterBlockBuilder | colagy | | -| FilterBlock, FilterBlockReader | colagy | | -| table.format(Footer, BlockHandle) | fengyang、半支烟 | | -| db.dbformat(InternalKeyComparator, InternalFilterPolicy, LookupKey, InternalKey) | fengyang、半支烟 | | -| db.SkipList | wangboo | | -| table.Iterator(DBIter, MergingIterator, TwoLevelIterator...) | kazeseiriou | | -| IteratorWrapper | kazeseiriou | | -| db.MemTable(MemTable, MemTableIterator) | wangboo | | -| SSTable | fengyang | | -| table.Table | peach | | -| db.leveldb_util | wangboo | | -| db.log_format | wangboo | | -| db.LogReader | wangboo | 90% | -| db.LogWriter | wangboo | 90% | -| db.TableCache | colagy | | -| LinkedList | fengyang | | -| db.VersionEdit(Tag, VersionEdit, FileMetaData) | fengyang | | -| db.VersionSet(Version, LevelFileNumIterator, SaverState) | fengyang | | -| WriteBatch | peach | | +### 1.1.0 版本, 完成基础零部件 + +| 功能模块 | 完成人 | 进度 | +|----------------------------------------------------------------------------------|----------------------|------| +| util.Options(ReadOptions, WriteOptions) | kazeseiriou,wangboo | 0% | +| util.ENV(SequentialFile, RandomAccessFile, FileLock) | lxd5866 | 0% | +| util.Logger/Log日志库 | peach | 50% | +| table.format(Footer, BlockHandle) | 半支烟 | 20% | +| db.dbformat(InternalKeyComparator, InternalFilterPolicy, LookupKey, InternalKey) | 半支烟 | 20% | +| db.SkipList | wangboo | 100% | +| table.Iterator(DBIter、EmptyIterator) | kazeseiriou | 0% | +| table.Iterator(merger.MergingIterator) | kazeseiriou | 0% | +| table.Iterator(TwoLevelIterator) | kazeseiriou | 0% | +| table.Iterator(tabletest.KeyConvertingIterator) | kazeseiriou | 0% | +| table.Iterator(dbtest.ModelIter) | kazeseiriou | 0% | +| table.Iterator(Block::Iter) | wangboo | 0% | +| IteratorWrapper | kazeseiriou | 0% | +| db.MemTable(MemTable, MemTableIterator) | wangboo,tzcyujunyong | 20% | +| db.Builder | fengyang | 85% | +| table.table_builder | fengyang | 30% | +| table.Block | fengyang | 30% | +| table.BlockBuilder | fengyang | 30% | +| table.FilterBlockBuilder | fengyang | 30% | +| FilterBlock, FilterBlockReader | fengyang | 80% | +| SSTable | fengyang | 0% | +| table.Table | peach,tzcyujunyong | 30% | +| db.leveldb_util | wangboo | 0% | +| db.log_format | wangboo | 90% | +| db.LogReader | wangboo | 90% | +| db.LogWriter | wangboo | 90% | +| db.TableCache | colagy | 10% | +| LinkedList | fengyang | 60% | +| db.VersionEdit(Tag, VersionEdit, FileMetaData) | fengyang | 20% | +| db.VersionSet(Version, LevelFileNumIterator, SaverState) | fengyang | 20% | +| WriteBatch | tzcyujunyong,wangboo | 50% | +| db.filename | | | +| | 半支烟 | 40% | + + + + #### 1.1.0 计划 * 完成gitee -> github (同步) 主仓库gitee diff --git a/benches/crc_bench.rs b/benches/crc_bench.rs new file mode 100644 index 0000000000000000000000000000000000000000..e1bdc08bd943ad388c470cfd0da97b29d6f79124 --- /dev/null +++ b/benches/crc_bench.rs @@ -0,0 +1,24 @@ +use criterion::{Criterion, criterion_group, criterion_main}; +use rand::RngCore; +use level_db_rust::util::crc::CRC; + +pub const SRC_DATA: [u8; 512] = [0; 512]; + +pub fn default_crc_bench(c: &mut Criterion) { + let mut rnd = rand::thread_rng(); + c.bench_function("default_crc", |b| { + b.iter(|| { + rnd.fill_bytes(&mut SRC_DATA); + CRC::value(&SRC_DATA); + }); + }); + c.bench_function("crc32fast", |b| { + b.iter(|| { + rnd.fill_bytes(&mut SRC_DATA); + crc32fast::hash(&SRC_DATA); + }); + }); +} + +criterion_group!(benches, default_crc_bench); +criterion_main!(benches); \ No newline at end of file diff --git a/benches/skiplist_bench.rs b/benches/skiplist_bench.rs new file mode 100644 index 0000000000000000000000000000000000000000..bc95bd7077288f26c6b37f00cc7ec094064e60e3 --- /dev/null +++ b/benches/skiplist_bench.rs @@ -0,0 +1,64 @@ +use std::sync::{Arc, Mutex}; + +use criterion::{Criterion, criterion_group, criterion_main}; +use rand::Rng; +use skiplist::OrderedSkipList; + +use level_db_rust::db::skip_list::SkipList; +use level_db_rust::util::Arena; +use level_db_rust::util::arena::ArenaRef; +use level_db_rust::util::comparator::BytewiseComparatorImpl; +use level_db_rust::util::unsafe_slice::TryIntoUnsafeSlice; + + +const BENCH_TIMES: usize = 128; + +pub fn skiplist_bench(c: &mut Criterion) { + // 生成测试样本,保证两次测试都是相同的次数 + let mut rnd = rand::thread_rng(); + let mut every_bench_times = [0; BENCH_TIMES]; + for i in 0..BENCH_TIMES { + every_bench_times[i] = rnd.gen_range(32..20480); + } + + c.bench_function("default_skiplist", |b| { + let mut i = 0; + b.iter(|| { + let cmp = Arc::new(BytewiseComparatorImpl::default()); + let arena = Arc::new(Mutex::new(Arena::default())); + let list = SkipList::create(cmp, arena.clone()); + bench_default_skiplist(list, arena, every_bench_times[i % BENCH_TIMES]); + i += 1; + }); + }); + + c.bench_function("skiplist-0.4.0", |b| { + let mut i = 0; + b.iter(|| { + let list: OrderedSkipList = unsafe { + OrderedSkipList::with_comp(|a: &String, b: &String| { + a.cmp(b) + }) + }; + bench_skiplist_v_0_4_0(list, every_bench_times[i % BENCH_TIMES]); + i += 1; + }); + }); +} + +fn bench_default_skiplist(mut list: SkipList, arena: ArenaRef, record_count: usize) { + for j in 0..record_count { + let value = format!("key_{}", j); + list.insert(value.try_into_unsafe_slice(arena.clone()).unwrap()).unwrap(); + } +} + +fn bench_skiplist_v_0_4_0(mut list: OrderedSkipList, record_count: usize) { + for j in 0..record_count { + let value = format!("key_{}", j); + list.insert(value.clone()); + } +} + +criterion_group!(benches, skiplist_bench); +criterion_main!(benches); \ No newline at end of file diff --git a/benches/u32_shift.rs b/benches/u32_shift.rs new file mode 100644 index 0000000000000000000000000000000000000000..f9a2eb5835896e2db9ea149425cea5e0c91efe6e --- /dev/null +++ b/benches/u32_shift.rs @@ -0,0 +1,46 @@ +use std::{mem, slice}; +use std::alloc::{alloc, Layout}; +use std::io::Write; + +use criterion::{Criterion, criterion_group, criterion_main}; +use level_db_rust::debug; + +pub fn u32_shift_bench(c: &mut Criterion) { + let mut data = [0_u8; 4]; + let mut buf = data.as_mut_slice(); + let value = 12345678_u32; + let mut g = c.benchmark_group("u32_shift"); + + g.bench_function("to_ne_bytes", |g| { + g.iter(|| { + buf.write(&value.to_be_bytes()).unwrap(); + }); + }); + buf = data.as_mut_slice(); + buf.fill(0); // reset + debug!("is big endian: {}", cfg!(target_endian = "big")); + g.bench_function("raw_write", |g| { + g.iter(|| { + unsafe { + if cfg!(target_endian = "big") { + (buf.as_mut_ptr() as *mut u32).write(value); + } else { + (buf.as_mut_ptr() as *mut u32).write(value.swap_bytes()); + } + } + }); + }); + buf = data.as_mut_slice(); + buf.fill(0); // reset + g.bench_function("shift_bytes", |g| { + g.iter(|| { + buf[0] = ((value >> 0) & 0xff) as u8; + buf[1] = ((value >> 1) & 0xff) as u8; + buf[2] = ((value >> 2) & 0xff) as u8; + buf[3] = ((value >> 3) & 0xff) as u8; + }); + }); +} + +criterion_group!(benches, u32_shift_bench); +criterion_main!(benches); \ No newline at end of file diff --git a/src/db/builder.rs b/src/db/builder.rs new file mode 100644 index 0000000000000000000000000000000000000000..5a35ddcab12b159a317b26b36f41df544298eb6a --- /dev/null +++ b/src/db/builder.rs @@ -0,0 +1,133 @@ +use std::error::Error; +use std::fs::File; +use std::io; +use std::ops::Deref; +use std::sync::Arc; +use crate::db::file_meta_data::FileMetaData; +use crate::db::filename::FileName; +use crate::db::table_cache::TableCache; +use crate::table::table::Table; +use crate::table::table_builder::TableBuilder; +use crate::traits::DataIterator; +use crate::util::env::Env; +use crate::util::options::{Options, OptionsPtr, ReadOptions}; +use crate::util::Result; +use crate::util::slice::Slice; +use crate::util::status::{LevelError, Status}; + +pub struct BuildTable {} + +impl BuildTable { + + /// + /// 生成 SSTable + /// + /// Build a Table file from the contents of *iter. + /// The generated file will be named according to meta->number. + /// On success, the rest of meta will be filled with metadata about the generated table. + /// If no data is present in *iter, meta->file_size will be set to zero, and no Table file will be produced. + /// + /// # Arguments + /// + /// * `dbname`: + /// * `env`: + /// * `options`: + /// * `table_cache`: + /// * `iter`: + /// * `meta`: + /// + /// returns: Result + /// + /// # Examples + /// + /// ``` + /// + /// ``` + pub fn build_table(dbname: &Slice, env: &Env, options: OptionsPtr, + table_cache: &TableCache, mut iter: Box, + meta: &mut FileMetaData) -> Result { + meta.set_file_size(0); + // 迭代器移动到第一个节点 + iter.seek_to_first(); + // 生成一个 SSTable 文件名 + let file_name = FileName::table_file_name(dbname, meta.get_number()); + + let mut s : Status = Status::default(); + + if iter.valid() { + let fileRS: Result = env.new_writable_file(&file_name); + if(!fileRS.is_ok()){ + return Err(fileRS.err().unwrap()); + } + + let writable_file = Arc::new(fileRS.unwrap()); + // 生成一个 TableBuilder + let builder: TableBuilder = TableBuilder::new_with_writable_file(options, writable_file.clone()); + + meta.get_smallest().decode_from(&iter.key()); + + // 调用迭代器,依次将每个键-值对加入 TableBuilder + while iter.valid() { + iter.next(); + + let key = iter.key(); + meta.get_largest().decode_from(&key); + builder.add(&key, &iter.value()); + } + + // Finish and check for builder errors + // 调用 TableBuilder 的 Finish 函数生成 SSTable 文件 + s = builder.finish(); + if s.is_ok() { + meta.set_file_size(builder.get_file_size()); + assert!(meta.get_file_size() > 0); + } + + // Finish and check for file errors + // 将文件刷新到磁盘 + if s.is_ok() { + let rs:io::Result<()> = writable_file.sync_data(); + if rs.is_ok() { + s = Status::default(); + }else{ + s = Status::wrapper_str(LevelError::KIOError, rs.unwrap_err().to_string().as_str()); + } + } + // 关闭文件 + // if s.is_ok() { + // writableFile.close + // } + + if s.is_ok() { + let readOptions = ReadOptions::default(); + // Verify that the table is usable + let it: Box = table_cache.new_iterator(&readOptions, + meta.get_number(), + meta.get_file_size() as usize, + &Table::new()) + .expect("table_cache.new_iterator error"); + s = it.status(); + } + } // if end + + // Check for input iterator errors + if !iter.status().is_ok() { + s = iter.status(); + } + + if s.is_ok() && meta.get_file_size() > 0 { + // Keep it + } else { + // DeleteFile fname + // todo + } + + if s.is_ok() { + // todo + // return Ok(meta); + return Ok(FileMetaData::default()); + }else{ + return Err(s); + } + } +} \ No newline at end of file diff --git a/src/db/builder_test.rs b/src/db/builder_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..46899e30533b29eaa3fe7517b405395bc6b85088 --- /dev/null +++ b/src/db/builder_test.rs @@ -0,0 +1,12 @@ + +mod test { + use crate::db::builder::BuildTable; + use crate::db::table_cache::TableCache; + use crate::util::options::Options; + use crate::util::slice::Slice; + + #[test] + fn test_build_table() { + println!("get_name: {}", "a"); + } +} \ No newline at end of file diff --git a/src/db/db_format.rs b/src/db/db_format.rs index 891b90651e6c4f77f50fa837693675794d3045b9..f318f2c4804de49d4fa8124c01c5c2bc20c49ccd 100644 --- a/src/db/db_format.rs +++ b/src/db/db_format.rs @@ -1,16 +1,20 @@ use std::cmp::Ordering; -use std::ops::Deref; -use crate::db::db_format::ValueType::{K_TYPE_DELETION, K_TYPE_VALUE}; +use std::io::Write; +use std::sync::Arc; +use crate::db::db_format::ValueType::{KTypeDeletion, KTypeValue}; use crate::db::file_meta_data::FileMetaData; +use crate::traits::coding_trait::CodingTrait; use crate::traits::comparator_trait::Comparator; +use crate::util::coding::Coding; use crate::util::slice::Slice; +use crate::util::unsafe_slice::UnsafeSlice; pub enum ValueType { /// 0x0 - K_TYPE_DELETION, + KTypeDeletion, /// 0x1 - K_TYPE_VALUE, + KTypeValue, } pub struct ParsedInternalKey { @@ -26,33 +30,23 @@ pub struct InternalKey { /// InternalKeyComparator pub struct InternalKeyComparator { - user_comparator_: dyn Comparator + user_comparator_: Arc } /// 查找键 // todo add clone trait pub struct LookupKey { - // We construct a char array of the form: - // klength varint32 <-- start_ - // userkey char[klength] <-- kstart_ - // tag uint64 - // <-- end_ - // The array is a suitable MemTable key. - // The suffix starting with "userkey" can be used as an InternalKey. - - start_: Slice, - kstart_: Slice, - end_: Slice, - - // Avoid allocation for short keys - space_: [u8; 200], + /// |klength(varint32)|user key(string)|sequence number(7 bytes)|value type(1 byte)| + data: Slice, + /// start index at user key + user_key_start: usize, } impl ValueType { - pub fn get_value(&self) -> i32 { + pub fn get_value(&self) -> usize { let le = match self { - K_TYPE_DELETION => 0, - K_TYPE_VALUE => 1 + KTypeDeletion => 0, + KTypeValue => 1 }; le @@ -79,8 +73,8 @@ impl TryFrom for ValueType { #[inline] fn try_from(value: i32) -> Result { match value { - 0 => Ok(K_TYPE_DELETION), - 1 => Ok(K_TYPE_VALUE), + 0 => Ok(KTypeDeletion), + 1 => Ok(KTypeValue), // all other numbers _ => Err(String::from(format!("Unknown code: {}", value))) } @@ -93,28 +87,28 @@ impl Default for ParsedInternalKey { ParsedInternalKey { user_key: Default::default(), sequence: 0, - value_type: K_TYPE_DELETION, + value_type: KTypeDeletion, } } } impl ParsedInternalKey { - fn debug_string(&self) -> Slice { + pub fn debug_string(&self) -> Slice { Slice::default() } /// Return the length of the encoding of "key". - fn internal_key_encoding_length(&self, key: ParsedInternalKey) -> usize { + pub fn internal_key_encoding_length(&self, key: ParsedInternalKey) -> usize { key.user_key.size() + 8 } // 将 self 的数据追加到 result 中 - fn append_internal_key(&self, result: Slice) { + pub fn append_internal_key(&self, result: Slice) { todo!() } - fn new(user_key: Slice, sequence: u64, value_type: ValueType) -> Self { + pub fn new(user_key: Slice, sequence: u64, value_type: ValueType) -> Self { Self { user_key, sequence, @@ -125,13 +119,13 @@ impl ParsedInternalKey { /// Attempt to parse an internal key from "internal_key". On success, /// stores the parsed data in "*result", and returns true. /// On error, returns false, leaves "*result" in an undefined state. - fn parse_internal_key(internal_key : Slice, target: ParsedInternalKey) -> bool { + pub fn parse_internal_key(internal_key : Slice, target: ParsedInternalKey) -> bool { // line 173 todo!() } /// Returns the user key portion of an internal key. - fn extract_user_key(internal_key : Slice) -> Slice { + pub fn extract_user_key(internal_key : Slice) -> Slice { todo!() } } @@ -154,7 +148,7 @@ impl PartialEq for InternalKey { } impl InternalKey { - fn new(user_key: Slice, sequence: u64, value_type: ValueType) -> Self { + pub fn new(user_key: Slice, sequence: u64, value_type: ValueType) -> Self { // line 145 let result: Slice = Slice::default(); ParsedInternalKey::new(user_key, sequence, value_type) @@ -178,7 +172,7 @@ impl InternalKey { /// ``` /// /// ``` - fn decode_from(&self, input: Slice) { + pub fn decode_from(&self, input: &UnsafeSlice) { todo!() // wangbo @@ -186,31 +180,42 @@ impl InternalKey { } /// 输出 InternalKey 调试信息 - fn debug_string(&self) -> Slice { + pub fn debug_string(&self) -> Slice { // line 164 todo!() } - fn encode(self) -> Slice { + pub fn encode(self) -> Slice { self.rep_ } - fn user_key(self) -> Slice { + /// 取得 Slice的长度 + pub fn encode_len(&self) -> usize { + self.rep_.size() + } + + pub fn user_key(self) -> Slice { ParsedInternalKey::extract_user_key(self.rep_) } - fn set_from(self, p: ParsedInternalKey) { + pub fn set_from(self, p: ParsedInternalKey) { // self.rep_.clear(); p.append_internal_key(self.rep_); } - fn clear(self) { + pub fn clear(self) { // self.rep_.clear(); } } +impl Default for InternalKeyComparator { + fn default() -> Self { + todo!() + } +} + impl InternalKeyComparator { - pub fn create(c: Box) -> Box { + pub fn create(_cmp: Box) -> Box { todo!() } @@ -231,7 +236,7 @@ impl Comparator for InternalKeyComparator { // todo!() // } - fn compare(&self, _a: &Slice, _b: &Slice) -> Option { + fn compare(&self, _a: &[u8], _b: &[u8]) -> Option { todo!() } @@ -251,24 +256,43 @@ impl Comparator for InternalKeyComparator { impl LookupKey { /// Initialize *this for looking up user_key at a snapshot with /// the specified sequence number. - fn new(user_key: Slice, sequence: u64) -> Self { - // todo - todo!() + pub fn new(user_key: Slice, sequence: usize) -> Self { + let user_key_size = user_key.size(); + let need = user_key_size + 13; // A conservative estimate + let mut data = Vec::with_capacity(need); + let buf = data.as_mut_slice(); + let klength = Coding::varint_length(user_key_size + 8); + let mut offset = 0; + // write key size + offset = Coding::encode_varint32(klength as u32, buf, offset); + // write key slice + offset += (&mut buf[offset..]).write(user_key.as_ref()).expect("write user_key"); + // write sequence number and value type + Coding::encode_fixed64( + pack_sequence_and_type(sequence, ValueType::KTypeValue), + buf, offset); + + LookupKey { + data: Slice::from_vec(data), + user_key_start: klength + } } /// Return a key suitable for lookup in a MemTable. - fn mem_table_key(&self) -> Slice { - todo!() + pub fn mem_table_key(&self) -> Slice { + self.data.clone() } /// Return an internal key (suitable for passing to an internal iterator) - fn internal_key(&self) -> Slice { + pub fn internal_key(&self) -> Slice { // line 204 - todo!() + let buf = self.data.as_ref(); + let internal_key_buf = &buf[self.user_key_start..]; + Slice::from_buf(internal_key_buf.clone()) } /// Return the user key - fn user_key(&self) -> Slice { + pub fn user_key(&self) -> Slice { // line 207 todo!() } @@ -293,6 +317,14 @@ impl LookupKey { // } // } +const K_MAX_SEQUENCE_NUMBER: usize = (1 << 56) - 1; + +#[inline] +pub fn pack_sequence_and_type(seq_no: usize, v_type: ValueType) -> u64 { + debug_assert!(seq_no <= K_MAX_SEQUENCE_NUMBER); + debug_assert!(v_type.get_value() <= 1); + ((seq_no << 8) | v_type.get_value()) as u64 +} pub struct Config {} impl Config { @@ -325,5 +357,5 @@ impl Config { // and the value type is embedded as the low 8 bits in the sequence // number in internal keys, we need to use the highest-numbered // ValueType, not the lowest). - pub const K_VALUE_TYPE_FOR_SEEK: ValueType = ValueType::K_TYPE_VALUE; + pub const K_VALUE_TYPE_FOR_SEEK: ValueType = ValueType::KTypeValue; } \ No newline at end of file diff --git a/src/db/file_meta_data.rs b/src/db/file_meta_data.rs index bc67682954616e298755d3232aff10decc9772d9..b04f6d4bedaaf327b569780d5b1e6e4ed3a2330b 100644 --- a/src/db/file_meta_data.rs +++ b/src/db/file_meta_data.rs @@ -68,12 +68,27 @@ impl FileMetaData { } } + pub fn get_number(&self) -> u64 { + self.number + } + + /// File size in bytes + pub fn get_file_size(&self) -> u64 { + self.file_size + } + + pub fn set_file_size(&mut self, file_size: u64) { + self.file_size = file_size; + } + + /// Smallest internal key served by table pub fn get_smallest(&self) -> &InternalKey { &self.smallest } - pub fn get_number(&self) -> u64 { - self.number + /// Largest internal key served by table + pub fn get_largest(&self) -> &InternalKey { + &self.largest } pub fn get_refs(&self) -> i32 { diff --git a/src/db/filename.rs b/src/db/filename.rs new file mode 100644 index 0000000000000000000000000000000000000000..8c47c9da00daaad9fdab1c6a8262b23985ce0575 --- /dev/null +++ b/src/db/filename.rs @@ -0,0 +1,12 @@ +use crate::util::slice::Slice; + +// TODo 参考 PathBuf +pub struct FileName { + +} + +impl FileName { + pub fn table_file_name(dbname: &Slice, number : u64) -> Slice{ + todo!() + } +} \ No newline at end of file diff --git a/src/db/filename_test.rs b/src/db/filename_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/db/log_wr_test.rs b/src/db/log_wr_test.rs index 98b16481a9dfbeafe726b4da4788d456b99319f4..001b1e750bb3cb5d9803aee1011324de4eb10b5b 100644 --- a/src/db/log_wr_test.rs +++ b/src/db/log_wr_test.rs @@ -6,12 +6,12 @@ mod test { use crate::traits::coding_trait::CodingTrait; use crate::util::coding::Coding; use crate::util::crc::{AsCrc, ToMask}; - use crate::util::Result; use crate::util::slice::Slice; + use crate::util::Result; #[test] fn write() -> Result<()> { - let file = box File::create("../../1.bin")?; + let file = Box::new(File::create("../../1.bin")?); let mut writer = LogWriter::new(file); let sample: Vec = ('0'..='9').map(|a|a as u8).collect(); for i in 0..100 { @@ -23,12 +23,12 @@ mod test { #[test] fn read() -> Result<()> { - let file = box File::open("../../1.bin")?; + let file = Box::new(File::open("../../1.bin")?); let mut reader = LogReader::new(file, true, 0); let sample: Vec = ('0'..='9').map(|a|a as u8).collect(); for i in 0..100 { let slice = reader.read_next().expect("not error").expect("must have record"); - let mut expect = generate_slice(i, &sample); + let expect = generate_slice(i, &sample); assert_eq!(expect.len(), slice.len()); assert_eq!(expect.as_ref(), slice.as_ref()) } diff --git a/src/db/mem_table.rs b/src/db/mem_table.rs index e0d99ce520db67dc86b2af526805eec584638ebb..3a521fb483ff8ce8da7905820b41f625e9dd77e8 100644 --- a/src/db/mem_table.rs +++ b/src/db/mem_table.rs @@ -1,23 +1,23 @@ -use std::rc::Rc; +use std::io::Write; +use std::sync::{Arc, Mutex}; +use crate::db::db_format::{LookupKey, ValueType}; +use crate::db::skip_list::SkipList; +use crate::traits::coding_trait::CodingTrait; use crate::traits::comparator_trait::Comparator; use crate::traits::DataIterator; +use crate::util::arena::ArenaRef; use crate::util::slice::Slice; - -use crate::util::Result; - -pub enum ValueType { - Insert, - Deletion, -} +use crate::util::{Arena, Result}; +use crate::util::coding::Coding; +use crate::util::unsafe_slice::UnsafeSlice; /// 内存表 pub struct MemTable { - cmp: Rc, + cmp: Arc, + list: SkipList, + arena: ArenaRef, } -/// 临时, 查找键 -pub struct LookupKey {} - impl MemTable { /// 创建内存表 @@ -33,15 +33,20 @@ impl MemTable { /// ``` /// let mt = MemTable::create(cmp); /// ``` - pub fn create(cmp: Rc) -> Self { + pub fn create(cmp: Arc) -> Self { + let arena = Arc::new(Mutex::new(Arena::default())); + let list = SkipList::create(cmp.clone(), arena.clone()); Self { cmp, + list, + arena } } /// 返回该表使用的内存近似值 + #[inline] pub fn approximate_memory_usage(&self) -> usize { - todo!() + self.arena.lock().unwrap().memory_usage() } /// 创建内存表迭代器 @@ -54,15 +59,36 @@ impl MemTable { /// /// ``` /// let mem = MemTable::create(comp); - /// let it = mem::new_new_iterator()?; + /// let it = mem.new_new_iterator()?; /// ``` pub fn new_iterator(&self) -> Result> { todo!() } /// 像内存表中写入或删除一个元素 - pub fn add(&mut self, _seq_no: usize, _v_type: ValueType, _key: &Slice, _value: Slice) -> Result<()> { - todo!() + pub fn add>(&mut self, seq_no: usize, v_type: ValueType, key: &R, value: &R) -> Result<()> { + let key_buf = key.as_ref(); + let value_buf = value.as_ref(); + let key_size = key_buf.len(); + let value_size = value_buf.len(); + let internal_key_size = key_size + 8; + let encoded_len = Coding::varint_length(key_size) + + internal_key_size + + Coding::varint_length(value_size) + + value_size; + let mut lock = self.arena.lock()?; + let buf = lock.allocate(encoded_len); + let mut offset = 0; + // write key size + offset = Coding::encode_varint32(internal_key_size as u32, buf, offset); + // write key slice + offset += (&mut buf[offset..]).write(key_buf)?; + // write seq_no and type + offset = Coding::encode_fixed64((seq_no << 8 | v_type.get_value()) as u64, buf, offset); + // write value slice + (&mut buf[offset..]).write(value_buf)?; + // let slice = Slice::from_buf(buf); + self.list.insert(UnsafeSlice::new_with_arena(buf, self.arena.clone())?) } /// 通过 key 查找结果 diff --git a/src/db/mod.rs b/src/db/mod.rs index 0acd557b822986f8a0c2af170953efbcd4f949b2..0d48432cff9606b1bdbf2469b4184d5a479704f0 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -1,8 +1,7 @@ use crate::db::db_format::InternalKeyComparator; use crate::db::skip_list::SkipList; use crate::db::mem_table::MemTable; -use crate::util::comparator::{BytewiseComparatorImpl}; -use crate::util::slice::Slice; +use crate::util::comparator::BytewiseComparatorImpl; pub mod log_writer; pub mod log_reader; @@ -11,6 +10,7 @@ mod log_wr_test; pub mod skip_list; pub mod mem_table; pub mod db; +mod skip_list_test; pub mod db_format; mod db_format_test; pub mod file_meta_data; @@ -19,8 +19,12 @@ pub mod version_set; mod version_set_test; pub mod version_edit; mod version_edit_test; +pub mod builder; +mod builder_test; +pub mod filename; +mod filename_test; /// 默认调表 -pub type DefaultSkipList = SkipList; +pub type DefaultSkipList = SkipList; /// 默认内存表 -pub type DefaultMemTable = MemTable; \ No newline at end of file +pub type DefaultMemTable = MemTable; diff --git a/src/db/skip_list.rs b/src/db/skip_list.rs index 01331f22de72120f0ce50c5255778640c6a2bc2b..1bc28f2574a6bf951f320e98de4a94fccf725718 100644 --- a/src/db/skip_list.rs +++ b/src/db/skip_list.rs @@ -1,39 +1,491 @@ -use std::rc::Rc; +use std::cmp::Ordering; +use std::mem; +use std::mem::size_of; +use std::ptr::null_mut; +use std::sync::{Arc, RwLock}; +use rand::prelude::*; + +use crate::debug; use crate::traits::comparator_trait::Comparator; -use crate::util::Arena; -use crate::util::comparator::BytewiseComparatorImpl; -use crate::util::Result; +use crate::traits::DataIterator; +use crate::util::{Arena, Result}; +use crate::util::arena::ArenaRef; use crate::util::slice::Slice; +use crate::util::status::{LevelError, Status}; +use crate::util::unsafe_slice::UnsafeSlice; + +type RawNode = *mut Node; + +const MAX_LEVEL: usize = 8; + +struct Node { + /// 存储的值, 如果为空,则是头指针或者尾指针 + key: Option, + /// 数组元素首地址,代表一个数组,指向每层的下一个节点。 + next_elems: *mut RawNode, + /// 当前节点高度 + level: usize, +} + +pub struct SkipList { + /// 最高层数 + height: usize, + /// 存储数据数量 + num: usize, + /// 头部指针 + head: RawNode, + /// 尾指针 + tail: RawNode, + /// 比较器 + cmp: Arc, + /// 内存分配器 + arena: ArenaRef, +} + +pub struct Iter { + head: RawNode, + tail: RawNode, + current: RawNode, + cmp: Arc, +} + +impl SkipList { + pub fn create(comparator: Arc, arena: ArenaRef) -> Self { + Self { + height: 0, + num: 0, + head: Node::create_head(arena.clone()), + tail: Node::create_tail(), + cmp: comparator, + arena, + } + } + + pub fn insert(&mut self, key: UnsafeSlice) -> Result<()> { + // TODO 这里是否可以优化 + if self.contains(&key) { + return Ok(()); + } + if self.num == 0 { + self.insert_ele0(key) + } else { + unsafe { + self.insert_elen(key) + } + } + } + + #[inline] + fn insert_ele0(&mut self, key: UnsafeSlice) -> Result<()> { + let level = rand_level(); + debug!("insert {}, level: {}", String::from_utf8_lossy(key.as_ref()), level); + let node = Node::create(key, level, self.arena.clone()); + // head bind node + // TODO, use macro to expand for-loop + unsafe { + (&mut *node).level = level; + (&mut *self.head).level = level; + for l in 0..level { + (&mut *self.head).set_node(l, node); + (&mut *node).set_node(l, self.tail); + } + } + self.height = level; + self.num = 1; + return Ok(()); + } + + unsafe fn insert_elen(&mut self, key: UnsafeSlice) -> Result<()> { + let mut current = self.head; + let node_height = rand_level(); + let node_top_level = node_height - 1; + debug!("insert {}, level: {}", &key, node_height); + let node_ptr = Node::create(key, node_height, self.arena.clone()); + let node = unsafe { &mut *node_ptr }; + // loop from highest level to 0 + for l in (0..self.height).rev() { + 'inner_loop: loop { + let ele_ptr = unsafe { (&*current).get_node(l) }; + let ele = unsafe { &mut *ele_ptr }; + if ele.is_tail() { + if l <= node_top_level { + // ele is tail node, add node to last + (&mut *current).set_node(l, node_ptr); + node.set_node(l, self.tail); + debug!("bind: {} before: {}, after: , at level: {}", + node.key.unwrap(), + (&*current).key.unwrap(), + l); + } + break 'inner_loop; + } else { + match self.cmp.compare(node.key.unwrap().as_ref(), ele.key.unwrap().as_ref()) { + Some(Ordering::Less) => { + // node higher than current level at ele + if node_top_level >= l { + (&mut *current).set_node(l, node_ptr); + node.set_node(l, ele_ptr); + if (&*current).is_head() { + debug!("bind: {} before: , after: {}, at level: {}", + node.key.unwrap(), + ele.key.unwrap(), + l); + } else { + debug!("bind: {} before: {}, after: {}, at level: {}", + node.key.unwrap(), + (&*current).key.unwrap(), + ele.key.unwrap(), + l); + } + } + break 'inner_loop; + } + Some(Ordering::Greater) => { + current = ele; + } + Some(Ordering::Equal) => { + // ignore equals + return Ok(()); + } + None => { + return Err(Status::wrapper(LevelError::KInvalidArgument, "key not comparable".into())); + } + } + } + } + } + // if head level is less than new node, then fix head node height + if self.height < node_height { + for l in (self.height()..node_height).rev() { + (&mut *self.head).set_node(l, node_ptr); + node.set_node(l, self.tail); + } + self.height = node_height; + } + self.num += 1; + Ok(()) + } + + pub fn contains>(&self, key: &R) -> bool { + let key_buf = key.as_ref(); + debug!("================== begin contains, key: {} ==================", String::from_utf8_lossy(key_buf)); + if self.num == 0 { + return false; + } + unsafe { + let mut current = unsafe { &*self.head }; + for level in (0..self.height).rev() { + 'a_loop: loop { + let ele_ptr = current.get_node(level); + let ele = &*ele_ptr; + if ele.is_tail() { + // tail node + if level == 0 { + debug!("next is tail, return false"); + return false; + } else { + debug!("next is tail, continue"); + break 'a_loop; + } + } + { + debug!("node: {} at level: {}", ele.key.unwrap(), level) + } + match self.cmp.compare(key_buf, ele.key.unwrap().as_ref()) { + None => return false, + Some(Ordering::Equal) => return true, + Some(Ordering::Less) => { + // break loop, decrease the level + break; + } + Some(Ordering::Greater) => { + if current.level() == 0 { + return false; + } + current = ele; + } + }; + } + } + } + // can not found in all level + false + } + + unsafe fn find_eq_or_greater>(&self, key: &R) -> Option { + todo!() + } + + #[inline] + pub fn max_height(&self) -> usize { + MAX_LEVEL + } + + #[inline] + pub fn height(&self) -> usize { + self.height + } + + #[inline] + pub fn len(&self) -> usize { + self.num + } + + #[inline] + pub fn iter(&self) -> Iter { + Iter::create(&self) + } + + #[inline] + pub fn memory_usage(&self) -> usize { + let a = Arc::new(RwLock::new(Arena::default())); + a.read().unwrap().memory_usage(); + self.arena.lock().unwrap().memory_usage() + } + + fn rnd_level(&self) -> usize { + let mut level = 1; + for _ in 1..MAX_LEVEL { + if random() { + level += 1; + } + } + level + } +} + +impl ToString for SkipList { + fn to_string(&self) -> String { + let mut tree = String::with_capacity(1024); + // calculate each item width + let mut widths = Vec::with_capacity(tree.len()); + self.iter().for_each(|s| { + widths.push(s.len()); + }); + // print value list + if self.num > 0 { + unsafe { + let mut node = &*((&*self.head).get_node(0)); + tree.push_str("[head]"); + while !node.is_head_or_tail() { + tree.push_str(" -> "); + tree.push_str(node.key.unwrap().as_str()); + let level_str = format!("({})", node.level); + tree.push_str(level_str.as_str()); + node = &*node.get_node(0); + } + } + } + tree.push_str("-> [tail]"); + format!("height: {}, num: {}\n {}", self.height, self.num, tree) + } +} + + +impl Node { + #[inline] + fn create(src: UnsafeSlice, level: usize, arena: ArenaRef) -> RawNode { + let node = Box::new(Self { + key: Some(src), + next_elems: allocate_next_elems(arena), + level, + }); + Box::into_raw(node) + } -// todo -struct Node { - value: T, + #[inline] + fn create_head(arena: ArenaRef) -> RawNode { + let node = Box::new(Self { + key: None, + next_elems: allocate_next_elems(arena), + level: MAX_LEVEL, + }); + Box::into_raw(node) + } + + #[inline] + fn create_tail() -> RawNode { + let node = Box::new(Self { + key: None, + next_elems: null_mut(), + level: 0, + }); + Box::into_raw(node) + } + + #[inline] + #[must_use] + fn is_head_or_tail(&self) -> bool { + self.key.is_none() + } + + #[inline] + #[must_use] + fn is_tail(&self) -> bool { + self.key.is_none() && self.level == 0 + } + + #[inline] + #[must_use] + fn is_head(&self) -> bool { + self.key.is_none() && self.level > 0 + } + + + #[inline] + fn level(&self) -> usize { + self.level + } + + #[inline] + #[must_use] + unsafe fn get_node(&self, level: usize) -> RawNode { + assert!(level < MAX_LEVEL); + self.next_elems.offset(level as isize).read() + } + + #[inline] + unsafe fn set_node(&mut self, level: usize, node: RawNode) { + assert!(level < MAX_LEVEL); + self.next_elems.offset(level as isize).write(node); + } + + /// 找到最后一个数据元素 + unsafe fn seek_to_last(&self) -> Option { + if self.is_tail() { + return None; + } + let mut pre = self; + let mut cur = &*self.next_top_node(); + loop { + if cur.is_tail() { + return Some(pre as *const Node as *mut Node); + } + pre = cur; + cur = &*cur.next_top_node(); + } + } + + /// 找到最上层的下一个元素 + #[inline] + unsafe fn next_top_node(&self) -> RawNode { + self.get_node(self.level - 1) + } +} + +fn rand_level() -> usize { + let mut level = 1_usize; + while random::() { + level += 1; + if level >= MAX_LEVEL { + break; + } + } + level } -pub struct SkipList { - node: Option>, - comp: Rc, +fn allocate_next_elems(arena: ArenaRef) -> *mut RawNode { + // RawNode is a raw ptr + assert_eq!(size_of::(), size_of::()); + // allocate next_elems to 8 capacity array + let elems_size = size_of::() * MAX_LEVEL; + let mut lock = arena.lock().expect("lock arena"); + let elems_ptr = lock.allocate(elems_size); + // transmute raw ptr to RawNode ptr + unsafe { + mem::transmute(elems_ptr.as_ptr()) + } } -impl SkipList { +#[inline] +fn min_max(a: usize, b: usize) -> (usize, usize) { + if a < b { + (a, b) + } else { + (b, a) + } +} - pub fn create(comparator: Rc, _arena: Rc) -> Self { +// 'b lifetime is bigger than 'a +impl Iter { + fn create(list: &SkipList) -> Self { Self { - node: None, - comp: comparator, + head: list.head, + tail: list.tail, + current: list.head, + cmp: list.cmp.clone(), + } + } +} + +impl Iterator for Iter { + type Item = UnsafeSlice; + + #[inline] + fn next(&mut self) -> Option { + unsafe { + if (&*self.current).is_tail() { + return None; + } else { + self.current = (&*self.current).get_node(0); + } + (&*self.current).key + } + } +} + +impl DataIterator for Iter { + #[inline] + fn valid(&self) -> bool { + unsafe { + (&*self.current).is_head_or_tail() + } + } + + #[inline] + fn seek_to_first(&mut self) { + self.current = unsafe { + (&*self.head).get_node(0) } } - pub fn insert(&mut self, _seq_no: usize, _key: &Slice) -> Result<()> { + #[inline] + fn seek_to_last(&mut self) { + unsafe { + self.current = (&*self.current).seek_to_last().unwrap_or(self.tail) + } + } + + fn seek(&mut self, key: &Slice) { todo!() } - pub fn contains(&self, _key: &Slice) -> bool { + fn next(&mut self) { + unsafe { + if (&*self.current).is_tail() { + return; + } + self.current = (&*self.current).get_node(0); + } + } + + fn pre(&mut self) { + todo!() + } + + fn key(&self) -> UnsafeSlice { + let mem_key = unsafe { + (&*self.current).key.unwrap() + }; + mem_key + } + + fn value(&self) -> UnsafeSlice { todo!() } - pub fn get_max_height(&self) -> usize { + fn status(&self) -> Status { todo!() } } \ No newline at end of file diff --git a/src/db/skip_list_test.rs b/src/db/skip_list_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..721adb19a0b4249a2c7e71f2f6dac10a26a95a87 --- /dev/null +++ b/src/db/skip_list_test.rs @@ -0,0 +1,108 @@ +// mod test { +// use std::collections::HashSet; +// use std::env::args; +// use std::ffi::{c_char, c_void}; +// use std::ptr::{null, null_mut}; +// use std::sync::{Arc, Mutex}; +// +// use rand::Rng; +// use skiplist::OrderedSkipList; +// +// use crate::db::DefaultSkipList; +// use crate::db::skip_list::SkipList; +// use crate::debug; +// use crate::util::Arena; +// use crate::util::arena::ArenaRef; +// use crate::util::comparator::BytewiseComparatorImpl; +// use crate::util::mem_debug::mem_print; +// use crate::util::Result; +// use crate::util::slice::Slice; +// use crate::util::unsafe_slice::TryIntoUnsafeSlice; +// +// #[test] +// fn test_add() -> Result<()> { +// let cmp = Arc::new(BytewiseComparatorImpl::default()); +// let arena = Arc::new(Mutex::new(Arena::default())); +// let mut list = DefaultSkipList::create(cmp, arena.clone()); +// let len = 10; +// for i in 0..len { +// list.insert(format!("key_{}", i).try_into_unsafe_slice(arena.clone())?).expect("insert ok"); +// } +// assert_eq!(10, list.len(), "expect 10, but actually is: {}", list.len()); +// debug!("{}", list.to_string()); +// for i in 0..len { +// let key: Slice = format!("key_{}", i).into(); +// debug!("contains key: {}", key); +// assert!(list.contains(&key), "contains key: {}", key); +// } +// list.iter().for_each(|slice| { +// debug!("slice: {}", slice.as_str()) +// }); +// Ok(()) +// } +// +// #[test] +// fn test_rnd_add() -> Result<()> { +// let cmp = Arc::new(BytewiseComparatorImpl::default()); +// let arena = Arc::new(Mutex::new(Arena::default())); +// let mut list = DefaultSkipList::create(cmp, arena.clone()); +// let len = 10; +// let mut rnd = rand::thread_rng(); +// let mut set = HashSet::new(); +// for _i in 0..10 { +// let j = rnd.gen_range(0..len); +// let key = format!("key_{}", j); +// set.insert(key.clone()); +// list.insert(key.try_into_unsafe_slice(arena.clone())?)?; +// debug!("skiplist: {}", list.to_string()); +// } +// assert_eq!(set.len(), list.len(), "list length must eq: {}", list.len()); +// set.iter().for_each(|key| { +// let c = list.contains(&key); +// assert!(c, "must contains key: {}", key) +// }); +// +// Ok(()) +// } +// +// +// fn default_skiplist(mut list: SkipList, arena: ArenaRef, record_count: usize) { +// for j in 0..record_count { +// let value = format!("key_{}", j); +// list.insert(value.try_into_unsafe_slice(arena.clone()).unwrap()).unwrap(); +// } +// println!("bench_default_skiplist: "); +// mem_print(); +// } +// +// fn bench_skiplist_v_0_4_0(mut list: OrderedSkipList, record_count: usize) { +// for j in 0..record_count { +// let value = format!("key_{}", j); +// list.insert(value.clone()); +// } +// println!("bench_skiplist_v_0_4_0: "); +// mem_print(); +// } +// +// #[test] +// fn bench_default_skiplist() { +// let record_count = 100 * 1024; +// println!("bench default skiplist"); +// let cmp = Arc::new(BytewiseComparatorImpl::default()); +// let arena = Arc::new(Mutex::new(Arena::default())); +// let list = SkipList::create(cmp, arena.clone()); +// default_skiplist(list, arena, record_count); +// } +// +// #[test] +// fn bench_crate_skiplist() { +// let record_count = 100 * 1024; +// println!("bench crate skiplist"); +// let list: OrderedSkipList = unsafe { +// OrderedSkipList::with_comp(|a: &String, b: &String| { +// a.cmp(b) +// }) +// }; +// bench_skiplist_v_0_4_0(list, record_count); +// } +// } \ No newline at end of file diff --git a/src/db/table_cache.rs b/src/db/table_cache.rs index ad6079bddebb55f87430bf04a6d0f08f777b0e91..6fa4804a0af6d0fe47d3ca509c57d7e1bc2abdb7 100644 --- a/src/db/table_cache.rs +++ b/src/db/table_cache.rs @@ -1,16 +1,19 @@ +use crate::table::table::Table; use crate::traits::DataIterator; +use crate::util::options::ReadOptions; use crate::util::slice::Slice; use crate::util::Result; -struct Saver {} - -struct ReadOptions {} - -struct Table {} +pub struct Saver {} pub struct TableCache {} impl TableCache { + pub fn new() -> Self { + Self { + + } + } /// 从缓存中获取Table /// /// # Arguments @@ -28,9 +31,10 @@ impl TableCache { /// ``` /// /// ``` - fn get(&self, _options: &ReadOptions, _file_number: u64, _file_size: usize, _k: &Slice, _arg: &mut Saver, _handle_result: F) - where F: FnMut(&mut Saver, &Slice, &Slice) -> Result<()> { - () + pub fn get(&self, _options: &ReadOptions, _file_number: u64, _file_size: usize, + _k: &Slice, _arg: &mut Saver, _handle_result: F) + where F: FnMut(&mut Saver, &Slice, &Slice) { + todo!() } /// 根据文件号消除缓存 /// @@ -45,7 +49,7 @@ impl TableCache { /// ``` /// /// ``` - fn evict(&mut self, _file_number: u64) { + pub fn evict(&mut self, _file_number: u64) { todo!() } @@ -65,7 +69,7 @@ impl TableCache { /// ``` /// /// ``` - fn new_iterator(&self, _options: &ReadOptions, _file_number: u64, _file_size: usize, _table: &Table) -> Result> { + pub fn new_iterator(&self, _options: &ReadOptions, _file_number: u64, _file_size: usize, _table: &Table) -> Result> { todo!() } } \ No newline at end of file diff --git a/src/db/version_edit.rs b/src/db/version_edit.rs index e820e778d9c66e6d1b9c3c9349b33411a9792c30..792d00afe2b09ccd289cba0486b0ff2d98a5d4d5 100644 --- a/src/db/version_edit.rs +++ b/src/db/version_edit.rs @@ -1,8 +1,13 @@ +use std::fs::read; use std::iter::Map; use crate::db::db_format::InternalKey; use crate::db::file_meta_data::FileMetaData; +use crate::db::version_edit; +use crate::traits::coding_trait::CodingTrait; +use crate::util::coding::Coding; use crate::util::slice::Slice; use crate::util::Result; +use crate::util::status::{LevelError, Status}; pub struct VersionEdit { comparator_: String, @@ -17,35 +22,99 @@ pub struct VersionEdit { has_last_sequence_: bool, compact_pointers_: Vec<(u32, InternalKey)>, + // left: level; right: file number deleted_files_: Vec<(u32, u64)>, + // left: level; right: FileMetaData new_files_: Vec<(u32, FileMetaData)>, } -enum Tag { - // kComparator = 1, - // kLogNumber = 2, - // kNextFileNumber = 3, - // kLastSequence = 4, - // kCompactPointer = 5, - // kDeletedFile = 6, - // kNewFile = 7, - // // 8 was used for large value refs - // kPrevLogNumber = 9 - - kComparator, - kLogNumber, - kNextFileNumber, - kLastSequence, - kCompactPointer, - kDeletedFile, - kNewFile, +pub enum Tag { + k_comparator = 1, + kLogNumber = 2, + kNextFileNumber = 3, + kLastSequence = 4, + kCompactPointer = 5, + kDeletedFile = 6, + kNewFile = 7, // 8 was used for large value refs - kPrevLogNumber + kPrevLogNumber = 9 +} + +impl Tag { + /// 得到枚举 Tag 的固定值 + /// Tag numbers for serialized VersionEdit. These numbers are written to disk and should not be changed. + pub fn get_value(&self) -> i32 { + let val = match self { + Tag::k_comparator => 1, + Tag::kLogNumber => 2, + Tag::kNextFileNumber => 3, + Tag::kLastSequence => 4, + Tag::kCompactPointer => 5, + Tag::kDeletedFile => 6, + Tag::kNewFile => 7, + Tag::kPrevLogNumber => 9, + _ => 0 + }; + + val + } + + /// 根据值计算枚举 Tag + pub fn from_value(val: u32) -> Option { + let val = match val { + 1 => Some(Tag::k_comparator), + 2 => Some(Tag::kLogNumber), + 3 => Some(Tag::kNextFileNumber), + 4 => Some(Tag::kLastSequence), + 5 => Some(Tag::kCompactPointer), + 6 => Some(Tag::kDeletedFile), + 7 => Some(Tag::kNewFile), + 9 => Some(Tag::kPrevLogNumber), + _ => None + }; + + val + } } impl VersionEdit { + #[inline] + pub fn new() -> Self { + Self { + comparator_ : String::new(), + log_number_: 0, + prev_log_number_: 0, + next_file_number_: 0, + last_sequence_: 0, + has_comparator_: false, + has_log_number_: false, + has_prev_log_number_: false, + has_next_file_number_: false, + has_last_sequence_: false, + compact_pointers_: vec![], + deleted_files_: vec![], + new_files_: vec![] + } + } + + #[inline] + pub fn new_with_log_number(log_number: u64) -> Self { + let mut version_edit = VersionEdit::new(); + version_edit.set_log_number(log_number); + + version_edit + } + + #[inline] + pub fn new_with_prev_log_number(prev_log_number: u64) -> Self { + let mut version_edit = VersionEdit::new(); + version_edit.set_prev_log_number(prev_log_number); + + version_edit + } + /// 清空 - fn clear(&mut self) { + pub fn clear(&mut self) { self.comparator_.clear(); self.log_number_ = 0; self.prev_log_number_ = 0; @@ -62,27 +131,32 @@ impl VersionEdit { // compact_pointers_ don't clear } - fn set_comparator_name(&mut self, name: Slice){ + pub fn set_comparator_name(&mut self, name: Slice){ self.has_comparator_ = true; self.comparator_ = name.into(); } - fn set_prev_log_number(&mut self, num: u64){ + pub fn set_log_number(&mut self, num: u64){ + self.has_log_number_ = true; + self.log_number_ = num; + } + + pub fn set_prev_log_number(&mut self, num: u64){ self.has_prev_log_number_ = true; self.prev_log_number_ = num; } - fn set_next_file(&mut self, num: u64){ + pub fn set_next_file(&mut self, num: u64){ self.has_next_file_number_ = true; self.next_file_number_ = num; } - fn set_last_sequence(&mut self, seq: u64){ + pub fn set_last_sequence(&mut self, seq: u64){ self.has_last_sequence_ = true; self.last_sequence_ = seq; } - fn set_compact_pointer(&mut self, level: u32, key: InternalKey) { + pub fn set_compact_pointer(&mut self, level: u32, key: InternalKey) { self.compact_pointers_.push((level, key)) } @@ -105,13 +179,13 @@ impl VersionEdit { /// ``` /// /// ``` - fn add_file(&mut self, level: u32, file: u64, file_size: u64, smallest: InternalKey, largest: InternalKey) { + pub fn add_file(&mut self, level: u32, file: u64, file_size: u64, smallest: InternalKey, largest: InternalKey) { let file_meta_data = FileMetaData::new_with_number_file_size_internal_key(file, file_size, smallest, largest); self.new_files_.push((level, file_meta_data)); } - fn delete_file(&mut self, level: u32, file: u64) { + pub fn delete_file(&mut self, level: u32, file: u64) { self.deleted_files_.push((level, file)); } @@ -128,8 +202,59 @@ impl VersionEdit { /// ``` /// /// ``` - fn encode_to(&self, target: Vec) { - todo!() + pub fn encode_to(&self, target: &mut Vec) { + let mut position: usize = 0; + if self.has_comparator_ { + position += Coding::put_varint32(target, position, Tag::k_comparator.get_value() as u32); + position += Coding::put_length_prefixed_slice(target, position, self.comparator_.len()); + } + + if self.has_log_number_ { + let mut offset = Coding::put_varint32(target, position, Tag::kLogNumber.get_value() as u32); + position = position + offset; + + offset = Coding::put_varint64(target, position, self.log_number_); + position = position + offset; + } + + if self.has_prev_log_number_ { + position += Coding::put_varint32(target, position, Tag::kPrevLogNumber.get_value() as u32); + position += Coding::put_varint64(target, position, self.prev_log_number_); + } + + if self.has_next_file_number_ { + position += Coding::put_varint32(target, position, Tag::kNextFileNumber.get_value() as u32); + position += Coding::put_varint64(target, position, self.next_file_number_); + } + + if self.has_last_sequence_ { + position += Coding::put_varint32(target, position, Tag::kLastSequence.get_value() as u32); + position += Coding::put_varint64(target, position, self.last_sequence_); + } + + for i in 0..self.compact_pointers_.len() { + position += Coding::put_varint32(target, position, Tag::kCompactPointer.get_value() as u32); + position += Coding::put_varint32(target, position, self.compact_pointers_[i].0); + position += Coding::put_length_prefixed_slice(target, position, + self.compact_pointers_[i].1.encode_len()); + } + + for i in 0..self.deleted_files_.len() { + position += Coding::put_varint32(target, position, Tag::kDeletedFile.get_value() as u32); + position += Coding::put_varint32(target, position, self.deleted_files_[i].0); + position += Coding::put_varint64(target, position, self.deleted_files_[i].1); + } + + for i in 0..self.new_files_.len() { + let f: &FileMetaData = &self.new_files_[i].1; + position += Coding::put_varint32(target, position, Tag::kNewFile.get_value() as u32); + // level + position += Coding::put_varint32(target, position, self.new_files_[i].0); + position += Coding::put_varint64(target, position, f.get_number()); + position += Coding::put_varint64(target, position, f.get_file_size()); + position += Coding::put_length_prefixed_slice(target, position, f.get_smallest().encode_len()); + position += Coding::put_length_prefixed_slice(target, position, f.get_largest().encode_len()); + } } /// 将 source 中的数据解码至 self VersionEdit 中 @@ -145,21 +270,63 @@ impl VersionEdit { /// ``` /// /// ``` - fn decode_from(&mut self, source: Slice) { + pub fn decode_from(&mut self, source: &Slice) -> Status { self.clear(); + let version_edit = VersionEdit::new(); + + let msg : Option = Option::None; + + // todo Coding::get_varint32 存在问题。开发暂停 + while msg.is_none() && Coding::get_varint32(source) != 0_u32 { + let tag_value = Coding::get_varint32(source); + let tag = Tag::from_value(tag_value); + + if tag.is_none() { + return LevelError::corruption_string("VersionEdit", "unknown tag"); + } + + } todo!() } /// VersionEdit 输出调试信息 - fn debug_string(&self) -> Slice { - todo!() + pub fn debug_string(&self) -> Slice { + let debug_str = String::from("VersionEdit {"); + + let mut has_comparator_str = String::default(); + if(self.has_comparator_){ + has_comparator_str.push_str(format!("\n Comparator: {}", self.comparator_.as_str()).as_str()); + } + + let mut has_log_number__str = String::default(); + // if(self.has_log_number_){ + // todo + // // let append_log_number = logging.AppendNumberTo(&r, self.log_number_); + // let append_log_number = self.log_number_ + "".as_ref(); + // has_log_number__str.push_str(format!("\n LogNumber: {}", append_log_number).as_str()); + // } + + let rs = format!("{}{}{}", debug_str, has_log_number__str, "\n}\n"); + + Slice::from(rs) } } +/// 静态方法 impl<'a> VersionEdit { - pub fn get_internal_key(inout: Slice) -> Result { + pub fn get_internal_key(input: Slice) -> Result { + let key= InternalKey::default(); + todo!() + + // Slice str; + // if (GetLengthPrefixedSlice(input, &str)) { + // dst->DecodeFrom(str); + // return true; + // } else { + // return false; + // } } /// 从 Slice 中解出 level 值 diff --git a/src/db/version_edit_test.rs b/src/db/version_edit_test.rs index 3024e557cdf3bbb6f694195504a00074f8eaa364..4791a04f6f1f2fd4910150ecc122eb63779a0bb6 100644 --- a/src/db/version_edit_test.rs +++ b/src/db/version_edit_test.rs @@ -1,10 +1,54 @@ mod test { + use crate::db::version_edit; + use crate::db::version_edit::{Tag, VersionEdit}; + use crate::util::slice::Slice; + + #[test] + fn test_tag() { + let tag = Tag::kCompactPointer; + assert_eq!(tag.get_value(), 5); + + let tag1 = Tag::k_comparator; + let v = tag1.get_value(); + assert_eq!(v, 1); + } + + #[test] + fn test_version_edit_encode_to() { + let mut target: Vec = vec![]; + + let version_edit = VersionEdit::new_with_log_number(6); + version_edit.encode_to(&mut target); + println!("target: {}.", &target.len()); + // todo + // assert_eq!(target.len(), 2); + } + #[test] - fn test_() { + fn test_version_edit_decode_from_default() { + let source = Slice::from("a"); - println!("get_name: {}", "a"); + let mut version_edit = VersionEdit::new(); + let status = version_edit.decode_from(&source); + assert!(&status.is_corruption()); + assert_eq!(&status.get_msg(), "VersionEdit: unknown tag"); + } + #[test] + fn test_version_edit_decode_from() { + let source = Slice::from("a"); + let mut version_edit = VersionEdit::new_with_log_number(6); + let status = version_edit.decode_from(&source); + assert!(&status.is_corruption()); + assert_eq!(&status.get_msg(), "VersionEdit: unknown tag"); + } + + #[test] + fn test_version_edit_debug_string() { + let mut version_edit = VersionEdit::new_with_log_number(6); + let debug_str = version_edit.debug_string(); + println!("debug_str: \n {}", debug_str); } } \ No newline at end of file diff --git a/src/db/version_set.rs b/src/db/version_set.rs index 15dabb08fee721512b2ddc46b95a5cc0cf463376..61752032b02be96ba0c8102c0bc1ecde972fae92 100644 --- a/src/db/version_set.rs +++ b/src/db/version_set.rs @@ -6,7 +6,8 @@ use crate::db::table_cache::TableCache; use crate::db::version_edit::VersionEdit; use crate::traits::comparator_trait::Comparator; use crate::util::cache::Cache; -use crate::util::options::{Env, Options, ReadOptions}; +use crate::util::env::Env; +use crate::util::options::{Options, ReadOptions}; use crate::util::slice::Slice; use crate::util::Result; @@ -42,7 +43,7 @@ pub struct VersionSet { dbname_: Slice, options_: Options, table_cache_: TableCache, - icmp_: Box, + icmp_: InternalKeyComparator, next_file_number_: u64, manifest_file_number_: u64, last_sequence_: u64, @@ -93,7 +94,9 @@ pub struct Compaction { // size_t level_ptrs_[config::kNumLevels]; } -// .h line 68 - 71 +/// Lookup the value for key. If found, store it in *val and +/// return OK. Else return a non-OK status. Fills *stats. +/// REQUIRES: lock is not held struct GetStats { seek_file: Rc, seek_file_level: i32 @@ -101,7 +104,7 @@ struct GetStats { // ,cc line 163 struct LevelFileNumIterator { - icmp_: Rc, + icmp_: InternalKeyComparator, flist_: Vec, index_: u32, @@ -329,7 +332,7 @@ impl VersionSet { /// ``` /// /// ``` - fn find_file(icmp: &InternalKeyComparator, files:&Vec, key:&Slice) -> u32 { + fn find_file(icmp: InternalKeyComparator, files:&Vec, key:&Slice) -> u32 { todo!() } @@ -359,7 +362,7 @@ impl VersionSet { /// ``` /// /// ``` - fn some_file_overlaps_range(icmp: &InternalKeyComparator, disjoint_sorted_files:bool, + fn some_file_overlaps_range(icmp: InternalKeyComparator, disjoint_sorted_files:bool, files:&Vec, smallest_user_key:&Slice,largest_user_key:&Slice) -> bool { todo!() } diff --git a/src/lib.rs b/src/lib.rs index 064907a4348b12aece079868fa6b01bcf2a3552a..c0dce0021b0ec5283db9efe17a9e28a72b023211 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,10 +1,13 @@ -#![feature(box_syntax)] +extern crate core; -mod db; +pub mod db; mod table; -mod util; +pub mod util; mod traits; +#[global_allocator] +static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; + mod test { #[test] diff --git a/src/table/block_builder.rs b/src/table/block_builder.rs index a58922a09598e533c26255af1605d7dcd7362acb..f24a9956152436e27ef210d98c111ad68425426b 100644 --- a/src/table/block_builder.rs +++ b/src/table/block_builder.rs @@ -1,11 +1,45 @@ +use std::fs::File; +use std::sync::Arc; +use crate::util::options::{Options, OptionsPtr}; use crate::util::slice::Slice; use crate::util::Result; +use crate::util::status::Status; -pub struct BlockBuilder {} +// 智能指针 Rc, 引用计数器,用来记录一个值是否被使用,如果计数为零可清除。 +// 适用于堆中数据需要被程序多部分使用,但编译时不能确定谁最后完成。 + +// Arc 是一种能够使得数据在线程间安全共享的智能指针. +// Arc会追踪这个指针的所有拷贝,当最后一份拷贝离开作用域时,它就会安全释放内存。 + +// 智能指针 Box。 box 允许你将一个值放在堆上而不是栈上。留在栈上的则是指向堆数据的指针。 + +/// BlockBuilder 的 `Arc` 别名 +pub type BlockBuilderPtr = Arc; + +pub struct BlockBuilder { + // 在 BlockBuilder 初始化时,指定的配置项 + options: OptionsPtr, + index_block_options: OptionsPtr, + + // SSTable 生成后的文件 + file: Arc, + + offset: u64, + status: Status, + + // 生成 SSTable 中的数据区域 + data_block: BlockBuilderPtr, + // 生成 SSTable 中的数据索引区域 + index_block: BlockBuilderPtr, +} impl BlockBuilder { - /// 添加数据到block + pub fn new(options: OptionsPtr) -> Self { + todo!() + } + + /// 向datablock增加entry /// /// # Arguments /// @@ -22,6 +56,7 @@ impl BlockBuilder { pub fn add(&mut self, _key: &Slice, _value: &Slice) { todo!() } + /// 重置builder /// /// # Examples @@ -32,7 +67,8 @@ impl BlockBuilder { pub fn reset(&mut self) { todo!() } - /// 构造block + + /// 追加Restart points /// /// /// # Examples @@ -43,6 +79,7 @@ impl BlockBuilder { pub fn finish(&mut self) -> Result { todo!() } + /// 判断builder是否为空 /// /// # Examples @@ -53,7 +90,8 @@ impl BlockBuilder { pub fn empty(&self) -> bool { todo!() } - /// 估算当前的block大小 + + /// 估算当前的block大小, 超过一定大小后,写入文件 /// /// # Examples /// @@ -63,4 +101,5 @@ impl BlockBuilder { pub fn current_size_estimate(&self) -> usize { todo!() } + } \ No newline at end of file diff --git a/src/table/filter_block.rs b/src/table/filter_block.rs index 524c324c4c7c82cbfef98718c4f3617e15104828..00133829d48b3d8763179b3ac941527325ed275d 100644 --- a/src/table/filter_block.rs +++ b/src/table/filter_block.rs @@ -1,10 +1,40 @@ +use std::io::Write; +use std::sync::Arc; +use crate::traits::coding_trait::CodingTrait; +use crate::traits::filter_policy_trait::{FilterPolicy, FilterPolicyPtr}; +use crate::util::coding::Coding; use crate::util::slice::Slice; use crate::util::Result; -pub struct FilterBlockBuilder {} +// Generate new filter every 2KB of data +const FILTER_BASE_LG: usize = 11; +const FILTER_BASE: usize = 1 << FILTER_BASE_LG; + +/// +/// meta block 构建器 +/// FilterBlock,实质上就是SST文件里面的 meta block +/// +pub trait FilterBlock { + fn new_with_policy(policy: FilterPolicyPtr) -> Self; + + /// + /// 构造一个 FilterBlockBuilder + /// + /// # Arguments + /// + /// * `policy`: + /// * `capacity`: 初始化容量 + /// + /// returns: Self + /// + /// # Examples + /// + /// ``` + /// + /// ``` + fn new_with_policy_capacity(policy: FilterPolicyPtr, capacity: usize) -> Self; -impl FilterBlockBuilder { /// 设置block的起始位置 /// /// # Arguments @@ -18,9 +48,9 @@ impl FilterBlockBuilder { /// ``` /// filter_block_builder.start_block(1024_u64); /// ``` - pub fn start_block(&mut self, _block_offset: u64) { - todo!() - } + fn start_block(&mut self, block_offset: u64); + + fn add_key_from_str(&mut self, key: &str); /// 添加key到builder /// @@ -35,9 +65,8 @@ impl FilterBlockBuilder { /// ``` /// /// ``` - pub fn add_key(&mut self, _key: &Slice) { - todo!() - } + fn add_key(&mut self, key: &Slice); + /// 构造filterBlock /// /// # Examples @@ -45,15 +74,239 @@ impl FilterBlockBuilder { /// ``` /// filter_block_builder.finish(); /// ``` - pub fn finish(&mut self) -> Result { - todo!() + fn finish(&mut self) -> Result; + + fn get_policy(&self) -> FilterPolicyPtr; + + fn get_keys(&self) -> Vec; + + fn get_start(&self) -> Vec; + + fn get_result(&self) -> Vec; + + fn get_tmp_keys(&self) -> Vec; + + fn get_tmp_filter_offsets(&self) -> Vec; +} + +/// SSTable 文件里面的 meta block 构建器, 按内存里面指定的格式整理在内存中 +pub struct FilterBlockBuilder { + policy: FilterPolicyPtr, + // Flattened key contents + keys: Vec, + // Starting index in keys_ of each key + start: Vec, + // Filter data computed so far + result: Vec, + // policy_->CreateFilter() argument + tmp_keys: Vec, + filter_offsets: Vec, +} + +pub struct FilterBlockReader { + policy: FilterPolicyPtr, + // Pointer to filter data (at block-start) + data: Vec, + // Pointer to beginning of offset array (at block-end) + offset: Vec, + // Number of entries in offset array + num: usize, + // Encoding parameter (see kFilterBaseLg in .cc file) + base_lg: usize +} + +impl FilterBlock for FilterBlockBuilder { + fn new_with_policy(policy: FilterPolicyPtr) -> Self { + FilterBlock::new_with_policy_capacity(policy, 64) + } + + fn new_with_policy_capacity(policy: FilterPolicyPtr, capacity: usize) -> Self { + let keys:Vec = Vec::with_capacity(capacity); + let start:Vec = Vec::with_capacity(capacity); + let result:Vec = Vec::with_capacity(capacity); + let tmp_keys:Vec = vec![]; + let filter_offsets:Vec = vec![]; + + Self { + policy, + keys, + start, + result, + tmp_keys, + filter_offsets + } + } + + fn start_block(&mut self, block_offset: u64) { + let filter_index = block_offset / (FILTER_BASE as u64); + assert!(filter_index >= self.filter_offsets.len() as u64); + + while filter_index > self.filter_offsets.len() as u64 { + self.generate_filter(); + } + } + + fn add_key_from_str(&mut self, key: &str) { + self.add_key(&Slice::from(key)) + } + + fn add_key(&mut self, key: &Slice) { + self.start.push(key.len()); + self.keys.write(key.as_str().as_bytes()).expect("add_key error!"); + } + + fn finish(&mut self) -> Result { + if self.start.len() != 0 { + self.generate_filter(); + } + + // Append array of per-filter offsets + let array_offset = self.result.len() as u32; + // 当前需要写入的位置。result 中可能存在数据,因此为 self.result.len() 的位置 + let mut pos: usize = self.result.len(); + + // todo 判断是否需要扩容 + let result_total_capacity = self.result.capacity(); + + let dst_append = self.result.as_mut_slice(); + + for i in 0..self.filter_offsets.len() { + // 判断当前 pos + len 4 + let filter_offset_val = self.filter_offsets[i]; + pos = Coding::put_fixed32(dst_append, pos, filter_offset_val); + } + + pos = Coding::put_fixed32(dst_append, pos, array_offset); + + // Save encoding parameter in result + // todo 判断是否需要扩容 + Coding::put_varint64(self.result.as_mut_slice(), pos, FILTER_BASE_LG as u64); + + Ok(Slice::from_buf(&self.result)) + } + + fn get_policy(&self) -> FilterPolicyPtr { + self.policy.clone() + } + + fn get_keys(&self) -> Vec { + self.keys.to_vec() + } + + fn get_start(&self) -> Vec { + self.start.to_vec() + } + + fn get_result(&self) -> Vec { + self.result.to_vec() + } + + fn get_tmp_keys(&self) -> Vec { + self.tmp_keys.to_vec() + } + + fn get_tmp_filter_offsets(&self) -> Vec { + self.filter_offsets.to_vec() } } -pub struct FilterBlockReader {} +impl FilterBlockBuilder { + fn generate_filter(&mut self) { + let num_keys = self.start.len(); + + if num_keys == 0 { + // Fast path if there are no keys for this filter + self.filter_offsets.push(self.result.len() as u32); + return; + } + + /* Make list of keys from flattened key structure */ + // Simplify length computation + self.start.push(self.keys.len()); + // 如果 new_len 大于 len ,则 Vec 由差异扩展,每个额外的插槽都用 value 填充。如果 new_len 小于 len ,则 Vec 将被截断。 + self.tmp_keys.resize(num_keys, Slice::default()); + + for i in 0..num_keys { + let base = &self.keys[self.start[i]..]; + let length = self.start[i+1] - self.start[i]; + + let mut tmp_key = Vec::with_capacity(length); + tmp_key.write(&base); + self.tmp_keys[i] = Slice::from_vec(tmp_key); + } + + // Generate filter for current set of keys and append to result_. + self.filter_offsets.push(self.result.len() as u32); + + let mut keys: Vec<&Slice> = Vec::new(); + keys.push(&self.tmp_keys[0]); + let create_filter:Slice = self.policy.create_filter_with_len(num_keys, keys); + + // let result_len = self.result.len(); + // let result_total_capacity = self.result.capacity(); + self.result.write(create_filter.as_ref()); + // let result_len = self.result.len(); + // let result_total_capacity = self.result.capacity(); + + self.tmp_keys.clear(); + self.keys.clear(); + self.start.clear(); + } +} impl FilterBlockReader { - pub fn key_may_match(&self, _block_offset: u64, _key: &Slice) -> bool { + pub fn new_with_policy(policy: FilterPolicyPtr, contents: Slice) -> Self { + let data = Vec::new(); + let offset = Vec::new(); + + let contents_len = contents.len(); + + // 1 byte for base_lg_ and 4 for start of offset array + if contents_len < 5 { + return Self { + policy, + data, + offset, + num: 0, + base_lg: 0 + } + }; + + // let buf = contents.as_ref()[contents_len-5..]; + + // let base_lg_ = contentsVe[contents_len-1]; + + // let last_word: u32 = Coding::decode_fixed32(buf)); + Self { + policy, + data, + offset, + num: 0, + base_lg: 0 + } + } + + pub fn key_may_match(&self, block_offset: u64, key: &Slice) -> bool { todo!() } + + pub fn get_policy(&self) -> FilterPolicyPtr { + self.policy.clone() + } + + pub fn get_data(&self) -> Vec { + self.data.to_vec() + } + + pub fn get_offset(&self) -> Vec { + self.offset.to_vec() + } + + pub fn get_num(&self) -> usize { + self.num + } + + pub fn get_base_lg(&self) -> usize { + self.base_lg + } } \ No newline at end of file diff --git a/src/table/filter_block_test.rs b/src/table/filter_block_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..23506196fcc758d919834ae36258e569d3477d23 --- /dev/null +++ b/src/table/filter_block_test.rs @@ -0,0 +1,181 @@ + +mod test { + use std::borrow::BorrowMut; + use std::sync::Arc; + use crate::table::filter_block; + use crate::table::filter_block::{FilterBlock, FilterBlockBuilder, FilterBlockReader}; + use crate::traits::coding_trait::CodingTrait; + use crate::traits::filter_policy_trait::FilterPolicy; + use crate::util::coding::Coding; + use crate::util::filter_policy::BloomFilterPolicy; + use crate::util::slice::Slice; + use crate::util::hash::{Hash, ToHash}; + + use crate::util::Result; + + pub struct TestHashFilter { + //. + } + + impl TestHashFilter { + fn new() -> Self { + Self { + + } + } + } + + impl FilterPolicy for TestHashFilter { + fn name(&self) -> String { + String::from("TestHashFilter") + } + + fn create_filter(&self, keys: Vec<&Slice>) -> Slice { + let mut n: usize = 0; + for i in 0..keys.len() { + n += keys[i].len(); + } + + self.create_filter_with_len(n, keys) + } + + fn create_filter_with_len(&self, len: usize, keys: Vec<&Slice>) -> Slice { + let mut n: usize = len; + + let mut dst_chars = vec![0; n]; + let dst_chars_u8 = dst_chars.borrow_mut(); + + let mut offset: usize = 0; + for i in 0..keys.len() { + let h = Hash::hash_code(keys[i].as_ref(), 1); + let of = Coding::put_fixed32(dst_chars_u8, offset, h); + offset += of; + } + + Slice::from_buf(dst_chars_u8) + } + + fn key_may_match(&self, key: &Slice, bloom_filter: &Slice) -> bool { + let h = Hash::hash_code(key.to_vec().as_slice(), 1); + + let mut pos = 0; + while pos <= bloom_filter.size() { + let buf = &bloom_filter.as_ref()[pos..]; + + if h == Coding::decode_fixed32(buf) { + return true + } + + pos += 4; + } + + false + } + } + + // #[test] + // fn test_create_filter() { + // let policy = TestHashFilter::new(); + // + // let s1 = Slice::try_from(String::from("hello")).unwrap(); + // let s2 = Slice::try_from(String::from("world")).unwrap(); + // let mut keys : Vec<&Slice> = Vec::new(); + // keys.push(&s1); + // keys.push(&s2); + // + // let bloom_filter: Slice = policy.create_filter(keys); + // + // let mut key_may_match = policy.key_may_match( + // &Slice::try_from(String::from("hello")).unwrap(), + // &bloom_filter); + // assert!(key_may_match); + // + // key_may_match = policy.key_may_match(&Slice::try_from(String::from("world")).unwrap(), + // &bloom_filter); + // assert!(key_may_match); + // + // let mut key_not_match = policy.key_may_match(&Slice::try_from(String::from("x")).unwrap(), + // &bloom_filter); + // assert!(!key_not_match); + // + // key_not_match = policy.key_may_match(&Slice::try_from(String::from("helloworld")).unwrap(), + // &bloom_filter); + // assert!(!key_not_match); + // + // key_not_match = policy.key_may_match(&Slice::try_from(String::from("hello world")).unwrap(), + // &bloom_filter); + // assert!(!key_not_match); + // + // key_not_match = policy.key_may_match(&Slice::try_from(String::from("foo")).unwrap(), + // &bloom_filter); + // assert!(!key_not_match); + // } + + #[test] + fn test_filter_block_new_with_policy() { + let policy: Arc> = Arc::new(Box::new(TestHashFilter::new())); + + let filter_block: FilterBlockBuilder = FilterBlockBuilder:: + new_with_policy_capacity(policy, 10); + + let fp = filter_block.get_policy(); + let filter_policy_name = fp.name(); + assert_eq!(filter_policy_name, "TestHashFilter"); + assert_eq!(filter_block.get_keys().len(), 0); + assert_eq!(filter_block.get_result().len(), 0); + assert_eq!(filter_block.get_start().len(), 0); + assert_eq!(filter_block.get_tmp_keys().len(), 0); + assert_eq!(filter_block.get_tmp_filter_offsets().len(), 0); + } + + #[test] + fn test_filter_block_reader_new_with_policy_empty_content() { + let policy: Arc> = Arc::new(Box::new(TestHashFilter::new())); + let contents = Slice::default(); + + let filter_block_reader: FilterBlockReader = FilterBlockReader::new_with_policy(policy, contents); + + let fp_reader = filter_block_reader.get_policy(); + let _reader_filter_policy_name = fp_reader.name(); + assert_eq!(_reader_filter_policy_name, "TestHashFilter"); + assert_eq!(filter_block_reader.get_data().len(), 0); + assert_eq!(filter_block_reader.get_offset().len(), 0); + assert_eq!(filter_block_reader.get_num(), 0); + assert_eq!(filter_block_reader.get_base_lg(), 0); + } + + #[test] + fn test_filter_block_new_with_policy_and_addkey() { + let policy: Arc> = Arc::new(Box::new(TestHashFilter::new())); + let mut filter_block_builder: FilterBlockBuilder = FilterBlockBuilder::new_with_policy_capacity( + policy, 10); + + filter_block_builder.start_block(100); + filter_block_builder.add_key_from_str("foo"); + filter_block_builder.add_key_from_str("bar"); + filter_block_builder.add_key_from_str("box"); + filter_block_builder.start_block(200); + filter_block_builder.add_key_from_str("box"); + filter_block_builder.start_block(300); + filter_block_builder.add_key_from_str("hello"); + + let sliceRs: Result = filter_block_builder.finish(); + assert_eq!("a", "leveldb.BuiltinBloomFilter"); + } + + // #[test] + // fn test_filter_block_reader_new_with_policy_with_content() { + // let policy = Arc::new(BloomFilterPolicy::new(2)); + // let contents = Slice::from("\000"); + // + // let filter_block_reader: FilterBlockReader = FilterBlockReader::new_with_policy(policy, contents); + // + // let fp_reader = filter_block_reader.get_policy(); + // let _reader_filter_policy_name = fp_reader.name(); + // assert_eq!(_reader_filter_policy_name, "leveldb.BuiltinBloomFilter"); + // assert_eq!(filter_block_reader.get_data().len(), 0); + // assert_eq!(filter_block_reader.get_offset().len(), 0); + // assert_eq!(filter_block_reader.get_num(), 0); + // assert_eq!(filter_block_reader.get_base_lg(), 0); + // } +} \ No newline at end of file diff --git a/src/table/format.rs b/src/table/format.rs index 01c9610c998bfc3f3532bf7df37e4868de6d1577..e082810331dc2420b1e28b128bde167fa603fc8f 100644 --- a/src/table/format.rs +++ b/src/table/format.rs @@ -12,26 +12,28 @@ pub const k_max_encoded_length: u32 = 10 + 10; /// of two block handles and a magic number. pub const k_encoded_length: u32 = 2 * k_max_encoded_length + 8; -// // kTableMagicNumber was picked by running -// // echo http://code.google.com/p/leveldb/ | sha1sum -// // and taking the leading 64 bits. -// pub const k_table_magic_number: &str = 0xdb4775248b80fb57ull; +/// Footer 的大小为 48 字节,内容是一个 8 字节的 magic number 和两个 BlockHandle 构成 +/// 在 Footer::EncodeTo 和 Footer::DecodeFrom 中起作用 +/// kTableMagicNumber was picked by running +/// echo http://code.google.com/p/leveldb/ | sha1sum +/// and taking the leading 64 bits. +pub const k_table_magic_number: u64 = 0xdb4775248b80fb57; /// 1-byte type + 32-bit crc pub const k_block_trailer_size: usize = 5; pub struct BlockHandle { // 偏移量 - offset_: u64, + offset: u64, // - size_: u64 + size: u64 } /// Footer encapsulates the fixed information stored at the tail /// end of every table file. pub struct Footer { - metaindex_handle_: BlockHandle, - index_handle_: BlockHandle + meta_index_handle: BlockHandle, + index_handle: BlockHandle } pub struct BlockContents { @@ -45,7 +47,7 @@ pub struct BlockContents { heap_allocated:bool, } -trait BlockTrait { +trait BlockHandleTrait { /// /// The offset of the block in the file. /// @@ -102,7 +104,7 @@ trait BlockTrait { trait FootTrait { // The block handle for the metaindex block of the table - fn metaindex_handle(&self) -> BlockHandle; + fn meta_index_handle(&self) -> BlockHandle; fn set_metaindex_handle(&mut self, h: BlockHandle); @@ -151,21 +153,21 @@ trait BlockContent { } -impl BlockTrait for BlockHandle { +impl BlockHandleTrait for BlockHandle { fn offset(&self) -> u64 { - self.offset_ + self.offset } fn set_offset(&mut self, offset: u64) { - self.offset_ = offset; + self.offset = offset; } fn size(&self) -> u64 { - self.size_ + self.size } fn set_size(&mut self, size: u64) { - self.size_ = size; + self.size = size; } fn encode_to(&self) -> Result { @@ -190,14 +192,15 @@ impl Default for BlockHandle { #[inline] fn default() -> Self { BlockHandle { - offset_: 0, - size_: 0, + offset: 0, + size: 0, } } } impl FootTrait for Footer { - fn metaindex_handle(&self) -> BlockHandle { + /// The block handle for the metaindex block of the table + fn meta_index_handle(&self) -> BlockHandle { todo!() } diff --git a/src/table/mod.rs b/src/table/mod.rs index ade478fc4045f8c85b832af1adda4f74d428e097..f4e0a9449b88927c779a3f00f5aa0a0c546fb351 100644 --- a/src/table/mod.rs +++ b/src/table/mod.rs @@ -1,8 +1,13 @@ pub mod block; pub mod block_builder; pub mod filter_block; +mod filter_block_test; pub mod format; mod format_test; -pub(crate) mod ss_table; +pub mod ss_table; mod ss_table_test; -mod iterator_wrapper; \ No newline at end of file +pub mod iterator_wrapper; +pub mod table_builder; +mod table_builder_test; +pub mod table; +mod table_test; \ No newline at end of file diff --git a/src/table/table.rs b/src/table/table.rs new file mode 100644 index 0000000000000000000000000000000000000000..65db03891b948262187c7127a1f8c78d37c057d7 --- /dev/null +++ b/src/table/table.rs @@ -0,0 +1,25 @@ +use std::fs::File; +use std::sync::Arc; +use crate::util::options::Options; +use crate::util::Result; +use crate::util::status::Status; + +pub struct Table { + rep: Rep +} + +struct Rep { + options: Box, + status: Status, + file: Arc, +} + +impl Table { + pub fn new() -> Self{ + todo!() + } + + pub fn open(&self, options:&Options, randomAccessFile:&File, file_size: u64, table:&Table) -> Result{ + todo!() + } +} \ No newline at end of file diff --git a/src/table/table_builder.rs b/src/table/table_builder.rs new file mode 100644 index 0000000000000000000000000000000000000000..c2d449260de3aa60f457c410609d47dd5d7f652d --- /dev/null +++ b/src/table/table_builder.rs @@ -0,0 +1,138 @@ +use std::borrow::Borrow; +use std::fs::File; +use std::sync::Arc; +use crate::table::block_builder::BlockBuilder; +use crate::table::filter_block::{FilterBlock, FilterBlockBuilder}; +use crate::table::format::BlockHandle; +use crate::traits::filter_policy_trait::FilterPolicy; +use crate::util::options::{CompressionType, OptionsPtr, Options}; +use crate::util::slice::Slice; +use crate::util::status::Status; +use crate::util::unsafe_slice::UnsafeSlice; + +/// 在一个 SSTable 中,文件末尾的 Footer 是定长的, +/// 其他数据都被划分成一个个变长的 block: +/// index block(@see format.BlockHandle、Footer#index_handle)、 +/// meta_index block(@see format.BlockHandle、Footer#meta_index_handle)、 +/// meta blocks(@see table.FilterBlock)、 +/// data blocks。 +pub struct TableBuilder { + rep: Box +} + +/// TableBuilder Rep 结构体, 内部使用 +struct Rep<> { + options: OptionsPtr, + index_block_options: OptionsPtr, + + // SSTable 生成后的文件 + file: Arc, + + offset: u64, + status: Status, + + // 生成 SSTable 中的数据区域 + data_block: BlockBuilder, + // 生成 SSTable 中的数据索引区域 + index_block: BlockBuilder, + + last_key: Slice, + num_entries: u64, + // Either Finish() or Abandon() has been called. + closed: bool, + + // 生成 SSTable 中的元数据区域 + filter_block: Option, + // 判断是否需要生成 SSTable中的数据索引, SSTable中每次生成一个完整的块之后,需要将该值置为 true, 说明需要为该块添加索引 + pending_index_entry: bool, + // Handle to add to index block + // pending_handle 记录需要生成数据索引的数据块在 SSTable 中的偏移量和大小 + // 也就是说, pending_handle 主要用于表示当前块的offset及size。 + pending_handle: BlockHandle, + + compressed_output: Slice, +} + +impl TableBuilder { + pub fn new_with_writable_file(options: OptionsPtr, writable_file: Arc) -> Self { + let rep = Rep::new(options, writable_file); + + // Self { + // rep + // } + + todo!() + } + + /// 写入 entry + pub fn add(&self, key: &UnsafeSlice, value: &UnsafeSlice) { + todo!() + } + + /// flush到文件 + pub fn flush(&self) { + todo!() + } + + /// block->Finish、压缩 + pub fn write_block(&self, block: &BlockBuilder, handler: &BlockHandle) { + todo!() + } + + /// datablock写入文件,添加压缩方式、crc。 + pub fn write_raw_block(&self, block_contents: &UnsafeSlice, compression_type: CompressionType, handler: &BlockHandle) { + todo!() + } + + pub fn status(&self) -> Status { + todo!() + } + + /// 剩余datablock写入文件,并生成管理区。 + pub fn finish(&self) -> Status { + todo!() + } + + pub fn abandon(&self) { + todo!() + } + + pub fn get_num_entries(&self) -> u64 { + todo!() + } + + pub fn get_file_size(&self) -> u64 { + todo!() + } +} + +impl Rep { + pub fn new(opt: OptionsPtr, writableFile: Arc) -> Self { + let mut filter_block: Option; + if opt.filter_policy.is_none() { + filter_block = None; + }else { + filter_block = Some(FilterBlockBuilder::new_with_policy(opt.filter_policy.clone().unwrap())); + } + // TODo if let sytax + // let filter_block = opt.filter_policy.map(|e|FilterBlockBuilder::new_with_policy(e.clone().unwrap())); + + Self { + options: opt.clone(), + index_block_options: opt.clone(), + file: writableFile, + offset: 0, + // default Status::OK + status: Status::default(), + data_block: BlockBuilder::new(opt.clone()), + index_block: BlockBuilder::new(opt.clone()), + last_key: Slice::default(), + num_entries: 0, + closed: false, + filter_block, + pending_index_entry: false, + pending_handle: BlockHandle::default(), + compressed_output: Slice::default(), + } + } +} \ No newline at end of file diff --git a/src/table/table_builder_test.rs b/src/table/table_builder_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/table/table_test.rs b/src/table/table_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/traits/coding_trait.rs b/src/traits/coding_trait.rs index 3a0a49e42ebb574d2c92272b3e57f0ff406bf999..fd498821991260e1d1546a55b8f04d1911029030 100644 --- a/src/traits/coding_trait.rs +++ b/src/traits/coding_trait.rs @@ -70,7 +70,7 @@ pub trait CodingTrait { /// # Arguments /// /// * `dst`: 目标字符串 - /// * `value`: Slice类型的编码值 + /// * `value_len`: Slice类型的编码值长度 /// /// returns: () /// @@ -79,7 +79,8 @@ pub trait CodingTrait { /// ``` /// /// ``` - fn put_length_prefixed_slice(dst: &mut [u8], offset: usize, value: Slice) -> usize; + // fn put_length_prefixed_slice(dst: &mut [u8], offset: usize, value: &Slice) -> usize; + fn put_length_prefixed_slice(dst: &mut [u8], offset: usize, value_len: usize) -> usize; /// 从slice的开头解码一个32位的变长整数, 并将slice的索引置于解码后的位置 /// /// # Arguments @@ -93,7 +94,7 @@ pub trait CodingTrait { /// ``` /// /// ``` - fn get_varint32(input: &mut Slice) -> u32; + fn get_varint32(input: &Slice) -> u32; /// 从slice的开头解码一个64位的变长整数, 并将slice的索引置于解码后的位置 /// /// # Arguments @@ -107,7 +108,7 @@ pub trait CodingTrait { /// ``` /// /// ``` - fn get_varint64(input: &mut Slice) -> u64; + fn get_varint64(input: &Slice) -> u64; /// 从slice数据中读取长度 返回长度的Slice /// /// # Arguments @@ -172,7 +173,7 @@ pub trait CodingTrait { /// /// ``` /// 从slice的开头解码一个32位的变长整数, 并将slice的索引置于解码后的位置 - fn varint_length(value: u64) -> i32; + fn varint_length(value: usize) -> usize; /// 32位定长正整数编码 /// /// # Arguments diff --git a/src/traits/comparator_trait.rs b/src/traits/comparator_trait.rs index a7a74c1d637fb4626079b3af33d3caf5420b24a0..477eb58d765ec5b2593fefc739561b1c18168e2e 100644 --- a/src/traits/comparator_trait.rs +++ b/src/traits/comparator_trait.rs @@ -17,16 +17,17 @@ pub trait Comparator { /// /// ``` /// use std::cmp::Ordering; + /// use crate::util::slice::Slice; /// /// let comp = BytewiseComparatorImpl::default(); - /// optionVal = comp.compare(&Slice::from("a"), &Slice::from("ab")); + /// optionVal = comp.compare("a", "ab"); /// assert_eq!(optionVal.unwrap(), Ordering::Less); /// /// let comp = BytewiseComparatorImpl::default(); /// let optionVal = comp.compare(&Slice::from("b"), &Slice::from("abcd")); /// assert_eq!(optionVal.unwrap(), Ordering::Greater); /// ``` - fn compare(&self, a: &Slice, b: &Slice) -> Option; + fn compare(&self, a: &[u8], b: &[u8]) -> Option; /// 返回comparator的名字 fn get_name(&self) -> String; diff --git a/src/traits/filter_policy_trait.rs b/src/traits/filter_policy_trait.rs index f3e4ad07bdcfb2c33b39b569c527890873dbface..aaafafd2b0af82d7bbf3435e74b3922bf0be716c 100644 --- a/src/traits/filter_policy_trait.rs +++ b/src/traits/filter_policy_trait.rs @@ -1,5 +1,10 @@ +use std::sync::Arc; use crate::util::slice::Slice; + +/// FilterPolicy 的 `Arc>` 别名 +pub type FilterPolicyPtr = Arc>; + /// 用于key过滤,可以快速的排除不存在的key pub trait FilterPolicy { @@ -23,6 +28,8 @@ pub trait FilterPolicy { /// # Examples /// /// ``` + /// use crate::util::slice::Slice; + /// /// let mut keys : Vec = Vec::new(); /// keys.push(Slice::try_from(String::from("hello")).unwrap()); /// keys.push(Slice::try_from(String::from("world")).unwrap()); @@ -30,7 +37,9 @@ pub trait FilterPolicy { /// let policy = BloomFilterPolicy::new(800); /// let bloom_filter: Slice = policy.create_filter(keys); /// ``` - fn create_filter(&self, keys: Vec) -> Slice; + fn create_filter(&self, keys: Vec<&Slice>) -> Slice; + + fn create_filter_with_len(&self, len: usize, keys: Vec<&Slice>) -> Slice; /// /// diff --git a/src/traits/iterator.rs b/src/traits/iterator.rs index 7717574de82560f01aac082d4790953581d0b93c..69bb7c18f633189f854d4405ff9ac9f11f928a14 100644 --- a/src/traits/iterator.rs +++ b/src/traits/iterator.rs @@ -1,5 +1,9 @@ use crate::util::slice::Slice; +use crate::util::status::Status; +use crate::util::unsafe_slice::UnsafeSlice; +/// +/// Iterator 迭代器定义 pub trait DataIterator { /// 检查当前位置是否有效 /// @@ -67,6 +71,7 @@ pub trait DataIterator { /// /// ``` fn next(&mut self); + /// 定位到上一个元素 /// /// # Arguments @@ -80,6 +85,7 @@ pub trait DataIterator { /// /// ``` fn pre(&mut self); + /// 获取key值 /// /// # Arguments @@ -92,7 +98,9 @@ pub trait DataIterator { /// ``` /// /// ``` - fn key(&self) -> &Slice; + /// todo UnsafeSlice 与 Slice 应该存在一个共同traits或者struct 便于API操作 + fn key(&self) -> UnsafeSlice; + /// 获取value值 /// /// # Arguments @@ -105,6 +113,8 @@ pub trait DataIterator { /// ``` /// /// ``` - fn value(&self) -> &Slice; + fn value(&self) -> UnsafeSlice; + + fn status(&self) -> Status; } diff --git a/src/traits/mod.rs b/src/traits/mod.rs index 47c662e6d0774ea213fb7838379a7ae1e0073d24..a1332ef7548ed05a5d266799f00c677e3720015f 100644 --- a/src/traits/mod.rs +++ b/src/traits/mod.rs @@ -4,5 +4,5 @@ pub mod comparator_trait; pub mod coding_trait; pub mod filter_policy_trait; -use std::rc::Rc; + pub use iterator::DataIterator; diff --git a/src/util/arena.rs b/src/util/arena.rs index 3f46dcb401207e891698b81eb28388c407d88531..d239b485d6d0fa0bd97e646cd1615bec8000a49d 100644 --- a/src/util/arena.rs +++ b/src/util/arena.rs @@ -1,10 +1,21 @@ +use std::slice; use std::alloc::{alloc, dealloc, Layout}; use std::ptr::NonNull; -use std::slice; +use std::sync::{Arc, Mutex}; + +use crate::util::slice::Slice; // Arena block size const ARENA_BLOCK_SIZE: usize = 4096; +pub type ArenaRef = Arc>; + + +/// +pub trait ArenaAllocLike { + fn copy_with_arena(&self, arena: ArenaRef) -> Self; +} + pub struct Arena { alloc_ptr: Option>, alloc_bytes_remaining: usize, @@ -24,7 +35,6 @@ impl Default for Arena { } impl Arena { - /// 申请一块内存 /// /// # Arguments @@ -102,4 +112,16 @@ impl Drop for Arena { } } } +} + +impl ArenaAllocLike for Slice { + fn copy_with_arena(&self, arena: ArenaRef) -> Self { + unsafe { + let mut lock_guard = arena.lock().unwrap(); + let dst = lock_guard.allocate(self.len()); + let src = &**self; + dst.copy_from_slice(src); + Slice::from_raw_parts(dst.as_mut_ptr(), self.len()) + } + } } \ No newline at end of file diff --git a/src/util/coding.rs b/src/util/coding.rs index 3a13b7dd95b7ac2a1c47de34c178dfbb328487bb..421ea97a39fb695fae2bf327210c7545e4fe48b5 100644 --- a/src/util/coding.rs +++ b/src/util/coding.rs @@ -1,3 +1,4 @@ +use std::io::Write; use crate::traits::coding_trait::CodingTrait; use crate::traits::coding_trait::Coding32; use crate::traits::coding_trait::Coding64; @@ -83,12 +84,13 @@ impl CodingTrait for Coding { offset } - fn put_length_prefixed_slice(dst: &mut [u8], offset: usize, value: Slice) -> usize { - Self::put_varint64(dst, offset, value.size() as u64); + // fn put_length_prefixed_slice(dst: &mut [u8], offset: usize, value: &Slice) -> usize { + fn put_length_prefixed_slice(dst: &mut [u8], offset: usize, value_len: usize) -> usize { + Self::put_varint64(dst, offset, value_len as u64); offset } - fn get_varint32(input: &mut Slice) -> u32 { + fn get_varint32(input: &Slice) -> u32 { let cow = input.borrow_data(); let bytes = cow.as_bytes(); let mut result = 0_u32; @@ -108,7 +110,7 @@ impl CodingTrait for Coding { result } - fn get_varint64(input: &mut Slice) -> u64 { + fn get_varint64(input: &Slice) -> u64 { let cow = input.borrow_data(); let bytes = cow.as_bytes(); let mut result = 0_u64; @@ -133,7 +135,7 @@ impl CodingTrait for Coding { Slice::from_buf(decode.to_le_bytes().as_mut_slice()) } - fn varint_length(mut value: u64) -> i32 { + fn varint_length(mut value: usize) -> usize { let mut len = 1; while value >= 128 { value >>= 7; @@ -143,35 +145,13 @@ impl CodingTrait for Coding { } fn encode_fixed32(value: u32, buf: &mut [u8], mut offset: usize) -> usize { - buf[offset] = value as u8; - offset += 1; - buf[offset] = (value >> 8) as u8; - offset += 1; - buf[offset] = (value >> 16) as u8; - offset += 1; - buf[offset] = (value >> 24) as u8; - offset += 1; - offset + (&mut buf[offset..]).write(&value.to_le_bytes()).unwrap(); + offset+4 } fn encode_fixed64(value: u64, buf: &mut [u8], mut offset: usize) -> usize { - buf[offset] = value as u8; - offset += 1; - buf[offset] = (value >> 8) as u8; - offset += 1; - buf[offset] = (value >> 16) as u8; - offset += 1; - buf[offset] = (value >> 24) as u8; - offset += 1; - buf[offset] = (value >> 32) as u8; - offset += 1; - buf[offset] = (value >> 40) as u8; - offset += 1; - buf[offset] = (value >> 48) as u8; - offset += 1; - buf[offset] = (value >> 56) as u8; - offset += 1; - offset + (&mut buf[offset..]).write(&value.to_le_bytes()).unwrap(); + offset+8 } @@ -197,9 +177,41 @@ impl CodingTrait for Coding { macro_rules! coding_impl { {$TRAIT: ident, $TYPE: ty, $VAR_NAME: ident, $FIXED_NAME: ident} => { impl $TRAIT for $TYPE { + /// 变长正整数编码 + /// + /// # Arguments + /// + /// * `buf`: 目标数组 + /// * `offset`: 偏移量 + /// + /// returns: usize : 编码后的偏移量 + /// + /// # Examples + /// + /// ``` + /// let mut buf: [u8; 4] = [0, 0, 0, 0]; + /// let value: u32 = 65534; + /// let offset = value.varint(&mut buf, 0); + /// ``` fn varint(self, buf: &mut [u8], offset: usize) -> usize { Coding::$VAR_NAME (self, buf, offset) } + /// 定长正整数编码 + /// + /// # Arguments + /// + /// * `buf`: 目标数组 + /// * `offset`: 偏移量 + /// + /// returns: usize : 编码后的偏移量 + /// + /// # Examples + /// + /// ``` + /// let mut buf: [u8; 4] = [0, 0, 0, 0]; + /// let value: u32 = 65534; + /// let offset = value.fixedint(&mut buf, 0); + /// ``` fn fixedint(self, buf: &mut [u8], offset: usize) -> usize { Coding::$FIXED_NAME (self, buf, offset) } diff --git a/src/util/coding_test.rs b/src/util/coding_test.rs index c924acb41d896f96c9038bb993ff5762e28944c9..1531ad0f440208695cb6adbc6f38c3312315a585 100644 --- a/src/util/coding_test.rs +++ b/src/util/coding_test.rs @@ -127,7 +127,7 @@ mod test { #[test] fn test_varint_length() { - let len = Coding::varint_length( 65535 as u64); + let len = Coding::varint_length(65535 as u64 as usize); println!("len: {:?}", len); assert_eq!(len, 3); } diff --git a/src/util/comparator.rs b/src/util/comparator.rs index 5159a95788853e3d27565a1ff1e08f2913b65a05..661a930059cad515d1f52cd324e0e8eb08a5d0b2 100644 --- a/src/util/comparator.rs +++ b/src/util/comparator.rs @@ -1,8 +1,9 @@ - use std::cmp::{min, Ordering}; -use crate::traits::comparator_trait::{Comparator}; + +use crate::traits::comparator_trait::Comparator; use crate::util::slice::Slice; +#[derive(Copy, Clone)] pub struct BytewiseComparatorImpl {} /// @@ -11,14 +12,33 @@ pub struct BytewiseComparatorImpl {} /// 也就是说 i>helloworld,因为先比较i和h,i>h,比较直接结束 impl Default for BytewiseComparatorImpl { fn default() -> Self { - Self{} + Self {} } } +#[allow(improper_ctypes)] +extern { + fn memcmp(s1: *const i8, s2: *const i8, n: usize) -> i32; +} + impl Comparator for BytewiseComparatorImpl { - fn compare(&self, a: &Slice, b: &Slice) -> Option { - a.partial_cmp(b) + fn compare(&self, a: &[u8], b: &[u8]) -> Option { + let min = a.len().min(b.len()); + let cmp = unsafe { + memcmp( + a.as_ptr() as *const i8, + b.as_ptr() as *const i8, + min, + ) + }; + if cmp == 0 { + a.len().partial_cmp(&b.len()) + } else if cmp > 0 { + Some(Ordering::Greater) + } else { + Some(Ordering::Less) + } } fn get_name(&self) -> String { @@ -30,7 +50,7 @@ impl Comparator for BytewiseComparatorImpl { let min_length: usize = min(start.len(), limit.len()); let mut diff_index: usize = 0; - let mut start_char_vec: Vec = start.as_bytes().to_vec(); + let mut start_char_vec: Vec = start.as_bytes().to_vec(); let limit_char_vec: &Vec = &limit.to_vec(); // or use // let start_char_vec: Vec = start.chars().collect::>(); @@ -47,7 +67,7 @@ impl Comparator for BytewiseComparatorImpl { // 如果一个字符串是另个一字符串的前缀,无需做截短操作,否则进入 else。 if diff_index >= min_length { // 说明 start是limit的前缀,或者反之,此时不作修改,直接返回 - } else{ + } else { // 尝试执行字符start[diff_index]++, 设置start长度为diff_index+1,并返回 // ++条件:字符 < oxff 并且字符+1 < limit上该index的字符 let diff_byte: u8 = start_char_vec[diff_index]; @@ -61,9 +81,8 @@ impl Comparator for BytewiseComparatorImpl { } } - let shortest_separator: &[u8] = &start_char_vec[0..diff_index+1]; - - let shortest_separator_val: String= Slice::from_buf(shortest_separator).into(); + let shortest_separator: &[u8] = &start_char_vec[0..diff_index + 1]; + let shortest_separator_val: String = Slice::from_buf(shortest_separator).into(); shortest_separator_val } @@ -72,15 +91,15 @@ impl Comparator for BytewiseComparatorImpl { // 如果找不到说明 key的字符都是 u8::MAX,直接返回 let key_len = key.len(); - let mut key_char_vec: Vec = key.as_bytes().to_vec(); + let mut key_char_vec: Vec = key.as_bytes().to_vec(); for i in 0..key_len { let byte_val: u8 = key_char_vec[i]; if byte_val != u8::MAX { key_char_vec[i] = byte_val + 1; - let short_successor: &[u8] = &key_char_vec[0..i+1]; + let short_successor: &[u8] = &key_char_vec[0..i + 1]; - let short_successor_val: String= Slice::from_buf(short_successor).into(); + let short_successor_val: String = Slice::from_buf(short_successor).into(); return short_successor_val; } } diff --git a/src/util/comparator_test.rs b/src/util/comparator_test.rs index e1ad1b62b0e57ea8971d2b56564c216ccaf9e208..b018bf6588bb960fb610a30288ec466f7bb815eb 100644 --- a/src/util/comparator_test.rs +++ b/src/util/comparator_test.rs @@ -20,12 +20,9 @@ mod test { let option_val = comp.compare(&Slice::from("a"), &Slice::from("ab")); assert_eq!(option_val.unwrap(), Ordering::Less); - // // todo Slice 存在 bug 未修复 - // let comp = BytewiseComparatorImpl::default(); - // let option_val = comp.compare(&Slice::from("b"), &Slice::from("abcd")); - // assert_eq!(option_val.unwrap(), Ordering::Greater); + let option_val = comp.compare(&Slice::from("b"), &Slice::from("abcd")); + assert_eq!(option_val.unwrap(), Ordering::Greater); - let comp = BytewiseComparatorImpl::default(); let option_val = comp.compare(&Slice::from("abcd"), &Slice::from("abcd")); assert_eq!(option_val.unwrap(), Ordering::Equal); } @@ -96,7 +93,7 @@ mod test { // u8max 结尾 let mut u8_vec: Vec = vec![]; - u8_vec.write(&String::from("helloWorld").as_bytes().to_vec()); + u8_vec.write(&String::from("helloWorld").as_bytes().to_vec()).unwrap(); u8_vec.push(u8::MAX); let u8_array_str = String::from(Slice::from_buf(u8_vec.as_slice())); @@ -109,7 +106,7 @@ mod test { // u8max 开头 let mut u8_vec: Vec = vec![]; u8_vec.push(u8::MAX); - u8_vec.write(&String::from("helloWorld").as_bytes().to_vec()); + u8_vec.write(&String::from("helloWorld").as_bytes().to_vec()).unwrap(); let u8_max_str = String::from(Slice::from_buf(u8_vec.as_slice())); let comp = BytewiseComparatorImpl::default(); diff --git a/src/util/const.rs b/src/util/const.rs index a8ffcf84196499637a80e70873f174ae079929b0..4ae23686e47641f2aeab78fe093ba0b9195fa2d2 100644 --- a/src/util/const.rs +++ b/src/util/const.rs @@ -5,3 +5,5 @@ pub const COLON_WHITE_SPACE: &'static str = ": "; /// hash 的默认seed: 0xbc9f1d34 pub const HASH_DEFAULT_SEED: u32 = 0xbc9f1d34; + +pub const DEBUG_ENABLE: bool = true; diff --git a/src/util/debug.rs b/src/util/debug.rs new file mode 100644 index 0000000000000000000000000000000000000000..464919b011763959d8fec31668f7ebd04115d0a6 --- /dev/null +++ b/src/util/debug.rs @@ -0,0 +1,26 @@ + + + +// #[cfg(feature = "debug-macro")] +#[cfg(CORE_DEBUG = "true")] +#[macro_export] +macro_rules! debug { + () => { + std::io::stdout().write("\n".as_bytes()).unwrap(); + }; + ($($arg:tt)*) => {{ + use std::io::Write; + std::io::stdout().write(format!($($arg)*).as_bytes()); + debug!(); + }}; +} + +// #[cfg(not(feature = "debug-macro"))] +#[cfg(not(CORE_DEBUG = "true"))] +#[macro_export] +macro_rules! debug { + () => { + }; + ($($arg:tt)*) => {{ + }}; +} \ No newline at end of file diff --git a/src/util/env.rs b/src/util/env.rs new file mode 100644 index 0000000000000000000000000000000000000000..8bdb966f36a04d5248a0e79b809304a0a3de46a7 --- /dev/null +++ b/src/util/env.rs @@ -0,0 +1,11 @@ +use std::fs::File; +use crate::util::Result; +use crate::util::slice::Slice; + +pub struct Env {} + +impl Env { + pub fn new_writable_file(&self, fname: &Slice) -> Result { + todo!() + } +} \ No newline at end of file diff --git a/src/util/env_test.rs b/src/util/env_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/util/filter_policy.rs b/src/util/filter_policy.rs index 3ba1ac8cd58dd096b0d497859885db96c87999fd..726e5411b9fb034396b426a875163bd35a6e5741 100644 --- a/src/util/filter_policy.rs +++ b/src/util/filter_policy.rs @@ -19,8 +19,9 @@ pub trait AsBloomHash { /// 实现了 Slice 转 bloom_hash 的特质 /// Sample: /// ``` -/// let val = "aabbccd"; -/// let slice: Slice = Slice::from_buf(val.as_bytes()); +/// use rand::distributions::Slice; +/// let val = "aabbccd"; +/// let slice = Slice::from_buf(val.as_bytes()); /// let hash_val = slice.bloom_hash(); /// ``` impl AsBloomHash for Slice { @@ -78,11 +79,15 @@ impl FromPolicy for BloomFilterPolicy { impl FilterPolicy for BloomFilterPolicy { fn name(&self) -> String { - String::from("leveldb.BuiltinBloomFilter2") + String::from("leveldb.BuiltinBloomFilter") } - fn create_filter(&self, keys: Vec) -> Slice { - let n: usize = keys.len(); + fn create_filter(&self, keys: Vec<&Slice>) -> Slice { + self.create_filter_with_len(keys.len(), keys) + } + + fn create_filter_with_len(&self, len: usize, keys: Vec<&Slice>) -> Slice { + let n: usize = len; let mut bits: usize = n * self.bits_per_key; @@ -99,7 +104,7 @@ impl FilterPolicy for BloomFilterPolicy { dst_chars[bytes] = self.k as u8; for i in 0..n { - let slice = keys.get(i).unwrap(); + let slice = keys[i]; let mut h : u32 = slice.bloom_hash(); let delta : u32 = (h >> 17) | (h << 15); @@ -172,11 +177,20 @@ impl FilterPolicy for InternalFilterPolicy { todo!() } - fn create_filter(&self, keys: Vec) -> Slice { + fn create_filter(&self, keys: Vec<&Slice>) -> Slice { + self.create_filter_with_len(keys.len(), keys) + } + + fn create_filter_with_len(&self, len: usize, keys: Vec<&Slice>) -> Slice { + // 根据指定的参数创建过滤器,并返回结果, 结果为dst的原始内容 + append结果。 + // 参数keys[0,n-1]包含依据用户提供的comparator排序的key列表--可重复, + // 并把根据这些key创建的filter追加到 dst中。 + // todo!() } fn key_may_match(&self, key: &Slice, bloom_filter: &Slice) -> bool { todo!() } + } \ No newline at end of file diff --git a/src/util/filter_policy_test.rs b/src/util/filter_policy_test.rs index e84ee121de60da5aa508025477dcad7d8ab595bd..bea6d2aafb9bf43e610f680a568aa0859754e1ef 100644 --- a/src/util/filter_policy_test.rs +++ b/src/util/filter_policy_test.rs @@ -33,9 +33,12 @@ fn test_new() { fn test_create_filter() { let policy = BloomFilterPolicy::new(800); - let mut keys : Vec = Vec::new(); - keys.push(Slice::try_from(String::from("hello")).unwrap()); - keys.push(Slice::try_from(String::from("world")).unwrap()); + let s1 = Slice::try_from(String::from("hello")).unwrap(); + let s2 = Slice::try_from(String::from("world")).unwrap(); + + let mut keys : Vec<&Slice> = Vec::new(); + keys.push(&s1); + keys.push(&s2); let bloom_filter: Slice = policy.create_filter(keys); diff --git a/src/util/hash.rs b/src/util/hash.rs index 2b2e0ba316ec83c1b6cad637debd678457a472bc..15a1a0328f2639f07d7d9da34d64e3e822cf8328 100644 --- a/src/util/hash.rs +++ b/src/util/hash.rs @@ -4,17 +4,16 @@ use std::slice as stds; use crate::traits::coding_trait::CodingTrait; use crate::util::coding::Coding; -use crate::util::crc::AsCrc; + use crate::util::r#const::HASH_DEFAULT_SEED; -use crate::util::slice; + use crate::util::slice::Slice; /// 一种可以计算 hash 的特质 pub trait ToHash { - #[inline] + fn to_hash(&self) -> u32; - #[inline] fn to_hash_with_seed(&self, seed: u32) -> u32; } diff --git a/src/util/hash_test.rs b/src/util/hash_test.rs index 81dce89e612992852a6e784b4a32915532de6bfb..28bf95dea029f916e6b7aacf10472b3106fe8d14 100644 --- a/src/util/hash_test.rs +++ b/src/util/hash_test.rs @@ -1,7 +1,6 @@ use crate::util::hash::{Hash, ToHash}; use crate::util::r#const::HASH_DEFAULT_SEED; use crate::util::slice::Slice; -use std::slice; #[test] fn test_hash() { diff --git a/src/util/linked_list.rs b/src/util/linked_list.rs index 5c1b209abf4e5d26275cdf4a97db2157e524fdb7..8ffb8e61ca1bcce711f04fc343b45886441b036d 100644 --- a/src/util/linked_list.rs +++ b/src/util/linked_list.rs @@ -4,6 +4,8 @@ use crate::util::Result; use crate::util::slice::Slice; use crate::util::status::{LevelError, Status}; +type Link = Option>>; + /// 节点 #[derive(Debug)] struct Node { @@ -11,9 +13,9 @@ struct Node { val: T, // 前驱 // 因为会出现一个节点同时存在多个可变引用的情况,因此需要使用裸指针(裸指针的包装 NonNull) - prev: Option>>, + prev: Link, // 后继. Option 表示该节点为空,即不存在 prev 前置节点(整个链表为空时)、或不存在next 后置节点(链表的尾节点) - next: Option>>, + next: Link, } /// 双向链表 @@ -22,15 +24,22 @@ pub struct LinkedList { // 双向链表的当前长度 length: usize, // 头 - head: Option>>, + head: Link, // 尾 - tail: Option>>, + tail: Link, + // // 内存分配器 + // allocator: Allocator } pub trait LinkedListBuilder: Default { /// 构造函数, 构造空的双向链表 fn new() -> Self; + // /// 指定内存分配器 + // #[inline] + // #[unstable(feature = "allocator_api", issue = "32838")] + // fn new_in(alloc: A) -> Self; + fn length(&self) -> usize; /// 链表末尾添加元素 @@ -118,6 +127,37 @@ pub trait LinkedListBuilder: Default { /// ``` fn add_by_position(&mut self, position: usize, data: T) -> Result; + /// 弹出此列表所代表的堆栈中的元素。(将元素从链表中删除,并且返回) + /// 等价于 pop_last + fn pop(&mut self) -> Option; + + /// Removes the first element from a list and returns it, or `None` if it is empty. + fn pop_first(&mut self) -> Option; + + /// Removes the last element from a list and returns it, or `None` if it is empty. + fn pop_last(&mut self) -> Option; + + /// 查看和返回第一个元素。不可变引用类型 + /// 等价于 peek_first + /// 仅仅返回元素的引用,而元素的所有权还是在链表中 + fn peek(&mut self) -> Option; + + // public E element() 返回第一个元素。 + + /// 查看和返回第一个元素。可变引用类型 + /// 返回元素的可变引用类型,使得能够对链表中的节点元素值进行修改,但是不真正获取元素的所有权! + fn peek_mut(&mut self) -> Option; + + /// 返回头部元素 + fn peek_first(&mut self) -> Option; + + /// 返回尾部元素 + fn peek_last(&mut self) -> Option; + + /// 返回尾部元素 + /// 返回元素的可变引用类型,使得能够对链表中的节点元素值进行修改,但是不真正获取元素的所有权! + fn peek_last_mut(&mut self) -> Option; + /// 删除并返回第一个元素。 fn remove_first(&mut self) -> Result>; @@ -126,7 +166,9 @@ pub trait LinkedListBuilder: Default { /// 删除指定位置的元素并返回。 fn remove(&mut self, position: usize) -> Result>; + // public boolean remove(Object o) 删除某一元素,返回是否成功,成功为 true,失败为 false。 + // public boolean remove(int index) 删除某一位置元素,返回是否成功,成功为 true,失败为 false。 /// 获取列表开头的元素 fn get_first(&self) -> Result>; @@ -166,20 +208,11 @@ pub trait LinkedListBuilder: Default { // public boolean offerFirst(E e) 头部插入元素,返回是否成功,成功为 true,失败为 false。 // public boolean offerLast(E e) 尾部插入元素,返回是否成功,成功为 true,失败为 false。 + // public E poll() 删除并返回第一个元素。 // public E pollFirst() 检索并删除此列表的第一个元素,如果此列表为空,则返回 null 。 // public E pollLast() 检索并删除此列表的最后一个元素,如果此列表为空,则返回 null 。 - // public E pop() 弹出此列表所代表的堆栈中的元素。(将元素从链表中删除,并且返回) - // public E popFirst() - // public E popLast() - - // public E element() 返回第一个元素。 - // public E peek() 返回第一个元素。不可变引用类型 - // public E peek_mut() 返回第一个元素。可变引用类型 - // public E peekFirst() 返回头部元素。 - // public E peekLast() 返回尾部元素。 - // public Iterator descendingIterator() 返回倒序迭代器。 // public ListIterator listIterator(int index) 返回从指定位置开始到末尾的迭代器。 // public Object[] toArray() 返回一个由链表元素组成的数组。 @@ -192,6 +225,9 @@ pub trait LinkedListBuilder: Default { // public int lastIndexOf(Object o) 查找指定元素最后一次出现的索引。 } +pub trait LinkedListBuilderIn: Default { +} + impl Node { fn new(val: T) -> Node { Node { @@ -221,9 +257,20 @@ impl LinkedListBuilder for LinkedList { length: 0, head: None, tail: None, + // allocator: Global } } + // #[inline] + // fn new_in(alloc: A) -> Self { + // Self { + // length: 0, + // head: None, + // tail: None, + // allocator: alloc, + // } + // } + #[inline] fn length(&self) -> usize { self.length @@ -242,6 +289,7 @@ impl LinkedListBuilder for LinkedList { #[inline] fn add_first(&mut self, val: T) -> Result { // 使用入参中的 val 创建一个链表节点Node,为了方便后续直接从 Box 获取到 raw ptr 裸指针, 使用 Box 包装 + // Box.new_in(v, 自定义 ) let mut node = Box::new(Node::new(val)); node.next = self.head; @@ -318,6 +366,38 @@ impl LinkedListBuilder for LinkedList { Ok(true) } + fn pop(&mut self) -> Option { + self.pop_last() + } + + fn pop_first(&mut self) -> Option { + todo!() + } + + fn pop_last(&mut self) -> Option { + todo!() + } + + fn peek(&mut self) -> Option { + self.peek_first() + } + + fn peek_mut(&mut self) -> Option { + todo!() + } + + fn peek_first(&mut self) -> Option { + todo!() + } + + fn peek_last(&mut self) -> Option { + todo!() + } + + fn peek_last_mut(&mut self) -> Option { + todo!() + } + fn remove_first(&mut self) -> Result> { todo!() } diff --git a/src/util/mem_debug.rs b/src/util/mem_debug.rs new file mode 100644 index 0000000000000000000000000000000000000000..bef8a837f271a8644bd1d2365d2e17c3dba126a4 --- /dev/null +++ b/src/util/mem_debug.rs @@ -0,0 +1,12 @@ +use std::ffi::{c_char, c_void}; +use std::ptr::{null, null_mut}; + +extern "C" fn write_cb(_: *mut c_void, message: *const c_char) { + print!("{}", String::from_utf8_lossy(unsafe { + std::ffi::CStr::from_ptr(message as *const i8).to_bytes() + })); +} + +pub fn mem_print() { + unsafe { jemalloc_sys::malloc_stats_print(Some(write_cb), null_mut(), null()) } +} \ No newline at end of file diff --git a/src/util/mod.rs b/src/util/mod.rs index 3291ada92977f5b76f9281e8d6ab7940be6adf22..527f6e559e38757531de80316b2beabdc9b252c7 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -1,4 +1,3 @@ -use std::rc::Rc; use std::result; pub use arena::Arena; @@ -31,15 +30,20 @@ mod filter_policy_test; pub mod histogram; mod histogram_test; -mod hash; +pub mod hash; mod hash_test; -mod mutex_lock; +pub mod mutex_lock; mod mutex_lock_test; pub mod random; mod random_test; pub mod options; +pub mod debug; pub mod linked_list; mod linked_list_test; +pub mod unsafe_slice; +pub mod env; +mod env_test; +pub mod mem_debug; /// 定义别名 pub type Result = result::Result; diff --git a/src/util/mutex_lock.rs b/src/util/mutex_lock.rs index 84e6d65acfc23d92adb8510dbf052674c53f70e1..c6c4d77632e91041d8981a2e469435f0c4b5523a 100644 --- a/src/util/mutex_lock.rs +++ b/src/util/mutex_lock.rs @@ -1,4 +1,4 @@ -use std::ops::Deref; + use std::sync::{Arc, LockResult, Mutex, MutexGuard, TryLockResult}; pub struct Lock { diff --git a/src/util/mutex_lock_test.rs b/src/util/mutex_lock_test.rs index 7273a71d7ae2bd90dbf01f4a186800a4cc51a2b4..f442b372f9f2aa41a54bc1aeebf44f5596d9f002 100644 --- a/src/util/mutex_lock_test.rs +++ b/src/util/mutex_lock_test.rs @@ -1,6 +1,5 @@ mod test { use std::thread; - use crate::util::mutex_lock::MutexLock; #[test] diff --git a/src/util/options.rs b/src/util/options.rs index a63c9f675ae203452e3f73603a0f13cdb4024253..ba51a8c5c24ad601eddb986b0e573d2bf6775c60 100644 --- a/src/util/options.rs +++ b/src/util/options.rs @@ -1,19 +1,28 @@ +use std::sync::Arc; use crate::db::db::Snapshot; +use crate::db::db_format::InternalKeyComparator; use crate::traits::comparator_trait::Comparator; +use crate::traits::filter_policy_trait::{FilterPolicy, FilterPolicyPtr}; use crate::util::comparator::BytewiseComparatorImpl; +use crate::util::env::Env; + +/// Options 的 `Arc>` 别名 +pub type OptionsPtr = Arc>; pub enum CompressionType { NoCompression, SnappyCompression } -/// TODO temp -pub struct Env {} - pub struct Cache {} -pub struct FilterPolicy {} +// 使用如下定义(后续路径会重构) +// use crate::traits::filter_policy_trait::FilterPolicy; +// pub struct FilterPolicy {} +// pub cmp: Box, +// | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ the trait `Clone` is not implemented for `dyn Comparator` +// #[derive(Clone)] pub struct Options { /// Comparator used to define the order of keys in the table. @@ -95,7 +104,7 @@ pub struct Options { /// If non-null, use the specified filter policy to reduce disk reads. /// Many applications will benefit from passing the result of /// NewBloomFilterPolicy() here. - pub filter_policy: Option, + pub filter_policy: Option, } /// Options that control read operations pub struct ReadOptions { @@ -132,7 +141,7 @@ pub struct WriteOptions { impl Default for Options { fn default() -> Self { Self { - cmp: Box::new(BytewiseComparatorImpl::default()), + cmp: Box::new(InternalKeyComparator::default()), create_if_missing: false, error_if_exists: false, paranoid_checks: false, diff --git a/src/util/slice.rs b/src/util/slice.rs index 26ea8b17359ad73bd5484abfe492ba504dc74da5..1f923b8f8c61d1ca03cd47a75658d7f5ef6237f1 100644 --- a/src/util/slice.rs +++ b/src/util/slice.rs @@ -1,6 +1,8 @@ use std::mem; use std::borrow::Cow; use std::cmp::Ordering; +use std::fmt::{Display, Formatter}; +use std::mem::ManuallyDrop; use std::ops::Deref; #[derive(Debug)] @@ -40,6 +42,12 @@ impl Slice { } } + #[inline] + pub unsafe fn from_raw_parts(ptr: *mut u8, len: usize) -> Self { + let data = Vec::from_raw_parts(ptr, len, len); + Self { data } + } + /// 获取 slice 长度 #[inline] pub fn size(&self) -> usize { @@ -96,11 +104,15 @@ impl Slice { } } + pub fn as_str(&self) -> &str { + let s = self.as_ref(); + std::str::from_utf8(s).unwrap() + } } impl<'a> Slice { /// 借取 Slice 中的数据, 调用方只拥有读权限 - pub fn borrow_data(&mut self) -> Cow<'a, String> { + pub fn borrow_data(&self) -> Cow<'a, String> { unsafe { // String & Vec has the same layout let s: &String = mem::transmute(&self.data); @@ -109,6 +121,13 @@ impl<'a> Slice { } } +impl Clone for Slice { + fn clone(&self) -> Self { + let data = self.data.clone(); + Slice::from_vec(data) + } +} + impl From for String { /// 将 Slice 内数据的所有权移交给 String #[inline] @@ -135,6 +154,13 @@ impl > From for Slice { } } +impl AsRef<[u8]> for Slice { + #[inline] + fn as_ref(&self) -> &[u8] { + self.data.as_slice() + } +} + impl PartialEq for Slice { /// 判断两个 Slice 是否相同 #[inline] @@ -152,24 +178,20 @@ impl PartialEq for Slice { impl PartialOrd for Slice { /// 判断两个 slice 的大小关系 fn partial_cmp(&self, other: &Self) -> Option { - match self.size().partial_cmp(&other.size()) { - Some(Ordering::Equal) => { - let cmp = unsafe { - memcmp( - self.data.as_ptr() as *const i8, - other.data.as_ptr() as *const i8, - self.size(), - ) - }; - if cmp == 0 { - Some(Ordering::Equal) - } else if cmp > 0 { - Some(Ordering::Greater) - } else { - Some(Ordering::Less) - } - } - op => op + let min = self.size().min(other.size()); + let cmp = unsafe { + memcmp( + self.data.as_ptr() as *const i8, + other.data.as_ptr() as *const i8, + min, + ) + }; + if cmp == 0 { + self.size().partial_cmp(&other.size()) + } else if cmp > 0 { + Some(Ordering::Greater) + } else { + Some(Ordering::Less) } } } @@ -195,3 +217,16 @@ impl Deref for Slice { } } +impl Display for Slice { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + unsafe { + let string = ManuallyDrop::new( + String::from_raw_parts( + self.as_ptr() as *mut u8, + self.data.len(), + self.data.capacity()) + ); + f.write_str(string.as_str()) + } + } +} diff --git a/src/util/slice_test.rs b/src/util/slice_test.rs index a2e0ec5780b3916957f2eeb2538eeb116ff81c7f..c56f7a4b077c51a2a01e455299b125ef58bdc9b0 100644 --- a/src/util/slice_test.rs +++ b/src/util/slice_test.rs @@ -40,7 +40,7 @@ mod test { #[test] fn test_borrow_data() { - let mut a0 = Slice::from("123"); + let a0 = Slice::from("123"); let borrowed = a0.borrow_data(); assert_eq!(3, borrowed.len()); let owned = borrowed.to_owned(); @@ -88,7 +88,7 @@ mod test { #[test] fn test_merge2() { let mut a0 = Slice::from("123"); - let mut a2 = Slice::from("456"); + let a2 = Slice::from("456"); a0.merge(a2, None); assert_eq!(String::from("123456"), String::from(a0)); } diff --git a/src/util/status.rs b/src/util/status.rs index 20493355fe69b7d68d1283013845b385c613d43f..97b33d6ecb53e76dd74a8320c3a8cc2add26bad9 100644 --- a/src/util/status.rs +++ b/src/util/status.rs @@ -1,9 +1,10 @@ use std::fmt::{Display, Formatter}; use std::io; use std::ops::Deref; +use std::sync::PoisonError; use crate::util::r#const::COLON_WHITE_SPACE; use crate::util::slice::Slice; -use crate::util::status::LevelError::{KCorruption, KIOError, KInvalidArgument, KNotSupported, KNotFound, KOk, KBadRecord}; +use crate::util::status::LevelError::{KCorruption, KIOError, KInvalidArgument, KNotSupported, KNotFound, KOk, KBadRecord, KRepeatedRecord}; /// db 中的返回状态,将错误号和错误信息封装成Status类,统一进行处理。 /// 在 leveldb的实现里, 为了节省空间Status将返回码(code), 错误信息message及长度打包存储于一个字符串数组中, 来存储错误信息。 @@ -102,10 +103,13 @@ impl Status { self.err.is_invalid_argument() } - pub fn get_error_string(&self) -> String { - self.err.to_string() + pub fn get_msg(&self) -> String { + let msg = &self.msg; + + String::from(msg.as_str()) } + /// 得到 LevelError /// 请注意, err 的所有权会发生转移!!! pub fn get_error(self) -> LevelError { self.err @@ -158,6 +162,7 @@ impl Status { KInvalidArgument => "Invalid argument: ", KIOError => "IO error: ", KBadRecord=> "wal bad record", + KRepeatedRecord => "repeated record" }; if self.err.is_ok() { @@ -204,6 +209,7 @@ pub enum LevelError { KInvalidArgument, KIOError, KBadRecord, + KRepeatedRecord, } impl LevelError { @@ -231,6 +237,10 @@ impl LevelError { matches!(*self, KInvalidArgument) } + pub fn is_repeated_record(&self) -> bool { + matches!(self, KRepeatedRecord) + } + pub fn ok() -> Status { Status{ err: Default::default(), @@ -270,6 +280,10 @@ impl LevelError { } } + pub fn corruption_string(msg: &str, msg2: &str) -> Status { + LevelError::corruption(Slice::from(msg), Slice::from(msg2)) + } + pub fn not_supported(mut msg: Slice, msg2: Slice) -> Status { let _ = &msg.merge(msg2, Some(String::from(COLON_WHITE_SPACE))); @@ -288,6 +302,14 @@ impl LevelError { } } + #[inline] + pub fn repeated_record(msg: Slice) -> Status { + Status { + err: KRepeatedRecord, + msg + } + } + /// 生成 LevelError.KIOError /// /// # Arguments @@ -320,7 +342,8 @@ impl LevelError { KNotSupported => 3, KInvalidArgument => 4, KIOError => 5, - KBadRecord => 6 + KBadRecord => 6, + KRepeatedRecord => 7 }; le @@ -361,6 +384,7 @@ impl TryFrom for LevelError { 4 => Ok(KInvalidArgument), 5 => Ok(KIOError), 6 => Ok(KBadRecord), + 7 => Ok(KRepeatedRecord), // all other numbers _ => Err(String::from(format!("Unknown code: {}", value))) } @@ -373,6 +397,12 @@ impl From for Status { } } +impl From> for Status { + fn from(_value: PoisonError) -> Self { + Status::wrapper(KCorruption, "PoisonError".into()) + } +} + impl Display for LevelError { #[inline] fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { @@ -386,6 +416,7 @@ impl Display for LevelError { KInvalidArgument => "Invalid argument: ", KIOError => "IO error: ", KBadRecord => "wal bad record: ", + KRepeatedRecord => "repeated record: ", }; print.push_str(msg_type); diff --git a/src/util/status_test.rs b/src/util/status_test.rs index baad8329e2ec23eb0947c41ace8fff6759b67772..c186ceea0ce27595744696872608d4a07997b029 100644 --- a/src/util/status_test.rs +++ b/src/util/status_test.rs @@ -37,6 +37,9 @@ mod test { String::from(msg2).into()); assert!(&err.is_corruption()); + let err1: Status = LevelError::corruption_string("AAaaa", "bbhugy"); + assert!(&err1.is_corruption()); + let err: Status = LevelError::not_found(String::from(msg1).into(), String::from(msg2).into()); assert!(&err.is_not_found()); @@ -97,7 +100,7 @@ mod test { } #[test] - fn test_level_error_toString() { + fn test_level_error_to_string() { // ok let status: Status = LevelError::ok(); assert_eq!("OK", status.to_string()); diff --git a/src/util/unsafe_slice.rs b/src/util/unsafe_slice.rs new file mode 100644 index 0000000000000000000000000000000000000000..c48d2bb536aa3d4d73049d43bed992e46b3ef5c9 --- /dev/null +++ b/src/util/unsafe_slice.rs @@ -0,0 +1,107 @@ +use std::alloc::{alloc, Layout}; +use std::fmt::{Display, Formatter}; +use std::io::Write; +use std::mem::ManuallyDrop; + +use crate::util::arena::ArenaRef; +use crate::util::Result; +use crate::util::slice::Slice; + +/// 提供一种将其它结构体转为 UnsafeSlice 的特质 +pub trait TryIntoUnsafeSlice { + /// 尝试将结构体通过 arena 内存分配器,构造出一个新的 UnsafeSlice + fn try_into_unsafe_slice(&self, arena: ArenaRef) -> Result; +} + +/// 内存不安全的 Slice, 内存由 Arena 分配和管理。 +/// 实现了 Copy 语义,有更高效的读 api +#[derive(Copy, Clone)] +pub struct UnsafeSlice { + ptr: *mut u8, + len: usize, +} + +impl UnsafeSlice { + + /// 利用 arena 生成 UnsafeSlice + pub fn new_with_arena>(data: B, arena: ArenaRef) -> Result { + let mut lock = arena.lock()?; + let src = data.as_ref(); + let mut buf = lock.allocate(src.len()); + buf.write(src)?; + Ok(Self { + ptr: buf.as_mut_ptr(), + len: buf.len(), + }) + } + + + + #[inline] + pub fn len(&self) -> usize { + self.len + } + + #[inline] + pub fn as_str(&self) -> &str { + unsafe { + core::str::from_utf8_unchecked(self.as_ref()) + } + } +} + +impl UnsafeSlice { + + /// 返回子串。这个方法是高效的,在内部只复制了裸指针偏的移量。 + pub unsafe fn sub_slice(&self, start: usize, len: usize) -> Self { + assert!(start + len < self.len, "sub_slice out of range"); + Self { + ptr: self.ptr.offset(start as isize), + len, + } + } + + /// 生成 Slice 串,由于 Slice 是内存安全的,所以实现上会有内存拷贝。 + /// 高性能场景优先考虑 UnsafeSlice + pub fn to_slice(&self) -> Slice { + unsafe { + let raw_ptr = alloc(Layout::from_size_align_unchecked(self.len, 8)); + Slice::from_raw_parts(raw_ptr, self.len) + } + } +} + +impl AsRef<[u8]> for UnsafeSlice { + + #[inline] + fn as_ref(&self) -> &[u8] { + unsafe { + core::slice::from_raw_parts(self.ptr, self.len) + } + } +} + +impl Display for UnsafeSlice { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + unsafe { + let string = ManuallyDrop::new( + String::from_raw_parts(self.ptr, self.len, self.len) + ); + f.write_str(string.as_str()) + } + } +} + +impl TryIntoUnsafeSlice for &str { + #[inline] + fn try_into_unsafe_slice(&self, arena: ArenaRef) -> Result { + UnsafeSlice::new_with_arena(self.as_bytes(), arena) + } +} + +impl TryIntoUnsafeSlice for String { + #[inline] + fn try_into_unsafe_slice(&self, arena: ArenaRef) -> Result { + UnsafeSlice::new_with_arena(self.as_bytes(), arena) + } +} \ No newline at end of file