From 2b610f5f7dd89a02dde4baa89afbb54795f971da Mon Sep 17 00:00:00 2001 From: wangboo <5417808+wangboa@user.noreply.gitee.com> Date: Tue, 17 Jan 2023 18:28:36 +0800 Subject: [PATCH 01/50] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E9=83=A8=E5=88=86?= =?UTF-8?q?=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.toml | 2 +- src/db/skip_list.rs | 78 ++++++++++++++++++++++++++++++++++++++++++--- src/lib.rs | 1 + 3 files changed, 75 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6f688ef..c05ad50 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ path = "src/lib.rs" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] - +rand = "0.8.5" [profile.dev] diff --git a/src/db/skip_list.rs b/src/db/skip_list.rs index 144c0b6..ff6d102 100644 --- a/src/db/skip_list.rs +++ b/src/db/skip_list.rs @@ -1,3 +1,5 @@ +use std::cmp::Ordering; +use std::ptr::NonNull; use std::rc::Rc; use crate::traits::comparator_trait::ComparatorTrait; @@ -6,26 +8,71 @@ use crate::util::comparator::BytewiseComparatorImpl; use crate::util::Result; use crate::util::slice::Slice; +use rand::prelude::*; +use crate::util::status::{LevelError, Status}; + // todo struct Node { - value: T, + /// 存储的值 + key: T, + /// 最大深度 + max_level: u8, + /// 柔性数组, 该节点层下存储的指向后方的节点 + next: Vec>>, } pub struct SkipList { - node: Option>, + /// 最高层数 + level: u8, + /// 最高层数 + max_level: u8, + /// 存储数据数量 + num: usize, + /// 头部指针 + head: Option>, + /// 比较器 comp: Rc, + /// 内存分配器 + arena: Rc, } impl SkipList { - pub fn create(comparator: Rc, _arena: Rc) -> Self { + pub fn create(comparator: Rc, arena: Rc) -> Self { Self { - node: None, + level: 0, + max_level: 8, + num: 0, + head: None, comp: comparator, + arena, } } - pub fn insert(&mut self, _seq_no: usize, _key: &Slice) -> Result<()> { + pub fn insert(&mut self, key: &Slice) -> Result<()> { + if self.head.is_none() { + self.head = Some(Node::create(key)); + self.level = 1; + self.num = 1; + self.max_level = 1; + return Ok(()) + } + let pre = self.head.unwrap(); + let mut next = true; + while next { + match self.comp.compare(&pre.key, key) { + None => Err(Status::wrapper(LevelError::KInvalidArgument, "key not comparable".into())); + Some(Ordering::Equal) => { + + }, + Some(Ordering::Less) => { + + }, + Some(Ordering::Greater) => { + + } + } + } todo!() } @@ -36,4 +83,25 @@ impl SkipList { pub fn get_max_height(&self) -> usize { todo!() } + + fn rnd_level(&self) -> u8 { + let mut level = 1; + for _ in 1..self.max_level { + if random() { + level += 1; + } + } + level + } + +} + +impl Node { + fn create(key: T) -> Self { + Self { + key, + max_level: 1, + next: Vec::with_capacity(4), + } + } } \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 064907a..a915437 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,5 @@ #![feature(box_syntax)] +#![feature(let_else)] mod db; mod table; -- Gitee From babdefd9d5fc4cc85dbcecaa0170e3617a70b6a6 Mon Sep 17 00:00:00 2001 From: fengyang Date: Tue, 31 Jan 2023 20:37:48 +0800 Subject: [PATCH 02/50] =?UTF-8?q?=E5=8F=8C=E5=90=91=E9=93=BE=E8=A1=A8?= =?UTF-8?q?=E6=8E=A5=E5=8F=A3=E5=AE=9A=E4=B9=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 12 +++---- src/util/linked_list.rs | 75 +++++++++++++++++++++++++++++++++++------ 2 files changed, 71 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index c31a5a8..d490375 100644 --- a/README.md +++ b/README.md @@ -74,22 +74,22 @@ LSM tree 是许多 KV型或日志型数据库所依赖的核心实现,例如Bi | util.Logger | peach | | | table.Block, BlockBuilder, FilterBlockBuilder | colagy | | | FilterBlock, FilterBlockReader | colagy | | -| table.format(Footer, BlockHandle) | fengyang、半支烟 | | -| db.dbformat(InternalKeyComparator, InternalFilterPolicy, LookupKey, InternalKey) | fengyang、半支烟 | | +| table.format(Footer, BlockHandle) | fengyang、半支烟 | 20% | +| db.dbformat(InternalKeyComparator, InternalFilterPolicy, LookupKey, InternalKey) | fengyang、半支烟 | 20% | | db.SkipList | wangboo | | | table.Iterator(DBIter, MergingIterator, TwoLevelIterator...) | kazeseiriou | | | IteratorWrapper | kazeseiriou | | | db.MemTable(MemTable, MemTableIterator) | wangboo | | -| SSTable | fengyang | | +| SSTable | fengyang | 0% | | table.Table | peach | | | db.leveldb_util | wangboo | | | db.log_format | wangboo | | | db.LogReader | wangboo | 90% | | db.LogWriter | wangboo | 90% | | db.TableCache | colagy | | -| LinkedList | fengyang | | -| db.VersionEdit(Tag, VersionEdit, FileMetaData) | fengyang | | -| db.VersionSet(Version, LevelFileNumIterator, SaverState) | fengyang | | +| LinkedList | fengyang | 60% | +| db.VersionEdit(Tag, VersionEdit, FileMetaData) | fengyang | 10% | +| db.VersionSet(Version, LevelFileNumIterator, SaverState) | fengyang | 10% | | WriteBatch | peach | | #### 1.1.0 计划 diff --git a/src/util/linked_list.rs b/src/util/linked_list.rs index 5c1b209..4736f7f 100644 --- a/src/util/linked_list.rs +++ b/src/util/linked_list.rs @@ -118,6 +118,37 @@ pub trait LinkedListBuilder: Default { /// ``` fn add_by_position(&mut self, position: usize, data: T) -> Result; + /// 弹出此列表所代表的堆栈中的元素。(将元素从链表中删除,并且返回) + /// 等价于 pop_last + fn pop(&mut self) -> Option; + + /// Removes the first element from a list and returns it, or `None` if it is empty. + fn pop_first(&mut self) -> Option; + + /// Removes the last element from a list and returns it, or `None` if it is empty. + fn pop_last(&mut self) -> Option; + + /// 查看和返回第一个元素。不可变引用类型 + /// 等价于 peek_first + /// 仅仅返回元素的引用,而元素的所有权还是在链表中 + fn peek(&mut self) -> Option; + + // public E element() 返回第一个元素。 + + /// 查看和返回第一个元素。可变引用类型 + /// 返回元素的可变引用类型,使得能够对链表中的节点元素值进行修改,但是不真正获取元素的所有权! + fn peek_mut(&mut self) -> Option; + + /// 返回头部元素 + fn peek_first(&mut self) -> Option; + + /// 返回尾部元素 + fn peek_last(&mut self) -> Option; + + /// 返回尾部元素 + /// 返回元素的可变引用类型,使得能够对链表中的节点元素值进行修改,但是不真正获取元素的所有权! + fn peek_last_mut(&mut self) -> Option; + /// 删除并返回第一个元素。 fn remove_first(&mut self) -> Result>; @@ -126,7 +157,9 @@ pub trait LinkedListBuilder: Default { /// 删除指定位置的元素并返回。 fn remove(&mut self, position: usize) -> Result>; + // public boolean remove(Object o) 删除某一元素,返回是否成功,成功为 true,失败为 false。 + // public boolean remove(int index) 删除某一位置元素,返回是否成功,成功为 true,失败为 false。 /// 获取列表开头的元素 fn get_first(&self) -> Result>; @@ -170,16 +203,6 @@ pub trait LinkedListBuilder: Default { // public E pollFirst() 检索并删除此列表的第一个元素,如果此列表为空,则返回 null 。 // public E pollLast() 检索并删除此列表的最后一个元素,如果此列表为空,则返回 null 。 - // public E pop() 弹出此列表所代表的堆栈中的元素。(将元素从链表中删除,并且返回) - // public E popFirst() - // public E popLast() - - // public E element() 返回第一个元素。 - // public E peek() 返回第一个元素。不可变引用类型 - // public E peek_mut() 返回第一个元素。可变引用类型 - // public E peekFirst() 返回头部元素。 - // public E peekLast() 返回尾部元素。 - // public Iterator descendingIterator() 返回倒序迭代器。 // public ListIterator listIterator(int index) 返回从指定位置开始到末尾的迭代器。 // public Object[] toArray() 返回一个由链表元素组成的数组。 @@ -318,6 +341,38 @@ impl LinkedListBuilder for LinkedList { Ok(true) } + fn pop(&mut self) -> Option { + self.pop_last() + } + + fn pop_first(&mut self) -> Option { + todo!() + } + + fn pop_last(&mut self) -> Option { + todo!() + } + + fn peek(&mut self) -> Option { + self.peek_first() + } + + fn peek_mut(&mut self) -> Option { + todo!() + } + + fn peek_first(&mut self) -> Option { + todo!() + } + + fn peek_last(&mut self) -> Option { + todo!() + } + + fn peek_last_mut(&mut self) -> Option { + todo!() + } + fn remove_first(&mut self) -> Result> { todo!() } -- Gitee From f4a2c285dfaffa74429857c18bf778ddfac9fdc7 Mon Sep 17 00:00:00 2001 From: fengyang Date: Sat, 4 Feb 2023 19:52:48 +0800 Subject: [PATCH 03/50] =?UTF-8?q?=E5=8F=8C=E5=90=91=E9=93=BE=E8=A1=A8?= =?UTF-8?q?=E6=8E=A5=E5=8F=A3=E5=AE=9A=E4=B9=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lib.rs | 1 + src/util/linked_list.rs | 27 ++++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 064907a..ae56398 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,5 @@ #![feature(box_syntax)] +#![feature(allocator_api)] mod db; mod table; diff --git a/src/util/linked_list.rs b/src/util/linked_list.rs index 4736f7f..f451302 100644 --- a/src/util/linked_list.rs +++ b/src/util/linked_list.rs @@ -1,3 +1,4 @@ +use std::alloc::{Allocator, Global}; use std::fmt::{Display, Formatter}; use std::ptr::NonNull; use crate::util::Result; @@ -18,6 +19,7 @@ struct Node { /// 双向链表 #[derive(Debug)] +#[allow(missing_debug_implementations)] pub struct LinkedList { // 双向链表的当前长度 length: usize, @@ -25,12 +27,19 @@ pub struct LinkedList { head: Option>>, // 尾 tail: Option>>, + // 内存分配器 + allocator: Allocator } -pub trait LinkedListBuilder: Default { +pub trait LinkedListBuilder: Default { /// 构造函数, 构造空的双向链表 fn new() -> Self; + /// 指定内存分配器 + #[inline] + #[unstable(feature = "allocator_api", issue = "32838")] + fn new_in(alloc: A) -> Self; + fn length(&self) -> usize; /// 链表末尾添加元素 @@ -215,6 +224,9 @@ pub trait LinkedListBuilder: Default { // public int lastIndexOf(Object o) 查找指定元素最后一次出现的索引。 } +pub trait LinkedListBuilderIn: Default { +} + impl Node { fn new(val: T) -> Node { Node { @@ -244,6 +256,18 @@ impl LinkedListBuilder for LinkedList { length: 0, head: None, tail: None, + allocator: Global + } + } + + // #[unstable(feature = "allocator_api", issue = "32838")] A: Allocator = Global + #[inline] + fn new_in(alloc: A) -> Self { + Self { + length: 0, + head: None, + tail: None, + allocator: alloc, } } @@ -265,6 +289,7 @@ impl LinkedListBuilder for LinkedList { #[inline] fn add_first(&mut self, val: T) -> Result { // 使用入参中的 val 创建一个链表节点Node,为了方便后续直接从 Box 获取到 raw ptr 裸指针, 使用 Box 包装 + // Box.new_in(v, 自定义 ) let mut node = Box::new(Node::new(val)); node.next = self.head; -- Gitee From 748f3458f73ce5b05d2e9169748802631da68caf Mon Sep 17 00:00:00 2001 From: xiao Date: Sat, 4 Feb 2023 20:58:08 +0800 Subject: [PATCH 04/50] doc --- src/lib.rs | 1 - src/util/linked_list.rs | 37 ++++++++++++++----------------------- 2 files changed, 14 insertions(+), 24 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index ae56398..064907a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,4 @@ #![feature(box_syntax)] -#![feature(allocator_api)] mod db; mod table; diff --git a/src/util/linked_list.rs b/src/util/linked_list.rs index f451302..87c2dd0 100644 --- a/src/util/linked_list.rs +++ b/src/util/linked_list.rs @@ -1,4 +1,3 @@ -use std::alloc::{Allocator, Global}; use std::fmt::{Display, Formatter}; use std::ptr::NonNull; use crate::util::Result; @@ -19,7 +18,6 @@ struct Node { /// 双向链表 #[derive(Debug)] -#[allow(missing_debug_implementations)] pub struct LinkedList { // 双向链表的当前长度 length: usize, @@ -27,18 +25,17 @@ pub struct LinkedList { head: Option>>, // 尾 tail: Option>>, - // 内存分配器 - allocator: Allocator + // // 内存分配器 + // allocator: Box } -pub trait LinkedListBuilder: Default { +pub trait LinkedListBuilder: Default { /// 构造函数, 构造空的双向链表 fn new() -> Self; - /// 指定内存分配器 - #[inline] - #[unstable(feature = "allocator_api", issue = "32838")] - fn new_in(alloc: A) -> Self; + // /// 指定内存分配器 + // #[inline] + // fn new_in(alloc: Box) -> Self; fn length(&self) -> usize; @@ -224,9 +221,6 @@ pub trait LinkedListBuilder Self { - Self { - length: 0, - head: None, - tail: None, - allocator: alloc, - } - } + // fn new_in(alloc: Box) -> Self { + // Self { + // length: 0, + // head: None, + // tail: None, + // allocator: alloc + // } + // } #[inline] fn length(&self) -> usize { -- Gitee From cbdfa08b1d789b77346488d260056e515a558aac Mon Sep 17 00:00:00 2001 From: fengyang Date: Fri, 10 Feb 2023 18:40:11 +0800 Subject: [PATCH 05/50] =?UTF-8?q?=E5=8F=8C=E5=90=91=E9=93=BE=E8=A1=A8?= =?UTF-8?q?=E6=8E=A5=E5=8F=A3=E5=AE=9A=E4=B9=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/util/linked_list.rs | 46 ++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/src/util/linked_list.rs b/src/util/linked_list.rs index f451302..10199b0 100644 --- a/src/util/linked_list.rs +++ b/src/util/linked_list.rs @@ -5,6 +5,8 @@ use crate::util::Result; use crate::util::slice::Slice; use crate::util::status::{LevelError, Status}; +type Link = Option>>; + /// 节点 #[derive(Debug)] struct Node { @@ -12,33 +14,32 @@ struct Node { val: T, // 前驱 // 因为会出现一个节点同时存在多个可变引用的情况,因此需要使用裸指针(裸指针的包装 NonNull) - prev: Option>>, + prev: Link, // 后继. Option 表示该节点为空,即不存在 prev 前置节点(整个链表为空时)、或不存在next 后置节点(链表的尾节点) - next: Option>>, + next: Link, } /// 双向链表 #[derive(Debug)] -#[allow(missing_debug_implementations)] pub struct LinkedList { // 双向链表的当前长度 length: usize, // 头 - head: Option>>, + head: Link, // 尾 - tail: Option>>, - // 内存分配器 - allocator: Allocator + tail: Link, + // // 内存分配器 + // allocator: Allocator } -pub trait LinkedListBuilder: Default { +pub trait LinkedListBuilder: Default { /// 构造函数, 构造空的双向链表 fn new() -> Self; - /// 指定内存分配器 - #[inline] - #[unstable(feature = "allocator_api", issue = "32838")] - fn new_in(alloc: A) -> Self; + // /// 指定内存分配器 + // #[inline] + // #[unstable(feature = "allocator_api", issue = "32838")] + // fn new_in(alloc: A) -> Self; fn length(&self) -> usize; @@ -256,20 +257,19 @@ impl LinkedListBuilder for LinkedList { length: 0, head: None, tail: None, - allocator: Global + // allocator: Global } } - // #[unstable(feature = "allocator_api", issue = "32838")] A: Allocator = Global - #[inline] - fn new_in(alloc: A) -> Self { - Self { - length: 0, - head: None, - tail: None, - allocator: alloc, - } - } + // #[inline] + // fn new_in(alloc: A) -> Self { + // Self { + // length: 0, + // head: None, + // tail: None, + // allocator: alloc, + // } + // } #[inline] fn length(&self) -> usize { -- Gitee From 506151e0b59fbd0c3dcd690be7d278a70bc900d2 Mon Sep 17 00:00:00 2001 From: fengyang Date: Fri, 10 Feb 2023 18:41:53 +0800 Subject: [PATCH 06/50] =?UTF-8?q?=E5=8F=8C=E5=90=91=E9=93=BE=E8=A1=A8?= =?UTF-8?q?=E6=8E=A5=E5=8F=A3=E5=AE=9A=E4=B9=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/util/linked_list.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/util/linked_list.rs b/src/util/linked_list.rs index 10199b0..3cab807 100644 --- a/src/util/linked_list.rs +++ b/src/util/linked_list.rs @@ -1,4 +1,3 @@ -use std::alloc::{Allocator, Global}; use std::fmt::{Display, Formatter}; use std::ptr::NonNull; use crate::util::Result; @@ -225,7 +224,7 @@ pub trait LinkedListBuilder: Default { // public int lastIndexOf(Object o) 查找指定元素最后一次出现的索引。 } -pub trait LinkedListBuilderIn: Default { +pub trait LinkedListBuilderIn: Default { } impl Node { -- Gitee From e63dc99a9210abd29371125746167bac079a275e Mon Sep 17 00:00:00 2001 From: wangboo <5417808+wangboa@user.noreply.gitee.com> Date: Thu, 16 Mar 2023 16:47:20 +0800 Subject: [PATCH 07/50] =?UTF-8?q?=E9=83=A8=E5=88=86skiplist=E4=BB=A3?= =?UTF-8?q?=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/db/mod.rs | 5 +- src/db/skip_list.rs | 404 ++++++++++++++++++++++++++++++++++----- src/db/skip_list_test.rs | 65 +++++++ src/lib.rs | 3 + src/util/arena.rs | 30 ++- src/util/const.rs | 2 + src/util/debug.rs | 17 ++ src/util/mod.rs | 3 +- src/util/slice.rs | 25 +++ src/util/status.rs | 18 +- 10 files changed, 513 insertions(+), 59 deletions(-) create mode 100644 src/db/skip_list_test.rs create mode 100644 src/util/debug.rs diff --git a/src/db/mod.rs b/src/db/mod.rs index 5a34cf6..6ce1187 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -1,5 +1,5 @@ -use crate::db::skip_list::SkipList; use crate::db::mem_table::MemTable; +use crate::db::skip_list::SkipList; use crate::util::comparator::{BytewiseComparatorImpl, InternalKeyComparator}; use crate::util::slice::Slice; @@ -9,8 +9,9 @@ mod log_wr_test; pub mod skip_list; pub mod mem_table; pub mod db; +mod skip_list_test; /// 默认调表 -pub type DefaultSkipList = SkipList; +pub type DefaultSkipList = SkipList; /// 默认内存表 pub type DefaultMemTable = MemTable; \ No newline at end of file diff --git a/src/db/skip_list.rs b/src/db/skip_list.rs index ff6d102..628f87c 100644 --- a/src/db/skip_list.rs +++ b/src/db/skip_list.rs @@ -1,107 +1,405 @@ -use std::cmp::Ordering; -use std::ptr::NonNull; -use std::rc::Rc; +use std::cell::{Cell, RefCell}; +use std::cmp::{max, min, Ordering}; +use std::fmt::format; +use std::mem; +use std::mem::{ManuallyDrop, size_of}; +use std::ops::{Deref, DerefMut}; +use std::ptr::{NonNull, null_mut}; +use std::sync::Arc; + +use rand::prelude::*; +use crate::debug; use crate::traits::comparator_trait::ComparatorTrait; use crate::util::Arena; +use crate::util::arena::{ArenaAllocLike, ArenaRef}; use crate::util::comparator::BytewiseComparatorImpl; use crate::util::Result; use crate::util::slice::Slice; - -use rand::prelude::*; use crate::util::status::{LevelError, Status}; +type RawNode = *mut Node; + +const MAX_LEVEL: usize = 8; + // todo -struct Node { - /// 存储的值 - key: T, - /// 最大深度 - max_level: u8, - /// 柔性数组, 该节点层下存储的指向后方的节点 - next: Vec>>, +struct Node { + /// 存储的值, 如果为空,则是头指针或者尾指针 + key: Option, + /// 数组元素首地址,代表一个数组,指向每层的下一个节点。 + next_elems: *mut RawNode, + /// 当前节点高度 + level: usize, } -pub struct SkipList { - /// 最高层数 - level: u8, +pub struct SkipList { /// 最高层数 - max_level: u8, + level: usize, /// 存储数据数量 num: usize, /// 头部指针 - head: Option>, + head: RawNode, + /// 尾指针 + tail: RawNode, /// 比较器 - comp: Rc, + cmp: Arc, /// 内存分配器 - arena: Rc, + arena: ArenaRef, } -impl SkipList { +pub struct Iter<'a, Cmp: ComparatorTrait> { + list: &'a SkipList, + node: RawNode, +} - pub fn create(comparator: Rc, arena: Rc) -> Self { +impl SkipList { + pub fn create(comparator: Arc, arena: ArenaRef) -> Self { Self { level: 0, - max_level: 8, num: 0, - head: None, - comp: comparator, + head: Node::create_head(arena.clone()), + tail: Node::create_tail(), + cmp: comparator, arena, } } - pub fn insert(&mut self, key: &Slice) -> Result<()> { - if self.head.is_none() { - self.head = Some(Node::create(key)); - self.level = 1; - self.num = 1; - self.max_level = 1; - return Ok(()) + pub fn insert(&mut self, key: Slice) -> Result<()> { + if self.num == 0 { + self.insert_ele0(key) + } else { + self.insert_elen(key) } - let pre = self.head.unwrap(); - let mut next = true; - while next { - match self.comp.compare(&pre.key, key) { - None => Err(Status::wrapper(LevelError::KInvalidArgument, "key not comparable".into())); - Some(Ordering::Equal) => { + } - }, - Some(Ordering::Less) => { + #[inline] + fn insert_ele0(&mut self, key: Slice) -> Result<()> { + let level = rand_level(); + debug!("insert {}, level: {}", &key, level); + let node = unsafe { Node::create(key, level, self.arena.clone()) }; + // head bind node + // TODO, use macro to expand for-loop + unsafe { + (&mut *node).level = level; + (&mut *self.head).level = level; + for l in 0..level { + (&mut *self.head).set_node(l, node); + (&mut *node).set_node(l, self.tail); + } + } + self.level = level; + self.num = 1; + return Ok(()); + } - }, - Some(Ordering::Greater) => { + fn insert_elen(&mut self, key: Slice) -> Result<()> { + let mut current = self.head; + let level = rand_level(); + debug!("insert {}, level: {}", &key, level); + let node_ptr = unsafe { + Node::create(key, level, self.arena.clone()) + }; + let node = unsafe { &mut *node_ptr }; + // loop from highest level to 0 + for l in (0..self.level).rev() { + 'inner_loop: loop { + let ele_ptr = unsafe { (&*current).get_node(l) }; + let ele = unsafe { &mut *ele_ptr }; + if ele.is_tail() { + if l < level { + // ele is tail node, add node to last + unsafe { + (&mut *current).set_node(l, node_ptr); + node.set_node(l, self.tail); + debug!("bind: {} before: {}, after: , at level: {}", + node.key.as_ref().unwrap(), + (&*current).key.as_ref().unwrap(), + l); + }; + } + break 'inner_loop; + } else { + match self.cmp.compare(node.key.as_ref().unwrap(), ele.key.as_ref().unwrap()) { + Some(Ordering::Less) => { + // node higher than current level at ele + if level > l { + unsafe { + (&mut *current).set_node(l, node_ptr); + node.set_node(l, ele_ptr); + if (&*current).is_head() { + debug!("bind: {} before: , after: {}, at level: {}", + node.key.as_ref().unwrap(), + ele.key.as_ref().unwrap(), + l); + } else { + debug!("bind: {} before: {}, after: {}, at level: {}", + node.key.as_ref().unwrap(), + (&*current).key.as_ref().unwrap(), + ele.key.as_ref().unwrap(), + l); + } + }; + } + break 'inner_loop; + } + Some(Ordering::Greater) => { + current = ele; + } + Some(Ordering::Equal) => { + // ignore equals + return Ok(()); + } + None => { + return Err(Status::wrapper(LevelError::KInvalidArgument, "key not comparable".into())); + } + } + } + } + } + // if head level is less than new node, then fix head node height + if self.level < level { + for l in (self.level()..level).rev() { + unsafe { + (&mut *self.head).set_node(l, node_ptr); + node.set_node(l, self.tail); + }; + } + self.level = level; + } + self.num += 1; + Ok(()) + } + #[macro_use] + pub fn contains(&self, key: &Slice) -> bool { + debug!("================== begin contains, key: {} ==================", key); + if self.num == 0 { + return false; + } + unsafe { + let mut current = unsafe { &*self.head }; + for level in (0..self.level).rev() { + 'a_loop: loop { + let ele_ptr = current.get_node(level); + let ele = &*ele_ptr; + if ele.is_tail() { + // tail node + if level == 0 { + debug!("next is tail, return false"); + return false; + } else { + debug!("next is tail, continue"); + break 'a_loop; + } + } + { + debug!("node: {} at level: {}", ele.key.as_ref().unwrap(), level) + } + match self.cmp.compare(key, ele.key.as_ref().unwrap()) { + None => return false, + Some(Ordering::Equal) => return true, + Some(Ordering::Less) => { + // break loop, decrease the level + break; + } + Some(Ordering::Greater) => { + if current.level() == 0 { + return false; + } + current = ele; + } + }; } } } - todo!() + // can not found in all level + false + } + + #[inline] + pub fn max_height(&self) -> usize { + MAX_LEVEL } - pub fn contains(&self, _key: &Slice) -> bool { - todo!() + #[inline] + pub fn level(&self) -> usize { + self.level } - pub fn get_max_height(&self) -> usize { - todo!() + #[inline] + pub fn len(&self) -> usize { + self.num } - fn rnd_level(&self) -> u8 { + #[inline] + pub fn iter(&self) -> Iter { + Iter::create(&self) + } + + fn rnd_level(&self) -> usize { let mut level = 1; - for _ in 1..self.max_level { + for _ in 1..MAX_LEVEL { if random() { level += 1; } } level } +} + +impl ToString for SkipList { + fn to_string(&self) -> String { + let mut tree = String::with_capacity(1024); + // calculate each item width + let mut widths = Vec::with_capacity(tree.len()); + self.iter().for_each(|s| { + widths.push(s.size()); + }); + // print value list + if self.num > 0 { + unsafe { + let mut node = &*((&*self.head).get_node(0)); + tree.push_str("[head]"); + while !node.is_head_or_tail() { + tree.push_str(" -> "); + tree.push_str(node.key.as_ref().unwrap().as_str()); + let level_str = format!("({})", node.level); + tree.push_str(level_str.as_str()); + node = &*node.get_node(0); + } + } + } + tree.push_str("-> [tail]"); + format!("height: {}, num: {}\n {}", self.level, self.num, tree) + } +} + + +impl Node { + #[inline] + fn create(src: Slice, level: usize, mut arena: ArenaRef) -> RawNode { + let key = src.copy_with_arena(arena.clone()); + let node = box Self { + key: Some(key), + next_elems: allocate_next_elems(arena), + level, + }; + Box::into_raw(node) + } + #[inline] + fn create_head(mut arena: ArenaRef) -> RawNode { + let node = box Self { + key: None, + next_elems: allocate_next_elems(arena), + level: MAX_LEVEL, + }; + Box::into_raw(node) + } + + #[inline] + fn create_tail() -> RawNode { + let node = box Self { + key: None, + next_elems: null_mut(), + level: 0, + }; + Box::into_raw(node) + } + + #[inline] + #[must_use] + fn is_head_or_tail(&self) -> bool { + self.key.is_none() + } + + #[inline] + #[must_use] + fn is_tail(&self) -> bool { + self.key.is_none() && self.level == 0 + } + + #[inline] + #[must_use] + fn is_head(&self) -> bool { + self.key.is_none() && self.level > 0 + } + + + #[inline] + fn level(&self) -> usize { + self.level + } + + #[inline] + #[must_use] + unsafe fn get_node(&self, level: usize) -> RawNode { + assert!(level < MAX_LEVEL); + self.next_elems.offset(level as isize).read() + } + + #[inline] + unsafe fn set_node(&mut self, level: usize, node: RawNode) { + assert!(level < MAX_LEVEL); + self.next_elems.offset(level as isize).write(node); + } } -impl Node { - fn create(key: T) -> Self { +fn rand_level() -> usize { + let mut level = 1_usize; + while random::() { + level += 1; + if level >= MAX_LEVEL { + break; + } + } + level +} + +fn allocate_next_elems(mut arena: ArenaRef) -> *mut RawNode { + // RawNode is a raw ptr + assert_eq!(size_of::(), size_of::()); + // allocate next_elems to 8 capacity array + let elems_size = size_of::() * MAX_LEVEL; + let mut lock = arena.lock().expect("lock arena"); + let elems_ptr = lock.allocate(elems_size); + // transmute raw ptr to RawNode ptr + unsafe { + mem::transmute(elems_ptr.as_ptr()) + } +} + +#[inline] +fn min_max(a: usize, b: usize) -> (usize, usize) { + if a < b { + (a, b) + } else { + (b, a) + } +} + +// 'b lifetime is bigger than 'a +impl<'a, Cmp: ComparatorTrait> Iter<'a, Cmp> { + fn create(list: &'a SkipList) -> Self { Self { - key, - max_level: 1, - next: Vec::with_capacity(4), + list, + node: list.head, + } + } +} + +impl<'a, Cmp: ComparatorTrait> Iterator for Iter<'a, Cmp> { + type Item = &'a Slice; + + #[inline] + fn next(&mut self) -> Option { + unsafe { + if (&*self.node).is_tail() { + return None; + } else { + self.node = (&*self.node).get_node(0); + } + (&*self.node).key.as_ref() } } } \ No newline at end of file diff --git a/src/db/skip_list_test.rs b/src/db/skip_list_test.rs new file mode 100644 index 0000000..b697a44 --- /dev/null +++ b/src/db/skip_list_test.rs @@ -0,0 +1,65 @@ + +mod test { + use std::collections::HashSet; + use std::io::{stdout, Write}; + use std::panic; + use std::sync::{Arc, Mutex}; + use rand::Rng; + use crate::util::Result; + use crate::db::DefaultSkipList; + use crate::db::skip_list::SkipList; + use crate::debug; + use crate::util::Arena; + use crate::util::comparator::BytewiseComparatorImpl; + use crate::util::slice::Slice; + + #[test] + fn test_add() -> Result<()> { + let cmp = Arc::new(BytewiseComparatorImpl::default()); + let arena = Arc::new(Mutex::new(Arena::default())); + let mut list = DefaultSkipList::create(cmp, arena); + let len = 10; + for i in 0..len { + list.insert(format!("key_{}", i).into()).expect("insert ok"); + } + assert_eq!(10, list.len(), "expect 10, but actually is: {}", list.len()); + println!("{}", list.to_string()); + for i in 0..len { + let key: Slice = format!("key_{}", i).into(); + println!("contains key: {}", key); + assert!(list.contains(&key), "contains key: {}", key); + } + list.iter().for_each(|slice| { + println!("slice: {}", slice.as_str()) + }); + Ok(()) + } + + #[test] + fn test_rnd_add() -> Result<()> { + panic::set_hook(Box::new(|_panic_info| { + stdout().flush().unwrap(); + })); + let cmp = Arc::new(BytewiseComparatorImpl::default()); + let arena = Arc::new(Mutex::new(Arena::default())); + let mut list = DefaultSkipList::create(cmp, arena); + let len = 10; + let mut rnd = rand::thread_rng(); + let mut set = HashSet::new(); + for i in 0..10 { + let j = rnd.gen_range(0..len); + let key = format!("key_{}", j); + set.insert(key.clone()); + list.insert(key.into())?; + debug!("skiplist: {}", list.to_string()); + } + assert_eq!(set.len(), list.len(), "list length must eq: {}", list.len()); + set.iter().for_each(|key| { + let c = list.contains(&key.clone().into()); + assert!(c, "must contains key: {}", key) + }); + + Ok(()) + } + +} \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index a915437..b3fbd97 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,8 @@ #![feature(box_syntax)] #![feature(let_else)] +#![feature(generic_associated_types)] + +extern crate core; mod db; mod table; diff --git a/src/util/arena.rs b/src/util/arena.rs index 3f46dcb..2022a51 100644 --- a/src/util/arena.rs +++ b/src/util/arena.rs @@ -1,10 +1,25 @@ +use std::{ptr, slice}; use std::alloc::{alloc, dealloc, Layout}; +use std::cell::Cell; +use std::ops::Deref; use std::ptr::NonNull; -use std::slice; +use std::rc::Rc; +use std::sync::{Arc, Mutex}; +use crate::util::mutex_lock::MutexLock; + +use crate::util::slice::Slice; // Arena block size const ARENA_BLOCK_SIZE: usize = 4096; +pub type ArenaRef = Arc>; + + +/// +pub trait ArenaAllocLike { + fn copy_with_arena(&self, arena: ArenaRef) -> Self; +} + pub struct Arena { alloc_ptr: Option>, alloc_bytes_remaining: usize, @@ -24,7 +39,6 @@ impl Default for Arena { } impl Arena { - /// 申请一块内存 /// /// # Arguments @@ -102,4 +116,16 @@ impl Drop for Arena { } } } +} + +impl ArenaAllocLike for Slice { + fn copy_with_arena(&self, mut arena: ArenaRef) -> Self { + unsafe { + let mut lock_guard = arena.lock().unwrap(); + let dst = lock_guard.allocate(self.len()); + let src = &**self; + dst.copy_from_slice(src); + Slice::from_raw_parts(dst.as_mut_ptr(), self.len()) + } + } } \ No newline at end of file diff --git a/src/util/const.rs b/src/util/const.rs index a8ffcf8..4ae2368 100644 --- a/src/util/const.rs +++ b/src/util/const.rs @@ -5,3 +5,5 @@ pub const COLON_WHITE_SPACE: &'static str = ": "; /// hash 的默认seed: 0xbc9f1d34 pub const HASH_DEFAULT_SEED: u32 = 0xbc9f1d34; + +pub const DEBUG_ENABLE: bool = true; diff --git a/src/util/debug.rs b/src/util/debug.rs new file mode 100644 index 0000000..9645f5f --- /dev/null +++ b/src/util/debug.rs @@ -0,0 +1,17 @@ +use std::fmt::format; +use std::io::Write; +#[macro_export] +macro_rules! debug { + () => { + if (crate::util::r#const::DEBUG_ENABLE) { + std::io::stdout().write("\n".as_bytes()).unwrap(); + } + }; + ($($arg:tt)*) => {{ + use std::io::Write; + if(crate::util::r#const::DEBUG_ENABLE) { + std::io::stdout().write(format!($($arg)*).as_bytes()); + debug!(); + } + }}; +} diff --git a/src/util/mod.rs b/src/util/mod.rs index b3e53d7..8f7293c 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -34,6 +34,7 @@ mod hash_test; mod mutex_lock; mod mutex_lock_test; pub mod options; +pub mod debug; /// 定义别名 -pub type Result = result::Result; +pub type Result = result::Result; \ No newline at end of file diff --git a/src/util/slice.rs b/src/util/slice.rs index 26ea8b1..7a14a22 100644 --- a/src/util/slice.rs +++ b/src/util/slice.rs @@ -1,6 +1,8 @@ use std::mem; use std::borrow::Cow; use std::cmp::Ordering; +use std::fmt::{Display, Formatter}; +use std::mem::ManuallyDrop; use std::ops::Deref; #[derive(Debug)] @@ -40,6 +42,12 @@ impl Slice { } } + #[inline] + pub unsafe fn from_raw_parts(ptr: *mut u8, len: usize) -> Self { + let data = Vec::from_raw_parts(ptr, len, len); + Self { data } + } + /// 获取 slice 长度 #[inline] pub fn size(&self) -> usize { @@ -96,6 +104,10 @@ impl Slice { } } + pub fn as_str(&self) -> &str { + let s = self.as_ref(); + std::str::from_utf8(s).unwrap() + } } impl<'a> Slice { @@ -195,3 +207,16 @@ impl Deref for Slice { } } +impl Display for Slice { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + unsafe { + let string = ManuallyDrop::new( + String::from_raw_parts( + self.as_ptr() as *mut u8, + self.data.len(), + self.data.capacity()) + ); + f.write_str(string.as_str()) + } + } +} diff --git a/src/util/status.rs b/src/util/status.rs index 8f5365d..afd4040 100644 --- a/src/util/status.rs +++ b/src/util/status.rs @@ -2,7 +2,7 @@ use std::fmt::{Display, Formatter}; use std::io; use crate::util::r#const::COLON_WHITE_SPACE; use crate::util::slice::Slice; -use crate::util::status::LevelError::{KCorruption, KIOError, KInvalidArgument, KNotSupported, KNotFound, KOk, KBadRecord}; +use crate::util::status::LevelError::{KCorruption, KIOError, KInvalidArgument, KNotSupported, KNotFound, KOk, KBadRecord, KRepeatedRecord}; /// db 中的返回状态,将错误号和错误信息封装成Status类,统一进行处理。 /// 在 leveldb的实现里, 为了节省空间Status将返回码(code), 错误信息message及长度打包存储于一个字符串数组中, 来存储错误信息。 @@ -134,6 +134,7 @@ impl Status { KInvalidArgument => "Invalid argument: ", KIOError => "IO error: ", KBadRecord=> "wal bad record", + KRepeatedRecord => "repeated record" }; if self.err.is_ok() { @@ -180,6 +181,7 @@ pub enum LevelError { KInvalidArgument, KIOError, KBadRecord, + KRepeatedRecord, } impl LevelError { @@ -225,6 +227,10 @@ impl LevelError { } } + pub fn is_repeated_record(&self) -> bool { + matches!(self, KRepeatedRecord) + } + pub fn ok() -> Status { Status{ err: Default::default(), @@ -282,6 +288,14 @@ impl LevelError { } } + #[inline] + pub fn repeated_record(msg: Slice) -> Status { + Status { + err: KRepeatedRecord, + msg + } + } + /// 生成 LevelError.KIOError /// /// # Arguments @@ -339,6 +353,7 @@ impl TryFrom for LevelError { 4 => Ok(KInvalidArgument), 5 => Ok(KIOError), 6 => Ok(KBadRecord), + 7 => Ok(KRepeatedRecord), // all other numbers _ => Err(String::from(format!("Unknown code: {}", value))) } @@ -363,6 +378,7 @@ impl Display for LevelError { KInvalidArgument => "Invalid argument: ", KIOError => "IO error: ", KBadRecord => "wal bad record: ", + KRepeatedRecord => "repeated record: ", }; print.push_str(msg_type); -- Gitee From f8a504a0156f5631e4c34b8e97656c4817405f26 Mon Sep 17 00:00:00 2001 From: wangboo <5417808+wangboa@user.noreply.gitee.com> Date: Thu, 16 Mar 2023 16:50:32 +0800 Subject: [PATCH 08/50] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E8=BF=91=E5=BA=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 47 +++++++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index c31a5a8..d52d7fc 100644 --- a/README.md +++ b/README.md @@ -67,30 +67,29 @@ LSM tree 是许多 KV型或日志型数据库所依赖的核心实现,例如Bi ### 1.1.0 1.1.0 版本, 完成基础零部件 -| 功能模块 | 完成人 | 进度 | -|----------------------------------------------------------------------------------|--------------|-----| -| util.Options(ReadOptions, WriteOptions) | kazeseiriou | | -| util.ENV(WritableFile, SequentialFile, RandomAccessFile, FileLock) | lxd5866 | | -| util.Logger | peach | | -| table.Block, BlockBuilder, FilterBlockBuilder | colagy | | -| FilterBlock, FilterBlockReader | colagy | | -| table.format(Footer, BlockHandle) | fengyang、半支烟 | | -| db.dbformat(InternalKeyComparator, InternalFilterPolicy, LookupKey, InternalKey) | fengyang、半支烟 | | -| db.SkipList | wangboo | | -| table.Iterator(DBIter, MergingIterator, TwoLevelIterator...) | kazeseiriou | | -| IteratorWrapper | kazeseiriou | | -| db.MemTable(MemTable, MemTableIterator) | wangboo | | -| SSTable | fengyang | | -| table.Table | peach | | -| db.leveldb_util | wangboo | | -| db.log_format | wangboo | | -| db.LogReader | wangboo | 90% | -| db.LogWriter | wangboo | 90% | -| db.TableCache | colagy | | -| LinkedList | fengyang | | -| db.VersionEdit(Tag, VersionEdit, FileMetaData) | fengyang | | -| db.VersionSet(Version, LevelFileNumIterator, SaverState) | fengyang | | -| WriteBatch | peach | | +| 功能模块 | 完成人 | 进度 | +|----------------------------------------------------------------------------------|-------------|-----| +| util.Options(ReadOptions, WriteOptions) | kazeseiriou | | +| util.ENV(WritableFile, SequentialFile, RandomAccessFile, FileLock) | lxd5866 | | +| util.Logger | peach | | +| table.Block, BlockBuilder, FilterBlockBuilder | colagy | | +| FilterBlock, FilterBlockReader | colagy | | +| table.format(Footer, BlockHandle) | fengyang | | +| db.dbformat(InternalKeyComparator, InternalFilterPolicy, LookupKey, InternalKey) | fengyang | | +| db.SkipList | wangboo | 80% | +| table.Iterator(DBIter, MergingIterator, TwoLevelIterator...) | kazeseiriou | | +| IteratorWrapper | kazeseiriou | | +| db.MemTable(MemTable, MemTableIterator) | wangboo | 20% | +| SSTable | fengyang | | +| table.Table | peach | | +| db.leveldb_util | hui | | +| db.log_format | hui | | +| db.LogReader | wangboo | 90% | +| db.LogWriter | wangboo | 90% | +| db.TableCache | colagy | | +| db.VersionEdit(Tag, VersionEdit, FileMetaData) | fengyang | | +| db.VersionSet(Version, LevelFileNumIterator, SaverState) | fengyang | | +| WriteBatch | peach | | #### 1.1.0 计划 * 完成gitee -> github (同步) 主仓库gitee -- Gitee From 1db0982f164e5d993b89686e9ebdfc85883c577e Mon Sep 17 00:00:00 2001 From: wangboo <5417808+wangboa@user.noreply.gitee.com> Date: Thu, 16 Mar 2023 16:59:58 +0800 Subject: [PATCH 09/50] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=AE=98=E7=BD=91?= =?UTF-8?q?=E7=9A=84=E4=BB=BB=E5=8A=A1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index df24fec..0b67a43 100644 --- a/README.md +++ b/README.md @@ -91,6 +91,7 @@ LSM tree 是许多 KV型或日志型数据库所依赖的核心实现,例如Bi | db.VersionEdit(Tag, VersionEdit, FileMetaData) | fengyang | 10% | | db.VersionSet(Version, LevelFileNumIterator, SaverState) | fengyang | 10% | | WriteBatch | tzcyujunyong | | +| | 半支烟 | | #### 1.1.0 计划 * 完成gitee -> github (同步) 主仓库gitee -- Gitee From 2d56fa3b05feb200a59e074d61ccdd180b481063 Mon Sep 17 00:00:00 2001 From: fengyang Date: Thu, 16 Mar 2023 17:31:46 +0800 Subject: [PATCH 10/50] md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d490375..45475df 100644 --- a/README.md +++ b/README.md @@ -74,8 +74,8 @@ LSM tree 是许多 KV型或日志型数据库所依赖的核心实现,例如Bi | util.Logger | peach | | | table.Block, BlockBuilder, FilterBlockBuilder | colagy | | | FilterBlock, FilterBlockReader | colagy | | -| table.format(Footer, BlockHandle) | fengyang、半支烟 | 20% | -| db.dbformat(InternalKeyComparator, InternalFilterPolicy, LookupKey, InternalKey) | fengyang、半支烟 | 20% | +| table.format(Footer, BlockHandle) | 半支烟 | 20% | +| db.dbformat(InternalKeyComparator, InternalFilterPolicy, LookupKey, InternalKey) | 半支烟 | 20% | | db.SkipList | wangboo | | | table.Iterator(DBIter, MergingIterator, TwoLevelIterator...) | kazeseiriou | | | IteratorWrapper | kazeseiriou | | -- Gitee From 2185cb5f2a5ab98cbb491941fa6b9ed495621f78 Mon Sep 17 00:00:00 2001 From: fengyang Date: Thu, 16 Mar 2023 17:35:31 +0800 Subject: [PATCH 11/50] md --- src/util/linked_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/util/linked_list.rs b/src/util/linked_list.rs index 3cab807..8ffb8e6 100644 --- a/src/util/linked_list.rs +++ b/src/util/linked_list.rs @@ -208,6 +208,7 @@ pub trait LinkedListBuilder: Default { // public boolean offerFirst(E e) 头部插入元素,返回是否成功,成功为 true,失败为 false。 // public boolean offerLast(E e) 尾部插入元素,返回是否成功,成功为 true,失败为 false。 + // public E poll() 删除并返回第一个元素。 // public E pollFirst() 检索并删除此列表的第一个元素,如果此列表为空,则返回 null 。 // public E pollLast() 检索并删除此列表的最后一个元素,如果此列表为空,则返回 null 。 -- Gitee From b4c521cc129290ee4a06ad1413558c6faa5d5069 Mon Sep 17 00:00:00 2001 From: colagy Date: Thu, 16 Mar 2023 09:38:13 +0000 Subject: [PATCH 12/50] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E8=BF=9B=E5=BA=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: colagy --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ce44d5f..2b5cf6c 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ LSM tree 是许多 KV型或日志型数据库所依赖的核心实现,例如Bi | Histgram | kazeseiriou | 100% | | loging | | | | MutexLock | kazeseiriou | 100% | -| Random | colagy | | +| Random | colagy | 100% | | Status | fengyang | 100% | | Slice | wangboo | 100% | @@ -74,8 +74,8 @@ LSM tree 是许多 KV型或日志型数据库所依赖的核心实现,例如Bi | util.Logger | peach | | | table.Block, BlockBuilder, FilterBlockBuilder | colagy | | | FilterBlock, FilterBlockReader | colagy | | -| table.format(Footer, BlockHandle) | 半支烟 | 20% | -| db.dbformat(InternalKeyComparator, InternalFilterPolicy, LookupKey, InternalKey) | 半支烟 | 20% | +| table.format(Footer, BlockHandle) | fengyang、半支烟 | 20% | +| db.dbformat(InternalKeyComparator, InternalFilterPolicy, LookupKey, InternalKey) | fengyang、半支烟 | 20% | | db.SkipList | wangboo | 80% | | table.Iterator(DBIter, MergingIterator, TwoLevelIterator...) | kazeseiriou | | | IteratorWrapper | kazeseiriou | | @@ -86,7 +86,7 @@ LSM tree 是许多 KV型或日志型数据库所依赖的核心实现,例如Bi | db.log_format | wangboo | | | db.LogReader | wangboo | 90% | | db.LogWriter | wangboo | 90% | -| db.TableCache | colagy | | +| db.TableCache | colagy | 10% | | LinkedList | fengyang | 60% | | db.VersionEdit(Tag, VersionEdit, FileMetaData) | fengyang | 10% | | db.VersionSet(Version, LevelFileNumIterator, SaverState) | fengyang | 10% | -- Gitee From bd6cdc7b05ffb2f4c732b752a7c063efa5779cfb Mon Sep 17 00:00:00 2001 From: fengyang Date: Thu, 16 Mar 2023 17:41:41 +0800 Subject: [PATCH 13/50] md --- README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2b5cf6c..44a8c46 100644 --- a/README.md +++ b/README.md @@ -74,8 +74,8 @@ LSM tree 是许多 KV型或日志型数据库所依赖的核心实现,例如Bi | util.Logger | peach | | | table.Block, BlockBuilder, FilterBlockBuilder | colagy | | | FilterBlock, FilterBlockReader | colagy | | -| table.format(Footer, BlockHandle) | fengyang、半支烟 | 20% | -| db.dbformat(InternalKeyComparator, InternalFilterPolicy, LookupKey, InternalKey) | fengyang、半支烟 | 20% | +| table.format(Footer, BlockHandle) | 半支烟 | 20% | +| db.dbformat(InternalKeyComparator, InternalFilterPolicy, LookupKey, InternalKey) | 半支烟 | 20% | | db.SkipList | wangboo | 80% | | table.Iterator(DBIter, MergingIterator, TwoLevelIterator...) | kazeseiriou | | | IteratorWrapper | kazeseiriou | | @@ -93,6 +93,10 @@ LSM tree 是许多 KV型或日志型数据库所依赖的核心实现,例如Bi | WriteBatch | tzcyujunyong | | | | 半支烟 | | + + + + #### 1.1.0 计划 * 完成gitee -> github (同步) 主仓库gitee * 官网交给辉哥, 展示一些要做的内容和产品信息。 2月完成。 -- Gitee From c20740cf95e4a8ee14846a971455a0e4fa50f6ff Mon Sep 17 00:00:00 2001 From: wangboo <5417808+wangboa@user.noreply.gitee.com> Date: Thu, 16 Mar 2023 18:27:57 +0800 Subject: [PATCH 14/50] =?UTF-8?q?=E4=BF=AE=E6=94=B9skiplist=E6=B5=8B?= =?UTF-8?q?=E8=AF=95=E6=A1=88=E4=BE=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- src/db/skip_list_test.rs | 9 +++------ src/util/comparator.rs | 1 - src/util/mod.rs | 2 +- 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 07c70b6..806faa6 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ LSM tree 是许多 KV型或日志型数据库所依赖的核心实现,例如Bi | Slice | wangboo | 100% | 2. 1.1.0 版本, 完成基础零部件 - + | 功能模块 | 完成人 | 进度 | |------------------|---------|-----| | skiplist | 未认领 | | diff --git a/src/db/skip_list_test.rs b/src/db/skip_list_test.rs index b697a44..93633a6 100644 --- a/src/db/skip_list_test.rs +++ b/src/db/skip_list_test.rs @@ -23,23 +23,20 @@ mod test { list.insert(format!("key_{}", i).into()).expect("insert ok"); } assert_eq!(10, list.len(), "expect 10, but actually is: {}", list.len()); - println!("{}", list.to_string()); + debug!("{}", list.to_string()); for i in 0..len { let key: Slice = format!("key_{}", i).into(); - println!("contains key: {}", key); + debug!("contains key: {}", key); assert!(list.contains(&key), "contains key: {}", key); } list.iter().for_each(|slice| { - println!("slice: {}", slice.as_str()) + debug!("slice: {}", slice.as_str()) }); Ok(()) } #[test] fn test_rnd_add() -> Result<()> { - panic::set_hook(Box::new(|_panic_info| { - stdout().flush().unwrap(); - })); let cmp = Arc::new(BytewiseComparatorImpl::default()); let arena = Arc::new(Mutex::new(Arena::default())); let mut list = DefaultSkipList::create(cmp, arena); diff --git a/src/util/comparator.rs b/src/util/comparator.rs index 71560fe..0417c10 100644 --- a/src/util/comparator.rs +++ b/src/util/comparator.rs @@ -62,7 +62,6 @@ impl ComparatorTrait for BytewiseComparatorImpl { } let shortest_separator: &[u8] = &start_char_vec[0..diff_index+1]; - let shortest_separator_val: String= Slice::from_buf(shortest_separator).into(); shortest_separator_val } diff --git a/src/util/mod.rs b/src/util/mod.rs index 8f7293c..652da5d 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -37,4 +37,4 @@ pub mod options; pub mod debug; /// 定义别名 -pub type Result = result::Result; \ No newline at end of file +pub type Result = result::Result; -- Gitee From 66bb7c96da841b5d4a8d7a7328de8cb207b29f26 Mon Sep 17 00:00:00 2001 From: wangboo <5417808+wangboa@user.noreply.gitee.com> Date: Thu, 16 Mar 2023 18:42:19 +0800 Subject: [PATCH 15/50] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dslice=20partial=5Fcmp?= =?UTF-8?q?=20bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/util/comparator_test.rs | 7 ++----- src/util/slice.rs | 32 ++++++++++++++------------------ 2 files changed, 16 insertions(+), 23 deletions(-) diff --git a/src/util/comparator_test.rs b/src/util/comparator_test.rs index e1ad1b6..d6d311b 100644 --- a/src/util/comparator_test.rs +++ b/src/util/comparator_test.rs @@ -20,12 +20,9 @@ mod test { let option_val = comp.compare(&Slice::from("a"), &Slice::from("ab")); assert_eq!(option_val.unwrap(), Ordering::Less); - // // todo Slice 存在 bug 未修复 - // let comp = BytewiseComparatorImpl::default(); - // let option_val = comp.compare(&Slice::from("b"), &Slice::from("abcd")); - // assert_eq!(option_val.unwrap(), Ordering::Greater); + let option_val = comp.compare(&Slice::from("b"), &Slice::from("abcd")); + assert_eq!(option_val.unwrap(), Ordering::Greater); - let comp = BytewiseComparatorImpl::default(); let option_val = comp.compare(&Slice::from("abcd"), &Slice::from("abcd")); assert_eq!(option_val.unwrap(), Ordering::Equal); } diff --git a/src/util/slice.rs b/src/util/slice.rs index 26ea8b1..6416db3 100644 --- a/src/util/slice.rs +++ b/src/util/slice.rs @@ -152,24 +152,20 @@ impl PartialEq for Slice { impl PartialOrd for Slice { /// 判断两个 slice 的大小关系 fn partial_cmp(&self, other: &Self) -> Option { - match self.size().partial_cmp(&other.size()) { - Some(Ordering::Equal) => { - let cmp = unsafe { - memcmp( - self.data.as_ptr() as *const i8, - other.data.as_ptr() as *const i8, - self.size(), - ) - }; - if cmp == 0 { - Some(Ordering::Equal) - } else if cmp > 0 { - Some(Ordering::Greater) - } else { - Some(Ordering::Less) - } - } - op => op + let min = self.size().min(other.size()); + let cmp = unsafe { + memcmp( + self.data.as_ptr() as *const i8, + other.data.as_ptr() as *const i8, + min, + ) + }; + if cmp == 0 { + self.size().partial_cmp(&other.size()) + } else if cmp > 0 { + Some(Ordering::Greater) + } else { + Some(Ordering::Less) } } } -- Gitee From 1e9ca27319f8207f12ee3ef02b837721cf9de4f8 Mon Sep 17 00:00:00 2001 From: wangboo <5417808+wangboa@user.noreply.gitee.com> Date: Thu, 16 Mar 2023 18:43:28 +0800 Subject: [PATCH 16/50] =?UTF-8?q?=E5=8E=BB=E9=99=A4=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=E8=AD=A6=E5=91=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/util/slice_test.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/slice_test.rs b/src/util/slice_test.rs index a2e0ec5..9baf1d1 100644 --- a/src/util/slice_test.rs +++ b/src/util/slice_test.rs @@ -88,7 +88,7 @@ mod test { #[test] fn test_merge2() { let mut a0 = Slice::from("123"); - let mut a2 = Slice::from("456"); + let a2 = Slice::from("456"); a0.merge(a2, None); assert_eq!(String::from("123456"), String::from(a0)); } -- Gitee From e0c0d16b6ad41c0b7e153675cbe8f4bf5d34bc43 Mon Sep 17 00:00:00 2001 From: wangboo <5417808+wangboa@user.noreply.gitee.com> Date: Thu, 16 Mar 2023 20:22:15 +0800 Subject: [PATCH 17/50] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E7=BC=96=E8=AF=91?= =?UTF-8?q?=E9=80=89=E9=A1=B9=EF=BC=8C=E8=A7=A3=E5=86=B3DEBUG=E5=AE=8F?= =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .cargo/config.toml | 5 +++++ src/util/debug.rs | 11 +++++++++++ 2 files changed, 16 insertions(+) create mode 100644 .cargo/config.toml diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..c32ea9f --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,5 @@ +[target.dev] +rustc-cfg=['DEBUG="true"'] + +[target.release] +rustc-cfg=['DEBUG="false"'] \ No newline at end of file diff --git a/src/util/debug.rs b/src/util/debug.rs index 9645f5f..dcd2777 100644 --- a/src/util/debug.rs +++ b/src/util/debug.rs @@ -1,5 +1,7 @@ use std::fmt::format; use std::io::Write; + +#[cfg(DEBUG = "true")] #[macro_export] macro_rules! debug { () => { @@ -15,3 +17,12 @@ macro_rules! debug { } }}; } + +#[cfg(not(DEBUG = "true"))] +#[macro_export] +macro_rules! debug { + () => { + }; + ($($arg:tt)*) => {{ + }}; +} \ No newline at end of file -- Gitee From 94473e076d1d7d8d709aaef584ff72f007dd96f9 Mon Sep 17 00:00:00 2001 From: wangboo <5417808+wangboa@user.noreply.gitee.com> Date: Thu, 16 Mar 2023 21:04:06 +0800 Subject: [PATCH 18/50] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E8=B0=83=E8=AF=95?= =?UTF-8?q?=E6=97=B6=E7=9A=84=E7=BC=96=E8=AF=91=E5=8F=82=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .cargo/config.toml | 7 ++----- Cargo.toml | 7 +++++++ README.md | 9 ++++++++- src/util/debug.rs | 16 +++++++--------- 4 files changed, 24 insertions(+), 15 deletions(-) diff --git a/.cargo/config.toml b/.cargo/config.toml index c32ea9f..0d0b4da 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -1,5 +1,2 @@ -[target.dev] -rustc-cfg=['DEBUG="true"'] - -[target.release] -rustc-cfg=['DEBUG="false"'] \ No newline at end of file +[build] +rustflags='--cfg CORE_DEBUG="true"' \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index c05ad50..b2f54ec 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,11 @@ edition = "2021" name = "level_db_rust" path = "src/lib.rs" +[features] +default = ["debug-macro"] +release=[] + +debug-macro=[] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] @@ -14,4 +19,6 @@ rand = "0.8.5" [profile.dev] + [profile.release] + diff --git a/README.md b/README.md index 806faa6..d296d60 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,14 @@ LSM tree 是许多 KV型或日志型数据库所依赖的核心实现,例如Bi #### 使用说明 -1. xxxx +1. 编译参数 + CORE_DEBUG 默认开启,打印调试信息 + +在构建正式版本时,用户可以用 RUSTFLAGS 环境变量覆盖以上编译参数。 +eg: +```bash +RUSTFLAGS='--cfg CORE_DEBUG="false"' cargo test db::skip_list_test::test::test_add +``` #### 参与贡献 diff --git a/src/util/debug.rs b/src/util/debug.rs index dcd2777..c32caf1 100644 --- a/src/util/debug.rs +++ b/src/util/debug.rs @@ -1,24 +1,22 @@ use std::fmt::format; use std::io::Write; -#[cfg(DEBUG = "true")] +// #[cfg(feature = "debug-macro")] +#[cfg(CORE_DEBUG = "true")] #[macro_export] macro_rules! debug { () => { - if (crate::util::r#const::DEBUG_ENABLE) { - std::io::stdout().write("\n".as_bytes()).unwrap(); - } + std::io::stdout().write("\n".as_bytes()).unwrap(); }; ($($arg:tt)*) => {{ use std::io::Write; - if(crate::util::r#const::DEBUG_ENABLE) { - std::io::stdout().write(format!($($arg)*).as_bytes()); - debug!(); - } + std::io::stdout().write(format!($($arg)*).as_bytes()); + debug!(); }}; } -#[cfg(not(DEBUG = "true"))] +// #[cfg(not(feature = "debug-macro"))] +#[cfg(not(CORE_DEBUG = "true"))] #[macro_export] macro_rules! debug { () => { -- Gitee From e25dfd56a23aa0774f938214570403e6ba3bec6d Mon Sep 17 00:00:00 2001 From: wangboo <5417808+wangboa@user.noreply.gitee.com> Date: Thu, 16 Mar 2023 21:04:58 +0800 Subject: [PATCH 19/50] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E8=B0=83=E8=AF=95?= =?UTF-8?q?=E6=97=B6=E7=9A=84=E7=BC=96=E8=AF=91=E5=8F=82=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d296d60..f3c18ee 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ LSM tree 是许多 KV型或日志型数据库所依赖的核心实现,例如Bi 在构建正式版本时,用户可以用 RUSTFLAGS 环境变量覆盖以上编译参数。 eg: ```bash -RUSTFLAGS='--cfg CORE_DEBUG="false"' cargo test db::skip_list_test::test::test_add +RUSTFLAGS='--cfg CORE_DEBUG="false"' cargo build --release ``` #### 参与贡献 -- Gitee From 80c4d8ee8f4d4246d7bf7d88ea256109c127af12 Mon Sep 17 00:00:00 2001 From: wangboo <5417808+wangboa@user.noreply.gitee.com> Date: Thu, 16 Mar 2023 21:53:47 +0800 Subject: [PATCH 20/50] =?UTF-8?q?=E5=AE=8C=E6=88=90skiplist?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/db/skip_list.rs | 84 +++++++++++++++++++--------------------- src/db/skip_list_test.rs | 9 ++--- src/util/arena.rs | 6 +-- src/util/mod.rs | 2 - 4 files changed, 44 insertions(+), 57 deletions(-) diff --git a/src/db/skip_list.rs b/src/db/skip_list.rs index 628f87c..40436a7 100644 --- a/src/db/skip_list.rs +++ b/src/db/skip_list.rs @@ -1,19 +1,14 @@ -use std::cell::{Cell, RefCell}; -use std::cmp::{max, min, Ordering}; -use std::fmt::format; +use std::cmp::Ordering; use std::mem; -use std::mem::{ManuallyDrop, size_of}; -use std::ops::{Deref, DerefMut}; -use std::ptr::{NonNull, null_mut}; +use std::mem::size_of; +use std::ptr::null_mut; use std::sync::Arc; use rand::prelude::*; use crate::debug; use crate::traits::comparator_trait::ComparatorTrait; -use crate::util::Arena; use crate::util::arena::{ArenaAllocLike, ArenaRef}; -use crate::util::comparator::BytewiseComparatorImpl; use crate::util::Result; use crate::util::slice::Slice; use crate::util::status::{LevelError, Status}; @@ -34,7 +29,7 @@ struct Node { pub struct SkipList { /// 最高层数 - level: usize, + height: usize, /// 存储数据数量 num: usize, /// 头部指针 @@ -55,7 +50,7 @@ pub struct Iter<'a, Cmp: ComparatorTrait> { impl SkipList { pub fn create(comparator: Arc, arena: ArenaRef) -> Self { Self { - level: 0, + height: 0, num: 0, head: Node::create_head(arena.clone()), tail: Node::create_tail(), @@ -65,10 +60,16 @@ impl SkipList { } pub fn insert(&mut self, key: Slice) -> Result<()> { + // TODO 这里是否可以优化 + if self.contains(&key) { + return Ok(()); + } if self.num == 0 { self.insert_ele0(key) } else { - self.insert_elen(key) + unsafe { + self.insert_elen(key) + } } } @@ -87,58 +88,55 @@ impl SkipList { (&mut *node).set_node(l, self.tail); } } - self.level = level; + self.height = level; self.num = 1; return Ok(()); } - fn insert_elen(&mut self, key: Slice) -> Result<()> { + unsafe fn insert_elen(&mut self, key: Slice) -> Result<()> { let mut current = self.head; - let level = rand_level(); - debug!("insert {}, level: {}", &key, level); + let node_height = rand_level(); + let node_top_level = node_height - 1; + debug!("insert {}, level: {}", &key, node_height); let node_ptr = unsafe { - Node::create(key, level, self.arena.clone()) + Node::create(key, node_height, self.arena.clone()) }; let node = unsafe { &mut *node_ptr }; // loop from highest level to 0 - for l in (0..self.level).rev() { + for l in (0..self.height).rev() { 'inner_loop: loop { let ele_ptr = unsafe { (&*current).get_node(l) }; let ele = unsafe { &mut *ele_ptr }; if ele.is_tail() { - if l < level { + if l <= node_top_level { // ele is tail node, add node to last - unsafe { - (&mut *current).set_node(l, node_ptr); - node.set_node(l, self.tail); - debug!("bind: {} before: {}, after: , at level: {}", + (&mut *current).set_node(l, node_ptr); + node.set_node(l, self.tail); + debug!("bind: {} before: {}, after: , at level: {}", node.key.as_ref().unwrap(), (&*current).key.as_ref().unwrap(), l); - }; } break 'inner_loop; } else { match self.cmp.compare(node.key.as_ref().unwrap(), ele.key.as_ref().unwrap()) { Some(Ordering::Less) => { // node higher than current level at ele - if level > l { - unsafe { - (&mut *current).set_node(l, node_ptr); - node.set_node(l, ele_ptr); - if (&*current).is_head() { - debug!("bind: {} before: , after: {}, at level: {}", + if node_top_level >= l { + (&mut *current).set_node(l, node_ptr); + node.set_node(l, ele_ptr); + if (&*current).is_head() { + debug!("bind: {} before: , after: {}, at level: {}", node.key.as_ref().unwrap(), ele.key.as_ref().unwrap(), l); - } else { - debug!("bind: {} before: {}, after: {}, at level: {}", + } else { + debug!("bind: {} before: {}, after: {}, at level: {}", node.key.as_ref().unwrap(), (&*current).key.as_ref().unwrap(), ele.key.as_ref().unwrap(), l); - } - }; + } } break 'inner_loop; } @@ -157,14 +155,12 @@ impl SkipList { } } // if head level is less than new node, then fix head node height - if self.level < level { - for l in (self.level()..level).rev() { - unsafe { - (&mut *self.head).set_node(l, node_ptr); - node.set_node(l, self.tail); - }; + if self.height < node_height { + for l in (self.height()..node_height).rev() { + (&mut *self.head).set_node(l, node_ptr); + node.set_node(l, self.tail); } - self.level = level; + self.height = node_height; } self.num += 1; Ok(()) @@ -178,7 +174,7 @@ impl SkipList { } unsafe { let mut current = unsafe { &*self.head }; - for level in (0..self.level).rev() { + for level in (0..self.height).rev() { 'a_loop: loop { let ele_ptr = current.get_node(level); let ele = &*ele_ptr; @@ -222,8 +218,8 @@ impl SkipList { } #[inline] - pub fn level(&self) -> usize { - self.level + pub fn height(&self) -> usize { + self.height } #[inline] @@ -270,7 +266,7 @@ impl ToString for SkipList { } } tree.push_str("-> [tail]"); - format!("height: {}, num: {}\n {}", self.level, self.num, tree) + format!("height: {}, num: {}\n {}", self.height, self.num, tree) } } diff --git a/src/db/skip_list_test.rs b/src/db/skip_list_test.rs index 93633a6..eb699e9 100644 --- a/src/db/skip_list_test.rs +++ b/src/db/skip_list_test.rs @@ -1,16 +1,14 @@ - mod test { use std::collections::HashSet; - use std::io::{stdout, Write}; - use std::panic; use std::sync::{Arc, Mutex}; + use rand::Rng; - use crate::util::Result; + use crate::db::DefaultSkipList; - use crate::db::skip_list::SkipList; use crate::debug; use crate::util::Arena; use crate::util::comparator::BytewiseComparatorImpl; + use crate::util::Result; use crate::util::slice::Slice; #[test] @@ -58,5 +56,4 @@ mod test { Ok(()) } - } \ No newline at end of file diff --git a/src/util/arena.rs b/src/util/arena.rs index 2022a51..f382625 100644 --- a/src/util/arena.rs +++ b/src/util/arena.rs @@ -1,11 +1,7 @@ -use std::{ptr, slice}; +use std::slice; use std::alloc::{alloc, dealloc, Layout}; -use std::cell::Cell; -use std::ops::Deref; use std::ptr::NonNull; -use std::rc::Rc; use std::sync::{Arc, Mutex}; -use crate::util::mutex_lock::MutexLock; use crate::util::slice::Slice; diff --git a/src/util/mod.rs b/src/util/mod.rs index 652da5d..19c3097 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -1,9 +1,7 @@ -use std::rc::Rc; use std::result; pub use arena::Arena; -use crate::util::comparator::{BytewiseComparatorImpl, InternalKeyComparator}; use crate::util::status::Status; /// 常量定义 -- Gitee From 7ab46579f8abd8c45c40227dfb601ec7468625eb Mon Sep 17 00:00:00 2001 From: wangboo <5417808+wangboa@user.noreply.gitee.com> Date: Thu, 16 Mar 2023 21:54:51 +0800 Subject: [PATCH 21/50] =?UTF-8?q?=E5=8E=BB=E9=99=A4=E4=B8=80=E4=BA=9B?= =?UTF-8?q?=E6=97=A0=E7=94=A8=E7=9A=84=20use?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/db/log_wr_test.rs | 16 ++++++++-------- src/db/mem_table.rs | 2 +- src/db/mod.rs | 2 +- src/db/skip_list.rs | 6 +++--- src/db/skip_list_test.rs | 20 ++++++++++---------- src/traits/mod.rs | 2 +- src/util/arena.rs | 2 +- src/util/arena_test.rs | 2 +- src/util/coding.rs | 16 ++++++++-------- src/util/coding_test.rs | 6 +++--- src/util/comparator_test.rs | 10 +++++----- src/util/crc_test.rs | 4 ++-- src/util/debug.rs | 4 ++-- src/util/filter_policy.rs | 6 +++--- src/util/filter_policy_test.rs | 10 +++++----- src/util/hash.rs | 4 ++-- src/util/hash_test.rs | 8 ++++---- src/util/histogram_test.rs | 2 +- src/util/mutex_lock.rs | 2 +- src/util/mutex_lock_test.rs | 4 ++-- src/util/slice_test.rs | 4 ++-- src/util/status_test.rs | 6 +++--- 22 files changed, 69 insertions(+), 69 deletions(-) diff --git a/src/db/log_wr_test.rs b/src/db/log_wr_test.rs index 98b1648..d333556 100644 --- a/src/db/log_wr_test.rs +++ b/src/db/log_wr_test.rs @@ -1,12 +1,12 @@ mod test { - use std::fs::File; - use crate::db::log_reader::LogReader; - use crate::db::log_writer::LogWriter; - use crate::traits::coding_trait::CodingTrait; - use crate::util::coding::Coding; - use crate::util::crc::{AsCrc, ToMask}; - use crate::util::Result; + + + + + + + use crate::util::slice::Slice; #[test] @@ -28,7 +28,7 @@ mod test { let sample: Vec = ('0'..='9').map(|a|a as u8).collect(); for i in 0..100 { let slice = reader.read_next().expect("not error").expect("must have record"); - let mut expect = generate_slice(i, &sample); + let expect = generate_slice(i, &sample); assert_eq!(expect.len(), slice.len()); assert_eq!(expect.as_ref(), slice.as_ref()) } diff --git a/src/db/mem_table.rs b/src/db/mem_table.rs index 9bc002c..a9dcf71 100644 --- a/src/db/mem_table.rs +++ b/src/db/mem_table.rs @@ -1,7 +1,7 @@ use std::rc::Rc; use crate::traits::comparator_trait::ComparatorTrait; use crate::traits::DataIterator; -use crate::util::comparator::InternalKeyComparator; + use crate::util::slice::Slice; use crate::util::Result; diff --git a/src/db/mod.rs b/src/db/mod.rs index 6ce1187..5b5ff72 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -1,7 +1,7 @@ use crate::db::mem_table::MemTable; use crate::db::skip_list::SkipList; use crate::util::comparator::{BytewiseComparatorImpl, InternalKeyComparator}; -use crate::util::slice::Slice; + pub mod log_writer; pub mod log_reader; diff --git a/src/db/skip_list.rs b/src/db/skip_list.rs index 40436a7..ddef242 100644 --- a/src/db/skip_list.rs +++ b/src/db/skip_list.rs @@ -273,7 +273,7 @@ impl ToString for SkipList { impl Node { #[inline] - fn create(src: Slice, level: usize, mut arena: ArenaRef) -> RawNode { + fn create(src: Slice, level: usize, arena: ArenaRef) -> RawNode { let key = src.copy_with_arena(arena.clone()); let node = box Self { key: Some(key), @@ -284,7 +284,7 @@ impl Node { } #[inline] - fn create_head(mut arena: ArenaRef) -> RawNode { + fn create_head(arena: ArenaRef) -> RawNode { let node = box Self { key: None, next_elems: allocate_next_elems(arena), @@ -352,7 +352,7 @@ fn rand_level() -> usize { level } -fn allocate_next_elems(mut arena: ArenaRef) -> *mut RawNode { +fn allocate_next_elems(arena: ArenaRef) -> *mut RawNode { // RawNode is a raw ptr assert_eq!(size_of::(), size_of::()); // allocate next_elems to 8 capacity array diff --git a/src/db/skip_list_test.rs b/src/db/skip_list_test.rs index eb699e9..287d18c 100644 --- a/src/db/skip_list_test.rs +++ b/src/db/skip_list_test.rs @@ -1,15 +1,15 @@ mod test { - use std::collections::HashSet; - use std::sync::{Arc, Mutex}; + + - use rand::Rng; + - use crate::db::DefaultSkipList; - use crate::debug; - use crate::util::Arena; - use crate::util::comparator::BytewiseComparatorImpl; - use crate::util::Result; - use crate::util::slice::Slice; + + + + + + #[test] fn test_add() -> Result<()> { @@ -41,7 +41,7 @@ mod test { let len = 10; let mut rnd = rand::thread_rng(); let mut set = HashSet::new(); - for i in 0..10 { + for _i in 0..10 { let j = rnd.gen_range(0..len); let key = format!("key_{}", j); set.insert(key.clone()); diff --git a/src/traits/mod.rs b/src/traits/mod.rs index 47c662e..a1332ef 100644 --- a/src/traits/mod.rs +++ b/src/traits/mod.rs @@ -4,5 +4,5 @@ pub mod comparator_trait; pub mod coding_trait; pub mod filter_policy_trait; -use std::rc::Rc; + pub use iterator::DataIterator; diff --git a/src/util/arena.rs b/src/util/arena.rs index f382625..d239b48 100644 --- a/src/util/arena.rs +++ b/src/util/arena.rs @@ -115,7 +115,7 @@ impl Drop for Arena { } impl ArenaAllocLike for Slice { - fn copy_with_arena(&self, mut arena: ArenaRef) -> Self { + fn copy_with_arena(&self, arena: ArenaRef) -> Self { unsafe { let mut lock_guard = arena.lock().unwrap(); let dst = lock_guard.allocate(self.len()); diff --git a/src/util/arena_test.rs b/src/util/arena_test.rs index dbc8cf1..fc800e1 100644 --- a/src/util/arena_test.rs +++ b/src/util/arena_test.rs @@ -1,4 +1,4 @@ -use crate::util::Arena; + #[test] fn test_memory_usage() { diff --git a/src/util/coding.rs b/src/util/coding.rs index 125c042..f27bc7e 100644 --- a/src/util/coding.rs +++ b/src/util/coding.rs @@ -28,7 +28,7 @@ macro_rules! varint { pub struct Coding {} impl CodingTrait for Coding { - fn put_fixed32(mut dst: &mut String, value: u32) { + fn put_fixed32(dst: &mut String, value: u32) { let mut buf: [u8; 4] = [0, 0, 0, 0]; Self::encode_fixed32(value, &mut buf, 0); for b in buf.iter() { @@ -36,7 +36,7 @@ impl CodingTrait for Coding { } } - fn put_fixed64(mut dst: &mut String, value: u64) { + fn put_fixed64(dst: &mut String, value: u64) { let mut buf: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0]; Self::encode_fixed64(value, &mut buf, 0); for b in buf.iter() { @@ -48,7 +48,7 @@ impl CodingTrait for Coding { varint!(u64,encode_varint64); - fn put_varint32(mut dst: &mut String, value: u32) { + fn put_varint32(dst: &mut String, value: u32) { let mut buf: [u8; 4] = [0, 0, 0, 0]; Self::encode_fixed32(value, &mut buf, 0); for b in buf.iter() { @@ -56,7 +56,7 @@ impl CodingTrait for Coding { } } - fn put_varint64(mut dst: &mut String, value: u64) { + fn put_varint64(dst: &mut String, value: u64) { let mut buf: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0]; Self::encode_fixed64(value, &mut buf, 0); for b in buf.iter() { @@ -125,7 +125,7 @@ impl CodingTrait for Coding { len } - fn encode_fixed32(mut value: u32, buf: &mut [u8], mut offset: usize) -> usize { + fn encode_fixed32(value: u32, buf: &mut [u8], mut offset: usize) -> usize { buf[offset] = value as u8; offset += 1; buf[offset] = (value >> 8) as u8; @@ -137,7 +137,7 @@ impl CodingTrait for Coding { offset } - fn encode_fixed64(mut value: u64, buf: &mut [u8], mut offset: usize) -> usize { + fn encode_fixed64(value: u64, buf: &mut [u8], mut offset: usize) -> usize { buf[offset] = value as u8; offset += 1; buf[offset] = (value >> 8) as u8; @@ -196,7 +196,7 @@ macro_rules! encoding_impl { /// let value: u32 = 65534; /// let offset = value.varint(&mut buf, 0); /// ``` - fn varint(self, buf: &mut [u8], mut offset: usize) -> usize { + fn varint(self, buf: &mut [u8], offset: usize) -> usize { Coding::$VAR_NAME (self, buf, offset) } /// 定长正整数编码 @@ -215,7 +215,7 @@ macro_rules! encoding_impl { /// let value: u32 = 65534; /// let offset = value.fixedint(&mut buf, 0); /// ``` - fn fixedint(self, buf: &mut [u8], mut offset: usize) -> usize { + fn fixedint(self, buf: &mut [u8], offset: usize) -> usize { Coding::$FIXED_NAME (self, buf, offset) } } diff --git a/src/util/coding_test.rs b/src/util/coding_test.rs index 69ed744..fd56327 100644 --- a/src/util/coding_test.rs +++ b/src/util/coding_test.rs @@ -1,7 +1,7 @@ mod test { - use crate::traits::coding_trait::{Coding32, Coding64, CodingTrait}; - use crate::util::slice::Slice; - use crate::util::coding::{Coding}; + + + #[test] fn test_put_fixed32() { diff --git a/src/util/comparator_test.rs b/src/util/comparator_test.rs index fcfb034..234ccb8 100644 --- a/src/util/comparator_test.rs +++ b/src/util/comparator_test.rs @@ -1,10 +1,10 @@ mod test { - use std::cmp::Ordering; - use std::io::Write; - use crate::traits::comparator_trait::ComparatorTrait; - use crate::util::comparator::{BytewiseComparatorImpl, InternalKeyComparator}; - use crate::util::slice::Slice; + + + + + #[test] fn test_bytewise_comparator_impl_get_name() { diff --git a/src/util/crc_test.rs b/src/util/crc_test.rs index c67c0db..e304450 100644 --- a/src/util/crc_test.rs +++ b/src/util/crc_test.rs @@ -1,5 +1,5 @@ -use crate::util::crc::{AsCrc, CRC, ToMask}; -use crate::util::slice::Slice; + + #[test] fn test_crc() { diff --git a/src/util/debug.rs b/src/util/debug.rs index c32caf1..464919b 100644 --- a/src/util/debug.rs +++ b/src/util/debug.rs @@ -1,5 +1,5 @@ -use std::fmt::format; -use std::io::Write; + + // #[cfg(feature = "debug-macro")] #[cfg(CORE_DEBUG = "true")] diff --git a/src/util/filter_policy.rs b/src/util/filter_policy.rs index 5a87f2e..8ddd651 100644 --- a/src/util/filter_policy.rs +++ b/src/util/filter_policy.rs @@ -1,6 +1,6 @@ use std::ops::Mul; use crate::traits::filter_policy_trait::{FilterPolicy}; -use crate::util::hash::{Hash, ToHash}; +use crate::util::hash::{ToHash}; use crate::util::slice::Slice; pub struct BloomFilterPolicy { @@ -43,7 +43,7 @@ impl FilterPolicy for BloomFilterPolicy { String::from("leveldb.BuiltinBloomFilter2") } - fn create_filter(&self, keys: Slice, n: u32, dst: String) -> String { + fn create_filter(&self, _keys: Slice, _n: u32, _dst: String) -> String { // 根据指定的参数创建过滤器,并返回结果, 结果为dst的原始内容 + append结果。 // 参数keys[0,n-1]包含依据用户提供的comparator排序的key列表--可重复, // 并把根据这些key创建的filter追加到 dst中。 @@ -51,7 +51,7 @@ impl FilterPolicy for BloomFilterPolicy { todo!() } - fn key_may_match(key: &Slice, filter: &Slice) -> bool { + fn key_may_match(_key: &Slice, _filter: &Slice) -> bool { todo!() } } \ No newline at end of file diff --git a/src/util/filter_policy_test.rs b/src/util/filter_policy_test.rs index b49448f..c5175de 100644 --- a/src/util/filter_policy_test.rs +++ b/src/util/filter_policy_test.rs @@ -1,14 +1,14 @@ -use std::ptr::null; -use crate::util::bloom_filter; -use crate::util::filter_policy::BloomFilterPolicy; + + + #[test] fn test_new() { - let bloom_filter = BloomFilterPolicy::new(8); + let _bloom_filter = BloomFilterPolicy::new(8); println!("hash:{}", "a"); // assert_eq!(bloom_filter, null()); - let bloom_filter = BloomFilterPolicy::new(800); + let _bloom_filter = BloomFilterPolicy::new(800); println!("hash:{}", "a"); } \ No newline at end of file diff --git a/src/util/hash.rs b/src/util/hash.rs index c9fce80..3d7f257 100644 --- a/src/util/hash.rs +++ b/src/util/hash.rs @@ -4,9 +4,9 @@ use std::slice as stds; use crate::traits::coding_trait::CodingTrait; use crate::util::coding::Coding; -use crate::util::crc::AsCrc; + use crate::util::r#const::HASH_DEFAULT_SEED; -use crate::util::slice; + use crate::util::slice::Slice; /// 一种可以计算 hash 的特质 diff --git a/src/util/hash_test.rs b/src/util/hash_test.rs index 81dce89..e0961d0 100644 --- a/src/util/hash_test.rs +++ b/src/util/hash_test.rs @@ -1,7 +1,7 @@ -use crate::util::hash::{Hash, ToHash}; -use crate::util::r#const::HASH_DEFAULT_SEED; -use crate::util::slice::Slice; -use std::slice; + + + + #[test] fn test_hash() { diff --git a/src/util/histogram_test.rs b/src/util/histogram_test.rs index a4f01e7..74e9033 100644 --- a/src/util/histogram_test.rs +++ b/src/util/histogram_test.rs @@ -1,6 +1,6 @@ mod test{ - use crate::util::histogram::Histogram; + #[test] fn test_add() { diff --git a/src/util/mutex_lock.rs b/src/util/mutex_lock.rs index 84e6d65..c6c4d77 100644 --- a/src/util/mutex_lock.rs +++ b/src/util/mutex_lock.rs @@ -1,4 +1,4 @@ -use std::ops::Deref; + use std::sync::{Arc, LockResult, Mutex, MutexGuard, TryLockResult}; pub struct Lock { diff --git a/src/util/mutex_lock_test.rs b/src/util/mutex_lock_test.rs index 7273a71..548871f 100644 --- a/src/util/mutex_lock_test.rs +++ b/src/util/mutex_lock_test.rs @@ -1,7 +1,7 @@ mod test { - use std::thread; + - use crate::util::mutex_lock::MutexLock; + #[test] fn test() { diff --git a/src/util/slice_test.rs b/src/util/slice_test.rs index a2e0ec5..75ae7a7 100644 --- a/src/util/slice_test.rs +++ b/src/util/slice_test.rs @@ -1,5 +1,5 @@ mod test { - use std::cmp::Ordering; + use crate::util::slice::Slice; #[test] @@ -88,7 +88,7 @@ mod test { #[test] fn test_merge2() { let mut a0 = Slice::from("123"); - let mut a2 = Slice::from("456"); + let a2 = Slice::from("456"); a0.merge(a2, None); assert_eq!(String::from("123456"), String::from(a0)); } diff --git a/src/util/status_test.rs b/src/util/status_test.rs index 2ac1030..90f66ec 100644 --- a/src/util/status_test.rs +++ b/src/util/status_test.rs @@ -1,8 +1,8 @@ mod test { - use crate::util::r#const::COLON_WHITE_SPACE; - use crate::util::slice::Slice; - use crate::util::status::{LevelError, Status}; + + + #[test] fn test_wraper() { -- Gitee From 66b705c1e99cec012e7d9510f0ee4b497e585eef Mon Sep 17 00:00:00 2001 From: wangboo <5417808+wangboa@user.noreply.gitee.com> Date: Thu, 16 Mar 2023 22:16:21 +0800 Subject: [PATCH 22/50] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=B5=8B=E8=AF=95?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 1.bin | Bin 5750 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 1.bin diff --git a/1.bin b/1.bin deleted file mode 100644 index ae41623ec551be8d83267e117d57ff87f5865988..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5750 zcmb2_U-OQUfze=o)!Py#AZ@twho(0(h+z~I7|6l`W*HX=FwAC!Fin;;Zs1{qvQ6Kg z`!2~2*t2nW}LI$r2b0(oxb7b)=v9OX8Pmv>_#&Tst&0BdwiVU0; z84VN&sxtIA_4|k-VP!_QnY;s)h)`$T_)%xGGBFBGHt=m>S0PHJ>GUP;e5%ALHMe6}uWT1*bdckR?8QIDzI)J01Aq-rwzej~rYfMi|f*VD9h4N2E#VZw5* z!-x!hmQ!x3{x>E^qrpX!<#SEQ(rNhT)QoGUwv-uRATIHu z$BsfX4A)OMZEa7fAw~+KtkWDQHpTdu2-|K)%8fBuw?(PYi3;YJ+AS&+a;AnsW^HF) zd%IA@By$h7d(E!YG0LL$*Of(XR5HtwH6i`3JGBfmShsV(st46fGyIbhpzcXMq?rQV)HKkf{F0-M4^>Sx{a7p2=u2HA&9)u))b^vYndV^%b$0&LHq;_M zG)Xal>ZV#=UADC^kov|NB;5WmGl&M}8m^X-l?xKXS~I_bRCZ<8k%pp>U#f%Xj)oe zkYudI7DH1T4DS?{%Er>x3M2J9!8vg>w!@gYMIj=d)|Qyu_W0A7KyzD6Q<56g5@~OZ g*{OY7ijwGHkGa Date: Fri, 17 Mar 2023 16:17:57 +0800 Subject: [PATCH 23/50] =?UTF-8?q?=E5=90=88=E5=B9=B6=E5=86=B2=E7=AA=81?= =?UTF-8?q?=EF=BC=8C=E5=A2=9E=E5=8A=A0skiplist/memtable=E7=9B=B8=E5=85=B3?= =?UTF-8?q?=E7=9A=84=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/db/db_format.rs | 79 +++++++++++++++++++++------------- src/db/log_wr_test.rs | 14 +++--- src/db/mem_table.rs | 61 +++++++++++++++++--------- src/db/mod.rs | 8 +--- src/db/skip_list.rs | 23 +++++----- src/db/skip_list_test.rs | 20 ++++----- src/traits/coding_trait.rs | 2 +- src/util/arena_test.rs | 2 +- src/util/coding.rs | 55 +++-------------------- src/util/coding_test.rs | 8 +--- src/util/comparator_test.rs | 8 ---- src/util/crc_test.rs | 4 +- src/util/filter_policy.rs | 20 +++------ src/util/filter_policy_test.rs | 15 ------- src/util/hash_test.rs | 7 ++- src/util/histogram_test.rs | 2 +- src/util/mutex_lock_test.rs | 5 +-- src/util/slice.rs | 7 +++ src/util/slice_test.rs | 2 +- src/util/status.rs | 7 +++ src/util/status_test.rs | 6 +-- 21 files changed, 159 insertions(+), 196 deletions(-) diff --git a/src/db/db_format.rs b/src/db/db_format.rs index 891b906..49e7194 100644 --- a/src/db/db_format.rs +++ b/src/db/db_format.rs @@ -1,16 +1,18 @@ use std::cmp::Ordering; -use std::ops::Deref; -use crate::db::db_format::ValueType::{K_TYPE_DELETION, K_TYPE_VALUE}; +use std::io::Write; +use crate::db::db_format::ValueType::{KTypeDeletion, KTypeValue}; use crate::db::file_meta_data::FileMetaData; +use crate::traits::coding_trait::CodingTrait; use crate::traits::comparator_trait::Comparator; +use crate::util::coding::Coding; use crate::util::slice::Slice; pub enum ValueType { /// 0x0 - K_TYPE_DELETION, + KTypeDeletion, /// 0x1 - K_TYPE_VALUE, + KTypeValue, } pub struct ParsedInternalKey { @@ -32,27 +34,17 @@ pub struct InternalKeyComparator { /// 查找键 // todo add clone trait pub struct LookupKey { - // We construct a char array of the form: - // klength varint32 <-- start_ - // userkey char[klength] <-- kstart_ - // tag uint64 - // <-- end_ - // The array is a suitable MemTable key. - // The suffix starting with "userkey" can be used as an InternalKey. - - start_: Slice, - kstart_: Slice, - end_: Slice, - - // Avoid allocation for short keys - space_: [u8; 200], + /// |klength(varint32)|user key(string)|sequence number(7 bytes)|value type(1 byte)| + data: Slice, + /// start index at user key + user_key_start: usize, } impl ValueType { - pub fn get_value(&self) -> i32 { + pub fn get_value(&self) -> usize { let le = match self { - K_TYPE_DELETION => 0, - K_TYPE_VALUE => 1 + KTypeDeletion => 0, + KTypeValue => 1 }; le @@ -79,8 +71,8 @@ impl TryFrom for ValueType { #[inline] fn try_from(value: i32) -> Result { match value { - 0 => Ok(K_TYPE_DELETION), - 1 => Ok(K_TYPE_VALUE), + 0 => Ok(KTypeDeletion), + 1 => Ok(KTypeValue), // all other numbers _ => Err(String::from(format!("Unknown code: {}", value))) } @@ -93,7 +85,7 @@ impl Default for ParsedInternalKey { ParsedInternalKey { user_key: Default::default(), sequence: 0, - value_type: K_TYPE_DELETION, + value_type: KTypeDeletion, } } } @@ -251,20 +243,39 @@ impl Comparator for InternalKeyComparator { impl LookupKey { /// Initialize *this for looking up user_key at a snapshot with /// the specified sequence number. - fn new(user_key: Slice, sequence: u64) -> Self { - // todo - todo!() + fn new(user_key: Slice, sequence: usize) -> Self { + let user_key_size = user_key.size(); + let need = user_key_size + 13; // A conservative estimate + let mut data = Vec::with_capacity(need); + let buf = data.as_mut_slice(); + let klength = Coding::varint_length(user_key_size + 8); + let mut offset = 0; + // write key size + offset = Coding::encode_varint32(klength as u32, buf, offset); + // write key slice + offset += (&mut buf[offset..]).write(user_key.as_ref()).expect("write user_key"); + // write sequence number and value type + Coding::encode_fixed64( + pack_sequence_and_type(sequence, ValueType::KTypeValue), + buf, offset); + + LookupKey { + data: Slice::from_vec(data), + user_key_start: klength + } } /// Return a key suitable for lookup in a MemTable. fn mem_table_key(&self) -> Slice { - todo!() + self.data.clone() } /// Return an internal key (suitable for passing to an internal iterator) fn internal_key(&self) -> Slice { // line 204 - todo!() + let buf = self.data.as_ref(); + let internal_key_buf = &buf[self.user_key_start..]; + Slice::from_buf(internal_key_buf.clone()) } /// Return the user key @@ -293,6 +304,14 @@ impl LookupKey { // } // } +const K_MAX_SEQUENCE_NUMBER: usize = (1 << 56) - 1; + +#[inline] +pub fn pack_sequence_and_type(seq_no: usize, v_type: ValueType) -> u64 { + debug_assert!(seq_no <= K_MAX_SEQUENCE_NUMBER); + debug_assert!(v_type.get_value() <= 1); + ((seq_no << 8) | v_type.get_value()) as u64 +} pub struct Config {} impl Config { @@ -325,5 +344,5 @@ impl Config { // and the value type is embedded as the low 8 bits in the sequence // number in internal keys, we need to use the highest-numbered // ValueType, not the lowest). - pub const K_VALUE_TYPE_FOR_SEEK: ValueType = ValueType::K_TYPE_VALUE; + pub const K_VALUE_TYPE_FOR_SEEK: ValueType = ValueType::KTypeValue; } \ No newline at end of file diff --git a/src/db/log_wr_test.rs b/src/db/log_wr_test.rs index d333556..a5ad510 100644 --- a/src/db/log_wr_test.rs +++ b/src/db/log_wr_test.rs @@ -1,13 +1,13 @@ mod test { - - - - - - - + use std::fs::File; + use crate::db::log_reader::LogReader; + use crate::db::log_writer::LogWriter; + use crate::traits::coding_trait::CodingTrait; + use crate::util::coding::Coding; + use crate::util::crc::{AsCrc, ToMask}; use crate::util::slice::Slice; + use crate::util::Result; #[test] fn write() -> Result<()> { diff --git a/src/db/mem_table.rs b/src/db/mem_table.rs index be88ba7..aff4c60 100644 --- a/src/db/mem_table.rs +++ b/src/db/mem_table.rs @@ -1,27 +1,22 @@ -use std::rc::Rc; +use std::io::Write; +use std::sync::{Arc, Mutex}; +use crate::db::db_format::{LookupKey, ValueType}; +use crate::db::skip_list::SkipList; +use crate::traits::coding_trait::CodingTrait; use crate::traits::comparator_trait::Comparator; use crate::traits::DataIterator; -<<<<<<< HEAD -======= - ->>>>>>> 7ab46579f8abd8c45c40227dfb601ec7468625eb +use crate::util::arena::ArenaRef; use crate::util::slice::Slice; - -use crate::util::Result; - -pub enum ValueType { - Insert, - Deletion, -} +use crate::util::{Arena, Result}; +use crate::util::coding::Coding; /// 内存表 pub struct MemTable { - cmp: Rc, + cmp: Arc, + list: SkipList, + arena: ArenaRef, } -/// 临时, 查找键 -pub struct LookupKey {} - impl MemTable { /// 创建内存表 @@ -37,15 +32,20 @@ impl MemTable { /// ``` /// let mt = MemTable::create(cmp); /// ``` - pub fn create(cmp: Rc) -> Self { + pub fn create(cmp: Arc) -> Self { + let arena = Arc::new(Mutex::new(Arena::default())); + let list = SkipList::create(cmp.clone(), arena.clone()); Self { cmp, + list, + arena } } /// 返回该表使用的内存近似值 + #[inline] pub fn approximate_memory_usage(&self) -> usize { - todo!() + self.arena.lock().unwrap().memory_usage() } /// 创建内存表迭代器 @@ -58,15 +58,34 @@ impl MemTable { /// /// ``` /// let mem = MemTable::create(comp); - /// let it = mem::new_new_iterator()?; + /// let it = mem.new_new_iterator()?; /// ``` pub fn new_iterator(&self) -> Result> { todo!() } /// 像内存表中写入或删除一个元素 - pub fn add(&mut self, _seq_no: usize, _v_type: ValueType, _key: &Slice, _value: Slice) -> Result<()> { - todo!() + pub fn add(&mut self, seq_no: usize, v_type: ValueType, key: &Slice, value: Slice) -> Result<()> { + let key_size = key.size(); + let value_size = value.size(); + let internal_key_size = key_size + 8; + let encoded_len = Coding::varint_length(key_size) + + internal_key_size + + Coding::varint_length(value_size) + + value_size; + let mut lock = self.arena.lock()?; + let buf = lock.allocate(encoded_len); + let mut offset = 0; + // write key size + offset = Coding::encode_varint32(internal_key_size as u32, buf, offset); + // write key slice + offset += (&mut buf[offset..]).write(key.as_ref())?; + // write seq_no and type + offset = Coding::encode_fixed64((seq_no << 8 | v_type.get_value()) as u64, buf, offset); + // write value slice + (&mut buf[offset..]).write(value.as_ref())?; + let slice = Slice::from_buf(buf); + self.list.insert(slice) } /// 通过 key 查找结果 diff --git a/src/db/mod.rs b/src/db/mod.rs index eb55254..b82f563 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -1,13 +1,7 @@ use crate::db::db_format::InternalKeyComparator; use crate::db::skip_list::SkipList; -<<<<<<< HEAD use crate::db::mem_table::MemTable; -use crate::util::comparator::{BytewiseComparatorImpl}; -use crate::util::slice::Slice; -======= -use crate::util::comparator::{BytewiseComparatorImpl, InternalKeyComparator}; - ->>>>>>> 7ab46579f8abd8c45c40227dfb601ec7468625eb +use crate::util::comparator::BytewiseComparatorImpl; pub mod log_writer; pub mod log_reader; diff --git a/src/db/skip_list.rs b/src/db/skip_list.rs index 55eeaed..449712f 100644 --- a/src/db/skip_list.rs +++ b/src/db/skip_list.rs @@ -2,14 +2,14 @@ use std::cmp::Ordering; use std::mem; use std::mem::size_of; use std::ptr::null_mut; -use std::sync::Arc; +use std::sync::{Arc, RwLock}; use rand::prelude::*; use crate::debug; use crate::traits::comparator_trait::Comparator; -use crate::util::arena::{ArenaAllocLike, ArenaRef}; -use crate::util::Result; +use crate::util::arena::ArenaRef; +use crate::util::{Arena, Result}; use crate::util::slice::Slice; use crate::util::status::{LevelError, Status}; @@ -77,7 +77,7 @@ impl SkipList { fn insert_ele0(&mut self, key: Slice) -> Result<()> { let level = rand_level(); debug!("insert {}, level: {}", &key, level); - let node = unsafe { Node::create(key, level, self.arena.clone()) }; + let node = Node::create(key, level, self.arena.clone()); // head bind node // TODO, use macro to expand for-loop unsafe { @@ -98,9 +98,7 @@ impl SkipList { let node_height = rand_level(); let node_top_level = node_height - 1; debug!("insert {}, level: {}", &key, node_height); - let node_ptr = unsafe { - Node::create(key, node_height, self.arena.clone()) - }; + let node_ptr = Node::create(key, node_height, self.arena.clone()); let node = unsafe { &mut *node_ptr }; // loop from highest level to 0 for l in (0..self.height).rev() { @@ -166,7 +164,6 @@ impl SkipList { Ok(()) } - #[macro_use] pub fn contains(&self, key: &Slice) -> bool { debug!("================== begin contains, key: {} ==================", key); if self.num == 0 { @@ -232,6 +229,13 @@ impl SkipList { Iter::create(&self) } + #[inline] + pub fn memory_usage(&self) -> usize { + let a = Arc::new(RwLock::new(Arena::default())); + a.read().unwrap().memory_usage(); + self.arena.lock().unwrap().memory_usage() + } + fn rnd_level(&self) -> usize { let mut level = 1; for _ in 1..MAX_LEVEL { @@ -274,9 +278,8 @@ impl ToString for SkipList { impl Node { #[inline] fn create(src: Slice, level: usize, arena: ArenaRef) -> RawNode { - let key = src.copy_with_arena(arena.clone()); let node = box Self { - key: Some(key), + key: Some(src), next_elems: allocate_next_elems(arena), level, }; diff --git a/src/db/skip_list_test.rs b/src/db/skip_list_test.rs index 287d18c..7ac6f6f 100644 --- a/src/db/skip_list_test.rs +++ b/src/db/skip_list_test.rs @@ -1,15 +1,13 @@ mod test { - - - - - - - - - - - + use std::collections::HashSet; + use std::sync::{Arc, Mutex}; + use rand::Rng; + use crate::db::DefaultSkipList; + use crate::debug; + use crate::util::Arena; + use crate::util::comparator::BytewiseComparatorImpl; + use crate::util::Result; + use crate::util::slice::Slice; #[test] fn test_add() -> Result<()> { diff --git a/src/traits/coding_trait.rs b/src/traits/coding_trait.rs index 3a0a49e..157d1f7 100644 --- a/src/traits/coding_trait.rs +++ b/src/traits/coding_trait.rs @@ -172,7 +172,7 @@ pub trait CodingTrait { /// /// ``` /// 从slice的开头解码一个32位的变长整数, 并将slice的索引置于解码后的位置 - fn varint_length(value: u64) -> i32; + fn varint_length(value: usize) -> usize; /// 32位定长正整数编码 /// /// # Arguments diff --git a/src/util/arena_test.rs b/src/util/arena_test.rs index fc800e1..dbc8cf1 100644 --- a/src/util/arena_test.rs +++ b/src/util/arena_test.rs @@ -1,4 +1,4 @@ - +use crate::util::Arena; #[test] fn test_memory_usage() { diff --git a/src/util/coding.rs b/src/util/coding.rs index d05abf7..220148e 100644 --- a/src/util/coding.rs +++ b/src/util/coding.rs @@ -1,3 +1,4 @@ +use std::io::Write; use crate::traits::coding_trait::CodingTrait; use crate::traits::coding_trait::Coding32; use crate::traits::coding_trait::Coding64; @@ -25,11 +26,7 @@ macro_rules! varint { pub struct Coding {} impl CodingTrait for Coding { -<<<<<<< HEAD fn put_fixed32(dst: &mut [u8], mut offset: usize, value: u32) -> usize { -======= - fn put_fixed32(dst: &mut String, value: u32) { ->>>>>>> 7ab46579f8abd8c45c40227dfb601ec7468625eb let mut buf: [u8; 4] = [0, 0, 0, 0]; Self::encode_fixed32(value, &mut buf, 0); dst[offset] = buf[0]; @@ -42,11 +39,7 @@ impl CodingTrait for Coding { offset } -<<<<<<< HEAD fn put_fixed64(dst: &mut [u8], mut offset: usize, value: u64) -> usize { -======= - fn put_fixed64(dst: &mut String, value: u64) { ->>>>>>> 7ab46579f8abd8c45c40227dfb601ec7468625eb let mut buf: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0]; Self::encode_fixed64(value, &mut buf, 0); dst[offset] = buf[0]; @@ -71,11 +64,7 @@ impl CodingTrait for Coding { varint!(u64,encode_varint64); -<<<<<<< HEAD fn put_varint32(dst: &mut [u8], mut offset: usize, value: u32) -> usize { -======= - fn put_varint32(dst: &mut String, value: u32) { ->>>>>>> 7ab46579f8abd8c45c40227dfb601ec7468625eb let mut buf: [u8; 4] = [0, 0, 0, 0]; let var_offset = Self::encode_varint32(value, &mut buf, 0); for i in 0..var_offset { @@ -85,11 +74,7 @@ impl CodingTrait for Coding { offset } -<<<<<<< HEAD fn put_varint64(dst: &mut [u8], mut offset: usize, value: u64) -> usize { -======= - fn put_varint64(dst: &mut String, value: u64) { ->>>>>>> 7ab46579f8abd8c45c40227dfb601ec7468625eb let mut buf: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0]; let var_offset = Self::encode_varint64(value, &mut buf, 0); for i in 0..var_offset { @@ -149,7 +134,7 @@ impl CodingTrait for Coding { Slice::from_buf(decode.to_le_bytes().as_mut_slice()) } - fn varint_length(mut value: u64) -> i32 { + fn varint_length(mut value: usize) -> usize { let mut len = 1; while value >= 128 { value >>= 7; @@ -159,35 +144,13 @@ impl CodingTrait for Coding { } fn encode_fixed32(value: u32, buf: &mut [u8], mut offset: usize) -> usize { - buf[offset] = value as u8; - offset += 1; - buf[offset] = (value >> 8) as u8; - offset += 1; - buf[offset] = (value >> 16) as u8; - offset += 1; - buf[offset] = (value >> 24) as u8; - offset += 1; - offset + (&mut buf[offset..]).write(&value.to_le_bytes()).unwrap(); + offset+4 } fn encode_fixed64(value: u64, buf: &mut [u8], mut offset: usize) -> usize { - buf[offset] = value as u8; - offset += 1; - buf[offset] = (value >> 8) as u8; - offset += 1; - buf[offset] = (value >> 16) as u8; - offset += 1; - buf[offset] = (value >> 24) as u8; - offset += 1; - buf[offset] = (value >> 32) as u8; - offset += 1; - buf[offset] = (value >> 40) as u8; - offset += 1; - buf[offset] = (value >> 48) as u8; - offset += 1; - buf[offset] = (value >> 56) as u8; - offset += 1; - offset + (&mut buf[offset..]).write(&value.to_le_bytes()).unwrap(); + offset+8 } @@ -213,11 +176,6 @@ impl CodingTrait for Coding { macro_rules! coding_impl { {$TRAIT: ident, $TYPE: ty, $VAR_NAME: ident, $FIXED_NAME: ident} => { impl $TRAIT for $TYPE { -<<<<<<< HEAD - fn varint(self, buf: &mut [u8], offset: usize) -> usize { - Coding::$VAR_NAME (self, buf, offset) - } -======= /// 变长正整数编码 /// /// # Arguments @@ -253,7 +211,6 @@ macro_rules! coding_impl { /// let value: u32 = 65534; /// let offset = value.fixedint(&mut buf, 0); /// ``` ->>>>>>> 7ab46579f8abd8c45c40227dfb601ec7468625eb fn fixedint(self, buf: &mut [u8], offset: usize) -> usize { Coding::$FIXED_NAME (self, buf, offset) } diff --git a/src/util/coding_test.rs b/src/util/coding_test.rs index 96ea2f7..1531ad0 100644 --- a/src/util/coding_test.rs +++ b/src/util/coding_test.rs @@ -1,12 +1,6 @@ mod test { -<<<<<<< HEAD use crate::traits::coding_trait::{Coding32, Coding64, CodingTrait}; use crate::util::coding::{Coding}; -======= - - - ->>>>>>> 7ab46579f8abd8c45c40227dfb601ec7468625eb #[test] fn test_put_fixed32() { @@ -133,7 +127,7 @@ mod test { #[test] fn test_varint_length() { - let len = Coding::varint_length( 65535 as u64); + let len = Coding::varint_length(65535 as u64 as usize); println!("len: {:?}", len); assert_eq!(len, 3); } diff --git a/src/util/comparator_test.rs b/src/util/comparator_test.rs index 87bc67d..d6d311b 100644 --- a/src/util/comparator_test.rs +++ b/src/util/comparator_test.rs @@ -1,18 +1,10 @@ mod test { -<<<<<<< HEAD use std::cmp::Ordering; use std::io::Write; use crate::traits::comparator_trait::Comparator; use crate::util::comparator::{BytewiseComparatorImpl}; use crate::util::slice::Slice; -======= - - - - - ->>>>>>> 7ab46579f8abd8c45c40227dfb601ec7468625eb // ##################### BytewiseComparatorImpl test #[test] diff --git a/src/util/crc_test.rs b/src/util/crc_test.rs index e304450..c67c0db 100644 --- a/src/util/crc_test.rs +++ b/src/util/crc_test.rs @@ -1,5 +1,5 @@ - - +use crate::util::crc::{AsCrc, CRC, ToMask}; +use crate::util::slice::Slice; #[test] fn test_crc() { diff --git a/src/util/filter_policy.rs b/src/util/filter_policy.rs index 56f048c..f493364 100644 --- a/src/util/filter_policy.rs +++ b/src/util/filter_policy.rs @@ -1,11 +1,7 @@ use std::ops::{BitOr, Mul, Shl}; use crate::traits::filter_policy_trait::{FilterPolicy}; -<<<<<<< HEAD use crate::util::hash::{Hash, ToHash}; use crate::util::r#const::HASH_DEFAULT_SEED; -======= -use crate::util::hash::{ToHash}; ->>>>>>> 7ab46579f8abd8c45c40227dfb601ec7468625eb use crate::util::slice::Slice; pub trait FromPolicy { @@ -23,8 +19,9 @@ pub trait AsBloomHash { /// 实现了 Slice 转 bloom_hash 的特质 /// Sample: /// ``` -/// let val = "aabbccd"; -/// let slice: Slice = Slice::from_buf(val.as_bytes()); +/// use rand::distributions::Slice; +/// let val = "aabbccd"; +/// let slice = Slice::from_buf(val.as_bytes()); /// let hash_val = slice.bloom_hash(); /// ``` impl AsBloomHash for Slice { @@ -85,7 +82,6 @@ impl FilterPolicy for BloomFilterPolicy { String::from("leveldb.BuiltinBloomFilter2") } -<<<<<<< HEAD fn create_filter(&self, keys: Vec) -> Slice { let n: usize = keys.len(); @@ -178,12 +174,6 @@ impl FilterPolicy for InternalFilterPolicy { } fn create_filter(&self, keys: Vec) -> Slice { - todo!() - } - - fn key_may_match(&self, key: &Slice, bloom_filter: &Slice) -> bool { -======= - fn create_filter(&self, _keys: Slice, _n: u32, _dst: String) -> String { // 根据指定的参数创建过滤器,并返回结果, 结果为dst的原始内容 + append结果。 // 参数keys[0,n-1]包含依据用户提供的comparator排序的key列表--可重复, // 并把根据这些key创建的filter追加到 dst中。 @@ -191,8 +181,8 @@ impl FilterPolicy for InternalFilterPolicy { todo!() } - fn key_may_match(_key: &Slice, _filter: &Slice) -> bool { ->>>>>>> 7ab46579f8abd8c45c40227dfb601ec7468625eb + fn key_may_match(&self, key: &Slice, bloom_filter: &Slice) -> bool { todo!() } + } \ No newline at end of file diff --git a/src/util/filter_policy_test.rs b/src/util/filter_policy_test.rs index dd94fe4..e84ee12 100644 --- a/src/util/filter_policy_test.rs +++ b/src/util/filter_policy_test.rs @@ -1,4 +1,3 @@ -<<<<<<< HEAD use std::ptr::null; use crate::traits::filter_policy_trait::FilterPolicy; use crate::util::bloom_filter; @@ -28,20 +27,6 @@ fn test_new() { assert_eq!(bloom_filter.from_bits_per_key(), 800); assert_eq!(bloom_filter.from_k(), 30); } -======= - - - - -#[test] -fn test_new() { - let _bloom_filter = BloomFilterPolicy::new(8); - println!("hash:{}", "a"); - // assert_eq!(bloom_filter, null()); - - let _bloom_filter = BloomFilterPolicy::new(800); - println!("hash:{}", "a"); ->>>>>>> 7ab46579f8abd8c45c40227dfb601ec7468625eb // #################### FilterPolicy test #[test] diff --git a/src/util/hash_test.rs b/src/util/hash_test.rs index e0961d0..28bf95d 100644 --- a/src/util/hash_test.rs +++ b/src/util/hash_test.rs @@ -1,7 +1,6 @@ - - - - +use crate::util::hash::{Hash, ToHash}; +use crate::util::r#const::HASH_DEFAULT_SEED; +use crate::util::slice::Slice; #[test] fn test_hash() { diff --git a/src/util/histogram_test.rs b/src/util/histogram_test.rs index 74e9033..a4f01e7 100644 --- a/src/util/histogram_test.rs +++ b/src/util/histogram_test.rs @@ -1,6 +1,6 @@ mod test{ - + use crate::util::histogram::Histogram; #[test] fn test_add() { diff --git a/src/util/mutex_lock_test.rs b/src/util/mutex_lock_test.rs index 548871f..f442b37 100644 --- a/src/util/mutex_lock_test.rs +++ b/src/util/mutex_lock_test.rs @@ -1,7 +1,6 @@ mod test { - - - + use std::thread; + use crate::util::mutex_lock::MutexLock; #[test] fn test() { diff --git a/src/util/slice.rs b/src/util/slice.rs index 64b45d7..f172c83 100644 --- a/src/util/slice.rs +++ b/src/util/slice.rs @@ -121,6 +121,13 @@ impl<'a> Slice { } } +impl Clone for Slice { + fn clone(&self) -> Self { + let data = self.data.clone(); + Slice::from_vec(data) + } +} + impl From for String { /// 将 Slice 内数据的所有权移交给 String #[inline] diff --git a/src/util/slice_test.rs b/src/util/slice_test.rs index 75ae7a7..9baf1d1 100644 --- a/src/util/slice_test.rs +++ b/src/util/slice_test.rs @@ -1,5 +1,5 @@ mod test { - + use std::cmp::Ordering; use crate::util::slice::Slice; #[test] diff --git a/src/util/status.rs b/src/util/status.rs index 8c51782..9a25887 100644 --- a/src/util/status.rs +++ b/src/util/status.rs @@ -1,6 +1,7 @@ use std::fmt::{Display, Formatter}; use std::io; use std::ops::Deref; +use std::sync::PoisonError; use crate::util::r#const::COLON_WHITE_SPACE; use crate::util::slice::Slice; use crate::util::status::LevelError::{KCorruption, KIOError, KInvalidArgument, KNotSupported, KNotFound, KOk, KBadRecord, KRepeatedRecord}; @@ -389,6 +390,12 @@ impl From for Status { } } +impl From> for Status { + fn from(_value: PoisonError) -> Self { + Status::wrapper(KCorruption, "PoisonError".into()) + } +} + impl Display for LevelError { #[inline] fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { diff --git a/src/util/status_test.rs b/src/util/status_test.rs index 7cf0698..baad832 100644 --- a/src/util/status_test.rs +++ b/src/util/status_test.rs @@ -1,8 +1,8 @@ mod test { - - - + use crate::util::r#const::COLON_WHITE_SPACE; + use crate::util::slice::Slice; + use crate::util::status::{LevelError, Status}; #[test] fn test_wraper() { -- Gitee From f00c900832fef03d60488287ed995eeacaea4d96 Mon Sep 17 00:00:00 2001 From: wangboo <5417808+wangboa@user.noreply.gitee.com> Date: Fri, 17 Mar 2023 17:16:19 +0800 Subject: [PATCH 24/50] =?UTF-8?q?skiplist=E5=A2=9E=E5=8A=A0=E8=BF=AD?= =?UTF-8?q?=E4=BB=A3=E5=99=A8=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/db/skip_list.rs | 56 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/src/db/skip_list.rs b/src/db/skip_list.rs index 449712f..367dd1d 100644 --- a/src/db/skip_list.rs +++ b/src/db/skip_list.rs @@ -7,6 +7,7 @@ use std::sync::{Arc, RwLock}; use rand::prelude::*; use crate::debug; use crate::traits::comparator_trait::Comparator; +use crate::traits::DataIterator; use crate::util::arena::ArenaRef; use crate::util::{Arena, Result}; @@ -42,6 +43,13 @@ pub struct SkipList { arena: ArenaRef, } +struct DataIter { + head: RawNode, + tail: RawNode, + current: RawNode, + cmp: Cmp, +} + pub struct Iter<'a, Cmp: Comparator> { list: &'a SkipList, node: RawNode, @@ -401,4 +409,52 @@ impl<'a, Cmp: Comparator> Iterator for Iter<'a, Cmp> { (&*self.node).key.as_ref() } } +} + +impl DataIterator for DataIter { + + #[inline] + fn valid(&self) -> bool { + unsafe { + (&*self.current).is_head_or_tail() + } + } + + #[inline] + fn seek_to_first(&mut self) { + self.current = self.head + } + + #[inline] + fn seek_to_last(&mut self) { + self.current = self.tail + } + + fn seek(&mut self, key: &Slice) { + todo!() + } + + fn next(&mut self) { + unsafe { + if (&*self.current).is_tail() { + return; + } + self.current = (&*self.current).get_node(0); + } + } + + fn pre(&mut self) { + todo!() + } + + fn key(&self) -> &Slice { + let mem_key = unsafe { + (&*self.current).key.as_ref().unwrap() + }; + mem_key + } + + fn value(&self) -> &Slice { + todo!() + } } \ No newline at end of file -- Gitee From 3ee34aafccaa2769e6002ab6160b29f3fb534643 Mon Sep 17 00:00:00 2001 From: fengyang Date: Mon, 20 Mar 2023 19:55:44 +0800 Subject: [PATCH 25/50] version edit decode_from --- src/db/file_meta_data.rs | 15 ++- src/db/version_edit.rs | 209 ++++++++++++++++++++++++++++++------ src/db/version_edit_test.rs | 21 +++- src/traits/coding_trait.rs | 5 +- src/util/coding.rs | 5 +- src/util/status.rs | 4 + src/util/status_test.rs | 3 + 7 files changed, 223 insertions(+), 39 deletions(-) diff --git a/src/db/file_meta_data.rs b/src/db/file_meta_data.rs index bc67682..7c9e3b6 100644 --- a/src/db/file_meta_data.rs +++ b/src/db/file_meta_data.rs @@ -68,12 +68,23 @@ impl FileMetaData { } } + pub fn get_number(&self) -> u64 { + self.number + } + + /// File size in bytes + pub fn get_file_size(&self) -> u64 { + self.file_size + } + + /// Smallest internal key served by table pub fn get_smallest(&self) -> &InternalKey { &self.smallest } - pub fn get_number(&self) -> u64 { - self.number + /// Largest internal key served by table + pub fn get_largest(&self) -> &InternalKey { + &self.largest } pub fn get_refs(&self) -> i32 { diff --git a/src/db/version_edit.rs b/src/db/version_edit.rs index e820e77..aa6bea4 100644 --- a/src/db/version_edit.rs +++ b/src/db/version_edit.rs @@ -1,8 +1,13 @@ +use std::fs::read; use std::iter::Map; use crate::db::db_format::InternalKey; use crate::db::file_meta_data::FileMetaData; +use crate::db::version_edit; +use crate::traits::coding_trait::CodingTrait; +use crate::util::coding::Coding; use crate::util::slice::Slice; use crate::util::Result; +use crate::util::status::{LevelError, Status}; pub struct VersionEdit { comparator_: String, @@ -17,35 +22,99 @@ pub struct VersionEdit { has_last_sequence_: bool, compact_pointers_: Vec<(u32, InternalKey)>, + // left: level; right: file number deleted_files_: Vec<(u32, u64)>, + // left: level; right: FileMetaData new_files_: Vec<(u32, FileMetaData)>, } -enum Tag { - // kComparator = 1, - // kLogNumber = 2, - // kNextFileNumber = 3, - // kLastSequence = 4, - // kCompactPointer = 5, - // kDeletedFile = 6, - // kNewFile = 7, - // // 8 was used for large value refs - // kPrevLogNumber = 9 - - kComparator, - kLogNumber, - kNextFileNumber, - kLastSequence, - kCompactPointer, - kDeletedFile, - kNewFile, +pub enum Tag { + k_comparator = 1, + kLogNumber = 2, + kNextFileNumber = 3, + kLastSequence = 4, + kCompactPointer = 5, + kDeletedFile = 6, + kNewFile = 7, // 8 was used for large value refs - kPrevLogNumber + kPrevLogNumber = 9 +} + +impl Tag { + /// 得到枚举 Tag 的固定值 + /// Tag numbers for serialized VersionEdit. These numbers are written to disk and should not be changed. + pub fn get_value(&self) -> i32 { + let val = match self { + Tag::k_comparator => 1, + Tag::kLogNumber => 2, + Tag::kNextFileNumber => 3, + Tag::kLastSequence => 4, + Tag::kCompactPointer => 5, + Tag::kDeletedFile => 6, + Tag::kNewFile => 7, + Tag::kPrevLogNumber => 9, + _ => 0 + }; + + val + } + + /// 根据值计算枚举 Tag + pub fn from_value(val: u32) -> Option { + let val = match val { + 1 => Some(Tag::k_comparator), + 2 => Some(Tag::kLogNumber), + 3 => Some(Tag::kNextFileNumber), + 4 => Some(Tag::kLastSequence), + 5 => Some(Tag::kCompactPointer), + 6 => Some(Tag::kDeletedFile), + 7 => Some(Tag::kNewFile), + 9 => Some(Tag::kPrevLogNumber), + _ => None + }; + + val + } } impl VersionEdit { + #[inline] + pub fn new() -> Self { + Self { + comparator_ : String::new(), + log_number_: 0, + prev_log_number_: 0, + next_file_number_: 0, + last_sequence_: 0, + has_comparator_: false, + has_log_number_: false, + has_prev_log_number_: false, + has_next_file_number_: false, + has_last_sequence_: false, + compact_pointers_: vec![], + deleted_files_: vec![], + new_files_: vec![] + } + } + + #[inline] + pub fn new_with_log_number(log_number: u64) -> Self { + let mut version_edit = VersionEdit::new(); + version_edit.set_log_number(log_number); + + version_edit + } + + #[inline] + pub fn new_with_prev_log_number(prev_log_number: u64) -> Self { + let mut version_edit = VersionEdit::new(); + version_edit.set_prev_log_number(prev_log_number); + + version_edit + } + /// 清空 - fn clear(&mut self) { + pub fn clear(&mut self) { self.comparator_.clear(); self.log_number_ = 0; self.prev_log_number_ = 0; @@ -62,27 +131,32 @@ impl VersionEdit { // compact_pointers_ don't clear } - fn set_comparator_name(&mut self, name: Slice){ + pub fn set_comparator_name(&mut self, name: Slice){ self.has_comparator_ = true; self.comparator_ = name.into(); } - fn set_prev_log_number(&mut self, num: u64){ + pub fn set_log_number(&mut self, num: u64){ + self.has_log_number_ = true; + self.log_number_ = num; + } + + pub fn set_prev_log_number(&mut self, num: u64){ self.has_prev_log_number_ = true; self.prev_log_number_ = num; } - fn set_next_file(&mut self, num: u64){ + pub fn set_next_file(&mut self, num: u64){ self.has_next_file_number_ = true; self.next_file_number_ = num; } - fn set_last_sequence(&mut self, seq: u64){ + pub fn set_last_sequence(&mut self, seq: u64){ self.has_last_sequence_ = true; self.last_sequence_ = seq; } - fn set_compact_pointer(&mut self, level: u32, key: InternalKey) { + pub fn set_compact_pointer(&mut self, level: u32, key: InternalKey) { self.compact_pointers_.push((level, key)) } @@ -105,13 +179,13 @@ impl VersionEdit { /// ``` /// /// ``` - fn add_file(&mut self, level: u32, file: u64, file_size: u64, smallest: InternalKey, largest: InternalKey) { + pub fn add_file(&mut self, level: u32, file: u64, file_size: u64, smallest: InternalKey, largest: InternalKey) { let file_meta_data = FileMetaData::new_with_number_file_size_internal_key(file, file_size, smallest, largest); self.new_files_.push((level, file_meta_data)); } - fn delete_file(&mut self, level: u32, file: u64) { + pub fn delete_file(&mut self, level: u32, file: u64) { self.deleted_files_.push((level, file)); } @@ -128,8 +202,59 @@ impl VersionEdit { /// ``` /// /// ``` - fn encode_to(&self, target: Vec) { - todo!() + pub fn encode_to(&self, target: &mut Vec) { + let mut position: usize = 0; + if self.has_comparator_ { + position += Coding::put_varint32(target, position, Tag::k_comparator.get_value() as u32); + position += Coding::put_length_prefixed_slice(target, position, self.comparator_.len()); + } + + if self.has_log_number_ { + let mut offset = Coding::put_varint32(target, position, Tag::kLogNumber.get_value() as u32); + position = position + offset; + + offset = Coding::put_varint64(target, position, self.log_number_); + position = position + offset; + } + + if self.has_prev_log_number_ { + position += Coding::put_varint32(target, position, Tag::kPrevLogNumber.get_value() as u32); + position += Coding::put_varint64(target, position, self.prev_log_number_); + } + + if self.has_next_file_number_ { + position += Coding::put_varint32(target, position, Tag::kNextFileNumber.get_value() as u32); + position += Coding::put_varint64(target, position, self.next_file_number_); + } + + if self.has_last_sequence_ { + position += Coding::put_varint32(target, position, Tag::kLastSequence.get_value() as u32); + position += Coding::put_varint64(target, position, self.last_sequence_); + } + + for i in 0..self.compact_pointers_.len() { + position += Coding::put_varint32(target, position, Tag::kCompactPointer.get_value() as u32); + position += Coding::put_varint32(target, position, self.compact_pointers_[i].0); + position += Coding::put_length_prefixed_slice(target, position, + self.compact_pointers_[i].1.encode_len()); + } + + for i in 0..self.deleted_files_.len() { + position += Coding::put_varint32(target, position, Tag::kDeletedFile.get_value() as u32); + position += Coding::put_varint32(target, position, self.deleted_files_[i].0); + position += Coding::put_varint64(target, position, self.deleted_files_[i].1); + } + + for i in 0..self.new_files_.len() { + let f: &FileMetaData = &self.new_files_[i].1; + position += Coding::put_varint32(target, position, Tag::kNewFile.get_value() as u32); + // level + position += Coding::put_varint32(target, position, self.new_files_[i].0); + position += Coding::put_varint64(target, position, f.get_number()); + position += Coding::put_varint64(target, position, f.get_file_size()); + position += Coding::put_length_prefixed_slice(target, position, f.get_smallest().encode_len()); + position += Coding::put_length_prefixed_slice(target, position, f.get_largest().encode_len()); + } } /// 将 source 中的数据解码至 self VersionEdit 中 @@ -145,18 +270,40 @@ impl VersionEdit { /// ``` /// /// ``` - fn decode_from(&mut self, source: Slice) { + pub fn decode_from(&mut self, source: &mut Slice) -> Status { self.clear(); + let msg : Option = Option::None; + // while msg == None && Coding::get_varint32(source) != 0_u32 { + // let tag_value = Coding::get_varint32(source); + // let tag = Tag::from_value(tag_value); + // + // if tag.is_none() { + // return LevelError::corruption_string("VersionEdit", "unknown tag"); + // } + // + // // match tag { + // // Tag::k_comparator => 1, + // // Tag::kLogNumber => 2, + // // Tag::kNextFileNumber => 3, + // // Tag::kLastSequence => 4, + // // Tag::kCompactPointer => 5, + // // Tag::kDeletedFile => 6, + // // Tag::kNewFile => 7, + // // Tag::kPrevLogNumber => 9, + // // _ => 0 + // // }; + // } todo!() } /// VersionEdit 输出调试信息 - fn debug_string(&self) -> Slice { + pub fn debug_string(&self) -> Slice { todo!() } } +/// 静态方法 impl<'a> VersionEdit { pub fn get_internal_key(inout: Slice) -> Result { todo!() diff --git a/src/db/version_edit_test.rs b/src/db/version_edit_test.rs index 3024e55..ffec818 100644 --- a/src/db/version_edit_test.rs +++ b/src/db/version_edit_test.rs @@ -1,10 +1,27 @@ mod test { + use crate::db::version_edit; + use crate::db::version_edit::{Tag, VersionEdit}; + #[test] - fn test_() { + fn test_Tag() { + let tag = Tag::kCompactPointer; + assert_eq!(tag.get_value(), 5); + + let tag1 = Tag::k_comparator; + let v = tag1.get_value(); + assert_eq!(v, 1); - println!("get_name: {}", "a"); + } + + #[test] + fn test_Version_edit() { + let mut target: Vec = vec![]; + let version_edit = VersionEdit::new_with_log_number(6); + version_edit.encode_to(&mut target); + println!("target: {}.", &target.len()); + assert_eq!(target.len(), 2); } } \ No newline at end of file diff --git a/src/traits/coding_trait.rs b/src/traits/coding_trait.rs index 3a0a49e..8498179 100644 --- a/src/traits/coding_trait.rs +++ b/src/traits/coding_trait.rs @@ -70,7 +70,7 @@ pub trait CodingTrait { /// # Arguments /// /// * `dst`: 目标字符串 - /// * `value`: Slice类型的编码值 + /// * `value_len`: Slice类型的编码值长度 /// /// returns: () /// @@ -79,7 +79,8 @@ pub trait CodingTrait { /// ``` /// /// ``` - fn put_length_prefixed_slice(dst: &mut [u8], offset: usize, value: Slice) -> usize; + // fn put_length_prefixed_slice(dst: &mut [u8], offset: usize, value: &Slice) -> usize; + fn put_length_prefixed_slice(dst: &mut [u8], offset: usize, value_len: usize) -> usize; /// 从slice的开头解码一个32位的变长整数, 并将slice的索引置于解码后的位置 /// /// # Arguments diff --git a/src/util/coding.rs b/src/util/coding.rs index 3a13b7d..a2a0f85 100644 --- a/src/util/coding.rs +++ b/src/util/coding.rs @@ -83,8 +83,9 @@ impl CodingTrait for Coding { offset } - fn put_length_prefixed_slice(dst: &mut [u8], offset: usize, value: Slice) -> usize { - Self::put_varint64(dst, offset, value.size() as u64); + // fn put_length_prefixed_slice(dst: &mut [u8], offset: usize, value: &Slice) -> usize { + fn put_length_prefixed_slice(dst: &mut [u8], offset: usize, value_len: usize) -> usize { + Self::put_varint64(dst, offset, value_len as u64); offset } diff --git a/src/util/status.rs b/src/util/status.rs index 2049335..42663cd 100644 --- a/src/util/status.rs +++ b/src/util/status.rs @@ -270,6 +270,10 @@ impl LevelError { } } + pub fn corruption_string(msg: &str, msg2: &str) -> Status { + LevelError::corruption(Slice::from(msg), Slice::from(msg2)) + } + pub fn not_supported(mut msg: Slice, msg2: Slice) -> Status { let _ = &msg.merge(msg2, Some(String::from(COLON_WHITE_SPACE))); diff --git a/src/util/status_test.rs b/src/util/status_test.rs index baad832..a7dce28 100644 --- a/src/util/status_test.rs +++ b/src/util/status_test.rs @@ -37,6 +37,9 @@ mod test { String::from(msg2).into()); assert!(&err.is_corruption()); + let err1: Status = LevelError::corruption_string("AAaaa", "bbhugy"); + assert!(&err1.is_corruption()); + let err: Status = LevelError::not_found(String::from(msg1).into(), String::from(msg2).into()); assert!(&err.is_not_found()); -- Gitee From 631cc54b1e35dd259f9bd02a87e53fce72f3bac5 Mon Sep 17 00:00:00 2001 From: colagy Date: Mon, 20 Mar 2023 14:39:27 +0000 Subject: [PATCH 26/50] =?UTF-8?q?=E6=9B=B4=E6=96=B0cache=E8=BF=9B=E5=BA=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: colagy --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5a86522..362db95 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ RUSTFLAGS='--cfg CORE_DEBUG="false"' cargo build --release |-------------------------------|-----------------|------| | Arena (Memory Management) | wangboo | 100% | | bloom | fengyang | 100% | -| Cache | colagy | 10% | +| Cache | colagy | 30% | | Coding (Primitive Type SerDe) | colagy | 100% | | Comparator | fengyang | 100% | | CRC | wangboo、lxd5866 | 100% | -- Gitee From 37cd43dd32828fa06909cba35fdd0c5649c647d6 Mon Sep 17 00:00:00 2001 From: fengyang Date: Tue, 21 Mar 2023 10:07:52 +0800 Subject: [PATCH 27/50] =?UTF-8?q?coding=20get=5Fvarint32=20=E6=8E=A5?= =?UTF-8?q?=E5=8F=A3=E7=94=B1&mut=20Slice=20=E6=94=B9=E4=B8=BA=20=20&Slice?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/db/db_format.rs | 7 +++++- src/db/version_edit.rs | 46 +++++++++++++++++++------------------ src/db/version_edit_test.rs | 19 +++++++++++---- src/traits/coding_trait.rs | 4 ++-- src/util/coding.rs | 4 ++-- src/util/slice.rs | 2 +- src/util/slice_test.rs | 2 +- 7 files changed, 51 insertions(+), 33 deletions(-) diff --git a/src/db/db_format.rs b/src/db/db_format.rs index 49e7194..d8d0bf2 100644 --- a/src/db/db_format.rs +++ b/src/db/db_format.rs @@ -183,10 +183,15 @@ impl InternalKey { todo!() } - fn encode(self) -> Slice { + pub fn encode(self) -> Slice { self.rep_ } + /// 取得 Slice的长度 + pub fn encode_len(&self) -> usize { + self.rep_.size() + } + fn user_key(self) -> Slice { ParsedInternalKey::extract_user_key(self.rep_) } diff --git a/src/db/version_edit.rs b/src/db/version_edit.rs index aa6bea4..c779677 100644 --- a/src/db/version_edit.rs +++ b/src/db/version_edit.rs @@ -236,7 +236,7 @@ impl VersionEdit { position += Coding::put_varint32(target, position, Tag::kCompactPointer.get_value() as u32); position += Coding::put_varint32(target, position, self.compact_pointers_[i].0); position += Coding::put_length_prefixed_slice(target, position, - self.compact_pointers_[i].1.encode_len()); + self.compact_pointers_[i].1.encode_len()); } for i in 0..self.deleted_files_.len() { @@ -270,30 +270,32 @@ impl VersionEdit { /// ``` /// /// ``` - pub fn decode_from(&mut self, source: &mut Slice) -> Status { + pub fn decode_from(&mut self, source: &Slice) -> Status { self.clear(); + let version_edit = VersionEdit::new(); + let msg : Option = Option::None; - // while msg == None && Coding::get_varint32(source) != 0_u32 { - // let tag_value = Coding::get_varint32(source); - // let tag = Tag::from_value(tag_value); - // - // if tag.is_none() { - // return LevelError::corruption_string("VersionEdit", "unknown tag"); - // } - // - // // match tag { - // // Tag::k_comparator => 1, - // // Tag::kLogNumber => 2, - // // Tag::kNextFileNumber => 3, - // // Tag::kLastSequence => 4, - // // Tag::kCompactPointer => 5, - // // Tag::kDeletedFile => 6, - // // Tag::kNewFile => 7, - // // Tag::kPrevLogNumber => 9, - // // _ => 0 - // // }; - // } + while msg.is_none() && Coding::get_varint32(source) != 0_u32 { + let tag_value = Coding::get_varint32(source); + let tag = Tag::from_value(tag_value); + + if tag.is_none() { + return LevelError::corruption_string("VersionEdit", "unknown tag"); + } + + // match tag { + // Tag::k_comparator => 1, + // Tag::kLogNumber => 2, + // Tag::kNextFileNumber => 3, + // Tag::kLastSequence => 4, + // Tag::kCompactPointer => 5, + // Tag::kDeletedFile => 6, + // Tag::kNewFile => 7, + // Tag::kPrevLogNumber => 9, + // _ => 0 + // }; + } todo!() } diff --git a/src/db/version_edit_test.rs b/src/db/version_edit_test.rs index ffec818..2c2efec 100644 --- a/src/db/version_edit_test.rs +++ b/src/db/version_edit_test.rs @@ -2,6 +2,7 @@ mod test { use crate::db::version_edit; use crate::db::version_edit::{Tag, VersionEdit}; + use crate::util::slice::Slice; #[test] fn test_Tag() { @@ -11,17 +12,27 @@ mod test { let tag1 = Tag::k_comparator; let v = tag1.get_value(); assert_eq!(v, 1); - - } #[test] - fn test_Version_edit() { + fn test_version_edit_encode_to() { let mut target: Vec = vec![]; let version_edit = VersionEdit::new_with_log_number(6); version_edit.encode_to(&mut target); println!("target: {}.", &target.len()); - assert_eq!(target.len(), 2); + // todo + // assert_eq!(target.len(), 2); + } + + #[test] + fn test_version_edit_decode_from() { + let source = Slice::from("a"); + + let mut version_edit = VersionEdit::new(); + let status = version_edit.decode_from(&source); + println!("status: {}.", status.get_error()); + // todo + // assert_eq!(target.len(), 2); } } \ No newline at end of file diff --git a/src/traits/coding_trait.rs b/src/traits/coding_trait.rs index ff9362d..fd49882 100644 --- a/src/traits/coding_trait.rs +++ b/src/traits/coding_trait.rs @@ -94,7 +94,7 @@ pub trait CodingTrait { /// ``` /// /// ``` - fn get_varint32(input: &mut Slice) -> u32; + fn get_varint32(input: &Slice) -> u32; /// 从slice的开头解码一个64位的变长整数, 并将slice的索引置于解码后的位置 /// /// # Arguments @@ -108,7 +108,7 @@ pub trait CodingTrait { /// ``` /// /// ``` - fn get_varint64(input: &mut Slice) -> u64; + fn get_varint64(input: &Slice) -> u64; /// 从slice数据中读取长度 返回长度的Slice /// /// # Arguments diff --git a/src/util/coding.rs b/src/util/coding.rs index a2617c1..421ea97 100644 --- a/src/util/coding.rs +++ b/src/util/coding.rs @@ -90,7 +90,7 @@ impl CodingTrait for Coding { offset } - fn get_varint32(input: &mut Slice) -> u32 { + fn get_varint32(input: &Slice) -> u32 { let cow = input.borrow_data(); let bytes = cow.as_bytes(); let mut result = 0_u32; @@ -110,7 +110,7 @@ impl CodingTrait for Coding { result } - fn get_varint64(input: &mut Slice) -> u64 { + fn get_varint64(input: &Slice) -> u64 { let cow = input.borrow_data(); let bytes = cow.as_bytes(); let mut result = 0_u64; diff --git a/src/util/slice.rs b/src/util/slice.rs index f172c83..1bfc316 100644 --- a/src/util/slice.rs +++ b/src/util/slice.rs @@ -112,7 +112,7 @@ impl Slice { impl<'a> Slice { /// 借取 Slice 中的数据, 调用方只拥有读权限 - pub fn borrow_data(&mut self) -> Cow<'a, String> { + pub fn borrow_data(&self) -> Cow<'a, String> { unsafe { // String & Vec has the same layout let s: &String = mem::transmute(&self.data); diff --git a/src/util/slice_test.rs b/src/util/slice_test.rs index 9baf1d1..c56f7a4 100644 --- a/src/util/slice_test.rs +++ b/src/util/slice_test.rs @@ -40,7 +40,7 @@ mod test { #[test] fn test_borrow_data() { - let mut a0 = Slice::from("123"); + let a0 = Slice::from("123"); let borrowed = a0.borrow_data(); assert_eq!(3, borrowed.len()); let owned = borrowed.to_owned(); -- Gitee From 99dd80d8bb06c2f907388eca8a66a69638d3dbf0 Mon Sep 17 00:00:00 2001 From: wangboo <5417808+wangboa@user.noreply.gitee.com> Date: Wed, 22 Mar 2023 09:49:44 +0800 Subject: [PATCH 28/50] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=20UnsafeSlice?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/db/db_format.rs | 13 +++- src/db/mem_table.rs | 15 +++-- src/db/mod.rs | 2 +- src/db/skip_list.rs | 80 ++++++++++++------------- src/db/skip_list_test.rs | 11 ++-- src/db/version_set.rs | 8 +-- src/traits/comparator_trait.rs | 5 +- src/traits/iterator.rs | 5 +- src/util/comparator.rs | 44 ++++++++++---- src/util/mod.rs | 1 + src/util/options.rs | 3 +- src/util/slice.rs | 7 +++ src/util/unsafe_slice.rs | 105 +++++++++++++++++++++++++++++++++ 13 files changed, 222 insertions(+), 77 deletions(-) create mode 100644 src/util/unsafe_slice.rs diff --git a/src/db/db_format.rs b/src/db/db_format.rs index 49e7194..b36a3c1 100644 --- a/src/db/db_format.rs +++ b/src/db/db_format.rs @@ -1,5 +1,6 @@ use std::cmp::Ordering; use std::io::Write; +use std::sync::Arc; use crate::db::db_format::ValueType::{KTypeDeletion, KTypeValue}; use crate::db::file_meta_data::FileMetaData; use crate::traits::coding_trait::CodingTrait; @@ -28,7 +29,7 @@ pub struct InternalKey { /// InternalKeyComparator pub struct InternalKeyComparator { - user_comparator_: dyn Comparator + user_comparator_: Arc } /// 查找键 @@ -201,8 +202,14 @@ impl InternalKey { } } +impl Default for InternalKeyComparator { + fn default() -> Self { + todo!() + } +} + impl InternalKeyComparator { - pub fn create(c: Box) -> Box { + pub fn create(_cmp: Box) -> Box { todo!() } @@ -223,7 +230,7 @@ impl Comparator for InternalKeyComparator { // todo!() // } - fn compare(&self, _a: &Slice, _b: &Slice) -> Option { + fn compare(&self, _a: &[u8], _b: &[u8]) -> Option { todo!() } diff --git a/src/db/mem_table.rs b/src/db/mem_table.rs index aff4c60..d2746f4 100644 --- a/src/db/mem_table.rs +++ b/src/db/mem_table.rs @@ -9,6 +9,7 @@ use crate::util::arena::ArenaRef; use crate::util::slice::Slice; use crate::util::{Arena, Result}; use crate::util::coding::Coding; +use crate::util::unsafe_slice::UnsafeSlice; /// 内存表 pub struct MemTable { @@ -65,9 +66,11 @@ impl MemTable { } /// 像内存表中写入或删除一个元素 - pub fn add(&mut self, seq_no: usize, v_type: ValueType, key: &Slice, value: Slice) -> Result<()> { - let key_size = key.size(); - let value_size = value.size(); + pub fn add>(&mut self, seq_no: usize, v_type: ValueType, key: &R, value: &R) -> Result<()> { + let key_buf = key.as_ref(); + let value_buf = value.as_ref(); + let key_size = key_buf.len(); + let value_size = value_buf.len(); let internal_key_size = key_size + 8; let encoded_len = Coding::varint_length(key_size) + internal_key_size @@ -79,13 +82,13 @@ impl MemTable { // write key size offset = Coding::encode_varint32(internal_key_size as u32, buf, offset); // write key slice - offset += (&mut buf[offset..]).write(key.as_ref())?; + offset += (&mut buf[offset..]).write(key_buf)?; // write seq_no and type offset = Coding::encode_fixed64((seq_no << 8 | v_type.get_value()) as u64, buf, offset); // write value slice - (&mut buf[offset..]).write(value.as_ref())?; + (&mut buf[offset..]).write(value_buf)?; let slice = Slice::from_buf(buf); - self.list.insert(slice) + self.list.insert(UnsafeSlice::new_with_arena(buf, self.arena.clone())?) } /// 通过 key 查找结果 diff --git a/src/db/mod.rs b/src/db/mod.rs index b82f563..24a37d4 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -23,4 +23,4 @@ mod version_edit_test; /// 默认调表 pub type DefaultSkipList = SkipList; /// 默认内存表 -pub type DefaultMemTable = MemTable; \ No newline at end of file +pub type DefaultMemTable = MemTable; diff --git a/src/db/skip_list.rs b/src/db/skip_list.rs index 367dd1d..a043adc 100644 --- a/src/db/skip_list.rs +++ b/src/db/skip_list.rs @@ -12,16 +12,16 @@ use crate::traits::DataIterator; use crate::util::arena::ArenaRef; use crate::util::{Arena, Result}; use crate::util::slice::Slice; +use crate::util::unsafe_slice::UnsafeSlice; use crate::util::status::{LevelError, Status}; type RawNode = *mut Node; const MAX_LEVEL: usize = 8; -// todo struct Node { /// 存储的值, 如果为空,则是头指针或者尾指针 - key: Option, + key: Option, /// 数组元素首地址,代表一个数组,指向每层的下一个节点。 next_elems: *mut RawNode, /// 当前节点高度 @@ -43,16 +43,11 @@ pub struct SkipList { arena: ArenaRef, } -struct DataIter { +pub struct Iter { head: RawNode, tail: RawNode, current: RawNode, - cmp: Cmp, -} - -pub struct Iter<'a, Cmp: Comparator> { - list: &'a SkipList, - node: RawNode, + cmp: Arc, } impl SkipList { @@ -67,7 +62,7 @@ impl SkipList { } } - pub fn insert(&mut self, key: Slice) -> Result<()> { + pub fn insert(&mut self, key: UnsafeSlice) -> Result<()> { // TODO 这里是否可以优化 if self.contains(&key) { return Ok(()); @@ -82,9 +77,9 @@ impl SkipList { } #[inline] - fn insert_ele0(&mut self, key: Slice) -> Result<()> { + fn insert_ele0(&mut self, key: UnsafeSlice) -> Result<()> { let level = rand_level(); - debug!("insert {}, level: {}", &key, level); + debug!("insert {}, level: {}", String::from_utf8_lossy(key.as_ref()), level); let node = Node::create(key, level, self.arena.clone()); // head bind node // TODO, use macro to expand for-loop @@ -101,7 +96,7 @@ impl SkipList { return Ok(()); } - unsafe fn insert_elen(&mut self, key: Slice) -> Result<()> { + unsafe fn insert_elen(&mut self, key: UnsafeSlice) -> Result<()> { let mut current = self.head; let node_height = rand_level(); let node_top_level = node_height - 1; @@ -119,13 +114,13 @@ impl SkipList { (&mut *current).set_node(l, node_ptr); node.set_node(l, self.tail); debug!("bind: {} before: {}, after: , at level: {}", - node.key.as_ref().unwrap(), - (&*current).key.as_ref().unwrap(), + node.key.unwrap(), + (&*current).key.unwrap(), l); } break 'inner_loop; } else { - match self.cmp.compare(node.key.as_ref().unwrap(), ele.key.as_ref().unwrap()) { + match self.cmp.compare(node.key.unwrap().as_ref(), ele.key.unwrap().as_ref()) { Some(Ordering::Less) => { // node higher than current level at ele if node_top_level >= l { @@ -133,14 +128,14 @@ impl SkipList { node.set_node(l, ele_ptr); if (&*current).is_head() { debug!("bind: {} before: , after: {}, at level: {}", - node.key.as_ref().unwrap(), - ele.key.as_ref().unwrap(), + node.key.unwrap(), + ele.key.unwrap(), l); } else { debug!("bind: {} before: {}, after: {}, at level: {}", - node.key.as_ref().unwrap(), - (&*current).key.as_ref().unwrap(), - ele.key.as_ref().unwrap(), + node.key.unwrap(), + (&*current).key.unwrap(), + ele.key.unwrap(), l); } } @@ -172,8 +167,9 @@ impl SkipList { Ok(()) } - pub fn contains(&self, key: &Slice) -> bool { - debug!("================== begin contains, key: {} ==================", key); + pub fn contains>(&self, key: &R) -> bool { + let key_buf = key.as_ref(); + debug!("================== begin contains, key: {} ==================", String::from_utf8_lossy(key_buf)); if self.num == 0 { return false; } @@ -194,9 +190,9 @@ impl SkipList { } } { - debug!("node: {} at level: {}", ele.key.as_ref().unwrap(), level) + debug!("node: {} at level: {}", ele.key.unwrap(), level) } - match self.cmp.compare(key, ele.key.as_ref().unwrap()) { + match self.cmp.compare(key_buf, ele.key.unwrap().as_ref()) { None => return false, Some(Ordering::Equal) => return true, Some(Ordering::Less) => { @@ -261,7 +257,7 @@ impl ToString for SkipList { // calculate each item width let mut widths = Vec::with_capacity(tree.len()); self.iter().for_each(|s| { - widths.push(s.size()); + widths.push(s.len()); }); // print value list if self.num > 0 { @@ -270,7 +266,7 @@ impl ToString for SkipList { tree.push_str("[head]"); while !node.is_head_or_tail() { tree.push_str(" -> "); - tree.push_str(node.key.as_ref().unwrap().as_str()); + tree.push_str(node.key.unwrap().as_str()); let level_str = format!("({})", node.level); tree.push_str(level_str.as_str()); node = &*node.get_node(0); @@ -285,7 +281,7 @@ impl ToString for SkipList { impl Node { #[inline] - fn create(src: Slice, level: usize, arena: ArenaRef) -> RawNode { + fn create(src: UnsafeSlice, level: usize, arena: ArenaRef) -> RawNode { let node = box Self { key: Some(src), next_elems: allocate_next_elems(arena), @@ -386,32 +382,34 @@ fn min_max(a: usize, b: usize) -> (usize, usize) { } // 'b lifetime is bigger than 'a -impl<'a, Cmp: Comparator> Iter<'a, Cmp> { - fn create(list: &'a SkipList) -> Self { +impl Iter { + fn create(list: &SkipList) -> Self { Self { - list, - node: list.head, + head: list.head, + tail: list.tail, + current: list.head, + cmp: list.cmp.clone(), } } } -impl<'a, Cmp: Comparator> Iterator for Iter<'a, Cmp> { - type Item = &'a Slice; +impl Iterator for Iter { + type Item = UnsafeSlice; #[inline] fn next(&mut self) -> Option { unsafe { - if (&*self.node).is_tail() { + if (&*self.current).is_tail() { return None; } else { - self.node = (&*self.node).get_node(0); + self.current = (&*self.current).get_node(0); } - (&*self.node).key.as_ref() + (&*self.current).key } } } -impl DataIterator for DataIter { +impl DataIterator for Iter { #[inline] fn valid(&self) -> bool { @@ -447,14 +445,14 @@ impl DataIterator for DataIter { todo!() } - fn key(&self) -> &Slice { + fn key(&self) -> UnsafeSlice { let mem_key = unsafe { - (&*self.current).key.as_ref().unwrap() + (&*self.current).key.unwrap() }; mem_key } - fn value(&self) -> &Slice { + fn value(&self) -> UnsafeSlice { todo!() } } \ No newline at end of file diff --git a/src/db/skip_list_test.rs b/src/db/skip_list_test.rs index 7ac6f6f..11ef4bd 100644 --- a/src/db/skip_list_test.rs +++ b/src/db/skip_list_test.rs @@ -8,15 +8,16 @@ mod test { use crate::util::comparator::BytewiseComparatorImpl; use crate::util::Result; use crate::util::slice::Slice; + use crate::util::unsafe_slice::TryIntoUnsafeSlice; #[test] fn test_add() -> Result<()> { let cmp = Arc::new(BytewiseComparatorImpl::default()); let arena = Arc::new(Mutex::new(Arena::default())); - let mut list = DefaultSkipList::create(cmp, arena); + let mut list = DefaultSkipList::create(cmp, arena.clone()); let len = 10; for i in 0..len { - list.insert(format!("key_{}", i).into()).expect("insert ok"); + list.insert(format!("key_{}", i).try_into_unsafe_slice(arena.clone())?).expect("insert ok"); } assert_eq!(10, list.len(), "expect 10, but actually is: {}", list.len()); debug!("{}", list.to_string()); @@ -35,7 +36,7 @@ mod test { fn test_rnd_add() -> Result<()> { let cmp = Arc::new(BytewiseComparatorImpl::default()); let arena = Arc::new(Mutex::new(Arena::default())); - let mut list = DefaultSkipList::create(cmp, arena); + let mut list = DefaultSkipList::create(cmp, arena.clone()); let len = 10; let mut rnd = rand::thread_rng(); let mut set = HashSet::new(); @@ -43,12 +44,12 @@ mod test { let j = rnd.gen_range(0..len); let key = format!("key_{}", j); set.insert(key.clone()); - list.insert(key.into())?; + list.insert(key.try_into_unsafe_slice(arena.clone())?)?; debug!("skiplist: {}", list.to_string()); } assert_eq!(set.len(), list.len(), "list length must eq: {}", list.len()); set.iter().for_each(|key| { - let c = list.contains(&key.clone().into()); + let c = list.contains(&key); assert!(c, "must contains key: {}", key) }); diff --git a/src/db/version_set.rs b/src/db/version_set.rs index 15dabb0..831183d 100644 --- a/src/db/version_set.rs +++ b/src/db/version_set.rs @@ -42,7 +42,7 @@ pub struct VersionSet { dbname_: Slice, options_: Options, table_cache_: TableCache, - icmp_: Box, + icmp_: InternalKeyComparator, next_file_number_: u64, manifest_file_number_: u64, last_sequence_: u64, @@ -101,7 +101,7 @@ struct GetStats { // ,cc line 163 struct LevelFileNumIterator { - icmp_: Rc, + icmp_: InternalKeyComparator, flist_: Vec, index_: u32, @@ -329,7 +329,7 @@ impl VersionSet { /// ``` /// /// ``` - fn find_file(icmp: &InternalKeyComparator, files:&Vec, key:&Slice) -> u32 { + fn find_file(icmp: InternalKeyComparator, files:&Vec, key:&Slice) -> u32 { todo!() } @@ -359,7 +359,7 @@ impl VersionSet { /// ``` /// /// ``` - fn some_file_overlaps_range(icmp: &InternalKeyComparator, disjoint_sorted_files:bool, + fn some_file_overlaps_range(icmp: InternalKeyComparator, disjoint_sorted_files:bool, files:&Vec, smallest_user_key:&Slice,largest_user_key:&Slice) -> bool { todo!() } diff --git a/src/traits/comparator_trait.rs b/src/traits/comparator_trait.rs index a7a74c1..477eb58 100644 --- a/src/traits/comparator_trait.rs +++ b/src/traits/comparator_trait.rs @@ -17,16 +17,17 @@ pub trait Comparator { /// /// ``` /// use std::cmp::Ordering; + /// use crate::util::slice::Slice; /// /// let comp = BytewiseComparatorImpl::default(); - /// optionVal = comp.compare(&Slice::from("a"), &Slice::from("ab")); + /// optionVal = comp.compare("a", "ab"); /// assert_eq!(optionVal.unwrap(), Ordering::Less); /// /// let comp = BytewiseComparatorImpl::default(); /// let optionVal = comp.compare(&Slice::from("b"), &Slice::from("abcd")); /// assert_eq!(optionVal.unwrap(), Ordering::Greater); /// ``` - fn compare(&self, a: &Slice, b: &Slice) -> Option; + fn compare(&self, a: &[u8], b: &[u8]) -> Option; /// 返回comparator的名字 fn get_name(&self) -> String; diff --git a/src/traits/iterator.rs b/src/traits/iterator.rs index 7717574..164fcc5 100644 --- a/src/traits/iterator.rs +++ b/src/traits/iterator.rs @@ -1,4 +1,5 @@ use crate::util::slice::Slice; +use crate::util::unsafe_slice::UnsafeSlice; pub trait DataIterator { /// 检查当前位置是否有效 @@ -92,7 +93,7 @@ pub trait DataIterator { /// ``` /// /// ``` - fn key(&self) -> &Slice; + fn key(&self) -> UnsafeSlice; /// 获取value值 /// /// # Arguments @@ -105,6 +106,6 @@ pub trait DataIterator { /// ``` /// /// ``` - fn value(&self) -> &Slice; + fn value(&self) -> UnsafeSlice; } diff --git a/src/util/comparator.rs b/src/util/comparator.rs index e7fb05d..661a930 100644 --- a/src/util/comparator.rs +++ b/src/util/comparator.rs @@ -1,8 +1,9 @@ - use std::cmp::{min, Ordering}; -use crate::traits::comparator_trait::{Comparator}; + +use crate::traits::comparator_trait::Comparator; use crate::util::slice::Slice; +#[derive(Copy, Clone)] pub struct BytewiseComparatorImpl {} /// @@ -11,14 +12,33 @@ pub struct BytewiseComparatorImpl {} /// 也就是说 i>helloworld,因为先比较i和h,i>h,比较直接结束 impl Default for BytewiseComparatorImpl { fn default() -> Self { - Self{} + Self {} } } +#[allow(improper_ctypes)] +extern { + fn memcmp(s1: *const i8, s2: *const i8, n: usize) -> i32; +} + impl Comparator for BytewiseComparatorImpl { - fn compare(&self, a: &Slice, b: &Slice) -> Option { - a.partial_cmp(b) + fn compare(&self, a: &[u8], b: &[u8]) -> Option { + let min = a.len().min(b.len()); + let cmp = unsafe { + memcmp( + a.as_ptr() as *const i8, + b.as_ptr() as *const i8, + min, + ) + }; + if cmp == 0 { + a.len().partial_cmp(&b.len()) + } else if cmp > 0 { + Some(Ordering::Greater) + } else { + Some(Ordering::Less) + } } fn get_name(&self) -> String { @@ -30,7 +50,7 @@ impl Comparator for BytewiseComparatorImpl { let min_length: usize = min(start.len(), limit.len()); let mut diff_index: usize = 0; - let mut start_char_vec: Vec = start.as_bytes().to_vec(); + let mut start_char_vec: Vec = start.as_bytes().to_vec(); let limit_char_vec: &Vec = &limit.to_vec(); // or use // let start_char_vec: Vec = start.chars().collect::>(); @@ -47,7 +67,7 @@ impl Comparator for BytewiseComparatorImpl { // 如果一个字符串是另个一字符串的前缀,无需做截短操作,否则进入 else。 if diff_index >= min_length { // 说明 start是limit的前缀,或者反之,此时不作修改,直接返回 - } else{ + } else { // 尝试执行字符start[diff_index]++, 设置start长度为diff_index+1,并返回 // ++条件:字符 < oxff 并且字符+1 < limit上该index的字符 let diff_byte: u8 = start_char_vec[diff_index]; @@ -61,8 +81,8 @@ impl Comparator for BytewiseComparatorImpl { } } - let shortest_separator: &[u8] = &start_char_vec[0..diff_index+1]; - let shortest_separator_val: String= Slice::from_buf(shortest_separator).into(); + let shortest_separator: &[u8] = &start_char_vec[0..diff_index + 1]; + let shortest_separator_val: String = Slice::from_buf(shortest_separator).into(); shortest_separator_val } @@ -71,15 +91,15 @@ impl Comparator for BytewiseComparatorImpl { // 如果找不到说明 key的字符都是 u8::MAX,直接返回 let key_len = key.len(); - let mut key_char_vec: Vec = key.as_bytes().to_vec(); + let mut key_char_vec: Vec = key.as_bytes().to_vec(); for i in 0..key_len { let byte_val: u8 = key_char_vec[i]; if byte_val != u8::MAX { key_char_vec[i] = byte_val + 1; - let short_successor: &[u8] = &key_char_vec[0..i+1]; + let short_successor: &[u8] = &key_char_vec[0..i + 1]; - let short_successor_val: String= Slice::from_buf(short_successor).into(); + let short_successor_val: String = Slice::from_buf(short_successor).into(); return short_successor_val; } } diff --git a/src/util/mod.rs b/src/util/mod.rs index 5a8fc32..ade5ddc 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -40,6 +40,7 @@ pub mod options; pub mod debug; pub mod linked_list; mod linked_list_test; +pub mod unsafe_slice; /// 定义别名 pub type Result = result::Result; diff --git a/src/util/options.rs b/src/util/options.rs index a63c9f6..9028980 100644 --- a/src/util/options.rs +++ b/src/util/options.rs @@ -1,4 +1,5 @@ use crate::db::db::Snapshot; +use crate::db::db_format::InternalKeyComparator; use crate::traits::comparator_trait::Comparator; use crate::util::comparator::BytewiseComparatorImpl; @@ -132,7 +133,7 @@ pub struct WriteOptions { impl Default for Options { fn default() -> Self { Self { - cmp: Box::new(BytewiseComparatorImpl::default()), + cmp: Box::new(InternalKeyComparator::default()), create_if_missing: false, error_if_exists: false, paranoid_checks: false, diff --git a/src/util/slice.rs b/src/util/slice.rs index f172c83..dde1318 100644 --- a/src/util/slice.rs +++ b/src/util/slice.rs @@ -154,6 +154,13 @@ impl > From for Slice { } } +impl AsRef<[u8]> for Slice { + #[inline] + fn as_ref(&self) -> &[u8] { + self.data.as_slice() + } +} + impl PartialEq for Slice { /// 判断两个 Slice 是否相同 #[inline] diff --git a/src/util/unsafe_slice.rs b/src/util/unsafe_slice.rs new file mode 100644 index 0000000..5705b41 --- /dev/null +++ b/src/util/unsafe_slice.rs @@ -0,0 +1,105 @@ +use std::alloc::{alloc, Layout}; +use std::fmt::{Display, Formatter}; +use std::io::Write; +use std::mem::ManuallyDrop; + +use crate::util::arena::ArenaRef; +use crate::util::Result; +use crate::util::slice::Slice; + +/// 提供一种将其它结构体转为 UnsafeSlice 的特质 +pub trait TryIntoUnsafeSlice { + /// 尝试将结构体通过 arena 内存分配器,构造出一个新的 UnsafeSlice + fn try_into_unsafe_slice(&self, arena: ArenaRef) -> Result; +} + +/// 内存不安全的 Slice, 内存由 Arena 分配和管理。 +/// 实现了 Copy 语义,有更高效的读 api +#[derive(Copy, Clone)] +pub struct UnsafeSlice { + ptr: *mut u8, + len: usize, +} + +impl UnsafeSlice { + + /// 利用 arena 生成 UnsafeSlice + pub fn new_with_arena>(data: B, arena: ArenaRef) -> Result { + let mut lock = arena.lock()?; + let src = data.as_ref(); + let mut buf = lock.allocate(src.len()); + buf.write(src)?; + Ok(Self { + ptr: buf.as_mut_ptr(), + len: buf.len(), + }) + } + + #[inline] + pub fn len(&self) -> usize { + self.len + } + + #[inline] + pub fn as_str(&self) -> &str { + unsafe { + core::str::from_utf8_unchecked(self.as_ref()) + } + } +} + +impl UnsafeSlice { + + /// 返回子串。这个方法是高效的,在内部只复制了裸指针偏的移量。 + pub unsafe fn sub_slice(&self, start: usize, len: usize) -> Self { + assert!(start + len < self.len, "sub_slice out of range"); + Self { + ptr: self.ptr.offset(start as isize), + len, + } + } + + /// 生成 Slice 串,由于 Slice 是内存安全的,所以实现上会有内存拷贝。 + /// 高性能场景优先考虑 UnsafeSlice + pub fn to_slice(&self) -> Slice { + unsafe { + let raw_ptr = alloc(Layout::from_size_align_unchecked(self.len, 8)); + Slice::from_raw_parts(raw_ptr, self.len) + } + } +} + +impl AsRef<[u8]> for UnsafeSlice { + + #[inline] + fn as_ref(&self) -> &[u8] { + unsafe { + core::slice::from_raw_parts(self.ptr, self.len) + } + } +} + +impl Display for UnsafeSlice { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + unsafe { + let string = ManuallyDrop::new( + String::from_raw_parts(self.ptr, self.len, self.len) + ); + f.write_str(string.as_str()) + } + } +} + +impl TryIntoUnsafeSlice for &str { + #[inline] + fn try_into_unsafe_slice(&self, arena: ArenaRef) -> Result { + UnsafeSlice::new_with_arena(self.as_bytes(), arena) + } +} + +impl TryIntoUnsafeSlice for String { + #[inline] + fn try_into_unsafe_slice(&self, arena: ArenaRef) -> Result { + UnsafeSlice::new_with_arena(self.as_bytes(), arena) + } +} \ No newline at end of file -- Gitee From 1dca5ce78ac89f9b59eff360a4005e2a8080fdc0 Mon Sep 17 00:00:00 2001 From: wangboo <5417808+wangboa@user.noreply.gitee.com> Date: Wed, 22 Mar 2023 17:56:05 +0800 Subject: [PATCH 29/50] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E5=B7=A5=E4=BD=9C?= =?UTF-8?q?=E8=BF=9B=E5=BA=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 50 ++++++++++++++++++++-------------------- src/db/mem_table.rs | 2 +- src/util/unsafe_slice.rs | 2 ++ 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 362db95..97f5fa6 100644 --- a/README.md +++ b/README.md @@ -71,31 +71,31 @@ RUSTFLAGS='--cfg CORE_DEBUG="false"' cargo build --release ### 1.1.0 版本, 完成基础零部件 -| 功能模块 | 完成人 | 进度 | -|----------------------------------------------------------------------------------|----------------------|-----| -| util.Options(ReadOptions, WriteOptions) | kazeseiriou | | -| util.ENV(WritableFile, SequentialFile, RandomAccessFile, FileLock) | lxd5866 | | -| util.Logger | peach | | -| table.Block, BlockBuilder, FilterBlockBuilder | colagy | | -| FilterBlock, FilterBlockReader | colagy | | -| table.format(Footer, BlockHandle) | 半支烟 | 20% | -| db.dbformat(InternalKeyComparator, InternalFilterPolicy, LookupKey, InternalKey) | 半支烟 | 20% | -| db.SkipList | wangboo | 80% | -| table.Iterator(DBIter, MergingIterator, TwoLevelIterator...) | kazeseiriou | | -| IteratorWrapper | kazeseiriou | | -| db.MemTable(MemTable, MemTableIterator) | wangboo,tzcyujunyong | 20% | -| SSTable | fengyang | 0% | -| table.Table | peach,tzcyujunyong | | -| db.leveldb_util | wangboo | | -| db.log_format | wangboo | | -| db.LogReader | wangboo | 90% | -| db.LogWriter | wangboo | 90% | -| db.TableCache | colagy | 10% | -| LinkedList | fengyang | 60% | -| db.VersionEdit(Tag, VersionEdit, FileMetaData) | fengyang | 10% | -| db.VersionSet(Version, LevelFileNumIterator, SaverState) | fengyang | 10% | -| WriteBatch | tzcyujunyong | | -| | 半支烟 | | +| 功能模块 | 完成人 | 进度 | +|----------------------------------------------------------------------------------|----------------------|------| +| util.Options(ReadOptions, WriteOptions) | kazeseiriou,wangboo | 0% | +| util.ENV(WritableFile, SequentialFile, RandomAccessFile, FileLock) | lxd5866 | 0% | +| util.Logger/Log日志库 | peach | 50% | +| table.Block, BlockBuilder, FilterBlockBuilder | colagy | 0% | +| FilterBlock, FilterBlockReader | colagy | 0% | +| table.format(Footer, BlockHandle) | 半支烟 | 20% | +| db.dbformat(InternalKeyComparator, InternalFilterPolicy, LookupKey, InternalKey) | 半支烟 | 20% | +| db.SkipList | wangboo | 100% | +| table.Iterator(DBIter, MergingIterator, TwoLevelIterator...) | kazeseiriou | 0% | +| IteratorWrapper | kazeseiriou | 0% | +| db.MemTable(MemTable, MemTableIterator) | wangboo,tzcyujunyong | 20% | +| SSTable | fengyang | 0% | +| table.Table | peach,tzcyujunyong | | +| db.leveldb_util | wangboo | 0% | +| db.log_format | wangboo | 90% | +| db.LogReader | wangboo | 90% | +| db.LogWriter | wangboo | 90% | +| db.TableCache | colagy | 10% | +| LinkedList | fengyang | 60% | +| db.VersionEdit(Tag, VersionEdit, FileMetaData) | fengyang | 20% | +| db.VersionSet(Version, LevelFileNumIterator, SaverState) | fengyang | 20% | +| WriteBatch | tzcyujunyong,wangboo | 50% | +| | 半支烟 | 40% | diff --git a/src/db/mem_table.rs b/src/db/mem_table.rs index d2746f4..3a521fb 100644 --- a/src/db/mem_table.rs +++ b/src/db/mem_table.rs @@ -87,7 +87,7 @@ impl MemTable { offset = Coding::encode_fixed64((seq_no << 8 | v_type.get_value()) as u64, buf, offset); // write value slice (&mut buf[offset..]).write(value_buf)?; - let slice = Slice::from_buf(buf); + // let slice = Slice::from_buf(buf); self.list.insert(UnsafeSlice::new_with_arena(buf, self.arena.clone())?) } diff --git a/src/util/unsafe_slice.rs b/src/util/unsafe_slice.rs index 5705b41..c48d2bb 100644 --- a/src/util/unsafe_slice.rs +++ b/src/util/unsafe_slice.rs @@ -35,6 +35,8 @@ impl UnsafeSlice { }) } + + #[inline] pub fn len(&self) -> usize { self.len -- Gitee From 7921bfa5022ba134df206b6ba94580c1818caed6 Mon Sep 17 00:00:00 2001 From: fengyang Date: Thu, 23 Mar 2023 10:15:24 +0800 Subject: [PATCH 30/50] Status get_msg bugfix --- src/db/version_edit_test.rs | 17 +++++++++++++---- src/util/status.rs | 7 +++++-- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/db/version_edit_test.rs b/src/db/version_edit_test.rs index 2c2efec..54f7836 100644 --- a/src/db/version_edit_test.rs +++ b/src/db/version_edit_test.rs @@ -26,13 +26,22 @@ mod test { } #[test] - fn test_version_edit_decode_from() { + fn test_version_edit_decode_from_default() { let source = Slice::from("a"); let mut version_edit = VersionEdit::new(); let status = version_edit.decode_from(&source); - println!("status: {}.", status.get_error()); - // todo - // assert_eq!(target.len(), 2); + assert!(&status.is_corruption()); + assert_eq!(&status.get_msg(), "VersionEdit: unknown tag"); + } + + #[test] + fn test_version_edit_decode_from() { + let source = Slice::from("a"); + + let mut version_edit = VersionEdit::new_with_log_number(6); + let status = version_edit.decode_from(&source); + assert!(&status.is_corruption()); + assert_eq!(&status.get_msg(), "VersionEdit: unknown tag"); } } \ No newline at end of file diff --git a/src/util/status.rs b/src/util/status.rs index eedb41f..97b33d6 100644 --- a/src/util/status.rs +++ b/src/util/status.rs @@ -103,10 +103,13 @@ impl Status { self.err.is_invalid_argument() } - pub fn get_error_string(&self) -> String { - self.err.to_string() + pub fn get_msg(&self) -> String { + let msg = &self.msg; + + String::from(msg.as_str()) } + /// 得到 LevelError /// 请注意, err 的所有权会发生转移!!! pub fn get_error(self) -> LevelError { self.err -- Gitee From 1f568d13b7a330f282efb4aad6423ababa108aad Mon Sep 17 00:00:00 2001 From: wangboo <5417808+wangboa@user.noreply.gitee.com> Date: Wed, 29 Mar 2023 18:22:37 +0800 Subject: [PATCH 31/50] =?UTF-8?q?=E5=A2=9E=E5=8A=A0benchmark?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.toml | 18 +++++++++ benches/crc_bench.rs | 24 ++++++++++++ benches/skiplist_bench.rs | 64 +++++++++++++++++++++++++++++++ benches/skiplist_memory_useage.rs | 55 ++++++++++++++++++++++++++ src/lib.rs | 4 +- 5 files changed, 163 insertions(+), 2 deletions(-) create mode 100644 benches/crc_bench.rs create mode 100644 benches/skiplist_bench.rs create mode 100644 benches/skiplist_memory_useage.rs diff --git a/Cargo.toml b/Cargo.toml index 129478b..b60d1d1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,9 +12,27 @@ path = "src/lib.rs" [dependencies] rand = "0.8.5" tokio = "1.24.1" +jemallocator = "0.5" +jemalloc-sys = {version = "0.5", features = ["stats"]} + +[dev-dependencies] +criterion = "0.3.0" +crc32fast = "1.3.2" +skiplist = "0.4.0" [profile.dev] [profile.release] +[[bench]] +name = "crc_bench" +harness = false + +[[bench]] +name = "skiplist_bench" +harness = false + +[[bench]] +name = "skiplist_memory_useage" +harness = false \ No newline at end of file diff --git a/benches/crc_bench.rs b/benches/crc_bench.rs new file mode 100644 index 0000000..e1bdc08 --- /dev/null +++ b/benches/crc_bench.rs @@ -0,0 +1,24 @@ +use criterion::{Criterion, criterion_group, criterion_main}; +use rand::RngCore; +use level_db_rust::util::crc::CRC; + +pub const SRC_DATA: [u8; 512] = [0; 512]; + +pub fn default_crc_bench(c: &mut Criterion) { + let mut rnd = rand::thread_rng(); + c.bench_function("default_crc", |b| { + b.iter(|| { + rnd.fill_bytes(&mut SRC_DATA); + CRC::value(&SRC_DATA); + }); + }); + c.bench_function("crc32fast", |b| { + b.iter(|| { + rnd.fill_bytes(&mut SRC_DATA); + crc32fast::hash(&SRC_DATA); + }); + }); +} + +criterion_group!(benches, default_crc_bench); +criterion_main!(benches); \ No newline at end of file diff --git a/benches/skiplist_bench.rs b/benches/skiplist_bench.rs new file mode 100644 index 0000000..bc95bd7 --- /dev/null +++ b/benches/skiplist_bench.rs @@ -0,0 +1,64 @@ +use std::sync::{Arc, Mutex}; + +use criterion::{Criterion, criterion_group, criterion_main}; +use rand::Rng; +use skiplist::OrderedSkipList; + +use level_db_rust::db::skip_list::SkipList; +use level_db_rust::util::Arena; +use level_db_rust::util::arena::ArenaRef; +use level_db_rust::util::comparator::BytewiseComparatorImpl; +use level_db_rust::util::unsafe_slice::TryIntoUnsafeSlice; + + +const BENCH_TIMES: usize = 128; + +pub fn skiplist_bench(c: &mut Criterion) { + // 生成测试样本,保证两次测试都是相同的次数 + let mut rnd = rand::thread_rng(); + let mut every_bench_times = [0; BENCH_TIMES]; + for i in 0..BENCH_TIMES { + every_bench_times[i] = rnd.gen_range(32..20480); + } + + c.bench_function("default_skiplist", |b| { + let mut i = 0; + b.iter(|| { + let cmp = Arc::new(BytewiseComparatorImpl::default()); + let arena = Arc::new(Mutex::new(Arena::default())); + let list = SkipList::create(cmp, arena.clone()); + bench_default_skiplist(list, arena, every_bench_times[i % BENCH_TIMES]); + i += 1; + }); + }); + + c.bench_function("skiplist-0.4.0", |b| { + let mut i = 0; + b.iter(|| { + let list: OrderedSkipList = unsafe { + OrderedSkipList::with_comp(|a: &String, b: &String| { + a.cmp(b) + }) + }; + bench_skiplist_v_0_4_0(list, every_bench_times[i % BENCH_TIMES]); + i += 1; + }); + }); +} + +fn bench_default_skiplist(mut list: SkipList, arena: ArenaRef, record_count: usize) { + for j in 0..record_count { + let value = format!("key_{}", j); + list.insert(value.try_into_unsafe_slice(arena.clone()).unwrap()).unwrap(); + } +} + +fn bench_skiplist_v_0_4_0(mut list: OrderedSkipList, record_count: usize) { + for j in 0..record_count { + let value = format!("key_{}", j); + list.insert(value.clone()); + } +} + +criterion_group!(benches, skiplist_bench); +criterion_main!(benches); \ No newline at end of file diff --git a/benches/skiplist_memory_useage.rs b/benches/skiplist_memory_useage.rs new file mode 100644 index 0000000..deaee47 --- /dev/null +++ b/benches/skiplist_memory_useage.rs @@ -0,0 +1,55 @@ +use std::ffi::{c_char, c_void}; +use std::ptr::{null, null_mut}; +use std::sync::{Arc, Mutex}; +use skiplist::OrderedSkipList; +use level_db_rust::db::skip_list::SkipList; +use level_db_rust::util::Arena; +use level_db_rust::util::arena::ArenaRef; +use level_db_rust::util::comparator::BytewiseComparatorImpl; +use level_db_rust::util::unsafe_slice::TryIntoUnsafeSlice; + +#[global_allocator] +static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; + +extern "C" fn write_cb(_: *mut c_void, message: *const c_char) { + print!("{}", String::from_utf8_lossy(unsafe { + std::ffi::CStr::from_ptr(message as *const i8).to_bytes() + })); +} + +fn mem_print() { + unsafe { jemalloc_sys::malloc_stats_print(Some(write_cb), null_mut(), null()) } +} + +fn bench_default_skiplist(mut list: SkipList, arena: ArenaRef, record_count: usize) { + for j in 0..record_count { + let value = format!("key_{}", j); + list.insert(value.try_into_unsafe_slice(arena.clone()).unwrap()).unwrap(); + } + println!("bench_default_skiplist: "); + mem_print(); +} + +fn bench_skiplist_v_0_4_0(mut list: OrderedSkipList, record_count: usize) { + for j in 0..record_count { + let value = format!("key_{}", j); + list.insert(value.clone()); + } + println!("bench_skiplist_v_0_4_0: "); + mem_print(); +} + +fn main() { + let record_count = 100 * 1024; + // let cmp = Arc::new(BytewiseComparatorImpl::default()); + // let arena = Arc::new(Mutex::new(Arena::default())); + // let list = SkipList::create(cmp, arena.clone()); + // bench_default_skiplist(list, arena, record_count); + + let list: OrderedSkipList = unsafe { + OrderedSkipList::with_comp(|a: &String, b: &String| { + a.cmp(b) + }) + }; + bench_skiplist_v_0_4_0(list, record_count); +} \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index b3fbd97..c8b17d3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,9 +4,9 @@ extern crate core; -mod db; +pub mod db; mod table; -mod util; +pub mod util; mod traits; mod test { -- Gitee From 772d41240d2948113aef0d4ded3232e8d1373f3e Mon Sep 17 00:00:00 2001 From: fengyang Date: Wed, 29 Mar 2023 18:23:59 +0800 Subject: [PATCH 32/50] Status get_msg bugfix --- src/db/version_edit.rs | 44 ++++++++++++++++++++++++++----------- src/db/version_edit_test.rs | 7 ++++++ 2 files changed, 38 insertions(+), 13 deletions(-) diff --git a/src/db/version_edit.rs b/src/db/version_edit.rs index c779677..792d00a 100644 --- a/src/db/version_edit.rs +++ b/src/db/version_edit.rs @@ -276,6 +276,8 @@ impl VersionEdit { let version_edit = VersionEdit::new(); let msg : Option = Option::None; + + // todo Coding::get_varint32 存在问题。开发暂停 while msg.is_none() && Coding::get_varint32(source) != 0_u32 { let tag_value = Coding::get_varint32(source); let tag = Tag::from_value(tag_value); @@ -284,31 +286,47 @@ impl VersionEdit { return LevelError::corruption_string("VersionEdit", "unknown tag"); } - // match tag { - // Tag::k_comparator => 1, - // Tag::kLogNumber => 2, - // Tag::kNextFileNumber => 3, - // Tag::kLastSequence => 4, - // Tag::kCompactPointer => 5, - // Tag::kDeletedFile => 6, - // Tag::kNewFile => 7, - // Tag::kPrevLogNumber => 9, - // _ => 0 - // }; } todo!() } /// VersionEdit 输出调试信息 pub fn debug_string(&self) -> Slice { - todo!() + let debug_str = String::from("VersionEdit {"); + + let mut has_comparator_str = String::default(); + if(self.has_comparator_){ + has_comparator_str.push_str(format!("\n Comparator: {}", self.comparator_.as_str()).as_str()); + } + + let mut has_log_number__str = String::default(); + // if(self.has_log_number_){ + // todo + // // let append_log_number = logging.AppendNumberTo(&r, self.log_number_); + // let append_log_number = self.log_number_ + "".as_ref(); + // has_log_number__str.push_str(format!("\n LogNumber: {}", append_log_number).as_str()); + // } + + let rs = format!("{}{}{}", debug_str, has_log_number__str, "\n}\n"); + + Slice::from(rs) } } /// 静态方法 impl<'a> VersionEdit { - pub fn get_internal_key(inout: Slice) -> Result { + pub fn get_internal_key(input: Slice) -> Result { + let key= InternalKey::default(); + todo!() + + // Slice str; + // if (GetLengthPrefixedSlice(input, &str)) { + // dst->DecodeFrom(str); + // return true; + // } else { + // return false; + // } } /// 从 Slice 中解出 level 值 diff --git a/src/db/version_edit_test.rs b/src/db/version_edit_test.rs index 54f7836..b5c06d8 100644 --- a/src/db/version_edit_test.rs +++ b/src/db/version_edit_test.rs @@ -44,4 +44,11 @@ mod test { assert!(&status.is_corruption()); assert_eq!(&status.get_msg(), "VersionEdit: unknown tag"); } + + #[test] + fn test_version_edit_debug_string() { + let mut version_edit = VersionEdit::new_with_log_number(6); + let debug_str = version_edit.debug_string(); + println!("debug_str: \n {}", debug_str); + } } \ No newline at end of file -- Gitee From a9a8464e1c40b5f431eef04390cbf2dd7315e045 Mon Sep 17 00:00:00 2001 From: wangboo <5417808+wangboa@user.noreply.gitee.com> Date: Wed, 29 Mar 2023 19:21:50 +0800 Subject: [PATCH 33/50] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E8=BF=9B=E5=BA=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 52 ++++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 97f5fa6..54ba069 100644 --- a/README.md +++ b/README.md @@ -71,32 +71,32 @@ RUSTFLAGS='--cfg CORE_DEBUG="false"' cargo build --release ### 1.1.0 版本, 完成基础零部件 -| 功能模块 | 完成人 | 进度 | -|----------------------------------------------------------------------------------|----------------------|------| -| util.Options(ReadOptions, WriteOptions) | kazeseiriou,wangboo | 0% | -| util.ENV(WritableFile, SequentialFile, RandomAccessFile, FileLock) | lxd5866 | 0% | -| util.Logger/Log日志库 | peach | 50% | -| table.Block, BlockBuilder, FilterBlockBuilder | colagy | 0% | -| FilterBlock, FilterBlockReader | colagy | 0% | -| table.format(Footer, BlockHandle) | 半支烟 | 20% | -| db.dbformat(InternalKeyComparator, InternalFilterPolicy, LookupKey, InternalKey) | 半支烟 | 20% | -| db.SkipList | wangboo | 100% | -| table.Iterator(DBIter, MergingIterator, TwoLevelIterator...) | kazeseiriou | 0% | -| IteratorWrapper | kazeseiriou | 0% | -| db.MemTable(MemTable, MemTableIterator) | wangboo,tzcyujunyong | 20% | -| SSTable | fengyang | 0% | -| table.Table | peach,tzcyujunyong | | -| db.leveldb_util | wangboo | 0% | -| db.log_format | wangboo | 90% | -| db.LogReader | wangboo | 90% | -| db.LogWriter | wangboo | 90% | -| db.TableCache | colagy | 10% | -| LinkedList | fengyang | 60% | -| db.VersionEdit(Tag, VersionEdit, FileMetaData) | fengyang | 20% | -| db.VersionSet(Version, LevelFileNumIterator, SaverState) | fengyang | 20% | -| WriteBatch | tzcyujunyong,wangboo | 50% | -| | 半支烟 | 40% | - +| 功能模块 | 完成人 | 进度 | +|------------------------------------------------------------------------------------|------------------------|--------| +| util.Options(ReadOptions, WriteOptions) | kazeseiriou,wangboo | 0% | +| util.ENV(WritableFile, SequentialFile, RandomAccessFile, FileLock) | lxd5866 | 0% | +| util.Logger/Log日志库 | peach | 50% | +| table.Block, BlockBuilder, FilterBlockBuilder | colagy | 0% | +| FilterBlock, FilterBlockReader | colagy | 0% | +| table.format(Footer, BlockHandle) | 半支烟 | 20% | +| db.dbformat(InternalKeyComparator, InternalFilterPolicy, LookupKey, InternalKey) | 半支烟 | 20% | +| db.SkipList | wangboo | 100% | +| table.Iterator(DBIter, MergingIterator, TwoLevelIterator...) | kazeseiriou | 0% | +| IteratorWrapper | kazeseiriou | 0% | +| db.MemTable(MemTable, MemTableIterator) | wangboo,tzcyujunyong | 20% | +| SSTable | fengyang | 0% | +| table.Table | peach,tzcyujunyong | | +| db.leveldb_util | wangboo | 0% | +| db.log_format | wangboo | 90% | +| db.LogReader | wangboo | 90% | +| db.LogWriter | wangboo | 90% | +| db.TableCache | colagy | 10% | +| LinkedList | fengyang | 60% | +| db.VersionEdit(Tag, VersionEdit, FileMetaData) | fengyang | 20% | +| db.VersionSet(Version, LevelFileNumIterator, SaverState) | fengyang | 20% | +| WriteBatch | tzcyujunyong,wangboo | 50% | +| | 半支烟 | 90% | +| ---------------------------------------------------------------------------------- | ---------------------- | ------ | -- Gitee From fbd7bad138c31d0253a8adb50eec9d2dbe3eff27 Mon Sep 17 00:00:00 2001 From: fengyang Date: Thu, 30 Mar 2023 13:03:02 +0800 Subject: [PATCH 34/50] =?UTF-8?q?FilterBlock=20=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 4 +- src/db/version_set.rs | 4 +- src/table/filter_block.rs | 181 ++++++++++++++++++++++++++++-- src/table/filter_block_test.rs | 50 +++++++++ src/table/mod.rs | 1 + src/traits/filter_policy_trait.rs | 2 + src/util/filter_policy.rs | 2 +- src/util/options.rs | 8 +- 8 files changed, 234 insertions(+), 18 deletions(-) create mode 100644 src/table/filter_block_test.rs diff --git a/README.md b/README.md index 97f5fa6..52b2659 100644 --- a/README.md +++ b/README.md @@ -76,8 +76,8 @@ RUSTFLAGS='--cfg CORE_DEBUG="false"' cargo build --release | util.Options(ReadOptions, WriteOptions) | kazeseiriou,wangboo | 0% | | util.ENV(WritableFile, SequentialFile, RandomAccessFile, FileLock) | lxd5866 | 0% | | util.Logger/Log日志库 | peach | 50% | -| table.Block, BlockBuilder, FilterBlockBuilder | colagy | 0% | -| FilterBlock, FilterBlockReader | colagy | 0% | +| table.Block, BlockBuilder, FilterBlockBuilder | colagy,fengyang | 20% | +| FilterBlock, FilterBlockReader | colagy,fengyang | 80% | | table.format(Footer, BlockHandle) | 半支烟 | 20% | | db.dbformat(InternalKeyComparator, InternalFilterPolicy, LookupKey, InternalKey) | 半支烟 | 20% | | db.SkipList | wangboo | 100% | diff --git a/src/db/version_set.rs b/src/db/version_set.rs index 831183d..0a58d3a 100644 --- a/src/db/version_set.rs +++ b/src/db/version_set.rs @@ -93,7 +93,9 @@ pub struct Compaction { // size_t level_ptrs_[config::kNumLevels]; } -// .h line 68 - 71 +/// Lookup the value for key. If found, store it in *val and +/// return OK. Else return a non-OK status. Fills *stats. +/// REQUIRES: lock is not held struct GetStats { seek_file: Rc, seek_file_level: i32 diff --git a/src/table/filter_block.rs b/src/table/filter_block.rs index 524c324..b6fb16c 100644 --- a/src/table/filter_block.rs +++ b/src/table/filter_block.rs @@ -1,10 +1,35 @@ +use std::sync::Arc; +use crate::traits::filter_policy_trait::FilterPolicy; use crate::util::slice::Slice; use crate::util::Result; -pub struct FilterBlockBuilder {} +/// +/// meta block 构建器 +/// +pub trait FilterBlock { + + /// + /// 构造一个 FilterBlockBuilder + /// + /// # Arguments + /// + /// * `policy`: + /// + /// returns: Self + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// use level_db_rust::util::filter_policy::BloomFilterPolicy; + /// + /// let policy = Arc::new(BloomFilterPolicy::new(2)); + /// let filter_block: FilterBlockBuilder = FilterBlockBuilder::new_with_policy(policy); + /// ``` + #[inline] + fn new_with_policy(policy: Arc) -> Self; -impl FilterBlockBuilder { /// 设置block的起始位置 /// /// # Arguments @@ -18,9 +43,8 @@ impl FilterBlockBuilder { /// ``` /// filter_block_builder.start_block(1024_u64); /// ``` - pub fn start_block(&mut self, _block_offset: u64) { - todo!() - } + #[inline] + fn start_block(&mut self, block_offset: u64); /// 添加key到builder /// @@ -35,9 +59,8 @@ impl FilterBlockBuilder { /// ``` /// /// ``` - pub fn add_key(&mut self, _key: &Slice) { - todo!() - } + fn add_key(&mut self, key: &Slice); + /// 构造filterBlock /// /// # Examples @@ -45,15 +68,149 @@ impl FilterBlockBuilder { /// ``` /// filter_block_builder.finish(); /// ``` - pub fn finish(&mut self) -> Result { + fn finish(&mut self) -> Result; + + fn get_policy(&self) -> Box<&FP>; + + fn get_key(&self) -> &str; + + fn get_start(&self) -> Vec; + + fn get_result(&self) -> &str; + + fn get_tmp_keys(&self) -> Vec; + + fn get_tmp_filter_offsets(&self) -> Vec; +} + +/// SSTable 文件里面的 meta block 构建器, 按内存里面指定的格式整理在内存中 +pub struct FilterBlockBuilder { + policy: Arc, + // Flattened key contents + key: String, + // Starting index in keys_ of each key + start: Vec, + // Filter data computed so far + result: String, + // policy_->CreateFilter() argument + tmp_keys: Vec, + filter_offsets: Vec, +} + +pub struct FilterBlockReader { + policy: Arc, + // Pointer to filter data (at block-start) + data: String, + // Pointer to beginning of offset array (at block-end) + offset: String, + // Number of entries in offset array + num: usize, + // Encoding parameter (see kFilterBaseLg in .cc file) + base_lg: usize +} + +impl FilterBlock for FilterBlockBuilder { + fn new_with_policy(policy: Arc) -> Self { + let key = String::new(); + let start:Vec = vec![]; + let result = String::new(); + let tmp_keys:Vec = vec![]; + let filter_offsets:Vec = vec![]; + + Self { + policy, + key, + start, + result, + tmp_keys, + filter_offsets + } + } + + fn start_block(&mut self, block_offset: u64) { + self.generate_filter(); + + todo!() + } + + fn add_key(&mut self, key: &Slice) { + todo!() + } + + fn finish(&mut self) -> Result { + self.generate_filter(); + + todo!() + } + + fn get_policy(&self) -> Box<&FP> { + Box::new(self.policy.as_ref()) + } + + fn get_key(&self) -> &str { + self.key.as_str() + } + + fn get_start(&self) -> Vec { + self.start.to_vec() + } + + fn get_result(&self) -> &str { + self.result.as_str() + } + + fn get_tmp_keys(&self) -> Vec { + self.tmp_keys.to_vec() + } + + fn get_tmp_filter_offsets(&self) -> Vec { + self.filter_offsets.to_vec() + } +} + +impl FilterBlockBuilder { + fn generate_filter(&mut self) { todo!() } } -pub struct FilterBlockReader {} +impl FilterBlockReader { + pub fn new_with_policy(policy: Arc, contents: Slice) -> Self { + let data = String::new(); + let offset = String::new(); -impl FilterBlockReader { - pub fn key_may_match(&self, _block_offset: u64, _key: &Slice) -> bool { + let contents_len = contents.len(); + + // 1 byte for base_lg_ and 4 for start of offset array + if contents_len < 5 { + return Self { + policy, + data, + offset, + num: 0, + base_lg: 0 + } + }; + + Self { + policy, + data, + offset, + num: 0, + base_lg: 0 + } + } + + pub fn key_may_match(&self, block_offset: u64, key: &Slice) -> bool { todo!() } + + pub fn get_policy(&self) -> Box<&FP> { + Box::new(self.policy.as_ref()) + } + + // data, + // offset, + // num: 0, + // base_lg: 0 } \ No newline at end of file diff --git a/src/table/filter_block_test.rs b/src/table/filter_block_test.rs new file mode 100644 index 0000000..74e87ed --- /dev/null +++ b/src/table/filter_block_test.rs @@ -0,0 +1,50 @@ + +mod test { + use std::sync::Arc; + use crate::table::filter_block; + use crate::table::filter_block::{FilterBlock, FilterBlockBuilder, FilterBlockReader}; + use crate::traits::filter_policy_trait::FilterPolicy; + use crate::util::filter_policy::BloomFilterPolicy; + use crate::util::slice::Slice; + + #[test] + fn test_filter_block_new_with_policy() { + let policy = Arc::new(BloomFilterPolicy::new(2)); + + let filter_block: FilterBlockBuilder = FilterBlockBuilder::new_with_policy(policy); + + let fp = filter_block.get_policy(); + let filter_policy_name = fp.name(); + assert_eq!(filter_policy_name, "leveldb.BuiltinBloomFilter"); + assert_eq!(filter_block.get_key(), ""); + assert_eq!(filter_block.get_result(), ""); + assert_eq!(filter_block.get_start().len(), 0); + assert_eq!(filter_block.get_tmp_keys().len(), 0); + assert_eq!(filter_block.get_tmp_filter_offsets().len(), 0); + } + + #[test] + fn test_filter_block_reader_new_with_policy_empty_content() { + let policy = Arc::new(BloomFilterPolicy::new(2)); + let contents = Slice::default(); + + let filter_block_reader: FilterBlockReader = FilterBlockReader::new_with_policy(policy, contents); + + let fp_reader = filter_block_reader.get_policy(); + let _reader_filter_policy_name = fp_reader.name(); + assert_eq!(_reader_filter_policy_name, "leveldb.BuiltinBloomFilter"); + } + + // #[test] + // fn test_filter_block_reader_new_with_policy_with_content() { + // let policy = Arc::new(BloomFilterPolicy::new(2)); + // let contents = Slice::default(""); + // + // let filter_block_reader: FilterBlockReader = FilterBlockReader::new_with_policy(policy, contents); + // + // let fp_reader = filter_block_reader.get_policy(); + // let _reader_filter_policy_name = fp_reader.name(); + // assert_eq!(_reader_filter_policy_name, "leveldb.BuiltinBloomFilter"); + // // assert_eq!(filter_block_reader.get_key(), ""); + // } +} \ No newline at end of file diff --git a/src/table/mod.rs b/src/table/mod.rs index ade478f..f928426 100644 --- a/src/table/mod.rs +++ b/src/table/mod.rs @@ -1,6 +1,7 @@ pub mod block; pub mod block_builder; pub mod filter_block; +mod filter_block_test; pub mod format; mod format_test; pub(crate) mod ss_table; diff --git a/src/traits/filter_policy_trait.rs b/src/traits/filter_policy_trait.rs index f3e4ad0..6aa0f06 100644 --- a/src/traits/filter_policy_trait.rs +++ b/src/traits/filter_policy_trait.rs @@ -23,6 +23,8 @@ pub trait FilterPolicy { /// # Examples /// /// ``` + /// use crate::util::slice::Slice; + /// /// let mut keys : Vec = Vec::new(); /// keys.push(Slice::try_from(String::from("hello")).unwrap()); /// keys.push(Slice::try_from(String::from("world")).unwrap()); diff --git a/src/util/filter_policy.rs b/src/util/filter_policy.rs index f493364..83127d2 100644 --- a/src/util/filter_policy.rs +++ b/src/util/filter_policy.rs @@ -79,7 +79,7 @@ impl FromPolicy for BloomFilterPolicy { impl FilterPolicy for BloomFilterPolicy { fn name(&self) -> String { - String::from("leveldb.BuiltinBloomFilter2") + String::from("leveldb.BuiltinBloomFilter") } fn create_filter(&self, keys: Vec) -> Slice { diff --git a/src/util/options.rs b/src/util/options.rs index 9028980..fffba75 100644 --- a/src/util/options.rs +++ b/src/util/options.rs @@ -1,6 +1,8 @@ +use std::sync::Arc; use crate::db::db::Snapshot; use crate::db::db_format::InternalKeyComparator; use crate::traits::comparator_trait::Comparator; +use crate::traits::filter_policy_trait::FilterPolicy; use crate::util::comparator::BytewiseComparatorImpl; pub enum CompressionType { @@ -13,7 +15,9 @@ pub struct Env {} pub struct Cache {} -pub struct FilterPolicy {} +// 使用如下定义(后续路径会重构) +// use crate::traits::filter_policy_trait::FilterPolicy; +// pub struct FilterPolicy {} pub struct Options { @@ -96,7 +100,7 @@ pub struct Options { /// If non-null, use the specified filter policy to reduce disk reads. /// Many applications will benefit from passing the result of /// NewBloomFilterPolicy() here. - pub filter_policy: Option, + pub filter_policy: Option>, } /// Options that control read operations pub struct ReadOptions { -- Gitee From b9b272a550057cab7721c570073fe035617278a3 Mon Sep 17 00:00:00 2001 From: fengyang Date: Thu, 30 Mar 2023 17:50:15 +0800 Subject: [PATCH 35/50] =?UTF-8?q?FilterBlock=20=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/table/filter_block.rs | 89 ++++++++++++++++++----- src/table/filter_block_test.rs | 127 ++++++++++++++++++++++++++++++++- src/util/mod.rs | 4 +- 3 files changed, 198 insertions(+), 22 deletions(-) diff --git a/src/table/filter_block.rs b/src/table/filter_block.rs index b6fb16c..38e1d16 100644 --- a/src/table/filter_block.rs +++ b/src/table/filter_block.rs @@ -1,9 +1,15 @@ +use std::io::Write; use std::sync::Arc; use crate::traits::filter_policy_trait::FilterPolicy; +use crate::util::coding::Coding; use crate::util::slice::Slice; use crate::util::Result; +// Generate new filter every 2KB of data +const FILTER_BASE_LG: usize = 11; +const FILTER_BASE: usize = 1 << FILTER_BASE_LG; + /// /// meta block 构建器 /// @@ -46,6 +52,8 @@ pub trait FilterBlock { #[inline] fn start_block(&mut self, block_offset: u64); + fn add_key_from_str(&mut self, key: &str); + /// 添加key到builder /// /// # Arguments @@ -72,7 +80,7 @@ pub trait FilterBlock { fn get_policy(&self) -> Box<&FP>; - fn get_key(&self) -> &str; + fn get_keys(&self) -> &str; fn get_start(&self) -> Vec; @@ -87,7 +95,7 @@ pub trait FilterBlock { pub struct FilterBlockBuilder { policy: Arc, // Flattened key contents - key: String, + keys: String, // Starting index in keys_ of each key start: Vec, // Filter data computed so far @@ -100,9 +108,9 @@ pub struct FilterBlockBuilder { pub struct FilterBlockReader { policy: Arc, // Pointer to filter data (at block-start) - data: String, + data: Vec, // Pointer to beginning of offset array (at block-end) - offset: String, + offset: Vec, // Number of entries in offset array num: usize, // Encoding parameter (see kFilterBaseLg in .cc file) @@ -111,7 +119,7 @@ pub struct FilterBlockReader { impl FilterBlock for FilterBlockBuilder { fn new_with_policy(policy: Arc) -> Self { - let key = String::new(); + let keys = String::new(); let start:Vec = vec![]; let result = String::new(); let tmp_keys:Vec = vec![]; @@ -119,7 +127,7 @@ impl FilterBlock for FilterBlockBuilder { Self { policy, - key, + keys, start, result, tmp_keys, @@ -128,9 +136,16 @@ impl FilterBlock for FilterBlockBuilder { } fn start_block(&mut self, block_offset: u64) { - self.generate_filter(); + let filter_index = block_offset / (FILTER_BASE as u64); + assert!(filter_index >= self.filter_offsets.len() as u64); - todo!() + while filter_index > self.filter_offsets.len() as u64 { + self.generate_filter(); + } + } + + fn add_key_from_str(&mut self, key: &str) { + self.add_key(&Slice::from(key)) } fn add_key(&mut self, key: &Slice) { @@ -147,8 +162,8 @@ impl FilterBlock for FilterBlockBuilder { Box::new(self.policy.as_ref()) } - fn get_key(&self) -> &str { - self.key.as_str() + fn get_keys(&self) -> &str { + self.keys.as_str() } fn get_start(&self) -> Vec { @@ -170,14 +185,38 @@ impl FilterBlock for FilterBlockBuilder { impl FilterBlockBuilder { fn generate_filter(&mut self) { - todo!() + let num_keys = self.start.len(); + + if num_keys == 0 { + // Fast path if there are no keys for this filter + self.filter_offsets.push(self.result.len() as u32); + return; + } + + /* Make list of keys from flattened key structure */ + // Simplify length computation + self.start.push(self.keys.len()); + // 如果 new_len 大于 len ,则 Vec 由差异扩展,每个额外的插槽都用 value 填充。如果 new_len 小于 len ,则 Vec 将被截断。 + self.tmp_keys.resize(num_keys, Slice::default()); + + for i in 0..num_keys { + let base = &self.keys.as_bytes()[self.start[i]..]; + let length = self.start[i+1] - self.start[i]; + + let mut tmp_key = Vec::with_capacity(length); + tmp_key.write(&base); + self.tmp_keys[i] = Slice::from_vec(tmp_key); + } + + // Generate filter for current set of keys and append to result_. + } } impl FilterBlockReader { pub fn new_with_policy(policy: Arc, contents: Slice) -> Self { - let data = String::new(); - let offset = String::new(); + let data = Vec::new(); + let offset = Vec::new(); let contents_len = contents.len(); @@ -192,6 +231,11 @@ impl FilterBlockReader { } }; + // let buf = contents.as_ref()[contents_len-5..]; + + // let base_lg_ = contentsVe[contents_len-1]; + + // let last_word: u32 = Coding::decode_fixed32(buf)); Self { policy, data, @@ -209,8 +253,19 @@ impl FilterBlockReader { Box::new(self.policy.as_ref()) } - // data, - // offset, - // num: 0, - // base_lg: 0 + pub fn get_data(&self) -> Vec { + self.data.to_vec() + } + + pub fn get_offset(&self) -> Vec { + self.offset.to_vec() + } + + pub fn get_num(&self) -> usize { + self.num + } + + pub fn get_base_lg(&self) -> usize { + self.base_lg + } } \ No newline at end of file diff --git a/src/table/filter_block_test.rs b/src/table/filter_block_test.rs index 74e87ed..4deb8b8 100644 --- a/src/table/filter_block_test.rs +++ b/src/table/filter_block_test.rs @@ -1,11 +1,107 @@ mod test { + use std::borrow::BorrowMut; use std::sync::Arc; use crate::table::filter_block; use crate::table::filter_block::{FilterBlock, FilterBlockBuilder, FilterBlockReader}; + use crate::traits::coding_trait::CodingTrait; use crate::traits::filter_policy_trait::FilterPolicy; + use crate::util::coding::Coding; use crate::util::filter_policy::BloomFilterPolicy; use crate::util::slice::Slice; + use crate::util::hash::{Hash, ToHash}; + + use crate::util::Result; + + pub struct TestHashFilter { + //. + } + + impl TestHashFilter { + fn new() -> Self { + Self { + + } + } + } + + impl FilterPolicy for TestHashFilter { + fn name(&self) -> String { + String::from("TestHashFilter") + } + + fn create_filter(&self, keys: Vec) -> Slice { + let mut n: usize = 0; + for i in 0..keys.len() { + n += keys[i].len(); + } + + let mut dst_chars = vec![0; n]; + let dst_chars_u8 = dst_chars.borrow_mut(); + + let mut offset: usize = 0; + for i in 0..keys.len() { + let h = Hash::hash_code(keys[i].as_ref(), 1); + let of = Coding::put_fixed32(dst_chars_u8, offset, h); + offset += of; + } + + Slice::from_buf(dst_chars_u8) + } + + fn key_may_match(&self, key: &Slice, bloom_filter: &Slice) -> bool { + let h = Hash::hash_code(key.to_vec().as_slice(), 1); + + let mut pos = 0; + while pos <= bloom_filter.size() { + let buf = &bloom_filter.as_ref()[pos..]; + + if h == Coding::decode_fixed32(buf) { + return true + } + + pos += 4; + } + + false + } + } + + #[test] + fn test_create_filter() { + let policy = TestHashFilter::new(); + + let mut keys : Vec = Vec::new(); + keys.push(Slice::try_from(String::from("hello")).unwrap()); + keys.push(Slice::try_from(String::from("world")).unwrap()); + + let bloom_filter: Slice = policy.create_filter(keys); + + let mut key_may_match = policy.key_may_match( + &Slice::try_from(String::from("hello")).unwrap(), + &bloom_filter); + assert!(key_may_match); + + key_may_match = policy.key_may_match(&Slice::try_from(String::from("world")).unwrap(), + &bloom_filter); + assert!(key_may_match); + + let mut key_not_match = policy.key_may_match(&Slice::try_from(String::from("x")).unwrap(), + &bloom_filter); + assert!(!key_not_match); + + key_not_match = policy.key_may_match(&Slice::try_from(String::from("helloworld")).unwrap(), + &bloom_filter); + assert!(!key_not_match); + + key_not_match = policy.key_may_match(&Slice::try_from(String::from("hello world")).unwrap(), + &bloom_filter); + assert!(!key_not_match); + + key_not_match = policy.key_may_match(&Slice::try_from(String::from("foo")).unwrap(), + &bloom_filter); + assert!(!key_not_match); + } #[test] fn test_filter_block_new_with_policy() { @@ -16,13 +112,31 @@ mod test { let fp = filter_block.get_policy(); let filter_policy_name = fp.name(); assert_eq!(filter_policy_name, "leveldb.BuiltinBloomFilter"); - assert_eq!(filter_block.get_key(), ""); + assert_eq!(filter_block.get_keys(), ""); assert_eq!(filter_block.get_result(), ""); assert_eq!(filter_block.get_start().len(), 0); assert_eq!(filter_block.get_tmp_keys().len(), 0); assert_eq!(filter_block.get_tmp_filter_offsets().len(), 0); } + #[test] + fn test_filter_block_new_with_policy_and_addkey() { + let policy = Arc::new(BloomFilterPolicy::new(2)); + let mut filter_block_builder: FilterBlockBuilder = FilterBlockBuilder::new_with_policy(policy); + + filter_block_builder.start_block(100); + filter_block_builder.add_key_from_str("foo"); + filter_block_builder.add_key_from_str("bar"); + filter_block_builder.add_key_from_str("box"); + filter_block_builder.start_block(200); + filter_block_builder.add_key_from_str("box"); + filter_block_builder.start_block(300); + filter_block_builder.add_key_from_str("hello"); + + let sliceRs: Result = filter_block_builder.finish(); + + } + #[test] fn test_filter_block_reader_new_with_policy_empty_content() { let policy = Arc::new(BloomFilterPolicy::new(2)); @@ -33,18 +147,25 @@ mod test { let fp_reader = filter_block_reader.get_policy(); let _reader_filter_policy_name = fp_reader.name(); assert_eq!(_reader_filter_policy_name, "leveldb.BuiltinBloomFilter"); + assert_eq!(filter_block_reader.get_data().len(), 0); + assert_eq!(filter_block_reader.get_offset().len(), 0); + assert_eq!(filter_block_reader.get_num(), 0); + assert_eq!(filter_block_reader.get_base_lg(), 0); } // #[test] // fn test_filter_block_reader_new_with_policy_with_content() { // let policy = Arc::new(BloomFilterPolicy::new(2)); - // let contents = Slice::default(""); + // let contents = Slice::from("\000"); // // let filter_block_reader: FilterBlockReader = FilterBlockReader::new_with_policy(policy, contents); // // let fp_reader = filter_block_reader.get_policy(); // let _reader_filter_policy_name = fp_reader.name(); // assert_eq!(_reader_filter_policy_name, "leveldb.BuiltinBloomFilter"); - // // assert_eq!(filter_block_reader.get_key(), ""); + // assert_eq!(filter_block_reader.get_data().len(), 0); + // assert_eq!(filter_block_reader.get_offset().len(), 0); + // assert_eq!(filter_block_reader.get_num(), 0); + // assert_eq!(filter_block_reader.get_base_lg(), 0); // } } \ No newline at end of file diff --git a/src/util/mod.rs b/src/util/mod.rs index ade5ddc..b24bec5 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -30,9 +30,9 @@ mod filter_policy_test; pub mod histogram; mod histogram_test; -mod hash; +pub mod hash; mod hash_test; -mod mutex_lock; +pub mod mutex_lock; mod mutex_lock_test; pub mod random; mod random_test; -- Gitee From d23bf8af089d34fee2acbe95859d64442acdc440 Mon Sep 17 00:00:00 2001 From: wangboo <5417808+wangboa@user.noreply.gitee.com> Date: Thu, 30 Mar 2023 21:08:24 +0800 Subject: [PATCH 36/50] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E4=B8=80=E4=BA=9B?= =?UTF-8?q?=E6=80=A7=E8=83=BD=E6=B5=8B=E8=AF=95=E6=A1=88=E4=BE=8B=EF=BC=8C?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0=20jemalloc?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.toml | 4 +-- benches/skiplist_memory_useage.rs | 55 ------------------------------- benches/u32_shift.rs | 46 ++++++++++++++++++++++++++ src/db/skip_list.rs | 41 +++++++++++++++++++---- src/db/skip_list_test.rs | 51 ++++++++++++++++++++++++++++ src/db/version_edit_test.rs | 2 +- src/lib.rs | 5 +-- src/util/comparator_test.rs | 4 +-- src/util/hash.rs | 3 +- src/util/mem_debug.rs | 12 +++++++ src/util/mod.rs | 1 + src/util/status_test.rs | 2 +- 12 files changed, 155 insertions(+), 71 deletions(-) delete mode 100644 benches/skiplist_memory_useage.rs create mode 100644 benches/u32_shift.rs create mode 100644 src/util/mem_debug.rs diff --git a/Cargo.toml b/Cargo.toml index b60d1d1..8b4c9c3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ jemallocator = "0.5" jemalloc-sys = {version = "0.5", features = ["stats"]} [dev-dependencies] -criterion = "0.3.0" +criterion = {version = "0.4.0", features = ["html_reports"]} crc32fast = "1.3.2" skiplist = "0.4.0" @@ -34,5 +34,5 @@ name = "skiplist_bench" harness = false [[bench]] -name = "skiplist_memory_useage" +name = "u32_shift" harness = false \ No newline at end of file diff --git a/benches/skiplist_memory_useage.rs b/benches/skiplist_memory_useage.rs deleted file mode 100644 index deaee47..0000000 --- a/benches/skiplist_memory_useage.rs +++ /dev/null @@ -1,55 +0,0 @@ -use std::ffi::{c_char, c_void}; -use std::ptr::{null, null_mut}; -use std::sync::{Arc, Mutex}; -use skiplist::OrderedSkipList; -use level_db_rust::db::skip_list::SkipList; -use level_db_rust::util::Arena; -use level_db_rust::util::arena::ArenaRef; -use level_db_rust::util::comparator::BytewiseComparatorImpl; -use level_db_rust::util::unsafe_slice::TryIntoUnsafeSlice; - -#[global_allocator] -static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; - -extern "C" fn write_cb(_: *mut c_void, message: *const c_char) { - print!("{}", String::from_utf8_lossy(unsafe { - std::ffi::CStr::from_ptr(message as *const i8).to_bytes() - })); -} - -fn mem_print() { - unsafe { jemalloc_sys::malloc_stats_print(Some(write_cb), null_mut(), null()) } -} - -fn bench_default_skiplist(mut list: SkipList, arena: ArenaRef, record_count: usize) { - for j in 0..record_count { - let value = format!("key_{}", j); - list.insert(value.try_into_unsafe_slice(arena.clone()).unwrap()).unwrap(); - } - println!("bench_default_skiplist: "); - mem_print(); -} - -fn bench_skiplist_v_0_4_0(mut list: OrderedSkipList, record_count: usize) { - for j in 0..record_count { - let value = format!("key_{}", j); - list.insert(value.clone()); - } - println!("bench_skiplist_v_0_4_0: "); - mem_print(); -} - -fn main() { - let record_count = 100 * 1024; - // let cmp = Arc::new(BytewiseComparatorImpl::default()); - // let arena = Arc::new(Mutex::new(Arena::default())); - // let list = SkipList::create(cmp, arena.clone()); - // bench_default_skiplist(list, arena, record_count); - - let list: OrderedSkipList = unsafe { - OrderedSkipList::with_comp(|a: &String, b: &String| { - a.cmp(b) - }) - }; - bench_skiplist_v_0_4_0(list, record_count); -} \ No newline at end of file diff --git a/benches/u32_shift.rs b/benches/u32_shift.rs new file mode 100644 index 0000000..f9a2eb5 --- /dev/null +++ b/benches/u32_shift.rs @@ -0,0 +1,46 @@ +use std::{mem, slice}; +use std::alloc::{alloc, Layout}; +use std::io::Write; + +use criterion::{Criterion, criterion_group, criterion_main}; +use level_db_rust::debug; + +pub fn u32_shift_bench(c: &mut Criterion) { + let mut data = [0_u8; 4]; + let mut buf = data.as_mut_slice(); + let value = 12345678_u32; + let mut g = c.benchmark_group("u32_shift"); + + g.bench_function("to_ne_bytes", |g| { + g.iter(|| { + buf.write(&value.to_be_bytes()).unwrap(); + }); + }); + buf = data.as_mut_slice(); + buf.fill(0); // reset + debug!("is big endian: {}", cfg!(target_endian = "big")); + g.bench_function("raw_write", |g| { + g.iter(|| { + unsafe { + if cfg!(target_endian = "big") { + (buf.as_mut_ptr() as *mut u32).write(value); + } else { + (buf.as_mut_ptr() as *mut u32).write(value.swap_bytes()); + } + } + }); + }); + buf = data.as_mut_slice(); + buf.fill(0); // reset + g.bench_function("shift_bytes", |g| { + g.iter(|| { + buf[0] = ((value >> 0) & 0xff) as u8; + buf[1] = ((value >> 1) & 0xff) as u8; + buf[2] = ((value >> 2) & 0xff) as u8; + buf[3] = ((value >> 3) & 0xff) as u8; + }); + }); +} + +criterion_group!(benches, u32_shift_bench); +criterion_main!(benches); \ No newline at end of file diff --git a/src/db/skip_list.rs b/src/db/skip_list.rs index a043adc..4d13c12 100644 --- a/src/db/skip_list.rs +++ b/src/db/skip_list.rs @@ -5,15 +5,15 @@ use std::ptr::null_mut; use std::sync::{Arc, RwLock}; use rand::prelude::*; + use crate::debug; use crate::traits::comparator_trait::Comparator; use crate::traits::DataIterator; - -use crate::util::arena::ArenaRef; use crate::util::{Arena, Result}; +use crate::util::arena::ArenaRef; use crate::util::slice::Slice; -use crate::util::unsafe_slice::UnsafeSlice; use crate::util::status::{LevelError, Status}; +use crate::util::unsafe_slice::UnsafeSlice; type RawNode = *mut Node; @@ -213,6 +213,10 @@ impl SkipList { false } + unsafe fn find_eq_or_greater>(&self, key: &R) -> Option { + todo!() + } + #[inline] pub fn max_height(&self) -> usize { MAX_LEVEL @@ -346,6 +350,28 @@ impl Node { assert!(level < MAX_LEVEL); self.next_elems.offset(level as isize).write(node); } + + /// 找到最后一个数据元素 + unsafe fn seek_to_last(&self) -> Option { + if self.is_tail() { + return None; + } + let mut pre = self; + let mut cur = &*self.next_top_node(); + loop { + if cur.is_tail() { + return Some(pre as *const Node as *mut Node); + } + pre = cur; + cur = &*cur.next_top_node(); + } + } + + /// 找到最上层的下一个元素 + #[inline] + unsafe fn next_top_node(&self) -> RawNode { + self.get_node(self.level - 1) + } } fn rand_level() -> usize { @@ -410,7 +436,6 @@ impl Iterator for Iter { } impl DataIterator for Iter { - #[inline] fn valid(&self) -> bool { unsafe { @@ -420,12 +445,16 @@ impl DataIterator for Iter { #[inline] fn seek_to_first(&mut self) { - self.current = self.head + self.current = unsafe { + (&*self.head).get_node(0) + } } #[inline] fn seek_to_last(&mut self) { - self.current = self.tail + unsafe { + self.current = (&*self.current).seek_to_last().unwrap_or(self.tail) + } } fn seek(&mut self, key: &Slice) { diff --git a/src/db/skip_list_test.rs b/src/db/skip_list_test.rs index 11ef4bd..11b4573 100644 --- a/src/db/skip_list_test.rs +++ b/src/db/skip_list_test.rs @@ -1,11 +1,21 @@ mod test { use std::collections::HashSet; + use std::env::args; + use std::ffi::{c_char, c_void}; + use std::ptr::{null, null_mut}; use std::sync::{Arc, Mutex}; + + use criterion::{Criterion, criterion_group, criterion_main}; use rand::Rng; + use skiplist::OrderedSkipList; + use crate::db::DefaultSkipList; + use crate::db::skip_list::SkipList; use crate::debug; use crate::util::Arena; + use crate::util::arena::ArenaRef; use crate::util::comparator::BytewiseComparatorImpl; + use crate::util::mem_debug::mem_print; use crate::util::Result; use crate::util::slice::Slice; use crate::util::unsafe_slice::TryIntoUnsafeSlice; @@ -55,4 +65,45 @@ mod test { Ok(()) } + + + fn default_skiplist(mut list: SkipList, arena: ArenaRef, record_count: usize) { + for j in 0..record_count { + let value = format!("key_{}", j); + list.insert(value.try_into_unsafe_slice(arena.clone()).unwrap()).unwrap(); + } + println!("bench_default_skiplist: "); + mem_print(); + } + + fn bench_skiplist_v_0_4_0(mut list: OrderedSkipList, record_count: usize) { + for j in 0..record_count { + let value = format!("key_{}", j); + list.insert(value.clone()); + } + println!("bench_skiplist_v_0_4_0: "); + mem_print(); + } + + #[test] + fn bench_default_skiplist() { + let record_count = 100 * 1024; + println!("bench default skiplist"); + let cmp = Arc::new(BytewiseComparatorImpl::default()); + let arena = Arc::new(Mutex::new(Arena::default())); + let list = SkipList::create(cmp, arena.clone()); + default_skiplist(list, arena, record_count); + } + + #[test] + fn bench_crate_skiplist() { + let record_count = 100 * 1024; + println!("bench crate skiplist"); + let list: OrderedSkipList = unsafe { + OrderedSkipList::with_comp(|a: &String, b: &String| { + a.cmp(b) + }) + }; + bench_skiplist_v_0_4_0(list, record_count); + } } \ No newline at end of file diff --git a/src/db/version_edit_test.rs b/src/db/version_edit_test.rs index 2c2efec..c38442d 100644 --- a/src/db/version_edit_test.rs +++ b/src/db/version_edit_test.rs @@ -5,7 +5,7 @@ mod test { use crate::util::slice::Slice; #[test] - fn test_Tag() { + fn test_tag() { let tag = Tag::kCompactPointer; assert_eq!(tag.get_value(), 5); diff --git a/src/lib.rs b/src/lib.rs index c8b17d3..3b67f8e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,4 @@ #![feature(box_syntax)] -#![feature(let_else)] -#![feature(generic_associated_types)] extern crate core; @@ -9,6 +7,9 @@ mod table; pub mod util; mod traits; +#[global_allocator] +static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; + mod test { #[test] diff --git a/src/util/comparator_test.rs b/src/util/comparator_test.rs index d6d311b..b018bf6 100644 --- a/src/util/comparator_test.rs +++ b/src/util/comparator_test.rs @@ -93,7 +93,7 @@ mod test { // u8max 结尾 let mut u8_vec: Vec = vec![]; - u8_vec.write(&String::from("helloWorld").as_bytes().to_vec()); + u8_vec.write(&String::from("helloWorld").as_bytes().to_vec()).unwrap(); u8_vec.push(u8::MAX); let u8_array_str = String::from(Slice::from_buf(u8_vec.as_slice())); @@ -106,7 +106,7 @@ mod test { // u8max 开头 let mut u8_vec: Vec = vec![]; u8_vec.push(u8::MAX); - u8_vec.write(&String::from("helloWorld").as_bytes().to_vec()); + u8_vec.write(&String::from("helloWorld").as_bytes().to_vec()).unwrap(); let u8_max_str = String::from(Slice::from_buf(u8_vec.as_slice())); let comp = BytewiseComparatorImpl::default(); diff --git a/src/util/hash.rs b/src/util/hash.rs index 8aa51e2..15a1a03 100644 --- a/src/util/hash.rs +++ b/src/util/hash.rs @@ -11,10 +11,9 @@ use crate::util::slice::Slice; /// 一种可以计算 hash 的特质 pub trait ToHash { - #[inline] + fn to_hash(&self) -> u32; - #[inline] fn to_hash_with_seed(&self, seed: u32) -> u32; } diff --git a/src/util/mem_debug.rs b/src/util/mem_debug.rs new file mode 100644 index 0000000..bef8a83 --- /dev/null +++ b/src/util/mem_debug.rs @@ -0,0 +1,12 @@ +use std::ffi::{c_char, c_void}; +use std::ptr::{null, null_mut}; + +extern "C" fn write_cb(_: *mut c_void, message: *const c_char) { + print!("{}", String::from_utf8_lossy(unsafe { + std::ffi::CStr::from_ptr(message as *const i8).to_bytes() + })); +} + +pub fn mem_print() { + unsafe { jemalloc_sys::malloc_stats_print(Some(write_cb), null_mut(), null()) } +} \ No newline at end of file diff --git a/src/util/mod.rs b/src/util/mod.rs index ade5ddc..d777f71 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -41,6 +41,7 @@ pub mod debug; pub mod linked_list; mod linked_list_test; pub mod unsafe_slice; +pub mod mem_debug; /// 定义别名 pub type Result = result::Result; diff --git a/src/util/status_test.rs b/src/util/status_test.rs index a7dce28..c186cee 100644 --- a/src/util/status_test.rs +++ b/src/util/status_test.rs @@ -100,7 +100,7 @@ mod test { } #[test] - fn test_level_error_toString() { + fn test_level_error_to_string() { // ok let status: Status = LevelError::ok(); assert_eq!("OK", status.to_string()); -- Gitee From f4900f46d9e0fb2a4195deccba8efc445f9438db Mon Sep 17 00:00:00 2001 From: fengyang Date: Thu, 30 Mar 2023 21:09:18 +0800 Subject: [PATCH 37/50] =?UTF-8?q?FilterBlock=20=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/table/filter_block.rs | 75 +++++++++++++---- src/table/filter_block_test.rs | 129 ++++++++++++++++-------------- src/traits/filter_policy_trait.rs | 4 +- src/util/filter_policy.rs | 16 +++- src/util/filter_policy_test.rs | 9 ++- 5 files changed, 148 insertions(+), 85 deletions(-) diff --git a/src/table/filter_block.rs b/src/table/filter_block.rs index 38e1d16..183c678 100644 --- a/src/table/filter_block.rs +++ b/src/table/filter_block.rs @@ -1,5 +1,6 @@ use std::io::Write; use std::sync::Arc; +use crate::traits::coding_trait::CodingTrait; use crate::traits::filter_policy_trait::FilterPolicy; use crate::util::coding::Coding; use crate::util::slice::Slice; @@ -21,6 +22,7 @@ pub trait FilterBlock { /// # Arguments /// /// * `policy`: + /// * `capacity`: 初始化容量 /// /// returns: Self /// @@ -34,7 +36,7 @@ pub trait FilterBlock { /// let filter_block: FilterBlockBuilder = FilterBlockBuilder::new_with_policy(policy); /// ``` #[inline] - fn new_with_policy(policy: Arc) -> Self; + fn new_with_policy(policy: Arc, capacity: usize) -> Self; /// 设置block的起始位置 /// @@ -80,11 +82,11 @@ pub trait FilterBlock { fn get_policy(&self) -> Box<&FP>; - fn get_keys(&self) -> &str; + fn get_keys(&self) -> Vec; fn get_start(&self) -> Vec; - fn get_result(&self) -> &str; + fn get_result(&self) -> Vec; fn get_tmp_keys(&self) -> Vec; @@ -95,11 +97,11 @@ pub trait FilterBlock { pub struct FilterBlockBuilder { policy: Arc, // Flattened key contents - keys: String, + keys: Vec, // Starting index in keys_ of each key start: Vec, // Filter data computed so far - result: String, + result: Vec, // policy_->CreateFilter() argument tmp_keys: Vec, filter_offsets: Vec, @@ -118,10 +120,10 @@ pub struct FilterBlockReader { } impl FilterBlock for FilterBlockBuilder { - fn new_with_policy(policy: Arc) -> Self { - let keys = String::new(); - let start:Vec = vec![]; - let result = String::new(); + fn new_with_policy(policy: Arc, capacity: usize) -> Self { + let keys:Vec = Vec::with_capacity(capacity); + let start:Vec = Vec::with_capacity(capacity); + let result:Vec = Vec::with_capacity(capacity); let tmp_keys:Vec = vec![]; let filter_offsets:Vec = vec![]; @@ -149,29 +151,54 @@ impl FilterBlock for FilterBlockBuilder { } fn add_key(&mut self, key: &Slice) { - todo!() + self.start.push(key.len()); + self.keys.write(key.as_str().as_bytes()).expect("add_key error!"); } fn finish(&mut self) -> Result { - self.generate_filter(); + if self.start.len() != 0 { + self.generate_filter(); + } - todo!() + // Append array of per-filter offsets + let array_offset = self.result.len() as u32; + // 当前需要写入的位置。result 中可能存在数据,因此为 self.result.len() 的位置 + let mut pos: usize = self.result.len(); + + // todo 判断是否需要扩容 + let result_total_capacity = self.result.capacity(); + + let dst_append = self.result.as_mut_slice(); + + for i in 0..self.filter_offsets.len() { + // 判断当前 pos + len 4 + let filter_offset_val = self.filter_offsets[i]; + pos = Coding::put_fixed32(dst_append, pos, filter_offset_val); + } + + pos = Coding::put_fixed32(dst_append, pos, array_offset); + + // Save encoding parameter in result + // todo 判断是否需要扩容 + Coding::put_varint64(self.result.as_mut_slice(), pos, FILTER_BASE_LG as u64); + + Ok(Slice::from_buf(&self.result)) } fn get_policy(&self) -> Box<&FP> { Box::new(self.policy.as_ref()) } - fn get_keys(&self) -> &str { - self.keys.as_str() + fn get_keys(&self) -> Vec { + self.keys.to_vec() } fn get_start(&self) -> Vec { self.start.to_vec() } - fn get_result(&self) -> &str { - self.result.as_str() + fn get_result(&self) -> Vec { + self.result.to_vec() } fn get_tmp_keys(&self) -> Vec { @@ -200,7 +227,7 @@ impl FilterBlockBuilder { self.tmp_keys.resize(num_keys, Slice::default()); for i in 0..num_keys { - let base = &self.keys.as_bytes()[self.start[i]..]; + let base = &self.keys[self.start[i]..]; let length = self.start[i+1] - self.start[i]; let mut tmp_key = Vec::with_capacity(length); @@ -209,7 +236,21 @@ impl FilterBlockBuilder { } // Generate filter for current set of keys and append to result_. + self.filter_offsets.push(self.result.len() as u32); + + let mut keys: Vec<&Slice> = Vec::new(); + keys.push(&self.tmp_keys[0]); + let create_filter:Slice = self.policy.create_filter_with_len(num_keys, keys); + + // let result_len = self.result.len(); + // let result_total_capacity = self.result.capacity(); + self.result.write(create_filter.as_ref()); + // let result_len = self.result.len(); + // let result_total_capacity = self.result.capacity(); + self.tmp_keys.clear(); + self.keys.clear(); + self.start.clear(); } } diff --git a/src/table/filter_block_test.rs b/src/table/filter_block_test.rs index 4deb8b8..0e57a1a 100644 --- a/src/table/filter_block_test.rs +++ b/src/table/filter_block_test.rs @@ -30,12 +30,18 @@ mod test { String::from("TestHashFilter") } - fn create_filter(&self, keys: Vec) -> Slice { + fn create_filter(&self, keys: Vec<&Slice>) -> Slice { let mut n: usize = 0; for i in 0..keys.len() { n += keys[i].len(); } + self.create_filter_with_len(n, keys) + } + + fn create_filter_with_len(&self, len: usize, keys: Vec<&Slice>) -> Slice { + let mut n: usize = len; + let mut dst_chars = vec![0; n]; let dst_chars_u8 = dst_chars.borrow_mut(); @@ -67,62 +73,81 @@ mod test { } } - #[test] - fn test_create_filter() { - let policy = TestHashFilter::new(); - - let mut keys : Vec = Vec::new(); - keys.push(Slice::try_from(String::from("hello")).unwrap()); - keys.push(Slice::try_from(String::from("world")).unwrap()); - - let bloom_filter: Slice = policy.create_filter(keys); - - let mut key_may_match = policy.key_may_match( - &Slice::try_from(String::from("hello")).unwrap(), - &bloom_filter); - assert!(key_may_match); - - key_may_match = policy.key_may_match(&Slice::try_from(String::from("world")).unwrap(), - &bloom_filter); - assert!(key_may_match); - - let mut key_not_match = policy.key_may_match(&Slice::try_from(String::from("x")).unwrap(), - &bloom_filter); - assert!(!key_not_match); - - key_not_match = policy.key_may_match(&Slice::try_from(String::from("helloworld")).unwrap(), - &bloom_filter); - assert!(!key_not_match); - - key_not_match = policy.key_may_match(&Slice::try_from(String::from("hello world")).unwrap(), - &bloom_filter); - assert!(!key_not_match); - - key_not_match = policy.key_may_match(&Slice::try_from(String::from("foo")).unwrap(), - &bloom_filter); - assert!(!key_not_match); - } + // #[test] + // fn test_create_filter() { + // let policy = TestHashFilter::new(); + // + // let s1 = Slice::try_from(String::from("hello")).unwrap(); + // let s2 = Slice::try_from(String::from("world")).unwrap(); + // let mut keys : Vec<&Slice> = Vec::new(); + // keys.push(&s1); + // keys.push(&s2); + // + // let bloom_filter: Slice = policy.create_filter(keys); + // + // let mut key_may_match = policy.key_may_match( + // &Slice::try_from(String::from("hello")).unwrap(), + // &bloom_filter); + // assert!(key_may_match); + // + // key_may_match = policy.key_may_match(&Slice::try_from(String::from("world")).unwrap(), + // &bloom_filter); + // assert!(key_may_match); + // + // let mut key_not_match = policy.key_may_match(&Slice::try_from(String::from("x")).unwrap(), + // &bloom_filter); + // assert!(!key_not_match); + // + // key_not_match = policy.key_may_match(&Slice::try_from(String::from("helloworld")).unwrap(), + // &bloom_filter); + // assert!(!key_not_match); + // + // key_not_match = policy.key_may_match(&Slice::try_from(String::from("hello world")).unwrap(), + // &bloom_filter); + // assert!(!key_not_match); + // + // key_not_match = policy.key_may_match(&Slice::try_from(String::from("foo")).unwrap(), + // &bloom_filter); + // assert!(!key_not_match); + // } #[test] fn test_filter_block_new_with_policy() { - let policy = Arc::new(BloomFilterPolicy::new(2)); + let policy = Arc::new(TestHashFilter::new()); - let filter_block: FilterBlockBuilder = FilterBlockBuilder::new_with_policy(policy); + let filter_block: FilterBlockBuilder = FilterBlockBuilder::new_with_policy(policy, 10); let fp = filter_block.get_policy(); let filter_policy_name = fp.name(); - assert_eq!(filter_policy_name, "leveldb.BuiltinBloomFilter"); - assert_eq!(filter_block.get_keys(), ""); - assert_eq!(filter_block.get_result(), ""); + assert_eq!(filter_policy_name, "TestHashFilter"); + assert_eq!(filter_block.get_keys().len(), 0); + assert_eq!(filter_block.get_result().len(), 0); assert_eq!(filter_block.get_start().len(), 0); assert_eq!(filter_block.get_tmp_keys().len(), 0); assert_eq!(filter_block.get_tmp_filter_offsets().len(), 0); } + #[test] + fn test_filter_block_reader_new_with_policy_empty_content() { + let policy = Arc::new(TestHashFilter::new()); + let contents = Slice::default(); + + let filter_block_reader: FilterBlockReader = FilterBlockReader::new_with_policy(policy, contents); + + let fp_reader = filter_block_reader.get_policy(); + let _reader_filter_policy_name = fp_reader.name(); + assert_eq!(_reader_filter_policy_name, "TestHashFilter"); + assert_eq!(filter_block_reader.get_data().len(), 0); + assert_eq!(filter_block_reader.get_offset().len(), 0); + assert_eq!(filter_block_reader.get_num(), 0); + assert_eq!(filter_block_reader.get_base_lg(), 0); + } + #[test] fn test_filter_block_new_with_policy_and_addkey() { - let policy = Arc::new(BloomFilterPolicy::new(2)); - let mut filter_block_builder: FilterBlockBuilder = FilterBlockBuilder::new_with_policy(policy); + let policy = Arc::new(TestHashFilter::new()); + let mut filter_block_builder: FilterBlockBuilder = FilterBlockBuilder::new_with_policy( + policy, 10); filter_block_builder.start_block(100); filter_block_builder.add_key_from_str("foo"); @@ -134,23 +159,7 @@ mod test { filter_block_builder.add_key_from_str("hello"); let sliceRs: Result = filter_block_builder.finish(); - - } - - #[test] - fn test_filter_block_reader_new_with_policy_empty_content() { - let policy = Arc::new(BloomFilterPolicy::new(2)); - let contents = Slice::default(); - - let filter_block_reader: FilterBlockReader = FilterBlockReader::new_with_policy(policy, contents); - - let fp_reader = filter_block_reader.get_policy(); - let _reader_filter_policy_name = fp_reader.name(); - assert_eq!(_reader_filter_policy_name, "leveldb.BuiltinBloomFilter"); - assert_eq!(filter_block_reader.get_data().len(), 0); - assert_eq!(filter_block_reader.get_offset().len(), 0); - assert_eq!(filter_block_reader.get_num(), 0); - assert_eq!(filter_block_reader.get_base_lg(), 0); + assert_eq!("a", "leveldb.BuiltinBloomFilter"); } // #[test] diff --git a/src/traits/filter_policy_trait.rs b/src/traits/filter_policy_trait.rs index 6aa0f06..3920604 100644 --- a/src/traits/filter_policy_trait.rs +++ b/src/traits/filter_policy_trait.rs @@ -32,7 +32,9 @@ pub trait FilterPolicy { /// let policy = BloomFilterPolicy::new(800); /// let bloom_filter: Slice = policy.create_filter(keys); /// ``` - fn create_filter(&self, keys: Vec) -> Slice; + fn create_filter(&self, keys: Vec<&Slice>) -> Slice; + + fn create_filter_with_len(&self, len: usize, keys: Vec<&Slice>) -> Slice; /// /// diff --git a/src/util/filter_policy.rs b/src/util/filter_policy.rs index 83127d2..726e541 100644 --- a/src/util/filter_policy.rs +++ b/src/util/filter_policy.rs @@ -82,8 +82,12 @@ impl FilterPolicy for BloomFilterPolicy { String::from("leveldb.BuiltinBloomFilter") } - fn create_filter(&self, keys: Vec) -> Slice { - let n: usize = keys.len(); + fn create_filter(&self, keys: Vec<&Slice>) -> Slice { + self.create_filter_with_len(keys.len(), keys) + } + + fn create_filter_with_len(&self, len: usize, keys: Vec<&Slice>) -> Slice { + let n: usize = len; let mut bits: usize = n * self.bits_per_key; @@ -100,7 +104,7 @@ impl FilterPolicy for BloomFilterPolicy { dst_chars[bytes] = self.k as u8; for i in 0..n { - let slice = keys.get(i).unwrap(); + let slice = keys[i]; let mut h : u32 = slice.bloom_hash(); let delta : u32 = (h >> 17) | (h << 15); @@ -173,7 +177,11 @@ impl FilterPolicy for InternalFilterPolicy { todo!() } - fn create_filter(&self, keys: Vec) -> Slice { + fn create_filter(&self, keys: Vec<&Slice>) -> Slice { + self.create_filter_with_len(keys.len(), keys) + } + + fn create_filter_with_len(&self, len: usize, keys: Vec<&Slice>) -> Slice { // 根据指定的参数创建过滤器,并返回结果, 结果为dst的原始内容 + append结果。 // 参数keys[0,n-1]包含依据用户提供的comparator排序的key列表--可重复, // 并把根据这些key创建的filter追加到 dst中。 diff --git a/src/util/filter_policy_test.rs b/src/util/filter_policy_test.rs index e84ee12..bea6d2a 100644 --- a/src/util/filter_policy_test.rs +++ b/src/util/filter_policy_test.rs @@ -33,9 +33,12 @@ fn test_new() { fn test_create_filter() { let policy = BloomFilterPolicy::new(800); - let mut keys : Vec = Vec::new(); - keys.push(Slice::try_from(String::from("hello")).unwrap()); - keys.push(Slice::try_from(String::from("world")).unwrap()); + let s1 = Slice::try_from(String::from("hello")).unwrap(); + let s2 = Slice::try_from(String::from("world")).unwrap(); + + let mut keys : Vec<&Slice> = Vec::new(); + keys.push(&s1); + keys.push(&s2); let bloom_filter: Slice = policy.create_filter(keys); -- Gitee From db74b32f56222640d8eb9ccb55dbf5eb7e443013 Mon Sep 17 00:00:00 2001 From: fengyang Date: Fri, 31 Mar 2023 18:54:34 +0800 Subject: [PATCH 38/50] =?UTF-8?q?SST=20=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 16 ++++++-- src/db/builder.rs | 72 +++++++++++++++++++++++++++++++++ src/db/builder_test.rs | 0 src/db/db_format.rs | 33 +++++++-------- src/db/file_meta_data.rs | 4 ++ src/db/filename.rs | 11 +++++ src/db/filename_test.rs | 0 src/db/mod.rs | 4 ++ src/db/skip_list.rs | 4 ++ src/db/version_set.rs | 3 +- src/table/block_builder.rs | 5 +++ src/table/mod.rs | 6 ++- src/table/table_builder.rs | 59 +++++++++++++++++++++++++++ src/table/table_builder_test.rs | 0 src/traits/iterator.rs | 9 +++++ src/util/env.rs | 13 ++++++ src/util/env_test.rs | 0 src/util/mod.rs | 2 + src/util/options.rs | 4 +- 19 files changed, 220 insertions(+), 25 deletions(-) create mode 100644 src/db/builder.rs create mode 100644 src/db/builder_test.rs create mode 100644 src/db/filename.rs create mode 100644 src/db/filename_test.rs create mode 100644 src/table/table_builder.rs create mode 100644 src/table/table_builder_test.rs create mode 100644 src/util/env.rs create mode 100644 src/util/env_test.rs diff --git a/README.md b/README.md index 52b2659..b76735c 100644 --- a/README.md +++ b/README.md @@ -76,14 +76,21 @@ RUSTFLAGS='--cfg CORE_DEBUG="false"' cargo build --release | util.Options(ReadOptions, WriteOptions) | kazeseiriou,wangboo | 0% | | util.ENV(WritableFile, SequentialFile, RandomAccessFile, FileLock) | lxd5866 | 0% | | util.Logger/Log日志库 | peach | 50% | -| table.Block, BlockBuilder, FilterBlockBuilder | colagy,fengyang | 20% | -| FilterBlock, FilterBlockReader | colagy,fengyang | 80% | | table.format(Footer, BlockHandle) | 半支烟 | 20% | | db.dbformat(InternalKeyComparator, InternalFilterPolicy, LookupKey, InternalKey) | 半支烟 | 20% | | db.SkipList | wangboo | 100% | -| table.Iterator(DBIter, MergingIterator, TwoLevelIterator...) | kazeseiriou | 0% | +| table.Iterator(DBIter、EmptyIterator) | kazeseiriou | 0% | +| table.Iterator(merger.MergingIterator) | kazeseiriou | 0% | +| table.Iterator(TwoLevelIterator) | kazeseiriou | 0% | +| table.Iterator(tabletest.KeyConvertingIterator) | kazeseiriou | 0% | +| table.Iterator(dbtest.ModelIter) | kazeseiriou | 0% | +| table.Iterator(Block::Iter) | fengyang | 0% | | IteratorWrapper | kazeseiriou | 0% | | db.MemTable(MemTable, MemTableIterator) | wangboo,tzcyujunyong | 20% | +| db.Builder | fengyang | 20% | +| table.Block | fengyang | 80% | +| table.BlockBuilder, table.FilterBlockBuilder | | | +| FilterBlock, FilterBlockReader | fengyang | 80% | | SSTable | fengyang | 0% | | table.Table | peach,tzcyujunyong | | | db.leveldb_util | wangboo | 0% | @@ -95,6 +102,9 @@ RUSTFLAGS='--cfg CORE_DEBUG="false"' cargo build --release | db.VersionEdit(Tag, VersionEdit, FileMetaData) | fengyang | 20% | | db.VersionSet(Version, LevelFileNumIterator, SaverState) | fengyang | 20% | | WriteBatch | tzcyujunyong,wangboo | 50% | +| table.table_builder | | | +| filename | | | +| env | | | | | 半支烟 | 40% | diff --git a/src/db/builder.rs b/src/db/builder.rs new file mode 100644 index 0000000..2793bc0 --- /dev/null +++ b/src/db/builder.rs @@ -0,0 +1,72 @@ +use std::ops::Deref; +use std::sync::Arc; +use crate::db::file_meta_data::FileMetaData; +use crate::db::filename::FileName; +use crate::db::table_cache::TableCache; +use crate::table::table_builder::TableBuilder; +use crate::traits::DataIterator; +use crate::util::env::{Env, WritableFile}; +use crate::util::options::{Options}; +use crate::util::Result; +use crate::util::slice::Slice; +use crate::util::status::{LevelError, Status}; + +pub struct BuildTable {} + +impl BuildTable { + + /// + /// 生成 SSTable + /// + /// Build a Table file from the contents of *iter. + /// The generated file will be named according to meta->number. + /// On success, the rest of meta will be filled with metadata about the generated table. + /// If no data is present in *iter, meta->file_size will be set to zero, and no Table file will be produced. + /// + /// # Arguments + /// + /// * `dbname`: + /// * `env`: + /// * `options`: + /// * `table_cache`: + /// * `iter`: + /// * `meta`: + /// + /// returns: Result + /// + /// # Examples + /// + /// ``` + /// + /// ``` + pub fn build_table(dbname: &Slice, env: &Env, options: &Options, + table_cache: &TableCache, mut iter: Box, + meta: &mut FileMetaData) -> Result { + meta.set_file_size(0); + iter.seek_to_first(); + + let file_name = FileName::table_file_name(dbname, meta.get_number()); + + if iter.valid() { + let fileRS: Result = env.new_writable_file(&file_name); + if(!fileRS.is_ok()){ + return Err(fileRS.err().unwrap()); + } + + let writableFile = Arc::new(fileRS.unwrap()); + let builder: TableBuilder = TableBuilder::new_with_writable_file(options, writableFile); + + meta.get_smallest().decode_from(&iter.key()); + while iter.valid() && iter.has_next(){ + iter.next(); + + let key = iter.key(); + meta.get_largest().decode_from(&key); + // builder.add(key, iter.value()); + } + } + + + Err(Status::wrapper_str(LevelError::KBadRecord, "a")) + } +} \ No newline at end of file diff --git a/src/db/builder_test.rs b/src/db/builder_test.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/db/db_format.rs b/src/db/db_format.rs index bbe73b1..f318f2c 100644 --- a/src/db/db_format.rs +++ b/src/db/db_format.rs @@ -7,6 +7,7 @@ use crate::traits::coding_trait::CodingTrait; use crate::traits::comparator_trait::Comparator; use crate::util::coding::Coding; use crate::util::slice::Slice; +use crate::util::unsafe_slice::UnsafeSlice; pub enum ValueType { /// 0x0 @@ -93,21 +94,21 @@ impl Default for ParsedInternalKey { impl ParsedInternalKey { - fn debug_string(&self) -> Slice { + pub fn debug_string(&self) -> Slice { Slice::default() } /// Return the length of the encoding of "key". - fn internal_key_encoding_length(&self, key: ParsedInternalKey) -> usize { + pub fn internal_key_encoding_length(&self, key: ParsedInternalKey) -> usize { key.user_key.size() + 8 } // 将 self 的数据追加到 result 中 - fn append_internal_key(&self, result: Slice) { + pub fn append_internal_key(&self, result: Slice) { todo!() } - fn new(user_key: Slice, sequence: u64, value_type: ValueType) -> Self { + pub fn new(user_key: Slice, sequence: u64, value_type: ValueType) -> Self { Self { user_key, sequence, @@ -118,13 +119,13 @@ impl ParsedInternalKey { /// Attempt to parse an internal key from "internal_key". On success, /// stores the parsed data in "*result", and returns true. /// On error, returns false, leaves "*result" in an undefined state. - fn parse_internal_key(internal_key : Slice, target: ParsedInternalKey) -> bool { + pub fn parse_internal_key(internal_key : Slice, target: ParsedInternalKey) -> bool { // line 173 todo!() } /// Returns the user key portion of an internal key. - fn extract_user_key(internal_key : Slice) -> Slice { + pub fn extract_user_key(internal_key : Slice) -> Slice { todo!() } } @@ -147,7 +148,7 @@ impl PartialEq for InternalKey { } impl InternalKey { - fn new(user_key: Slice, sequence: u64, value_type: ValueType) -> Self { + pub fn new(user_key: Slice, sequence: u64, value_type: ValueType) -> Self { // line 145 let result: Slice = Slice::default(); ParsedInternalKey::new(user_key, sequence, value_type) @@ -171,7 +172,7 @@ impl InternalKey { /// ``` /// /// ``` - fn decode_from(&self, input: Slice) { + pub fn decode_from(&self, input: &UnsafeSlice) { todo!() // wangbo @@ -179,7 +180,7 @@ impl InternalKey { } /// 输出 InternalKey 调试信息 - fn debug_string(&self) -> Slice { + pub fn debug_string(&self) -> Slice { // line 164 todo!() } @@ -193,16 +194,16 @@ impl InternalKey { self.rep_.size() } - fn user_key(self) -> Slice { + pub fn user_key(self) -> Slice { ParsedInternalKey::extract_user_key(self.rep_) } - fn set_from(self, p: ParsedInternalKey) { + pub fn set_from(self, p: ParsedInternalKey) { // self.rep_.clear(); p.append_internal_key(self.rep_); } - fn clear(self) { + pub fn clear(self) { // self.rep_.clear(); } } @@ -255,7 +256,7 @@ impl Comparator for InternalKeyComparator { impl LookupKey { /// Initialize *this for looking up user_key at a snapshot with /// the specified sequence number. - fn new(user_key: Slice, sequence: usize) -> Self { + pub fn new(user_key: Slice, sequence: usize) -> Self { let user_key_size = user_key.size(); let need = user_key_size + 13; // A conservative estimate let mut data = Vec::with_capacity(need); @@ -278,12 +279,12 @@ impl LookupKey { } /// Return a key suitable for lookup in a MemTable. - fn mem_table_key(&self) -> Slice { + pub fn mem_table_key(&self) -> Slice { self.data.clone() } /// Return an internal key (suitable for passing to an internal iterator) - fn internal_key(&self) -> Slice { + pub fn internal_key(&self) -> Slice { // line 204 let buf = self.data.as_ref(); let internal_key_buf = &buf[self.user_key_start..]; @@ -291,7 +292,7 @@ impl LookupKey { } /// Return the user key - fn user_key(&self) -> Slice { + pub fn user_key(&self) -> Slice { // line 207 todo!() } diff --git a/src/db/file_meta_data.rs b/src/db/file_meta_data.rs index 7c9e3b6..b04f6d4 100644 --- a/src/db/file_meta_data.rs +++ b/src/db/file_meta_data.rs @@ -77,6 +77,10 @@ impl FileMetaData { self.file_size } + pub fn set_file_size(&mut self, file_size: u64) { + self.file_size = file_size; + } + /// Smallest internal key served by table pub fn get_smallest(&self) -> &InternalKey { &self.smallest diff --git a/src/db/filename.rs b/src/db/filename.rs new file mode 100644 index 0000000..fb1db13 --- /dev/null +++ b/src/db/filename.rs @@ -0,0 +1,11 @@ +use crate::util::slice::Slice; + +pub struct FileName { + +} + +impl FileName { + pub fn table_file_name(dbname: &Slice, number : u64) -> Slice{ + todo!() + } +} \ No newline at end of file diff --git a/src/db/filename_test.rs b/src/db/filename_test.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/db/mod.rs b/src/db/mod.rs index 24a37d4..0d48432 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -19,6 +19,10 @@ pub mod version_set; mod version_set_test; pub mod version_edit; mod version_edit_test; +pub mod builder; +mod builder_test; +pub mod filename; +mod filename_test; /// 默认调表 pub type DefaultSkipList = SkipList; diff --git a/src/db/skip_list.rs b/src/db/skip_list.rs index a043adc..ad8b5c3 100644 --- a/src/db/skip_list.rs +++ b/src/db/skip_list.rs @@ -441,6 +441,10 @@ impl DataIterator for Iter { } } + fn has_next(&self) -> bool { + todo!() + } + fn pre(&mut self) { todo!() } diff --git a/src/db/version_set.rs b/src/db/version_set.rs index 0a58d3a..6175203 100644 --- a/src/db/version_set.rs +++ b/src/db/version_set.rs @@ -6,7 +6,8 @@ use crate::db::table_cache::TableCache; use crate::db::version_edit::VersionEdit; use crate::traits::comparator_trait::Comparator; use crate::util::cache::Cache; -use crate::util::options::{Env, Options, ReadOptions}; +use crate::util::env::Env; +use crate::util::options::{Options, ReadOptions}; use crate::util::slice::Slice; use crate::util::Result; diff --git a/src/table/block_builder.rs b/src/table/block_builder.rs index a58922a..2d6467d 100644 --- a/src/table/block_builder.rs +++ b/src/table/block_builder.rs @@ -1,3 +1,4 @@ +use crate::util::options::Options; use crate::util::slice::Slice; use crate::util::Result; @@ -5,6 +6,10 @@ use crate::util::Result; pub struct BlockBuilder {} impl BlockBuilder { + pub fn new(options: &Options) -> Self { + todo!() + } + /// 添加数据到block /// /// # Arguments diff --git a/src/table/mod.rs b/src/table/mod.rs index f928426..cc22b85 100644 --- a/src/table/mod.rs +++ b/src/table/mod.rs @@ -4,6 +4,8 @@ pub mod filter_block; mod filter_block_test; pub mod format; mod format_test; -pub(crate) mod ss_table; +pub mod ss_table; mod ss_table_test; -mod iterator_wrapper; \ No newline at end of file +pub mod iterator_wrapper; +pub mod table_builder; +mod table_builder_test; \ No newline at end of file diff --git a/src/table/table_builder.rs b/src/table/table_builder.rs new file mode 100644 index 0000000..bd74361 --- /dev/null +++ b/src/table/table_builder.rs @@ -0,0 +1,59 @@ +use std::borrow::Borrow; +use std::sync::Arc; +use crate::table::block_builder::BlockBuilder; +use crate::table::filter_block::FilterBlockBuilder; +use crate::traits::filter_policy_trait::FilterPolicy; +use crate::util::env::WritableFile; +use crate::util::options::Options; +use crate::util::slice::Slice; +use crate::util::status::Status; + +pub struct TableBuilder { + rep: Rep +} + +struct Rep { + // options: Box, + // index_block_options: Options, + file: Arc, + offset: u64, + status: Status, + // data_block: BlockBuilder, + // index_block: BlockBuilder, + last_key: Slice, + num_entries: u64, + // Either Finish() or Abandon() has been called. + closed: bool, +} + +impl TableBuilder { + pub fn new_with_writable_file(options: &Options, writableFile: Arc) -> Self { + let rep = Rep::new(options, writableFile); + + // Self { + // rep + // } + + todo!() + } + + pub fn add(&self, key: &Slice, value: &Slice) { + todo!() + } +} + +impl Rep { + pub fn new(opt: &Options, writableFile: Arc) -> Self { + Self { + // options: Box::new(*opt), + file: writableFile, + offset: 0, + // todo default Status::OK + status: Status::default(), + // data_block: BlockBuilder::new(&opt), + last_key: Default::default(), + num_entries: 0, + closed: false, + } + } +} \ No newline at end of file diff --git a/src/table/table_builder_test.rs b/src/table/table_builder_test.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/traits/iterator.rs b/src/traits/iterator.rs index 164fcc5..41a331a 100644 --- a/src/traits/iterator.rs +++ b/src/traits/iterator.rs @@ -1,6 +1,8 @@ use crate::util::slice::Slice; use crate::util::unsafe_slice::UnsafeSlice; +/// +/// Iterator 迭代器定义 pub trait DataIterator { /// 检查当前位置是否有效 /// @@ -68,6 +70,10 @@ pub trait DataIterator { /// /// ``` fn next(&mut self); + + /// 是否存在下一个元素 + fn has_next(&self) -> bool; + /// 定位到上一个元素 /// /// # Arguments @@ -81,6 +87,7 @@ pub trait DataIterator { /// /// ``` fn pre(&mut self); + /// 获取key值 /// /// # Arguments @@ -93,7 +100,9 @@ pub trait DataIterator { /// ``` /// /// ``` + /// todo UnsafeSlice 与 Slice 应该存在一个共同traits或者struct 便于API操作 fn key(&self) -> UnsafeSlice; + /// 获取value值 /// /// # Arguments diff --git a/src/util/env.rs b/src/util/env.rs new file mode 100644 index 0000000..dbeb438 --- /dev/null +++ b/src/util/env.rs @@ -0,0 +1,13 @@ + +use crate::util::Result; +use crate::util::slice::Slice; + +pub struct Env {} + +pub struct WritableFile {} + +impl Env { + pub fn new_writable_file(&self, fname: &Slice) -> Result { + todo!() + } +} \ No newline at end of file diff --git a/src/util/env_test.rs b/src/util/env_test.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/util/mod.rs b/src/util/mod.rs index b24bec5..c2869aa 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -41,6 +41,8 @@ pub mod debug; pub mod linked_list; mod linked_list_test; pub mod unsafe_slice; +pub mod env; +mod env_test; /// 定义别名 pub type Result = result::Result; diff --git a/src/util/options.rs b/src/util/options.rs index fffba75..4764b88 100644 --- a/src/util/options.rs +++ b/src/util/options.rs @@ -4,15 +4,13 @@ use crate::db::db_format::InternalKeyComparator; use crate::traits::comparator_trait::Comparator; use crate::traits::filter_policy_trait::FilterPolicy; use crate::util::comparator::BytewiseComparatorImpl; +use crate::util::env::Env; pub enum CompressionType { NoCompression, SnappyCompression } -/// TODO temp -pub struct Env {} - pub struct Cache {} // 使用如下定义(后续路径会重构) -- Gitee From 21942c95b51077165283fbbefd3615201aeee20e Mon Sep 17 00:00:00 2001 From: fengyang Date: Fri, 31 Mar 2023 19:43:30 +0800 Subject: [PATCH 39/50] =?UTF-8?q?SST=20=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/db/builder.rs | 59 ++++++++++++++++++++++++++++++++++---- src/db/builder_test.rs | 9 ++++++ src/table/table_builder.rs | 46 +++++++++++++++++++++++++---- src/util/env.rs | 6 ++-- 4 files changed, 105 insertions(+), 15 deletions(-) diff --git a/src/db/builder.rs b/src/db/builder.rs index 2793bc0..7ec8840 100644 --- a/src/db/builder.rs +++ b/src/db/builder.rs @@ -1,3 +1,6 @@ +use std::error::Error; +use std::fs::File; +use std::io; use std::ops::Deref; use std::sync::Arc; use crate::db::file_meta_data::FileMetaData; @@ -5,7 +8,7 @@ use crate::db::filename::FileName; use crate::db::table_cache::TableCache; use crate::table::table_builder::TableBuilder; use crate::traits::DataIterator; -use crate::util::env::{Env, WritableFile}; +use crate::util::env::Env; use crate::util::options::{Options}; use crate::util::Result; use crate::util::slice::Slice; @@ -43,30 +46,76 @@ impl BuildTable { table_cache: &TableCache, mut iter: Box, meta: &mut FileMetaData) -> Result { meta.set_file_size(0); + // 迭代器移动到第一个节点 iter.seek_to_first(); - + // 生成一个 SSTable 文件名 let file_name = FileName::table_file_name(dbname, meta.get_number()); if iter.valid() { - let fileRS: Result = env.new_writable_file(&file_name); + let fileRS: Result = env.new_writable_file(&file_name); if(!fileRS.is_ok()){ return Err(fileRS.err().unwrap()); } let writableFile = Arc::new(fileRS.unwrap()); + // 生成一个 TableBuilder let builder: TableBuilder = TableBuilder::new_with_writable_file(options, writableFile); meta.get_smallest().decode_from(&iter.key()); + // todo 逻辑 check + // 调用迭代器,依次将每个键-值对加入 TableBuilder while iter.valid() && iter.has_next(){ iter.next(); let key = iter.key(); meta.get_largest().decode_from(&key); - // builder.add(key, iter.value()); + builder.add(&key, &iter.value()); + } + + // Finish and check for builder errors + // 调用 TableBuilder 的 Finish 函数生成 SSTable 文件 + let mut s : Status = builder.finish(); + if s.is_ok() { + meta.set_file_size(builder.get_file_size()); + assert!(meta.get_file_size() > 0); } - } + // Finish and check for file errors + // 将文件刷新到磁盘 + if s.is_ok() { + let rs:io::Result<()> = writableFile.sync_data(); + if rs.is_ok() { + s = Status::default(); + }else{ + s = Status::wrapper_str(LevelError::KIOError, rs.unwrap_err().to_string().as_str()); + } + } + // // 关闭文件 + // if s.is_ok() { + // writableFile.close + // } + + if s.is_ok() { +// // Verify that the table is usable +// Iterator* it = table_cache->NewIterator(ReadOptions(), +// meta->number, +// meta->file_size); +// s = it->status(); +// delete it; + } + } // if end +// // Check for input iterator errors +// if (!iter->status().ok()) { +// s = iter->status(); +// } +// +// if (s.ok() && meta->file_size > 0) { +// // Keep it +// } else { +// env->DeleteFile(fname); +// } +// return s; Err(Status::wrapper_str(LevelError::KBadRecord, "a")) } } \ No newline at end of file diff --git a/src/db/builder_test.rs b/src/db/builder_test.rs index e69de29..44c68c4 100644 --- a/src/db/builder_test.rs +++ b/src/db/builder_test.rs @@ -0,0 +1,9 @@ + +mod test { + #[test] + fn test_() { + + println!("get_name: {}", "a"); + + } +} \ No newline at end of file diff --git a/src/table/table_builder.rs b/src/table/table_builder.rs index bd74361..1c89579 100644 --- a/src/table/table_builder.rs +++ b/src/table/table_builder.rs @@ -1,12 +1,14 @@ use std::borrow::Borrow; +use std::fs::File; use std::sync::Arc; use crate::table::block_builder::BlockBuilder; use crate::table::filter_block::FilterBlockBuilder; +use crate::table::format::BlockHandle; use crate::traits::filter_policy_trait::FilterPolicy; -use crate::util::env::WritableFile; -use crate::util::options::Options; +use crate::util::options::{CompressionType, Options}; use crate::util::slice::Slice; use crate::util::status::Status; +use crate::util::unsafe_slice::UnsafeSlice; pub struct TableBuilder { rep: Rep @@ -15,7 +17,7 @@ pub struct TableBuilder { struct Rep { // options: Box, // index_block_options: Options, - file: Arc, + file: Arc, offset: u64, status: Status, // data_block: BlockBuilder, @@ -27,7 +29,7 @@ struct Rep { } impl TableBuilder { - pub fn new_with_writable_file(options: &Options, writableFile: Arc) -> Self { + pub fn new_with_writable_file(options: &Options, writableFile: Arc) -> Self { let rep = Rep::new(options, writableFile); // Self { @@ -37,13 +39,45 @@ impl TableBuilder { todo!() } - pub fn add(&self, key: &Slice, value: &Slice) { + pub fn add(&self, key: &UnsafeSlice, value: &UnsafeSlice) { + todo!() + } + + pub fn flush(&self) { + todo!() + } + + pub fn write_block(&self, block: &BlockBuilder, handler: &BlockHandle) { + todo!() + } + + pub fn write_raw_block(&self, block_contents: &UnsafeSlice, compression_type: CompressionType, handler: &BlockHandle) { + todo!() + } + + pub fn status(&self) -> Status { + todo!() + } + + pub fn finish(&self) -> Status { + todo!() + } + + pub fn abandon(&self) { + todo!() + } + + pub fn get_num_entries(&self) -> u64 { + todo!() + } + + pub fn get_file_size(&self) -> u64 { todo!() } } impl Rep { - pub fn new(opt: &Options, writableFile: Arc) -> Self { + pub fn new(opt: &Options, writableFile: Arc) -> Self { Self { // options: Box::new(*opt), file: writableFile, diff --git a/src/util/env.rs b/src/util/env.rs index dbeb438..8bdb966 100644 --- a/src/util/env.rs +++ b/src/util/env.rs @@ -1,13 +1,11 @@ - +use std::fs::File; use crate::util::Result; use crate::util::slice::Slice; pub struct Env {} -pub struct WritableFile {} - impl Env { - pub fn new_writable_file(&self, fname: &Slice) -> Result { + pub fn new_writable_file(&self, fname: &Slice) -> Result { todo!() } } \ No newline at end of file -- Gitee From 15a6e97e86ec5e60c897b4a9f4e8a963c6a2634d Mon Sep 17 00:00:00 2001 From: fengyang Date: Fri, 31 Mar 2023 19:44:07 +0800 Subject: [PATCH 40/50] =?UTF-8?q?SST=20=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/db/builder.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/db/builder.rs b/src/db/builder.rs index 7ec8840..d6d9491 100644 --- a/src/db/builder.rs +++ b/src/db/builder.rs @@ -80,16 +80,16 @@ impl BuildTable { assert!(meta.get_file_size() > 0); } - // Finish and check for file errors - // 将文件刷新到磁盘 - if s.is_ok() { - let rs:io::Result<()> = writableFile.sync_data(); - if rs.is_ok() { - s = Status::default(); - }else{ - s = Status::wrapper_str(LevelError::KIOError, rs.unwrap_err().to_string().as_str()); - } - } + // // Finish and check for file errors + // // 将文件刷新到磁盘 + // if s.is_ok() { + // let rs:io::Result<()> = writableFile.sync_data(); + // if rs.is_ok() { + // s = Status::default(); + // }else{ + // s = Status::wrapper_str(LevelError::KIOError, rs.unwrap_err().to_string().as_str()); + // } + // } // // 关闭文件 // if s.is_ok() { // writableFile.close -- Gitee From dd389d6727d4cca92a147059d196d4b241f3bf32 Mon Sep 17 00:00:00 2001 From: fengyang Date: Sat, 1 Apr 2023 09:55:47 +0800 Subject: [PATCH 41/50] doc --- README.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index b76735c..6f6197b 100644 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ RUSTFLAGS='--cfg CORE_DEBUG="false"' cargo build --release | 功能模块 | 完成人 | 进度 | |----------------------------------------------------------------------------------|----------------------|------| | util.Options(ReadOptions, WriteOptions) | kazeseiriou,wangboo | 0% | -| util.ENV(WritableFile, SequentialFile, RandomAccessFile, FileLock) | lxd5866 | 0% | +| util.ENV(SequentialFile, RandomAccessFile, FileLock) | lxd5866 | 0% | | util.Logger/Log日志库 | peach | 50% | | table.format(Footer, BlockHandle) | 半支烟 | 20% | | db.dbformat(InternalKeyComparator, InternalFilterPolicy, LookupKey, InternalKey) | 半支烟 | 20% | @@ -88,7 +88,7 @@ RUSTFLAGS='--cfg CORE_DEBUG="false"' cargo build --release | IteratorWrapper | kazeseiriou | 0% | | db.MemTable(MemTable, MemTableIterator) | wangboo,tzcyujunyong | 20% | | db.Builder | fengyang | 20% | -| table.Block | fengyang | 80% | +| table.Block | fengyang | 30% | | table.BlockBuilder, table.FilterBlockBuilder | | | | FilterBlock, FilterBlockReader | fengyang | 80% | | SSTable | fengyang | 0% | @@ -102,9 +102,8 @@ RUSTFLAGS='--cfg CORE_DEBUG="false"' cargo build --release | db.VersionEdit(Tag, VersionEdit, FileMetaData) | fengyang | 20% | | db.VersionSet(Version, LevelFileNumIterator, SaverState) | fengyang | 20% | | WriteBatch | tzcyujunyong,wangboo | 50% | -| table.table_builder | | | -| filename | | | -| env | | | +| table.table_builder | | 30% | +| db.filename | | | | | 半支烟 | 40% | -- Gitee From 51d8b6fdfabd132016dd7f7558fe31fe3c999991 Mon Sep 17 00:00:00 2001 From: fengyang Date: Sat, 1 Apr 2023 10:11:00 +0800 Subject: [PATCH 42/50] =?UTF-8?q?table=20build=20=E5=BC=80=E5=8F=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/db/builder.rs | 24 ++++++++++++------------ src/db/builder_test.rs | 10 +++++++++- src/db/skip_list.rs | 4 ---- src/db/table_cache.rs | 5 +++++ src/traits/iterator.rs | 3 --- 5 files changed, 26 insertions(+), 20 deletions(-) diff --git a/src/db/builder.rs b/src/db/builder.rs index d6d9491..3814e10 100644 --- a/src/db/builder.rs +++ b/src/db/builder.rs @@ -64,7 +64,7 @@ impl BuildTable { meta.get_smallest().decode_from(&iter.key()); // todo 逻辑 check // 调用迭代器,依次将每个键-值对加入 TableBuilder - while iter.valid() && iter.has_next(){ + while iter.valid() { iter.next(); let key = iter.key(); @@ -80,17 +80,17 @@ impl BuildTable { assert!(meta.get_file_size() > 0); } - // // Finish and check for file errors - // // 将文件刷新到磁盘 - // if s.is_ok() { - // let rs:io::Result<()> = writableFile.sync_data(); - // if rs.is_ok() { - // s = Status::default(); - // }else{ - // s = Status::wrapper_str(LevelError::KIOError, rs.unwrap_err().to_string().as_str()); - // } - // } - // // 关闭文件 + // Finish and check for file errors + // 将文件刷新到磁盘 + if s.is_ok() { + let rs:io::Result<()> = writableFile.sync_data(); + if rs.is_ok() { + s = Status::default(); + }else{ + s = Status::wrapper_str(LevelError::KIOError, rs.unwrap_err().to_string().as_str()); + } + } + // 关闭文件 // if s.is_ok() { // writableFile.close // } diff --git a/src/db/builder_test.rs b/src/db/builder_test.rs index 44c68c4..7f1746a 100644 --- a/src/db/builder_test.rs +++ b/src/db/builder_test.rs @@ -1,8 +1,16 @@ mod test { + use crate::db::builder::BuildTable; + use crate::db::table_cache::TableCache; + use crate::util::options::Options; + use crate::util::slice::Slice; + #[test] - fn test_() { + fn test_build_table() { + // BuildTable::build_table(&Slice::from("a"), Options::default(), + // TableCache::new(), + // Box::new()); println!("get_name: {}", "a"); } diff --git a/src/db/skip_list.rs b/src/db/skip_list.rs index 34679ee..4d13c12 100644 --- a/src/db/skip_list.rs +++ b/src/db/skip_list.rs @@ -470,10 +470,6 @@ impl DataIterator for Iter { } } - fn has_next(&self) -> bool { - todo!() - } - fn pre(&mut self) { todo!() } diff --git a/src/db/table_cache.rs b/src/db/table_cache.rs index ad6079b..458eb13 100644 --- a/src/db/table_cache.rs +++ b/src/db/table_cache.rs @@ -11,6 +11,11 @@ struct Table {} pub struct TableCache {} impl TableCache { + pub fn new() -> Self { + Self { + + } + } /// 从缓存中获取Table /// /// # Arguments diff --git a/src/traits/iterator.rs b/src/traits/iterator.rs index 41a331a..2ffaad4 100644 --- a/src/traits/iterator.rs +++ b/src/traits/iterator.rs @@ -71,9 +71,6 @@ pub trait DataIterator { /// ``` fn next(&mut self); - /// 是否存在下一个元素 - fn has_next(&self) -> bool; - /// 定位到上一个元素 /// /// # Arguments -- Gitee From c309e0077165fa03f959cff9a7a87f2be72385ed Mon Sep 17 00:00:00 2001 From: fengyang Date: Mon, 3 Apr 2023 20:21:56 +0800 Subject: [PATCH 43/50] =?UTF-8?q?BlockBuilder=20=E5=BC=80=E5=8F=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 8 +++-- src/db/builder.rs | 54 ++++++++++++++++++++------------- src/db/skip_list.rs | 4 +++ src/db/table_cache.rs | 12 ++++---- src/table/block_builder.rs | 32 +++++++++++++++++++- src/table/filter_block.rs | 38 ++++++++++++----------- src/table/filter_block_test.rs | 6 ++-- src/table/mod.rs | 4 ++- src/table/table.rs | 25 ++++++++++++++++ src/table/table_builder.rs | 55 +++++++++++++++++++++++++++------- src/table/table_test.rs | 0 src/traits/iterator.rs | 3 ++ 12 files changed, 177 insertions(+), 64 deletions(-) create mode 100644 src/table/table.rs create mode 100644 src/table/table_test.rs diff --git a/README.md b/README.md index 6f6197b..f05c752 100644 --- a/README.md +++ b/README.md @@ -87,9 +87,11 @@ RUSTFLAGS='--cfg CORE_DEBUG="false"' cargo build --release | table.Iterator(Block::Iter) | fengyang | 0% | | IteratorWrapper | kazeseiriou | 0% | | db.MemTable(MemTable, MemTableIterator) | wangboo,tzcyujunyong | 20% | -| db.Builder | fengyang | 20% | +| db.Builder | fengyang | 85% | +| table.table_builder | fengyang | 30% | | table.Block | fengyang | 30% | -| table.BlockBuilder, table.FilterBlockBuilder | | | +| table.BlockBuilder | fengyang | 30% | +| table.FilterBlockBuilder | fengyang | 30% | | FilterBlock, FilterBlockReader | fengyang | 80% | | SSTable | fengyang | 0% | | table.Table | peach,tzcyujunyong | | @@ -102,7 +104,7 @@ RUSTFLAGS='--cfg CORE_DEBUG="false"' cargo build --release | db.VersionEdit(Tag, VersionEdit, FileMetaData) | fengyang | 20% | | db.VersionSet(Version, LevelFileNumIterator, SaverState) | fengyang | 20% | | WriteBatch | tzcyujunyong,wangboo | 50% | -| table.table_builder | | 30% | +| table.table | | 30% | | db.filename | | | | | 半支烟 | 40% | diff --git a/src/db/builder.rs b/src/db/builder.rs index 3814e10..9140e5e 100644 --- a/src/db/builder.rs +++ b/src/db/builder.rs @@ -6,10 +6,11 @@ use std::sync::Arc; use crate::db::file_meta_data::FileMetaData; use crate::db::filename::FileName; use crate::db::table_cache::TableCache; +use crate::table::table::Table; use crate::table::table_builder::TableBuilder; use crate::traits::DataIterator; use crate::util::env::Env; -use crate::util::options::{Options}; +use crate::util::options::{Options, ReadOptions}; use crate::util::Result; use crate::util::slice::Slice; use crate::util::status::{LevelError, Status}; @@ -51,6 +52,8 @@ impl BuildTable { // 生成一个 SSTable 文件名 let file_name = FileName::table_file_name(dbname, meta.get_number()); + let mut s : Status = Status::default(); + if iter.valid() { let fileRS: Result = env.new_writable_file(&file_name); if(!fileRS.is_ok()){ @@ -62,7 +65,7 @@ impl BuildTable { let builder: TableBuilder = TableBuilder::new_with_writable_file(options, writableFile); meta.get_smallest().decode_from(&iter.key()); - // todo 逻辑 check + // 调用迭代器,依次将每个键-值对加入 TableBuilder while iter.valid() { iter.next(); @@ -74,7 +77,7 @@ impl BuildTable { // Finish and check for builder errors // 调用 TableBuilder 的 Finish 函数生成 SSTable 文件 - let mut s : Status = builder.finish(); + s = builder.finish(); if s.is_ok() { meta.set_file_size(builder.get_file_size()); assert!(meta.get_file_size() > 0); @@ -96,26 +99,35 @@ impl BuildTable { // } if s.is_ok() { -// // Verify that the table is usable -// Iterator* it = table_cache->NewIterator(ReadOptions(), -// meta->number, -// meta->file_size); -// s = it->status(); -// delete it; + let readOptions = ReadOptions::default(); + // Verify that the table is usable + let it: Box = table_cache.new_iterator(&readOptions, + meta.get_number(), + meta.get_file_size() as usize, + &Table::new()) + .expect("table_cache.new_iterator error"); + s = it.status(); } } // if end -// // Check for input iterator errors -// if (!iter->status().ok()) { -// s = iter->status(); -// } -// -// if (s.ok() && meta->file_size > 0) { -// // Keep it -// } else { -// env->DeleteFile(fname); -// } -// return s; - Err(Status::wrapper_str(LevelError::KBadRecord, "a")) + // Check for input iterator errors + if !iter.status().is_ok() { + s = iter.status(); + } + + if s.is_ok() && meta.get_file_size() > 0 { + // Keep it + } else { + // DeleteFile fname + // todo + } + + if s.is_ok() { + // todo + // return Ok(meta); + return Ok(FileMetaData::default()); + }else{ + return Err(s); + } } } \ No newline at end of file diff --git a/src/db/skip_list.rs b/src/db/skip_list.rs index 4d13c12..a1643db 100644 --- a/src/db/skip_list.rs +++ b/src/db/skip_list.rs @@ -484,4 +484,8 @@ impl DataIterator for Iter { fn value(&self) -> UnsafeSlice { todo!() } + + fn status(&self) -> Status { + todo!() + } } \ No newline at end of file diff --git a/src/db/table_cache.rs b/src/db/table_cache.rs index 458eb13..5824617 100644 --- a/src/db/table_cache.rs +++ b/src/db/table_cache.rs @@ -1,13 +1,11 @@ +use crate::table::table::Table; use crate::traits::DataIterator; +use crate::util::options::ReadOptions; use crate::util::slice::Slice; use crate::util::Result; struct Saver {} -struct ReadOptions {} - -struct Table {} - pub struct TableCache {} impl TableCache { @@ -33,7 +31,7 @@ impl TableCache { /// ``` /// /// ``` - fn get(&self, _options: &ReadOptions, _file_number: u64, _file_size: usize, _k: &Slice, _arg: &mut Saver, _handle_result: F) + pub fn get(&self, _options: &ReadOptions, _file_number: u64, _file_size: usize, _k: &Slice, _arg: &mut Saver, _handle_result: F) where F: FnMut(&mut Saver, &Slice, &Slice) -> Result<()> { () } @@ -50,7 +48,7 @@ impl TableCache { /// ``` /// /// ``` - fn evict(&mut self, _file_number: u64) { + pub fn evict(&mut self, _file_number: u64) { todo!() } @@ -70,7 +68,7 @@ impl TableCache { /// ``` /// /// ``` - fn new_iterator(&self, _options: &ReadOptions, _file_number: u64, _file_size: usize, _table: &Table) -> Result> { + pub fn new_iterator(&self, _options: &ReadOptions, _file_number: u64, _file_size: usize, _table: &Table) -> Result> { todo!() } } \ No newline at end of file diff --git a/src/table/block_builder.rs b/src/table/block_builder.rs index 2d6467d..3ca4eec 100644 --- a/src/table/block_builder.rs +++ b/src/table/block_builder.rs @@ -1,9 +1,34 @@ +use std::fs::File; +use std::sync::Arc; use crate::util::options::Options; use crate::util::slice::Slice; use crate::util::Result; +use crate::util::status::Status; -pub struct BlockBuilder {} +// 智能指针 Rc, 引用计数器,用来记录一个值是否被使用,如果计数为零可清除。 +// 适用于堆中数据需要被程序多部分使用,但编译时不能确定谁最后完成。 + +// Arc 是一种能够使得数据在线程间安全共享的智能指针. +// Arc会追踪这个指针的所有拷贝,当最后一份拷贝离开作用域时,它就会安全释放内存。 + +// 智能指针 Box。 box 允许你将一个值放在堆上而不是栈上。留在栈上的则是指向堆数据的指针。 +pub struct BlockBuilder { + // 在 BlockBuilder 初始化时,指定的配置项 + options: Box, + index_block_options: Box, + + // SSTable 生成后的文件 + file: Arc, + + offset: u64, + status: Status, + + // 生成 SSTable 中的数据区域 + data_block: Arc, + // 生成 SSTable 中的数据索引区域 + index_block: Arc, +} impl BlockBuilder { pub fn new(options: &Options) -> Self { @@ -27,6 +52,7 @@ impl BlockBuilder { pub fn add(&mut self, _key: &Slice, _value: &Slice) { todo!() } + /// 重置builder /// /// # Examples @@ -37,6 +63,7 @@ impl BlockBuilder { pub fn reset(&mut self) { todo!() } + /// 构造block /// /// @@ -48,6 +75,7 @@ impl BlockBuilder { pub fn finish(&mut self) -> Result { todo!() } + /// 判断builder是否为空 /// /// # Examples @@ -58,6 +86,7 @@ impl BlockBuilder { pub fn empty(&self) -> bool { todo!() } + /// 估算当前的block大小 /// /// # Examples @@ -68,4 +97,5 @@ impl BlockBuilder { pub fn current_size_estimate(&self) -> usize { todo!() } + } \ No newline at end of file diff --git a/src/table/filter_block.rs b/src/table/filter_block.rs index 183c678..625f2c9 100644 --- a/src/table/filter_block.rs +++ b/src/table/filter_block.rs @@ -14,7 +14,9 @@ const FILTER_BASE: usize = 1 << FILTER_BASE_LG; /// /// meta block 构建器 /// -pub trait FilterBlock { +pub trait FilterBlock { + #[inline] + fn new_with_policy(policy: Box) -> Self; /// /// 构造一个 FilterBlockBuilder @@ -29,14 +31,10 @@ pub trait FilterBlock { /// # Examples /// /// ``` - /// use std::sync::Arc; - /// use level_db_rust::util::filter_policy::BloomFilterPolicy; /// - /// let policy = Arc::new(BloomFilterPolicy::new(2)); - /// let filter_block: FilterBlockBuilder = FilterBlockBuilder::new_with_policy(policy); /// ``` #[inline] - fn new_with_policy(policy: Arc, capacity: usize) -> Self; + fn new_with_policy_capacity(policy: Box, capacity: usize) -> Self; /// 设置block的起始位置 /// @@ -80,7 +78,7 @@ pub trait FilterBlock { /// ``` fn finish(&mut self) -> Result; - fn get_policy(&self) -> Box<&FP>; + fn get_policy(&self) -> Box<&dyn FilterPolicy>; fn get_keys(&self) -> Vec; @@ -94,8 +92,8 @@ pub trait FilterBlock { } /// SSTable 文件里面的 meta block 构建器, 按内存里面指定的格式整理在内存中 -pub struct FilterBlockBuilder { - policy: Arc, +pub struct FilterBlockBuilder { + policy: Box, // Flattened key contents keys: Vec, // Starting index in keys_ of each key @@ -107,8 +105,8 @@ pub struct FilterBlockBuilder { filter_offsets: Vec, } -pub struct FilterBlockReader { - policy: Arc, +pub struct FilterBlockReader { + policy: Box, // Pointer to filter data (at block-start) data: Vec, // Pointer to beginning of offset array (at block-end) @@ -119,8 +117,12 @@ pub struct FilterBlockReader { base_lg: usize } -impl FilterBlock for FilterBlockBuilder { - fn new_with_policy(policy: Arc, capacity: usize) -> Self { +impl FilterBlock for FilterBlockBuilder { + fn new_with_policy(policy: Box) -> Self { + FilterBlock::new_with_policy_capacity(policy, 64) + } + + fn new_with_policy_capacity(policy: Box, capacity: usize) -> Self { let keys:Vec = Vec::with_capacity(capacity); let start:Vec = Vec::with_capacity(capacity); let result:Vec = Vec::with_capacity(capacity); @@ -185,7 +187,7 @@ impl FilterBlock for FilterBlockBuilder { Ok(Slice::from_buf(&self.result)) } - fn get_policy(&self) -> Box<&FP> { + fn get_policy(&self) -> Box<&dyn FilterPolicy> { Box::new(self.policy.as_ref()) } @@ -210,7 +212,7 @@ impl FilterBlock for FilterBlockBuilder { } } -impl FilterBlockBuilder { +impl FilterBlockBuilder { fn generate_filter(&mut self) { let num_keys = self.start.len(); @@ -254,8 +256,8 @@ impl FilterBlockBuilder { } } -impl FilterBlockReader { - pub fn new_with_policy(policy: Arc, contents: Slice) -> Self { +impl FilterBlockReader { + pub fn new_with_policy(policy: Box, contents: Slice) -> Self { let data = Vec::new(); let offset = Vec::new(); @@ -290,7 +292,7 @@ impl FilterBlockReader { todo!() } - pub fn get_policy(&self) -> Box<&FP> { + pub fn get_policy(&self) -> Box<&dyn FilterPolicy> { Box::new(self.policy.as_ref()) } diff --git a/src/table/filter_block_test.rs b/src/table/filter_block_test.rs index 0e57a1a..9582b2a 100644 --- a/src/table/filter_block_test.rs +++ b/src/table/filter_block_test.rs @@ -113,7 +113,7 @@ mod test { #[test] fn test_filter_block_new_with_policy() { - let policy = Arc::new(TestHashFilter::new()); + let policy = Box::new(TestHashFilter::new()); let filter_block: FilterBlockBuilder = FilterBlockBuilder::new_with_policy(policy, 10); @@ -129,7 +129,7 @@ mod test { #[test] fn test_filter_block_reader_new_with_policy_empty_content() { - let policy = Arc::new(TestHashFilter::new()); + let policy = Box::new(TestHashFilter::new()); let contents = Slice::default(); let filter_block_reader: FilterBlockReader = FilterBlockReader::new_with_policy(policy, contents); @@ -145,7 +145,7 @@ mod test { #[test] fn test_filter_block_new_with_policy_and_addkey() { - let policy = Arc::new(TestHashFilter::new()); + let policy = Box::new(TestHashFilter::new()); let mut filter_block_builder: FilterBlockBuilder = FilterBlockBuilder::new_with_policy( policy, 10); diff --git a/src/table/mod.rs b/src/table/mod.rs index cc22b85..f4e0a94 100644 --- a/src/table/mod.rs +++ b/src/table/mod.rs @@ -8,4 +8,6 @@ pub mod ss_table; mod ss_table_test; pub mod iterator_wrapper; pub mod table_builder; -mod table_builder_test; \ No newline at end of file +mod table_builder_test; +pub mod table; +mod table_test; \ No newline at end of file diff --git a/src/table/table.rs b/src/table/table.rs new file mode 100644 index 0000000..65db038 --- /dev/null +++ b/src/table/table.rs @@ -0,0 +1,25 @@ +use std::fs::File; +use std::sync::Arc; +use crate::util::options::Options; +use crate::util::Result; +use crate::util::status::Status; + +pub struct Table { + rep: Rep +} + +struct Rep { + options: Box, + status: Status, + file: Arc, +} + +impl Table { + pub fn new() -> Self{ + todo!() + } + + pub fn open(&self, options:&Options, randomAccessFile:&File, file_size: u64, table:&Table) -> Result{ + todo!() + } +} \ No newline at end of file diff --git a/src/table/table_builder.rs b/src/table/table_builder.rs index 1c89579..4615fdc 100644 --- a/src/table/table_builder.rs +++ b/src/table/table_builder.rs @@ -2,7 +2,7 @@ use std::borrow::Borrow; use std::fs::File; use std::sync::Arc; use crate::table::block_builder::BlockBuilder; -use crate::table::filter_block::FilterBlockBuilder; +use crate::table::filter_block::{FilterBlock, FilterBlockBuilder}; use crate::table::format::BlockHandle; use crate::traits::filter_policy_trait::FilterPolicy; use crate::util::options::{CompressionType, Options}; @@ -11,21 +11,39 @@ use crate::util::status::Status; use crate::util::unsafe_slice::UnsafeSlice; pub struct TableBuilder { - rep: Rep + rep: Box } -struct Rep { - // options: Box, - // index_block_options: Options, +/// TableBuilder Rep 结构体, 内部使用 +struct Rep<> { + options: Box, + index_block_options: Box, + + // SSTable 生成后的文件 file: Arc, + offset: u64, status: Status, - // data_block: BlockBuilder, - // index_block: BlockBuilder, + + // 生成 SSTable 中的数据区域 + data_block: BlockBuilder, + // 生成 SSTable 中的数据索引区域 + index_block: BlockBuilder, + last_key: Slice, num_entries: u64, // Either Finish() or Abandon() has been called. closed: bool, + + // 生成 SSTable 中的元数据区域 + filter_block: Option, + // 判断是否需要生成 SSTable中的数据索引, SSTable中每次生成一个完整的块之后,需要将该值置为 true, 说明需要为该块添加索引 + pending_index_entry: bool, + // Handle to add to index block + // pending_handle 记录需要生成数据索引的数据块在 SSTable 中的偏移量和大小 + pending_handle: BlockHandle, + + compressed_output: Slice, } impl TableBuilder { @@ -78,16 +96,33 @@ impl TableBuilder { impl Rep { pub fn new(opt: &Options, writableFile: Arc) -> Self { + // todo 如何赋值? Box::new(opt) + let options = Box::new(Default::default()); + let index_block_options = Box::new(Default::default()); + + let mut filter_block: Option; + if opt.filter_policy.is_none() { + filter_block = None; + }else { + filter_block = Some(FilterBlockBuilder::new_with_policy(opt.filter_policy.unwrap())); + } + Self { - // options: Box::new(*opt), + options, + index_block_options, file: writableFile, offset: 0, - // todo default Status::OK + // default Status::OK status: Status::default(), - // data_block: BlockBuilder::new(&opt), + data_block: BlockBuilder::new(&options.as_ref()), + index_block: BlockBuilder::new(&index_block_options.as_ref()), last_key: Default::default(), num_entries: 0, closed: false, + filter_block, + pending_index_entry: false, + pending_handle: Default::default(), + compressed_output: Default::default(), } } } \ No newline at end of file diff --git a/src/table/table_test.rs b/src/table/table_test.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/traits/iterator.rs b/src/traits/iterator.rs index 2ffaad4..69bb7c1 100644 --- a/src/traits/iterator.rs +++ b/src/traits/iterator.rs @@ -1,4 +1,5 @@ use crate::util::slice::Slice; +use crate::util::status::Status; use crate::util::unsafe_slice::UnsafeSlice; /// @@ -114,4 +115,6 @@ pub trait DataIterator { /// ``` fn value(&self) -> UnsafeSlice; + fn status(&self) -> Status; + } -- Gitee From 95aa9599aa68da4cf1a92a65aae180c4278e278c Mon Sep 17 00:00:00 2001 From: wangboo <5417808+wangboa@user.noreply.gitee.com> Date: Mon, 3 Apr 2023 20:32:10 +0800 Subject: [PATCH 44/50] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=20box=20syntax?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/db/log_wr_test.rs | 4 +- src/db/skip_list.rs | 12 +-- src/db/skip_list_test.rs | 217 +++++++++++++++++++-------------------- src/lib.rs | 2 - 4 files changed, 116 insertions(+), 119 deletions(-) diff --git a/src/db/log_wr_test.rs b/src/db/log_wr_test.rs index a5ad510..001b1e7 100644 --- a/src/db/log_wr_test.rs +++ b/src/db/log_wr_test.rs @@ -11,7 +11,7 @@ mod test { #[test] fn write() -> Result<()> { - let file = box File::create("../../1.bin")?; + let file = Box::new(File::create("../../1.bin")?); let mut writer = LogWriter::new(file); let sample: Vec = ('0'..='9').map(|a|a as u8).collect(); for i in 0..100 { @@ -23,7 +23,7 @@ mod test { #[test] fn read() -> Result<()> { - let file = box File::open("../../1.bin")?; + let file = Box::new(File::open("../../1.bin")?); let mut reader = LogReader::new(file, true, 0); let sample: Vec = ('0'..='9').map(|a|a as u8).collect(); for i in 0..100 { diff --git a/src/db/skip_list.rs b/src/db/skip_list.rs index 4d13c12..a7eab7c 100644 --- a/src/db/skip_list.rs +++ b/src/db/skip_list.rs @@ -286,31 +286,31 @@ impl ToString for SkipList { impl Node { #[inline] fn create(src: UnsafeSlice, level: usize, arena: ArenaRef) -> RawNode { - let node = box Self { + let node = Box::new(Self { key: Some(src), next_elems: allocate_next_elems(arena), level, - }; + }); Box::into_raw(node) } #[inline] fn create_head(arena: ArenaRef) -> RawNode { - let node = box Self { + let node = Box::new(Self { key: None, next_elems: allocate_next_elems(arena), level: MAX_LEVEL, - }; + }); Box::into_raw(node) } #[inline] fn create_tail() -> RawNode { - let node = box Self { + let node = Box::new(Self { key: None, next_elems: null_mut(), level: 0, - }; + }); Box::into_raw(node) } diff --git a/src/db/skip_list_test.rs b/src/db/skip_list_test.rs index 11b4573..721adb1 100644 --- a/src/db/skip_list_test.rs +++ b/src/db/skip_list_test.rs @@ -1,109 +1,108 @@ -mod test { - use std::collections::HashSet; - use std::env::args; - use std::ffi::{c_char, c_void}; - use std::ptr::{null, null_mut}; - use std::sync::{Arc, Mutex}; - - use criterion::{Criterion, criterion_group, criterion_main}; - use rand::Rng; - use skiplist::OrderedSkipList; - - use crate::db::DefaultSkipList; - use crate::db::skip_list::SkipList; - use crate::debug; - use crate::util::Arena; - use crate::util::arena::ArenaRef; - use crate::util::comparator::BytewiseComparatorImpl; - use crate::util::mem_debug::mem_print; - use crate::util::Result; - use crate::util::slice::Slice; - use crate::util::unsafe_slice::TryIntoUnsafeSlice; - - #[test] - fn test_add() -> Result<()> { - let cmp = Arc::new(BytewiseComparatorImpl::default()); - let arena = Arc::new(Mutex::new(Arena::default())); - let mut list = DefaultSkipList::create(cmp, arena.clone()); - let len = 10; - for i in 0..len { - list.insert(format!("key_{}", i).try_into_unsafe_slice(arena.clone())?).expect("insert ok"); - } - assert_eq!(10, list.len(), "expect 10, but actually is: {}", list.len()); - debug!("{}", list.to_string()); - for i in 0..len { - let key: Slice = format!("key_{}", i).into(); - debug!("contains key: {}", key); - assert!(list.contains(&key), "contains key: {}", key); - } - list.iter().for_each(|slice| { - debug!("slice: {}", slice.as_str()) - }); - Ok(()) - } - - #[test] - fn test_rnd_add() -> Result<()> { - let cmp = Arc::new(BytewiseComparatorImpl::default()); - let arena = Arc::new(Mutex::new(Arena::default())); - let mut list = DefaultSkipList::create(cmp, arena.clone()); - let len = 10; - let mut rnd = rand::thread_rng(); - let mut set = HashSet::new(); - for _i in 0..10 { - let j = rnd.gen_range(0..len); - let key = format!("key_{}", j); - set.insert(key.clone()); - list.insert(key.try_into_unsafe_slice(arena.clone())?)?; - debug!("skiplist: {}", list.to_string()); - } - assert_eq!(set.len(), list.len(), "list length must eq: {}", list.len()); - set.iter().for_each(|key| { - let c = list.contains(&key); - assert!(c, "must contains key: {}", key) - }); - - Ok(()) - } - - - fn default_skiplist(mut list: SkipList, arena: ArenaRef, record_count: usize) { - for j in 0..record_count { - let value = format!("key_{}", j); - list.insert(value.try_into_unsafe_slice(arena.clone()).unwrap()).unwrap(); - } - println!("bench_default_skiplist: "); - mem_print(); - } - - fn bench_skiplist_v_0_4_0(mut list: OrderedSkipList, record_count: usize) { - for j in 0..record_count { - let value = format!("key_{}", j); - list.insert(value.clone()); - } - println!("bench_skiplist_v_0_4_0: "); - mem_print(); - } - - #[test] - fn bench_default_skiplist() { - let record_count = 100 * 1024; - println!("bench default skiplist"); - let cmp = Arc::new(BytewiseComparatorImpl::default()); - let arena = Arc::new(Mutex::new(Arena::default())); - let list = SkipList::create(cmp, arena.clone()); - default_skiplist(list, arena, record_count); - } - - #[test] - fn bench_crate_skiplist() { - let record_count = 100 * 1024; - println!("bench crate skiplist"); - let list: OrderedSkipList = unsafe { - OrderedSkipList::with_comp(|a: &String, b: &String| { - a.cmp(b) - }) - }; - bench_skiplist_v_0_4_0(list, record_count); - } -} \ No newline at end of file +// mod test { +// use std::collections::HashSet; +// use std::env::args; +// use std::ffi::{c_char, c_void}; +// use std::ptr::{null, null_mut}; +// use std::sync::{Arc, Mutex}; +// +// use rand::Rng; +// use skiplist::OrderedSkipList; +// +// use crate::db::DefaultSkipList; +// use crate::db::skip_list::SkipList; +// use crate::debug; +// use crate::util::Arena; +// use crate::util::arena::ArenaRef; +// use crate::util::comparator::BytewiseComparatorImpl; +// use crate::util::mem_debug::mem_print; +// use crate::util::Result; +// use crate::util::slice::Slice; +// use crate::util::unsafe_slice::TryIntoUnsafeSlice; +// +// #[test] +// fn test_add() -> Result<()> { +// let cmp = Arc::new(BytewiseComparatorImpl::default()); +// let arena = Arc::new(Mutex::new(Arena::default())); +// let mut list = DefaultSkipList::create(cmp, arena.clone()); +// let len = 10; +// for i in 0..len { +// list.insert(format!("key_{}", i).try_into_unsafe_slice(arena.clone())?).expect("insert ok"); +// } +// assert_eq!(10, list.len(), "expect 10, but actually is: {}", list.len()); +// debug!("{}", list.to_string()); +// for i in 0..len { +// let key: Slice = format!("key_{}", i).into(); +// debug!("contains key: {}", key); +// assert!(list.contains(&key), "contains key: {}", key); +// } +// list.iter().for_each(|slice| { +// debug!("slice: {}", slice.as_str()) +// }); +// Ok(()) +// } +// +// #[test] +// fn test_rnd_add() -> Result<()> { +// let cmp = Arc::new(BytewiseComparatorImpl::default()); +// let arena = Arc::new(Mutex::new(Arena::default())); +// let mut list = DefaultSkipList::create(cmp, arena.clone()); +// let len = 10; +// let mut rnd = rand::thread_rng(); +// let mut set = HashSet::new(); +// for _i in 0..10 { +// let j = rnd.gen_range(0..len); +// let key = format!("key_{}", j); +// set.insert(key.clone()); +// list.insert(key.try_into_unsafe_slice(arena.clone())?)?; +// debug!("skiplist: {}", list.to_string()); +// } +// assert_eq!(set.len(), list.len(), "list length must eq: {}", list.len()); +// set.iter().for_each(|key| { +// let c = list.contains(&key); +// assert!(c, "must contains key: {}", key) +// }); +// +// Ok(()) +// } +// +// +// fn default_skiplist(mut list: SkipList, arena: ArenaRef, record_count: usize) { +// for j in 0..record_count { +// let value = format!("key_{}", j); +// list.insert(value.try_into_unsafe_slice(arena.clone()).unwrap()).unwrap(); +// } +// println!("bench_default_skiplist: "); +// mem_print(); +// } +// +// fn bench_skiplist_v_0_4_0(mut list: OrderedSkipList, record_count: usize) { +// for j in 0..record_count { +// let value = format!("key_{}", j); +// list.insert(value.clone()); +// } +// println!("bench_skiplist_v_0_4_0: "); +// mem_print(); +// } +// +// #[test] +// fn bench_default_skiplist() { +// let record_count = 100 * 1024; +// println!("bench default skiplist"); +// let cmp = Arc::new(BytewiseComparatorImpl::default()); +// let arena = Arc::new(Mutex::new(Arena::default())); +// let list = SkipList::create(cmp, arena.clone()); +// default_skiplist(list, arena, record_count); +// } +// +// #[test] +// fn bench_crate_skiplist() { +// let record_count = 100 * 1024; +// println!("bench crate skiplist"); +// let list: OrderedSkipList = unsafe { +// OrderedSkipList::with_comp(|a: &String, b: &String| { +// a.cmp(b) +// }) +// }; +// bench_skiplist_v_0_4_0(list, record_count); +// } +// } \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 3b67f8e..c0dce00 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,3 @@ -#![feature(box_syntax)] - extern crate core; pub mod db; -- Gitee From fc17673fbc90a5583fbd5d3844b2084aaa976c5b Mon Sep 17 00:00:00 2001 From: fengyang Date: Mon, 3 Apr 2023 20:55:39 +0800 Subject: [PATCH 45/50] =?UTF-8?q?BlockBuilder=20=E5=BC=80=E5=8F=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/db/builder.rs | 6 +++--- src/table/filter_block.rs | 29 ++++++++++++++--------------- src/table/filter_block_test.rs | 13 +++++++------ src/table/table_builder.rs | 15 +++++++-------- src/util/options.rs | 7 ++++++- 5 files changed, 37 insertions(+), 33 deletions(-) diff --git a/src/db/builder.rs b/src/db/builder.rs index 9140e5e..a98102e 100644 --- a/src/db/builder.rs +++ b/src/db/builder.rs @@ -60,9 +60,9 @@ impl BuildTable { return Err(fileRS.err().unwrap()); } - let writableFile = Arc::new(fileRS.unwrap()); + let writable_file = Arc::new(fileRS.unwrap()); // 生成一个 TableBuilder - let builder: TableBuilder = TableBuilder::new_with_writable_file(options, writableFile); + let builder: TableBuilder = TableBuilder::new_with_writable_file(options, writable_file.clone()); meta.get_smallest().decode_from(&iter.key()); @@ -86,7 +86,7 @@ impl BuildTable { // Finish and check for file errors // 将文件刷新到磁盘 if s.is_ok() { - let rs:io::Result<()> = writableFile.sync_data(); + let rs:io::Result<()> = writable_file.sync_data(); if rs.is_ok() { s = Status::default(); }else{ diff --git a/src/table/filter_block.rs b/src/table/filter_block.rs index 625f2c9..53597ae 100644 --- a/src/table/filter_block.rs +++ b/src/table/filter_block.rs @@ -11,12 +11,13 @@ use crate::util::Result; const FILTER_BASE_LG: usize = 11; const FILTER_BASE: usize = 1 << FILTER_BASE_LG; +pub type FilterPolicyRef = Arc>; + /// /// meta block 构建器 /// pub trait FilterBlock { - #[inline] - fn new_with_policy(policy: Box) -> Self; + fn new_with_policy(policy: FilterPolicyRef) -> Self; /// /// 构造一个 FilterBlockBuilder @@ -33,8 +34,7 @@ pub trait FilterBlock { /// ``` /// /// ``` - #[inline] - fn new_with_policy_capacity(policy: Box, capacity: usize) -> Self; + fn new_with_policy_capacity(policy: FilterPolicyRef, capacity: usize) -> Self; /// 设置block的起始位置 /// @@ -49,7 +49,6 @@ pub trait FilterBlock { /// ``` /// filter_block_builder.start_block(1024_u64); /// ``` - #[inline] fn start_block(&mut self, block_offset: u64); fn add_key_from_str(&mut self, key: &str); @@ -78,7 +77,7 @@ pub trait FilterBlock { /// ``` fn finish(&mut self) -> Result; - fn get_policy(&self) -> Box<&dyn FilterPolicy>; + fn get_policy(&self) -> FilterPolicyRef; fn get_keys(&self) -> Vec; @@ -93,7 +92,7 @@ pub trait FilterBlock { /// SSTable 文件里面的 meta block 构建器, 按内存里面指定的格式整理在内存中 pub struct FilterBlockBuilder { - policy: Box, + policy: FilterPolicyRef, // Flattened key contents keys: Vec, // Starting index in keys_ of each key @@ -106,7 +105,7 @@ pub struct FilterBlockBuilder { } pub struct FilterBlockReader { - policy: Box, + policy: FilterPolicyRef, // Pointer to filter data (at block-start) data: Vec, // Pointer to beginning of offset array (at block-end) @@ -118,11 +117,11 @@ pub struct FilterBlockReader { } impl FilterBlock for FilterBlockBuilder { - fn new_with_policy(policy: Box) -> Self { + fn new_with_policy(policy: FilterPolicyRef) -> Self { FilterBlock::new_with_policy_capacity(policy, 64) } - fn new_with_policy_capacity(policy: Box, capacity: usize) -> Self { + fn new_with_policy_capacity(policy: FilterPolicyRef, capacity: usize) -> Self { let keys:Vec = Vec::with_capacity(capacity); let start:Vec = Vec::with_capacity(capacity); let result:Vec = Vec::with_capacity(capacity); @@ -187,8 +186,8 @@ impl FilterBlock for FilterBlockBuilder { Ok(Slice::from_buf(&self.result)) } - fn get_policy(&self) -> Box<&dyn FilterPolicy> { - Box::new(self.policy.as_ref()) + fn get_policy(&self) -> FilterPolicyRef { + self.policy.clone() } fn get_keys(&self) -> Vec { @@ -257,7 +256,7 @@ impl FilterBlockBuilder { } impl FilterBlockReader { - pub fn new_with_policy(policy: Box, contents: Slice) -> Self { + pub fn new_with_policy(policy: FilterPolicyRef, contents: Slice) -> Self { let data = Vec::new(); let offset = Vec::new(); @@ -292,8 +291,8 @@ impl FilterBlockReader { todo!() } - pub fn get_policy(&self) -> Box<&dyn FilterPolicy> { - Box::new(self.policy.as_ref()) + pub fn get_policy(&self) -> FilterPolicyRef { + self.policy.clone() } pub fn get_data(&self) -> Vec { diff --git a/src/table/filter_block_test.rs b/src/table/filter_block_test.rs index 9582b2a..2350619 100644 --- a/src/table/filter_block_test.rs +++ b/src/table/filter_block_test.rs @@ -113,9 +113,10 @@ mod test { #[test] fn test_filter_block_new_with_policy() { - let policy = Box::new(TestHashFilter::new()); + let policy: Arc> = Arc::new(Box::new(TestHashFilter::new())); - let filter_block: FilterBlockBuilder = FilterBlockBuilder::new_with_policy(policy, 10); + let filter_block: FilterBlockBuilder = FilterBlockBuilder:: + new_with_policy_capacity(policy, 10); let fp = filter_block.get_policy(); let filter_policy_name = fp.name(); @@ -129,10 +130,10 @@ mod test { #[test] fn test_filter_block_reader_new_with_policy_empty_content() { - let policy = Box::new(TestHashFilter::new()); + let policy: Arc> = Arc::new(Box::new(TestHashFilter::new())); let contents = Slice::default(); - let filter_block_reader: FilterBlockReader = FilterBlockReader::new_with_policy(policy, contents); + let filter_block_reader: FilterBlockReader = FilterBlockReader::new_with_policy(policy, contents); let fp_reader = filter_block_reader.get_policy(); let _reader_filter_policy_name = fp_reader.name(); @@ -145,8 +146,8 @@ mod test { #[test] fn test_filter_block_new_with_policy_and_addkey() { - let policy = Box::new(TestHashFilter::new()); - let mut filter_block_builder: FilterBlockBuilder = FilterBlockBuilder::new_with_policy( + let policy: Arc> = Arc::new(Box::new(TestHashFilter::new())); + let mut filter_block_builder: FilterBlockBuilder = FilterBlockBuilder::new_with_policy_capacity( policy, 10); filter_block_builder.start_block(100); diff --git a/src/table/table_builder.rs b/src/table/table_builder.rs index 4615fdc..e940246 100644 --- a/src/table/table_builder.rs +++ b/src/table/table_builder.rs @@ -5,7 +5,7 @@ use crate::table::block_builder::BlockBuilder; use crate::table::filter_block::{FilterBlock, FilterBlockBuilder}; use crate::table::format::BlockHandle; use crate::traits::filter_policy_trait::FilterPolicy; -use crate::util::options::{CompressionType, Options}; +use crate::util::options::{CompressionType, OptionRef, Options}; use crate::util::slice::Slice; use crate::util::status::Status; use crate::util::unsafe_slice::UnsafeSlice; @@ -47,8 +47,8 @@ struct Rep<> { } impl TableBuilder { - pub fn new_with_writable_file(options: &Options, writableFile: Arc) -> Self { - let rep = Rep::new(options, writableFile); + pub fn new_with_writable_file(options: &Options, writable_file: Arc) -> Self { + let rep = Rep::new(options, writable_file); // Self { // rep @@ -95,16 +95,15 @@ impl TableBuilder { } impl Rep { - pub fn new(opt: &Options, writableFile: Arc) -> Self { - // todo 如何赋值? Box::new(opt) - let options = Box::new(Default::default()); - let index_block_options = Box::new(Default::default()); + pub fn new(opt: OptionRef, writableFile: Arc) -> Self { + let options = Box::new(opt.clone()); + let index_block_options = Box::new(opt.clone()); let mut filter_block: Option; if opt.filter_policy.is_none() { filter_block = None; }else { - filter_block = Some(FilterBlockBuilder::new_with_policy(opt.filter_policy.unwrap())); + filter_block = Some(FilterBlockBuilder::new_with_policy(opt.filter_policy.unwrap().clone())); } Self { diff --git a/src/util/options.rs b/src/util/options.rs index 4764b88..17dc0da 100644 --- a/src/util/options.rs +++ b/src/util/options.rs @@ -6,6 +6,8 @@ use crate::traits::filter_policy_trait::FilterPolicy; use crate::util::comparator::BytewiseComparatorImpl; use crate::util::env::Env; +pub type OptionRef = Arc>; + pub enum CompressionType { NoCompression, SnappyCompression @@ -17,6 +19,9 @@ pub struct Cache {} // use crate::traits::filter_policy_trait::FilterPolicy; // pub struct FilterPolicy {} +// pub cmp: Box, +// | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ the trait `Clone` is not implemented for `dyn Comparator` +// #[derive(Clone)] pub struct Options { /// Comparator used to define the order of keys in the table. @@ -98,7 +103,7 @@ pub struct Options { /// If non-null, use the specified filter policy to reduce disk reads. /// Many applications will benefit from passing the result of /// NewBloomFilterPolicy() here. - pub filter_policy: Option>, + pub filter_policy: Option>>, } /// Options that control read operations pub struct ReadOptions { -- Gitee From c6654bb7c15d67aa789e4ccc992d83a763156f6c Mon Sep 17 00:00:00 2001 From: fengyang Date: Tue, 4 Apr 2023 10:40:46 +0800 Subject: [PATCH 46/50] =?UTF-8?q?Ptr=E5=AE=9A=E4=B9=89=E4=B8=8E=E7=BC=96?= =?UTF-8?q?=E8=AF=91=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/db/builder.rs | 4 ++-- src/db/builder_test.rs | 5 ----- src/db/table_cache.rs | 9 +++++---- src/table/block_builder.rs | 16 ++++++++++------ src/table/filter_block.rs | 24 +++++++++++------------- src/table/table_builder.rs | 29 +++++++++++++---------------- src/traits/filter_policy_trait.rs | 5 +++++ src/util/options.rs | 7 ++++--- 8 files changed, 50 insertions(+), 49 deletions(-) diff --git a/src/db/builder.rs b/src/db/builder.rs index a98102e..5a35ddc 100644 --- a/src/db/builder.rs +++ b/src/db/builder.rs @@ -10,7 +10,7 @@ use crate::table::table::Table; use crate::table::table_builder::TableBuilder; use crate::traits::DataIterator; use crate::util::env::Env; -use crate::util::options::{Options, ReadOptions}; +use crate::util::options::{Options, OptionsPtr, ReadOptions}; use crate::util::Result; use crate::util::slice::Slice; use crate::util::status::{LevelError, Status}; @@ -43,7 +43,7 @@ impl BuildTable { /// ``` /// /// ``` - pub fn build_table(dbname: &Slice, env: &Env, options: &Options, + pub fn build_table(dbname: &Slice, env: &Env, options: OptionsPtr, table_cache: &TableCache, mut iter: Box, meta: &mut FileMetaData) -> Result { meta.set_file_size(0); diff --git a/src/db/builder_test.rs b/src/db/builder_test.rs index 7f1746a..46899e3 100644 --- a/src/db/builder_test.rs +++ b/src/db/builder_test.rs @@ -7,11 +7,6 @@ mod test { #[test] fn test_build_table() { - - // BuildTable::build_table(&Slice::from("a"), Options::default(), - // TableCache::new(), - // Box::new()); println!("get_name: {}", "a"); - } } \ No newline at end of file diff --git a/src/db/table_cache.rs b/src/db/table_cache.rs index 5824617..6fa4804 100644 --- a/src/db/table_cache.rs +++ b/src/db/table_cache.rs @@ -4,7 +4,7 @@ use crate::util::options::ReadOptions; use crate::util::slice::Slice; use crate::util::Result; -struct Saver {} +pub struct Saver {} pub struct TableCache {} @@ -31,9 +31,10 @@ impl TableCache { /// ``` /// /// ``` - pub fn get(&self, _options: &ReadOptions, _file_number: u64, _file_size: usize, _k: &Slice, _arg: &mut Saver, _handle_result: F) - where F: FnMut(&mut Saver, &Slice, &Slice) -> Result<()> { - () + pub fn get(&self, _options: &ReadOptions, _file_number: u64, _file_size: usize, + _k: &Slice, _arg: &mut Saver, _handle_result: F) + where F: FnMut(&mut Saver, &Slice, &Slice) { + todo!() } /// 根据文件号消除缓存 /// diff --git a/src/table/block_builder.rs b/src/table/block_builder.rs index 3ca4eec..63166ce 100644 --- a/src/table/block_builder.rs +++ b/src/table/block_builder.rs @@ -1,6 +1,6 @@ use std::fs::File; use std::sync::Arc; -use crate::util::options::Options; +use crate::util::options::{Options, OptionsPtr}; use crate::util::slice::Slice; use crate::util::Result; @@ -13,10 +13,14 @@ use crate::util::status::Status; // Arc会追踪这个指针的所有拷贝,当最后一份拷贝离开作用域时,它就会安全释放内存。 // 智能指针 Box。 box 允许你将一个值放在堆上而不是栈上。留在栈上的则是指向堆数据的指针。 + +/// BlockBuilder 的 `Arc` 别名 +pub type BlockBuilderPtr = Arc; + pub struct BlockBuilder { // 在 BlockBuilder 初始化时,指定的配置项 - options: Box, - index_block_options: Box, + options: OptionsPtr, + index_block_options: OptionsPtr, // SSTable 生成后的文件 file: Arc, @@ -25,13 +29,13 @@ pub struct BlockBuilder { status: Status, // 生成 SSTable 中的数据区域 - data_block: Arc, + data_block: BlockBuilderPtr, // 生成 SSTable 中的数据索引区域 - index_block: Arc, + index_block: BlockBuilderPtr, } impl BlockBuilder { - pub fn new(options: &Options) -> Self { + pub fn new(options: OptionsPtr) -> Self { todo!() } diff --git a/src/table/filter_block.rs b/src/table/filter_block.rs index 53597ae..c24694e 100644 --- a/src/table/filter_block.rs +++ b/src/table/filter_block.rs @@ -1,7 +1,7 @@ use std::io::Write; use std::sync::Arc; use crate::traits::coding_trait::CodingTrait; -use crate::traits::filter_policy_trait::FilterPolicy; +use crate::traits::filter_policy_trait::{FilterPolicy, FilterPolicyPtr}; use crate::util::coding::Coding; use crate::util::slice::Slice; @@ -11,13 +11,11 @@ use crate::util::Result; const FILTER_BASE_LG: usize = 11; const FILTER_BASE: usize = 1 << FILTER_BASE_LG; -pub type FilterPolicyRef = Arc>; - /// /// meta block 构建器 /// pub trait FilterBlock { - fn new_with_policy(policy: FilterPolicyRef) -> Self; + fn new_with_policy(policy: FilterPolicyPtr) -> Self; /// /// 构造一个 FilterBlockBuilder @@ -34,7 +32,7 @@ pub trait FilterBlock { /// ``` /// /// ``` - fn new_with_policy_capacity(policy: FilterPolicyRef, capacity: usize) -> Self; + fn new_with_policy_capacity(policy: FilterPolicyPtr, capacity: usize) -> Self; /// 设置block的起始位置 /// @@ -77,7 +75,7 @@ pub trait FilterBlock { /// ``` fn finish(&mut self) -> Result; - fn get_policy(&self) -> FilterPolicyRef; + fn get_policy(&self) -> FilterPolicyPtr; fn get_keys(&self) -> Vec; @@ -92,7 +90,7 @@ pub trait FilterBlock { /// SSTable 文件里面的 meta block 构建器, 按内存里面指定的格式整理在内存中 pub struct FilterBlockBuilder { - policy: FilterPolicyRef, + policy: FilterPolicyPtr, // Flattened key contents keys: Vec, // Starting index in keys_ of each key @@ -105,7 +103,7 @@ pub struct FilterBlockBuilder { } pub struct FilterBlockReader { - policy: FilterPolicyRef, + policy: FilterPolicyPtr, // Pointer to filter data (at block-start) data: Vec, // Pointer to beginning of offset array (at block-end) @@ -117,11 +115,11 @@ pub struct FilterBlockReader { } impl FilterBlock for FilterBlockBuilder { - fn new_with_policy(policy: FilterPolicyRef) -> Self { + fn new_with_policy(policy: FilterPolicyPtr) -> Self { FilterBlock::new_with_policy_capacity(policy, 64) } - fn new_with_policy_capacity(policy: FilterPolicyRef, capacity: usize) -> Self { + fn new_with_policy_capacity(policy: FilterPolicyPtr, capacity: usize) -> Self { let keys:Vec = Vec::with_capacity(capacity); let start:Vec = Vec::with_capacity(capacity); let result:Vec = Vec::with_capacity(capacity); @@ -186,7 +184,7 @@ impl FilterBlock for FilterBlockBuilder { Ok(Slice::from_buf(&self.result)) } - fn get_policy(&self) -> FilterPolicyRef { + fn get_policy(&self) -> FilterPolicyPtr { self.policy.clone() } @@ -256,7 +254,7 @@ impl FilterBlockBuilder { } impl FilterBlockReader { - pub fn new_with_policy(policy: FilterPolicyRef, contents: Slice) -> Self { + pub fn new_with_policy(policy: FilterPolicyPtr, contents: Slice) -> Self { let data = Vec::new(); let offset = Vec::new(); @@ -291,7 +289,7 @@ impl FilterBlockReader { todo!() } - pub fn get_policy(&self) -> FilterPolicyRef { + pub fn get_policy(&self) -> FilterPolicyPtr { self.policy.clone() } diff --git a/src/table/table_builder.rs b/src/table/table_builder.rs index e940246..7e0155e 100644 --- a/src/table/table_builder.rs +++ b/src/table/table_builder.rs @@ -5,7 +5,7 @@ use crate::table::block_builder::BlockBuilder; use crate::table::filter_block::{FilterBlock, FilterBlockBuilder}; use crate::table::format::BlockHandle; use crate::traits::filter_policy_trait::FilterPolicy; -use crate::util::options::{CompressionType, OptionRef, Options}; +use crate::util::options::{CompressionType, OptionsPtr, Options}; use crate::util::slice::Slice; use crate::util::status::Status; use crate::util::unsafe_slice::UnsafeSlice; @@ -16,8 +16,8 @@ pub struct TableBuilder { /// TableBuilder Rep 结构体, 内部使用 struct Rep<> { - options: Box, - index_block_options: Box, + options: OptionsPtr, + index_block_options: OptionsPtr, // SSTable 生成后的文件 file: Arc, @@ -47,7 +47,7 @@ struct Rep<> { } impl TableBuilder { - pub fn new_with_writable_file(options: &Options, writable_file: Arc) -> Self { + pub fn new_with_writable_file(options: OptionsPtr, writable_file: Arc) -> Self { let rep = Rep::new(options, writable_file); // Self { @@ -95,33 +95,30 @@ impl TableBuilder { } impl Rep { - pub fn new(opt: OptionRef, writableFile: Arc) -> Self { - let options = Box::new(opt.clone()); - let index_block_options = Box::new(opt.clone()); - + pub fn new(opt: OptionsPtr, writableFile: Arc) -> Self { let mut filter_block: Option; if opt.filter_policy.is_none() { filter_block = None; }else { - filter_block = Some(FilterBlockBuilder::new_with_policy(opt.filter_policy.unwrap().clone())); + filter_block = Some(FilterBlockBuilder::new_with_policy(opt.filter_policy.clone().unwrap())); } Self { - options, - index_block_options, + options: opt.clone(), + index_block_options: opt.clone(), file: writableFile, offset: 0, // default Status::OK status: Status::default(), - data_block: BlockBuilder::new(&options.as_ref()), - index_block: BlockBuilder::new(&index_block_options.as_ref()), - last_key: Default::default(), + data_block: BlockBuilder::new(opt.clone()), + index_block: BlockBuilder::new(opt.clone()), + last_key: Slice::default(), num_entries: 0, closed: false, filter_block, pending_index_entry: false, - pending_handle: Default::default(), - compressed_output: Default::default(), + pending_handle: BlockHandle::default(), + compressed_output: Slice::default(), } } } \ No newline at end of file diff --git a/src/traits/filter_policy_trait.rs b/src/traits/filter_policy_trait.rs index 3920604..aaafafd 100644 --- a/src/traits/filter_policy_trait.rs +++ b/src/traits/filter_policy_trait.rs @@ -1,5 +1,10 @@ +use std::sync::Arc; use crate::util::slice::Slice; + +/// FilterPolicy 的 `Arc>` 别名 +pub type FilterPolicyPtr = Arc>; + /// 用于key过滤,可以快速的排除不存在的key pub trait FilterPolicy { diff --git a/src/util/options.rs b/src/util/options.rs index 17dc0da..ba51a8c 100644 --- a/src/util/options.rs +++ b/src/util/options.rs @@ -2,11 +2,12 @@ use std::sync::Arc; use crate::db::db::Snapshot; use crate::db::db_format::InternalKeyComparator; use crate::traits::comparator_trait::Comparator; -use crate::traits::filter_policy_trait::FilterPolicy; +use crate::traits::filter_policy_trait::{FilterPolicy, FilterPolicyPtr}; use crate::util::comparator::BytewiseComparatorImpl; use crate::util::env::Env; -pub type OptionRef = Arc>; +/// Options 的 `Arc>` 别名 +pub type OptionsPtr = Arc>; pub enum CompressionType { NoCompression, @@ -103,7 +104,7 @@ pub struct Options { /// If non-null, use the specified filter policy to reduce disk reads. /// Many applications will benefit from passing the result of /// NewBloomFilterPolicy() here. - pub filter_policy: Option>>, + pub filter_policy: Option, } /// Options that control read operations pub struct ReadOptions { -- Gitee From 400fa63b8910a13635febcac60c16ac9a9bf53fc Mon Sep 17 00:00:00 2001 From: wangboo <5417808+wangboa@user.noreply.gitee.com> Date: Thu, 6 Apr 2023 19:49:34 +0800 Subject: [PATCH 47/50] 4-6 code review --- README.md | 5 ++--- src/db/filename.rs | 1 + src/table/table_builder.rs | 2 ++ 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f05c752..c45a6e3 100644 --- a/README.md +++ b/README.md @@ -84,7 +84,7 @@ RUSTFLAGS='--cfg CORE_DEBUG="false"' cargo build --release | table.Iterator(TwoLevelIterator) | kazeseiriou | 0% | | table.Iterator(tabletest.KeyConvertingIterator) | kazeseiriou | 0% | | table.Iterator(dbtest.ModelIter) | kazeseiriou | 0% | -| table.Iterator(Block::Iter) | fengyang | 0% | +| table.Iterator(Block::Iter) | wangboo | 0% | | IteratorWrapper | kazeseiriou | 0% | | db.MemTable(MemTable, MemTableIterator) | wangboo,tzcyujunyong | 20% | | db.Builder | fengyang | 85% | @@ -94,7 +94,7 @@ RUSTFLAGS='--cfg CORE_DEBUG="false"' cargo build --release | table.FilterBlockBuilder | fengyang | 30% | | FilterBlock, FilterBlockReader | fengyang | 80% | | SSTable | fengyang | 0% | -| table.Table | peach,tzcyujunyong | | +| table.Table | peach,tzcyujunyong | 30% | | db.leveldb_util | wangboo | 0% | | db.log_format | wangboo | 90% | | db.LogReader | wangboo | 90% | @@ -104,7 +104,6 @@ RUSTFLAGS='--cfg CORE_DEBUG="false"' cargo build --release | db.VersionEdit(Tag, VersionEdit, FileMetaData) | fengyang | 20% | | db.VersionSet(Version, LevelFileNumIterator, SaverState) | fengyang | 20% | | WriteBatch | tzcyujunyong,wangboo | 50% | -| table.table | | 30% | | db.filename | | | | | 半支烟 | 40% | diff --git a/src/db/filename.rs b/src/db/filename.rs index fb1db13..8c47c9d 100644 --- a/src/db/filename.rs +++ b/src/db/filename.rs @@ -1,5 +1,6 @@ use crate::util::slice::Slice; +// TODo 参考 PathBuf pub struct FileName { } diff --git a/src/table/table_builder.rs b/src/table/table_builder.rs index 7e0155e..2d7ed0a 100644 --- a/src/table/table_builder.rs +++ b/src/table/table_builder.rs @@ -102,6 +102,8 @@ impl Rep { }else { filter_block = Some(FilterBlockBuilder::new_with_policy(opt.filter_policy.clone().unwrap())); } + // TODo if let sytax + // let filter_block = opt.filter_policy.map(|e|FilterBlockBuilder::new_with_policy(e.clone().unwrap())); Self { options: opt.clone(), -- Gitee From cef589b56509299205a7a943e3df0830c3c7940e Mon Sep 17 00:00:00 2001 From: fengyang Date: Thu, 6 Apr 2023 21:06:33 +0800 Subject: [PATCH 48/50] =?UTF-8?q?TableBuilder=20struct=E5=AE=9A=E4=B9=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/table/block_builder.rs | 6 +++--- src/table/format.rs | 34 ++++++++++++++++++---------------- src/table/table_builder.rs | 12 ++++++++++++ 3 files changed, 33 insertions(+), 19 deletions(-) diff --git a/src/table/block_builder.rs b/src/table/block_builder.rs index 63166ce..f24a995 100644 --- a/src/table/block_builder.rs +++ b/src/table/block_builder.rs @@ -39,7 +39,7 @@ impl BlockBuilder { todo!() } - /// 添加数据到block + /// 向datablock增加entry /// /// # Arguments /// @@ -68,7 +68,7 @@ impl BlockBuilder { todo!() } - /// 构造block + /// 追加Restart points /// /// /// # Examples @@ -91,7 +91,7 @@ impl BlockBuilder { todo!() } - /// 估算当前的block大小 + /// 估算当前的block大小, 超过一定大小后,写入文件 /// /// # Examples /// diff --git a/src/table/format.rs b/src/table/format.rs index 01c9610..ccfe72b 100644 --- a/src/table/format.rs +++ b/src/table/format.rs @@ -12,26 +12,28 @@ pub const k_max_encoded_length: u32 = 10 + 10; /// of two block handles and a magic number. pub const k_encoded_length: u32 = 2 * k_max_encoded_length + 8; -// // kTableMagicNumber was picked by running -// // echo http://code.google.com/p/leveldb/ | sha1sum -// // and taking the leading 64 bits. -// pub const k_table_magic_number: &str = 0xdb4775248b80fb57ull; +/// Footer 的大小为 48 字节,内容是一个 8 字节的 magic number 和两个 BlockHandle 构成 +/// 在 Footer::EncodeTo 和 Footer::DecodeFrom 中起作用 +/// kTableMagicNumber was picked by running +/// echo http://code.google.com/p/leveldb/ | sha1sum +/// and taking the leading 64 bits. +pub const k_table_magic_number: u64 = 0xdb4775248b80fb57; /// 1-byte type + 32-bit crc pub const k_block_trailer_size: usize = 5; pub struct BlockHandle { // 偏移量 - offset_: u64, + offset: u64, // - size_: u64 + size: u64 } /// Footer encapsulates the fixed information stored at the tail /// end of every table file. pub struct Footer { - metaindex_handle_: BlockHandle, - index_handle_: BlockHandle + meta_index_handle: BlockHandle, + index_handle: BlockHandle } pub struct BlockContents { @@ -102,7 +104,7 @@ trait BlockTrait { trait FootTrait { // The block handle for the metaindex block of the table - fn metaindex_handle(&self) -> BlockHandle; + fn meta_index_handle(&self) -> BlockHandle; fn set_metaindex_handle(&mut self, h: BlockHandle); @@ -153,19 +155,19 @@ trait BlockContent { impl BlockTrait for BlockHandle { fn offset(&self) -> u64 { - self.offset_ + self.offset } fn set_offset(&mut self, offset: u64) { - self.offset_ = offset; + self.offset = offset; } fn size(&self) -> u64 { - self.size_ + self.size } fn set_size(&mut self, size: u64) { - self.size_ = size; + self.size = size; } fn encode_to(&self) -> Result { @@ -190,14 +192,14 @@ impl Default for BlockHandle { #[inline] fn default() -> Self { BlockHandle { - offset_: 0, - size_: 0, + offset: 0, + size: 0, } } } impl FootTrait for Footer { - fn metaindex_handle(&self) -> BlockHandle { + fn meta_index_handle(&self) -> BlockHandle { todo!() } diff --git a/src/table/table_builder.rs b/src/table/table_builder.rs index 2d7ed0a..dc9cc91 100644 --- a/src/table/table_builder.rs +++ b/src/table/table_builder.rs @@ -10,6 +10,12 @@ use crate::util::slice::Slice; use crate::util::status::Status; use crate::util::unsafe_slice::UnsafeSlice; +/// 在一个 SSTable 中,文件末尾的 Footer 是定长的, +/// 其他数据都被划分成一个个变长的 block: +/// index block(@see Footer#index_handle)、 +/// meta_index block(@see Footer#meta_index_handle)、 +/// meta blocks、 +/// data blocks。 pub struct TableBuilder { rep: Box } @@ -41,6 +47,7 @@ struct Rep<> { pending_index_entry: bool, // Handle to add to index block // pending_handle 记录需要生成数据索引的数据块在 SSTable 中的偏移量和大小 + // 也就是说, pending_handle 主要用于表示当前块的offset及size。 pending_handle: BlockHandle, compressed_output: Slice, @@ -57,18 +64,22 @@ impl TableBuilder { todo!() } + /// 写入 entry pub fn add(&self, key: &UnsafeSlice, value: &UnsafeSlice) { todo!() } + /// flush到文件 pub fn flush(&self) { todo!() } + /// block->Finish、压缩 pub fn write_block(&self, block: &BlockBuilder, handler: &BlockHandle) { todo!() } + /// datablock写入文件,添加压缩方式、crc。 pub fn write_raw_block(&self, block_contents: &UnsafeSlice, compression_type: CompressionType, handler: &BlockHandle) { todo!() } @@ -77,6 +88,7 @@ impl TableBuilder { todo!() } + /// 剩余datablock写入文件,并生成管理区。 pub fn finish(&self) -> Status { todo!() } -- Gitee From 8f92c247c340c890698ba0d2daea82922c251cb9 Mon Sep 17 00:00:00 2001 From: fengyang Date: Fri, 7 Apr 2023 12:33:24 +0800 Subject: [PATCH 49/50] =?UTF-8?q?format=20api=20=E8=B0=83=E6=95=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/table/format.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/table/format.rs b/src/table/format.rs index ccfe72b..e082810 100644 --- a/src/table/format.rs +++ b/src/table/format.rs @@ -47,7 +47,7 @@ pub struct BlockContents { heap_allocated:bool, } -trait BlockTrait { +trait BlockHandleTrait { /// /// The offset of the block in the file. /// @@ -153,7 +153,7 @@ trait BlockContent { } -impl BlockTrait for BlockHandle { +impl BlockHandleTrait for BlockHandle { fn offset(&self) -> u64 { self.offset } @@ -199,6 +199,7 @@ impl Default for BlockHandle { } impl FootTrait for Footer { + /// The block handle for the metaindex block of the table fn meta_index_handle(&self) -> BlockHandle { todo!() } -- Gitee From 1ac1d7a7dfbd795984c85610fb7d238563be5f7b Mon Sep 17 00:00:00 2001 From: fengyang Date: Fri, 7 Apr 2023 13:57:21 +0800 Subject: [PATCH 50/50] =?UTF-8?q?TableBuilder=20=E5=A2=9E=E5=8A=A0?= =?UTF-8?q?=E6=B3=A8=E9=87=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/table/filter_block.rs | 1 + src/table/table_builder.rs | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/table/filter_block.rs b/src/table/filter_block.rs index c24694e..0013382 100644 --- a/src/table/filter_block.rs +++ b/src/table/filter_block.rs @@ -13,6 +13,7 @@ const FILTER_BASE: usize = 1 << FILTER_BASE_LG; /// /// meta block 构建器 +/// FilterBlock,实质上就是SST文件里面的 meta block /// pub trait FilterBlock { fn new_with_policy(policy: FilterPolicyPtr) -> Self; diff --git a/src/table/table_builder.rs b/src/table/table_builder.rs index dc9cc91..c2d4492 100644 --- a/src/table/table_builder.rs +++ b/src/table/table_builder.rs @@ -12,9 +12,9 @@ use crate::util::unsafe_slice::UnsafeSlice; /// 在一个 SSTable 中,文件末尾的 Footer 是定长的, /// 其他数据都被划分成一个个变长的 block: -/// index block(@see Footer#index_handle)、 -/// meta_index block(@see Footer#meta_index_handle)、 -/// meta blocks、 +/// index block(@see format.BlockHandle、Footer#index_handle)、 +/// meta_index block(@see format.BlockHandle、Footer#meta_index_handle)、 +/// meta blocks(@see table.FilterBlock)、 /// data blocks。 pub struct TableBuilder { rep: Box -- Gitee