Commit 2e6302fc authored by Philipp Götze's avatar Philipp Götze
Browse files

Preparations for Evaluation

parent e2d7a4fd
......@@ -135,4 +135,24 @@ add_custom_command(
# ${nvml_SOURCE_DIR}/src/include
# ${THIRD_PARTY_DIR}/nvml/include
)
# Key/Value Store for Non-Volatile Memory(pmem.io)
# The whole pmemkv integration below is wrapped in if(FALSE), i.e. it is
# currently disabled and none of these commands are evaluated.
if(FALSE)
# Fetch the pmemkv sources (master branch) from GitHub via download_project.
download_project(PROJ pmemkv
GIT_REPOSITORY https://github.com/pmem/pmemkv.git
GIT_TAG master
UPDATE_DISCONNECTED 1
QUIET
)
# Rule producing ${THIRD_PARTY_DIR}/pmemkv:
#  1. run the "configure" make target inside the pmemkv source directory,
#  2. create the include/ and lib/ directories below the output directory,
#  3. run the "install" make target with the output directory as prefix.
add_custom_command(
OUTPUT ${THIRD_PARTY_DIR}/pmemkv
COMMAND ${CMAKE_COMMAND} -E chdir ${pmemkv_SOURCE_DIR} $(MAKE) configure
COMMAND ${CMAKE_COMMAND} -E make_directory ${THIRD_PARTY_DIR}/pmemkv/include
COMMAND ${CMAKE_COMMAND} -E make_directory ${THIRD_PARTY_DIR}/pmemkv/lib
COMMAND ${CMAKE_COMMAND} -E chdir ${pmemkv_SOURCE_DIR} $(MAKE) install prefix=${THIRD_PARTY_DIR}/pmemkv
)
endif()
endif()
......@@ -16,6 +16,9 @@ include(CTest)
# Installation path
set(PIPEFABRIC_DIR "/usr/local/pfabric")
# Set to 1 if you need log output
add_definitions(-DDO_LOG=1)
#The following variables enable or disable additional functionalities, which can be switched off to reduce build time.
# Use the boost::spirit parser for converting strings to numbers
......@@ -49,7 +52,7 @@ option(BUILD_ONLY_LIBS
# If switched to OFF, no tests will be built
option(BUILD_TEST_CASES
"build tests for pipefabric functionality"
ON
OFF
)
#Build google benchmark library
......@@ -303,7 +306,9 @@ if(USE_NVML_TABLE)
set(core_sources
${core_sources}
nvm/PTableInfo.cpp
${THIRD_PARTY_DIR}/nvml)
${THIRD_PARTY_DIR}/nvml
# ${THIRD_PARTY_DIR}/pmemkv
)
add_library(pfabric_core SHARED
${core_sources}
)
......@@ -312,8 +317,10 @@ if(USE_NVML_TABLE)
${ZEROMQ_LIBRARIES}
${NVML_LIBRARIES})
add_executable(nvm_test nvm_test.cpp)
target_link_libraries(nvm_test pfabric_core)
add_executable(nvm_scan nvm_scan.cpp)
target_link_libraries(nvm_scan pfabric_core)
add_executable(nvm_insert nvm_insert.cpp)
target_link_libraries(nvm_insert pfabric_core)
endif()
#-----------------------------------------------------------------------------------------
......
......@@ -22,70 +22,88 @@
#ifndef BDCCInfo_hpp_
#define BDCCInfo_hpp_
#include <nvm/PTableInfo.hpp>
#include <map>
#include <unordered_map>
#include <algorithm>
#include <iterator>
#include "nvm/PTableInfo.hpp"
#include "nvml/include/libpmemobj++/allocator.hpp"
#include "nvml/include/libpmemobj++/detail/persistent_ptr_base.hpp"
#include "nvml/include/libpmemobj++/make_persistent.hpp"
#include "nvml/include/libpmemobj++/p.hpp"
#include "nvml/include/libpmemobj++/persistent_ptr.hpp"
#include "nvml/include/libpmemobj++/transaction.hpp"
#include "nvml/include/libpmemobj++/utils.hpp"
namespace pfabric {
namespace nvm {
namespace nvm {
using nvml::obj::allocator;
using nvml::obj::delete_persistent;
using nvml::obj::make_persistent;
using nvml::obj::p;
using nvml::obj::persistent_ptr;
using nvml::obj::pool_by_vptr;
using nvml::obj::transaction;
using nvml::obj::allocator;
using nvml::obj::p;
/**************************************************************************//**
* \brief Info structure about the BDCC meta data.
*
* It is used in persistent tables to store the BDCC meta data and statistics.
*****************************************************************************/
class BDCCInfo {
using pColumnBitsMap = const std::vector<std::pair<uint16_t, uint16_t>, nvml::obj::allocator<std::pair<uint16_t, uint16_t>>>;
public:
using ColumnBitsMap = const std::unordered_map<uint16_t, uint16_t>; //<mapping from column id to number of bits
explicit BDCCInfo(const ColumnBitsMap &_bitMap) :
bitMap(_bitMap.cbegin(), _bitMap.cend()),
numberOfBins(std::accumulate(_bitMap.begin(), _bitMap.end(), 0,
[](const size_t sum, decltype(*_bitMap.begin()) p) { return sum + p.second; })) {}
const pColumnBitsMap::const_iterator find(uint16_t item) const {
for (auto it = bitMap.cbegin(); it != bitMap.cend(); it++) {
if (it->first == item) return it;
}
return bitMap.cend();
}
class BDCCInfo {
using DimensionUses = std::vector<std::tuple<uint16_t, uint16_t, std::bitset<32>>,
nvml::obj::allocator<std::tuple<uint16_t, uint16_t, std::bitset<32>>>>;
p<size_t> numberOfBins;
p<DimensionUses> dimensions;
const size_t numColumns() const {
return bitMap.size();
}
public:
using ColumnBitsMap = std::map<uint16_t, uint16_t>; //<mapping from column id to number of bits
const size_t numBins() const {
return numberOfBins.get_ro();
}
/** Creates an empty BDCC info: numberOfBins is 0 and dimensions is default-constructed. */
BDCCInfo() : numberOfBins(0), dimensions() {}
const pColumnBitsMap::const_iterator cend() const {
return bitMap.cend();
}
explicit BDCCInfo(const ColumnBitsMap &_bitMap) :
numberOfBins(std::accumulate(_bitMap.begin(), _bitMap.end(), 0,
[](const size_t sum, decltype(*_bitMap.begin()) p) {
return sum + p.second;
})),
dimensions() { deriveMasks(_bitMap); }
/**
 * \brief Searches the dimension entries for a given column id.
 *
 * \param item the column id, compared against the first tuple element of each entry
 * \return a const iterator to the first matching entry, or the past-the-end
 *         const iterator of the dimension entries if there is no match
 */
const auto find(uint16_t item) const {
  const auto &entries = dimensions.get_ro();
  return std::find_if(entries.cbegin(), entries.cend(),
                      [item](const auto &entry) { return std::get<0>(entry) == item; });
}
// private:
const pColumnBitsMap bitMap;
p<const size_t> numberOfBins;
//std::map<uint32_t, std::size_t> histogram;
};/* struct BDCCInfo */
/** Returns the stored value of numberOfBins (read-only access). */
const auto numBins() const {
return numberOfBins.get_ro();
}
/** Returns a const iterator to the first dimension entry. */
const auto begin() const {
return dimensions.get_ro().cbegin();
}
/** Returns the past-the-end const iterator of the dimension entries. */
const auto end() const {
return dimensions.get_ro().cend();
}
private:
/**
 * \brief Fills the dimension entries and derives a bit mask for each of them.
 *
 * For every column in \p colToBits one entry (column id, number of bits,
 * empty mask) is appended to the dimension entries. Afterwards the bit
 * positions numBins()-1 ... 0 are handed out round robin: in every round each
 * column that still has bits left receives the next lower position in its
 * mask.
 *
 * \param colToBits mapping from column id to number of bits; taken by value
 *                  because the bit counts are consumed while distributing
 */
void deriveMasks(ColumnBitsMap colToBits) {
  auto &dims = dimensions.get_rw();

  /* Initialize */
  for (const auto &dim: colToBits) {
    dims.emplace_back(dim.first, dim.second, std::bitset<32>());
  }

  /* Round robin the bins for mapping */
  // NOTE(review): the masks are std::bitset<32>, so a total bit count above 32
  // would index out of range here - confirm callers never exceed that.
  auto bdccSize = numBins();
  while (bdccSize > 0) {
    auto i = 0ul;
    for (auto &dim: colToBits) {
      // Test before decrementing: decrementing an exhausted (zero) counter
      // would wrap it around and make the column look active again.
      if (dim.second > 0) {
        --dim.second;
        std::get<2>(dims[i])[--bdccSize] = 1;
      }
      // Always advance, so that i keeps pointing at the entry belonging to
      // the current column even when earlier columns are exhausted.
      ++i;
    }
  }
}
};/* struct BDCCInfo */
}
} /* namespace pfabric::nvm */
#endif /* BDCCInfo_hpp_ */
......@@ -19,99 +19,105 @@
* If not you can find the GPL at http://www.gnu.org/copyleft/gpl.html
*/
#ifndef NVM_Block_hpp_
#define NVM_Block_hpp_
#ifndef DataNode_hpp_
#define DataNode_hpp_
#include <array>
#include "nvml/include/libpmemobj++/make_persistent.hpp"
#include "nvml/include/libpmemobj++/persistent_ptr.hpp"
#define PLOG(msg) if(false/*DO_LOG*/) std::cout << "[PTable] " << msg << '\n';
namespace pfabric {
namespace nvm {
using nvml::obj::persistent_ptr;
using nvml::obj::make_persistent;
using nvml::obj::delete_persistent;
/** Positions in NVM_Block */
const int gDDCRangePos1 = 0;
const int gDDCRangePos2 = 4;
const int gCountPos = 8;
const int gFreeSpacePos = 12;
const int gSmaOffsetPos = 14;
const int gDataOffsetPos = 16;
/** Sizes/Lengths in NVM_Block */
const int gFixedHeaderSize = 14;
const int gDDCValueSize = 4;
const int gAttrOffsetSize = 4;
const int gOffsetSize = 2;
/** The size of a single block in persistent memory */
static constexpr uint16_t gBlockSize = 1 << 15; // 32KB
/**
* \brief This type represents a byte array used for persistent structures.
*
* A BDCC_Block is a PAX oriented data block with the following structure for 32KB:
* <ddc_range><ddc_cnt><sma_offset_0><data_offset_0> ...<sma_offset_n><data_offset_n>
* <sma_min_0><sma_max_0><data_vector_0> ... <sma_min_n><sma_max_n><data_vector_n>
* 0 ddc_range -> long (x2) - 8 Byte
* 8 ddc_cnt -> long - 4 Byte
* 12 free_space -> unsigned short
* for each attribute:
* 14 sma_offset_x -> unsigned short - 2 Byte (depends on block size)
* 16 data_offset_x -> unsigned short
* ...
*
* for each attribute (int, double):
* . sma_min_x -> size of attributes data type
* . sma_max_x -> size of attributes data type
* . data_vector -> size of attributes data type * ddc_cnt
* ...
*
* for each attribute (string - data starts at the end of the minipage):
* . sma_min_offset_x -> unsigned short
* . sma_max_offset_x -> unsigned short
* . data_offset_vector -> unsigned short * ddc_cnt
* . ...
* . data -> size of all strings + ddc_cnt (Nul termination)
*/
using BDCC_Block = typename std::array<uint8_t, gBlockSize>;
/**
 * A node holding persistent pointers to one BDCC_Block, to a fixed-size key
 * array and to the next node.
 *
 * \tparam KeyType the element type of the key array
 */
template<typename KeyType>
struct DataNode {
using KeyVector = std::array<KeyType, 8192>; // <KeyType, nvml::obj::allocator<KeyType>>;
/** Creates a node without block and successor; only the key array is allocated. */
DataNode() : next(nullptr), block(nullptr) {
keys = make_persistent<KeyVector>();
}
/** Creates a node without successor; allocates a persistent copy of _block and the key array. */
DataNode(BDCC_Block _block) : next(nullptr) {
block = make_persistent<BDCC_Block>(_block);
keys = make_persistent<KeyVector>();
}
persistent_ptr<struct DataNode> next;  //< pointer to the next node (may be nullptr)
persistent_ptr<BDCC_Block> block;      //< pointer to the data block (may be nullptr)
persistent_ptr<KeyVector> keys;        //< pointer to the key array
/**
 * Releases the block (if any) and then this node itself.
 * The successor is only unlinked, not deleted (the delete call is commented out).
 * NOTE(review): keys is not released in this function - confirm whether that is intended.
 */
void clear() {
if (next) {
//delete_persistent<struct DataNode>(next);
next = nullptr;
}
if (block) {
delete_persistent<BDCC_Block>(block);
block = nullptr;
}
// the node deletes itself; it must not be accessed after this call
delete_persistent<struct DataNode>(this);
}
};
} /* end namespace nvm */
namespace nvm {
using nvml::obj::persistent_ptr;
using nvml::obj::make_persistent;
using nvml::obj::delete_persistent;
/** Positions in NVM_Block */
const int gDDCRangePos1 = 0;
const int gDDCRangePos2 = 4;
const int gCountPos = 8;
const int gFreeSpacePos = 12;
const int gSmaOffsetPos = 14;
const int gDataOffsetPos = 16;
/** Sizes/Lengths in NVM_Block */
const int gFixedHeaderSize = 14;
const int gDDCValueSize = 4;
const int gAttrOffsetSize = 4;
const int gOffsetSize = 2;
/** The size of a single block in persistent memory */
static constexpr std::size_t gBlockSize = 1 << 14; // 12->4KB, 15->32KB, max 16 due to data types
/**
* \brief This type represents a byte array used for persistent structures.
*
* A BDCC_Block is a PAX oriented data block with the following structure (described for a 32KB block; the actual block size is gBlockSize):
* <ddc_range><ddc_cnt><sma_offset_0><data_offset_0> ...<sma_offset_n><data_offset_n>
* <sma_min_0><sma_max_0><data_vector_0> ... <sma_min_n><sma_max_n><data_vector_n>
* 0 ddc_range -> long (x2) - 8 Byte
* 8 ddc_cnt -> long - 4 Byte
* 12 free_space -> unsigned short
* for each attribute:
* 14 sma_offset_x -> unsigned short - 2 Byte (depends on block size)
* 16 data_offset_x -> unsigned short
* ...
*
* for each attribute (int, double):
* . sma_min_x -> size of attributes data type
* . sma_max_x -> size of attributes data type
* . data_vector -> size of attributes data type * ddc_cnt
* ...
*
* for each attribute (string - data starts at the end of the minipage):
* . sma_min_offset_x -> unsigned short
* . sma_max_offset_x -> unsigned short
* . data_offset_vector -> unsigned short * ddc_cnt
* . ...
* . data -> size of all strings + ddc_cnt (Nul termination)
*/
using BDCC_Block = typename std::array<uint8_t, gBlockSize>;
/**
 * \brief A node embedding one BDCC_Block together with its keys and a histogram.
 *
 * The block, a fixed-size key array and the histogram are stored inside the
 * node (wrapped in p<>); next points to another node.
 *
 * \tparam KeyType the element type of the key array
 */
template<typename KeyType>
struct DataNode {
  using KeyVector = std::array<KeyType, 8192>; // <KeyType, nvml::obj::allocator<KeyType>>;
  // The allocator's value_type has to match the map's value_type
  // (std::pair<const Key, T>), not just the key type.
  using HistogramType = std::unordered_map<uint32_t,
                                           std::size_t,
                                           std::hash<uint32_t>,
                                           std::equal_to<uint32_t>,
                                           nvml::obj::allocator<std::pair<const uint32_t, std::size_t>>>;

  /** Creates a node without successor; block, keys and histogram are default-initialised. */
  DataNode() : next(nullptr) {}

  /** Creates a node without successor whose block is a copy of _block. */
  DataNode(BDCC_Block _block) : next(nullptr), block(_block) {}

  persistent_ptr<struct DataNode> next; //< pointer to the next node (may be nullptr)
  p<BDCC_Block> block;                  //< the embedded data block
  p<KeyVector> keys;                    //< the embedded key array
  p<HistogramType> histogram;           //< map from a 32-bit value to its count

  /**
   * \brief Calculates the weighted average of the histogram keys.
   *
   * Sums key * count over all histogram entries and divides the result by
   * the 32-bit counter read from the block at position gCountPos.
   *
   * \return the average, or 0 if the block's counter is 0
   */
  const uint32_t calcAverageBDCC() const {
    const auto count = reinterpret_cast<const uint32_t &>(block.get_ro()[gCountPos]);
    // An empty block has no average; avoid the division by zero.
    if (count == 0) return 0;

    // Accumulate in 64 bits: key * count easily exceeds the 32-bit range.
    uint64_t sum = 0;
    for (const auto &bdccValue : histogram.get_ro()) {
      sum += static_cast<uint64_t>(bdccValue.first) * bdccValue.second;
    }
    return static_cast<uint32_t>(sum / count);
  }

  /**
   * \brief Unlinks the successor and releases this node.
   *
   * The successor is only unlinked, not deleted (the delete call is
   * commented out). The node deletes itself and must not be accessed
   * after this call.
   */
  void clear() {
    if (next) {
      //delete_persistent<struct DataNode>(next);
      next = nullptr;
    }
    delete_persistent<struct DataNode>(this);
  }
};
} /* end namespace nvm */
} /* end namespace pfabric */
#endif /* NVM_Block_hpp_ */
#endif /* DataNode_hpp_ */
......@@ -3,6 +3,8 @@
#include <array>
#include "DataNode.hpp"
#include "nvml/include/libpmemobj++/make_persistent.hpp"
#include "nvml/include/libpmemobj++/p.hpp"
#include "nvml/include/libpmemobj++/persistent_ptr.hpp"
......@@ -12,7 +14,8 @@
#define BRANCH_PADDING 0
#define LEAF_PADDING 0
namespace pfabric { namespace nvm {
namespace pfabric {
namespace nvm {
using nvml::obj::delete_persistent;
using nvml::obj::make_persistent;
......@@ -20,8 +23,6 @@ using nvml::obj::p;
using nvml::obj::persistent_ptr;
using nvml::obj::transaction;
//const std::string LAYOUT = "PBPTree";
/**
* A persistent memory implementation of a B+ tree.
*
......@@ -36,6 +37,9 @@ class PBPTree {
static_assert(N > 2, "number of branch keys has to be >2.");
// we need at least one key on a leaf node
static_assert(M > 0, "number of leaf keys should be >0.");
// there is a bug that for odd numbers the tree sometimes breaks (TODO)
static_assert(M % 2 == 0 && N % 2 == 0, "The number of keys should be even");
#ifndef UNIT_TESTS
private:
#else
......@@ -84,15 +88,15 @@ class PBPTree {
* the caller.
*/
struct SplitInfo {
KeyType key; //< the key at which the node was split
LeafOrBranchNode leftChild; //< the resulting lhs child node
LeafOrBranchNode rightChild; //< the resulting rhs child node
KeyType key; //< the key at which the node was split
LeafOrBranchNode leftChild; //< the resulting lhs child node
LeafOrBranchNode rightChild; //< the resulting rhs child node
};
p<unsigned int> depth; //< the depth of the tree, i.e. the number of levels (0 => rootNode is LeafNode)
p<unsigned int> depth; //< the depth of the tree, i.e. the number of levels (0 => rootNode is LeafNode)
LeafOrBranchNode rootNode; //< pointer to the root node (an instance of @c LeafNode or
//< @c BranchNode). This pointer is never @c nullptr.
LeafOrBranchNode rootNode; //< pointer to the root node (an instance of @c LeafNode or
//< @c BranchNode). This pointer is never @c nullptr.
public:
/**
......@@ -154,15 +158,14 @@ class PBPTree {
*/
PBPTree() : depth(0) {
rootNode = newLeafNode();
std::cout << "sizeof(BranchNode) = " << sizeof(BranchNode)
<< ", sizeof(LeafNode) = " << sizeof(LeafNode) << std::endl;
PLOG("created new tree with sizeof(BranchNode) = "
<< sizeof(BranchNode) << ", sizeof(LeafNode) = " << sizeof(LeafNode));
}
/**
* Destructor for the B+ tree. Should delete all allocated nodes.
*/
~PBPTree() {
//TODO: Necessary in Pmem case?
// Nodes are deleted automatically by releasing leafPool and branchPool.
}
......@@ -327,7 +330,7 @@ class PBPTree {
bool eraseFromLeafNode(persistent_ptr<LeafNode> node, const KeyType &key) {
bool deleted = false;
auto pos = lookupPositionInLeafNode(node, key);
if (node->keys.get_rw()[pos] == key) {
if (node->keys.get_ro()[pos] == key) {
for (auto i = pos; i < node->numKeys - 1; i++) {
node->keys.get_rw()[i] = node->keys.get_ro()[i + 1];
node->values.get_rw()[i] = node->values.get_ro()[i + 1];
......@@ -358,7 +361,7 @@ class PBPTree {
if (pos > 0 && leaf->prevLeaf->numKeys > middle) {
// we have a sibling at the left for rebalancing the keys
balanceLeafNodes(leaf->prevLeaf, leaf);
node->keys.get_rw()[pos] = leaf->keys.get_ro()[0];
node->keys.get_rw()[pos-1] = leaf->keys.get_ro()[0];
} else if (pos < node->numKeys && leaf->nextLeaf->numKeys > middle) {
// we have a sibling at the right for rebalancing the keys
balanceLeafNodes(leaf->nextLeaf, leaf);
......@@ -489,10 +492,9 @@ class PBPTree {
bool deleted = false;
// try to find the branch
auto pos = lookupPositionInBranchNode(node, key);
auto n = node->children.get_ro()[pos];
if (d == 1) {
// the next level is the leaf level
auto leaf = n.leaf;
auto leaf = node->children.get_ro()[pos].leaf;
assert(leaf != nullptr);
deleted = eraseFromLeafNode(leaf, key);
unsigned int middle = (M + 1) / 2;
......@@ -501,7 +503,7 @@ class PBPTree {
underflowAtLeafLevel(node, pos, leaf);
}
} else {
auto child = n.branch;
auto child = node->children.get_ro()[pos].branch;
deleted = eraseFromBranchNode(child, d - 1, key);
pos = lookupPositionInBranchNode(node, key);
......@@ -800,6 +802,10 @@ class PBPTree {
insertInLeafNodeAtPosition(sibling, pos - middle, key, val);
// setup the list of leaf nodes
if(node->nextLeaf != nullptr) {
sibling->nextLeaf = node->nextLeaf;
node->nextLeaf->prevLeaf = sibling;
}
node->nextLeaf = sibling;
sibling->prevLeaf = node;
......@@ -1047,22 +1053,14 @@ class PBPTree {
/**
* Constructor for creating a new empty leaf node.
*/
LeafNode() : numKeys(0), nextLeaf(nullptr), prevLeaf(nullptr) {
/*auto pop = nvml::obj::pool_by_vptr(this);
transaction::exec_tx(pop, [&] {
keys = make_persistent<std::array<KeyType, M>>();
values = make_persistent<std::array<ValueType, M>>();
});*/
}
// ~LeafNode() { std::cout << "~LeafNode: " << std::hex << this <<
// std::endl; }
p<unsigned int> numKeys; //< the number of currently stored keys
p<std::array<KeyType, M>> keys; //< the actual keys
p<std::array<ValueType, M>> values; //< the actual values
persistent_ptr<LeafNode> nextLeaf; //< pointer to the subsequent sibling
persistent_ptr<LeafNode> prevLeaf; //< pointer to the preceeding sibling
p<unsigned char> pad_[LEAF_PADDING]; //<
LeafNode() : numKeys(0), nextLeaf(nullptr), prevLeaf(nullptr) {}
p<unsigned int> numKeys; //< the number of currently stored keys
p<std::array<KeyType, M>> keys; //< the actual keys
p<std::array<ValueType, M>> values; //< the actual values
persistent_ptr<LeafNode> nextLeaf; //< pointer to the subsequent sibling
persistent_ptr<LeafNode> prevLeaf; //< pointer to the preceeding sibling
p<unsigned char> pad_[LEAF_PADDING]; //<
};
/**
......@@ -1072,21 +1070,12 @@ class PBPTree {
/**
* Constructor for creating a new empty branch node.
*/
BranchNode() : numKeys(0) {
/*auto pop = nvml::obj::pool_by_vptr(this);
transaction::exec_tx(pop, [&] {
keys = make_persistent<std::array<KeyType, N>>();
children = make_persistent<std::array<LeafOrBranchNode, N+1>>();
});*/
}
// ~BranchNode() { std::cout << "~BranchNode: " << std::hex << this << std::dec <<
// std::endl; }
p<unsigned int> numKeys; //< the number of currently stored keys
p<std::array<KeyType, N>> keys; //< the actual keys
p<std::array<LeafOrBranchNode, N + 1>>
children; //< pointers to child nodes (BranchNode or LeafNode)
p<unsigned char> pad_[BRANCH_PADDING]; //<
BranchNode() : numKeys(0) {}
p<unsigned int> numKeys; //< the number of currently stored keys
p<std::array<KeyType, N>> keys; //< the actual keys
p<std::array<LeafOrBranchNode, N + 1>> children; //< pointers to child nodes (BranchNode or LeafNode)
p<unsigned char> pad_[BRANCH_PADDING]; //<
};
}; /* end class PBPTree */
......
This diff is collapsed.
......@@ -51,7 +51,7 @@ namespace detail {
* \tparam ID
* the index of the requested attribute
*****************************************************************************/
template<typename T, std::size_t ID>
template<typename T, std::size_t ID, typename KeyType>
struct get_helper;
/**************************************************************************//**
......@@ -62,12 +62,12 @@ struct get_helper;
* \tparam ID
* the index of the requested attribute
*****************************************************************************/
template<typename T, std::size_t ID>
template<typename T, std::size_t ID, typename KeyType>
struct get_helper {
static T apply(persistent_ptr<BDCC_Block> block, const uint16_t *offsets) {
static T apply(persistent_ptr<DataNode<KeyType>> node, const uint16_t *offsets) {
T val;
uint8_t* ptr = reinterpret_cast<uint8_t*>(&val);
std::copy(block->begin() + offsets[ID], block->begin() + offsets[ID] + sizeof(T), ptr);
std::copy(node->block.get_ro().begin() + offsets[ID], node->block.get_ro().begin() + offsets[ID] + sizeof(T), ptr);
return val;
}
};
......@@ -80,10 +80,10 @@ struct get_helper {
* \tparam ID
* the index of the requested attribute
*****************************************************************************/
template<std::size_t ID>
struct get_helper<std::string, ID> {
static std::string apply(persistent_ptr<BDCC_Block> block, const uint16_t *offsets) {
return reinterpret_cast<char (&)[]>(block->at(offsets[ID]));
template<std::size_t ID, typename KeyType>
struct get_helper<std::string, ID, KeyType> {
static std::string apply(persistent_ptr<DataNode<KeyType>> node, const uint16_t *offsets) {
return reinterpret_cast<const char (&)[]>(node->block.get_ro()[offsets[ID]]);
}
};
......@@ -95,10 +95,10 @@ struct get_helper<std::string, ID> {
* \tparam ID
* the index of the requested attribute
*****************************************************************************/
template<std::size_t ID>
struct get_helper<int, ID> {