Commit 0d78c26c authored by Alexander Baumstark's avatar Alexander Baumstark
Browse files

Fix build

parent 2d1c3eb3
# Minimum CMake version needed to configure this project.
cmake_minimum_required(VERSION 3.16)
project(arrow)
# Print the configured Arrow root while configuring.
MESSAGE(STATUS "Arrow root = " ${ARROW_ROOT})
# File name of the Arrow library, referenced further down as ${ARROW_LIBS}.
set(ARROW_LIBS libarrow.so)
# NOTE(review): "ARROW_INCLIDE_DIRS" looks like a misspelling of ARROW_INCLUDE_DIRS -- confirm which variable is actually defined.
include_directories(${ARROW_INCLIDE_DIRS})
link_directories(${ARROW_LINK_DIRS})
# Locate Arrow through its CMake package; configuration stops if it cannot be found.
find_package(Arrow REQUIRED)
# Request C++17 for all targets declared after this point.
set(CMAKE_CXX_STANDARD 17)
......@@ -14,16 +10,8 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/lib)
add_library(cs_lib
lib/core/Core.cpp
lib/core/Core.h
lib/column/GenericColumnCursor.cpp
lib/column/GenericColumnCursor.h
lib/column/BaseColumnCursor.cpp
lib/column/BaseColumnCursor.h
lib/column/ChunkedColumnCursor.cpp
lib/column/ChunkedColumnCursor.h
lib/column/ColumnBuilder.cpp
lib/column/ColumnBuilder.h
lib/table/TableCursor.cpp
lib/table/TableCursor.h
lib/table/Table.cpp
lib/table/Table.h
lib/database/Database.cpp
......@@ -35,11 +23,11 @@ target_include_directories (cs_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
# Main executable built from main.cpp.
add_executable(arrow main.cpp)
# NOTE(review): the next two calls differ only in how Arrow is named (${ARROW_LIBS} vs. arrow_shared);
# they look like the before/after lines of this commit -- confirm that only one is meant to remain.
target_link_libraries(arrow LINK_PUBLIC cs_lib ${ARROW_LIBS} ${CMAKE_THREAD_LIBS_INIT})
target_link_libraries(arrow LINK_PUBLIC cs_lib arrow_shared ${CMAKE_THREAD_LIBS_INIT})
# Benchmark executable; note that it does not link cs_lib.
add_executable(bench bench/db_bench.cpp)
# NOTE(review): same pair of Arrow link lines as for the arrow target above -- confirm which one is current.
target_link_libraries(bench ${ARROW_LIBS} ${CMAKE_THREAD_LIBS_INIT})
target_link_libraries(bench arrow_shared ${CMAKE_THREAD_LIBS_INIT})
Include(FetchContent)
......@@ -51,4 +39,4 @@ FetchContent_Declare(
# Make the Catch2 content declared above available to this build.
FetchContent_MakeAvailable(Catch2)
# Test executable built from the database and index test sources.
add_executable(tests test/database_test.cpp test/index_test.cpp)
# NOTE(review): the two link calls differ only in ${ARROW_LIBS} vs. arrow_shared; presumably the
# before/after lines of this commit -- confirm that only one is meant to remain.
target_link_libraries(tests cs_lib ${ARROW_LIBS} ${CMAKE_THREAD_LIBS_INIT} Catch2::Catch2 )
target_link_libraries(tests cs_lib arrow_shared ${CMAKE_THREAD_LIBS_INIT} Catch2::Catch2 )
#include "BaseColumnCursor.h"
#include "ChunkedColumnCursor.h"
namespace db {
/**
 * Construct the typed cursor layer. Nothing is stored here; the table cursor
 * is handed straight on to the GenericColumnCursor base.
 * @param table_cursor cursor forwarded to the base class
 */
template <typename T>
BaseColumnCursor<T>::BaseColumnCursor(TableCursor &table_cursor)
        : GenericColumnCursor(table_cursor)
{
}
/**
 * Factory for typed column cursors.
 *
 * Every encoding currently yields a ChunkedColumnCursor<T>: PLAIN maps to it
 * directly, and DICT (whose dedicated ChunkedDictColumnCursor is not wired in
 * yet) as well as any other value end up at the same fallback.
 *
 * @param column       chunked Arrow array the cursor will read from
 * @param encoding     encoding of the column
 * @param table_cursor table cursor passed to the created column cursor
 * @return shared pointer to the newly created cursor
 */
template <typename T>
std::shared_ptr<BaseColumnCursor<T>>
BaseColumnCursor<T>::makeCursor(
        std::shared_ptr<arrow::ChunkedArray> column, ColumnEncoding encoding, TableCursor &table_cursor)
{
    if (encoding == db::ColumnEncoding::PLAIN) {
        return std::make_shared<ChunkedColumnCursor<T>>(column, table_cursor);
    }

    // DICT and everything else: no specialised cursor available, use the plain one.
    // (planned for DICT: std::make_shared<ChunkedDictColumnCursor<T>>(column, table_cursor))
    return std::make_shared<ChunkedColumnCursor<T>>(column, table_cursor);
}
}
// Explicit instantiations for the two element types used with this template.
// The member definitions above live in this source file, so presumably these
// are what make them available to other translation units -- confirm no other
// element type is needed.
template class db::BaseColumnCursor<db::IntType>;
template class db::BaseColumnCursor<db::StringType>;
\ No newline at end of file
#ifndef ARROW_BASECOLUMNCURSOR_H
#define ARROW_BASECOLUMNCURSOR_H
#include <memory>
#include <arrow/api.h>
#include "../core/Core.h"
#include "GenericColumnCursor.h"
namespace db {
class TableCursor;
/**
 * Typed layer on top of GenericColumnCursor.
 *
 * @tparam T type descriptor; its nested ElementType is the value type handed
 *           out by get().
 */
template <typename T>
class BaseColumnCursor : public GenericColumnCursor {
public:
    /**
     * @param table_cursor table cursor forwarded to GenericColumnCursor
     */
    explicit BaseColumnCursor(TableCursor &table_cursor);

    /**
     * Create the cursor implementation for a column.
     * @param column       chunked Arrow array to read from
     * @param encoding     encoding of the column
     * @param table_cursor table cursor passed to the created cursor
     * @return the new cursor
     */
    static std::shared_ptr<BaseColumnCursor<T>> makeCursor(
            std::shared_ptr<arrow::ChunkedArray> column, ColumnEncoding encoding, TableCursor &table_cursor);

    /**
     * Value at the current position.
     * @return the element
     */
    virtual typename T::ElementType get() = 0;

protected:
    /**
     * Reposition the cursor.
     * @param to zero-based ordinal position of the element in the column
     * @return true on success
     */
    virtual bool seek(uint64_t to) = 0;

    /**
     * Tells whether a following next() would yield another element.
     * @return true if there is one
     */
    virtual bool hasMore() = 0;

    /**
     * Step to the following element.
     * @return true if an element is available, false at the end of the column
     */
    virtual bool next() = 0;
};
};
#endif //ARROW_BASECOLUMNCURSOR_H
#include <iostream>
#include "ChunkedColumnCursor.h"
#include "../core/Core.h"
namespace db {
/**
 * Take ownership of the column handle and position the cursor on the first
 * element of the first chunk.
 * @param column       chunked Arrow array to iterate over
 * @param table_cursor table cursor forwarded to the base classes
 */
template <typename T>
ChunkedColumnCursor<T>::ChunkedColumnCursor(std::shared_ptr<arrow::ChunkedArray> column, TableCursor &table_cursor)
        : BaseColumnCursor<T>(table_cursor),
          _column(std::move(column))
{
    // start at chunk 0, element 0
    reset();
}
/**
 * Will next() produce another element?
 * @return true while the position after the current one is still inside the column
 */
template <typename T>
bool
ChunkedColumnCursor<T>::hasMore() {
    const int64_t following = _pos + 1;
    return following < _column->length();
}
/**
 * Move to the next element.
 *
 * When the step leaves the current chunk, the cursor advances over as many
 * chunks as necessary, so zero-length chunks in the middle of the column are
 * skipped instead of leaving the cursor on a chunk without elements.
 *
 * @return True if an element is available, false otherwise (end of column, or
 *         the chunk sequence ended unexpectedly).
 */
template <typename T>
bool
ChunkedColumnCursor<T>::next() {
    if ((_pos + 1) >= _column->length()) {
        // already on the last element, or the column has no elements at all
        return false;
    }

    _pos++;
    _pos_in_chunk++;

    // May have hit the end of the current chunk. Since another element exists
    // (checked above), a later non-empty chunk must exist too; loop rather than
    // advance once so that empty chunks in between are passed over.
    while (_pos_in_chunk >= _current_chunk->length()) {
        if (!advance_chunk()) {
            // invariant violated: the column reported more elements than its chunks hold
            return false;
        }
    }
    return true;
}
/**
 * Is the element at the table cursor's current position null?
 * The cursor is first moved to that position; the outcome of the seek is not checked.
 * @return result of IsNull() on the current chunk
 */
template <typename T>
bool
ChunkedColumnCursor<T>::isNull() {
    const auto row = this->get_pos();
    seek(row);
    return _current_chunk->IsNull(_pos_in_chunk);
}
/**
 * Value at the table cursor's current position.
 * The cursor is first moved to that position; the outcome of the seek is not checked.
 * @return result of Value() on the current chunk
 */
template <typename T>
typename T::ElementType
ChunkedColumnCursor<T>::get() {
    const auto row = this->get_pos();
    seek(row);
    return _current_chunk->Value(_pos_in_chunk);
}
/**
 * String specialization of get(): the element is read with GetString()
 * instead of Value().
 * The cursor is first moved to the table cursor's position; the outcome of the
 * seek is not checked.
 * @return result of GetString() on the current chunk
 */
template <>
typename db::StringType::ElementType
ChunkedColumnCursor<db::StringType>::get() {
    const auto row = this->get_pos();
    seek(row);
    return _current_chunk->GetString(_pos_in_chunk);
}
/**
 * Reset to the first element, if any.
 *
 * All position counters go back to zero and the first chunk becomes current.
 * A column without any chunk has no first chunk to fetch; in that case the
 * current chunk is cleared (null) instead of indexing chunk 0 out of range.
 * hasMore() and next() report "no element" for such a column before touching
 * the chunk; the chunk must not be dereferenced while it is null.
 */
template <typename T>
void
ChunkedColumnCursor<T>::reset() {
    _pos = 0;
    _chunk = 0;
    _pos_in_chunk = 0;

    if (_column->num_chunks() > 0) {
        _current_chunk =
                std::static_pointer_cast<typename T::ArrayType>(_column->chunk(_chunk));
    } else {
        // empty chunk list: there is nothing to point at
        _current_chunk = nullptr;
    }
}
/**
 * Seek to the given position.
 *
 * Forward seeks walk the chunk sequence using only the chunk lengths, so the
 * data of intervening chunks is never touched. Backward seeks that stay inside
 * the current chunk just move the in-chunk offset; backward seeks into an
 * earlier chunk rewind to the start of the column and then walk forward.
 *
 * @param to zero-based ordinal position of element in column
 * @return True if successful. False if the position lies beyond the last
 *         chunk, does not fit a signed 64-bit position, or there is no current
 *         chunk; on a failed forward walk the cursor stays at the start of the
 *         last chunk it reached.
 */
template <typename T>
bool
ChunkedColumnCursor<T>::seek(uint64_t to) {
    const int64_t target = static_cast<int64_t>(to);
    if (target < 0 || !_current_chunk) {
        // e.g. a negative table position converted to uint64_t, or nothing to seek in
        return false;
    }

    int64_t distance = target - _pos;
    if (_pos_in_chunk + distance < 0) {
        // target lies before the current chunk: start over from the first chunk
        reset();
        distance = target;
    }

    // the key idea here is to avoid touching the memory of the intervening chunks completely
    while (_pos_in_chunk + distance >= _current_chunk->length()) {
        const int64_t advancing = _current_chunk->length() - _pos_in_chunk;
        distance -= advancing;
        if (!advance_chunk()) return false;
        _pos += advancing;
    }

    // invariant: there's enough data since the loop exited and advance_chunk() returned true
    _pos += distance;
    _pos_in_chunk += distance;
    return true;
}
/**
 * Make the chunk after the current one current and point at its first slot.
 * @return true if there was a following chunk, false if the current chunk is
 *         the last one (nothing is changed in that case)
 */
template <typename T>
bool
ChunkedColumnCursor<T>::advance_chunk() {
    const auto following = _chunk + 1;
    if (!(following < _column->num_chunks())) {
        return false;
    }

    _chunk = following;
    _pos_in_chunk = 0;
    _current_chunk =
            std::static_pointer_cast<typename T::ArrayType>(_column->chunk(_chunk));
    return true;
}
};
// Explicit instantiations for the two element types used with this template.
// The member definitions above live in this source file, so presumably these
// are what make them available to other translation units -- confirm no other
// element type is needed.
template class db::ChunkedColumnCursor<db::IntType>;
template class db::ChunkedColumnCursor<db::StringType>;
\ No newline at end of file
#ifndef ARROW_CHUNKEDCOLUMNCURSOR_H
#define ARROW_CHUNKEDCOLUMNCURSOR_H
#include <arrow/table.h>
#include "BaseColumnCursor.h"
namespace db {
/**
 * A simple column cursor implemented on top of a possibly chunked Arrow column, that hides the
 * chunking to present a simple column structure. This is not directly used for executing queries.
 *
 * @tparam T Type descriptor of the column: T::ArrayType is the Arrow array type of
 *           each chunk and T::ElementType the value type returned by get().
 */
template<typename T>
class ChunkedColumnCursor : public BaseColumnCursor<T> {
public:
    /**
     * Create from a column -- initially positioned at first element, if any.
     * @param column       the chunked Arrow array to iterate over
     * @param table_cursor table cursor forwarded to the base class
     */
    explicit ChunkedColumnCursor(std::shared_ptr<arrow::ChunkedArray> column, TableCursor &table_cursor);

    /**
     * Will next() produce another element?
     * @return True if there is an element after the current position.
     */
    bool hasMore() override;

    /**
     * Move to the next element.
     * @return True if an element is available, false otherwise (end of column.)
     */
    bool next() override;

    /**
     * Is the element at the current position null?
     * @return True if it is null.
     */
    bool isNull() override;

    /**
     * Get value at current position.
     * @return The element.
     */
    typename T::ElementType get() override;

    /**
     * Reset to the first element, if any.
     */
    void reset() override;

    /**
     * Seek to the given position.
     * @param to zero-based ordinal position of element in column
     * @return True if successful.
     */
    bool seek(uint64_t to) override;

protected:
    /**
     * Advance to the next chunk in the column's chunk sequence, when the values
     * in the current chunk have been exhausted.
     * @return True if successful, false if the current chunk was the last.
     */
    bool advance_chunk();

private:
    /**
     * The underlying column
     */
    std::shared_ptr<arrow::ChunkedArray> _column;

    /**
     * The current chunk of the underlying column
     */
    std::shared_ptr<typename T::ArrayType> _current_chunk;

    /**
     * Offset of current chunk in the sequence of chunks
     */
    int32_t _chunk = 0;

    /**
     * Offset within the current chunk
     */
    int64_t _pos_in_chunk = 0;

    /**
     * Position within the (logical) column.
     */
    int64_t _pos = 0;
};
};
#endif //ARROW_CHUNKEDCOLUMNCURSOR_H
#include "GenericColumnCursor.h"
#include "../table/TableCursor.h"
// Definitions below belong to namespace db (the directive was listed twice; once is enough).
using namespace db;
/**
 * Bind this column cursor to the table cursor that supplies its position.
 * Only a reference is kept.
 * @param table_cursor the controlling table cursor
 */
GenericColumnCursor::GenericColumnCursor(TableCursor &table_cursor)
        : _table_cursor(table_cursor) {}
/**
 * Current position, as reported by the controlling table cursor.
 * @return value of TableCursor::get_pos()
 */
int
GenericColumnCursor::get_pos()
{
    const int position = _table_cursor.get_pos();
    return position;
}
\ No newline at end of file
#ifndef ARROW_GENERICCOLUMNCURSOR_H
#define ARROW_GENERICCOLUMNCURSOR_H
namespace db {
class TableCursor;
/**
 * Untyped base of all column cursors. A column cursor does not keep a position
 * of its own at this level: get_pos() asks the TableCursor it was created with.
 *
 * According to the original notes, instances are obtained through getColumn on
 * the outermost TableCursor, iteration is driven by that TableCursor's
 * hasMore(), and values are read by casting to the matching typed cursor and
 * calling get() -- TODO confirm, those entry points are declared elsewhere.
 */
class GenericColumnCursor {
    friend class ScanTableCursor;

public:
    virtual ~GenericColumnCursor() = default;

    /**
     * Null check for the element at the current position.
     * @return true if that element is null
     */
    virtual bool isNull() = 0;

protected:
    /**
     * @param table_cursor table cursor this column cursor takes its position from
     */
    explicit GenericColumnCursor(TableCursor &table_cursor);

    /**
     * Go back to the first element, if there is one.
     */
    virtual void reset() = 0;

    /**
     * @return position reported by the table cursor
     */
    int get_pos();

private:
    // controlling table cursor; held by reference only
    TableCursor &_table_cursor;
};
};
#endif //ARROW_GENERICCOLUMNCURSOR_H
......@@ -66,7 +66,7 @@ namespace db {
std::shared_ptr<DataType> int_type();
// Factory method that creates a double type
std::shared_ptr<DataType> double_type();
// TODO:
// Virtual base class for creating a value
class GenValue {
......
//
// Created by alex on 20.04.21.
//
#include "TableCursor.h"
#ifndef ARROW_TABLECURSOR_H
#define ARROW_TABLECURSOR_H
#include <cstdint>
#include <memory>
#include <arrow/api.h>
#include "../core/Core.h"
namespace db {
/**
 * Abstract cursor over a table. It exposes its current position to the column
 * cursor classes, which are friends and may call the protected get_pos().
 */
class TableCursor {
    // BaseColumnCursor is a class template, so it has to be befriended as one;
    // a plain "friend class BaseColumnCursor;" would name a different,
    // non-template class and clash with the template's declaration.
    template<typename T>
    friend class BaseColumnCursor;
    friend class GenericColumnCursor;

public:
    virtual ~TableCursor() = default;

protected:
    /**
     * @return the current position of this cursor
     */
    virtual int get_pos() const = 0;
};
}
#endif //ARROW_TABLECURSOR_H
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment