// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #pragma once #include <cstdint> #include <memory> #include <type_traits> #include <vector> #include "arrow/array.h" #include "arrow/array/builder_binary.h" #include "arrow/array/builder_primitive.h" #include "arrow/array/builder_time.h" #include "arrow/buffer.h" #include "arrow/testing/gtest_util.h" #include "arrow/type_fwd.h" #include "arrow/util/bit_util.h" #include "arrow/visit_type_inline.h" namespace arrow { // ArrayFromVector: construct an Array from vectors of C values template <typename TYPE, typename C_TYPE = typename TYPE::c_type> void ArrayFromVector(const std::shared_ptr<DataType>& type, const std::vector<bool>& is_valid, const std::vector<C_TYPE>& values, std::shared_ptr<Array>* out) { auto type_id = TYPE::type_id; ASSERT_EQ(type_id, type->id()) << "template parameter and concrete DataType instance don't agree"; std::unique_ptr<ArrayBuilder> builder_ptr; ASSERT_OK(MakeBuilder(default_memory_pool(), type, &builder_ptr)); // Get the concrete builder class to access its Append() specializations auto& builder = dynamic_cast<typename TypeTraits<TYPE>::BuilderType&>(*builder_ptr); for (size_t i = 0; i < values.size(); ++i) { if (is_valid[i]) { ASSERT_OK(builder.Append(values[i])); } else { ASSERT_OK(builder.AppendNull()); } } ASSERT_OK(builder.Finish(out)); } template <typename TYPE, typename C_TYPE = typename TYPE::c_type> void ArrayFromVector(const std::shared_ptr<DataType>& type, const std::vector<C_TYPE>& values, std::shared_ptr<Array>* out) { auto type_id = TYPE::type_id; ASSERT_EQ(type_id, type->id()) << "template parameter and concrete DataType instance don't agree"; std::unique_ptr<ArrayBuilder> builder_ptr; ASSERT_OK(MakeBuilder(default_memory_pool(), type, &builder_ptr)); // Get the concrete builder class to access its Append() specializations auto& builder = dynamic_cast<typename TypeTraits<TYPE>::BuilderType&>(*builder_ptr); for (size_t i = 0; i < values.size(); ++i) { ASSERT_OK(builder.Append(values[i])); } ASSERT_OK(builder.Finish(out)); } // Overloads without a DataType argument, for parameterless types template <typename TYPE, typename C_TYPE = typename TYPE::c_type> void ArrayFromVector(const std::vector<bool>& is_valid, const std::vector<C_TYPE>& values, std::shared_ptr<Array>* out) { auto type = TypeTraits<TYPE>::type_singleton(); ArrayFromVector<TYPE, C_TYPE>(type, is_valid, values, out); } template <typename TYPE, typename C_TYPE = typename TYPE::c_type> void ArrayFromVector(const std::vector<C_TYPE>& values, std::shared_ptr<Array>* out) { auto type = TypeTraits<TYPE>::type_singleton(); ArrayFromVector<TYPE, C_TYPE>(type, values, out); } // ChunkedArrayFromVector: construct a ChunkedArray from vectors of C values template <typename TYPE, typename C_TYPE = typename TYPE::c_type> void ChunkedArrayFromVector(const std::shared_ptr<DataType>& type, const std::vector<std::vector<bool>>& is_valid, const std::vector<std::vector<C_TYPE>>& values, std::shared_ptr<ChunkedArray>* out) { ArrayVector chunks; ASSERT_EQ(is_valid.size(), values.size()); for (size_t i = 0; i < values.size(); ++i) { std::shared_ptr<Array> array; ArrayFromVector<TYPE, C_TYPE>(type, is_valid[i], values[i], &array); chunks.push_back(array); } *out = std::make_shared<ChunkedArray>(chunks); } template <typename TYPE, typename C_TYPE = typename TYPE::c_type> void ChunkedArrayFromVector(const std::shared_ptr<DataType>& type, const std::vector<std::vector<C_TYPE>>& values, std::shared_ptr<ChunkedArray>* out) { ArrayVector chunks; for (size_t i = 0; i < values.size(); ++i) { std::shared_ptr<Array> array; ArrayFromVector<TYPE, C_TYPE>(type, values[i], &array); chunks.push_back(array); } *out = std::make_shared<ChunkedArray>(chunks); } // Overloads without a DataType argument, for parameterless types template <typename TYPE, typename C_TYPE = typename TYPE::c_type> void ChunkedArrayFromVector(const std::vector<std::vector<bool>>& is_valid, const std::vector<std::vector<C_TYPE>>& values, std::shared_ptr<ChunkedArray>* out) { auto type = TypeTraits<TYPE>::type_singleton(); ChunkedArrayFromVector<TYPE, C_TYPE>(type, is_valid, values, out); } template <typename TYPE, typename C_TYPE = typename TYPE::c_type> void ChunkedArrayFromVector(const std::vector<std::vector<C_TYPE>>& values, std::shared_ptr<ChunkedArray>* out) { auto type = TypeTraits<TYPE>::type_singleton(); ChunkedArrayFromVector<TYPE, C_TYPE>(type, values, out); } template <typename BuilderType> void FinishAndCheckPadding(BuilderType* builder, std::shared_ptr<Array>* out) { ASSERT_OK_AND_ASSIGN(*out, builder->Finish()); AssertZeroPadded(**out); TestInitialized(**out); } template <class T, class Builder> Status MakeArray(const std::vector<uint8_t>& valid_bytes, const std::vector<T>& values, int64_t size, Builder* builder, std::shared_ptr<Array>* out) { // Append the first 1000 for (int64_t i = 0; i < size; ++i) { if (valid_bytes[i] > 0) { RETURN_NOT_OK(builder->Append(values[i])); } else { RETURN_NOT_OK(builder->AppendNull()); } } return builder->Finish(out); } template <typename Fn> struct VisitBuilder { template <typename T, typename BuilderType = typename TypeTraits<T>::BuilderType, // need to let SFINAE drop this Visit when it would result in // [](NullBuilder*){}(double_builder) typename = decltype(std::declval<Fn>()(std::declval<BuilderType*>()))> Status Visit(const T&, ArrayBuilder* builder, Fn&& fn) { fn(internal::checked_cast<BuilderType*>(builder)); return Status::OK(); } Status Visit(const DataType& t, ArrayBuilder* builder, Fn&& fn) { return Status::NotImplemented("visiting builders of type ", t); } }; template <typename Fn> Result<std::shared_ptr<Array>> ArrayFromBuilderVisitor( const std::shared_ptr<DataType>& type, int64_t initial_capacity, int64_t visitor_repetitions, Fn&& fn) { std::unique_ptr<ArrayBuilder> builder; RETURN_NOT_OK(MakeBuilder(default_memory_pool(), type, &builder)); if (initial_capacity != 0) { RETURN_NOT_OK(builder->Resize(initial_capacity)); } VisitBuilder<Fn> visitor; for (int64_t i = 0; i < visitor_repetitions; ++i) { RETURN_NOT_OK( VisitTypeInline(*builder->type(), &visitor, builder.get(), std::forward<Fn>(fn))); } std::shared_ptr<Array> out; RETURN_NOT_OK(builder->Finish(&out)); return out; } template <typename Fn> Result<std::shared_ptr<Array>> ArrayFromBuilderVisitor( const std::shared_ptr<DataType>& type, int64_t length, Fn&& fn) { return ArrayFromBuilderVisitor(type, length, length, std::forward<Fn>(fn)); } template <typename T> static inline Status GetBitmapFromVector(const std::vector<T>& is_valid, std::shared_ptr<Buffer>* result) { size_t length = is_valid.size(); ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateEmptyBitmap(length)); uint8_t* bitmap = buffer->mutable_data(); for (size_t i = 0; i < static_cast<size_t>(length); ++i) { if (is_valid[i]) { bit_util::SetBit(bitmap, i); } } *result = buffer; return Status::OK(); } template <typename T> inline void BitmapFromVector(const std::vector<T>& is_valid, std::shared_ptr<Buffer>* out) { ASSERT_OK(GetBitmapFromVector(is_valid, out)); } } // namespace arrow
Memory