Merge pull request #171874 from cpcloud/arrow-cpp-8.0

main
Sandro 2 years ago committed by GitHub
commit f76fa41ae6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 35
      pkgs/development/libraries/arrow-cpp/default.nix
  2. 10
      pkgs/development/python-modules/apache-beam/default.nix
  3. 18
      pkgs/development/python-modules/db-dtypes/default.nix
  4. 7
      pkgs/development/python-modules/google-cloud-bigquery/default.nix
  5. 14
      pkgs/development/python-modules/pyarrow/default.nix

@ -19,6 +19,7 @@
, grpc , grpc
, gtest , gtest
, jemalloc , jemalloc
, libbacktrace
, lz4 , lz4
, minio , minio
, ninja , ninja
@ -69,21 +70,20 @@ let
in in
stdenv.mkDerivation rec { stdenv.mkDerivation rec {
pname = "arrow-cpp"; pname = "arrow-cpp";
version = "7.0.0"; version = "8.0.0";
src = fetchurl { src = fetchurl {
url = "mirror://apache/arrow/arrow-${version}/apache-arrow-${version}.tar.gz"; url = "mirror://apache/arrow/arrow-${version}/apache-arrow-${version}.tar.gz";
hash = "sha256-6PSbFJoV7O9OQPz6sbh8ETxrHuGGAFwWnlzfldMamd4="; hash = "sha256-rZoFcFEXyYnBFrrprHBJL+AVBQ4bgPsOOP3ktdhjqqM=";
}; };
sourceRoot = "apache-arrow-${version}/cpp"; sourceRoot = "apache-arrow-${version}/cpp";
${if enableJemalloc then "ARROW_JEMALLOC_URL" else null} = jemalloc.src; ${if enableJemalloc then "ARROW_JEMALLOC_URL" else null} = jemalloc.src;
# versions are all taken from
# https://github.com/apache/arrow/blob/apache-arrow-8.0.0/cpp/thirdparty/versions.txt
ARROW_MIMALLOC_URL = fetchFromGitHub { ARROW_MIMALLOC_URL = fetchFromGitHub {
# From
# ./cpp/cmake_modules/ThirdpartyToolchain.cmake
# ./cpp/thirdparty/versions.txt
owner = "microsoft"; owner = "microsoft";
repo = "mimalloc"; repo = "mimalloc";
rev = "v1.7.3"; rev = "v1.7.3";
@ -93,8 +93,15 @@ stdenv.mkDerivation rec {
ARROW_XSIMD_URL = fetchFromGitHub { ARROW_XSIMD_URL = fetchFromGitHub {
owner = "xtensor-stack"; owner = "xtensor-stack";
repo = "xsimd"; repo = "xsimd";
rev = "aeec9c872c8b475dedd7781336710f2dd2666cb2"; rev = "7d1778c3b38d63db7cec7145d939f40bc5d859d1";
hash = "sha256-vWKdJkieKhaxyAJhijXUmD7NmNvMWd79PskQojulA1w="; hash = "sha256-89AysBUVnTdWyMPazeJegnQ6WEH90Ns7qQInZLMSXY4=";
};
ARROW_SUBSTRAIT_URL = fetchFromGitHub {
owner = "substrait-io";
repo = "substrait";
rev = "e1b4c04a1b518912f4c4065b16a1b2c0ac8e14cf";
hash = "sha256-56FSjDngsROSHLjMv+OYAIYqphEu3GzgIMHbgh/ZQw0=";
}; };
patches = [ patches = [
@ -115,7 +122,10 @@ stdenv.mkDerivation rec {
gflags gflags
glog glog
gtest gtest
libbacktrace
lz4 lz4
nlohmann_json # alternative JSON parser to rapidjson
protobuf # substrait requires protobuf
rapidjson rapidjson
re2 re2
snappy snappy
@ -150,6 +160,9 @@ stdenv.mkDerivation rec {
"-DARROW_BUILD_SHARED=${if enableShared then "ON" else "OFF"}" "-DARROW_BUILD_SHARED=${if enableShared then "ON" else "OFF"}"
"-DARROW_BUILD_STATIC=${if enableShared then "OFF" else "ON"}" "-DARROW_BUILD_STATIC=${if enableShared then "OFF" else "ON"}"
"-DARROW_BUILD_TESTS=ON" "-DARROW_BUILD_TESTS=ON"
"-DARROW_BUILD_INTEGRATION=ON"
"-DARROW_BUILD_UTILITIES=ON"
"-DARROW_EXTRA_ERROR_CONTEXT=ON"
"-DARROW_VERBOSE_THIRDPARTY_BUILD=ON" "-DARROW_VERBOSE_THIRDPARTY_BUILD=ON"
"-DARROW_DEPENDENCY_SOURCE=SYSTEM" "-DARROW_DEPENDENCY_SOURCE=SYSTEM"
"-DThrift_SOURCE=AUTO" # search for Thrift using pkg-config (ThriftConfig.cmake requires OpenSSL and libevent) "-DThrift_SOURCE=AUTO" # search for Thrift using pkg-config (ThriftConfig.cmake requires OpenSSL and libevent)
@ -168,8 +181,10 @@ stdenv.mkDerivation rec {
# Disable Python for static mode because openblas is currently broken there. # Disable Python for static mode because openblas is currently broken there.
"-DARROW_PYTHON=${if enableShared then "ON" else "OFF"}" "-DARROW_PYTHON=${if enableShared then "ON" else "OFF"}"
"-DARROW_USE_GLOG=ON" "-DARROW_USE_GLOG=ON"
"-DARROW_WITH_BACKTRACE=ON"
"-DARROW_WITH_BROTLI=ON" "-DARROW_WITH_BROTLI=ON"
"-DARROW_WITH_LZ4=ON" "-DARROW_WITH_LZ4=ON"
"-DARROW_WITH_NLOHMANN_JSON=ON"
"-DARROW_WITH_SNAPPY=ON" "-DARROW_WITH_SNAPPY=ON"
"-DARROW_WITH_UTF8PROC=ON" "-DARROW_WITH_UTF8PROC=ON"
"-DARROW_WITH_ZLIB=ON" "-DARROW_WITH_ZLIB=ON"
@ -177,8 +192,10 @@ stdenv.mkDerivation rec {
"-DARROW_MIMALLOC=ON" "-DARROW_MIMALLOC=ON"
# Parquet options: # Parquet options:
"-DARROW_PARQUET=ON" "-DARROW_PARQUET=ON"
"-DARROW_SUBSTRAIT=ON"
"-DPARQUET_BUILD_EXECUTABLES=ON" "-DPARQUET_BUILD_EXECUTABLES=ON"
"-DARROW_FLIGHT=${if enableFlight then "ON" else "OFF"}" "-DARROW_FLIGHT=${if enableFlight then "ON" else "OFF"}"
"-DARROW_FLIGHT_TESTING=${if enableFlight then "ON" else "OFF"}"
"-DARROW_S3=${if enableS3 then "ON" else "OFF"}" "-DARROW_S3=${if enableS3 then "ON" else "OFF"}"
"-DARROW_GCS=${if enableGcs then "ON" else "OFF"}" "-DARROW_GCS=${if enableGcs then "ON" else "OFF"}"
] ++ lib.optionals (!enableShared) [ ] ++ lib.optionals (!enableShared) [

@ -5,6 +5,7 @@
, dill , dill
, fastavro , fastavro
, fetchFromGitHub , fetchFromGitHub
, fetchpatch
, freezegun , freezegun
, grpcio , grpcio
, grpcio-tools , grpcio-tools
@ -51,6 +52,15 @@ buildPythonPackage rec {
sha256 = "sha256-FmfTxRLqXUHhhAZIxCRx2+phX0bmU5rIHaftBU4yBJY="; sha256 = "sha256-FmfTxRLqXUHhhAZIxCRx2+phX0bmU5rIHaftBU4yBJY=";
}; };
patches = [
# patch in the pyarrow.Table.to_batches(max_chunksize=...) argument fix
(fetchpatch {
url = "https://github.com/apache/beam/commit/2418a14ee99ff490d1c82944043f97f37ec97a85.patch";
sha256 = "sha256-G8ARBBf7nmF46P2ncnlteGFnPWq5iCqZDfuaosre9jY=";
stripLen = 2;
})
];
# See https://github.com/NixOS/nixpkgs/issues/156957. # See https://github.com/NixOS/nixpkgs/issues/156957.
postPatch = '' postPatch = ''
substituteInPlace setup.py \ substituteInPlace setup.py \

@ -1,6 +1,7 @@
{ lib { lib
, buildPythonPackage , buildPythonPackage
, fetchPypi , fetchpatch
, fetchFromGitHub
, numpy , numpy
, packaging , packaging
, pandas , pandas
@ -12,11 +13,20 @@ buildPythonPackage rec {
pname = "db-dtypes"; pname = "db-dtypes";
version = "1.0.0"; version = "1.0.0";
src = fetchPypi { src = fetchFromGitHub {
inherit pname version; owner = "googleapis";
sha256 = "3070d1a8d86ff0b5d9b16f15c5fab9c18893c6b3d5723cd95ee397b169049454"; repo = "python-db-dtypes-pandas";
rev = "v${version}";
hash = "sha256-7u/E0ICiz7LQfuplm/mkGlWrgGEPqeMwM3CUhfH6868=";
}; };
patches = [
(fetchpatch {
url = "https://github.com/googleapis/python-db-dtypes-pandas/commit/fb30adfd427d3df9919df00b096210ba1eb1b91d.patch";
sha256 = "sha256-39kZtYGbn3U1WXiDTczki5EM6SjUlSRXz8UMcdTU20g=";
})
];
propagatedBuildInputs = [ propagatedBuildInputs = [
numpy numpy
packaging packaging

@ -16,6 +16,7 @@
, proto-plus , proto-plus
, psutil , psutil
, pyarrow , pyarrow
, pytest-xdist
}: }:
buildPythonPackage rec { buildPythonPackage rec {
@ -28,6 +29,11 @@ buildPythonPackage rec {
sha256 = "sha256-UmW6BEV44Ucdg/hUGSQk/kyDnB+Hsyx4q3AXTQe89hI="; sha256 = "sha256-UmW6BEV44Ucdg/hUGSQk/kyDnB+Hsyx4q3AXTQe89hI=";
}; };
postPatch = ''
substituteInPlace setup.py \
--replace 'pyarrow >= 3.0.0, < 8.0dev' 'pyarrow >= 3.0.0, < 9.0dev'
'';
propagatedBuildInputs = [ propagatedBuildInputs = [
google-cloud-core google-cloud-core
google-cloud-bigquery-storage google-cloud-bigquery-storage
@ -47,6 +53,7 @@ buildPythonPackage rec {
google-cloud-datacatalog google-cloud-datacatalog
google-cloud-storage google-cloud-storage
pytestCheckHook pytestCheckHook
pytest-xdist
]; ];
# prevent google directory from shadowing google imports # prevent google directory from shadowing google imports

@ -47,8 +47,10 @@ buildPythonPackage rec {
PYARROW_WITH_DATASET = zero_or_one true; PYARROW_WITH_DATASET = zero_or_one true;
PYARROW_WITH_FLIGHT = zero_or_one _arrow-cpp.enableFlight; PYARROW_WITH_FLIGHT = zero_or_one _arrow-cpp.enableFlight;
PYARROW_WITH_PARQUET = zero_or_one true;
PYARROW_WITH_HDFS = zero_or_one true; PYARROW_WITH_HDFS = zero_or_one true;
PYARROW_WITH_PARQUET = zero_or_one true;
PYARROW_WITH_PLASMA = zero_or_one (!stdenv.isDarwin);
PYARROW_WITH_S3 = zero_or_one _arrow-cpp.enableS3;
PYARROW_CMAKE_OPTIONS = [ PYARROW_CMAKE_OPTIONS = [
"-DCMAKE_INSTALL_RPATH=${ARROW_HOME}/lib" "-DCMAKE_INSTALL_RPATH=${ARROW_HOME}/lib"
@ -73,6 +75,11 @@ buildPythonPackage rec {
# enabled in nixpkgs. # enabled in nixpkgs.
# Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11393 # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11393
"--deselect=pyarrow/tests/test_memory.py::test_env_var" "--deselect=pyarrow/tests/test_memory.py::test_env_var"
# these tests require access to s3 via the internet
"--deselect=pyarrow/tests/test_fs.py::test_resolve_s3_region"
"--deselect=pyarrow/tests/test_fs.py::test_s3_real_aws"
"--deselect=pyarrow/tests/test_fs.py::test_s3_real_aws_region_selection"
"--deselect=pyarrow/tests/test_fs.py::test_s3_options"
] ++ lib.optionals stdenv.isDarwin [ ] ++ lib.optionals stdenv.isDarwin [
# Requires loopback networking # Requires loopback networking
"--deselect=pyarrow/tests/test_ipc.py::test_socket_" "--deselect=pyarrow/tests/test_ipc.py::test_socket_"
@ -84,16 +91,17 @@ buildPythonPackage rec {
rm -r pyarrow/!(tests) rm -r pyarrow/!(tests)
''; '';
pythonImportsCheck = map (module: "pyarrow.${module}") [ pythonImportsCheck = [ "pyarrow" ] ++ map (module: "pyarrow.${module}") ([
"compute" "compute"
"csv" "csv"
"dataset" "dataset"
"feather"
"flight" "flight"
"fs" "fs"
"hdfs" "hdfs"
"json" "json"
"parquet" "parquet"
]; ] ++ lib.optionals (!stdenv.isDarwin) [ "plasma" ]);
meta = with lib; { meta = with lib; {
description = "A cross-language development platform for in-memory data"; description = "A cross-language development platform for in-memory data";

Loading…
Cancel
Save