hadoop: add passthrough tests

main
illustris 2 years ago
parent c82d48913f
commit 799dc66cf1
  1. 6
      nixos/tests/all-tests.nix
  2. 7
      nixos/tests/hadoop/default.nix
  3. 240
      nixos/tests/hadoop/hadoop.nix
  4. 12
      nixos/tests/hadoop/hdfs.nix
  5. 43
      nixos/tests/hadoop/yarn.nix
  6. 55
      pkgs/applications/networking/cluster/hadoop/default.nix

@ -189,9 +189,9 @@ in
grocy = handleTest ./grocy.nix {};
grub = handleTest ./grub.nix {};
gvisor = handleTest ./gvisor.nix {};
hadoop.all = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./hadoop/hadoop.nix {};
hadoop.hdfs = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./hadoop/hdfs.nix {};
hadoop.yarn = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./hadoop/yarn.nix {};
hadoop = import ./hadoop { inherit handleTestOn; package=pkgs.hadoop; };
hadoop_3_2 = import ./hadoop { inherit handleTestOn; package=pkgs.hadoop_3_2; };
hadoop2 = import ./hadoop { inherit handleTestOn; package=pkgs.hadoop2; };
haka = handleTest ./haka.nix {};
haproxy = handleTest ./haproxy.nix {};
hardened = handleTest ./hardened.nix {};

@ -0,0 +1,7 @@
# Builds the attribute set of Hadoop NixOS tests for a single Hadoop package.
# Both `handleTestOn` and `package` are supplied by the caller; every test
# below is invoked with the same platform list and receives `package`.
{ handleTestOn, package, ... }:
let
  # Platform list handed to handleTestOn for each test.
  supportedSystems = [ "x86_64-linux" "aarch64-linux" ];
  # Instantiate one test file with the shared platform list and package.
  mkTest = testFile: handleTestOn supportedSystems testFile { inherit package; };
in
{
  all = mkTest ./hadoop.nix;
  hdfs = mkTest ./hdfs.nix;
  yarn = mkTest ./yarn.nix;
}

@ -1,149 +1,151 @@
# This test is very comprehensive. It tests whether all hadoop services work well with each other.
# Run this when updating the Hadoop package or making significant changes to the hadoop module.
# For a more basic test, see hdfs.nix and yarn.nix
import ../make-test-python.nix ({pkgs, ...}: {
import ../make-test-python.nix ({ package, ... }: {
name = "hadoop-combined";
nodes = let
package = pkgs.hadoop;
coreSite = {
"fs.defaultFS" = "hdfs://ns1";
};
hdfsSite = {
"dfs.namenode.rpc-bind-host" = "0.0.0.0";
"dfs.namenode.http-bind-host" = "0.0.0.0";
"dfs.namenode.servicerpc-bind-host" = "0.0.0.0";
# HA Quorum Journal Manager configuration
"dfs.nameservices" = "ns1";
"dfs.ha.namenodes.ns1" = "nn1,nn2";
"dfs.namenode.shared.edits.dir.ns1.nn1" = "qjournal://jn1:8485;jn2:8485;jn3:8485/ns1";
"dfs.namenode.shared.edits.dir.ns1.nn2" = "qjournal://jn1:8485;jn2:8485;jn3:8485/ns1";
"dfs.namenode.rpc-address.ns1.nn1" = "nn1:8020";
"dfs.namenode.rpc-address.ns1.nn2" = "nn2:8020";
"dfs.namenode.servicerpc-address.ns1.nn1" = "nn1:8022";
"dfs.namenode.servicerpc-address.ns1.nn2" = "nn2:8022";
"dfs.namenode.http-address.ns1.nn1" = "nn1:9870";
"dfs.namenode.http-address.ns1.nn2" = "nn2:9870";
# Automatic failover configuration
"dfs.client.failover.proxy.provider.ns1" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider";
"dfs.ha.automatic-failover.enabled.ns1" = "true";
"dfs.ha.fencing.methods" = "shell(true)";
"ha.zookeeper.quorum" = "zk1:2181";
};
yarnSiteHA = {
"yarn.resourcemanager.zk-address" = "zk1:2181";
"yarn.resourcemanager.ha.enabled" = "true";
"yarn.resourcemanager.ha.rm-ids" = "rm1,rm2";
"yarn.resourcemanager.hostname.rm1" = "rm1";
"yarn.resourcemanager.hostname.rm2" = "rm2";
"yarn.resourcemanager.ha.automatic-failover.enabled" = "true";
"yarn.resourcemanager.cluster-id" = "cluster1";
# yarn.resourcemanager.webapp.address needs to be defined even though yarn.resourcemanager.hostname is set. This shouldn't be necessary, but there's a bug in
# hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/amfilter/AmFilterInitializer.java:70
# that causes AM containers to fail otherwise.
"yarn.resourcemanager.webapp.address.rm1" = "rm1:8088";
"yarn.resourcemanager.webapp.address.rm2" = "rm2:8088";
};
in {
zk1 = { ... }: {
services.zookeeper.enable = true;
networking.firewall.allowedTCPPorts = [ 2181 ];
};
nodes =
let
coreSite = {
"fs.defaultFS" = "hdfs://ns1";
};
hdfsSite = {
"dfs.namenode.rpc-bind-host" = "0.0.0.0";
"dfs.namenode.http-bind-host" = "0.0.0.0";
"dfs.namenode.servicerpc-bind-host" = "0.0.0.0";
# HA Quorum Journal Manager configuration
"dfs.nameservices" = "ns1";
"dfs.ha.namenodes.ns1" = "nn1,nn2";
"dfs.namenode.shared.edits.dir.ns1.nn1" = "qjournal://jn1:8485;jn2:8485;jn3:8485/ns1";
"dfs.namenode.shared.edits.dir.ns1.nn2" = "qjournal://jn1:8485;jn2:8485;jn3:8485/ns1";
"dfs.namenode.rpc-address.ns1.nn1" = "nn1:8020";
"dfs.namenode.rpc-address.ns1.nn2" = "nn2:8020";
"dfs.namenode.servicerpc-address.ns1.nn1" = "nn1:8022";
"dfs.namenode.servicerpc-address.ns1.nn2" = "nn2:8022";
"dfs.namenode.http-address.ns1.nn1" = "nn1:9870";
"dfs.namenode.http-address.ns1.nn2" = "nn2:9870";
# Automatic failover configuration
"dfs.client.failover.proxy.provider.ns1" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider";
"dfs.ha.automatic-failover.enabled.ns1" = "true";
"dfs.ha.fencing.methods" = "shell(true)";
"ha.zookeeper.quorum" = "zk1:2181";
};
yarnSiteHA = {
"yarn.resourcemanager.zk-address" = "zk1:2181";
"yarn.resourcemanager.ha.enabled" = "true";
"yarn.resourcemanager.ha.rm-ids" = "rm1,rm2";
"yarn.resourcemanager.hostname.rm1" = "rm1";
"yarn.resourcemanager.hostname.rm2" = "rm2";
"yarn.resourcemanager.ha.automatic-failover.enabled" = "true";
"yarn.resourcemanager.cluster-id" = "cluster1";
# yarn.resourcemanager.webapp.address needs to be defined even though yarn.resourcemanager.hostname is set. This shouldn't be necessary, but there's a bug in
# hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/amfilter/AmFilterInitializer.java:70
# that causes AM containers to fail otherwise.
"yarn.resourcemanager.webapp.address.rm1" = "rm1:8088";
"yarn.resourcemanager.webapp.address.rm2" = "rm2:8088";
};
in
{
zk1 = { ... }: {
services.zookeeper.enable = true;
networking.firewall.allowedTCPPorts = [ 2181 ];
};
# HDFS cluster
nn1 = {pkgs, options, ...}: {
services.hadoop = {
inherit package coreSite hdfsSite;
hdfs.namenode = {
enable = true;
openFirewall = true;
# HDFS cluster
nn1 = { ... }: {
services.hadoop = {
inherit package coreSite hdfsSite;
hdfs.namenode = {
enable = true;
openFirewall = true;
};
hdfs.zkfc.enable = true;
};
hdfs.zkfc.enable = true;
};
};
nn2 = {pkgs, options, ...}: {
services.hadoop = {
inherit package coreSite hdfsSite;
hdfs.namenode = {
enable = true;
openFirewall = true;
nn2 = { ... }: {
services.hadoop = {
inherit package coreSite hdfsSite;
hdfs.namenode = {
enable = true;
openFirewall = true;
};
hdfs.zkfc.enable = true;
};
hdfs.zkfc.enable = true;
};
};
jn1 = {pkgs, options, ...}: {
services.hadoop = {
inherit package coreSite hdfsSite;
hdfs.journalnode = {
enable = true;
openFirewall = true;
jn1 = { ... }: {
services.hadoop = {
inherit package coreSite hdfsSite;
hdfs.journalnode = {
enable = true;
openFirewall = true;
};
};
};
};
jn2 = {pkgs, options, ...}: {
services.hadoop = {
inherit package coreSite hdfsSite;
hdfs.journalnode = {
enable = true;
openFirewall = true;
jn2 = { ... }: {
services.hadoop = {
inherit package coreSite hdfsSite;
hdfs.journalnode = {
enable = true;
openFirewall = true;
};
};
};
};
jn3 = {pkgs, options, ...}: {
services.hadoop = {
inherit package coreSite hdfsSite;
hdfs.journalnode = {
enable = true;
openFirewall = true;
jn3 = { ... }: {
services.hadoop = {
inherit package coreSite hdfsSite;
hdfs.journalnode = {
enable = true;
openFirewall = true;
};
};
};
};
dn1 = {pkgs, options, ...}: {
services.hadoop = {
inherit package coreSite hdfsSite;
hdfs.datanode = {
enable = true;
openFirewall = true;
dn1 = { ... }: {
services.hadoop = {
inherit package coreSite hdfsSite;
hdfs.datanode = {
enable = true;
openFirewall = true;
};
};
};
};
# YARN cluster
rm1 = {pkgs, options, ...}: {
services.hadoop = {
inherit package coreSite hdfsSite;
yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
yarn.resourcemanager = {
enable = true;
openFirewall = true;
# YARN cluster
rm1 = { options, ... }: {
services.hadoop = {
inherit package coreSite hdfsSite;
yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
yarn.resourcemanager = {
enable = true;
openFirewall = true;
};
};
};
};
rm2 = {pkgs, options, ...}: {
services.hadoop = {
inherit package coreSite hdfsSite;
yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
yarn.resourcemanager = {
enable = true;
openFirewall = true;
rm2 = { options, ... }: {
services.hadoop = {
inherit package coreSite hdfsSite;
yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
yarn.resourcemanager = {
enable = true;
openFirewall = true;
};
};
};
};
nm1 = {pkgs, options, ...}: {
virtualisation.memorySize = 2048;
services.hadoop = {
inherit package coreSite hdfsSite;
yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
yarn.nodemanager = {
enable = true;
openFirewall = true;
nm1 = { options, ... }: {
virtualisation.memorySize = 2048;
services.hadoop = {
inherit package coreSite hdfsSite;
yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
yarn.nodemanager = {
enable = true;
openFirewall = true;
};
};
};
};
};
testScript = ''
start_all()

@ -1,9 +1,11 @@
# Test a minimal HDFS cluster with no HA
import ../make-test-python.nix ({...}: {
import ../make-test-python.nix ({ package, ... }: {
name = "hadoop-hdfs";
nodes = {
namenode = {pkgs, ...}: {
namenode = { pkgs, ... }: {
services.hadoop = {
package = pkgs.hadoop;
inherit package;
hdfs = {
namenode = {
enable = true;
@ -22,9 +24,9 @@ import ../make-test-python.nix ({...}: {
};
};
};
datanode = {pkgs, ...}: {
datanode = { pkgs, ... }: {
services.hadoop = {
package = pkgs.hadoop;
inherit package;
hdfs.datanode = {
enable = true;
openFirewall = true;

@ -1,28 +1,33 @@
# This only tests if YARN is able to start its services
import ../make-test-python.nix ({...}: {
import ../make-test-python.nix ({ package, ... }: {
name = "hadoop-yarn";
nodes = {
resourcemanager = {pkgs, ...}: {
services.hadoop.package = pkgs.hadoop;
services.hadoop.yarn.resourcemanager = {
enable = true;
openFirewall = true;
};
services.hadoop.yarnSite = {
"yarn.resourcemanager.scheduler.class" = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler";
resourcemanager = { ... }: {
services.hadoop = {
inherit package;
yarn.resourcemanager = {
enable = true;
openFirewall = true;
};
yarnSite = {
"yarn.resourcemanager.scheduler.class" = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler";
};
};
};
nodemanager = {pkgs, ...}: {
services.hadoop.package = pkgs.hadoop;
services.hadoop.yarn.nodemanager = {
enable = true;
openFirewall = true;
};
services.hadoop.yarnSite = {
"yarn.resourcemanager.hostname" = "resourcemanager";
"yarn.nodemanager.log-dirs" = "/tmp/userlogs";
nodemanager = { ... }: {
services.hadoop = {
inherit package;
yarn.nodemanager = {
enable = true;
openFirewall = true;
};
yarnSite = {
"yarn.resourcemanager.hostname" = "resourcemanager";
"yarn.nodemanager.log-dirs" = "/tmp/userlogs";
};
};
};
};
testScript = ''

@ -15,6 +15,8 @@
, zlib
, zstd
, openssl
, openssl
, nixosTests
}:
with lib;
@ -22,7 +24,7 @@ with lib;
assert elem stdenv.system [ "x86_64-linux" "x86_64-darwin" "aarch64-linux" "aarch64-darwin" ];
let
common = { pname, version, untarDir ? "${pname}-${version}", sha256, jdk, openssl ? null, nativeLibs ? [ ], libPatches ? "" }:
common = { pname, version, untarDir ? "${pname}-${version}", sha256, jdk, openssl ? null, nativeLibs ? [ ], libPatches ? "", tests }:
stdenv.mkDerivation rec {
inherit pname version jdk libPatches untarDir openssl;
src = fetchurl {
@ -49,6 +51,8 @@ let
done
'' + libPatches;
passthru = { inherit tests; };
meta = {
homepage = "https://hadoop.apache.org/";
description = "Framework for distributed processing of large data sets across clusters of computers";
@ -73,30 +77,29 @@ in
{
# Different versions of Hadoop support different Java runtime versions
# https://cwiki.apache.org/confluence/display/HADOOP/Hadoop+Java+Versions
hadoop_3_3 =
common
(rec {
pname = "hadoop";
version = "3.3.1";
untarDir = "${pname}-${version}";
sha256 = rec {
x86_64-linux = "1b3v16ihysqaxw8za1r5jlnphy8dwhivdx2d0z64309w57ihlxxd";
x86_64-darwin = x86_64-linux;
aarch64-linux = "00ln18vpi07jq2slk3kplyhcj8ad41n0yl880q5cihilk7daclxz";
aarch64-darwin = aarch64-linux;
};
hadoop_3_3 = common rec {
pname = "hadoop";
version = "3.3.1";
untarDir = "${pname}-${version}";
sha256 = rec {
x86_64-linux = "1b3v16ihysqaxw8za1r5jlnphy8dwhivdx2d0z64309w57ihlxxd";
x86_64-darwin = x86_64-linux;
aarch64-linux = "00ln18vpi07jq2slk3kplyhcj8ad41n0yl880q5cihilk7daclxz";
aarch64-darwin = aarch64-linux;
};
inherit openssl;
nativeLibs = [ stdenv.cc.cc.lib protobuf3_7 zlib snappy ];
libPatches = ''
ln -s ${getLib cyrus_sasl}/lib/libsasl2.so $out/lib/${untarDir}/lib/native/libsasl2.so.2
ln -s ${getLib openssl}/lib/libcrypto.so $out/lib/${untarDir}/lib/native/
ln -s ${getLib zlib}/lib/libz.so.1 $out/lib/${untarDir}/lib/native/
ln -s ${getLib zstd}/lib/libzstd.so.1 $out/lib/${untarDir}/lib/native/
ln -s ${getLib bzip2}/lib/libbz2.so.1 $out/lib/${untarDir}/lib/native/
'' + optionalString stdenv.isLinux "patchelf --add-rpath ${jdk.home}/lib/server $out/lib/${untarDir}/lib/native/libnativetask.so.1.0.0";
jdk = jdk11_headless;
});
inherit openssl;
nativeLibs = [ stdenv.cc.cc.lib protobuf3_7 zlib snappy ];
libPatches = ''
ln -s ${getLib cyrus_sasl}/lib/libsasl2.so $out/lib/${untarDir}/lib/native/libsasl2.so.2
ln -s ${getLib openssl}/lib/libcrypto.so $out/lib/${untarDir}/lib/native/
ln -s ${getLib zlib}/lib/libz.so.1 $out/lib/${untarDir}/lib/native/
ln -s ${getLib zstd}/lib/libzstd.so.1 $out/lib/${untarDir}/lib/native/
ln -s ${getLib bzip2}/lib/libbz2.so.1 $out/lib/${untarDir}/lib/native/
'' + optionalString stdenv.isLinux "patchelf --add-rpath ${jdk.home}/lib/server $out/lib/${untarDir}/lib/native/libnativetask.so.1.0.0";
jdk = jdk11_headless;
tests = nixosTests.hadoop;
};
hadoop_3_2 = common rec {
pname = "hadoop";
version = "3.2.2";
@ -104,11 +107,15 @@ in
jdk = jdk8_headless;
# not using native libs because of broken openssl_1_0_2 dependency
# can be manually overridden
# Disable tests involving HDFS till the module adds support for hadoop_3_2
tests = nixosTests.hadoop_3_2 // { all = null; hdfs = null; };
};
# Hadoop 2.10.1, built through `common` with jdk8_headless.
# Only an x86_64-linux hash is provided for this version.
hadoop2 = common {
  pname = "hadoop";
  version = "2.10.1";
  jdk = jdk8_headless;
  sha256 = {
    x86_64-linux = "1w31x4bk9f2swnx8qxx0cgwfg8vbpm6cy5lvfnbbpl3rsjhmyg97";
  };
  # Disable tests involving HDFS till the module adds support for hadoop2:
  # the `all` and `hdfs` entries of nixosTests.hadoop2 are overridden with null.
  tests = nixosTests.hadoop2 // {
    all = null;
    hdfs = null;
  };
};
}

Loading…
Cancel
Save