copy-tarballs: Use an S3 bucket for tarballs.nixos.org

Tarballs.nixos.org is now stored in an S3 bucket rather than an EBS
volume. Redirects are used to simulate symlinks.

The function find-tarballs.nix now filters out fetchzip, fetchpatch
and the like.
wip/yesman
Eelco Dolstra 9 years ago
parent bb672805cd
commit 567e002545
  1. 193
      maintainers/scripts/copy-tarballs.pl
  2. 9
      maintainers/scripts/find-tarballs.nix

@ -1,97 +1,144 @@
#! /run/current-system/sw/bin/perl -w
#! /usr/bin/env nix-shell
#! nix-shell -i perl -p perl perlPackages.NetAmazonS3 nixUnstable
# This command uploads tarballs to tarballs.nixos.org, the
# content-addressed cache used by fetchurl as a fallback for when
# upstream tarballs disappear or change. Usage:
#
# 1) To upload a single file:
#
# $ copy-tarballs.pl --file /path/to/tarball.tar.gz
#
# 2) To upload all files obtained via calls to fetchurl in a Nix derivation:
#
# $ copy-tarballs.pl --expr '(import <nixpkgs> {}).hello'
use strict;
use XML::Simple;
use warnings;
use File::Basename;
use File::Path;
use File::Copy 'cp';
use IPC::Open2;
use JSON;
use Net::Amazon::S3;
use Nix::Store;
my $myDir = dirname($0);
# S3 setup.
my $aws_access_key_id = $ENV{'AWS_ACCESS_KEY_ID'} or die;
my $aws_secret_access_key = $ENV{'AWS_SECRET_ACCESS_KEY'} or die;
my $tarballsCache = $ENV{'NIX_TARBALLS_CACHE'} // "/tarballs";
my $s3 = Net::Amazon::S3->new(
{ aws_access_key_id => $aws_access_key_id,
aws_secret_access_key => $aws_secret_access_key,
retry => 1,
});
my $xml = `nix-instantiate --eval-only --xml --strict '<nixpkgs/maintainers/scripts/find-tarballs.nix>'`;
die "$0: evaluation failed\n" if $? != 0;
my $bucket = $s3->bucket("nixpkgs-tarballs") or die;
my $data = XMLin($xml) or die;
sub alreadyMirrored {
my ($algo, $hash) = @_;
return defined $bucket->get_key("$algo/$hash");
}
mkpath($tarballsCache);
mkpath("$tarballsCache/md5");
mkpath("$tarballsCache/sha1");
mkpath("$tarballsCache/sha256");
sub uploadFile {
my ($fn, $name) = @_;
foreach my $file (@{$data->{list}->{attrs}}) {
my $url = $file->{attr}->{url}->{string}->{value};
my $algo = $file->{attr}->{type}->{string}->{value};
my $hash = $file->{attr}->{hash}->{string}->{value};
my $md5_16 = hashFile("md5", 0, $fn) or die;
my $sha1_16 = hashFile("sha1", 0, $fn) or die;
my $sha256_32 = hashFile("sha256", 1, $fn) or die;
my $sha256_16 = hashFile("sha256", 0, $fn) or die;
my $sha512_32 = hashFile("sha512", 1, $fn) or die;
my $sha512_16 = hashFile("sha512", 0, $fn) or die;
if ($url !~ /^http:/ && $url !~ /^https:/ && $url !~ /^ftp:/ && $url !~ /^mirror:/) {
print STDERR "skipping $url (unsupported scheme)\n";
next;
}
my $mainKey = "sha512/$sha512_16";
$url =~ /([^\/]+)$/;
my $fn = $1;
return if alreadyMirrored("sha512", $sha512_16);
if (!defined $fn) {
print STDERR "skipping $url (no file name)\n";
next;
}
if ($fn =~ /[&?=%]/ || $fn =~ /^\./) {
print STDERR "skipping $url (bad character in file name)\n";
next;
}
# Upload the file as sha512/<hash-in-base-16>.
print STDERR "uploading $fn to $mainKey...\n";
$bucket->add_key_filename($mainKey, $fn, { 'x-amz-meta-original-name' => $name })
or die "failed to upload $fn to $mainKey\n";
if ($fn !~ /[a-zA-Z]/) {
print STDERR "skipping $url (no letter in file name)\n";
next;
# Create redirects from the other hash types.
sub redirect {
my ($name, $dest) = @_;
#print STDERR "linking $name to $dest...\n";
$bucket->add_key($name, "", { 'x-amz-website-redirect-location' => "/" . $dest })
or die "failed to create redirect from $name to $dest\n";
}
redirect "md5/$md5_16", $mainKey;
redirect "sha1/$sha1_16", $mainKey;
redirect "sha256/$sha256_32", $mainKey;
redirect "sha256/$sha256_16", $mainKey;
redirect "sha512/$sha512_32", $mainKey;
}
if ($fn !~ /[0-9]/) {
print STDERR "skipping $url (no digit in file name)\n";
next;
}
my $op = $ARGV[0] // "";
if ($fn !~ /[-_\.]/) {
print STDERR "skipping $url (no dash/dot/underscore in file name)\n";
next;
if ($op eq "--file") {
my $fn = $ARGV[1] // die "$0: --file requires a file name\n";
if (alreadyMirrored("sha512", hashFile("sha512", 0, $fn))) {
print STDERR "$fn is already mirrored\n";
} else {
uploadFile($fn, basename $fn);
}
}
my $dstPath = "$tarballsCache/$fn";
next if -e $dstPath;
print "downloading $url to $dstPath...\n";
next if $ENV{DRY_RUN};
$ENV{QUIET} = 1;
$ENV{PRINT_PATH} = 1;
my $fh;
my $pid = open($fh, "-|", "nix-prefetch-url", "--type", $algo, $url, $hash) or die;
waitpid($pid, 0) or die;
if ($? != 0) {
print STDERR "failed to fetch $url: $?\n";
next;
elsif ($op eq "--expr") {
# Evaluate find-tarballs.nix.
my $expr = $ARGV[1] // die "$0: --expr requires a Nix expression\n";
my $pid = open(JSON, "-|", "nix-instantiate", "--eval-only", "--json", "--strict",
"<nixpkgs/maintainers/scripts/find-tarballs.nix>",
"--arg", "expr", $expr);
my $stdout = <JSON>;
waitpid($pid, 0);
die "$0: evaluation failed\n" if $?;
close JSON;
my $fetches = decode_json($stdout);
print STDERR "evaluation returned ", scalar(@{$fetches}), " tarballs\n";
# Check every fetchurl call discovered by find-tarballs.nix.
my $mirrored = 0;
my $have = 0;
foreach my $fetch (@{$fetches}) {
my $url = $fetch->{url};
my $algo = $fetch->{type};
my $hash = $fetch->{hash};
if ($url !~ /^http:/ && $url !~ /^https:/ && $url !~ /^ftp:/ && $url !~ /^mirror:/) {
print STDERR "skipping $url (unsupported scheme)\n";
next;
}
if (alreadyMirrored($algo, $hash)) {
$have++;
next;
}
print STDERR "mirroring $url...\n";
next if $ENV{DRY_RUN};
# Download the file using nix-prefetch-url.
$ENV{QUIET} = 1;
$ENV{PRINT_PATH} = 1;
my $fh;
my $pid = open($fh, "-|", "nix-prefetch-url", "--type", $algo, $url, $hash) or die;
waitpid($pid, 0) or die;
if ($? != 0) {
print STDERR "failed to fetch $url: $?\n";
next;
}
<$fh>; my $storePath = <$fh>; chomp $storePath;
uploadFile($storePath, $url);
$mirrored++;
}
<$fh>; my $storePath = <$fh>; chomp $storePath;
die unless -e $storePath;
cp($storePath, $dstPath) or die;
my $md5 = hashFile("md5", 0, $storePath) or die;
symlink("../$fn", "$tarballsCache/md5/$md5");
my $sha1 = hashFile("sha1", 0, $storePath) or die;
symlink("../$fn", "$tarballsCache/sha1/$sha1");
my $sha256 = hashFile("sha256", 0, $storePath) or die;
symlink("../$fn", "$tarballsCache/sha256/$sha256");
print STDERR "mirrored $mirrored files, already have $have files\n";
}
$sha256 = hashFile("sha256", 1, $storePath) or die;
symlink("../$fn", "$tarballsCache/sha256/$sha256");
else {
die "Syntax: $0 --file FILENAME | --expr EXPR\n";
}

@ -4,9 +4,11 @@
with import ../.. { };
with lib;
{ expr ? removeAttrs (import ../../pkgs/top-level/release.nix { }) [ "tarball" "unstable" ] }:
let
root = removeAttrs (import ../../pkgs/top-level/release.nix { }) [ "tarball" "unstable" ];
root = expr;
uniqueUrls = map (x: x.file) (genericClosure {
startSet = map (file: { key = file.url; inherit file; }) urls;
@ -15,7 +17,10 @@ let
urls = map (drv: { url = head drv.urls; hash = drv.outputHash; type = drv.outputHashAlgo; }) fetchurlDependencies;
fetchurlDependencies = filter (drv: drv.outputHash or "" != "" && drv ? urls) dependencies;
fetchurlDependencies =
filter
(drv: drv.outputHash or "" != "" && drv.outputHashMode == "flat" && drv.postFetch or "" == "" && drv ? urls)
dependencies;
dependencies = map (x: x.value) (genericClosure {
startSet = map keyDrv (derivationsIn' root);

Loading…
Cancel
Save