# Package expression: a function of lib, fetchFromGitHub, fetchurl,
# python2Packages and curl.
{ lib, fetchFromGitHub, fetchurl, python2Packages, curl }:
# getmodel: curried helper taking a model file name and its sha256 and
# producing an attribute set with `name` plus a `src` obtained via fetchurl.
# NOTE(review): no `let` keyword and no closing braces for this binding are
# visible in this view -- the text looks truncated; confirm against the
# complete file before editing.
getmodel = name: sha256: {
inherit name;
src = fetchurl {
# NOTE(review): the URL is only the bare file name; a scheme/host prefix
# appears to be missing -- fetchurl presumably cannot fetch this as written.
url = "${name}";
inherit sha256;
# models: list of model descriptors built with getmodel, one per model file
# (en-default.pyrnn.gz and fraktur.pyrnn.gz).
# NOTE(review): getmodel takes two arguments (name, sha256), but only the
# name is visible in each call below, and the closing `)` / `];` are not
# visible either -- the hash lines look truncated; restore them from the
# complete file rather than guessing.
models = [
(getmodel "en-default.pyrnn.gz"
(getmodel "fraktur.pyrnn.gz"
# The ocropus package, built with python2Packages.buildPythonApplication.
# `rec` lets attributes refer to each other (src.rev uses `version`).
#
# Overview of the phases defined below:
#  - preConfigure: for every entry of a list (presumably `models`; the list
#    argument of `map` is not visible here) emits a `cp -R <src> models/...`
#    command, then rewrites /usr/local to $out via substituteInPlace.
#  - checkPhase: patches shebangs, makes run-test call `ocropus-rpred` with
#    `-Q $NIX_BUILD_CORES`, and runs ./run-test with `.` prepended to PATH.
#    It is currently not executed because doCheck is false ("fails").
#
# NOTE(review): several spots in preConfigure look truncated or broken:
#  - the antiquotation inside `basename` is empty (`${}`), which is not valid
#    Nix; presumably it should reference the model's name -- confirm.
#  - both substituteInPlace lines are identical and target the directory
#    `ocrolib/` rather than a file; the file names appear to be missing.
#  - the terminators of the ''...'' strings of preConfigure and checkPhase,
#    and the closing braces of `src`, `meta` and the derivation itself, are
#    not visible in this view.
python2Packages.buildPythonApplication rec {
pname = "ocropus";
version = "1.3.3";
# Source is fetched from GitHub: owner tmbdev, repo ocropy, rev "v<version>".
src = fetchFromGitHub {
sha256 = "02p1334mic5cfhvpfphfrbim4036yfd8s2zzpwm0xmm829z71nr7";
rev = "v${version}";
repo = "ocropy";
owner = "tmbdev";
# Runtime dependencies. Names are looked up with python2Packages in scope,
# but `with` does not shadow lexically bound names, so `curl` here is the
# `curl` argument of this file's function, not a python2Packages attribute.
propagatedBuildInputs = with python2Packages; [ curl numpy scipy pillow
matplotlib beautifulsoup4 pygtk lxml ];
enableParallelBuilding = true;
# `with lib;` brings concatStrings and map into scope for the script below.
preConfigure = with lib; ''
${concatStrings (map (x: "cp -R ${x.src} models/`basename ${}`;")
substituteInPlace ocrolib/ --replace /usr/local $out
substituteInPlace ocrolib/ --replace /usr/local $out
doCheck = false; # fails
checkPhase = ''
patchShebangs .
substituteInPlace ./run-test \
--replace 'ocropus-rpred' 'ocropus-rpred -Q $NIX_BUILD_CORES'
PATH=".:$PATH" ./run-test
# Package metadata; `with lib;` provides licenses, maintainers and platforms.
# NOTE(review): homepage is an empty string -- the URL appears to be missing.
meta = with lib; {
description = "Open source document analysis and OCR system";
license = licenses.asl20;
homepage = "";
maintainers = with maintainers; [ domenkozar ];
platforms = platforms.linux;