|
|
|
@ -1,10 +1,38 @@ |
|
|
|
|
{ stdenv, fetchFromGitHub, autoreconfHook, pkgconfig |
|
|
|
|
, leptonica, libpng, libtiff, icu, pango, opencl-headers |
|
|
|
|
|
|
|
|
|
# Supported list of languages or `null' for all available languages |
|
|
|
|
, enableLanguages ? null |
|
|
|
|
# if you want just a specific list of languages, optionally specify a hash |
|
|
|
|
# to make tessdata a fixed output derivation. |
|
|
|
|
, enableLanguagesHash ? (if enableLanguages == null # all languages |
|
|
|
|
then "1h48xfzabhn0ldbx5ib67cp9607pr0zpblsy8z6fs4knn0zznfnw" |
|
|
|
|
else null) |
|
|
|
|
}: |
|
|
|
|
|
|
|
|
|
let tessdata = stdenv.mkDerivation ({ |
|
|
|
|
name = "tessdata"; |
|
|
|
|
src = fetchFromGitHub { |
|
|
|
|
owner = "tesseract-ocr"; |
|
|
|
|
repo = "tessdata"; |
|
|
|
|
rev = "3cf1e2df1fe1d1da29295c9ef0983796c7958b7d"; |
|
|
|
|
# when updating don't forget to update the default value fo enableLanguagesHash |
|
|
|
|
sha256 = "1v4b63v5nzcxr2y3635r19l7lj5smjmc9vfk0wmxlryxncb4vpg7"; |
|
|
|
|
}; |
|
|
|
|
buildCommand = '' |
|
|
|
|
cd $src; |
|
|
|
|
for lang in ${if enableLanguages==null then "*.traineddata" else stdenv.lib.concatMapStringsSep " " (x: x+".traineddata") enableLanguages} ; do |
|
|
|
|
install -Dt $out/share/tessdata $src/$lang ; |
|
|
|
|
done; |
|
|
|
|
''; |
|
|
|
|
preferLocalBuild = true; |
|
|
|
|
} // (stdenv.lib.optionalAttrs (enableLanguagesHash != null) { |
|
|
|
|
# when a hash is given, we make this a fixed output derivation. |
|
|
|
|
outputHashMode = "recursive"; |
|
|
|
|
outputHashAlgo = "sha256"; |
|
|
|
|
outputHash = enableLanguagesHash; |
|
|
|
|
})); |
|
|
|
|
in |
|
|
|
|
|
|
|
|
|
stdenv.mkDerivation rec { |
|
|
|
|
name = "tesseract-${version}"; |
|
|
|
|
version = "3.05.00"; |
|
|
|
@ -16,41 +44,17 @@ stdenv.mkDerivation rec { |
|
|
|
|
sha256 = "11wrpcfl118wxsv2c3w2scznwb48c4547qml42s2bpdz079g8y30"; |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
tessdata = fetchFromGitHub { |
|
|
|
|
owner = "tesseract-ocr"; |
|
|
|
|
repo = "tessdata"; |
|
|
|
|
rev = "3cf1e2df1fe1d1da29295c9ef0983796c7958b7d"; |
|
|
|
|
sha256 = "1v4b63v5nzcxr2y3635r19l7lj5smjmc9vfk0wmxlryxncb4vpg7"; |
|
|
|
|
}; |
|
|
|
|
enableParallelBuilding = true; |
|
|
|
|
|
|
|
|
|
nativeBuildInputs = [ pkgconfig autoreconfHook ]; |
|
|
|
|
buildInputs = [ leptonica libpng libtiff icu pango opencl-headers ]; |
|
|
|
|
|
|
|
|
|
LIBLEPT_HEADERSDIR = "${leptonica}/include"; |
|
|
|
|
|
|
|
|
|
# Copy the .traineddata files of the languages specified in enableLanguages |
|
|
|
|
# into `$out/share/tessdata' and check afterwards if copying was successful. |
|
|
|
|
postInstall = let |
|
|
|
|
mkArg = lang: "-iname ${stdenv.lib.escapeShellArg "${lang}.traineddata"}"; |
|
|
|
|
mkFindArgs = stdenv.lib.concatMapStringsSep " -o " mkArg; |
|
|
|
|
findLangArgs = if enableLanguages != null |
|
|
|
|
then "\\( ${mkFindArgs enableLanguages} \\)" |
|
|
|
|
else "-iname '*.traineddata'"; |
|
|
|
|
in '' |
|
|
|
|
numLangs="$(find "$tessdata" -mindepth 1 -maxdepth 1 -type f \ |
|
|
|
|
${findLangArgs} -exec cp -t "$out/share/tessdata" {} + -print | wc -l)" |
|
|
|
|
|
|
|
|
|
${if enableLanguages != null then '' |
|
|
|
|
expected=${toString (builtins.length enableLanguages)} |
|
|
|
|
'' else '' |
|
|
|
|
expected="$(ls -1 "$tessdata/"*.traineddata | wc -l)" |
|
|
|
|
''} |
|
|
|
|
|
|
|
|
|
if [ "$numLangs" -ne "$expected" ]; then |
|
|
|
|
echo "Expected $expected languages, but $numLangs" \ |
|
|
|
|
"were copied to \`$out/share/tessdata'" >&2 |
|
|
|
|
exit 1 |
|
|
|
|
fi |
|
|
|
|
postInstall = '' |
|
|
|
|
for i in ${tessdata}/share/tessdata/*; do |
|
|
|
|
ln -s $i $out/share/tessdata; |
|
|
|
|
done |
|
|
|
|
''; |
|
|
|
|
|
|
|
|
|
meta = { |
|
|
|
|