ocrmypdf: move to python3Packages

According to https://ocrmypdf.readthedocs.io/en/latest/api.html, it also
provides a Python API.
main
Robert Schütz 3 years ago
parent fc1c501f4c
commit 4c268ee2cc
  1. 1
      pkgs/applications/office/paperless-ng/default.nix
  2. 77
      pkgs/development/python-modules/ocrmypdf/default.nix
  3. 160
      pkgs/development/python-modules/ocrmypdf/paths.patch
  4. 13
      pkgs/tools/text/ocrmypdf/liblept.patch
  5. 2
      pkgs/top-level/all-packages.nix
  6. 2
      pkgs/top-level/python-packages.nix

@ -5,7 +5,6 @@
, ghostscript
, imagemagick
, jbig2enc
, ocrmypdf
, optipng
, pngquant
, qpdf

@ -1,34 +1,32 @@
{ fetchFromGitHub
{ lib
, buildPythonPackage
, cffi
, coloredlogs
, fetchFromGitHub
, ghostscript
, img2pdf
, importlib-resources
, jbig2enc
, leptonica
, pdfminer
, pikepdf
, pillow
, pluggy
, pngquant
, python3
, python3Packages
, qpdf
, lib
, pytest-xdist
, pytestCheckHook
, reportlab
, setuptools
, setuptools-scm
, setuptools-scm-git-archive
, stdenv
, substituteAll
, tesseract4
, tqdm
, unpaper
, substituteAll
}:
let
inherit (python3Packages) buildPythonApplication;
runtimeDeps = with python3Packages; [
ghostscript
jbig2enc
leptonica
pngquant
qpdf
tesseract4
unpaper
pillow
];
in
buildPythonApplication rec {
buildPythonPackage rec {
pname = "ocrmypdf";
version = "12.5.0";
@ -39,51 +37,48 @@ buildPythonApplication rec {
sha256 = "sha256-g80WedX+TGHE9EJ/RSgOc53PM17V3WZslUNaHoqKTo0=";
};
nativeBuildInputs = with python3Packages; [
setuptools
patches = [
(substituteAll {
src = ./paths.patch;
gs = "${lib.getBin ghostscript}/bin/gs";
jbig2 = "${lib.getBin jbig2enc}/bin/jbig2";
liblept = "${lib.getLib leptonica}/lib/liblept${stdenv.hostPlatform.extensions.sharedLibrary}";
pngquant = "${lib.getBin pngquant}/bin/pngquant";
tesseract = "${lib.getBin tesseract4}/bin/tesseract";
unpaper = "${lib.getBin unpaper}/bin/unpaper";
})
];
nativeBuildInputs = [
setuptools-scm-git-archive
setuptools-scm
];
propagatedBuildInputs = with python3Packages; [
propagatedBuildInputs = [
cffi
coloredlogs
img2pdf
importlib-resources
pdfminer
pluggy
pikepdf
pillow
pluggy
reportlab
setuptools
tqdm
];
checkInputs = with python3Packages; [
pypdf2
pytest
pytest-helpers-namespace
checkInputs = [
pytest-xdist
pytest-cov
python-xmp-toolkit
pytestCheckHook
] ++ runtimeDeps;
patches = [
(substituteAll {
src = ./liblept.patch;
liblept = "${lib.getLib leptonica}/lib/liblept${stdenv.hostPlatform.extensions.sharedLibrary}";
})
];
makeWrapperArgs = [ "--prefix PATH : ${lib.makeBinPath [ ghostscript jbig2enc pngquant qpdf tesseract4 unpaper ]}" ];
meta = with lib; {
homepage = "https://github.com/jbarlow83/OCRmyPDF";
description = "Adds an OCR text layer to scanned PDF files, allowing them to be searched";
license = with licenses; [ mpl20 mit ];
platforms = platforms.linux;
maintainers = [ maintainers.kiwi ];
maintainers = with maintainers; [ kiwi dotlambda ];
changelog = "https://github.com/jbarlow83/OCRmyPDF/blob/v${version}/docs/release_notes.rst";
};
}

@ -0,0 +1,160 @@
diff --git a/src/ocrmypdf/_exec/ghostscript.py b/src/ocrmypdf/_exec/ghostscript.py
index 5c357f1b..f459763a 100644
--- a/src/ocrmypdf/_exec/ghostscript.py
+++ b/src/ocrmypdf/_exec/ghostscript.py
@@ -25,28 +25,7 @@ from ocrmypdf.subprocess import get_version, run, run_polling_stderr
log = logging.getLogger(__name__)
-missing_gs_error = """
----------------------------------------------------------------------
-This error normally occurs when ocrmypdf find can't Ghostscript.
-Please ensure Ghostscript is installed and its location is added to
-the system PATH environment variable.
-
-For details see:
- https://ocrmypdf.readthedocs.io/en/latest/installation.html
----------------------------------------------------------------------
-"""
-
-_gswin = None
-if os.name == 'nt':
- _gswin = which('gswin64c')
- if not _gswin:
- _gswin = which('gswin32c')
- if not _gswin:
- raise MissingDependencyError(missing_gs_error)
- _gswin = Path(_gswin).stem
-
-GS = _gswin if _gswin else 'gs'
-del _gswin
+GS = '@gs@'
def version():
diff --git a/src/ocrmypdf/_exec/jbig2enc.py b/src/ocrmypdf/_exec/jbig2enc.py
index 2e8a058b..65a09088 100644
--- a/src/ocrmypdf/_exec/jbig2enc.py
+++ b/src/ocrmypdf/_exec/jbig2enc.py
@@ -14,7 +14,7 @@ from ocrmypdf.subprocess import get_version, run
def version():
- return get_version('jbig2', regex=r'jbig2enc (\d+(\.\d+)*).*')
+ return get_version('@jbig2@', regex=r'jbig2enc (\d+(\.\d+)*).*')
def available():
@@ -27,7 +27,7 @@ def available():
def convert_group(*, cwd, infiles, out_prefix):
args = [
- 'jbig2',
+ '@jbig2@',
'-b',
out_prefix,
'-s', # symbol mode (lossy)
@@ -46,7 +46,7 @@ def convert_group_mp(args):
def convert_single(*, cwd, infile, outfile):
- args = ['jbig2', '-p', infile]
+ args = ['@jbig2@', '-p', infile]
with open(outfile, 'wb') as fstdout:
proc = run(args, cwd=cwd, stdout=fstdout, stderr=PIPE)
proc.check_returncode()
diff --git a/src/ocrmypdf/_exec/pngquant.py b/src/ocrmypdf/_exec/pngquant.py
index ca8a4542..d0544174 100644
--- a/src/ocrmypdf/_exec/pngquant.py
+++ b/src/ocrmypdf/_exec/pngquant.py
@@ -19,7 +19,7 @@ from ocrmypdf.subprocess import get_version, run
def version():
- return get_version('pngquant', regex=r'(\d+(\.\d+)*).*')
+ return get_version('@pngquant@', regex=r'(\d+(\.\d+)*).*')
def available():
@@ -46,7 +46,7 @@ def input_as_png(input_file: Path):
def quantize(input_file: Path, output_file: Path, quality_min: int, quality_max: int):
with input_as_png(input_file) as input_stream:
args = [
- 'pngquant',
+ '@pngquant@',
'--force',
'--skip-if-larger',
'--quality',
diff --git a/src/ocrmypdf/_exec/tesseract.py b/src/ocrmypdf/_exec/tesseract.py
index 33ead41e..5840f7c1 100644
--- a/src/ocrmypdf/_exec/tesseract.py
+++ b/src/ocrmypdf/_exec/tesseract.py
@@ -78,7 +78,7 @@ class TesseractVersion(StrictVersion):
def version():
- return get_version('tesseract', regex=r'tesseract\s(.+)')
+ return get_version('@tesseract@', regex=r'tesseract\s(.+)')
def has_user_words():
@@ -100,7 +100,7 @@ def get_languages():
msg += output
return msg
- args_tess = ['tesseract', '--list-langs']
+ args_tess = ['@tesseract@', '--list-langs']
try:
proc = run(
args_tess,
@@ -122,7 +122,7 @@ def get_languages():
def tess_base_args(langs: List[str], engine_mode: Optional[int]) -> List[str]:
- args = ['tesseract']
+ args = ['@tesseract@']
if langs:
args.extend(['-l', '+'.join(langs)])
if engine_mode is not None:
diff --git a/src/ocrmypdf/_exec/unpaper.py b/src/ocrmypdf/_exec/unpaper.py
index 3c3ae72c..d269966a 100644
--- a/src/ocrmypdf/_exec/unpaper.py
+++ b/src/ocrmypdf/_exec/unpaper.py
@@ -31,7 +31,7 @@ log = logging.getLogger(__name__)
def version() -> str:
- return get_version('unpaper')
+ return get_version('@unpaper@')
def _setup_unpaper_io(tmpdir: Path, input_file: Path) -> Tuple[Path, Path]:
@@ -71,7 +71,7 @@ def _setup_unpaper_io(tmpdir: Path, input_file: Path) -> Tuple[Path, Path]:
def run(
input_file: Path, output_file: Path, *, dpi: DecFloat, mode_args: List[str]
) -> None:
- args_unpaper = ['unpaper', '-v', '--dpi', str(round(dpi, 6))] + mode_args
+ args_unpaper = ['@unpaper@', '-v', '--dpi', str(round(dpi, 6))] + mode_args
with TemporaryDirectory() as tmpdir:
input_pnm, output_pnm = _setup_unpaper_io(Path(tmpdir), input_file)
diff --git a/src/ocrmypdf/leptonica.py b/src/ocrmypdf/leptonica.py
index e4814f1a..fdaf7ea4 100644
--- a/src/ocrmypdf/leptonica.py
+++ b/src/ocrmypdf/leptonica.py
@@ -33,14 +33,7 @@ from ocrmypdf.lib._leptonica import ffi
logger = logging.getLogger(__name__)
-if os.name == 'nt':
- from ocrmypdf.subprocess._windows import shim_env_path
-
- libname = 'liblept-5'
- os.environ['PATH'] = shim_env_path()
-else:
- libname = 'lept'
-_libpath = find_library(libname)
+_libpath = '@liblept@'
if not _libpath:
raise MissingDependencyError(
"""

@ -1,13 +0,0 @@
diff --git a/src/ocrmypdf/leptonica.py b/src/ocrmypdf/leptonica.py
index 328b063..b993cc9 100644
--- a/src/ocrmypdf/leptonica.py
+++ b/src/ocrmypdf/leptonica.py
@@ -46,7 +46,7 @@ if os.name == 'nt':
os.environ['PATH'] = shim_paths_with_program_files()
else:
libname = 'lept'
-_libpath = find_library(libname)
+_libpath = '@liblept@'
if not _libpath:
raise MissingDependencyError(
"""

@ -3228,7 +3228,7 @@ with pkgs;
oci-cli = callPackage ../tools/admin/oci-cli { };
ocrmypdf = callPackage ../tools/text/ocrmypdf { };
ocrmypdf = with python3.pkgs; toPythonApplication ocrmypdf;
ocrfeeder = callPackage ../applications/graphics/ocrfeeder { };

@ -5059,6 +5059,8 @@ in {
oci = callPackage ../development/python-modules/oci { };
ocrmypdf = callPackage ../development/python-modules/ocrmypdf { };
od = callPackage ../development/python-modules/od { };
odfpy = callPackage ../development/python-modules/odfpy { };

Loading…
Cancel
Save