# File viewer metadata: 350 lines, 17 KiB, YAML
# CapRover one-click-app template for Paperless-ng and its supporting
# services (Redis, PostgreSQL, Gotenberg, Tika).
captainVersion: 4
# Service definitions. Each "$$cap_*" token is a placeholder declared under
# caproverOneClickApp.variables (or built in, e.g. $$cap_appname) and is
# substituted as plain text before this file is interpreted.
services:
    # Paperless-ng
    $$cap_appname:
        depends_on:
            - $$cap_appname-db
            - $$cap_appname-redis
        image: jonaswinkler/paperless-ng:$$cap_app_version
        restart: always
        environment:
            APP_URL: http://$$cap_appname.$$cap_root_domain
            # Required services (https://paperless-ng.readthedocs.io/en/latest/configuration.html#required-services)
            PAPERLESS_REDIS: redis://srv-captain--$$cap_appname-redis:6379/0
            PAPERLESS_DBHOST: srv-captain--$$cap_appname-db
            PAPERLESS_DBNAME: $$cap_dbname
            PAPERLESS_DBUSER: $$cap_dbuser
            PAPERLESS_DBPASS: $$cap_dbpass
            # Paths and folders (https://paperless-ng.readthedocs.io/en/latest/configuration.html#paths-and-folders)
            PAPERLESS_FILENAME_FORMAT: $$cap_filename_format
            # Logging (https://paperless-ng.readthedocs.io/en/latest/configuration.html#logging)
            PAPERLESS_LOGROTATE_MAX_SIZE: $$cap_logrotate_max_size
            PAPERLESS_LOGROTATE_MAX_BACKUPS: $$cap_logrotate_max_backup
            # Hosting & Security (https://paperless-ng.readthedocs.io/en/latest/configuration.html#hosting-security)
            PAPERLESS_SECRET_KEY: $$cap_secret_key
            PAPERLESS_ALLOWED_HOSTS: $$cap_appname.$$cap_root_domain
            PAPERLESS_CORS_ALLOWED_HOSTS: http://$$cap_appname.$$cap_root_domain
            PAPERLESS_ADMIN_USER: $$cap_admin_user
            PAPERLESS_ADMIN_PASSWORD: $$cap_admin_password
            PAPERLESS_ADMIN_MAIL: $$cap_admin_email
            PAPERLESS_COOKIE_PREFIX: $$cap_appname
            # OCR settings (https://paperless-ng.readthedocs.io/en/latest/configuration.html#ocr-settings)
            PAPERLESS_OCR_LANGUAGE: $$cap_ocr_language
            PAPERLESS_OCR_MODE: $$cap_ocr_mode
            PAPERLESS_OCR_CLEAN: $$cap_ocr_clean
            PAPERLESS_OCR_DESKEW: $$cap_ocr_deskew
            PAPERLESS_OCR_ROTATE_PAGES: $$cap_ocr_rotate_pages
            PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD: $$cap_ocr_rotate_threshold
            PAPERLESS_OCR_OUTPUT_TYPE: $$cap_ocr_output_type
            PAPERLESS_OCR_PAGES: $$cap_ocr_pages
            PAPERLESS_OCR_IMAGE_DPI: $$cap_ocr_image_dpi
            # NOTE(review): this placeholder has a single "$", which matches the
            # "$cap_ocr_user_args" id declared under caproverOneClickApp.variables;
            # every other placeholder uses "$$cap_". If normalizing, rename the
            # id and this reference together or substitution will break.
            PAPERLESS_OCR_USER_ARGS: $cap_ocr_user_args
            # Tika settings (https://paperless-ng.readthedocs.io/en/latest/configuration.html#tika-settings)
            # Quoted so the value stays a string rather than a YAML integer.
            PAPERLESS_TIKA_ENABLED: '1'
            PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://srv-captain--$$cap_appname-gotenberg:3000
            PAPERLESS_TIKA_ENDPOINT: http://srv-captain--$$cap_appname-tika:9998
            # Software tweaks (https://paperless-ng.readthedocs.io/en/latest/configuration.html#software-tweaks)
            # PAPERLESS_TASK_WORKERS: $$cap_task_workers
            # PAPERLESS_THREADS_PER_WORKER: $$cap_threads_per_worker
            PAPERLESS_TIME_ZONE: $$cap_timezone
            PAPERLESS_CONSUMER_POLLING: $$cap_consumer_polling
            PAPERLESS_CONSUMER_DELETE_DUPLICATES: $$cap_consumer_delete_duplicates
            PAPERLESS_CONSUMER_RECURSIVE: $$cap_consumer_recursive
            PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS: $$cap_consumer_subdirs_as_tags
            PAPERLESS_CONVERT_MEMORY_LIMIT: $$cap_convert_memory_limit
            PAPERLESS_CONVERT_TMPDIR: $$cap_convert_tempdir
            PAPERLESS_OPTIMIZE_THUMBNAILS: $$cap_optimize_thumbnails
            PAPERLESS_PRE_CONSUME_SCRIPT: $$cap_pre_consume_script
            PAPERLESS_POST_CONSUME_SCRIPT: $$cap_post_consume_script
            PAPERLESS_FILENAME_DATE_ORDER: $$cap_filename_date_order
            PAPERLESS_THUMBNAIL_FONT_NAME: $$cap_thumbnail_font_name
            PAPERLESS_IGNORE_DATES: $$cap_paperless_ignore_dates
            # Docker-specific options (https://paperless-ng.readthedocs.io/en/latest/configuration.html#docker-specific-options)
            PAPERLESS_WEBSERVER_WORKERS: $$cap_webserver_workers
            USERMAP_UID: $$cap_usermap_uid
            USERMAP_GID: $$cap_usermap_gid
            PAPERLESS_OCR_LANGUAGES: $$cap_docker_ocr_languages
        volumes:
            - $$cap_appname-data:/usr/src/paperless/data
            - $$cap_appname-media:/usr/src/paperless/media
            - $$cap_appname-export:/usr/src/paperless/export
            - $$cap_appname-consume:/usr/src/paperless/consume
        caproverExtra:
            containerHttpPort: '8000'

    # Redis
    $$cap_appname-redis:
        volumes:
            - $$cap_appname-redis-data:/data
        restart: always
        caproverExtra:
            # Built from inline Dockerfile lines instead of an "image:" key.
            dockerfileLines:
                - FROM redis:$$cap_redis_version
                - CMD exec redis-server
            notExposeAsWebApp: 'true'

    # Database
    $$cap_appname-db:
        image: postgres:$$cap_postgres_version
        volumes:
            - $$cap_appname-db:/var/lib/postgresql/data
        restart: always
        environment:
            POSTGRES_DB: $$cap_dbname
            POSTGRES_USER: $$cap_dbuser
            POSTGRES_PASSWORD: $$cap_dbpass
        caproverExtra:
            notExposeAsWebApp: 'true'

    # gotenberg
    # NOTE(review): this service sets containerHttpPort, whereas the redis and
    # db services set notExposeAsWebApp; confirm the HTTP port mapping is
    # intended for a helper that paperless reaches via srv-captain--<name>.
    $$cap_appname-gotenberg:
        image: thecodingmachine/gotenberg:6
        restart: unless-stopped
        environment:
            # Quoted so the value stays a string rather than a YAML integer.
            DISABLE_GOOGLE_CHROME: '1'
        caproverExtra:
            containerHttpPort: '3000'

    # tika
    # NOTE(review): no image tag is given here, unlike the other services, and
    # containerHttpPort is set as on gotenberg above; confirm both are intended.
    $$cap_appname-tika:
        image: apache/tika
        restart: unless-stopped
        caproverExtra:
            containerHttpPort: '9998'

# One-click-app metadata: the variables declared here are the "$$cap_*"
# placeholders referenced in the services section, followed by the
# instruction texts and display information for this template.
caproverOneClickApp:
    variables:
        - id: $$cap_app_version
          label: Paperless-ng Version
          defaultValue: '1.4.1'
          description: Check out their docker page for the valid tags https://hub.docker.com/r/jonaswinkler/paperless-ng/tags

        - id: $$cap_postgres_version
          label: Postgres Version
          defaultValue: '13'
          description: Check out their Docker page for the valid tags https://hub.docker.com/r/library/postgres/tags/
          validRegex: /^([^\s^\/])+$/

        - id: $$cap_redis_version
          label: Redis version
          defaultValue: '6.2.1-alpine'
          validRegex: /^([^\s^\/])+$/

        - id: $$cap_dbname
          label: Database Name
          defaultValue: 'paperless'
          validRegex: /^([^\s^\/])+$/

        - id: $$cap_dbuser
          label: Database User
          defaultValue: 'paperless'
          validRegex: /^([^\s^\/])+$/

        - id: $$cap_dbpass
          label: Database Password
          defaultValue: $$cap_gen_random_hex(64)
          validRegex: /^([^\s^\/])+$/

        - id: $$cap_filename_format
          label: Filename Format
          # Explicit empty string instead of a bare key (which parses as null).
          defaultValue: ''
          description: 'Changes the filenames paperless uses to store documents in the media directory. See File name handling (https://paperless-ng.readthedocs.io/en/latest/advanced_usage.html#advanced-file-name-handling) for details. Default is none, which disables this feature.'

        - id: $$cap_logrotate_max_size
          label: Log Rotate Max Size (in bytes)
          description: Maximum file size for log files before they are rotated, in bytes.

        - id: $$cap_logrotate_max_backup
          label: Log Rotate Max Backup Count
          defaultValue: '20'
          validRegex: /^\d{0,}$/
          description: Number of rotated log files to keep.

        - id: $$cap_secret_key
          label: Paperless Secret Key
          defaultValue: $$cap_gen_random_hex(64)
          validRegex: /^([^\s^\/])+$/
          description: Paperless uses this to make session tokens.

        - id: $$cap_admin_user
          label: Admin User
          defaultValue: 'admin'
          validRegex: /^([^\s^\/])+$/

        - id: $$cap_admin_password
          label: Admin Password
          defaultValue: $$cap_gen_random_hex(10)
          validRegex: /^([^\s^\/])+$/

        - id: $$cap_admin_email
          label: Admin Email
          defaultValue: root@localhost
          validRegex: /^([^\s^\/])+$/

        - id: $$cap_ocr_language
          label: OCR Language
          defaultValue: 'eng'
          description: Customize the language that paperless will attempt to use when parsing documents. It should be a 3-letter language code consistent with ISO 639 https://www.loc.gov/standards/iso639-2/php/code_list.php This can be a combination of multiple languages such as deu+eng, in which case tesseract will use whatever language matches best. Keep in mind that tesseract uses much more cpu time with multiple languages enabled.

        - id: $$cap_ocr_mode
          label: OCR Mode
          defaultValue: 'skip'
          validRegex: /^(skip|skip_noarchive|redo|force)$/
          description: Tell paperless when and how to perform ocr on your documents. Four modes are available- skip, skip_noarchive, redo, force. Read more about this in the OCRmyPDF documentation (https://ocrmypdf.readthedocs.io/en/latest/advanced.html#when-ocr-is-skipped).

        - id: $$cap_ocr_clean
          label: OCR Clean
          defaultValue: 'clean'
          validRegex: /^(clean|clean-final|none)$/
          description: Tells paperless to use unpaper to clean any input document before sending it to tesseract. This uses more resources, but generally results in better OCR results. Available mode- clean, clean-final, none

        - id: $$cap_ocr_deskew
          label: OCR Deskew
          defaultValue: 'true'
          validRegex: /^(true|false)$/
          description: Tells paperless to correct skewing (slight rotation of input images mainly due to improper scanning).

        - id: $$cap_ocr_rotate_pages
          label: OCR Rotate Pages
          defaultValue: 'true'
          validRegex: /^(true|false)$/
          description: Tells paperless to correct page rotation (90°, 180° and 270° rotation). If you notice that paperless is not rotating incorrectly rotated pages (or vice versa), try adjusting the threshold up or down (see below).

        - id: $$cap_ocr_rotate_threshold
          label: OCR Rotate Pages Threshold
          defaultValue: '12'
          validRegex: /^\d[\d.]*$/
          description: This is an arbitrary value reported by tesseract. “15” is a very conservative value, whereas “2” is a very aggressive option and will often result in correctly rotated pages being rotated as well.

        - id: $$cap_ocr_output_type
          label: OCR Output Type
          defaultValue: 'pdfa'
          validRegex: /^(pdf|pdfa|pdfa-1|pdfa-2|pdfa-3)$/
          description: Specify the type of PDF documents that paperless should produce. Choices- pdf, pdfa, pdfa-1, pdfa-2, pdfa-3

        - id: $$cap_ocr_pages
          label: OCR Pages Count
          defaultValue: '0'
          validRegex: /^\d{1,}$/
          description: Tells paperless to use only the specified amount of pages for OCR. Documents with less than the specified amount of pages get OCR’ed completely. Specifying 1 here will only use the first page. Specifying 0 disables this feature and always uses all pages.

        - id: $$cap_ocr_image_dpi
          label: OCR Image DPI
          description: Set this to the DPI your scanner produces images at. Default is none, which will automatically calculate image DPI so that the produced PDF documents are A4 sized.

        # NOTE(review): this id has a single "$", unlike every other variable
        # ("$$cap_"). It matches the PAPERLESS_OCR_USER_ARGS reference in the
        # services section, so rename both together if normalizing.
        - id: $cap_ocr_user_args
          label: OCR User Args
          description: See https://ocrmypdf.readthedocs.io/en/latest/api.html#reference for valid parameters. Specify arguments as a JSON dictionary. Keep note of lower case booleans and double quoted parameter names and strings.

        # commenting out but keeping the code if someone tries to add these variables
        # these are optional variables and paperless-ng calculates automatically if no value is provided
        # but leaving it blank raises ValueError: invalid literal for int() with base 10:
        # - id: $$cap_task_workers
        #   label: Paperless Task Workers
        #   validRegex: /^\d{0,2}$/
        #   description: Leave blank to calculate automatically based on CPU core count

        # - id: $$cap_threads_per_worker
        #   label: Paperless Threads per Workers
        #   validRegex: /^\d{0,2}$/
        #   description: Leave blank to calculate automatically based on CPU core count. Ensure that the product PAPERLESS_TASK_WORKERS * PAPERLESS_THREADS_PER_WORKER does not exceed your CPU core count

        - id: $$cap_timezone
          label: Timezone
          defaultValue: 'UTC'
          description: 'Set the time zone here. See https://docs.djangoproject.com/en/3.1/ref/settings/#std:setting-TIME_ZONE for details on how to set it.'

        - id: $$cap_consumer_polling
          label: Consumer Polling (seconds)
          defaultValue: '0'
          validRegex: /^\d{1,}$/
          description: Defaults to 0, which disables polling and uses filesystem notifications.

        - id: $$cap_consumer_delete_duplicates
          label: Consumer Delete Duplicates
          defaultValue: 'false'
          validRegex: /^(true|false)$/
          description: When the consumer detects a duplicate document, it will not touch the original document.

        - id: $$cap_consumer_recursive
          label: Consumer Recursive
          defaultValue: 'false'
          validRegex: /^(true|false)$/
          description: Enable recursive watching of the consumption directory. Paperless will then pick up files in subdirectories within your consumption directory as well.

        - id: $$cap_consumer_subdirs_as_tags
          label: Consumer Sub Directory as tags
          defaultValue: 'false'
          validRegex: /^(true|false)$/
          description: Set the names of subdirectories as tags for consumed files. E.g. <CONSUMPTION_DIR>/foo/bar/file.pdf will add the tags “foo” and “bar” to the consumed file

        - id: $$cap_convert_memory_limit
          label: Convert Memory Limit
          defaultValue: '0'
          validRegex: /^\d{1,}$/
          description: For more information on how to use this value, you should search the web for “MAGICK_MEMORY_LIMIT”.

        # Feeds PAPERLESS_CONVERT_TMPDIR; the label previously duplicated
        # "Convert Memory Limit" from the entry above.
        - id: $$cap_convert_tempdir
          label: Convert Temp Directory
          description: For more information on how to use this value, you should search the web for “MAGICK_TMPDIR”.

        - id: $$cap_optimize_thumbnails
          label: Optimize Thumbnails
          defaultValue: 'true'
          validRegex: /^(true|false)$/
          description: This usually reduces the size of thumbnails by about 20%, but uses considerable compute time during consumption.

        - id: $$cap_pre_consume_script
          label: Pre Consume Script (path)
          description: Executed after the consumer sees a new document in the consumption folder, but before any processing of the document is performed. For more information, take a look at Pre-consumption script (https://paperless-ng.readthedocs.io/en/latest/advanced_usage.html?highlight=PAPERLESS_PRE_CONSUME_SCRIPT#pre-consumption-script)

        - id: $$cap_post_consume_script
          label: Post Consume Script (path)
          description: Executed after the consumer has successfully processed a document and has moved it into paperless. For more information, take a look at Post-consumption script (https://paperless-ng.readthedocs.io/en/latest/advanced_usage.html#advanced-post-consume-script)

        - id: $$cap_filename_date_order
          label: Filename Date Order
          description: Defaults to none, which disables this feature. The date order can be set to any option as specified in https://dateparser.readthedocs.io/en/latest/settings.html#date-order.

        - id: $$cap_thumbnail_font_name
          label: Thumbnail Font Name
          defaultValue: '/usr/share/fonts/liberation/LiberationSerif-Regular.ttf'
          description: Paperless creates thumbnails for plain text files by rendering the content of the file on an image and uses a predefined font for that

        - id: $$cap_paperless_ignore_dates
          label: Ignore Dates
          description: You may specify dates in a multitude of formats supported by dateparser (see https://dateparser.readthedocs.io/en/latest/#popular-formats)

        - id: $$cap_webserver_workers
          label: Webserver Workers
          defaultValue: '2'
          validRegex: /^\d{1,}$/
          description: The number of worker processes the webserver should spawn.

        - id: $$cap_usermap_uid
          label: Usermap UID
          defaultValue: '1000'
          validRegex: /^\d{1,}$/
          description: Set this to your actual user ID on the host system, which you can get by executing ```id -u```

        - id: $$cap_usermap_gid
          label: Usermap GID
          defaultValue: '1000'
          validRegex: /^\d{1,}$/
          description: Set this to your actual group ID on the host system, which you can get by executing ```id -g```

        - id: $$cap_docker_ocr_languages
          label: OCR Languages to install
          description: Additional OCR languages to install. By default, paperless comes with English, German, Italian, Spanish and French.

    instructions:
        start: >-
            Paperless is an application by Daniel Quinn and others that indexes your scanned documents and allows you to easily search for documents and store metadata alongside your documents.
        end: >-
            Done! 😄
            Your service is available at http://$$cap_appname.$$cap_root_domain
    displayName: 'Paperless-ng'
    isOfficial: true
    description: Paperless is an application by Daniel Quinn and others that indexes your scanned documents and allows you to easily search for documents and store metadata alongside your documents.
    documentation: https://paperless-ng.readthedocs.io/en/latest/