# CapRover one-click app template for Paperless-ng.
captainVersion: 4
# Containers deployed by this template: the Paperless-ng web app plus its
# Redis, PostgreSQL, Gotenberg and Tika companions. Every $$cap_* token is a
# placeholder declared under caproverOneClickApp.variables.
services:
    # Paperless-ng
    $$cap_appname:
        depends_on:
            - $$cap_appname-db
            - $$cap_appname-redis
        image: jonaswinkler/paperless-ng:$$cap_app_version
        restart: always
        environment:
            APP_URL: http://$$cap_appname.$$cap_root_domain
            # Required services (https://paperless-ng.readthedocs.io/en/latest/configuration.html#required-services)
            PAPERLESS_REDIS: redis://srv-captain--$$cap_appname-redis:6379/0
            PAPERLESS_DBHOST: srv-captain--$$cap_appname-db
            PAPERLESS_DBNAME: $$cap_dbname
            PAPERLESS_DBUSER: $$cap_dbuser
            PAPERLESS_DBPASS: $$cap_dbpass
            # Paths and folders (https://paperless-ng.readthedocs.io/en/latest/configuration.html#paths-and-folders)
            PAPERLESS_FILENAME_FORMAT: $$cap_filename_format
            # Logging (https://paperless-ng.readthedocs.io/en/latest/configuration.html#logging)
            PAPERLESS_LOGROTATE_MAX_SIZE: $$cap_logrotate_max_size
            PAPERLESS_LOGROTATE_MAX_BACKUPS: $$cap_logrotate_max_backup
            # Hosting & Security (https://paperless-ng.readthedocs.io/en/latest/configuration.html#hosting-security)
            PAPERLESS_SECRET_KEY: $$cap_secret_key
            PAPERLESS_ALLOWED_HOSTS: $$cap_appname.$$cap_root_domain
            PAPERLESS_CORS_ALLOWED_HOSTS: http://$$cap_appname.$$cap_root_domain
            PAPERLESS_ADMIN_USER: $$cap_admin_user
            PAPERLESS_ADMIN_PASSWORD: $$cap_admin_password
            PAPERLESS_ADMIN_MAIL: $$cap_admin_email
            PAPERLESS_COOKIE_PREFIX: $$cap_cookie_prefix
            # OCR settings (https://paperless-ng.readthedocs.io/en/latest/configuration.html#ocr-settings)
            PAPERLESS_OCR_LANGUAGE: $$cap_ocr_language
            PAPERLESS_OCR_MODE: $$cap_ocr_mode
            PAPERLESS_OCR_CLEAN: $$cap_ocr_clean
            PAPERLESS_OCR_DESKEW: $$cap_ocr_deskew
            PAPERLESS_OCR_ROTATE_PAGES: $$cap_ocr_rotate_pages
            PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD: $$cap_ocr_rotate_threshold
            PAPERLESS_OCR_OUTPUT_TYPE: $$cap_ocr_output_type
            PAPERLESS_OCR_PAGES: $$cap_ocr_pages
            PAPERLESS_OCR_IMAGE_DPI: $$cap_ocr_image_dpi
            # NOTE(review): this placeholder uses a single '$', unlike every
            # other one in this file. It matches the variable id declared as
            # '$cap_ocr_user_args', so the two must be renamed together.
            PAPERLESS_OCR_USER_ARGS: $cap_ocr_user_args
            # Tika settings (https://paperless-ng.readthedocs.io/en/latest/configuration.html#tika-settings)
            # Quoted so the value stays a string rather than a YAML integer.
            PAPERLESS_TIKA_ENABLED: '1'
            PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://srv-captain--$$cap_appname-gotenberg:3000
            PAPERLESS_TIKA_ENDPOINT: http://srv-captain--$$cap_appname-tika:9998
            # Software tweaks (https://paperless-ng.readthedocs.io/en/latest/configuration.html#software-tweaks)
            # PAPERLESS_TASK_WORKERS: $$cap_task_workers
            # PAPERLESS_THREADS_PER_WORKER: $$cap_threads_per_worker
            PAPERLESS_TIME_ZONE: $$cap_timezone
            PAPERLESS_CONSUMER_POLLING: $$cap_consumer_polling
            PAPERLESS_CONSUMER_DELETE_DUPLICATES: $$cap_consumer_delete_duplicates
            PAPERLESS_CONSUMER_RECURSIVE: $$cap_consumer_recursive
            PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS: $$cap_consumer_subdirs_as_tags
            PAPERLESS_CONVERT_MEMORY_LIMIT: $$cap_convert_memory_limit
            PAPERLESS_CONVERT_TMPDIR: $$cap_convert_tempdir
            PAPERLESS_OPTIMIZE_THUMBNAILS: $$cap_optimize_thumbnails
            PAPERLESS_PRE_CONSUME_SCRIPT: $$cap_pre_consume_script
            PAPERLESS_POST_CONSUME_SCRIPT: $$cap_post_consume_script
            PAPERLESS_FILENAME_DATE_ORDER: $$cap_filename_date_order
            PAPERLESS_THUMBNAIL_FONT_NAME: $$cap_thumbnail_font_name
            PAPERLESS_IGNORE_DATES: $$cap_paperless_ignore_dates
            # Docker-specific options (https://paperless-ng.readthedocs.io/en/latest/configuration.html#docker-specific-options)
            PAPERLESS_WEBSERVER_WORKERS: $$cap_webserver_workers
            USERMAP_UID: $$cap_usermap_uid
            USERMAP_GID: $$cap_usermap_gid
            PAPERLESS_OCR_LANGUAGES: $$cap_docker_ocr_languages
        volumes:
            - $$cap_appname-data:/usr/src/paperless/data
            - $$cap_appname-media:/usr/src/paperless/media
            - $$cap_appname-export:/usr/src/paperless/export
            - $$cap_appname-consume:/usr/src/paperless/consume
        caproverExtra:
            containerHttpPort: '8000'

    # Redis
    $$cap_appname-redis:
        volumes:
            - $$cap_appname-redis-data:/data
        restart: always
        caproverExtra:
            dockerfileLines:
                - FROM redis:$$cap_redis_version
                - CMD exec redis-server
            notExposeAsWebApp: 'true'

    # Database
    $$cap_appname-db:
        image: postgres:$$cap_postgres_version
        volumes:
            - $$cap_appname-db:/var/lib/postgresql/data
        restart: always
        environment:
            POSTGRES_DB: $$cap_dbname
            POSTGRES_USER: $$cap_dbuser
            POSTGRES_PASSWORD: $$cap_dbpass
        caproverExtra:
            notExposeAsWebApp: 'true'

    # gotenberg
    # NOTE(review): unlike redis and db, this service is not marked
    # notExposeAsWebApp. Paperless addresses it through the srv-captain--
    # hostname in PAPERLESS_TIKA_GOTENBERG_ENDPOINT, so public exposure is
    # presumably unnecessary - confirm before changing.
    $$cap_appname-gotenberg:
        image: thecodingmachine/gotenberg
        restart: unless-stopped
        environment:
            # Quoted so the value stays a string rather than a YAML integer.
            DISABLE_GOOGLE_CHROME: '1'
        caproverExtra:
            containerHttpPort: '3000'

    # tika
    # NOTE(review): same exposure question as gotenberg; paperless addresses
    # it through the srv-captain-- hostname in PAPERLESS_TIKA_ENDPOINT.
    $$cap_appname-tika:
        image: apache/tika
        restart: unless-stopped
        caproverExtra:
            containerHttpPort: '9998'

# One-click app metadata: the user-facing variables whose ids are substituted
# into the services section, the install instructions, and the catalogue entry.
caproverOneClickApp:
    variables:
        - id: $$cap_app_version
          label: Paperless-ng Version
          defaultValue: '1.4.1'
          description: Check out their docker page for the valid tags https://hub.docker.com/r/jonaswinkler/paperless-ng/tags

        - id: $$cap_postgres_version
          label: Postgres Version
          defaultValue: '13'
          description: Check out their Docker page for the valid tags https://hub.docker.com/r/library/postgres/tags/
          validRegex: /^([^\s^\/])+$/

        - id: $$cap_redis_version
          label: Redis version
          defaultValue: '6.2.1-alpine'
          validRegex: /^([^\s^\/])+$/

        - id: $$cap_dbname
          label: Database Name
          defaultValue: 'paperless'
          validRegex: /^([^\s^\/])+$/

        - id: $$cap_dbuser
          label: Database User
          defaultValue: 'paperless'
          validRegex: /^([^\s^\/])+$/

        - id: $$cap_dbpass
          label: Database Password
          defaultValue: $$cap_gen_random_hex(64)
          validRegex: /^([^\s^\/])+$/

        - id: $$cap_filename_format
          label: Filename Format
          # Explicit empty string instead of a bare key, which parses as null.
          defaultValue: ''
          description: 'Changes the filenames paperless uses to store documents in the media directory. See File name handling (https://paperless-ng.readthedocs.io/en/latest/advanced_usage.html#advanced-file-name-handling) for details. Default is none, which disables this feature.'

        - id: $$cap_logrotate_max_size
          label: Log Rotate Max Size (in bytes)
          description: Maximum file size for log files before they are rotated, in bytes.

        - id: $$cap_logrotate_max_backup
          label: Log Rotate Max Backup Count
          defaultValue: '20'
          validRegex: /^\d{0,}$/
          description: Number of rotated log files to keep.

        - id: $$cap_secret_key
          label: Paperless Secret Key
          defaultValue: $$cap_gen_random_hex(64)
          validRegex: /^([^\s^\/])+$/
          description: Paperless uses this to make session tokens.

        - id: $$cap_admin_user
          label: Admin User
          defaultValue: 'admin'
          validRegex: /^([^\s^\/])+$/

        - id: $$cap_admin_password
          label: Admin Password
          defaultValue: $$cap_gen_random_hex(10)
          validRegex: /^([^\s^\/])+$/

        - id: $$cap_admin_email
          label: Admin Email
          defaultValue: root@localhost
          validRegex: /^([^\s^\/])+$/

        - id: $$cap_cookie_prefix
          label: Cookie Prefix
          defaultValue: $$cap_appname
          description: Specify a prefix that is added to the cookies used by paperless to identify the currently logged in user. This is useful for when you’re running two instances of paperless on the same host.

        - id: $$cap_ocr_language
          label: OCR Language
          defaultValue: 'eng'
          description: Customize the language that paperless will attempt to use when parsing documents. It should be a 3-letter language code consistent with ISO 639 https://www.loc.gov/standards/iso639-2/php/code_list.php This can be a combination of multiple languages such as deu+eng, in which case tesseract will use whatever language matches best. Keep in mind that tesseract uses much more cpu time with multiple languages enabled.

        - id: $$cap_ocr_mode
          label: OCR Mode
          defaultValue: 'skip'
          validRegex: /^(skip|skip_noarchive|redo|force)$/
          description: Tell paperless when and how to perform ocr on your documents. Four modes are available- skip, skip_noarchive, redo, force. Read more about this in the OCRmyPDF documentation (https://ocrmypdf.readthedocs.io/en/latest/advanced.html#when-ocr-is-skipped).

        - id: $$cap_ocr_clean
          label: OCR Clean
          defaultValue: 'clean'
          validRegex: /^(clean|clean-final|none)$/
          description: Tells paperless to use unpaper to clean any input document before sending it to tesseract. This uses more resources, but generally results in better OCR results. Available mode- clean, clean-final, none

        - id: $$cap_ocr_deskew
          label: OCR Deskew
          defaultValue: 'true'
          validRegex: /^(true|false)$/
          description: Tells paperless to correct skewing (slight rotation of input images mainly due to improper scanning).

        - id: $$cap_ocr_rotate_pages
          label: OCR Rotate Pages
          defaultValue: 'true'
          validRegex: /^(true|false)$/
          description: Tells paperless to correct page rotation (90°, 180° and 270° rotation). If you notice that paperless is not rotating incorrectly rotated pages (or vice versa), try adjusting the threshold up or down (see below).

        - id: $$cap_ocr_rotate_threshold
          label: OCR Rotate Pages Threshold
          defaultValue: '12'
          validRegex: /^\d[\d.]*$/
          description: This is an arbitrary value reported by tesseract. “15” is a very conservative value, whereas “2” is a very aggressive option and will often result in correctly rotated pages being rotated as well.

        - id: $$cap_ocr_output_type
          label: OCR Output Type
          defaultValue: 'pdfa'
          validRegex: /^(pdf|pdfa|pdfa-1|pdfa-2|pdfa-3)$/
          description: Specify the type of PDF documents that paperless should produce. Choices- pdf, pdfa, pdfa-1, pdfa-2, pdfa-3

        - id: $$cap_ocr_pages
          label: OCR Pages Count
          defaultValue: '0'
          validRegex: /^\d{1,}$/
          description: Tells paperless to use only the specified amount of pages for OCR. Documents with less than the specified amount of pages get OCR’ed completely. Specifying 1 here will only use the first page. Specifying 0 disables this feature and always uses all pages.

        - id: $$cap_ocr_image_dpi
          label: OCR Image DPI
          description: Set this to the DPI your scanner produces images at. Default is none, which will automatically calculate image DPI so that the produced PDF documents are A4 sized.

        # NOTE(review): this id uses a single '$', unlike every other id in
        # this list. It matches the '$cap_ocr_user_args' placeholder used for
        # PAPERLESS_OCR_USER_ARGS, so the two must be renamed together.
        - id: $cap_ocr_user_args
          label: OCR User Args
          description: See https://ocrmypdf.readthedocs.io/en/latest/api.html#reference for valid parameters. Specify arguments as a JSON dictionary. Keep note of lower case booleans and double quoted parameter names and strings.

        # commenting out but keeping the code if someone tries to add these variables
        # these are optional variables and paperless-ng calculates automatically if no value is provided
        # but leaving it blank raises ValueError: invalid literal for int() with base 10:
        # - id: $$cap_task_workers
        #   label: Paperless Task Workers
        #   validRegex: /^\d{0,2}$/
        #   description: Leave blank to calculate automatically based on CPU core count

        # - id: $$cap_threads_per_worker
        #   label: Paperless Threads per Workers
        #   validRegex: /^\d{0,2}$/
        #   description: Leave blank to calculate automatically based on CPU core count. Ensure that the product PAPERLESS_TASK_WORKERS * PAPERLESS_THREADS_PER_WORKER does not exceed your CPU core count

        - id: $$cap_timezone
          label: Timezone
          defaultValue: 'UTC'
          description: 'Set the time zone here. See https://docs.djangoproject.com/en/3.1/ref/settings/#std:setting-TIME_ZONE for details on how to set it.'

        - id: $$cap_consumer_polling
          label: Consumer Polling (seconds)
          defaultValue: '0'
          validRegex: /^\d{1,}$/
          description: Defaults to 0, which disables polling and uses filesystem notifications.

        - id: $$cap_consumer_delete_duplicates
          label: Consumer Delete Duplicates
          defaultValue: 'false'
          validRegex: /^(true|false)$/
          description: When the consumer detects a duplicate document, it will not touch the original document.

        - id: $$cap_consumer_recursive
          label: Consumer Recursive
          defaultValue: 'false'
          validRegex: /^(true|false)$/
          description: Enable recursive watching of the consumption directory. Paperless will then pick up files from subdirectories within your consumption directory as well.

        - id: $$cap_consumer_subdirs_as_tags
          label: Consumer Sub Directory as tags
          defaultValue: 'false'
          validRegex: /^(true|false)$/
          description: Set the names of subdirectories as tags for consumed files. E.g. <CONSUMPTION_DIR>/foo/bar/file.pdf will add the tags “foo” and “bar” to the consumed file

        - id: $$cap_convert_memory_limit
          label: Convert Memory Limit
          defaultValue: '0'
          validRegex: /^\d{1,}$/
          description: For more information on how to use this value, you should search the web for “MAGICK_MEMORY_LIMIT”.

        # Feeds PAPERLESS_CONVERT_TMPDIR; the label previously duplicated
        # the memory-limit variable's label.
        - id: $$cap_convert_tempdir
          label: Convert Temp Dir
          description: For more information on how to use this value, you should search the web for “MAGICK_TMPDIR”.

        - id: $$cap_optimize_thumbnails
          label: Optimize Thumbnails
          defaultValue: 'true'
          validRegex: /^(true|false)$/
          description: This usually reduces the size of thumbnails by about 20%, but uses considerable compute time during consumption.

        - id: $$cap_pre_consume_script
          label: Pre Consume Script (path)
          description: Executed after the consumer sees a new document in the consumption folder, but before any processing of the document is performed. For more information, take a look at Pre-consumption script (https://paperless-ng.readthedocs.io/en/latest/advanced_usage.html?highlight=PAPERLESS_PRE_CONSUME_SCRIPT#pre-consumption-script)

        - id: $$cap_post_consume_script
          label: Post Consume Script (path)
          description: Executed after the consumer has successfully processed a document and has moved it into paperless. For more information, take a look at Post-consumption script (https://paperless-ng.readthedocs.io/en/latest/advanced_usage.html#advanced-post-consume-script)

        - id: $$cap_filename_date_order
          label: Filename Date Order
          description: Defaults to none, which disables this feature. The date order can be set to any option as specified in https://dateparser.readthedocs.io/en/latest/settings.html#date-order.

        - id: $$cap_thumbnail_font_name
          label: Thumbnail Font Name
          defaultValue: '/usr/share/fonts/liberation/LiberationSerif-Regular.ttf'
          description: Paperless creates thumbnails for plain text files by rendering the content of the file on an image and uses a predefined font for that

        - id: $$cap_paperless_ignore_dates
          label: Ignore Dates
          description: You may specify dates in a multitude of formats supported by dateparser (see https://dateparser.readthedocs.io/en/latest/#popular-formats)

        - id: $$cap_webserver_workers
          label: Webserver Workers
          defaultValue: '2'
          validRegex: /^\d{1,}$/
          description: The number of worker processes the webserver should spawn.

        - id: $$cap_usermap_uid
          label: Usermap UID
          defaultValue: '1000'
          validRegex: /^\d{1,}$/
          description: Set this to your actual user ID on the host system, which you can get by executing ```id -u```

        - id: $$cap_usermap_gid
          label: Usermap GID
          defaultValue: '1000'
          validRegex: /^\d{1,}$/
          description: Set this to your actual group ID on the host system, which you can get by executing ```id -g```

        - id: $$cap_docker_ocr_languages
          label: OCR Languages to install
          description: Additional OCR languages to install. By default, paperless comes with English, German, Italian, Spanish and French.

    instructions:
        start: >-
            Paperless is an application by Daniel Quinn and others that indexes your scanned documents and allows you to easily search for documents and store metadata alongside your documents.
        end: >-
            Done! 😄
            Your service is available at http://$$cap_appname.$$cap_root_domain
    displayName: 'Paperless-ng'
    isOfficial: true
    description: Paperless is an application by Daniel Quinn and others that indexes your scanned documents and allows you to easily search for documents and store metadata alongside your documents.
    documentation: https://paperless-ng.readthedocs.io/en/latest/