Updated Paperless-ng with additional variables (#392)

* added more variables supported by paperless-ng and added variable descriptions

* fixed regex validator for numeric inputs and email

* fixed regex validator

* removed a few optional variables

* unique variable name patterns

* added url protocol for CORS header

* added pre-consume script variable

Co-authored-by: ak4zh <akash@gmail.com>
This commit is contained in:
Ak4zh 2021-04-21 05:37:54 +05:30 committed by GitHub
parent 031af00b24
commit 4f42eccfc2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 189 additions and 18 deletions

View File

@ -9,30 +9,61 @@ services:
restart: always restart: always
environment: environment:
APP_URL: http://$$cap_appname.$$cap_root_domain APP_URL: http://$$cap_appname.$$cap_root_domain
# Required services (https://paperless-ng.readthedocs.io/en/latest/configuration.html#required-services)
PAPERLESS_REDIS: redis://srv-captain--$$cap_appname-redis:6379/0 PAPERLESS_REDIS: redis://srv-captain--$$cap_appname-redis:6379/0
PAPERLESS_DBHOST: srv-captain--$$cap_appname-db PAPERLESS_DBHOST: srv-captain--$$cap_appname-db
PAPERLESS_DBNAME: $$cap_dbname PAPERLESS_DBNAME: $$cap_dbname
PAPERLESS_DBUSER: $$cap_dbuser PAPERLESS_DBUSER: $$cap_dbuser
PAPERLESS_DBPASS: $$cap_dbpass PAPERLESS_DBPASS: $$cap_dbpass
# Paths and folders (https://paperless-ng.readthedocs.io/en/latest/configuration.html#paths-and-folders)
PAPERLESS_FILENAME_FORMAT: $$cap_filename_format
# Logging (https://paperless-ng.readthedocs.io/en/latest/configuration.html#logging)
PAPERLESS_LOGROTATE_MAX_SIZE: $$cap_logrotate_max_size PAPERLESS_LOGROTATE_MAX_SIZE: $$cap_logrotate_max_size
PAPERLESS_LOGROTATE_MAX_BACKUPS: $$cap_logrotate_max_backup PAPERLESS_LOGROTATE_MAX_BACKUPS: $$cap_logrotate_max_backup
# Hosting & Security (https://paperless-ng.readthedocs.io/en/latest/configuration.html#hosting-security)
PAPERLESS_SECRET_KEY: $$cap_secret_key PAPERLESS_SECRET_KEY: $$cap_secret_key
PAPERLESS_ALLOWED_HOSTS: $$cap_appname.$$cap_root_domain
PAPERLESS_CORS_ALLOWED_HOSTS: http://$$cap_appname.$$cap_root_domain
PAPERLESS_ADMIN_USER: $$cap_admin_user PAPERLESS_ADMIN_USER: $$cap_admin_user
PAPERLESS_ADMIN_PASSWORD: $$cap_admin_password PAPERLESS_ADMIN_PASSWORD: $$cap_admin_password
PAPERLESS_ADMIN_MAIL: $$cap_admin_email
PAPERLESS_COOKIE_PREFIX: $$cap_cookie_prefix PAPERLESS_COOKIE_PREFIX: $$cap_cookie_prefix
# OCR settings (https://paperless-ng.readthedocs.io/en/latest/configuration.html#ocr-settings)
PAPERLESS_OCR_LANGUAGE: $$cap_ocr_language PAPERLESS_OCR_LANGUAGE: $$cap_ocr_language
PAPERLESS_OCR_MODE: $$cap_ocr_mode PAPERLESS_OCR_MODE: $$cap_ocr_mode
PAPERLESS_OCR_CLEAN: $$cap_ocr_clean PAPERLESS_OCR_CLEAN: $$cap_ocr_clean
PAPERLESS_OCR_DESKEW: $$cap_ocr_deskew PAPERLESS_OCR_DESKEW: $$cap_ocr_deskew
PAPERLESS_OCR_ROTATE_PAGES: $$cap_ocr_rotate_pages PAPERLESS_OCR_ROTATE_PAGES: $$cap_ocr_rotate_pages
PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD: $$cap_ocr_rotate_threshold
PAPERLESS_OCR_OUTPUT_TYPE: $$cap_ocr_output_type PAPERLESS_OCR_OUTPUT_TYPE: $$cap_ocr_output_type
PAPERLESS_OCR_PAGES: $$cap_ocr_page_count PAPERLESS_OCR_PAGES: $$cap_ocr_pages
PAPERLESS_OCR_IMAGE_DPI: $$cap_ocr_image_dpi
PAPERLESS_OCR_USER_ARGS: $cap_ocr_user_args PAPERLESS_OCR_USER_ARGS: $cap_ocr_user_args
PAPERLESS_TIME_ZONE: $$cap_timezone # Tika settings (https://paperless-ng.readthedocs.io/en/latest/configuration.html#tika-settings)
PAPERLESS_TIKA_ENABLED: 1 PAPERLESS_TIKA_ENABLED: 1
PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://srv-captain--$$cap_appname-gotenberg:3000 PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://srv-captain--$$cap_appname-gotenberg:3000
PAPERLESS_TIKA_ENDPOINT: http://srv-captain--$$cap_appname-tika:9998 PAPERLESS_TIKA_ENDPOINT: http://srv-captain--$$cap_appname-tika:9998
# Software tweaks (https://paperless-ng.readthedocs.io/en/latest/configuration.html#software-tweaks)
# PAPERLESS_TASK_WORKERS: $$cap_task_workers
# PAPERLESS_THREADS_PER_WORKER: $$cap_threads_per_worker
PAPERLESS_TIME_ZONE: $$cap_timezone
PAPERLESS_CONSUMER_POLLING: $$cap_consumer_polling
PAPERLESS_CONSUMER_DELETE_DUPLICATES: $$cap_consumer_delete_duplicates
PAPERLESS_CONSUMER_RECURSIVE: $$cap_consumer_recursive
PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS: $$cap_consumer_subdirs_as_tags
PAPERLESS_CONVERT_MEMORY_LIMIT: $$cap_convert_memory_limit
PAPERLESS_CONVERT_TMPDIR: $$cap_convert_tempdir
PAPERLESS_OPTIMIZE_THUMBNAILS: $$cap_optimize_thumbnails
PAPERLESS_PRE_CONSUME_SCRIPT: $$cap_pre_consume_script
PAPERLESS_POST_CONSUME_SCRIPT: $$cap_post_consume_script
PAPERLESS_FILENAME_DATE_ORDER: $$cap_filename_date_order
PAPERLESS_THUMBNAIL_FONT_NAME: $$cap_thumbnail_font_name
PAPERLESS_IGNORE_DATES: $$cap_paperless_ignore_dates
# Docker-specific options (https://paperless-ng.readthedocs.io/en/latest/configuration.html#docker-specific-options)
PAPERLESS_WEBSERVER_WORKERS: $$cap_webserver_workers
USERMAP_UID: $$cap_usermap_uid
USERMAP_GID: $$cap_usermap_gid
PAPERLESS_OCR_LANGUAGES: $$cap_docker_ocr_languages
volumes: volumes:
- $$cap_appname-data:/usr/src/paperless/data - $$cap_appname-data:/usr/src/paperless/data
- $$cap_appname-media:/usr/src/paperless/media - $$cap_appname-media:/usr/src/paperless/media
@ -54,7 +85,7 @@ services:
# Database # Database
$$cap_appname-db: $$cap_appname-db:
image: postgres:13 image: postgres:$$cap_postgres_version
volumes: volumes:
- $$cap_appname-db:/var/lib/postgresql/data - $$cap_appname-db:/var/lib/postgresql/data
restart: always restart: always
@ -88,89 +119,229 @@ caproverOneClickApp:
defaultValue: '1.4.1' defaultValue: '1.4.1'
description: Check out their docker page for the valid tags https://hub.docker.com/r/jonaswinkler/paperless-ng/tags description: Check out their docker page for the valid tags https://hub.docker.com/r/jonaswinkler/paperless-ng/tags
- id: $$cap_dbpass - id: $$cap_postgres_version
label: Database Password label: Postgres Version
defaultValue: $$cap_gen_random_hex(64) defaultValue: '13'
description: Check out their Docker page for the valid tags https://hub.docker.com/r/library/postgres/tags/
validRegex: /^([^\s^\/])+$/
- id: $$cap_redis_version - id: $$cap_redis_version
label: Redis version label: Redis version
defaultValue: 6.0 defaultValue: '6.2.1-alpine'
validRegex: /^([^\s^\/])+$/
- id: $$cap_dbname - id: $$cap_dbname
label: Database name label: Database Name
defaultValue: 'paperless' defaultValue: 'paperless'
validRegex: /^([^\s^\/])+$/
- id: $$cap_dbuser - id: $$cap_dbuser
label: Database User label: Database User
defaultValue: 'paperless' defaultValue: 'paperless'
validRegex: /^([^\s^\/])+$/
- id: $$cap_dbpass
label: Database Password
defaultValue: $$cap_gen_random_hex(64)
validRegex: /^([^\s^\/])+$/
- id: $$cap_filename_format - id: $$cap_filename_format
label: Database User label: Filename Format
defaultValue: defaultValue:
description: 'Changes the filenames paperless uses to store documents in the media directory. See File name handling (https://paperless-ng.readthedocs.io/en/latest/advanced_usage.html#advanced-file-name-handling) for details. Default is none, which disables this feature.' description: 'Changes the filenames paperless uses to store documents in the media directory. See File name handling (https://paperless-ng.readthedocs.io/en/latest/advanced_usage.html#advanced-file-name-handling) for details. Default is none, which disables this feature.'
- id: $$cap_logrotate_max_size - id: $$cap_logrotate_max_size
label: Log Rotate Max Size (MB) label: Log Rotate Max Size (in bytes)
defaultValue: 1 description: Maximum file size for log files before they are rotated, in bytes.
- id: $$cap_logrotate_max_backup - id: $$cap_logrotate_max_backup
label: Log Rotate Max Backup Count label: Log Rotate Max Backup Count
defaultValue: 20 defaultValue: '20'
validRegex: /^\d{0,}$/
description: Number of rotated log files to keep.
- id: $$cap_secret_key - id: $$cap_secret_key
label: Secret Key label: Paperless Secret Key
defaultValue: $$cap_gen_random_hex(64) defaultValue: $$cap_gen_random_hex(64)
validRegex: /^([^\s^\/])+$/
description: Paperless uses this to make session tokens.
- id: $$cap_admin_user - id: $$cap_admin_user
label: Admin User label: Admin User
defaultValue: 'admin' defaultValue: 'admin'
validRegex: /^([^\s^\/])+$/
- id: $$cap_admin_password - id: $$cap_admin_password
label: Admin Password label: Admin Password
defaultValue: $$cap_gen_random_hex(64) defaultValue: $$cap_gen_random_hex(10)
validRegex: /^([^\s^\/])+$/
- id: $$cap_admin_email
label: Admin Email
defaultValue: root@localhost
validRegex: /^([^\s^\/])+$/
- id: $$cap_cookie_prefix - id: $$cap_cookie_prefix
label: Cookie Prefix label: Cookie Prefix
defaultValue: $$cap_appname defaultValue: $$cap_appname
description: Specify a prefix that is added to the cookies used by paperless to identify the currently logged in user. This is useful for when you're running two instances of paperless on the same host.
- id: $$cap_ocr_language - id: $$cap_ocr_language
label: OCR Language label: OCR Language
defaultValue: 'eng' defaultValue: 'eng'
description: 'It should be a 3-letter language code consistent with ISO 639 https://www.loc.gov/standards/iso639-2/php/code_list.php This can be a combination of multiple languages such as deu+eng, in which case tesseract will use whatever language matches best. Keep in mind that tesseract uses much more cpu time with multiple languages enabled.' description: Customize the language that paperless will attempt to use when parsing documents. It should be a 3-letter language code consistent with ISO 639 https://www.loc.gov/standards/iso639-2/php/code_list.php This can be a combination of multiple languages such as deu+eng, in which case tesseract will use whatever language matches best. Keep in mind that tesseract uses much more cpu time with multiple languages enabled.
- id: $$cap_ocr_mode - id: $$cap_ocr_mode
label: OCR Mode label: OCR Mode
defaultValue: 'skip' defaultValue: 'skip'
validRegex: /^(skip|skip_noarchive|redo|force)$/
description: Tell paperless when and how to perform ocr on your documents. Four modes are available- skip, skip_noarchive, redo, force. Read more about this in the OCRmyPDF documentation (https://ocrmypdf.readthedocs.io/en/latest/advanced.html#when-ocr-is-skipped).
- id: $$cap_ocr_clean - id: $$cap_ocr_clean
label: OCR Clean label: OCR Clean
defaultValue: 'clean' defaultValue: 'clean'
validRegex: /^(clean|clean-final|none)$/
description: Tells paperless to use unpaper to clean any input document before sending it to tesseract. This uses more resources, but generally results in better OCR results. Available mode- clean, clean-final, none
- id: $$cap_ocr_deskew - id: $$cap_ocr_deskew
label: OCR Deskew label: OCR Deskew
defaultValue: 'true' defaultValue: 'true'
validRegex: /^(true|false)$/
description: Tells paperless to correct skewing (slight rotation of input images mainly due to improper scanning).
- id: $$cap_ocr_rotate_pages - id: $$cap_ocr_rotate_pages
label: OCR Rotate Pages label: OCR Rotate Pages
defaultValue: 'true' defaultValue: 'true'
validRegex: /^(true|false)$/
description: Tells paperless to correct page rotation (90°, 180° and 270° rotation). If you notice that paperless is not rotating incorrectly rotated pages (or vice versa), try adjusting the threshold up or down (see below).
- id: $$cap_ocr_rotate_threshold
label: OCR Rotate Pages Threshold
defaultValue: '12'
validRegex: /^\d[\d.]*$/
description: This is an arbitrary value reported by tesseract. “15” is a very conservative value, whereas “2” is a very aggressive option and will often result in correctly rotated pages being rotated as well.
- id: $$cap_ocr_output_type - id: $$cap_ocr_output_type
label: OCR Output Type label: OCR Output Type
defaultValue: 'pdfa' defaultValue: 'pdfa'
validRegex: /^(pdf|pdfa|pdfa-1|pdfa-2|pdfa-3)$/
description: Specify the type of PDF documents that paperless should produce. Choices- pdf, pdfa, pdfa-1, pdfa-2, pdfa-3
- id: $$cap_ocr_page_count - id: $$cap_ocr_pages
label: OCR Pages Count label: OCR Pages Count
defaultValue: '0' defaultValue: '0'
validRegex: /^\d{1,}$/
description: Tells paperless to use only the specified amount of pages for OCR. Documents with less than the specified amount of pages get OCRed completely. Specifying 1 here will only use the first page. Specifying 0 disables this feature and always uses all pages.
- id: $$cap_ocr_image_dpi
label: OCR Image DPI
description: Set this to the DPI your scanner produces images at. Default is none, which will automatically calculate image DPI so that the produced PDF documents are A4 sized.
- id: $cap_ocr_user_args - id: $cap_ocr_user_args
label: OCR User Args label: OCR User Args
description: OCRmyPDF Refrence https://ocrmypdf.readthedocs.io/en/latest/api.html#reference description: See https://ocrmypdf.readthedocs.io/en/latest/api.html#reference for valid parameters. Specify arguments as a JSON dictionary. Keep note of lower case booleans and double quoted parameter names and strings.
# commenting out but keeping the code if someone tries to add these variables
# these are optional variables and paperless-ng calculates automatically if no value is provided
# but leaving it blank raises ValueError: invalid literal for int() with base 10:
# - id: $$cap_task_workers
# label: Paperless Task Workers
# validRegex: /^\d{0,2}$/
# description: Leave blank to calculate automatically based on CPU core count
# - id: $$cap_threads_per_worker
# label: Paperless Threads per Worker
# validRegex: /^\d{0,2}$/
# description: Leave blank to calculate automatically based on CPU core count. Ensure that the product PAPERLESS_TASK_WORKERS * PAPERLESS_THREADS_PER_WORKER does not exceed your CPU core count
- id: $$cap_timezone - id: $$cap_timezone
label: Timezone label: Timezone
defaultValue: 'UTC' defaultValue: 'UTC'
description: 'Set the time zone here. See https://docs.djangoproject.com/en/3.1/ref/settings/#std:setting-TIME_ZONE for details on how to set it.' description: 'Set the time zone here. See https://docs.djangoproject.com/en/3.1/ref/settings/#std:setting-TIME_ZONE for details on how to set it.'
- id: $$cap_consumer_polling
label: Consumer Polling (seconds)
defaultValue: '0'
validRegex: /^\d{1,}$/
description: Defaults to 0, which disables polling and uses filesystem notifications.
- id: $$cap_consumer_delete_duplicates
label: Consumer Delete Duplicates
defaultValue: 'false'
validRegex: /^(true|false)$/
description: When the consumer detects a duplicate document, it will not touch the original document.
- id: $$cap_consumer_recursive
label: Consumer Recursive
defaultValue: 'false'
validRegex: /^(true|false)$/
description: Enable recursive watching of the consumption directory. Paperless will then pick up files from subdirectories within your consumption directory as well.
- id: $$cap_consumer_subdirs_as_tags
label: Consumer Sub Directory as tags
defaultValue: 'false'
validRegex: /^(true|false)$/
description: Set the names of subdirectories as tags for consumed files. E.g. <CONSUMPTION_DIR>/foo/bar/file.pdf will add the tags “foo” and “bar” to the consumed file
- id: $$cap_convert_memory_limit
label: Convert Memory Limit
defaultValue: '0'
validRegex: /^\d{1,}$/
description: For more information on how to use this value, you should search the web for “MAGICK_MEMORY_LIMIT”.
- id: $$cap_convert_tempdir
label: Convert Temp Directory
description: For more information on how to use this value, you should search the web for “MAGICK_TMPDIR”.
- id: $$cap_optimize_thumbnails
label: Optimize Thumbnails
defaultValue: 'true'
validRegex: /^(true|false)$/
description: This usually reduces the size of thumbnails by about 20%, but uses considerable compute time during consumption.
- id: $$cap_pre_consume_script
label: Pre Consume Script (path)
description: Executed after the consumer sees a new document in the consumption folder, but before any processing of the document is performed. For more information, take a look at Pre-consumption script (https://paperless-ng.readthedocs.io/en/latest/advanced_usage.html?highlight=PAPERLESS_PRE_CONSUME_SCRIPT#pre-consumption-script)
- id: $$cap_post_consume_script
label: Post Consume Script (path)
description: Executed after the consumer has successfully processed a document and has moved it into paperless. For more information, take a look at Post-consumption script (https://paperless-ng.readthedocs.io/en/latest/advanced_usage.html#advanced-post-consume-script)
- id: $$cap_filename_date_order
label: Filename Date Order
description: Defaults to none, which disables this feature. The date order can be set to any option as specified in https://dateparser.readthedocs.io/en/latest/settings.html#date-order.
- id: $$cap_thumbnail_font_name
label: Thumbnail Font Name
defaultValue: '/usr/share/fonts/liberation/LiberationSerif-Regular.ttf'
description: Paperless creates thumbnails for plain text files by rendering the content of the file on an image and uses a predefined font for that
- id: $$cap_paperless_ignore_dates
label: Ignore Dates
description: You may specify dates in a multitude of formats supported by dateparser (see https://dateparser.readthedocs.io/en/latest/#popular-formats)
- id: $$cap_webserver_workers
label: Webserver Workers
defaultValue: '2'
validRegex: /^\d{1,}$/
description: The number of worker processes the webserver should spawn.
- id: $$cap_usermap_uid
label: Usermap UID
defaultValue: '1000'
validRegex: /^\d{1,}$/
description: Set this to your actual user ID on the host system, which you can get by executing ```id -u```
- id: $$cap_usermap_gid
label: Usermap GID
defaultValue: '1000'
validRegex: /^\d{1,}$/
description: Set this to your actual group ID on the host system, which you can get by executing ```id -g```
- id: $$cap_docker_ocr_languages
label: OCR Languages to install
description: Additional OCR languages to install. By default, paperless comes with English, German, Italian, Spanish and French.
instructions: instructions:
start: >- start: >-
Paperless is an application by Daniel Quinn and others that indexes your scanned documents and allows you to easily search for documents and store metadata alongside your documents. Paperless is an application by Daniel Quinn and others that indexes your scanned documents and allows you to easily search for documents and store metadata alongside your documents.