# CapRover one-click app template for Paperless-ng.
#
# Declares five services: the Paperless-ng web app, Redis, PostgreSQL,
# Gotenberg and Tika. Every $$cap_* token is a template placeholder; the
# user-editable ones are declared under caproverOneClickApp.variables below.
captainVersion: 4
services:
    # Paperless-ng
    $$cap_appname:
        depends_on:
            - $$cap_appname-db
            - $$cap_appname-redis
        image: jonaswinkler/paperless-ng:$$cap_app_version
        restart: always
        environment:
            APP_URL: http://$$cap_appname.$$cap_root_domain
            # Required services (https://paperless-ng.readthedocs.io/en/latest/configuration.html#required-services)
            PAPERLESS_REDIS: redis://srv-captain--$$cap_appname-redis:6379/0
            PAPERLESS_DBHOST: srv-captain--$$cap_appname-db
            PAPERLESS_DBNAME: $$cap_dbname
            PAPERLESS_DBUSER: $$cap_dbuser
            PAPERLESS_DBPASS: $$cap_dbpass
            # Paths and folders (https://paperless-ng.readthedocs.io/en/latest/configuration.html#paths-and-folders)
            PAPERLESS_FILENAME_FORMAT: $$cap_filename_format
            # Logging (https://paperless-ng.readthedocs.io/en/latest/configuration.html#logging)
            PAPERLESS_LOGROTATE_MAX_SIZE: $$cap_logrotate_max_size
            PAPERLESS_LOGROTATE_MAX_BACKUPS: $$cap_logrotate_max_backup
            # Hosting & Security (https://paperless-ng.readthedocs.io/en/latest/configuration.html#hosting-security)
            PAPERLESS_SECRET_KEY: $$cap_secret_key
            PAPERLESS_ALLOWED_HOSTS: $$cap_appname.$$cap_root_domain
            PAPERLESS_CORS_ALLOWED_HOSTS: http://$$cap_appname.$$cap_root_domain
            PAPERLESS_ADMIN_USER: $$cap_admin_user
            PAPERLESS_ADMIN_PASSWORD: $$cap_admin_password
            PAPERLESS_ADMIN_MAIL: $$cap_admin_email
            PAPERLESS_COOKIE_PREFIX: $$cap_appname
            # OCR settings (https://paperless-ng.readthedocs.io/en/latest/configuration.html#ocr-settings)
            PAPERLESS_OCR_LANGUAGE: $$cap_ocr_language
            PAPERLESS_OCR_MODE: $$cap_ocr_mode
            PAPERLESS_OCR_CLEAN: $$cap_ocr_clean
            PAPERLESS_OCR_DESKEW: $$cap_ocr_deskew
            PAPERLESS_OCR_ROTATE_PAGES: $$cap_ocr_rotate_pages
            PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD: $$cap_ocr_rotate_threshold
            PAPERLESS_OCR_OUTPUT_TYPE: $$cap_ocr_output_type
            PAPERLESS_OCR_PAGES: $$cap_ocr_pages
            PAPERLESS_OCR_IMAGE_DPI: $$cap_ocr_image_dpi
            PAPERLESS_OCR_USER_ARGS: $$cap_ocr_user_args
            # Tika settings (https://paperless-ng.readthedocs.io/en/latest/configuration.html#tika-settings)
            # Quoted so the value stays a string; the endpoints point at the
            # gotenberg and tika services declared further down in this file.
            PAPERLESS_TIKA_ENABLED: '1'
            PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://srv-captain--$$cap_appname-gotenberg:3000
            PAPERLESS_TIKA_ENDPOINT: http://srv-captain--$$cap_appname-tika:9998
            # Software tweaks (https://paperless-ng.readthedocs.io/en/latest/configuration.html#software-tweaks)
            # PAPERLESS_TASK_WORKERS: $$cap_task_workers
            # PAPERLESS_THREADS_PER_WORKER: $$cap_threads_per_worker
            PAPERLESS_TIME_ZONE: $$cap_timezone
            PAPERLESS_CONSUMER_POLLING: $$cap_consumer_polling
            PAPERLESS_CONSUMER_DELETE_DUPLICATES: $$cap_consumer_delete_duplicates
            PAPERLESS_CONSUMER_RECURSIVE: $$cap_consumer_recursive
            PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS: $$cap_consumer_subdirs_as_tags
            PAPERLESS_CONVERT_MEMORY_LIMIT: $$cap_convert_memory_limit
            PAPERLESS_CONVERT_TMPDIR: $$cap_convert_tempdir
            PAPERLESS_OPTIMIZE_THUMBNAILS: $$cap_optimize_thumbnails
            PAPERLESS_PRE_CONSUME_SCRIPT: $$cap_pre_consume_script
            PAPERLESS_POST_CONSUME_SCRIPT: $$cap_post_consume_script
            PAPERLESS_FILENAME_DATE_ORDER: $$cap_filename_date_order
            PAPERLESS_THUMBNAIL_FONT_NAME: $$cap_thumbnail_font_name
            PAPERLESS_IGNORE_DATES: $$cap_paperless_ignore_dates
            # Docker-specific options (https://paperless-ng.readthedocs.io/en/latest/configuration.html#docker-specific-options)
            PAPERLESS_WEBSERVER_WORKERS: $$cap_webserver_workers
            USERMAP_UID: $$cap_usermap_uid
            USERMAP_GID: $$cap_usermap_gid
            PAPERLESS_OCR_LANGUAGES: $$cap_docker_ocr_languages
        volumes:
            - $$cap_appname-data:/usr/src/paperless/data
            - $$cap_appname-media:/usr/src/paperless/media
            - $$cap_appname-export:/usr/src/paperless/export
            - $$cap_appname-consume:/usr/src/paperless/consume
        caproverExtra:
            containerHttpPort: '8000'

    # Redis (built from dockerfileLines instead of a plain image reference)
    $$cap_appname-redis:
        volumes:
            - $$cap_appname-redis-data:/data
        restart: always
        caproverExtra:
            dockerfileLines:
                - FROM redis:$$cap_redis_version
                - CMD exec redis-server
            notExposeAsWebApp: 'true'

    # Database
    $$cap_appname-db:
        image: postgres:$$cap_postgres_version
        volumes:
            - $$cap_appname-db:/var/lib/postgresql/data
        restart: always
        environment:
            POSTGRES_DB: $$cap_dbname
            POSTGRES_USER: $$cap_dbuser
            POSTGRES_PASSWORD: $$cap_dbpass
        caproverExtra:
            notExposeAsWebApp: 'true'

    # gotenberg
    $$cap_appname-gotenberg:
        image: thecodingmachine/gotenberg:6
        restart: unless-stopped
        environment:
            # Quoted so the value stays a string.
            DISABLE_GOOGLE_CHROME: '1'
        caproverExtra:
            containerHttpPort: '3000'

    # tika
    $$cap_appname-tika:
        image: apache/tika
        restart: unless-stopped
        caproverExtra:
            containerHttpPort: '9998'

caproverOneClickApp:
    # Variables are listed in the same order as the environment entries that consume them.
    variables:
        - id: $$cap_app_version
          label: Paperless-ng Version
          defaultValue: '1.4.1'
          description: Check out their docker page for the valid tags https://hub.docker.com/r/jonaswinkler/paperless-ng/tags
        - id: $$cap_postgres_version
          label: Postgres Version
          defaultValue: '13'
          description: Check out their Docker page for the valid tags https://hub.docker.com/r/library/postgres/tags/
          validRegex: /^([^\s^\/])+$/
        - id: $$cap_redis_version
          label: Redis version
          defaultValue: '6.2.1-alpine'
          validRegex: /^([^\s^\/])+$/
        - id: $$cap_dbname
          label: Database Name
          defaultValue: 'paperless'
          validRegex: /^([^\s^\/])+$/
        - id: $$cap_dbuser
          label: Database User
          defaultValue: 'paperless'
          validRegex: /^([^\s^\/])+$/
        - id: $$cap_dbpass
          label: Database Password
          defaultValue: $$cap_gen_random_hex(64)
          validRegex: /^([^\s^\/])+$/
        - id: $$cap_filename_format
          label: Filename Format
          # Explicit empty string instead of a bare (null) value.
          defaultValue: ''
          description: 'Changes the filenames paperless uses to store documents in the media directory. See File name handling (https://paperless-ng.readthedocs.io/en/latest/advanced_usage.html#advanced-file-name-handling) for details. Default is none, which disables this feature.'
        - id: $$cap_logrotate_max_size
          label: Log Rotate Max Size (in bytes)
          description: Maximum file size for log files before they are rotated, in bytes.
        - id: $$cap_logrotate_max_backup
          label: Log Rotate Max Backup Count
          defaultValue: '20'
          validRegex: /^\d{0,}$/
          description: Number of rotated log files to keep.
        - id: $$cap_secret_key
          label: Paperless Secret Key
          defaultValue: $$cap_gen_random_hex(64)
          validRegex: /^([^\s^\/])+$/
          description: Paperless uses this to make session tokens.
        - id: $$cap_admin_user
          label: Admin User
          defaultValue: 'admin'
          validRegex: /^([^\s^\/])+$/
        - id: $$cap_admin_password
          label: Admin Password
          defaultValue: $$cap_gen_random_hex(10)
          validRegex: /^([^\s^\/])+$/
        - id: $$cap_admin_email
          label: Admin Email
          defaultValue: root@localhost
          validRegex: /^([^\s^\/])+$/
        - id: $$cap_ocr_language
          label: OCR Language
          defaultValue: 'eng'
          description: Customize the language that paperless will attempt to use when parsing documents. It should be a 3-letter language code consistent with ISO 639 https://www.loc.gov/standards/iso639-2/php/code_list.php This can be a combination of multiple languages such as deu+eng, in which case tesseract will use whatever language matches best. Keep in mind that tesseract uses much more cpu time with multiple languages enabled.
        - id: $$cap_ocr_mode
          label: OCR Mode
          defaultValue: 'skip'
          validRegex: /^(skip|skip_noarchive|redo|force)$/
          description: Tell paperless when and how to perform ocr on your documents. Four modes are available- skip, skip_noarchive, redo, force. Read more about this in the OCRmyPDF documentation (https://ocrmypdf.readthedocs.io/en/latest/advanced.html#when-ocr-is-skipped).
        - id: $$cap_ocr_clean
          label: OCR Clean
          defaultValue: 'clean'
          validRegex: /^(clean|clean-final|none)$/
          description: Tells paperless to use unpaper to clean any input document before sending it to tesseract. This uses more resources, but generally results in better OCR results. Available mode- clean, clean-final, none
        - id: $$cap_ocr_deskew
          label: OCR Deskew
          defaultValue: 'true'
          validRegex: /^(true|false)$/
          description: Tells paperless to correct skewing (slight rotation of input images mainly due to improper scanning).
        - id: $$cap_ocr_rotate_pages
          label: OCR Rotate Pages
          defaultValue: 'true'
          validRegex: /^(true|false)$/
          description: Tells paperless to correct page rotation (90°, 180° and 270° rotation). If you notice that paperless is not rotating incorrectly rotated pages (or vice versa), try adjusting the threshold up or down (see below).
        - id: $$cap_ocr_rotate_threshold
          label: OCR Rotate Pages Threshold
          defaultValue: '12'
          validRegex: /^\d[\d.]*$/
          description: This is an arbitrary value reported by tesseract. “15” is a very conservative value, whereas “2” is a very aggressive option and will often result in correctly rotated pages being rotated as well.
        - id: $$cap_ocr_output_type
          label: OCR Output Type
          defaultValue: 'pdfa'
          validRegex: /^(pdf|pdfa|pdfa-1|pdfa-2|pdfa-3)$/
          description: Specify the type of PDF documents that paperless should produce. Choices- pdf, pdfa, pdfa-1, pdfa-2, pdfa-3
        - id: $$cap_ocr_pages
          label: OCR Pages Count
          defaultValue: '0'
          validRegex: /^\d{1,}$/
          description: Tells paperless to use only the specified amount of pages for OCR. Documents with less than the specified amount of pages get OCR’ed completely. Specifying 1 here will only use the first page. Specifying 0 disables this feature and always uses all pages.
        - id: $$cap_ocr_image_dpi
          label: OCR Image DPI
          description: Set this to the DPI your scanner produces images at. Default is none, which will automatically calculate image DPI so that the produced PDF documents are A4 sized.
        - id: $$cap_ocr_user_args
          label: OCR User Args
          description: See https://ocrmypdf.readthedocs.io/en/latest/api.html#reference for valid parameters. Specify arguments as a JSON dictionary. Keep note of lower case booleans and double quoted parameter names and strings.
        # Commented out, but kept in case someone tries to add these variables.
        # They are optional and paperless-ng calculates them automatically if no value is provided,
        # but leaving them blank raises ValueError: invalid literal for int() with base 10:
        # - id: $$cap_task_workers
        #   label: Paperless Task Workers
        #   validRegex: /^\d{0,2}$/
        #   description: Leave blank to calculate automatically based on CPU core count
        # - id: $$cap_threads_per_worker
        #   label: Paperless Threads per Workers
        #   validRegex: /^\d{0,2}$/
        #   description: Leave blank to calculate automatically based on CPU core count. Ensure that the product PAPERLESS_TASK_WORKERS * PAPERLESS_THREADS_PER_WORKER does not exceed your CPU core count
        - id: $$cap_timezone
          label: Timezone
          defaultValue: 'UTC'
          description: 'Set the time zone here. See https://docs.djangoproject.com/en/3.1/ref/settings/#std:setting-TIME_ZONE for details on how to set it.'
        - id: $$cap_consumer_polling
          label: Consumer Polling (seconds)
          defaultValue: '0'
          validRegex: /^\d{1,}$/
          description: Defaults to 0, which disables polling and uses filesystem notifications.
        - id: $$cap_consumer_delete_duplicates
          label: Consumer Delete Duplicates
          defaultValue: 'false'
          validRegex: /^(true|false)$/
          description: When the consumer detects a duplicate document, it will not touch the original document.
        - id: $$cap_consumer_recursive
          label: Consumer Recursive
          defaultValue: 'false'
          validRegex: /^(true|false)$/
          description: Enable recursive watching of the consumption directory. Paperless will then pick up files from subdirectories within your consumption directory as well.
        - id: $$cap_consumer_subdirs_as_tags
          label: Consumer Sub Directory as tags
          defaultValue: 'false'
          validRegex: /^(true|false)$/
          description: Set the names of subdirectories as tags for consumed files. E.g. /foo/bar/file.pdf will add the tags “foo” and “bar” to the consumed file
        - id: $$cap_convert_memory_limit
          label: Convert Memory Limit
          defaultValue: '0'
          validRegex: /^\d{1,}$/
          description: For more information on how to use this value, you should search the web for “MAGICK_MEMORY_LIMIT”.
        - id: $$cap_convert_tempdir
          label: Convert Temp Directory
          description: For more information on how to use this value, you should search the web for “MAGICK_TMPDIR”.
        - id: $$cap_optimize_thumbnails
          label: Optimize Thumbnails
          defaultValue: 'true'
          validRegex: /^(true|false)$/
          description: This usually reduces the size of thumbnails by about 20%, but uses considerable compute time during consumption.
        - id: $$cap_pre_consume_script
          label: Pre Consume Script (path)
          description: Executed after the consumer sees a new document in the consumption folder, but before any processing of the document is performed. For more information, take a look at Pre-consumption script (https://paperless-ng.readthedocs.io/en/latest/advanced_usage.html?highlight=PAPERLESS_PRE_CONSUME_SCRIPT#pre-consumption-script)
        - id: $$cap_post_consume_script
          label: Post Consume Script (path)
          description: Executed after the consumer has successfully processed a document and has moved it into paperless. For more information, take a look at Post-consumption script (https://paperless-ng.readthedocs.io/en/latest/advanced_usage.html#advanced-post-consume-script)
        - id: $$cap_filename_date_order
          label: Filename Date Order
          description: Defaults to none, which disables this feature. The date order can be set to any option as specified in https://dateparser.readthedocs.io/en/latest/settings.html#date-order.
        - id: $$cap_thumbnail_font_name
          label: Thumbnail Font Name
          defaultValue: '/usr/share/fonts/liberation/LiberationSerif-Regular.ttf'
          description: Paperless creates thumbnails for plain text files by rendering the content of the file on an image and uses a predefined font for that
        - id: $$cap_paperless_ignore_dates
          label: Ignore Dates
          description: You may specify dates in a multitude of formats supported by dateparser (see https://dateparser.readthedocs.io/en/latest/#popular-formats)
        - id: $$cap_webserver_workers
          label: Webserver Workers
          defaultValue: '2'
          validRegex: /^\d{1,}$/
          description: The number of worker processes the webserver should spawn.
        - id: $$cap_usermap_uid
          label: Usermap UID
          defaultValue: '1000'
          validRegex: /^\d{1,}$/
          description: Set this to your actual user ID on the host system, which you can get by executing ```id -u```
        - id: $$cap_usermap_gid
          label: Usermap GID
          defaultValue: '1000'
          validRegex: /^\d{1,}$/
          description: Set this to your actual group ID on the host system, which you can get by executing ```id -g```
        - id: $$cap_docker_ocr_languages
          label: OCR Languages to install
          description: Additional OCR languages to install. By default, paperless comes with English, German, Italian, Spanish and French.
    instructions:
        start: >-
            Paperless is an application by Daniel Quinn and others that indexes your scanned documents and allows you to easily search for documents and store metadata alongside your documents.
        end: >-
            Done! 😄 Your service is available at http://$$cap_appname.$$cap_root_domain
    displayName: 'Paperless-ng'
    isOfficial: true
    description: Paperless is an application by Daniel Quinn and others that indexes your scanned documents and allows you to easily search for documents and store metadata alongside your documents.
    documentation: https://paperless-ng.readthedocs.io/en/latest/