# one-click-apps/public/v4/apps/paperless-ng.yml

# Version of the one-click-app template format this file is written against
# (presumably the CapRover v4 schema, per the `v4` path segment; confirm).
captainVersion: 4
services:
    # Paperless-ng: the main web application. Its environment points at the
    # sibling db, redis, gotenberg and tika services defined in this file.
    $$cap_appname:
        depends_on:
            - $$cap_appname-db
            - $$cap_appname-redis
            # The Tika endpoints configured below are served by these two services.
            - $$cap_appname-gotenberg
            - $$cap_appname-tika
        image: jonaswinkler/paperless-ng:$$cap_app_version
        restart: always
        environment:
            APP_URL: http://$$cap_appname.$$cap_root_domain
            # Required services (https://paperless-ng.readthedocs.io/en/latest/configuration.html#required-services)
            PAPERLESS_REDIS: redis://srv-captain--$$cap_appname-redis:6379/0
            PAPERLESS_DBHOST: srv-captain--$$cap_appname-db
            PAPERLESS_DBNAME: $$cap_dbname
            PAPERLESS_DBUSER: $$cap_dbuser
            PAPERLESS_DBPASS: $$cap_dbpass
            # Paths and folders (https://paperless-ng.readthedocs.io/en/latest/configuration.html#paths-and-folders)
            PAPERLESS_FILENAME_FORMAT: $$cap_filename_format
            # Logging (https://paperless-ng.readthedocs.io/en/latest/configuration.html#logging)
            PAPERLESS_LOGROTATE_MAX_SIZE: $$cap_logrotate_max_size
            PAPERLESS_LOGROTATE_MAX_BACKUPS: $$cap_logrotate_max_backup
            # Hosting & Security (https://paperless-ng.readthedocs.io/en/latest/configuration.html#hosting-security)
            PAPERLESS_SECRET_KEY: $$cap_secret_key
            PAPERLESS_ALLOWED_HOSTS: $$cap_appname.$$cap_root_domain
            PAPERLESS_CORS_ALLOWED_HOSTS: http://$$cap_appname.$$cap_root_domain
            PAPERLESS_ADMIN_USER: $$cap_admin_user
            PAPERLESS_ADMIN_PASSWORD: $$cap_admin_password
            PAPERLESS_ADMIN_MAIL: $$cap_admin_email
            PAPERLESS_COOKIE_PREFIX: $$cap_appname
            # OCR settings (https://paperless-ng.readthedocs.io/en/latest/configuration.html#ocr-settings)
            PAPERLESS_OCR_LANGUAGE: $$cap_ocr_language
            PAPERLESS_OCR_MODE: $$cap_ocr_mode
            PAPERLESS_OCR_CLEAN: $$cap_ocr_clean
            PAPERLESS_OCR_DESKEW: $$cap_ocr_deskew
            PAPERLESS_OCR_ROTATE_PAGES: $$cap_ocr_rotate_pages
            PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD: $$cap_ocr_rotate_threshold
            PAPERLESS_OCR_OUTPUT_TYPE: $$cap_ocr_output_type
            PAPERLESS_OCR_PAGES: $$cap_ocr_pages
            PAPERLESS_OCR_IMAGE_DPI: $$cap_ocr_image_dpi
            # NOTE(review): this placeholder has a single leading `$`, unlike every
            # other `$$cap_*` placeholder in this service. It matches the variable id
            # declared under caproverOneClickApp.variables, so the two must be
            # renamed together or substitution will break.
            PAPERLESS_OCR_USER_ARGS: $cap_ocr_user_args
            # Tika settings (https://paperless-ng.readthedocs.io/en/latest/configuration.html#tika-settings)
            # Quoted so the flag is carried as a string, like every other env value.
            PAPERLESS_TIKA_ENABLED: '1'
            PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://srv-captain--$$cap_appname-gotenberg:3000
            PAPERLESS_TIKA_ENDPOINT: http://srv-captain--$$cap_appname-tika:9998
            # Software tweaks (https://paperless-ng.readthedocs.io/en/latest/configuration.html#software-tweaks)
            # PAPERLESS_TASK_WORKERS: $$cap_task_workers
            # PAPERLESS_THREADS_PER_WORKER: $$cap_threads_per_worker
            PAPERLESS_TIME_ZONE: $$cap_timezone
            PAPERLESS_CONSUMER_POLLING: $$cap_consumer_polling
            PAPERLESS_CONSUMER_DELETE_DUPLICATES: $$cap_consumer_delete_duplicates
            PAPERLESS_CONSUMER_RECURSIVE: $$cap_consumer_recursive
            PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS: $$cap_consumer_subdirs_as_tags
            PAPERLESS_CONVERT_MEMORY_LIMIT: $$cap_convert_memory_limit
            PAPERLESS_CONVERT_TMPDIR: $$cap_convert_tempdir
            PAPERLESS_OPTIMIZE_THUMBNAILS: $$cap_optimize_thumbnails
            PAPERLESS_PRE_CONSUME_SCRIPT: $$cap_pre_consume_script
            PAPERLESS_POST_CONSUME_SCRIPT: $$cap_post_consume_script
            PAPERLESS_FILENAME_DATE_ORDER: $$cap_filename_date_order
            PAPERLESS_THUMBNAIL_FONT_NAME: $$cap_thumbnail_font_name
            PAPERLESS_IGNORE_DATES: $$cap_paperless_ignore_dates
            # Docker-specific options (https://paperless-ng.readthedocs.io/en/latest/configuration.html#docker-specific-options)
            PAPERLESS_WEBSERVER_WORKERS: $$cap_webserver_workers
            USERMAP_UID: $$cap_usermap_uid
            USERMAP_GID: $$cap_usermap_gid
            PAPERLESS_OCR_LANGUAGES: $$cap_docker_ocr_languages
        # Named volumes for the data, media, export and consume directories.
        volumes:
            - $$cap_appname-data:/usr/src/paperless/data
            - $$cap_appname-media:/usr/src/paperless/media
            - $$cap_appname-export:/usr/src/paperless/export
            - $$cap_appname-consume:/usr/src/paperless/consume
        caproverExtra:
            # HTTP port of the web server inside the container.
            containerHttpPort: '8000'

    # Redis instance referenced by PAPERLESS_REDIS in the main service.
    $$cap_appname-redis:
        restart: always
        caproverExtra:
            # Internal-only service: not published as a web app.
            notExposeAsWebApp: 'true'
            # Image is built from these Dockerfile lines; the base image tag
            # comes from the user-selected Redis version.
            dockerfileLines:
                - FROM redis:$$cap_redis_version
                - CMD exec redis-server
        # Named volume mounted at /data in the container.
        volumes:
            - $$cap_appname-redis-data:/data

    # PostgreSQL database referenced by PAPERLESS_DBHOST in the main service.
    $$cap_appname-db:
        image: postgres:$$cap_postgres_version
        restart: always
        # Same database name and credentials the main service is configured with.
        environment:
            POSTGRES_DB: $$cap_dbname
            POSTGRES_USER: $$cap_dbuser
            POSTGRES_PASSWORD: $$cap_dbpass
        # Named volume mounted at the Postgres data directory.
        volumes:
            - $$cap_appname-db:/var/lib/postgresql/data
        caproverExtra:
            # Internal-only service: not published as a web app.
            notExposeAsWebApp: 'true'

    # gotenberg: backend the main service reaches through
    # PAPERLESS_TIKA_GOTENBERG_ENDPOINT (srv-captain--$$cap_appname-gotenberg:3000).
    $$cap_appname-gotenberg:
        image: thecodingmachine/gotenberg:6
        restart: unless-stopped
        environment:
            # Quoted so the flag is carried as a string rather than a YAML integer.
            DISABLE_GOOGLE_CHROME: '1'
        caproverExtra:
            containerHttpPort: '3000'
            # Only the main service talks to this backend (over the internal
            # srv-captain-- host name), so keep it off the public web like the
            # redis and db services.
            notExposeAsWebApp: 'true'

    # tika: backend the main service reaches through PAPERLESS_TIKA_ENDPOINT
    # (srv-captain--$$cap_appname-tika:9998).
    $$cap_appname-tika:
        # NOTE(review): no tag is given, so the registry's default tag is pulled;
        # consider pinning a version once a known-good one is confirmed.
        image: apache/tika
        restart: unless-stopped
        caproverExtra:
            containerHttpPort: '9998'
            # Only the main service talks to this backend (over the internal
            # srv-captain-- host name), so keep it off the public web like the
            # redis and db services.
            notExposeAsWebApp: 'true'

caproverOneClickApp:
    variables:
        # Image versions and database credentials.
        - id: $$cap_app_version
          label: Paperless-ng Version
          defaultValue: '1.4.1'
          description: Check out their docker page for the valid tags https://hub.docker.com/r/jonaswinkler/paperless-ng/tags
          # Same image-tag check as the other version fields (no whitespace or slash).
          validRegex: /^([^\s^\/])+$/

        - id: $$cap_postgres_version
          label: Postgres Version
          defaultValue: '13'
          description: Check out their Docker page for the valid tags https://hub.docker.com/r/library/postgres/tags/
          validRegex: /^([^\s^\/])+$/

        - id: $$cap_redis_version
          label: Redis version
          defaultValue: '6.2.1-alpine'
          validRegex: /^([^\s^\/])+$/

        - id: $$cap_dbname
          label: Database Name
          defaultValue: 'paperless'
          validRegex: /^([^\s^\/])+$/

        - id: $$cap_dbuser
          label: Database User
          defaultValue: 'paperless'
          validRegex: /^([^\s^\/])+$/

        # Password defaults to a generated random hex string.
        - id: $$cap_dbpass
          label: Database Password
          defaultValue: $$cap_gen_random_hex(64)
          validRegex: /^([^\s^\/])+$/

        # Paths, logging, and hosting/security settings.
        - id: $$cap_filename_format
          label: Filename Format
          # Explicit empty string: a bare `defaultValue:` parses as null.
          defaultValue: ''
          description: 'Changes the filenames paperless uses to store documents in the media directory. See File name handling (https://paperless-ng.readthedocs.io/en/latest/advanced_usage.html#advanced-file-name-handling) for details. Default is none, which disables this feature.'

        - id: $$cap_logrotate_max_size
          label: Log Rotate Max Size (in bytes)
          description: Maximum file size for log files before they are rotated, in bytes.

        - id: $$cap_logrotate_max_backup
          label: Log Rotate Max Backup Count
          defaultValue: '20'
          validRegex: /^\d{0,}$/
          description: Number of rotated log files to keep.

        # Secret key defaults to a generated random hex string.
        - id: $$cap_secret_key
          label: Paperless Secret Key
          defaultValue: $$cap_gen_random_hex(64)
          validRegex: /^([^\s^\/])+$/
          description: Paperless uses this to make session tokens.

        - id: $$cap_admin_user
          label: Admin User
          defaultValue: 'admin'
          validRegex: /^([^\s^\/])+$/

        - id: $$cap_admin_password
          label: Admin Password
          defaultValue: $$cap_gen_random_hex(10)
          validRegex: /^([^\s^\/])+$/

        - id: $$cap_admin_email
          label: Admin Email
          defaultValue: root@localhost
          validRegex: /^([^\s^\/])+$/

        # OCR settings; each id feeds the matching PAPERLESS_OCR_* variable of
        # the main service.
        - id: $$cap_ocr_language
          label: OCR Language
          defaultValue: 'eng'
          description: Customize the language that paperless will attempt to use when parsing documents. It should be a 3-letter language code consistent with ISO 639 https://www.loc.gov/standards/iso639-2/php/code_list.php This can be a combination of multiple languages such as deu+eng, in which case tesseract will use whatever language matches best. Keep in mind that tesseract uses much more cpu time with multiple languages enabled.

        - id: $$cap_ocr_mode
          label: OCR Mode
          defaultValue: 'skip'
          validRegex: /^(skip|skip_noarchive|redo|force)$/
          description: Tell paperless when and how to perform ocr on your documents. Four modes are available- skip, skip_noarchive, redo, force. Read more about this in the OCRmyPDF documentation (https://ocrmypdf.readthedocs.io/en/latest/advanced.html#when-ocr-is-skipped).

        - id: $$cap_ocr_clean
          label: OCR Clean
          defaultValue: 'clean'
          validRegex: /^(clean|clean-final|none)$/
          description: Tells paperless to use unpaper to clean any input document before sending it to tesseract. This uses more resources, but generally results in better OCR results. Available mode- clean, clean-final, none

        - id: $$cap_ocr_deskew
          label: OCR Deskew
          defaultValue: 'true'
          validRegex: /^(true|false)$/
          description: Tells paperless to correct skewing (slight rotation of input images mainly due to improper scanning).

        - id: $$cap_ocr_rotate_pages
          label: OCR Rotate Pages
          defaultValue: 'true'
          validRegex: /^(true|false)$/
          description: Tells paperless to correct page rotation (90°, 180° and 270° rotation). If you notice that paperless is not rotating incorrectly rotated pages (or vice versa), try adjusting the threshold up or down (see below).

        - id: $$cap_ocr_rotate_threshold
          label: OCR Rotate Pages Threshold
          defaultValue: '12'
          validRegex: /^\d[\d.]*$/
          description: This is an arbitrary value reported by tesseract. “15” is a very conservative value, whereas “2” is a very aggressive option and will often result in correctly rotated pages being rotated as well.

        - id: $$cap_ocr_output_type
          label: OCR Output Type
          defaultValue: 'pdfa'
          validRegex: /^(pdf|pdfa|pdfa-1|pdfa-2|pdfa-3)$/
          description: Specify the type of PDF documents that paperless should produce. Choices- pdf, pdfa, pdfa-1, pdfa-2, pdfa-3

        - id: $$cap_ocr_pages
          label: OCR Pages Count
          defaultValue: '0'
          validRegex: /^\d{1,}$/
          description: Tells paperless to use only the specified amount of pages for OCR. Documents with less than the specified amount of pages get OCR’ed completely. Specifying 1 here will only use the first page. Specifying 0 disables this feature and always uses all pages.

        - id: $$cap_ocr_image_dpi
          label: OCR Image DPI
          description: Set this to the DPI your scanner produces images at. Default is none, which will automatically calculate image DPI so that the produced PDF documents are A4 sized.

        # NOTE(review): this id has a single leading `$`, unlike every other
        # `$$cap_*` id. It matches the placeholder used for PAPERLESS_OCR_USER_ARGS
        # in the main service, so the two must be renamed together or substitution
        # will break.
        - id: $cap_ocr_user_args
          label: OCR User Args
          description: See https://ocrmypdf.readthedocs.io/en/latest/api.html#reference for valid parameters. Specify arguments as a JSON dictionary. Keep note of lower case booleans and double quoted parameter names and strings.

        #   commenting out but keeping the code in case someone tries to add these variables
        #   these are optional variables and paperless-ng calculates automatically if no value is provided
        #   but leaving it blank raises ValueError: invalid literal for int() with base 10:
        # - id: $$cap_task_workers
        #   label: Paperless Task Workers
        #   validRegex: /^\d{0,2}$/
        #   description: Leave blank to calculate automatically based on CPU core count

        # - id: $$cap_threads_per_worker
        #   label: Paperless Threads per Workers
        #   validRegex: /^\d{0,2}$/
        #   description: Leave blank to calculate automatically based on CPU core count. Ensure that the product PAPERLESS_TASK_WORKERS * PAPERLESS_THREADS_PER_WORKER does not exceed your CPU core count

        # Software tweaks: time zone, consumer behaviour, conversion limits,
        # consume scripts and date handling.
        - id: $$cap_timezone
          label: Timezone
          defaultValue: 'UTC'
          description: 'Set the time zone here. See https://docs.djangoproject.com/en/3.1/ref/settings/#std:setting-TIME_ZONE for details on how to set it.'

        - id: $$cap_consumer_polling
          label: Consumer Polling (seconds)
          defaultValue: '0'
          validRegex: /^\d{1,}$/
          description: Defaults to 0, which disables polling and uses filesystem notifications.

        - id: $$cap_consumer_delete_duplicates
          label: Consumer Delete Duplicates
          defaultValue: 'false'
          validRegex: /^(true|false)$/
          description: When the consumer detects a duplicate document, it will not touch the original document.

        - id: $$cap_consumer_recursive
          label: Consumer Recursive
          defaultValue: 'false'
          validRegex: /^(true|false)$/
          description: Enable recursive watching of the consumption directory. Paperless will then pickup files from files in subdirectories within your consumption directory as well.

        - id: $$cap_consumer_subdirs_as_tags
          label: Consumer Sub Directory as tags
          defaultValue: 'false'
          validRegex: /^(true|false)$/
          description: Set the names of subdirectories as tags for consumed files. E.g. <CONSUMPTION_DIR>/foo/bar/file.pdf will add the tags “foo” and “bar” to the consumed file

        - id: $$cap_convert_memory_limit
          label: Convert Memory Limit
          defaultValue: '0'
          validRegex: /^\d{1,}$/
          description: For more information on how to use this value, you should search the web for “MAGICK_MEMORY_LIMIT”.

        # Feeds PAPERLESS_CONVERT_TMPDIR; the label previously duplicated the
        # memory-limit field's label.
        - id: $$cap_convert_tempdir
          label: Convert Temp Directory
          description: For more information on how to use this value, you should search the web for “MAGICK_TMPDIR”.

        - id: $$cap_optimize_thumbnails
          label: Optimize Thumbnails
          defaultValue: 'true'
          validRegex: /^(true|false)$/
          description: This usually reduces the size of thumbnails by about 20%, but uses considerable compute time during consumption.

        - id: $$cap_pre_consume_script
          label: Pre Consume Script (path)
          description: Executed after the consumer sees a new document in the consumption folder, but before any processing of the document is performed. For more information, take a look at Pre-consumption script (https://paperless-ng.readthedocs.io/en/latest/advanced_usage.html?highlight=PAPERLESS_PRE_CONSUME_SCRIPT#pre-consumption-script)

        - id: $$cap_post_consume_script
          label: Post Consume Script (path)
          description: Executed after the consumer has successfully processed a document and has moved it into paperless. For more information, take a look at Post-consumption script (https://paperless-ng.readthedocs.io/en/latest/advanced_usage.html#advanced-post-consume-script)

        - id: $$cap_filename_date_order
          label: Filename Date Order
          description: Defaults to none, which disables this feature. The date order can be set to any option as specified in https://dateparser.readthedocs.io/en/latest/settings.html#date-order.

        - id: $$cap_thumbnail_font_name
          label: Thumbnail Font Name
          defaultValue: '/usr/share/fonts/liberation/LiberationSerif-Regular.ttf'
          description: Paperless creates thumbnails for plain text files by rendering the content of the file on an image and uses a predefined font for that

        - id: $$cap_paperless_ignore_dates
          label: Ignore Dates
          description: You may specify dates in a multitude of formats supported by dateparser (see https://dateparser.readthedocs.io/en/latest/#popular-formats)

        # Docker-specific options: web workers, user/group mapping and extra
        # OCR language packs.
        - id: $$cap_webserver_workers
          label: Webserver Workers
          defaultValue: '2'
          validRegex: /^\d{1,}$/
          description: The number of worker processes the webserver should spawn.

        - id: $$cap_usermap_uid
          label: Usermap UID
          defaultValue: '1000'
          validRegex: /^\d{1,}$/
          description: Set this to your actual user ID on the host system, which you can get by executing ```id -u```

        # `id -g` prints the group ID, so the description refers to the group.
        - id: $$cap_usermap_gid
          label: Usermap GID
          defaultValue: '1000'
          validRegex: /^\d{1,}$/
          description: Set this to your actual group ID on the host system, which you can get by executing ```id -g```

        - id: $$cap_docker_ocr_languages
          label: OCR Languages to install
          description: Additional OCR languages to install. By default, paperless comes with English, German, Italian, Spanish and French.

    # Texts for the start and end of the install flow. Both are folded
    # scalars, so the line breaks below collapse to single spaces.
    instructions:
        start: >-
            Paperless is an application by Daniel Quinn and others that
            indexes your scanned documents and allows you to easily search
            for documents and store metadata alongside your documents.
        end: >-
            Done! 😄 Your service is available at
            http://$$cap_appname.$$cap_root_domain
    # Listing metadata for the app: display name, official flag, summary
    # text and a link to the upstream documentation.
    displayName: Paperless-ng
    isOfficial: true
    description: >-
        Paperless is an application by Daniel Quinn and others that indexes
        your scanned documents and allows you to easily search for documents
        and store metadata alongside your documents.
    documentation: 'https://paperless-ng.readthedocs.io/en/latest/'