From 65d572ccd64f7a50633510021f800cb705d0d3a1 Mon Sep 17 00:00:00 2001 From: Fabre Florian Date: Fri, 3 Oct 2025 10:35:50 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=93=9D(backend)=20add=20fulltext=20search?= =?UTF-8?q?=20documentation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add documentation for env & Find+Docs configuration in dev mode Signed-off-by: Fabre Florian --- docs/architecture.md | 1 + docs/env.md | 203 ++++++++++++++++++------------------ docs/search.md | 39 +++++++ docs/system-requirements.md | 11 ++ env.d/development/common | 1 + 5 files changed, 155 insertions(+), 100 deletions(-) create mode 100644 docs/search.md diff --git a/docs/architecture.md b/docs/architecture.md index 230d3245..f858eb01 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -12,6 +12,7 @@ flowchart TD Back --> DB("Database (PostgreSQL)") Back <--> Celery --> DB Back ----> S3("Minio (S3)") + Back -- REST API --> Find ``` ### Architecture decision records diff --git a/docs/env.md b/docs/env.md index 7c7ee85d..f11e6c62 100644 --- a/docs/env.md +++ b/docs/env.md @@ -6,106 +6,109 @@ Here we describe all environment variables that can be set for the docs applicat These are the environment variables you can set for the `impress-backend` container. -| Option | Description | default | -|-------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------| -| AI_ALLOW_REACH_FROM | Users that can use AI must be this level. 
options are "public", "authenticated", "restricted" | authenticated | -| AI_API_KEY | AI key to be used for AI Base url | | -| AI_BASE_URL | OpenAI compatible AI base url | | -| AI_FEATURE_ENABLED | Enable AI options | false | -| AI_MODEL | AI Model to use | | -| ALLOW_LOGOUT_GET_METHOD | Allow get logout method | true | -| API_USERS_LIST_LIMIT | Limit on API users | 5 | -| API_USERS_LIST_THROTTLE_RATE_BURST | Throttle rate for api on burst | 30/minute | -| API_USERS_LIST_THROTTLE_RATE_SUSTAINED | Throttle rate for api | 180/hour | -| AWS_S3_ACCESS_KEY_ID | Access id for s3 endpoint | | -| AWS_S3_ENDPOINT_URL | S3 endpoint | | -| AWS_S3_REGION_NAME | Region name for s3 endpoint | | -| AWS_S3_SECRET_ACCESS_KEY | Access key for s3 endpoint | | -| AWS_STORAGE_BUCKET_NAME | Bucket name for s3 endpoint | impress-media-storage | -| CACHES_DEFAULT_TIMEOUT | Cache default timeout | 30 | -| CACHES_KEY_PREFIX | The prefix used to every cache keys. | docs | -| COLLABORATION_API_URL | Collaboration api host | | -| COLLABORATION_SERVER_SECRET | Collaboration api secret | | -| COLLABORATION_WS_NOT_CONNECTED_READY_ONLY | Users not connected to the collaboration server cannot edit | false | -| COLLABORATION_WS_URL | Collaboration websocket url | | -| CONVERSION_API_CONTENT_FIELD | Conversion api content field | content | -| CONVERSION_API_ENDPOINT | Conversion API endpoint | convert | -| CONVERSION_API_SECURE | Require secure conversion api | false | -| CONVERSION_API_TIMEOUT | Conversion api timeout | 30 | -| CRISP_WEBSITE_ID | Crisp website id for support | | -| DB_ENGINE | Engine to use for database connections | django.db.backends.postgresql_psycopg2 | -| DB_HOST | Host of the database | localhost | -| DB_NAME | Name of the database | impress | -| DB_PASSWORD | Password to authenticate with | pass | -| DB_PORT | Port of the database | 5432 | -| DB_USER | User to authenticate with | dinum | -| DJANGO_ALLOWED_HOSTS | Allowed hosts | [] | -| DJANGO_CELERY_BROKER_TRANSPORT_OPTIONS 
| Celery broker transport options | {} | -| DJANGO_CELERY_BROKER_URL | Celery broker url | redis://redis:6379/0 | -| DJANGO_CORS_ALLOW_ALL_ORIGINS | Allow all CORS origins | false | -| DJANGO_CORS_ALLOWED_ORIGIN_REGEXES | List of origins allowed for CORS using regulair expressions | [] | -| DJANGO_CORS_ALLOWED_ORIGINS | List of origins allowed for CORS | [] | -| DJANGO_CSRF_TRUSTED_ORIGINS | CSRF trusted origins | [] | -| DJANGO_EMAIL_BACKEND | Email backend library | django.core.mail.backends.smtp.EmailBackend | -| DJANGO_EMAIL_BRAND_NAME | Brand name for email | | -| DJANGO_EMAIL_FROM | Email address used as sender | from@example.com | -| DJANGO_EMAIL_HOST | Hostname of email | | -| DJANGO_EMAIL_HOST_PASSWORD | Password to authenticate with on the email host | | -| DJANGO_EMAIL_HOST_USER | User to authenticate with on the email host | | -| DJANGO_EMAIL_LOGO_IMG | Logo for the email | | -| DJANGO_EMAIL_PORT | Port used to connect to email host | | -| DJANGO_EMAIL_USE_SSL | Use ssl for email host connection | false | -| DJANGO_EMAIL_USE_TLS | Use tls for email host connection | false | -| DJANGO_SECRET_KEY | Secret key | | -| DJANGO_SERVER_TO_SERVER_API_TOKENS | | [] | -| DOCUMENT_IMAGE_MAX_SIZE | Maximum size of document in bytes | 10485760 | -| FRONTEND_CSS_URL | To add a external css file to the app | | -| FRONTEND_HOMEPAGE_FEATURE_ENABLED | Frontend feature flag to display the homepage | false | -| FRONTEND_THEME | Frontend theme to use | | -| LANGUAGE_CODE | Default language | en-us | -| LASUITE_MARKETING_BACKEND | Backend used when SIGNUP_NEW_USER_TO_MARKETING_EMAIL is True. See https://github.com/suitenumerique/django-lasuite/blob/main/documentation/how-to-use-marketing-backend.md | lasuite.marketing.backends.dummy.DummyBackend | -| LASUITE_MARKETING_PARAMETERS | The parameters to configure LASUITE_MARKETING_BACKEND. 
See https://github.com/suitenumerique/django-lasuite/blob/main/documentation/how-to-use-marketing-backend.md | {} | -| LOGGING_LEVEL_LOGGERS_APP | Application logging level. options are "DEBUG", "INFO", "WARN", "ERROR", "CRITICAL" | INFO | -| LOGGING_LEVEL_LOGGERS_ROOT | Default logging level. options are "DEBUG", "INFO", "WARN", "ERROR", "CRITICAL" | INFO | -| LOGIN_REDIRECT_URL | Login redirect url | | -| LOGIN_REDIRECT_URL_FAILURE | Login redirect url on failure | | -| LOGOUT_REDIRECT_URL | Logout redirect url | | -| MALWARE_DETECTION_BACKEND | The malware detection backend use from the django-lasuite package | lasuite.malware_detection.backends.dummy.DummyBackend | -| MALWARE_DETECTION_PARAMETERS | A dict containing all the parameters to initiate the malware detection backend | {"callback_path": "core.malware_detection.malware_detection_callback",} | -| MEDIA_BASE_URL | | | -| NO_WEBSOCKET_CACHE_TIMEOUT | Cache used to store current editor session key when only users without websocket are editing a document | 120 | -| OIDC_ALLOW_DUPLICATE_EMAILS | Allow duplicate emails | false | -| OIDC_AUTH_REQUEST_EXTRA_PARAMS | OIDC extra auth parameters | {} | -| OIDC_CREATE_USER | Create used on OIDC | false | -| OIDC_FALLBACK_TO_EMAIL_FOR_IDENTIFICATION | Fallback to email for identification | true | -| OIDC_OP_AUTHORIZATION_ENDPOINT | Authorization endpoint for OIDC | | -| OIDC_OP_JWKS_ENDPOINT | JWKS endpoint for OIDC | | -| OIDC_OP_LOGOUT_ENDPOINT | Logout endpoint for OIDC | | -| OIDC_OP_TOKEN_ENDPOINT | Token endpoint for OIDC | | -| OIDC_OP_USER_ENDPOINT | User endpoint for OIDC | | -| OIDC_REDIRECT_ALLOWED_HOSTS | Allowed hosts for OIDC redirect url | [] | -| OIDC_REDIRECT_REQUIRE_HTTPS | Require https for OIDC redirect url | false | -| OIDC_RP_CLIENT_ID | Client id used for OIDC | impress | -| OIDC_RP_CLIENT_SECRET | Client secret used for OIDC | | -| OIDC_RP_SCOPES | Scopes requested for OIDC | openid email | -| OIDC_RP_SIGN_ALGO | verification algorithm used 
OIDC tokens | RS256 | -| OIDC_STORE_ID_TOKEN | Store OIDC token | true | -| OIDC_USE_NONCE | Use nonce for OIDC | true | -| OIDC_USERINFO_FULLNAME_FIELDS | OIDC token claims to create full name | ["first_name", "last_name"] | -| OIDC_USERINFO_SHORTNAME_FIELD | OIDC token claims to create shortname | first_name | -| POSTHOG_KEY | Posthog key for analytics | | -| REDIS_URL | Cache url | redis://redis:6379/1 | -| SENTRY_DSN | Sentry host | | -| SESSION_COOKIE_AGE | duration of the cookie session | 60*60*12 | -| SIGNUP_NEW_USER_TO_MARKETING_EMAIL | Register new user to the marketing onboarding. If True, see env LASUITE_MARKETING_* system | False -| SPECTACULAR_SETTINGS_ENABLE_DJANGO_DEPLOY_CHECK | | false | -| STORAGES_STATICFILES_BACKEND | | whitenoise.storage.CompressedManifestStaticFilesStorage | -| THEME_CUSTOMIZATION_CACHE_TIMEOUT | Cache duration for the customization settings | 86400 | -| THEME_CUSTOMIZATION_FILE_PATH | Full path to the file customizing the theme. An example is provided in src/backend/impress/configuration/theme/default.json | BASE_DIR/impress/configuration/theme/default.json | -| TRASHBIN_CUTOFF_DAYS | Trashbin cutoff | 30 | -| USER_OIDC_ESSENTIAL_CLAIMS | Essential claims in OIDC token | [] | -| Y_PROVIDER_API_BASE_URL | Y Provider url | | -| Y_PROVIDER_API_KEY | Y provider API key | | +| Option | Description | default | +|-------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------| +| AI_ALLOW_REACH_FROM | Users that can use AI must be this level. 
options are "public", "authenticated", "restricted" | authenticated | +| AI_API_KEY | AI key to be used for AI Base url | | +| AI_BASE_URL | OpenAI compatible AI base url | | +| AI_FEATURE_ENABLED | Enable AI options | false | +| AI_MODEL | AI Model to use | | +| ALLOW_LOGOUT_GET_METHOD | Allow get logout method | true | +| API_USERS_LIST_LIMIT | Limit on API users | 5 | +| API_USERS_LIST_THROTTLE_RATE_BURST | Throttle rate for api on burst | 30/minute | +| API_USERS_LIST_THROTTLE_RATE_SUSTAINED | Throttle rate for api | 180/hour | +| AWS_S3_ACCESS_KEY_ID | Access id for s3 endpoint | | +| AWS_S3_ENDPOINT_URL | S3 endpoint | | +| AWS_S3_REGION_NAME | Region name for s3 endpoint | | +| AWS_S3_SECRET_ACCESS_KEY | Access key for s3 endpoint | | +| AWS_STORAGE_BUCKET_NAME | Bucket name for s3 endpoint | impress-media-storage | +| CACHES_DEFAULT_TIMEOUT | Cache default timeout | 30 | +| CACHES_KEY_PREFIX | The prefix used to every cache keys. | docs | +| COLLABORATION_API_URL | Collaboration api host | | +| COLLABORATION_SERVER_SECRET | Collaboration api secret | | +| COLLABORATION_WS_NOT_CONNECTED_READY_ONLY | Users not connected to the collaboration server cannot edit | false | +| COLLABORATION_WS_URL | Collaboration websocket url | | +| CONVERSION_API_CONTENT_FIELD | Conversion api content field | content | +| CONVERSION_API_ENDPOINT | Conversion API endpoint | convert | +| CONVERSION_API_SECURE | Require secure conversion api | false | +| CONVERSION_API_TIMEOUT | Conversion api timeout | 30 | +| CRISP_WEBSITE_ID | Crisp website id for support | | +| DB_ENGINE | Engine to use for database connections | django.db.backends.postgresql_psycopg2 | +| DB_HOST | Host of the database | localhost | +| DB_NAME | Name of the database | impress | +| DB_PASSWORD | Password to authenticate with | pass | +| DB_PORT | Port of the database | 5432 | +| DB_USER | User to authenticate with | dinum | +| DJANGO_ALLOWED_HOSTS | Allowed hosts | [] | +| DJANGO_CELERY_BROKER_TRANSPORT_OPTIONS 
| Celery broker transport options | {} | +| DJANGO_CELERY_BROKER_URL | Celery broker url | redis://redis:6379/0 | +| DJANGO_CORS_ALLOWED_ORIGINS | List of origins allowed for CORS | [] | +| DJANGO_CORS_ALLOWED_ORIGIN_REGEXES | List of origins allowed for CORS using regulair expressions | [] | +| DJANGO_CORS_ALLOW_ALL_ORIGINS | Allow all CORS origins | false | +| DJANGO_CSRF_TRUSTED_ORIGINS | CSRF trusted origins | [] | +| DJANGO_EMAIL_BACKEND | Email backend library | django.core.mail.backends.smtp.EmailBackend | +| DJANGO_EMAIL_BRAND_NAME | Brand name for email | | +| DJANGO_EMAIL_FROM | Email address used as sender | from@example.com | +| DJANGO_EMAIL_HOST | Hostname of email | | +| DJANGO_EMAIL_HOST_PASSWORD | Password to authenticate with on the email host | | +| DJANGO_EMAIL_HOST_USER | User to authenticate with on the email host | | +| DJANGO_EMAIL_LOGO_IMG | Logo for the email | | +| DJANGO_EMAIL_PORT | Port used to connect to email host | | +| DJANGO_EMAIL_USE_SSL | Use ssl for email host connection | false | +| DJANGO_EMAIL_USE_TLS | Use tls for email host connection | false | +| DJANGO_SECRET_KEY | Secret key | | +| DJANGO_SERVER_TO_SERVER_API_TOKENS | | [] | +| DOCUMENT_IMAGE_MAX_SIZE | Maximum size of document in bytes | 10485760 | +| FRONTEND_CSS_URL | To add a external css file to the app | | +| FRONTEND_HOMEPAGE_FEATURE_ENABLED | Frontend feature flag to display the homepage | false | +| FRONTEND_THEME | Frontend theme to use | | +| LANGUAGE_CODE | Default language | en-us | +| LASUITE_MARKETING_BACKEND | Backend used when SIGNUP_NEW_USER_TO_MARKETING_EMAIL is True. See https://github.com/suitenumerique/django-lasuite/blob/main/documentation/how-to-use-marketing-backend.md | lasuite.marketing.backends.dummy.DummyBackend | +| LASUITE_MARKETING_PARAMETERS | The parameters to configure LASUITE_MARKETING_BACKEND. 
See https://github.com/suitenumerique/django-lasuite/blob/main/documentation/how-to-use-marketing-backend.md | {} | +| LOGGING_LEVEL_LOGGERS_APP | Application logging level. options are "DEBUG", "INFO", "WARN", "ERROR", "CRITICAL" | INFO | +| LOGGING_LEVEL_LOGGERS_ROOT | Default logging level. options are "DEBUG", "INFO", "WARN", "ERROR", "CRITICAL" | INFO | +| LOGIN_REDIRECT_URL | Login redirect url | | +| LOGIN_REDIRECT_URL_FAILURE | Login redirect url on failure | | +| LOGOUT_REDIRECT_URL | Logout redirect url | | +| MALWARE_DETECTION_BACKEND | The malware detection backend use from the django-lasuite package | lasuite.malware_detection.backends.dummy.DummyBackend | +| MALWARE_DETECTION_PARAMETERS | A dict containing all the parameters to initiate the malware detection backend | {"callback_path": "core.malware_detection.malware_detection_callback",} | +| MEDIA_BASE_URL | | | +| NO_WEBSOCKET_CACHE_TIMEOUT | Cache used to store current editor session key when only users without websocket are editing a document | 120 | +| OIDC_ALLOW_DUPLICATE_EMAILS | Allow duplicate emails | false | +| OIDC_AUTH_REQUEST_EXTRA_PARAMS | OIDC extra auth parameters | {} | +| OIDC_CREATE_USER | Create used on OIDC | false | +| OIDC_FALLBACK_TO_EMAIL_FOR_IDENTIFICATION | Fallback to email for identification | true | +| OIDC_OP_AUTHORIZATION_ENDPOINT | Authorization endpoint for OIDC | | +| OIDC_OP_JWKS_ENDPOINT | JWKS endpoint for OIDC | | +| OIDC_OP_LOGOUT_ENDPOINT | Logout endpoint for OIDC | | +| OIDC_OP_TOKEN_ENDPOINT | Token endpoint for OIDC | | +| OIDC_OP_USER_ENDPOINT | User endpoint for OIDC | | +| OIDC_REDIRECT_ALLOWED_HOSTS | Allowed hosts for OIDC redirect url | [] | +| OIDC_REDIRECT_REQUIRE_HTTPS | Require https for OIDC redirect url | false | +| OIDC_RP_CLIENT_ID | Client id used for OIDC | impress | +| OIDC_RP_CLIENT_SECRET | Client secret used for OIDC | | +| OIDC_RP_SCOPES | Scopes requested for OIDC | openid email | +| OIDC_RP_SIGN_ALGO | verification algorithm used 
OIDC tokens | RS256 | +| OIDC_STORE_ID_TOKEN | Store OIDC token | true | +| OIDC_USE_NONCE | Use nonce for OIDC | true | +| OIDC_USERINFO_FULLNAME_FIELDS | OIDC token claims to create full name | ["first_name", "last_name"] | +| OIDC_USERINFO_SHORTNAME_FIELD | OIDC token claims to create shortname | first_name | +| POSTHOG_KEY | Posthog key for analytics | | +| REDIS_URL | Cache url | redis://redis:6379/1 | +| SEARCH_INDEXER_BATCH_SIZE | Size of each batch for indexation of all documents | 100000 | +| SEARCH_INDEXER_COUNTDOWN | Minimum debounce delay of indexation jobs (in seconds) | 1 | +| SEARCH_INDEXER_SECRET | Token for indexation queries | | +| SENTRY_DSN | Sentry host | | +| SESSION_COOKIE_AGE | duration of the cookie session | 60*60*12 | +| SIGNUP_NEW_USER_TO_MARKETING_EMAIL | Register new user to the marketing onboarding. If True, see env LASUITE_MARKETING_* system | False | +| SPECTACULAR_SETTINGS_ENABLE_DJANGO_DEPLOY_CHECK | | false | +| STORAGES_STATICFILES_BACKEND | | whitenoise.storage.CompressedManifestStaticFilesStorage | +| THEME_CUSTOMIZATION_CACHE_TIMEOUT | Cache duration for the customization settings | 86400 | +| THEME_CUSTOMIZATION_FILE_PATH | Full path to the file customizing the theme. 
An example is provided in src/backend/impress/configuration/theme/default.json | BASE_DIR/impress/configuration/theme/default.json | +| TRASHBIN_CUTOFF_DAYS | Trashbin cutoff | 30 | +| USER_OIDC_ESSENTIAL_CLAIMS | Essential claims in OIDC token | [] | +| Y_PROVIDER_API_BASE_URL | Y Provider url | | +| Y_PROVIDER_API_KEY | Y provider API key | | ## impress-frontend image diff --git a/docs/search.md b/docs/search.md new file mode 100644 index 00000000..63581081 --- /dev/null +++ b/docs/search.md @@ -0,0 +1,39 @@ +# Set up the Find search for Impress + +This configuration will enable the fulltext search feature for Docs: +- Each save on **core.Document** or **core.DocumentAccess** will trigger the indexer +- The `api/v1.0/documents/search/` endpoint will work as a proxy to the Find API for fulltext search. + +## Create an index service for Docs + +Configure a **Service** in the Find application for the Docs application with these settings: + +- **Name**: `docs`
_request.auth.name of the Docs application._ +- **Client id**: `impress`
_Name of the token audience or client_id of the Docs application._ +See [how-to-use-indexer.md](how-to-use-indexer.md) for details. + +## Configure settings of Docs + +Add these Django settings to the Docs application to enable the feature. + +```shell +SEARCH_INDEXER_CLASS="core.services.search_indexers.FindDocumentIndexer" +SEARCH_INDEXER_COUNTDOWN=10 # Debounce delay in seconds for the indexer calls. + +# The token from service "docs" of the Find application (development). +SEARCH_INDEXER_SECRET="find-api-key-for-docs-with-exactly-50-chars-length" +SEARCH_INDEXER_URL="http://find:8000/api/v1.0/documents/index/" + +# Search endpoint. Uses the OIDC token for authentication +SEARCH_INDEXER_QUERY_URL="http://find:8000/api/v1.0/documents/search/" +``` + +We also need to enable the **OIDC token** refresh, otherwise the authentication will fail quickly. + +```shell +# Store OIDC tokens in the session +OIDC_STORE_ACCESS_TOKEN=True # Store the access token in the session +OIDC_STORE_REFRESH_TOKEN=True # Store the encrypted refresh token in the session +OIDC_STORE_REFRESH_TOKEN_KEY="your-32-byte-encryption-key==" # Must be a valid Fernet key (32 url-safe base64-encoded bytes) +``` diff --git a/docs/system-requirements.md b/docs/system-requirements.md index 8f2daafb..db337d9b 100644 --- a/docs/system-requirements.md +++ b/docs/system-requirements.md @@ -97,6 +97,17 @@ Production deployments differ significantly from development environments. The t | 5433 | PostgreSQL (Keycloak) | | 1081 | MailCatcher | +**With fulltext search service** + +| Port | Service | +| --------- | --------------------- | +| 8081 | Find (Django) | +| 9200 | Opensearch | +| 9600 | Opensearch admin | +| 5601 | Opensearch dashboard | +| 25432 | PostgreSQL (Find) | + + ## 6. Sizing Guidelines **RAM** – start at 8 GB dev / 16 GB staging / 32 GB prod. Postgres and Keycloak are the first to OOM; scale them first. 
diff --git a/env.d/development/common b/env.d/development/common index 5b1564ad..cdd734b5 100644 --- a/env.d/development/common +++ b/env.d/development/common @@ -36,6 +36,7 @@ OIDC_OP_JWKS_ENDPOINT=http://nginx:8083/realms/impress/protocol/openid-connect/c OIDC_OP_AUTHORIZATION_ENDPOINT=http://localhost:8083/realms/impress/protocol/openid-connect/auth OIDC_OP_TOKEN_ENDPOINT=http://nginx:8083/realms/impress/protocol/openid-connect/token OIDC_OP_USER_ENDPOINT=http://nginx:8083/realms/impress/protocol/openid-connect/userinfo +OIDC_OP_INTROSPECTION_ENDPOINT=http://nginx:8083/realms/impress/protocol/openid-connect/token/introspect OIDC_RP_CLIENT_ID=impress OIDC_RP_CLIENT_SECRET=ThisIsAnExampleKeyForDevPurposeOnly