# Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2025) # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Utilities for safely resolving component asset paths and content types. These helpers are used by server handlers to construct safe absolute paths for component files and to determine the appropriate ``Content-Type`` header for responses. Path resolution prevents directory traversal by normalizing and checking real paths against a component's root directory. """ from __future__ import annotations import mimetypes import os from typing import Final _OCTET_STREAM: Final[str] = "application/octet-stream" def build_safe_abspath(component_root: str, relative_url_path: str) -> str | None: """Build an absolute path inside ``component_root`` if safe. The function joins ``relative_url_path`` with ``component_root`` and normalizes and resolves symlinks. If the resulting path escapes the component root, ``None`` is returned to indicate a forbidden traversal. Parameters ---------- component_root : str Absolute path to the component's root directory. relative_url_path : str Relative URL path from the component root to the requested file. Returns ------- str or None The resolved absolute path if it stays within ``component_root``; otherwise ``None`` when the path would traverse outside the root. """ root_real = os.path.realpath(component_root) candidate = os.path.normpath(os.path.join(root_real, relative_url_path)) candidate_real = os.path.realpath(candidate) try: # Ensure the candidate stays within the real component root if os.path.commonpath([root_real, candidate_real]) != root_real: return None except ValueError: # On some platforms, commonpath can raise if drives differ; treat as forbidden. return None return candidate_real def guess_content_type(abspath: str) -> str: """Guess the HTTP ``Content-Type`` for a file path. This logic mirrors Tornado's ``StaticFileHandler`` by respecting encoding metadata from ``mimetypes.guess_type`` and falling back to ``application/octet-stream`` when no specific type can be determined. Parameters ---------- abspath : str Absolute file path used for type detection (only the suffix matters). Returns ------- str Guessed content type string suitable for the ``Content-Type`` header. """ mime_type, encoding = mimetypes.guess_type(abspath) # per RFC 6713, use the appropriate type for a gzip compressed file if encoding == "gzip": return "application/gzip" # As of 2015-07-21 there is no bzip2 encoding defined at # http://www.iana.org/assignments/media-types/media-types.xhtml # So for that (and any other encoding), use octet-stream. if encoding is not None: return _OCTET_STREAM if mime_type is not None: return mime_type # if mime_type not detected, use application/octet-stream return _OCTET_STREAM