From e0583298a2218c2aaf8ed1faedbd9c67f6b3aa83 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Thu, 22 Jan 2026 13:21:40 +0100 Subject: [PATCH 1/5] chore: Replace Makefile with poethepoet task runner --- .github/workflows/_release_docs.yaml | 7 +-- .github/workflows/on_schedule_tests.yaml | 4 +- .pre-commit-config.yaml | 4 +- CONTRIBUTING.md | 28 ++++----- Makefile | 79 ------------------------ pyproject.toml | 42 +++++++++++++ uv.lock | 25 ++++++++ 7 files changed, 87 insertions(+), 102 deletions(-) delete mode 100644 Makefile diff --git a/.github/workflows/_release_docs.yaml b/.github/workflows/_release_docs.yaml index e5eb4a2f9b..653ed88d43 100644 --- a/.github/workflows/_release_docs.yaml +++ b/.github/workflows/_release_docs.yaml @@ -50,13 +50,10 @@ jobs: python-version: ${{ env.PYTHON_VERSION }} - name: Install Python dependencies - run: make install-dev - - - name: Build generated API reference - run: make build-api-reference + run: uv run poe install-dev - name: Build Docusaurus docs - run: make build-docs + run: uv run poe build-docs env: APIFY_SIGNING_TOKEN: ${{ secrets.APIFY_SIGNING_TOKEN }} SEGMENT_TOKEN: ${{ secrets.SEGMENT_TOKEN }} diff --git a/.github/workflows/on_schedule_tests.yaml b/.github/workflows/on_schedule_tests.yaml index 85bd076ab2..fa50bdc468 100644 --- a/.github/workflows/on_schedule_tests.yaml +++ b/.github/workflows/on_schedule_tests.yaml @@ -57,9 +57,9 @@ jobs: # Sync the project, but no need to install the browsers into the test runner environment. 
- name: Install Python dependencies - run: make install-sync + run: uv run poe install-sync - name: Run templates end-to-end tests - run: make e2e-templates-tests args="-m ${{ matrix.http-client }} and ${{ matrix.crawler-type }} and ${{ matrix.package-manager }}" + run: uv run poe e2e-templates-tests -- -m "${{ matrix.http-client }} and ${{ matrix.crawler-type }} and ${{ matrix.package-manager }}" env: APIFY_TEST_USER_API_TOKEN: ${{ secrets.APIFY_TEST_USER_API_TOKEN }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 47a52fc038..c823138b25 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,12 +3,12 @@ repos: hooks: - id: lint-check name: Lint check - entry: make lint + entry: uv run poe lint language: system pass_filenames: false - id: type-check name: Type check - entry: make type-check + entry: uv run poe type-check language: system pass_filenames: false diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 95b1982bdb..daebb52139 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -8,12 +8,15 @@ For local development, it is required to have Python 3.10 (or a later version) i We use [uv](https://docs.astral.sh/uv/) for project management. Install it and set up your IDE accordingly. +We use [Poe the Poet](https://poethepoet.natn.io/) as a task runner, similar to npm scripts in `package.json`. +All tasks are defined in `pyproject.toml` under `[tool.poe.tasks]` and can be run with `uv run poe <task>`. 
+ ## Dependencies To install this package and its development dependencies, run: ```sh -make install-dev +uv run poe install-dev ``` ## Code checking @@ -21,7 +24,7 @@ make install-dev To execute all code checking tools together, run: ```sh -make check-code +uv run poe check-code ``` ### Linting @@ -31,7 +34,7 @@ We utilize [ruff](https://docs.astral.sh/ruff/) for linting, which analyzes code To run linting: ```sh -make lint +uv run poe lint ``` ### Formatting @@ -41,35 +44,33 @@ Our automated code formatting also leverages [ruff](https://docs.astral.sh/ruff/ To run formatting: ```sh -make format +uv run poe format ``` ### Type checking Type checking is handled by [ty](https://docs.astral.sh/ty/), verifying code against type annotations. Configuration settings can be found in `pyproject.toml`. -To run type checking: +To run type-check: ```sh -make type-check +uv run poe type-check ``` ### Unit tests -We employ pytest as our testing framework, equipped with various plugins. Check pyproject.toml for configuration details and installed plugins. - We use [pytest](https://docs.pytest.org/) as a testing framework with many plugins. Check `pyproject.toml` for configuration details and installed plugins. 
To run unit tests: ```sh -make unit-tests +uv run poe unit-tests ``` -To run unit tests with HTML coverage report: +To run unit tests with coverage report: ```sh -make unit-tests-cov +uv run poe unit-tests-cov ``` ## End-to-end tests @@ -79,11 +80,10 @@ Pre-requisites for running end-to-end tests: - `apify-cli` available in `PATH` environment variable - Your [apify token](https://docs.apify.com/platform/integrations/api#api-token) is available in `APIFY_TEST_USER_API_TOKEN` environment variable - To run end-to-end tests: ```sh -make e2e-templates-tests +uv run poe e2e-templates-tests ``` ## Documentation @@ -95,7 +95,7 @@ Our API documentation is generated from these docstrings using [pydoc-markdown]( To run the documentation locally, ensure you have `Node.js` 20+ installed, then run: ```sh -make run-docs +uv run poe run-docs ``` ## Release process diff --git a/Makefile b/Makefile deleted file mode 100644 index 7224fcb752..0000000000 --- a/Makefile +++ /dev/null @@ -1,79 +0,0 @@ -.PHONY: clean install-sync install-dev build publish-to-pypi lint type-check unit-tests unit-tests-cov \ - e2e-templates-tests format check-code build-api-reference run-docs - -# This is default for local testing, but GitHub workflows override it to a higher value in CI -E2E_TESTS_CONCURRENCY = 1 - -clean: - rm -rf .uv_cache .pytest_cache .ruff_cache .uv-cache build dist htmlcov .coverage - -install-sync: - uv sync --all-extras - -install-dev: - make install-sync - uv run pre-commit install - uv run playwright install - -build: - uv build --verbose - -# APIFY_PYPI_TOKEN_CRAWLEE is expected to be set in the environment -publish-to-pypi: - uv publish --verbose --token "${APIFY_PYPI_TOKEN_CRAWLEE}" - -lint: - uv run ruff format --check - uv run ruff check - -type-check: - uv run ty check - -unit-tests: - uv run pytest \ - --numprocesses=1 \ - --verbose \ - -m "run_alone" \ - tests/unit - uv run pytest \ - --numprocesses=auto \ - --verbose \ - -m "not run_alone" \ - tests/unit - 
-unit-tests-cov: - uv run pytest \ - --numprocesses=1 \ - --verbose \ - -m "run_alone" \ - --cov=src/crawlee \ - --cov-report=xml:coverage-unit.xml \ - tests/unit - uv run pytest \ - --numprocesses=auto \ - --verbose \ - -m "not run_alone" \ - --cov=src/crawlee \ - --cov-report=xml:coverage-unit.xml \ - --cov-append \ - tests/unit - -e2e-templates-tests $(args): - uv run pytest --numprocesses=$(E2E_TESTS_CONCURRENCY) --verbose tests/e2e/project_template "$(args)" --timeout=600 - -format: - uv run ruff check --fix - uv run ruff format - -# The check-code target runs a series of checks equivalent to those performed by pre-commit hooks -# and the run_checks.yaml GitHub Actions workflow. -check-code: lint type-check unit-tests - -build-api-reference: - cd website && uv run ./build_api_reference.sh - -build-docs: - cd website && corepack enable && yarn && uv run yarn build - -run-docs: build-api-reference - cd website && corepack enable && yarn && uv run yarn start diff --git a/pyproject.toml b/pyproject.toml index bf4ed8291d..16650e7103 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,6 +102,7 @@ dev = [ "build<2.0.0", # For e2e tests. 
"dycw-pytest-only<3.0.0", "fakeredis[probabilistic,json,lua]<3.0.0", + "poethepoet<1.0.0", "pre-commit<5.0.0", "proxy-py<3.0.0", "pydoc-markdown<5.0.0", @@ -255,3 +256,44 @@ exclude_lines = ["pragma: no cover", "if TYPE_CHECKING:", "assert_never()"] [tool.ipdb] context = 7 + +# Run tasks with: uv run poe +[tool.poe.tasks] +clean = "rm -rf .uv_cache .pytest_cache .ruff_cache .uv-cache build dist htmlcov .coverage" +install-sync = "uv sync --all-extras" +build = "uv build --verbose" +publish-to-pypi = "uv publish --verbose --token ${APIFY_PYPI_TOKEN_CRAWLEE}" +type-check = "uv run ty check" +check-code = ["lint", "type-check", "unit-tests"] + +[tool.poe.tasks.install-dev] +shell = "uv sync --all-extras && uv run pre-commit install && uv run playwright install" + +[tool.poe.tasks.lint] +shell = "uv run ruff format --check && uv run ruff check" + +[tool.poe.tasks.format] +shell = "uv run ruff check --fix && uv run ruff format" + +[tool.poe.tasks.unit-tests] +shell = """ +uv run pytest --numprocesses=1 --verbose -m "run_alone" tests/unit && \ +uv run pytest --numprocesses=auto --verbose -m "not run_alone" tests/unit +""" + +[tool.poe.tasks.unit-tests-cov] +shell = """ +uv run pytest --numprocesses=1 --verbose -m "run_alone" --cov=src/crawlee --cov-report=xml:coverage-unit.xml tests/unit && \ +uv run pytest --numprocesses=auto --verbose -m "not run_alone" --cov=src/crawlee --cov-report=xml:coverage-unit.xml --cov-append tests/unit +""" + +[tool.poe.tasks.e2e-templates-tests] +cmd = "uv run pytest --numprocesses=${E2E_TESTS_CONCURRENCY:-1} --verbose tests/e2e/project_template --timeout=600" + +[tool.poe.tasks.build-docs] +shell = "./build_api_reference.sh && corepack enable && yarn && yarn build" +cwd = "website" + +[tool.poe.tasks.run-docs] +shell = "./build_api_reference.sh && corepack enable && yarn && yarn start" +cwd = "website" diff --git a/uv.lock b/uv.lock index 3afc7cd3b5..b6a206c5a5 100644 --- a/uv.lock +++ b/uv.lock @@ -811,6 +811,7 @@ dev = [ { name = "build" 
}, { name = "dycw-pytest-only" }, { name = "fakeredis", extra = ["json", "lua", "probabilistic"] }, + { name = "poethepoet" }, { name = "pre-commit" }, { name = "proxy-py" }, { name = "pydoc-markdown" }, @@ -887,6 +888,7 @@ dev = [ { name = "build", specifier = "<2.0.0" }, { name = "dycw-pytest-only", specifier = "<3.0.0" }, { name = "fakeredis", extras = ["probabilistic", "json", "lua"], specifier = "<3.0.0" }, + { name = "poethepoet", specifier = "<1.0.0" }, { name = "pre-commit", specifier = "<5.0.0" }, { name = "proxy-py", specifier = "<3.0.0" }, { name = "pydoc-markdown", specifier = "<5.0.0" }, @@ -2479,6 +2481,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/12/18/35d1d947553d24909dca37e2ff11720eecb601360d1bac8d7a9a1bc7eb08/parsel-1.10.0-py2.py3-none-any.whl", hash = "sha256:6a0c28bd81f9df34ba665884c88efa0b18b8d2c44c81f64e27f2f0cb37d46169", size = 17266, upload-time = "2025-01-17T15:38:27.83Z" }, ] +[[package]] +name = "pastel" +version = "0.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/76/f1/4594f5e0fcddb6953e5b8fe00da8c317b8b41b547e2b3ae2da7512943c62/pastel-0.2.1.tar.gz", hash = "sha256:e6581ac04e973cac858828c6202c1e1e81fee1dc7de7683f3e1ffe0bfd8a573d", size = 7555, upload-time = "2020-09-16T19:21:12.43Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/aa/18/a8444036c6dd65ba3624c63b734d3ba95ba63ace513078e1580590075d21/pastel-0.2.1-py2.py3-none-any.whl", hash = "sha256:4349225fcdf6c2bb34d483e523475de5bb04a5c10ef711263452cb37d7dd4364", size = 5955, upload-time = "2020-09-16T19:21:11.409Z" }, +] + [[package]] name = "pathspec" version = "1.0.3" @@ -2534,6 +2545,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a3/58/35da89ee790598a0700ea49b2a66594140f44dec458c07e8e3d4979137fc/ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce", size = 49567, upload-time = "2018-02-15T19:01:27.172Z" }, 
] +[[package]] +name = "poethepoet" +version = "0.40.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pastel" }, + { name = "pyyaml" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/38/9d/054c8435b03324ed9abd5d5ab8c45065b1f42c23952cd23f13a5921d8465/poethepoet-0.40.0.tar.gz", hash = "sha256:91835f00d03d6c4f0e146f80fa510e298ad865e7edd27fe4cb9c94fdc090791b", size = 81114, upload-time = "2026-01-05T19:09:13.116Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/bc/73327d12b176abea7a3c6c7d760e1a953992f7b59d72c0354e39d7a353b5/poethepoet-0.40.0-py3-none-any.whl", hash = "sha256:afd276ae31d5c53573c0c14898118d4848ccee3709b6b0be6a1c6cbe522bbc8a", size = 106672, upload-time = "2026-01-05T19:09:11.536Z" }, +] + [[package]] name = "pre-commit" version = "4.5.1" From 96e3b1d9ed3d8fcb56fa140f0eff4a3c07e9ed45 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Thu, 22 Jan 2026 14:35:15 +0100 Subject: [PATCH 2/5] improve poe clean --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 16650e7103..f228294f24 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -259,7 +259,7 @@ context = 7 # Run tasks with: uv run poe [tool.poe.tasks] -clean = "rm -rf .uv_cache .pytest_cache .ruff_cache .uv-cache build dist htmlcov .coverage" +clean = "rm -rf .coverage .pytest_cache .ruff_cache .ty_cache .uv-cache build coverage-unit.xml dist htmlcov website/.docusaurus website/.yarn website/module_shortcuts.json website/node_modules " install-sync = "uv sync --all-extras" build = "uv build --verbose" publish-to-pypi = "uv publish --verbose --token ${APIFY_PYPI_TOKEN_CRAWLEE}" From 46e0dc0b4054ed92396d90b94e45dfff893a7c15 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Thu, 22 Jan 2026 16:28:18 +0100 Subject: [PATCH 3/5] Update contributing guide --- CONTRIBUTING.md | 39 
++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index daebb52139..4a4951829f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -11,6 +11,23 @@ We use [uv](https://docs.astral.sh/uv/) for project management. Install it and s We use [Poe the Poet](https://poethepoet.natn.io/) as a task runner, similar to npm scripts in `package.json`. All tasks are defined in `pyproject.toml` under `[tool.poe.tasks]` and can be run with `uv run poe <task>`. +### Available tasks + +| Task | Description | +| ---- | ----------- | +| `install-dev` | Install dependencies and pre-commit hooks | +| `check-code` | Run lint, type-check, and unit-tests | +| `lint` | Run linter | +| `format` | Fix lint issues and format code | +| `type-check` | Run type checker | +| `unit-tests` | Run unit tests | +| `unit-tests-cov` | Run unit tests with coverage | +| `e2e-templates-tests` | Run end-to-end template tests | +| `build-docs` | Build documentation website | +| `run-docs` | Run documentation locally | +| `build` | Build package | +| `clean` | Remove build artifacts | + ## Dependencies To install this package and its development dependencies, run: @@ -51,7 +68,7 @@ uv run poe format Type checking is handled by [ty](https://docs.astral.sh/ty/), verifying code against type annotations. Configuration settings can be found in `pyproject.toml`. 
-To run type-check: +To run type checking: ```sh uv run poe type-check @@ -75,10 +92,10 @@ uv run poe unit-tests-cov ## End-to-end tests -Pre-requisites for running end-to-end tests: - - [apify-cli](https://docs.apify.com/cli/docs/installation) correctly installed - - `apify-cli` available in `PATH` environment variable - - Your [apify token](https://docs.apify.com/platform/integrations/api#api-token) is available in `APIFY_TEST_USER_API_TOKEN` environment variable +Prerequisites: + +- [apify-cli](https://docs.apify.com/cli/docs/installation) installed and available in `PATH` +- Set `APIFY_TEST_USER_API_TOKEN` to your [Apify API token](https://docs.apify.com/platform/integrations/api#api-token) To run end-to-end tests: @@ -90,7 +107,7 @@ uv run poe e2e-templates-tests We follow the [Google docstring format](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) for code documentation. All user-facing classes and functions must be documented. Documentation standards are enforced using [Ruff](https://docs.astral.sh/ruff/). -Our API documentation is generated from these docstrings using [pydoc-markdown](https://pypi.org/project/pydoc-markdown/) with custom post-processing. Additional content is provided through markdown files in the `docs/` directory. The final documentation is rendered using [Docusaurus](https://docusaurus.io/) and published to GitHub pages. +Our API documentation is generated from these docstrings using [pydoc-markdown](https://pypi.org/project/pydoc-markdown/) with custom post-processing. Additional content is provided through markdown files in the `docs/` directory. The final documentation is rendered using [Docusaurus](https://docusaurus.io/) and published to GitHub Pages. To run the documentation locally, ensure you have `Node.js` 20+ installed, then run: @@ -120,14 +137,14 @@ name = "crawlee" version = "x.z.y" ``` -4. Generate the distribution archives for the package: +4. 
Build the package: -```shell -uv build +```sh +uv run poe build ``` -5. Set up the PyPI API token for authentication and upload the package to PyPI: +5. Upload to PyPI: -```shell +```sh uv publish --token YOUR_API_TOKEN ``` From 9ca1dd7d408cdcd9e807890cb7153d29f73ec15c Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Thu, 22 Jan 2026 16:42:28 +0100 Subject: [PATCH 4/5] rerun flaky test --- tests/unit/crawlers/_playwright/test_playwright_crawler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/crawlers/_playwright/test_playwright_crawler.py b/tests/unit/crawlers/_playwright/test_playwright_crawler.py index a2e823c195..8298d37dbf 100644 --- a/tests/unit/crawlers/_playwright/test_playwright_crawler.py +++ b/tests/unit/crawlers/_playwright/test_playwright_crawler.py @@ -234,6 +234,7 @@ async def request_handler(context: PlaywrightCrawlingContext) -> None: assert 'headless' not in headers['user-agent'].lower() +@pytest.mark.flaky(reruns=3, reason='Test is flaky.') async def test_firefox_headless_headers(header_network: dict, server_url: URL) -> None: browser_type: BrowserType = 'firefox' crawler = PlaywrightCrawler(headless=True, browser_type=browser_type) From d5ceccfd3547e00eee62b89fec80773ff236498c Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Thu, 22 Jan 2026 16:49:59 +0100 Subject: [PATCH 5/5] description --- CONTRIBUTING.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4a4951829f..f5b6fd0e59 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -15,7 +15,7 @@ All tasks are defined in `pyproject.toml` under `[tool.poe.tasks]` and can be ru | Task | Description | | ---- | ----------- | -| `install-dev` | Install dependencies and pre-commit hooks | +| `install-dev` | Install development dependencies | | `check-code` | Run lint, type-check, and unit-tests | | `lint` | Run linter | | `format` | Fix lint issues and format code | @@ -24,9 +24,9 @@ All tasks are defined in `pyproject.toml` 
under `[tool.poe.tasks]` and can be ru | `unit-tests-cov` | Run unit tests with coverage | | `e2e-templates-tests` | Run end-to-end template tests | | `build-docs` | Build documentation website | -| `run-docs` | Run documentation locally | +| `run-docs` | Run documentation website locally | | `build` | Build package | -| `clean` | Remove build artifacts | +| `clean` | Remove build artifacts and clean caches | ## Dependencies