diff --git a/.asf.yaml b/.asf.yaml
index 73adb1c058b7b..d953f9c585ae8 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -55,6 +55,7 @@ github:
         contexts:
           - "Check License Header"
           - "Use prettier to check formatting of documents"
+          - "Check Markdown Links"
           - "Validate required_status_checks in .asf.yaml"
           - "Spell Check with Typos"
     # needs to be updated as part of the release process
diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
index a247f07333ee5..55e8572408efe 100644
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -23,6 +23,9 @@ on:
   pull_request:
   merge_group:
 
+permissions:
+  contents: read
+
 concurrency:
   group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
   cancel-in-progress: true
@@ -51,6 +54,22 @@ jobs:
         # if you encounter error, see instructions inside the script
         run: ci/scripts/doc_prettier_check.sh
 
+  markdown-link-check:
+    name: Check Markdown Links
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - name: Load tool versions
+        run: |
+          source ci/scripts/utils/tool_versions.sh
+          echo "LYCHEE_VERSION=${LYCHEE_VERSION}" >> "$GITHUB_ENV"
+      - name: Install lychee
+        uses: taiki-e/install-action@055f5df8c3f65ea01cd41e9dc855becd88953486 # v2.75.18
+        with:
+          tool: lychee@${{ env.LYCHEE_VERSION }}
+      - name: Run markdown link check
+        run: bash ci/scripts/markdown_link_check.sh
+
   asf-yaml-check:
     name: Validate required_status_checks in .asf.yaml
     runs-on: ubuntu-latest
diff --git a/ci/scripts/markdown_link_check.sh b/ci/scripts/markdown_link_check.sh
new file mode 100644
index 0000000000000..65fede4111dd8
--- /dev/null
+++ b/ci/scripts/markdown_link_check.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Run lychee against the tracked markdown files of the repository, using the
+# link-check rules in lychee.toml at the repository root.
+
+set -euo pipefail
+
+ROOT_DIR="$(git rev-parse --show-toplevel)"
+
+cd "${ROOT_DIR}"
+
+# Collect the files with a read loop instead of `mapfile`, which the default
+# Bash on macOS lacks. The `:(glob)` pathspec magic makes `**/` match zero or
+# more directories, so markdown files directly under docs/ and dev/ are
+# picked up as well as nested ones.
+MARKDOWN_FILES=()
+while IFS= read -r file; do
+  MARKDOWN_FILES+=("${file}")
+done < <(
+  git -C "${ROOT_DIR}" ls-files \
+    'README.md' \
+    'CONTRIBUTING.md' \
+    ':(glob)docs/**/*.md' \
+    'datafusion-cli/README.md' \
+    'datafusion-examples/README.md' \
+    ':(glob)dev/**/*.md'
+)
+
+# Stop with a clear message rather than expanding an empty array, which older
+# Bash versions reject under `set -u`.
+if [[ "${#MARKDOWN_FILES[@]}" -eq 0 ]]; then
+  echo "No markdown files found to check" >&2
+  exit 1
+fi
+
+lychee --no-progress --config "${ROOT_DIR}/lychee.toml" "${MARKDOWN_FILES[@]}"
diff --git a/ci/scripts/utils/tool_versions.sh b/ci/scripts/utils/tool_versions.sh
index ac731ed0d5341..237b18b62ef40 100644
--- a/ci/scripts/utils/tool_versions.sh
+++ b/ci/scripts/utils/tool_versions.sh
@@ -21,3 +21,4 @@
 # It is intended to be sourced by other scripts and should not be executed directly.
 
 PRETTIER_VERSION="2.7.1"
+LYCHEE_VERSION="0.23.0"
diff --git a/docs/source/contributor-guide/roadmap.md b/docs/source/contributor-guide/roadmap.md
index aac0710dadf77..bfaf398d3f549 100644
--- a/docs/source/contributor-guide/roadmap.md
+++ b/docs/source/contributor-guide/roadmap.md
@@ -19,7 +19,7 @@ under the License.
 
 # Roadmap and Improvement Proposals
 
-The [project introduction](../user-guide/introduction) explains the
+The [project introduction](../user-guide/introduction.md) explains the
 overview and goals of DataFusion, and our development efforts largely
 align to that vision.
 
diff --git a/docs/source/contributor-guide/testing.md b/docs/source/contributor-guide/testing.md
index 6b8e4568ec8ab..3b644f610b90e 100644
--- a/docs/source/contributor-guide/testing.md
+++ b/docs/source/contributor-guide/testing.md
@@ -186,6 +186,34 @@ tested in the same way using the [doc_comment] crate. See the end of
 [doc_comment]: https://docs.rs/doc-comment/latest/doc_comment
 [core/src/lib.rs]: https://github.com/apache/datafusion/blob/main/datafusion/core/src/lib.rs#L583
 
+## Documentation Link Checks
+
+Run the internal markdown link check locally:
+
+```shell
+source ci/scripts/utils/tool_versions.sh
+cargo install lychee --locked --version "${LYCHEE_VERSION}"
+bash ci/scripts/markdown_link_check.sh
+```
+
+Notes:
+
+- The script is run with `bash` and is compatible with the default Bash on macOS (no `mapfile` dependency).
+- The link check configuration (`lychee.toml`) currently covers internal markdown links only. External `http(s)` and `mailto` links are excluded to avoid flaky failures.
+
+When a link is broken, lychee prints the file and URL/path that failed. For example:
+
+```text
+[docs/source/user-guide/cli/overview.md]:
+  [ERROR] file:///.../docs/source/user-guide/cli/missing-page.md | Cannot find file: File not found. Check if file exists and path is correct
+```
+
+Rust doc comments are validated by rustdoc in CI and can be checked locally with:
+
+```shell
+bash ci/scripts/rust_docs.sh
+```
+
 ## Benchmarks
 
 ### Criterion Benchmarks
diff --git a/docs/source/library-user-guide/upgrading/49.0.0.md b/docs/source/library-user-guide/upgrading/49.0.0.md
index 92dee8135590a..92267a80fae69 100644
--- a/docs/source/library-user-guide/upgrading/49.0.0.md
+++ b/docs/source/library-user-guide/upgrading/49.0.0.md
@@ -123,7 +123,7 @@ Or via SQL:
 SET datafusion.execution.spill_compression = 'zstd';
 ```
 
-For more details about this configuration option, including performance trade-offs between different compression codecs, see the [Configuration Settings](../../user-guide/configs) documentation.
+For more details about this configuration option, including performance trade-offs between different compression codecs, see the [Configuration Settings](../../user-guide/configs.md) documentation.
 
 ### Deprecated `map_varchar_to_utf8view` configuration option
 
diff --git a/docs/source/user-guide/cli/overview.md b/docs/source/user-guide/cli/overview.md
index 86beea0e82d5c..e0228d3ea00e4 100644
--- a/docs/source/user-guide/cli/overview.md
+++ b/docs/source/user-guide/cli/overview.md
@@ -41,5 +41,5 @@ DataFusion CLI v37.0.0
 Elapsed 1.969 seconds.
 ```
 
-For more information, see the [Installation](installation), [Usage Guide](usage)
-and [Data Sources](datasources) sections.
+For more information, see the [Installation](installation.md), [Usage Guide](usage.md)
+and [Data Sources](datasources.md) sections.
diff --git a/docs/source/user-guide/dataframe.md b/docs/source/user-guide/dataframe.md
index 85724a72399ad..d35b543bfdadd 100644
--- a/docs/source/user-guide/dataframe.md
+++ b/docs/source/user-guide/dataframe.md
@@ -122,4 +122,4 @@ async fn main() -> Result<()> {
 [`collect`]: https://docs.rs/datafusion/latest/datafusion/dataframe/struct.DataFrame.html#method.collect
 [library users guide]: ../library-user-guide/using-the-dataframe-api.md
 [api reference on docs.rs]: https://docs.rs/datafusion/latest/datafusion/dataframe/struct.DataFrame.html
-[expressions reference]: expressions
+[expressions reference]: expressions.md
diff --git a/docs/source/user-guide/sql/format_options.md b/docs/source/user-guide/sql/format_options.md
index 338508031413c..46d251c18ed74 100644
--- a/docs/source/user-guide/sql/format_options.md
+++ b/docs/source/user-guide/sql/format_options.md
@@ -29,7 +29,7 @@ Format-related options can be specified in three ways, in decreasing order of pr
 - `COPY` option tuples
 - Session-level config defaults
 
-For a list of supported session-level config defaults, see [Configuration Settings](../configs). These defaults apply to all operations but have the lowest level of precedence.
+For a list of supported session-level config defaults, see [Configuration Settings](../configs.md). These defaults apply to all operations but have the lowest level of precedence.
 
 If creating an external table, table-specific format options can be specified when the table is created using the `OPTIONS` clause:
 
diff --git a/lychee.toml b/lychee.toml
new file mode 100644
index 0000000000000..74245ee7ecbfb
--- /dev/null
+++ b/lychee.toml
@@ -0,0 +1,34 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+timeout = 20
+max_retries = 2
+retry_wait_time = 2
+
+exclude_path = [
+  "target",
+  "docs/build",
+  "datafusion/core/benches/tpch-csv",
+]
+
+# Only internal links are checked: external http(s) and mailto links are
+# skipped to avoid flaky failures.
+exclude = [
+  "^http://",
+  "^https://",
+  "^mailto:",
+]