diff --git a/.github/actions/workflow-info/action.yaml b/.github/actions/workflow-info/action.yaml index 185b891..2062041 100644 --- a/.github/actions/workflow-info/action.yaml +++ b/.github/actions/workflow-info/action.yaml @@ -1,19 +1,19 @@ -name: 'Report Workflow Information' -description: 'Reusable action meant to be used in workflow steps' +name: "Report Workflow Information" +description: "Reusable action meant to be used in workflow steps" branding: - icon: 'watch' - color: 'green' + icon: "watch" + color: "green" inputs: title: - description: 'A reference to put in the report title' + description: "A reference to put in the report title" required: true - default: 'the Job' + default: "the Job" parameters: - description: 'Input variables used in Job' + description: "Input variables used in Job" required: false content: - description: 'Content to put in the report' + description: "Content to put in the report" required: false outputs: status: @@ -24,12 +24,12 @@ runs: steps: - name: Report Workflow Information id: report_workflow_generation - env: - REPORT_PARAMS: '${{ inputs.parameters }}' - REPORT_CONTENT: '${{ inputs.content }}' + env: + REPORT_PARAMS: "${{ inputs.parameters }}" + REPORT_CONTENT: "${{ inputs.content }}" shell: bash run: | - echo "::group::Generating report" + echo "::group::Generating report" echo "status=started" >> "${GITHUB_OUTPUT}"; dump_ctx(){ @@ -60,4 +60,3 @@ runs: echo "status=finished" >> "${GITHUB_OUTPUT}"; echo "::endgroup::" - diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml index 210b8d5..616bdd5 100644 --- a/.github/workflows/build-release.yml +++ b/.github/workflows/build-release.yml @@ -8,11 +8,11 @@ on: workflow_dispatch: inputs: release_version_tag: - description: 'Run the workflow with what release tag?' + description: "Run the workflow with what release tag?" required: true type: string branch_commit_or_ref: - description: 'Run the release workflow in what branch/commit?' + description: "Run the release workflow in what branch/commit?" required: true type: string default: master @@ -24,20 +24,20 @@ jobs: strategy: fail-fast: true matrix: - platform: [ linux, windows, macos ] + platform: [linux, windows, macos] include: - - platform: linux - os: ubuntu-latest - target: x86_64-unknown-linux-gnu - channel: stable - - platform: windows - os: windows-latest - target: x86_64-pc-windows-gnu - channel: stable - - platform: macos - os: macos-latest - target: x86_64-apple-darwin - channel: stable + - platform: linux + os: ubuntu-latest + target: x86_64-unknown-linux-gnu + channel: stable + - platform: windows + os: windows-latest + target: x86_64-pc-windows-gnu + channel: stable + - platform: macos + os: macos-latest + target: x86_64-apple-darwin + channel: stable env: RELEASE_VERSION: ${{ inputs.release_version_tag || github.event.release.tag_name }} @@ -49,7 +49,7 @@ jobs: # region Workflow information - name: Checkout sources - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: ref: ${{ inputs.branch_commit_or_ref || '' }} @@ -57,8 +57,8 @@ jobs: id: workflow_report uses: ./.github/actions/workflow-info with: - title: '${{ github.ref_name }}' - parameters: '${{ toJson(inputs) }}' + title: "${{ github.ref_name }}" + parameters: "${{ toJson(inputs) }}" content: | ### Release information - Tag Name: ${{ github.event.release.tag_name }} @@ -148,11 +148,11 @@ jobs: if: matrix.os == 'ubuntu-latest' shell: bash run: | - CARGO_VERSION=$(cargo get package.version --pretty) - echo "CARGO_VERSION=$CARGO_VERSION" - echo "RELEASE_VERSION=${{ env.RELEASE_VERSION }}" + CARGO_VERSION=$(cargo get package.version --pretty) + echo "CARGO_VERSION=$CARGO_VERSION" + echo "RELEASE_VERSION=${{ env.RELEASE_VERSION }}" - test "$CARGO_VERSION" = "${{ env.RELEASE_VERSION }}" + test "$CARGO_VERSION" = "${{ env.RELEASE_VERSION }}" - name: Upload Debian, RPM and tar.gz Packages if: matrix.platform == 'linux' diff --git a/.github/workflows/build-test-and-lint.yml b/.github/workflows/build-test-and-lint.yml index 92ba49c..6fd6924 100644 --- a/.github/workflows/build-test-and-lint.yml +++ b/.github/workflows/build-test-and-lint.yml @@ -6,17 +6,17 @@ name: build-test-and-lint on: push: - branches: [ "master", "release/*" ] + branches: ["master", "release/*"] pull_request: - branches: [ master ] + branches: [master] types: [opened, reopened, synchronize, ready_for_review] schedule: # Run on the 3rd of every month at 2:01 - - cron: '1 2 3 * *' + - cron: "1 2 3 * *" workflow_dispatch: inputs: branch_commit_or_ref: - description: 'Run this workflow in what branch/commit?' + description: "Run this workflow in what branch/commit?" required: true type: string default: master @@ -31,7 +31,7 @@ jobs: JOB_GITHUB_REF: ${{ github.head_ref || github.ref }} steps: - name: Checkout sources - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: ref: ${{ inputs.branch_commit_or_ref || '' }} @@ -39,8 +39,8 @@ jobs: id: workflow_report uses: ./.github/actions/workflow-info with: - title: '${{ github.ref_name }}' - parameters: '${{ toJson(inputs) }}' + title: "${{ github.ref_name }}" + parameters: "${{ toJson(inputs) }}" content: | - Ref: ${{ github.head_ref }} @@ -50,14 +50,14 @@ jobs: strategy: fail-fast: true matrix: - os: [ ubuntu-latest, windows-latest, macos-latest ] - channel: [ stable ] + os: [ubuntu-latest, windows-latest, macos-latest] + channel: [stable] steps: # region Checkout and Install - name: Checkout sources - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: ref: ${{ inputs.branch_commit_or_ref || '' }} @@ -141,7 +141,7 @@ jobs: - name: Run cargo clippy if: matrix.os == 'ubuntu-latest' - continue-on-error: true # show all errors + continue-on-error: true # show all errors shell: bash run: cargo clippy --all-features @@ -200,7 +200,7 @@ jobs: uses: coverallsapp/github-action@master with: github-token: ${{ secrets.GITHUB_TOKEN }} - path-to-lcov: './lcov.info' + path-to-lcov: "./lcov.info" - name: Stop Solr server after tests if: matrix.os == 'ubuntu-latest' @@ -225,12 +225,12 @@ jobs: strategy: fail-fast: true matrix: - channel: [ stable ] + channel: [stable] steps: # region Environment setup - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: ref: ${{ inputs.branch_commit_or_ref || '' }} diff --git a/.github/workflows/security_audit.yml b/.github/workflows/security_audit.yml index 46e9853..6d71e13 100644 --- a/.github/workflows/security_audit.yml +++ b/.github/workflows/security_audit.yml @@ -3,11 +3,11 @@ name: Monthly Security Audit on: schedule: - - cron: '3 2 1 * *' + - cron: "3 2 1 * *" workflow_dispatch: inputs: branch_commit_or_ref: - description: 'Run this workflow in what branch/commit?' + description: "Run this workflow in what branch/commit?" required: true type: string default: master @@ -18,16 +18,16 @@ jobs: strategy: fail-fast: true matrix: - channel: [ stable ] + channel: [stable] steps: # region Environment setup - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: ref: ${{ inputs.branch_commit_or_ref || '' }} - - name: Update Rust toolchain from channel ${{ matrix.channel }} + - name: Update Rust toolchain from channel ${{ matrix.channel }} shell: bash run: | rustup self update @@ -43,7 +43,7 @@ jobs: rustup show # endregion - + # region Run security audit - name: Intall cargo-audit from channel ${{ matrix.channel }} diff --git a/.gitignore b/.gitignore index ee6ee17..5ece7bc 100644 --- a/.gitignore +++ b/.gitignore @@ -180,6 +180,7 @@ Cargo.lock # Files generated by solrcopy testing *.zip +*.pdf .env #endregion ---------------------------------------------------------------------------------------- diff --git a/Cargo.toml b/Cargo.toml index adc3180..58ad2ca 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,8 +28,8 @@ exclude = [ ] [dependencies] -clap = { version = "4.5.*", features = ["derive", "env", "color"] } -clap_complete = "4.5.*" +clap = { version = "4.6.*", features = ["derive", "env", "color"] } +clap_complete = "4.6.*" clap_mangen = "0.2.*" regex = "1.12.*" url = "2.5.*" diff --git a/README.md b/README.md index eb2e690..74cbe5a 100644 --- a/README.md +++ b/README.md @@ -17,16 +17,67 @@ Command line tool useful for migration, transformations, backup, and restore of ## Usage -1. Use the command `solrcopy backup` for dumping documents from a Solr core into local zip files. +### Process + +1. Use the command `solrcopy backup` for dumping documents from a Solr core into local zip files: 1. Use the switch `--query` for filtering the documents extracted by using a [Solr](https://lucene.apache.org/solr/guide/8_4/the-standard-query-parser.html) [Query](https://lucene.apache.org/solr/guide/8_4/the-standard-query-parser.html) 2. Use the switch `--order` for specifying the sorting of documents extracted. 3. Use the switches `--limit` and `--skip` for restricting the number of documents extracted. 4. Use the switches `--select` and `--exclude` for restricting the columns extracted. -2. Use the command `solrcopy restore` for uploading the extracted documents from local zip files into the same Solr core or another with same field names as extracted. +2. Use the command `solrcopy restore` for uploading the extracted documents from local zip files into the same Solr core or another with same field names as extracted: 1. The documents are updated in the target core in the same format that they were extracted. 2. The documents are inserted/updated based on their `uniqueKey` field defined in core. 3. If you want to change the documents/columns use the switches in `solrcopy backup` for extracting more than one slice of documents to be updated. +### Workflow + +```mermaid +flowchart LR + A[(Source Solr Core)] --> B(solrcopy backup) + B --> F[/Local zip archives/] + F --> G(solrcopy restore) + G --> M[(Target Solr core)] +``` + +#### Backup Workflow + +```mermaid +flowchart TD + A[(Source Solr Core)] --> B(solrcopy backup) + B --> C{Backup options} + C --> C1[Filter: --query / --fq] + C --> C2[Select columns: --select / --exclude] + C --> C3[Slice and order: --order / --skip / --limit / --iterate-by] + C1 --> D@{ shape: processes, label: "Read documents from Solr core"} + C2 --> D + C3 --> D + D --> E@{ shape: docs, label: "Export JSON batches"} + E --> F@{ shape: processes, label: "Write local zip archives in --dir"} + F --> G[/Documents stored as local zip archives/] +``` + +#### Restore Workflow + +```mermaid +flowchart TD + + E[/Local zip archives/] --> F[\Optional: inspect or transform extracted JSON/] + F --> G(solrcopy restore) + G --> H{Restore options} + H --> H1[Select files: --search] + H --> H2[Parallelism: --readers / --writers] + H --> H3[Target: --url + --core] + H1 --> I[[Find local zip archives in --dir]] + H2 --> I + H3 --> I + I --> J@{ shape: processes, label: "Extract documents from archives"} + J --> K@{ shape: docs, label: "Upload documents to target core"} + K --> L@{ shape: processes, label: "Insert/Update by uniqueKey"} + L --> M[(Target Solr core)] + M --> N[/Documents persisted in Solr/] + N --> O(solrcopy commit: Optional) +``` + ### Environment Variables The following environment variables can be used for common parameters: @@ -258,6 +309,23 @@ Options: $ solrcopy restore --url http://localhost:8983/solr --dir ./tmp --core demo ``` +#### solrcopy info + +``` text +Get information about the Solr instance + +Usage: solrcopy info [OPTIONS] --core + +Options: + -u, --url Url pointing to the Solr cluster [env: SOLR_COPY_URL=] [default: http://localhost:8983/solr] + -c, --core Case sensitive name of the core in the Solr server + --log-level What level of detail should print messages [default: INFO] + --log-mode Terminal output to print messages [default: mixed] + --log-file-path Write messages to a local file + --log-file-level What level of detail should write messages to the file [default: DEBUG] + -h, --help Print help +``` + #### solrcopy delete ``` text diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 995de47..94c9b16 100755 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -91,7 +91,6 @@ configs: alias l='ls --escape --dereference-command-line --human-readable --time-style=iso --no-group --color=auto -CF' # END OF SCRIPT # - solr-setup-precreate.sh: content: | #!/bin/bash