From 563660320d45a3f76d411dbfee12ebf8f9827aea Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Sat, 7 Mar 2026 00:25:46 -0300 Subject: [PATCH 01/28] fix: skip writing empty json when --iterate-by slices match no docs --- src/save.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/save.rs b/src/save.rs index d54aa6f..9e9f723 100644 --- a/src/save.rs +++ b/src/save.rs @@ -81,6 +81,10 @@ impl Archiver { pub(crate) fn write_documents(&mut self, docs: &Documents) -> ZipResult<()> { let json = &docs.docs; + let json_size = json.len(); + if json_size <= 2 { + return Ok(()); + } let step = &docs.step; let filename = format!("docs_at_{:09}.json", step.curr + 1); From ef0056dc0c89e9001ac5e45ea5cb68977adb5fd4 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Sat, 7 Mar 2026 00:31:35 -0300 Subject: [PATCH 02/28] fix: add support to testing with docker image solr:10-slim --- docker/docker-compose.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 28a9458..983ca42 100755 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -1,5 +1,7 @@ #!/usr/bin/env -S bash -x -c 'docker compose up -d' +#@ https://solr.apache.org/guide/solr/latest/deployment-guide/solr-in-docker.html#docker-compose + services: solr: container_name: solr4test @@ -118,7 +120,8 @@ configs: if test -n "${VERBOSE:-}"; then set -x; fi echo 'Configuring Solr cores...' solr-setup-start - echo 'Starting Solr Server...' - exec solr-fg + # Solr 10+ defaults to SolrCloud; precreated cores require standalone (user-managed) mode. + echo "Starting Solr Server in user-managed mode..." + exec solr-fg --user-managed echo 'Solr Server Finished.' # END OF SCRIPT # From 50e0f1038864a0d9a024f9231ed89a5f5ddbcd41 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Sat, 7 Mar 2026 00:35:24 -0300 Subject: [PATCH 03/28] feat: add support to selecting the solr docker image tag for tests --- Makefile.toml | 5 +++++ docker/docker-compose.yml | 19 ++++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/Makefile.toml b/Makefile.toml index 1c5f8fa..df461ac 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -50,6 +50,11 @@ dependencies = ["test-basic", "test-docker"] #region Docker +[env] +TAG="slim" +#? Choose the Solr image tag to use in the docker container passing TAG like: +## cargo make --env TAG=9-slim test + [tasks.compose-up] category = "Test" description = "Runs the command `docker compose up` to create a container running Solr for use in testing" diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 983ca42..efc0b6c 100755 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -5,7 +5,10 @@ services: solr: container_name: solr4test - image: solr:slim + image: "solr:${TAG:-slim}" # Or: latest, 10-slim, 10, 9, ... + hostname: solrhost + environment: + - SOLR_IMAGE_TAG=${TAG:-latest} ports: - "8983:8983" volumes: @@ -36,8 +39,18 @@ services: start_period: 20s timeout: 10s command: - # - solr-precreate gettingstarted + #? Solr docs example: + # - solr-precreate + # - gettingstarted # - solr-demo + #? Solr cloud pre-create/start/post-ingest example: + # - solr-precreate demo + # - solr-precreate films + # - solr-precreate films2 + # - solr-precreate target + # - solr-foreground + # - solr-foreground --user-managed + #? Solr standalone-style, precreated cores example: - solr-start-server volumes: @@ -118,7 +131,7 @@ configs: #!/bin/bash set -eu -o pipefail if test -n "${VERBOSE:-}"; then set -x; fi - echo 'Configuring Solr cores...' + echo "Configuring Solr cores: ${SOLR_IMAGE_TAG:-}..." solr-setup-start # Solr 10+ defaults to SolrCloud; precreated cores require standalone (user-managed) mode. echo "Starting Solr Server in user-managed mode..." From 4ed6416ec4944d8ec5946c327142cfb704be01a7 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Sat, 7 Mar 2026 00:50:13 -0300 Subject: [PATCH 04/28] refactor: improve cargo make task names --- Makefile.toml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Makefile.toml b/Makefile.toml index df461ac..6b27941 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -101,7 +101,7 @@ description = "Runs tests that do not require a Solr container" command = "cargo" args = ["test"] -[tasks.test-start] +[tasks.test-begin] category = "Test" description = "Setup a local Solr container and ingest some documents allowing to run tests manually after" dependencies = ["compose-up", "compose-ingest"] @@ -111,7 +111,7 @@ category = "Test" description = "Cleanup the local Solr container after testing" dependencies = ["compose-down"] -[tasks.test-solr] +[tasks.test-full] category = "Test" description = "Runs tests against an existing local solr server" command = "cargo" @@ -122,8 +122,8 @@ category = "Test" description = "Setup a local Solr container, ingest some documents, run all tests, and cleanup the container" private = true run_task = { name = [ - "test-start", - "test-solr", + "test-begin", + "test-full", ], fork = true, cleanup_task = "compose-down" } #endregion Test @@ -156,9 +156,9 @@ args = ["llvm-cov", "--all-features", "--html", "--output-dir", "target/coverage [tasks.coverage] category = "Test Coverage" -description = "Setu Solr container, generate a coverage report, and cleanup" +description = "Setup Solr container, generate a coverage report, and cleanup" run_task = { name = [ - "test-start", + "test-begin", "coverage-full", ], fork = true, cleanup_task = "compose-down" } From 9bb6d04fd94d796d41825796ef332ec146f11998 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Sat, 7 Mar 2026 00:52:06 -0300 Subject: [PATCH 05/28] chore: update crate dependencies, especially zip v8.x --- Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 9c0e22a..c345a40 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,11 +31,11 @@ exclude = [ clap = { version = "4.5.*", features = ["derive", "env", "color"] } clap_complete = "4.5.*" clap_mangen = "0.2.*" -regex = "1.11.*" +regex = "1.12.*" url = "2.5.*" lazy_static = "1.5.*" -ureq = { version = "3.1.*", features = ["rustls", "charset", "cookies", "brotli", "socks-proxy"] } -zip = { version = "5.*", features = ["deflate", "time"] } +ureq = { version = "3.2.*", features = ["rustls", "charset", "cookies", "brotli", "socks-proxy"] } +zip = { version = "8.*", features = ["deflate", "deflate64", "time"] } indicatif = "^0.18" chrono = "^0.4.*" glob = "0.3.*" From 5ae8e6913a8e779e9f838d527c3d78e66457a759 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Mon, 9 Mar 2026 13:42:01 -0300 Subject: [PATCH 06/28] chore: map dir docker/configuration inside docker test container --- Makefile.toml | 2 +- docker/configuration/.gitignore | 2 ++ docker/docker-compose.yml | 15 ++++++++++++++- 3 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 docker/configuration/.gitignore diff --git a/Makefile.toml b/Makefile.toml index 6b27941..b218888 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -88,7 +88,7 @@ args = [ "./docker/docker-compose.yml", "exec", "solr", - "solr-ingest-all", + "solr-ingest-examples", ] #endregion Docker diff --git a/docker/configuration/.gitignore b/docker/configuration/.gitignore new file mode 100644 index 0000000..5ce6c66 --- /dev/null +++ b/docker/configuration/.gitignore @@ -0,0 +1,2 @@ +## Ignore all files in the volume directory +* diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index efc0b6c..51ff397 100755 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -12,7 +12,17 @@ services: ports: - "8983:8983" volumes: - - data:/var/solr + - type: volume + source: data + target: /var/solr + - type: bind + source: ./configuration + target: /opt/configuration + ulimits: + nofile: + soft: 65000 + hard: 65000 + healthcheck: configs: - source: solr-setup-start.sh target: /opt/solr/docker/scripts/solr-setup-start @@ -55,6 +65,9 @@ services: volumes: data: + configuration: + external: true + name: configuration configs: solr-setup-start.sh: From 8a673df40da0aae3208322863422ca9c108095b6 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Mon, 9 Mar 2026 13:43:25 -0300 Subject: [PATCH 07/28] refactor: improve healthcheck of the test contatiner --- docker/docker-compose.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 51ff397..496b6ee 100755 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -23,6 +23,12 @@ services: soft: 65000 hard: 65000 healthcheck: + # test: solr status || exit 121 + test: curl -X GET --fail http://localhost:8983/solr/admin/info/health + start_period: 10s + timeout: 10s + retries: 1 + interval: 60s configs: - source: solr-setup-start.sh target: /opt/solr/docker/scripts/solr-setup-start From 5a6d0fadd43751cc833b521946aecc0c55dd7ebf Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Mon, 9 Mar 2026 13:45:38 -0300 Subject: [PATCH 08/28] refactor: add MODE/SOLR_RUN_MODE to choose the Solr run mode in the test container --- Makefile.toml | 5 +- docker/docker-compose.yml | 177 +++++++++++++++++++++++--------------- 2 files changed, 111 insertions(+), 71 deletions(-) diff --git a/Makefile.toml b/Makefile.toml index b218888..fd27743 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -51,9 +51,12 @@ dependencies = ["test-basic", "test-docker"] #region Docker [env] -TAG="slim" #? Choose the Solr image tag to use in the docker container passing TAG like: ## cargo make --env TAG=9-slim test +TAG="slim" +#? Choose the Solr run mode: standalone, cloud, precreate, demo, testing +## cargo make --env MODE=cloud test +MODE="testing" [tasks.compose-up] category = "Test" diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 496b6ee..d394c8b 100755 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -1,4 +1,4 @@ -#!/usr/bin/env -S bash -x -c 'docker compose up -d' +#!/usr/bin/env -S bash -x -c 'docker compose up --no-recreate --wait --detach' #@ https://solr.apache.org/guide/solr/latest/deployment-guide/solr-in-docker.html#docker-compose @@ -9,8 +9,9 @@ services: hostname: solrhost environment: - SOLR_IMAGE_TAG=${TAG:-latest} + - SOLR_RUN_MODE=${MODE:-testing} ports: - - "8983:8983" + - "8983:8983" volumes: - type: volume source: data @@ -30,43 +31,33 @@ services: retries: 1 interval: 60s configs: - - source: solr-setup-start.sh - target: /opt/solr/docker/scripts/solr-setup-start + - source: solr-setup-precreate.sh + target: /opt/solr/docker/scripts/solr-setup-precreate mode: 0755 - - source: solr-ingest-demo.sh - target: /opt/solr/docker/scripts/solr-ingest-demo + - source: solr-ingest-core.sh + target: /opt/solr/docker/scripts/solr-ingest-core mode: 0755 - - source: solr-ingest-films.sh - target: /opt/solr/docker/scripts/solr-ingest-films - mode: 0755 - - source: solr-ingest-all.sh - target: /opt/solr/docker/scripts/solr-ingest-all + - source: solr-ingest-examples.sh + target: /opt/solr/docker/scripts/solr-ingest-examples mode: 0755 - source: solr-commit.sh target: /opt/solr/docker/scripts/solr-commit mode: 0755 + - source: solr-runas-user-managed.sh + target: /opt/solr/docker/scripts/solr-runas-user-managed + mode: 0755 - source: solr-start-server.sh target: /opt/solr/docker/scripts/solr-start-server mode: 0755 - healthcheck: - test: solr status || exit 121 - interval: 60s - retries: 5 - start_period: 20s - timeout: 10s command: #? Solr docs example: # - solr-precreate # - gettingstarted # - solr-demo - #? Solr cloud pre-create/start/post-ingest example: - # - solr-precreate demo - # - solr-precreate films - # - solr-precreate films2 - # - solr-precreate target + #? Solr cloud/standalone startup example: # - solr-foreground - # - solr-foreground --user-managed - #? Solr standalone-style, precreated cores example: + # - --user-managed + #? Choose Solr run mode from MODE/SOLR_RUN_MODE environment variable: - solr-start-server volumes: @@ -76,84 +67,130 @@ volumes: name: configuration configs: - solr-setup-start.sh: + solr-setup-precreate.sh: content: | #!/bin/bash set -eu -o pipefail - echo 'Checking if it needs to create Solr cores before starting up' + echo 'CREATE: Checking if it needs to create Solr cores before starting up...' if [ -d /var/solr/data/demo ]; then - echo 'Solr core named demo already exists; skipping core creation' + echo 'CREATE: Solr core named demo already exists; skipping core creation.' else - # start-local-solr - # /opt/solr/bin/solr create -c demo - # /opt/solr/bin/solr create -c target - # stop-local-solr - precreate-core demo - precreate-core films - precreate-core films2 - precreate-core target - echo 'Initializind solr db' - source run-initdb + for core in demo gettingstarted films films2 target "$$@"; do + echo "CREATE: Creating Solr core named [$${core}]..." + if ! precreate-core "$${core}"; then + echo "ERROR: Failed to create Solr core named $${core}. Exiting container..." + exit 21 + fi + done + echo 'CREATE: Initializing solr db...' + if ! source run-initdb; then + echo "ERROR: Failed to initialize solr db. Exiting container..." + exit 22 + fi fi - echo 'Finished Solr cores creation' + echo 'CREATE: Finished Solr cores creation successfully.' # END OF SCRIPT # - solr-ingest-demo.sh: + solr-ingest-core.sh: content: | #!/bin/bash set -eu -o pipefail - echo 'Ingest data into a Solr core named demo' - if [ ! -d /var/solr/data/demo ]; then - echo 'Solr core named demo does not exists; skipping core ingestion' + export CORE="$${1:-}"; shift; + echo "INGEST: Ingesting data into a Solr core named $${CORE:-}..." + if test -z "$${CORE:-}"; then + echo 'INGEST: No core name provided; aborting container execution...' + exit 29 + fi + if ! test -d "/var/solr/data/$${CORE}"; then + echo "INGEST: Solr core named $${CORE} does not exists; skipping core ingestion." else - /opt/solr/bin/solr post -c demo /opt/solr/example/exampledocs/books.json - /opt/solr/bin/solr post -c demo /opt/solr/example/exampledocs/books.csv - /opt/solr/bin/solr post -c demo /opt/solr/example/exampledocs/*.xml + for docpath in "$$@"; do + if ! test -f "$${docpath}"; then + echo "INGEST: Source data for '$${docpath}' does not exists; skipping core $${CORE} ingestion." + continue + fi + echo "INGEST: Uploading data from '$${docpath}' into Solr core $${CORE} ingestion..." + if ! solr post -c "$${CORE}" "$${docpath}"; then + echo "INGEST: Failed to ingest data into core $${CORE} from $${docpath}; skipping data ingestion in core $${CORE}." + continue + else + echo "INGEST: Commiting ingested data in core $${CORE}..." + echo '{"commit": {}}' | solr post -c "$${CORE}" + fi + done fi - echo 'Finished Solr demo core ingestion' + echo "INGEST: Finished to ingest data into Solr core $${CORE}." # END OF SCRIPT # - solr-ingest-films.sh: + solr-ingest-examples.sh: content: | #!/bin/bash set -eu -o pipefail - echo 'Ingest data into a Solr core named films' - if [ ! -d /var/solr/data/films ]; then - echo 'Solr core named films does not exists; skipping core ingestion' - else - /opt/solr/bin/solr post -c films /opt/solr/example/films/films.xml - /opt/solr/bin/solr post -c films2 /opt/solr/example/films/films.json - fi - echo 'Finished Solr films core ingestion' + echo 'INGEST: Starting example data ingestion in Solr cores. Wait some seconds...' + solr-ingest-core demo /opt/solr/example/exampledocs/books.json /opt/solr/example/exampledocs/books.csv /opt/solr/example/exampledocs/*.xml + solr-ingest-core films /opt/solr/example/films/films.xml + solr-ingest-core films2 /opt/solr/example/films/films.json + echo 'INGEST: Finished ingestion of Solr example data successfully.' # END OF SCRIPT # - solr-ingest-all.sh: + solr-commit.sh: content: | #!/bin/bash + # Sends a commit to the Solr Core set -eu -o pipefail - echo 'Starting solr core data ingestion. Wait some seconds...' - solr-ingest-demo - solr-ingest-films - echo 'Solr cores created and data ingested.' + echo '{"commit": {}}' | solr post -c "$${1:-}" -type application/json # END OF SCRIPT # - solr-commit.sh: + solr-runas-user-managed.sh: content: | #!/bin/bash - # Sends a commit to the Solr Core set -eu -o pipefail - echo '{"commit": {}}' | post -c "$${1:demo}" -type application/json --out yes -d + echo "TEST: Creating Solr cores: $${SOLR_IMAGE_TAG:-}..." + if ! solr-setup-precreate; then + echo "ERROR: Solr cores creation failed. Exiting container..." + exit 21 + fi + echo "TEST: Setting up Solr configsets for Solr cores..." + mkdir -p /var/solr/data/configsets + cp --recursive /opt/solr/server/solr/configsets/_default /var/solr/data/configsets/ + # Solr 10+ defaults to SolrCloud; precreated cores require standalone (user-managed) mode. + echo "TEST: Starting Solr Server in user-managed mode..." + if ! exec solr-fg --user-managed "$$@"; then + echo "ERROR: Solr Server in user-managed mode failed. Exiting container..." + exit 22 + fi + echo 'TEST: Solr Server in user-managed mode finished successfully.' # END OF SCRIPT # solr-start-server.sh: content: | #!/bin/bash set -eu -o pipefail - if test -n "${VERBOSE:-}"; then set -x; fi - echo "Configuring Solr cores: ${SOLR_IMAGE_TAG:-}..." - solr-setup-start - # Solr 10+ defaults to SolrCloud; precreated cores require standalone (user-managed) mode. - echo "Starting Solr Server in user-managed mode..." - exec solr-fg --user-managed - echo 'Solr Server Finished.' + if test -n "$${DEBUG:-}"; then set -x; fi + echo "RUN: Setting system limits..." + if ! ulimit -n 65000; then + echo "RUN: Failed to set system limits. Exiting container..." + exit 11 + fi + echo "RUN: Starting Solr version $${SOLR_IMAGE_TAG:-} in $${SOLR_RUN_MODE:-} mode..." + case "$${SOLR_RUN_MODE:-testing}" in + standalone) SOLR_RUN_COMMAND="solr-foreground --user-managed";; + cloud) SOLR_RUN_COMMAND="solr-foreground";; + precreate) SOLR_RUN_COMMAND="solr-precreate";; + demo) SOLR_RUN_COMMAND="solr-demo";; + testing) SOLR_RUN_COMMAND="solr-runas-user-managed";; + *) echo "ERROR: Invalid Solr run mode: $${SOLR_RUN_MODE:-}." + echo "RUN: Please set the SOLR_RUN_MODE environment variable to one of the allowed values: standalone, cloud, precreate, demo, testing." + echo "RUN: For example:" + echo "$ cargo make --env SOLR_RUN_MODE=testing test" + echo "$ SOLR_RUN_MODE=standalone docker compose up --no-recreate --wait --detach" + echo "RUN: Aborting container execution..." + exit 99 ;; + esac + echo "RUN: Running Solr using command: [$${SOLR_RUN_COMMAND} $$*]" + if ! exec $${SOLR_RUN_COMMAND} "$$@"; then + echo "RUN: Failed to run Solr version $${SOLR_IMAGE_TAG:-} in $${SOLR_RUN_MODE:-} mode. Exiting container..." + exit 13 + fi + echo 'RUN: Solr Server execution finished successfully. Exiting container...' # END OF SCRIPT # From c4839591f0f5c42a5fbe1c38e6de3ef01d06fae0 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Mon, 9 Mar 2026 13:51:14 -0300 Subject: [PATCH 09/28] feat: add command: solrcopy info --- Cargo.toml | 1 + src/args.rs | 2 ++ src/connection.rs | 44 ++++++++++++++++++++++++++++++++++++++++++-- src/information.rs | 12 ++++++++++++ src/main.rs | 4 +++- src/testsolr.rs | 16 ++++++++++++++-- 6 files changed, 74 insertions(+), 5 deletions(-) create mode 100644 src/information.rs diff --git a/Cargo.toml b/Cargo.toml index c345a40..cf1e2a2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,6 +45,7 @@ crossbeam-channel = "0.5.*" crossbeam-utils = "0.8.*" ctrlc = { version = "3.5.*", features = ["termination"] } dotenvy = "0.15.*" +serde_json = "1.*" # standard crate data is left out [dev-dependencies] diff --git a/src/args.rs b/src/args.rs index c48b95a..1514c09 100644 --- a/src/args.rs +++ b/src/args.rs @@ -50,6 +50,8 @@ pub(crate) enum Commands { Delete(Delete), /// Create a new empty core in the Solr instance Create(Execute), + /// Get information about the Solr instance + Info(Execute), /// Generates man page and completion scripts for different shells Generate(Generate), } diff --git a/src/connection.rs b/src/connection.rs index 199d91f..faac8b6 100644 --- a/src/connection.rs +++ b/src/connection.rs @@ -1,4 +1,4 @@ -use super::helpers::{IntegerHelpers, env_value, wait}; +use super::helpers::{IntegerHelpers, StringHelpers, env_value, wait}; use log::{debug, trace}; use std::time::Duration; use std::{error::Error, fmt}; @@ -21,6 +21,10 @@ impl SolrError { SolrError { details: message, code: Some(error_code) } } + pub(crate) fn of(message: &str, error_code: u16) -> Self { + SolrError { details: message.to_string(), code: Some(error_code) } + } + fn say(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.details) } @@ -47,6 +51,16 @@ impl Error for SolrError { // endregion +// region SolrInfo + +#[derive(Debug)] +pub(crate) struct SolrInfo { + pub version: i64, + pub standalone: bool, +} + +// endregion + // region SolrClient #[derive(Debug)] @@ -104,6 +118,32 @@ impl SolrClient { } } + pub(crate) fn get_solr_info(&mut self, url: &str) -> Result { + let system_url = url.with_suffix("/").append("admin/info/system?wt=json"); + debug!("GET {}", system_url); + + let system_json = self.get_as_text(&system_url)?; + + let parsed = serde_json::from_str::(&system_json); + let info = parsed.map_err(|e| SolrError::new(e.to_string(), 500))?; + + let version = info + .get("lucene") + .and_then(|lucene| lucene.get("solr-spec-version")) + .and_then(|solr_impl_version| solr_impl_version.as_str()) + .ok_or_else(|| SolrError::of("Solr version not found in system info", 500))?; + + let parts: Vec<&str> = version.split('.').collect(); + let major = parts[0].parse::().map_err(|e| SolrError::new(e.to_string(), 500))?; + + let mode = info + .get("mode") + .and_then(|mode| mode.as_str()) + .ok_or_else(|| SolrError::of("Solr mode not found in system info", 500))?; + + Ok(SolrInfo { version: major, standalone: mode == "std" }) + } + pub(crate) fn post_as_json(&mut self, url: &str, content: &str) -> Result { self.post_with_content_type(url, "application/json", content) } @@ -165,7 +205,7 @@ impl SolrClient { let content = body.read_to_string(); let status = response.status(); match content { - Ok(content) => Ok(content), + Ok(text) => Ok(text), Err(failed) => Err(SolrError::new(failed.to_string(), status.as_u16())), } } diff --git a/src/information.rs b/src/information.rs new file mode 100644 index 0000000..3cf8b1a --- /dev/null +++ b/src/information.rs @@ -0,0 +1,12 @@ +use super::{args::Execute, connection::SolrClient}; +use log::debug; + +pub(crate) fn info_main(params: &Execute) -> Result<(), Box> { + debug!("# INFORMATION {:?}", params); + + let mut client = SolrClient::new(); + let info = client.get_solr_info(¶ms.options.url)?; + println!("# Solr {}:\n {:?}", params.options.url, info); + + Ok(()) +} diff --git a/src/main.rs b/src/main.rs index 6cf0997..287c2f0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -43,6 +43,7 @@ mod delete; mod fails; mod fetch; mod helpers; +mod information; mod ingest; mod models; mod restore; @@ -76,7 +77,7 @@ mod wrangle { use crate::args::{Cli, Commands}; use crate::fails::{BoxedResult, throw}; - use crate::{assets, backup, commit, create, delete, restore}; + use crate::{assets, backup, commit, create, delete, information, restore}; use clap::Parser; pub(crate) fn command_exec(args: &Commands) -> Result<(), Box> { @@ -86,6 +87,7 @@ mod wrangle { Commands::Commit(cmd) => commit::commit_main(cmd), Commands::Delete(del) => delete::delete_main(del), Commands::Create(cre) => create::create_main(cre), + Commands::Info(inf) => information::info_main(inf), Commands::Generate(cpl) => assets::gen_assets(cpl), } } diff --git a/src/testsolr.rs b/src/testsolr.rs index 4ce5d84..a4501ae 100644 --- a/src/testsolr.rs +++ b/src/testsolr.rs @@ -106,7 +106,7 @@ mod testsolr { check_exec_backup(url, dir, "demo", "zstd"); } - /// Run this command to test backup from a running Solr instance + /// Run this command to test the command commit with a running Solr instance #[test] fn check_exec_commit() { let uri = get_solr_url(); @@ -118,7 +118,19 @@ mod testsolr { test_command_line_args_for(test_args); } - /// Run this command to test backup from a running Solr instance + /// Run this command to test the command info with a running Solr instance + #[test] + fn check_exec_info() { + let uri = get_solr_url(); + let url = uri.as_str(); + + let test_args = + &["solrcopy", "info", "--url", url, "--core", "demo", "--log-level", "debug"]; + + test_command_line_args_for(test_args); + } + + /// Run this command to test creating a new core in a running Solr instance #[test] fn check_exec_create() { let uri = get_solr_url(); From acade3389de0a776f66915827bd8fb0a8ce4706a Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Mon, 9 Mar 2026 17:59:34 -0300 Subject: [PATCH 10/28] fix: make solrcopy create work with Solr v10 --- src/args.rs | 11 +++-------- src/create.rs | 28 ++++++++++++++++++++++------ src/main.rs | 7 ++++++- 3 files changed, 31 insertions(+), 15 deletions(-) diff --git a/src/args.rs b/src/args.rs index 1514c09..b6f49bc 100644 --- a/src/args.rs +++ b/src/args.rs @@ -576,15 +576,10 @@ impl CommonArgs { self.get_update_url_with(EMPTY_STR) } - pub(crate) fn get_core_admin_v2_url_with(&self, query_string_params: &str) -> String { + pub(crate) fn get_url_from(&self, path: &str) -> String { let mut solr_uri = Url::parse(&self.url).unwrap(); - solr_uri.set_path("api/cores"); - let parts: Vec = vec![solr_uri.to_string(), query_string_params.with_prefix("?")]; - parts.concat() - } - - pub(crate) fn get_core_admin_v2_url(&self) -> String { - self.get_core_admin_v2_url_with(EMPTY_STR) + solr_uri.set_path(path); + solr_uri.to_string() } pub(crate) fn get_logging(&self) -> &LoggingArgs { diff --git a/src/create.rs b/src/create.rs index 2320e3b..48f183d 100644 --- a/src/create.rs +++ b/src/create.rs @@ -4,19 +4,35 @@ use log::{debug, info}; pub(crate) fn create_main(params: &Execute) -> Result<(), Box> { debug!("# CREATE {:?}", params); + let mut client = SolrClient::new(); + let information = client.get_solr_info(¶ms.options.url)?; + info!("# Solr {}: {:?}", params.options.url, information); + let core_name = params.options.core.clone(); - let json = "{ \"create\": { \"name\": \"%s\", \"configSet\": \"%c\" } }"; - let config_set = "/opt/solr/server/solr/configsets/_default"; - let content = json.replace("%s", &core_name).replace("%c", config_set); + let std8 = + r#"{ "create": { "name": "%s", "configSet": "/var/solr/data/configsets/_default" } }"#; + let std9 = r#"{ "create": { "name": "%s", "configSet": "_default" } }"#; + let stdx = r#"{ "name": "%s", "configSet": "_default" }"#; + let cld9 = r#"{ "name": "%s", "config": "_default", "numShards": 1 }"#; + + let std = match information.version { + 8 => std8, + 9 => std9, + _ => stdx, + }; + let json = if information.standalone { std } else { cld9 }; + let content = json.replace("%s", &core_name); + + let api_url = if information.standalone { "api/cores" } else { "api/collections" }; + let url = params.options.get_url_from(api_url); - let url = params.options.get_core_admin_v2_url(); debug!("# POST {}:\n {}", url, content); println!("# POST {}:\n {}", url, content); - let res = SolrClient::new().post_as_json(&url, &content)?; + let res = client.post_as_json(&url, &content)?; - info!("Created the core {} in {}:\n {}", core_name, url, res); + println!("Created the core {} in {}:\n {}", core_name, url, res); Ok(()) } diff --git a/src/main.rs b/src/main.rs index 287c2f0..f87bb3d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -66,7 +66,12 @@ pub fn main() -> Result<(), Box> { let parsed = Cli::parse_from_args()?; - wrangle::command_exec(&parsed) + let result = wrangle::command_exec(&parsed); + if let Err(err) = result { + eprintln!("Error: {}", err); + return Err(err); + } + Ok(()) } // endregion From de5a6578fdde0cb5253e186bff37ff4ef42f3a3c Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Mon, 9 Mar 2026 18:00:41 -0300 Subject: [PATCH 11/28] fix: dont wait in test execution --- src/connection.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/connection.rs b/src/connection.rs index faac8b6..0971f12 100644 --- a/src/connection.rs +++ b/src/connection.rs @@ -1,9 +1,12 @@ -use super::helpers::{IntegerHelpers, StringHelpers, env_value, wait}; +use super::helpers::{IntegerHelpers, StringHelpers, env_value}; use log::{debug, trace}; use std::time::Duration; use std::{error::Error, fmt}; use ureq::Agent; +#[cfg(not(any(test, debug_assertions)))] +use super::helpers::wait; + // region SolrError #[derive(Debug)] @@ -82,9 +85,7 @@ const SOLR_DEF_RETRIES: isize = 1; #[cfg(not(debug_assertions))] const SOLR_DEF_RETRIES: isize = 8; -#[cfg(debug_assertions)] -const SOLR_WAIT_SECS: usize = 1; -#[cfg(not(debug_assertions))] +#[cfg(not(any(test, debug_assertions)))] const SOLR_WAIT_SECS: usize = 5; impl SolrClient { @@ -187,6 +188,7 @@ impl SolrClient { self.retry_count, self.max_retries, failure ); // wait a little for the server recovering before retrying + #[cfg(not(any(test, debug_assertions)))] wait(SOLR_WAIT_SECS * self.retry_count); None } else { From 4a3645bf1f534ee1b3892e7ed3e03a71b7f11a99 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Mon, 9 Mar 2026 18:01:12 -0300 Subject: [PATCH 12/28] chore: fix lint from clone usage --- src/backup.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/backup.rs b/src/backup.rs index f9c4ecf..3aad51c 100644 --- a/src/backup.rs +++ b/src/backup.rs @@ -85,14 +85,14 @@ pub(crate) fn backup_main(params: &Backup) -> BoxedError { let dir = params.transfer.dir.clone(); let name = output_pat.clone(); let max = params.archive_files; - let compression = params.archive_compression.clone(); + let comp = params.archive_compression; let writer = iw; let thread_name = format!("Writer_{}", writer); pool.builder() .name(thread_name) .spawn(move |_| { - start_storing_docs(writer, dir, name, compression, max, consumer, updater); + start_storing_docs(writer, dir, name, comp, max, consumer, updater); debug!("Finished writer #{}", writer); }) .unwrap(); From c0294cb68066e88e172b721bf7ebd9d8ee766ecc Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Mon, 9 Mar 2026 18:02:18 -0300 Subject: [PATCH 13/28] chore(vscode): debug test cases withh feature testsolr --- .vscode/settings.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.vscode/settings.json b/.vscode/settings.json index fc2b4b7..cffd89e 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -44,6 +44,8 @@ "testing.coverageToolbarEnabled": true, // Show the Test Explorer view. "rust-analyzer.testExplorer": true, + "rust-analyzer.cargo.features": [ "testsolr" ], + "rust-analyzer.check.features": [ "testsolr" ], "[rust]": { "editor.defaultFormatter": "rust-lang.rust-analyzer" }, From b0de3a80206111d1b57ffece123d52acb5d3dc07 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Mon, 9 Mar 2026 18:05:03 -0300 Subject: [PATCH 14/28] chore(vscode): add lauch config for tests --- .vscode/launch.json | 61 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/.vscode/launch.json b/.vscode/launch.json index d6655d2..fa3dc64 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -174,6 +174,48 @@ "args": [], "cwd": "${workspaceFolder}" }, + { + "type": "lldb", + "request": "launch", + "name": "Solr-tests", + "cargo": { + "args": [ + "test", + "--no-run", + "--bin=solrcopy", + "--package=solrcopy", + "--features", + "testsolr", + ], + "filter": { + "name": "solrcopy", + "kind": "bin" + } + }, + "args": [], + "cwd": "${workspaceFolder}" + }, + { + "type": "lldb", + "request": "launch", + "name": "testcase", + "cargo": { + "args": [ + "test", + "--no-run", + "--bin=solrcopy", + "--package=solrcopy", + "--features", + "testsolr", + ], + "filter": { + "name": "${input:testcase}", + "kind": "test" + } + }, + "args": [], + "cwd": "${workspaceFolder}" + }, { "type": "lldb", "request": "launch", @@ -201,6 +243,11 @@ "type": "promptString", "description": "Enter the arguments to be passed to program running:" }, + { + "id": "testcase", + "type": "promptString", + "description": "Enter the name of the test case/function to run/debug:" + }, { "id": "folder", "type": "pickString", @@ -228,6 +275,20 @@ "description": "Enter the SOLR URL:", "default": "http://localhost:8983/solr" }, + { + "id": "core", + "type": "pickString", + "description": "What Solr core do you want to use?", + "options": [ + "demo", + "films", + "films2", + "target", + "testing", + "testing2", + ], + "default": "demo" + }, { "id": "query", "type": "promptString", From 305dc337b337d3eed9525e891e43a34eb95fdd7a Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Mon, 9 Mar 2026 18:31:32 -0300 Subject: [PATCH 15/28] ci: fix renamed command in the test container --- .github/workflows/build-test-and-lint.yml | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-test-and-lint.yml b/.github/workflows/build-test-and-lint.yml index 1c0034a..92ba49c 100644 --- a/.github/workflows/build-test-and-lint.yml +++ b/.github/workflows/build-test-and-lint.yml @@ -119,7 +119,7 @@ jobs: run: | sleep 10 echo '::group::Executing a script inside the docker container'; - docker compose -f docker/docker-compose.yml exec solr solr-ingest-all 2>&1 + docker compose -f docker/docker-compose.yml exec solr solr-ingest-examples 2>&1 echo '::endgroup::'; - name: Run cargo test diff --git a/README.md b/README.md index 751269b..ba3958d 100644 --- a/README.md +++ b/README.md @@ -389,7 +389,7 @@ Please also [check all available tasks](#available-tasks). # This command creates the container with a solr server with two cores: 'demo' and 'target' $ docker compose -f docker/docker-compose.yml up -d # Run this command to insert some data into the cores -$ docker compose exec solr solr-ingest-all +$ docker compose exec solr solr-ingest-examples # Run this command to test backup $ cargo run -- backup --url http://localhost:8983/solr --core demo --dir $PWD # Run this command to test restoring the backukp data into a existing empty core From 4f54c7243975fa77e1671197eebd32f0dc5ead92 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Tue, 10 Mar 2026 12:53:00 -0300 Subject: [PATCH 16/28] fix: enable logging for commands create and info --- src/args.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/args.rs b/src/args.rs index b6f49bc..8a092e7 100644 --- a/src/args.rs +++ b/src/args.rs @@ -539,6 +539,8 @@ impl Commands { Self::Restore(put) => Some(&put.options), Self::Commit(com) => Some(&com.options), Self::Delete(del) => Some(&del.options), + Self::Create(cre) => Some(&cre.options), + Self::Info(inf) => Some(&inf.options), _ => None, } } From fb4fb2e89abdbc91c3f051c3715b826f5fa04759 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Tue, 10 Mar 2026 13:03:30 -0300 Subject: [PATCH 17/28] refactor: make connection::get_as_json send Content-Type: application/json --- src/backup.rs | 2 +- src/connection.rs | 28 +++++++++++++++++++--------- src/fetch.rs | 2 +- src/restore.rs | 2 +- 4 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/backup.rs b/src/backup.rs index 3aad51c..a937367 100644 --- a/src/backup.rs +++ b/src/backup.rs @@ -203,7 +203,7 @@ fn fetch_docs_from_solr( ) -> Result { let mut times = 0; loop { - let response = client.get_as_text(query_url); + let response = client.get_as_json(query_url); match response { Err(cause) => { error!("Error in thread #{} retrieving docs from solr: {}", reader, cause); diff --git a/src/connection.rs b/src/connection.rs index 0971f12..4f4f0f0 100644 --- a/src/connection.rs +++ b/src/connection.rs @@ -80,6 +80,10 @@ const SOLR_COPY_RETRIES: &str = "SOLR_COPY_RETRIES"; const SOLR_DEF_TIMEOUT: isize = 60; +const CONTENT_TYPE: &str = "Content-Type"; +const APPLICATION_JSON: &str = "application/json"; +const APPLICATION_XML: &str = "application/xml"; + #[cfg(debug_assertions)] const SOLR_DEF_RETRIES: isize = 1; #[cfg(not(debug_assertions))] @@ -106,10 +110,17 @@ impl SolrClient { timeout.to_u64() } - pub(crate) fn get_as_text(&mut self, url: &str) -> Result { - trace!("GET {}", url); + pub(crate) fn get_as_json(&mut self, url: &str) -> Result { + self.get_with_content_type(url, APPLICATION_JSON) + } + + pub(crate) fn get_with_content_type( + &mut self, url: &str, content_type: &str, + ) -> Result { + debug!("# curl --fail --location -X GET {}", url); loop { - let request = self.http.get(url); + let req = self.http.get(url); + let request = req.header(CONTENT_TYPE, content_type); let answer = request.call(); let result = self.handle_response(answer); match result { @@ -121,9 +132,8 @@ impl SolrClient { pub(crate) fn get_solr_info(&mut self, url: &str) -> Result { let system_url = url.with_suffix("/").append("admin/info/system?wt=json"); - debug!("GET {}", system_url); - let system_json = self.get_as_text(&system_url)?; + let system_json = self.get_as_json(&system_url)?; let parsed = serde_json::from_str::(&system_json); let info = parsed.map_err(|e| SolrError::new(e.to_string(), 500))?; @@ -146,11 +156,11 @@ impl SolrClient { } pub(crate) fn post_as_json(&mut self, url: &str, content: &str) -> Result { - self.post_with_content_type(url, "application/json", content) + self.post_with_content_type(url, APPLICATION_JSON, content) } pub(crate) fn post_as_xml(&mut self, url: &str, content: &str) -> Result { - self.post_with_content_type(url, "application/xml", content) + self.post_with_content_type(url, APPLICATION_XML, content) } fn post_with_content_type( @@ -220,9 +230,9 @@ impl SolrClient { // region Helpers - pub(crate) fn query_get_as_text(url: &str) -> Result { + pub(crate) fn send_get_as_json(url: &str) -> Result { let mut con = SolrClient::new(); - con.get_as_text(url) + con.get_as_json(url) } pub(crate) fn send_post_as_json(url: &str, content: &str) -> Result { diff --git a/src/fetch.rs b/src/fetch.rs index 3e98e69..858ebe1 100644 --- a/src/fetch.rs +++ b/src/fetch.rs @@ -15,7 +15,7 @@ impl Backup { let mut res = SolrCore { num_found: 0, fields: vec![] }; for it in 0..times { - let json = SolrClient::query_get_as_text(&diagnostics_query_url)?; + let json = SolrClient::send_get_as_json(&diagnostics_query_url)?; if let Ok(next) = SolrCore::parse_core_schema(self, &json) { debug!("#{} Solr query returned num_found: {}", it, next.num_found); if next.num_found > res.num_found { diff --git a/src/restore.rs b/src/restore.rs index c351366..fdbde4a 100644 --- a/src/restore.rs +++ b/src/restore.rs @@ -166,7 +166,7 @@ fn pre_post_processing(params: &Restore, enable: bool) -> BoxedResult<()> { info!("Now {} replication in {}.", verb, core); let url = params.options.get_core_handler_url(handler_path); - SolrClient::query_get_as_text(&url)?; + SolrClient::send_get_as_json(&url)?; } Ok(()) } From 806fc36c2e60f3d895b3b693b7d1c0d7e71daab0 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Tue, 10 Mar 2026 13:19:11 -0300 Subject: [PATCH 18/28] refactor: rework logging of messages --- src/backup.rs | 11 +++++------ src/connection.rs | 6 ++++-- src/create.rs | 20 ++++++++++---------- src/information.rs | 7 +++++-- src/restore.rs | 6 +++++- 5 files changed, 29 insertions(+), 21 deletions(-) diff --git a/src/backup.rs b/src/backup.rs index a937367..1ee0028 100644 --- a/src/backup.rs +++ b/src/backup.rs @@ -53,6 +53,7 @@ pub(crate) fn backup_main(params: &Backup) -> BoxedError { let (progress, reporter) = bounded::(transfer.writers.to_usize()); pool.spawn(|_| { + debug!("Started generator thread"); start_querying_core(requests, slices, generator, &ctrl_c); debug!("Finished generator thread"); }); @@ -68,6 +69,7 @@ pub(crate) fn backup_main(params: &Backup) -> BoxedError { pool.builder() .name(thread_name) .spawn(move |_| { + debug!("Started reader #{}", reader); start_retrieving_docs(reader, iterator, producer, must_match, merr, delay); debug!("Finished reader #{}", reader); }) @@ -92,6 +94,7 @@ pub(crate) fn backup_main(params: &Backup) -> BoxedError { pool.builder() .name(thread_name) .spawn(move |_| { + debug!("Started writer #{}", writer); start_storing_docs(writer, dir, name, comp, max, consumer, updater); debug!("Finished writer #{}", writer); }) @@ -108,12 +111,8 @@ pub(crate) fn backup_main(params: &Backup) -> BoxedError { if ctrl_c.aborted() { raise("# Execution aborted by user!") } else { - info!( - "Dowloaded {} of {} documents in {:?}.", - retrieved, - num_retrieving, - started.elapsed() - ); + let (r, n, s) = (retrieved, num_retrieving, started.elapsed()); + info!("Dowloaded {} of {} documents in {:?}.", r, n, s); if retrieved > 0 { wait_with_progress(params.transfer.delay_after, "Waiting after all processing..."); } diff --git a/src/connection.rs b/src/connection.rs index 4f4f0f0..e3105f5 100644 --- a/src/connection.rs +++ b/src/connection.rs @@ -166,10 +166,10 @@ impl SolrClient { fn post_with_content_type( &mut self, url: &str, content_type: &str, content: &str, ) -> Result { - trace!("POST as {} {}", content_type, url); + debug!("# curl --fail --location -X POST -H 'Content-Type: {}' {}", content_type, url); loop { let req = self.http.post(url); - let request = req.header("Content-Type", content_type); + let request = req.header(CONTENT_TYPE, content_type); let answer = request.send(content); let result = self.handle_response(answer); match result { @@ -188,6 +188,7 @@ impl SolrClient { if self.retry_count > 0 { self.retry_count -= 1; } + trace!("# Response: {}", content); Some(Ok(content)) } Err(failure) => { @@ -202,6 +203,7 @@ impl SolrClient { wait(SOLR_WAIT_SECS * self.retry_count); None } else { + trace!("# Failure: {}", failure); Some(Err(failure)) } } diff --git a/src/create.rs b/src/create.rs index 48f183d..8d2caff 100644 --- a/src/create.rs +++ b/src/create.rs @@ -2,11 +2,14 @@ use super::{args::Execute, connection::SolrClient}; use log::{debug, info}; pub(crate) fn create_main(params: &Execute) -> Result<(), Box> { - debug!("# CREATE {:?}", params); + debug!("# CREATE: {:?}", params); let mut client = SolrClient::new(); - let information = client.get_solr_info(¶ms.options.url)?; - info!("# Solr {}: {:?}", params.options.url, information); + let sinf = client.get_solr_info(¶ms.options.url)?; + info!( + "# URL: '{}', version: {}, standalone: {}", + params.options.url, sinf.version, sinf.standalone + ); let core_name = params.options.core.clone(); @@ -16,23 +19,20 @@ pub(crate) fn create_main(params: &Execute) -> Result<(), Box std8, 9 => std9, _ => stdx, }; - let json = if information.standalone { std } else { cld9 }; + let json = if sinf.standalone { std } else { cld9 }; let content = json.replace("%s", &core_name); - let api_url = if information.standalone { "api/cores" } else { "api/collections" }; + let api_url = if sinf.standalone { "api/cores" } else { "api/collections" }; let url = params.options.get_url_from(api_url); - debug!("# POST {}:\n {}", url, content); - println!("# POST {}:\n {}", url, content); - let res = client.post_as_json(&url, &content)?; - println!("Created the core {} in {}:\n {}", core_name, url, res); + info!("Created the core {} in {}:\n {}", core_name, url, res); Ok(()) } diff --git a/src/information.rs b/src/information.rs index 3cf8b1a..f9222fc 100644 --- a/src/information.rs +++ b/src/information.rs @@ -1,12 +1,15 @@ use super::{args::Execute, connection::SolrClient}; -use log::debug; +use log::{debug, info}; pub(crate) fn info_main(params: &Execute) -> Result<(), Box> { debug!("# INFORMATION {:?}", params); let mut client = SolrClient::new(); let info = client.get_solr_info(¶ms.options.url)?; - println!("# Solr {}:\n {:?}", params.options.url, info); + info!( + "# {{ url: '{}', version: {}, standalone: {} }}", + params.options.url, info.version, info.standalone + ); Ok(()) } diff --git a/src/restore.rs b/src/restore.rs index fdbde4a..a972303 100644 --- a/src/restore.rs +++ b/src/restore.rs @@ -71,6 +71,7 @@ fn unzip_archives_and_send(params: &Restore, found: &[PathBuf]) -> BoxedResult(transfer.writers.to_usize()); pool.spawn(move |_| { + debug!("Started generator thread"); start_listing_archives(found, generator); debug!("Finished generator thread"); }); @@ -84,6 +85,7 @@ fn unzip_archives_and_send(params: &Restore, found: &[PathBuf]) -> BoxedResult BoxedResult Date: Tue, 10 Mar 2026 15:52:51 -0300 Subject: [PATCH 19/28] refactor: simplify backup/restore code by encapsuling the algorith in smaller functions --- src/backup.rs | 55 ++++++++++++++++++-------- src/restore.rs | 103 +++++++++++++++++++++++++++++-------------------- 2 files changed, 101 insertions(+), 57 deletions(-) diff --git a/src/backup.rs b/src/backup.rs index 1ee0028..41a0e5a 100644 --- a/src/backup.rs +++ b/src/backup.rs @@ -26,7 +26,6 @@ pub(crate) fn backup_main(params: &Backup) -> BoxedError { let end_limit = params.get_docs_to_retrieve(&schema); let num_retrieving = params.estimate_docs_quantity(&schema, &slices)?; let num_found = schema.num_found.to_u64(); - let must_match = if params.workaround_shards > 0 { num_found } else { 0 }; let mut retrieved = 0; info!( @@ -58,14 +57,33 @@ pub(crate) fn backup_main(params: &Backup) -> BoxedError { debug!("Finished generator thread"); }); - for ir in 0..transfer.readers { + start_solr_readers(pool, params, sender, sequence, num_found); + + start_archive_writers(pool, params, receiver, progress, num_found); + + retrieved = + foreach_progress(reporter, num_retrieving, params.num_docs, params.options.is_quiet()); + }) + .unwrap(); + + finish_writing(ctrl_c, started, num_retrieving, retrieved, params.transfer.delay_after) +} + +fn start_solr_readers( + pool: &thread::Scope<'_>, params: &Backup, sender: Sender, sequence: Receiver, + num_found: u64, +) { + let merr = params.transfer.max_errors; + let delay = params.transfer.delay_per_request; + let must_match = if params.workaround_shards > 0 { num_found } else { 0 }; + + for ir in 0..params.transfer.readers { let producer = sender.clone(); let iterator = sequence.clone(); - let reader = ir; - let merr = params.transfer.max_errors; - let delay = params.transfer.delay_per_request; + let reader = ir; let thread_name = format!("Reader_{}", reader); + pool.builder() .name(thread_name) .spawn(move |_| { @@ -77,20 +95,25 @@ pub(crate) fn backup_main(params: &Backup) -> BoxedError { } drop(sequence); drop(sender); +} - let output_pat = params.get_archive_pattern(schema.num_found); +fn start_archive_writers( + pool: &thread::Scope<'_>, params: &Backup, receiver: Receiver, + progress: Sender, num_found: u64, +) { + let output_pat = params.get_archive_pattern(num_found); + let max = params.archive_files; + let comp = params.archive_compression; - for iw in 0..transfer.writers { + for iw in 0..params.transfer.writers { let consumer = receiver.clone(); let updater = progress.clone(); - let dir = params.transfer.dir.clone(); let name = output_pat.clone(); - let max = params.archive_files; - let comp = params.archive_compression; let writer = iw; let thread_name = format!("Writer_{}", writer); + pool.builder() .name(thread_name) .spawn(move |_| { @@ -102,19 +125,19 @@ pub(crate) fn backup_main(params: &Backup) -> BoxedError { } drop(receiver); drop(progress); +} - retrieved = - foreach_progress(reporter, num_retrieving, params.num_docs, params.options.is_quiet()); - }) - .unwrap(); - +fn finish_writing( + ctrl_c: Arc, started: Instant, num_retrieving: u64, retrieved: u64, + delay_after: u64, +) -> BoxedError { if ctrl_c.aborted() { raise("# Execution aborted by user!") } else { let (r, n, s) = (retrieved, num_retrieving, started.elapsed()); info!("Dowloaded {} of {} documents in {:?}.", r, n, s); if retrieved > 0 { - wait_with_progress(params.transfer.delay_after, "Waiting after all processing..."); + wait_with_progress(delay_after, "Waiting after all processing..."); } Ok(()) } diff --git a/src/restore.rs b/src/restore.rs index a972303..0c5287a 100644 --- a/src/restore.rs +++ b/src/restore.rs @@ -1,5 +1,11 @@ use super::{ - args::Restore, bars::*, connection::SolrClient, fails::*, helpers::*, ingest::*, state::*, + args::{ParallelArgs, Restore}, + bars::*, + connection::SolrClient, + fails::*, + helpers::*, + ingest::*, + state::*, }; use crossbeam_channel::{Receiver, Sender, bounded}; use crossbeam_utils::thread; @@ -76,50 +82,12 @@ fn unzip_archives_and_send(params: &Restore, found: &[PathBuf]) -> BoxedResult BoxedResult( + pool: &thread::Scope<'scope>, transfer: &ParallelArgs, sequence: Receiver<&'scope Path>, + sender: Sender, +) { + for ir in 0..transfer.readers { + let producer = sender.clone(); + let iterator = sequence.clone(); + + let reader = ir; + let thread_name = format!("Reader_{}", reader); + + pool.builder() + .name(thread_name) + .spawn(move |_| { + debug!("Started reader #{}", reader); + start_reading_archive(reader, iterator, producer); + debug!("Finished reader #{}", reader); + }) + .unwrap(); + } + drop(sequence); + drop(sender); +} + +fn start_solr_writers( + pool: &thread::Scope<'_>, transfer: &ParallelArgs, receiver: Receiver, + progress: Sender, update_hadler_url: String, +) { + let update_errors = Arc::new(AtomicU64::new(0)); + let merr = transfer.max_errors; + let delay = transfer.delay_per_request; + + for iw in 0..transfer.writers { + let consumer = receiver.clone(); + let updater = progress.clone(); + let arcerr = Arc::clone(&update_errors); + let url = update_hadler_url.clone(); + + let writer = iw; + let thread_name = format!("Writer_{}", writer); + pool.builder() + .name(thread_name) + .spawn(move |_| { + debug!("Started writer #{}", writer); + start_indexing_docs(writer, &url, consumer, updater, &arcerr, merr, delay); + debug!("Finished writer #{}", writer); + }) + .unwrap(); + } + drop(receiver); + drop(progress); +} + fn finish_sending(params: &Restore, updated: u64) -> BoxedResult { let ctrl_c = monitor_term_sinal(); From ee89d056008fc4c16f251ae0f2e04acd0816418f Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Tue, 10 Mar 2026 16:16:36 -0300 Subject: [PATCH 20/28] fix: renable some min/max validation for command line args --- src/args.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/args.rs b/src/args.rs index 8a092e7..91e3842 100644 --- a/src/args.rs +++ b/src/args.rs @@ -117,7 +117,7 @@ pub(crate) struct Backup { display_order = 52, default_value_t = 1, value_name = "num", - // value_parser = clap::value_parser!(u64).range(0..366), + value_parser = clap::value_parser!(u64).range(0..366), )] pub iterate_step: u64, @@ -302,7 +302,7 @@ pub(crate) struct ParallelArgs { display_order = 80, default_value_t = 1, value_name = "count", - // value_parser = clap::value_parser!(u64).range(1..128), + value_parser = clap::value_parser!(u64).range(1..128), )] pub readers: u64, @@ -313,7 +313,7 @@ pub(crate) struct ParallelArgs { display_order = 80, default_value_t = 1, value_name = "count", - // value_parser = clap::value_parser!(u64).range(1..=128), + value_parser = clap::value_parser!(u64).range(1..128), )] pub writers: u64, } From 3b3dde6a3166394c9007b337545359c523b4e9c5 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Wed, 11 Mar 2026 17:40:52 -0300 Subject: [PATCH 21/28] chore: update rust-version to 1.88 to fix CI MRSV check --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index cf1e2a2..1d9991a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "solrcopy" version = "0.8.1" edition = "2024" -rust-version = "1.85.1" +rust-version = "1.88" authors = ["Juarez Rudsatz "] description = "Command line tool useful for migration, transformations, backup, and restore of documents stored inside cores of Apache Solr" From 64e2276ab146482fd648ded3a3e9e3a4faec5205 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Wed, 11 Mar 2026 17:41:47 -0300 Subject: [PATCH 22/28] fix: mark --skip as imcompatible with --iterate-by --- src/args.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/args.rs b/src/args.rs index 91e3842..c0d1b0d 100644 --- a/src/args.rs +++ b/src/args.rs @@ -78,11 +78,11 @@ pub(crate) struct Backup { pub order: Vec, /// Skip this quantity of documents in the Solr Query - #[arg(short = 'k', long, display_order = 43, value_parser = parse_quantity, default_value_t = 0, value_name = "quantity")] + #[arg(short = 'k', long, display_order = 43, value_parser = parse_quantity, default_value_t = 0, value_name = "quantity", conflicts_with = "iterate_by")] pub skip: u64, /// Maximum quantity of documents for retrieving from the core (like 100M) - #[arg(short, long, display_order = 44, value_parser = parse_quantity, value_name = "quantity")] + #[arg(short, long, display_order = 44, value_parser = parse_quantity, value_name = "quantity", conflicts_with = "iterate_by")] pub limit: Option, /// Names of core fields retrieved in each document [default: all but _*] @@ -95,7 +95,7 @@ pub(crate) struct Backup { /// Slice the queries by using the variables {begin} and {end} for iterating in `--query` /// Used in bigger solr cores with huge number of docs because querying the end of docs is expensive and fails frequently - #[arg(short, long, display_order = 50, default_value_t = IterateMode::Day, value_name = "mode", value_enum)] + #[arg(short, long, display_order = 50, default_value_t = IterateMode::Day, value_name = "mode", requires = "iterate_between", value_enum)] pub iterate_by: IterateMode, /// The range of dates/numbers for iterating the queries throught slices. From f86f2e4d61ed3ef4310fbfe48cbe8125ea54011f Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Wed, 11 Mar 2026 17:42:48 -0300 Subject: [PATCH 23/28] fix: ignore log messages from ureq crate --- src/state.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/state.rs b/src/state.rs index 7812e85..9b983ca 100644 --- a/src/state.rs +++ b/src/state.rs @@ -1,5 +1,7 @@ use log::{debug, error}; -use simplelog::{ColorChoice, CombinedLogger, Config, SharedLogger, TermLogger, WriteLogger}; +use simplelog::{ + ColorChoice, CombinedLogger, ConfigBuilder, SharedLogger, TermLogger, WriteLogger, +}; use std::fs::File; use std::sync::Arc; use std::sync::atomic::{AtomicBool, Ordering}; @@ -41,18 +43,19 @@ fn start_monitoring_term_sinal() -> Arc { impl LoggingArgs { pub(crate) fn start_log(&self) -> Result<(), Box> { + let cfg = ConfigBuilder::new().add_filter_ignore("ureq".to_string()).build(); let mut enabled: Vec> = Vec::new(); if !self.is_quiet() { enabled.push(TermLogger::new( self.log_level, - Config::default(), + cfg.clone(), self.log_mode, ColorChoice::Auto, )); } if let Some(filepath) = &self.log_file_path { let file_to_log = File::create(filepath).unwrap(); - enabled.push(WriteLogger::new(self.log_level, Config::default(), file_to_log)); + enabled.push(WriteLogger::new(self.log_level, cfg, file_to_log)); } CombinedLogger::init(enabled).unwrap(); Ok(()) From 88572e043b83637a657ce0117b396385b6ca6de8 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Wed, 11 Mar 2026 17:47:15 -0300 Subject: [PATCH 24/28] refactor: reclassify some debug/trace logs --- src/connection.rs | 8 ++++---- src/fetch.rs | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/connection.rs b/src/connection.rs index e3105f5..0f02fa9 100644 --- a/src/connection.rs +++ b/src/connection.rs @@ -117,7 +117,7 @@ impl SolrClient { pub(crate) fn get_with_content_type( &mut self, url: &str, content_type: &str, ) -> Result { - debug!("# curl --fail --location -X GET {}", url); + trace!("# curl --fail --location -X GET {}", url); loop { let req = self.http.get(url); let request = req.header(CONTENT_TYPE, content_type); @@ -166,7 +166,7 @@ impl SolrClient { fn post_with_content_type( &mut self, url: &str, content_type: &str, content: &str, ) -> Result { - debug!("# curl --fail --location -X POST -H 'Content-Type: {}' {}", content_type, url); + trace!("# curl --fail --location -X POST -H 'Content-Type: {}' {}", content_type, url); loop { let req = self.http.post(url); let request = req.header(CONTENT_TYPE, content_type); @@ -188,7 +188,7 @@ impl SolrClient { if self.retry_count > 0 { self.retry_count -= 1; } - trace!("# Response: {}", content); + // trace!("# Response: {}", content); Some(Ok(content)) } Err(failure) => { @@ -203,7 +203,7 @@ impl SolrClient { wait(SOLR_WAIT_SECS * self.retry_count); None } else { - trace!("# Failure: {}", failure); + debug!("# Failure: {}", failure); Some(Err(failure)) } } diff --git a/src/fetch.rs b/src/fetch.rs index 858ebe1..9ef5e1e 100644 --- a/src/fetch.rs +++ b/src/fetch.rs @@ -17,7 +17,7 @@ impl Backup { for it in 0..times { let json = SolrClient::send_get_as_json(&diagnostics_query_url)?; if let Ok(next) = SolrCore::parse_core_schema(self, &json) { - debug!("#{} Solr query returned num_found: {}", it, next.num_found); + trace!("#{} Solr query returned num_found: {}", it, next.num_found); if next.num_found > res.num_found { res = next; } From 7b7dcb7597aa84d2cb8802160495eeb8857f0a4c Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Wed, 11 Mar 2026 17:53:39 -0300 Subject: [PATCH 25/28] refactor: rework progress bar updates --- src/backup.rs | 10 +++++----- src/bars.rs | 35 +++++++++++++++++++++++++---------- src/restore.rs | 8 ++++---- 3 files changed, 34 insertions(+), 19 deletions(-) diff --git a/src/backup.rs b/src/backup.rs index 41a0e5a..12ad29d 100644 --- a/src/backup.rs +++ b/src/backup.rs @@ -1,6 +1,6 @@ use super::{ args::Backup, - bars::{foreach_progress, wait_with_progress}, + bars::{forall_progress, wait_with_progress}, connection::SolrClient, fails::{BoxedError, raise}, helpers::{IntegerHelpers, wait, wait_by}, @@ -61,8 +61,7 @@ pub(crate) fn backup_main(params: &Backup) -> BoxedError { start_archive_writers(pool, params, receiver, progress, num_found); - retrieved = - foreach_progress(reporter, num_retrieving, params.num_docs, params.options.is_quiet()); + retrieved = forall_progress(reporter, num_retrieving, params.options.is_quiet()); }) .unwrap(); @@ -137,7 +136,7 @@ fn finish_writing( let (r, n, s) = (retrieved, num_retrieving, started.elapsed()); info!("Dowloaded {} of {} documents in {:?}.", r, n, s); if retrieved > 0 { - wait_with_progress(delay_after, "Waiting after all processing..."); + wait_with_progress(delay_after, "Exporting documents to archives..."); } Ok(()) } @@ -268,7 +267,8 @@ fn start_storing_docs( error!("Error in thread #{} writing file into archive: {}", writer, cause); break; } - let status = progress.send(0); + let num_docs = docs.step.curr; + let status = progress.send(num_docs); if status.is_err() { break; } diff --git a/src/bars.rs b/src/bars.rs index 13c50f1..57b3617 100644 --- a/src/bars.rs +++ b/src/bars.rs @@ -1,6 +1,5 @@ #![allow(dead_code)] -use super::helpers::IntegerHelpers; use crossbeam_channel::Receiver; use indicatif::{ProgressBar, ProgressStyle}; use std::time::{Duration, Instant}; @@ -31,14 +30,12 @@ fn new_time_bar(len: u64) -> ProgressBar { // region implementarion -pub(crate) fn foreach_progress( - reporter: Receiver, num_retrieving: u64, num_increment: u64, quiet: bool, -) -> u64 { +pub(crate) fn foreach_progress(reporter: Receiver, total: u64, quiet: bool) -> u64 { let mut updated = 0; - let perc_bar = if quiet { None } else { Some(new_wide_bar(num_retrieving.to_u64())) }; - for _ in reporter.iter() { + let perc_bar = if quiet { None } else { Some(new_wide_bar(total)) }; + for num_increment in reporter.iter() { if let Some(prog) = &perc_bar { - prog.inc(num_increment.to_u64()); + prog.inc(num_increment); updated += num_increment; } } @@ -49,15 +46,33 @@ pub(crate) fn foreach_progress( updated } +pub(crate) fn forall_progress(reporter: Receiver, total: u64, quiet: bool) -> u64 { + let mut updated = 0; + let perc_bar = if quiet { None } else { Some(new_wide_bar(total)) }; + for position in reporter.iter() { + if position > updated + && let Some(prog) = &perc_bar + { + prog.set_position(position); + updated = position; + } + } + if let Some(pg) = perc_bar { + pg.finish_and_clear(); + } + drop(reporter); + updated +} + pub(crate) fn wait_with_progress(millis: u64, message: &str) { if millis > 10 { - let delta = millis.min(500).to_u64(); + let delta = millis.min(500); let delay = Duration::from_millis(delta); let started = Instant::now(); - let deadline = started + Duration::from_millis(millis.to_u64()); + let deadline = started + Duration::from_millis(millis); - let time_bar = new_time_bar(millis.to_u64()); + let time_bar = new_time_bar(millis); if !message.is_empty() { // time_bar.println(message); time_bar.set_message(message.to_owned()); diff --git a/src/restore.rs b/src/restore.rs index 0c5287a..6e34d4d 100644 --- a/src/restore.rs +++ b/src/restore.rs @@ -39,7 +39,7 @@ pub(crate) fn restore_main(params: &Restore) -> BoxedError { wait_with_progress( params.transfer.delay_before, - &format!("Waiting before processing {}...", core), + &format!("Starting restore for core {}...", core), ); pre_post_processing(params, false)?; @@ -53,7 +53,7 @@ pub(crate) fn restore_main(params: &Restore) -> BoxedError { pre_post_processing(params, true)?; if updated > 0 { - wait_with_progress(params.transfer.delay_after, "Waiting after all processing..."); + wait_with_progress(params.transfer.delay_after, "Restoring documents..."); } Ok(()) } @@ -89,7 +89,7 @@ fn unzip_archives_and_send(params: &Restore, found: &[PathBuf]) -> BoxedResult max_errors } else { - let status = progress.send(0); + let status = progress.send(1); status.is_err() } } From 5ce1273d9cdee198c9f1975432ba0b0e179d82dc Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Wed, 11 Mar 2026 17:57:07 -0300 Subject: [PATCH 26/28] refactor: fix some clippy lints --- src/main.rs | 3 +++ src/steps.rs | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/main.rs b/src/main.rs index f87bb3d..79a93bd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -14,6 +14,7 @@ #![deny(unused_extern_crates)] #![deny(unused_must_use)] #![deny(unused_import_braces)] +#![deny(unused_imports)] // endregion @@ -21,8 +22,10 @@ // #![allow(unused_variables)] // #![allow(unused_imports)] +// #![allow(unused_import_braces)] // #![allow(dead_code)] // #![allow(unreachable_code)] +// #![allow(unused)] // endregion diff --git a/src/steps.rs b/src/steps.rs index a406c6d..bee951b 100644 --- a/src/steps.rs +++ b/src/steps.rs @@ -206,7 +206,7 @@ impl SliceItem { impl Requests { pub(crate) fn len(&self) -> u64 { let res = self.limit / self.num_docs; - if self.limit % self.num_docs == 0 { res } else { res + 1 } + if self.limit.is_multiple_of(self.num_docs) { res } else { res + 1 } } } From 226a57558ae5f173a4d4b6f5c76922bb52496d27 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Wed, 11 Mar 2026 18:27:40 -0300 Subject: [PATCH 27/28] fix: make the flag --iterate-by work for solrcopy backup --- src/backup.rs | 140 +++++++++++++++++++++++++++----------------------- src/fetch.rs | 23 ++++++++- src/steps.rs | 93 ++++++++++++++++----------------- 3 files changed, 142 insertions(+), 114 deletions(-) diff --git a/src/backup.rs b/src/backup.rs index 12ad29d..d028fe2 100644 --- a/src/backup.rs +++ b/src/backup.rs @@ -18,21 +18,20 @@ use std::{path::PathBuf, time::Instant}; pub(crate) fn backup_main(params: &Backup) -> BoxedError { debug!("# BACKUP {:?}", params); - wait_with_progress(params.transfer.delay_before, "Waiting before processing..."); - - let slices = params.get_slices(); + if params.options.is_quiet() { + wait_with_progress(params.transfer.delay_before, "Starting the copy..."); + } let schema = params.inspect_core()?; - let end_limit = params.get_docs_to_retrieve(&schema); - let num_retrieving = params.estimate_docs_quantity(&schema, &slices)?; - let num_found = schema.num_found.to_u64(); + let num_found = schema.num_found; + let num_retrieve = params.get_docs_to_retrieve(num_found); let mut retrieved = 0; info!( - "retrieving {} documents in the range {} to {} from {} documents of solr core {}.", - num_retrieving, + "retrieving {} documents from range {} to {} of {} total returned by the query on solr core: {}.", + num_retrieve, params.skip + 1, - end_limit, + params.skip + num_retrieve, num_found, params.options.core ); @@ -41,7 +40,6 @@ pub(crate) fn backup_main(params: &Backup) -> BoxedError { let started = Instant::now(); thread::scope(|pool| { - let requests = params.get_steps(&schema); let transfer = ¶ms.transfer; let readers_channel = transfer.readers * 4; @@ -53,19 +51,21 @@ pub(crate) fn backup_main(params: &Backup) -> BoxedError { pool.spawn(|_| { debug!("Started generator thread"); - start_querying_core(requests, slices, generator, &ctrl_c); + start_querying_core(params, &schema, generator, &ctrl_c); debug!("Finished generator thread"); }); start_solr_readers(pool, params, sender, sequence, num_found); - start_archive_writers(pool, params, receiver, progress, num_found); + start_archive_writers(pool, params, receiver, progress, num_retrieve); - retrieved = forall_progress(reporter, num_retrieving, params.options.is_quiet()); + retrieved = forall_progress(reporter, num_retrieve, params.options.is_quiet()); }) .unwrap(); - finish_writing(ctrl_c, started, num_retrieving, retrieved, params.transfer.delay_after) + // TODO: handle the finished thread with join + + finish_writing(ctrl_c, started, num_retrieve, retrieved, params.transfer.delay_after) } fn start_solr_readers( @@ -77,64 +77,63 @@ fn start_solr_readers( let must_match = if params.workaround_shards > 0 { num_found } else { 0 }; for ir in 0..params.transfer.readers { - let producer = sender.clone(); - let iterator = sequence.clone(); - - let reader = ir; - let thread_name = format!("Reader_{}", reader); - - pool.builder() - .name(thread_name) - .spawn(move |_| { - debug!("Started reader #{}", reader); - start_retrieving_docs(reader, iterator, producer, must_match, merr, delay); - debug!("Finished reader #{}", reader); - }) - .unwrap(); - } - drop(sequence); - drop(sender); + let producer = sender.clone(); + let iterator = sequence.clone(); + + let reader = ir; + let thread_name = format!("Reader_{}", reader); + + pool.builder() + .name(thread_name) + .spawn(move |_| { + debug!("Started reader #{}", reader); + start_retrieving_docs(reader, iterator, producer, must_match, merr, delay); + debug!("Finished reader #{}", reader); + }) + .unwrap(); + } + drop(sequence); + drop(sender); } fn start_archive_writers( pool: &thread::Scope<'_>, params: &Backup, receiver: Receiver, - progress: Sender, num_found: u64, + progress: Sender, num_retrieve: u64, ) { - let output_pat = params.get_archive_pattern(num_found); + let output_pat = params.get_archive_pattern(num_retrieve); let max = params.archive_files; let comp = params.archive_compression; for iw in 0..params.transfer.writers { - let consumer = receiver.clone(); - let updater = progress.clone(); - let dir = params.transfer.dir.clone(); - let name = output_pat.clone(); - - let writer = iw; - let thread_name = format!("Writer_{}", writer); - - pool.builder() - .name(thread_name) - .spawn(move |_| { - debug!("Started writer #{}", writer); - start_storing_docs(writer, dir, name, comp, max, consumer, updater); - debug!("Finished writer #{}", writer); - }) - .unwrap(); - } - drop(receiver); - drop(progress); + let consumer = receiver.clone(); + let updater = progress.clone(); + let dir = params.transfer.dir.clone(); + let name = output_pat.clone(); + + let writer = iw; + let thread_name = format!("Writer_{}", writer); + + pool.builder() + .name(thread_name) + .spawn(move |_| { + debug!("Started writer #{}", writer); + start_storing_docs(writer, dir, name, comp, max, consumer, updater); + debug!("Finished writer #{}", writer); + }) + .unwrap(); + } + drop(receiver); + drop(progress); } fn finish_writing( - ctrl_c: Arc, started: Instant, num_retrieving: u64, retrieved: u64, - delay_after: u64, + ctrl_c: Arc, started: Instant, num_retrieve: u64, retrieved: u64, delay_after: u64, ) -> BoxedError { if ctrl_c.aborted() { raise("# Execution aborted by user!") } else { - let (r, n, s) = (retrieved, num_retrieving, started.elapsed()); - info!("Dowloaded {} of {} documents in {:?}.", r, n, s); + let (r, n, s) = (retrieved, num_retrieve, started.elapsed()); + info!("Downloaded {} of {} documents in {:?}.", r, n, s); if retrieved > 0 { wait_with_progress(delay_after, "Exporting documents to archives..."); } @@ -145,19 +144,34 @@ fn finish_writing( // region Channels fn start_querying_core( - requests: Requests, slices: Slices, generator: Sender, ctrl_c: &Arc, + params: &Backup, schema: &SolrCore, generator: Sender, ctrl_c: &Arc, ) { - let parts = slices.get_iterator(); + let core_fields = params.merge_core_fields(schema); - 'outer: for range in parts { - let docs = requests.clone(); - for step in docs { - let filtered = range.filter(step); - let status = generator.send(filtered); + let slices: Slices = params.get_slices(); + let partitions = slices.get_iterator(); + let mut retrieved = 0u64; + + 'outer: for range in partitions { + let num_found = params.query_num_found(&range.begin, &range.end).unwrap_or(0); + if num_found == 0 { + continue; + } + let num_retrieve = params.get_docs_to_retrieve(num_found); + let requests: Requests = params.get_requests_for_range( + retrieved, + num_retrieve, + &core_fields, + &range.begin, + &range.end, + ); + for step in requests { + let status = generator.send(step); if status.is_err() || ctrl_c.aborted() { break 'outer; } } + retrieved += num_found; } drop(generator); } diff --git a/src/fetch.rs b/src/fetch.rs index 9ef5e1e..a5d92c7 100644 --- a/src/fetch.rs +++ b/src/fetch.rs @@ -1,5 +1,5 @@ use super::{args::Backup, connection::SolrClient, fails::*, helpers::*, models::SolrCore}; -use log::debug; +use log::{debug, trace}; use regex::Regex; // region Solr Core @@ -32,6 +32,27 @@ impl Backup { debug!("Core schema: {:?}", res); Ok(res) } + + pub(crate) fn query_num_found(&self, begin: &str, end: &str) -> BoxedResult { + // try sometimes for finding the greatest num_found of docs answered by the core + // Used for fixing problems with corrupted replicas of cores with more than 1 shard + let times = (self.workaround_shards * 5) + 1; + let mut prev_num_found = 0; + let query_url = self.get_query_num_found(begin, end); + for it in 0..times { + let json = SolrClient::send_get_as_json(&query_url)?; + if let Ok(num_found) = SolrCore::parse_num_found(&json) { + debug!( + "#{} Solr query returned num_found={} for range {} to {}", + it, num_found, begin, end + ); + if num_found > prev_num_found { + prev_num_found = num_found; + } + } + } + Ok(prev_num_found) + } } impl SolrCore { diff --git a/src/steps.rs b/src/steps.rs index bee951b..e957070 100644 --- a/src/steps.rs +++ b/src/steps.rs @@ -28,6 +28,7 @@ pub(crate) struct SliceItem { #[derive(Debug, Clone)] pub(crate) struct Requests { + pub prev: u64, pub curr: u64, pub limit: u64, pub num_docs: u64, @@ -51,7 +52,8 @@ impl Slices { res } - pub(crate) fn estimate_steps(&self) -> BoxedResult { + #[allow(dead_code)] + pub(crate) fn estimate_num_slices(&self) -> BoxedResult { if self.curr.is_empty() { return Ok(1); } @@ -191,18 +193,6 @@ impl Iterator for Slices { } } -impl SliceItem { - pub(crate) fn filter(&self, step: Step) -> Step { - if self.begin.is_empty() { - step - } else { - let query = - replace_solr_vars(step.url.as_str(), self.begin.as_str(), self.end.as_str()); - Step { url: query, curr: step.curr } - } - } -} - impl Requests { pub(crate) fn len(&self) -> u64 { let res = self.limit / self.num_docs; @@ -218,7 +208,7 @@ impl Iterator for Requests { let remaining = self.limit - self.curr; let rows = self.num_docs.min(remaining); let query = format!("{}&start={}&rows={}", self.url, self.curr, rows); - let res = Step { url: query, curr: self.curr }; + let res = Step { url: query, curr: self.prev + self.curr }; self.curr += self.num_docs; Some(res) } else { @@ -251,7 +241,7 @@ fn format_solr_time(date_time: NaiveDateTime) -> String { // region Solr requests impl Backup { - pub(crate) fn get_archive_pattern(&self, num_found: u64) -> String { + pub(crate) fn get_archive_pattern(&self, num_retrieve: u64) -> String { let prefix = match &self.archive_prefix { Some(text) => text.to_string(), None => { @@ -261,24 +251,15 @@ impl Backup { } }; let ext = self.archive_compression.get_ext(); - format!("{}_docs_{}_seq_{}.{}", prefix, num_found, BRACKETS, ext) - } - - pub(crate) fn estimate_docs_quantity( - &self, schema: &SolrCore, slices: &Slices, - ) -> BoxedResult { - let end_limit = self.get_docs_to_retrieve(schema); - let num_retrieving = end_limit - self.skip; - - let slice_count = slices.estimate_steps()?; - Ok(num_retrieving * slice_count) + format!("{}_docs_{}_seq_{}.{}", prefix, num_retrieve, BRACKETS, ext) } - pub(crate) fn get_docs_to_retrieve(&self, schema: &SolrCore) -> u64 { - schema.num_found.min(self.limit.unwrap_or(u64::MAX)) + pub(crate) fn get_docs_to_retrieve(&self, num_found: u64) -> u64 { + let num_retrieve = num_found - self.skip; + num_retrieve.min(self.limit.unwrap_or(u64::MAX)) } - pub(crate) fn get_steps(&self, schema: &SolrCore) -> Requests { + pub(crate) fn merge_core_fields(&self, schema: &SolrCore) -> Vec { let include_hash: HashSet = HashSet::from_iter(schema.fields.clone()); let exclude_hash: HashSet = HashSet::from_iter(self.exclude.clone()); let diff: Vec = include_hash.difference(&exclude_hash).map(String::from).collect(); @@ -286,14 +267,24 @@ impl Backup { debug!("Include fields {:?}", include_hash); debug!("Exclude fields {:?}", exclude_hash); debug!("Actual fields {:?}", diff); - - let fl = self.get_query_fields(diff); - let query = self.get_query_url(&fl, true); - let end_limit = self.get_docs_to_retrieve(schema); - Requests { curr: self.skip, limit: end_limit, num_docs: self.num_docs, url: query } + diff + } + + pub(crate) fn get_requests_for_range( + &self, retrieved: u64, num_retrieve: u64, core_fields: &[String], begin: &str, end: &str, + ) -> Requests { + let selected = self.get_query_fields(core_fields); + let query = self.get_query_url(&selected, begin, end); + Requests { + prev: retrieved, + curr: self.skip, + limit: num_retrieve, + num_docs: self.num_docs, + url: query, + } } - pub(crate) fn get_query_fields(&self, core_fields: Vec) -> String { + pub(crate) fn get_query_fields(&self, core_fields: &[String]) -> String { if core_fields.is_empty() { EMPTY_STRING } else { @@ -303,23 +294,22 @@ impl Backup { } pub(crate) fn get_query_for_diagnostics(&self) -> String { - let url = self.get_query_url(EMPTY_STR, false); - format!("{}&start=0&rows=1", url) + let (begin, end) = self.get_between(); + self.get_query_num_found(begin, end) } - pub(crate) fn replace_vars(&self, query: &str, raw: bool) -> String { - if raw || self.iterate_between.is_empty() { - query.to_string() - } else { - let (begin, end) = self.get_between(); - replace_solr_vars(query, begin, end) - } + pub(crate) fn get_query_num_found(&self, begin: &str, end: &str) -> String { + self.get_query_url("&start=0&rows=1", begin, end) } - pub(crate) fn get_query_url(&self, selected: &str, raw: bool) -> String { + pub(crate) fn get_query_url(&self, selected: &str, begin: &str, end: &str) -> String { let qparam = self.query.as_deref().unwrap_or("*:*"); - let qfixed = self.replace_vars(qparam, raw); - let filterq = solr_query(&qfixed); + let qfixed = if begin.is_empty() || end.is_empty() { + qparam + } else { + &replace_solr_vars(qparam, begin, end) + }; + let filterq = solr_query(qfixed); let fqparam = self.fq.as_deref().unwrap_or("*:*"); let filterfq = solr_query(fqparam); @@ -353,7 +343,7 @@ impl Backup { } } - fn get_between(&self) -> (&str, &str) { + pub(crate) fn get_between(&self) -> (&str, &str) { if self.iterate_between.is_empty() { (EMPTY_STR, EMPTY_STR) } else { @@ -400,10 +390,13 @@ mod tests { let parsed = Cli::mockup_args_backup(); let gets = parsed.get().unwrap(); let core_info = SolrCore::mockup(); - let query = gets.get_query_url(EMPTY_STR, true); + let query = gets.get_query_url(EMPTY_STR, EMPTY_STR, EMPTY_STR); + let num_retrieve = gets.get_docs_to_retrieve(core_info.num_found); let mut i = 0; - for step in gets.get_steps(&core_info) { + for step in + gets.get_requests_for_range(0, num_retrieve, &core_info.fields, EMPTY_STR, EMPTY_STR) + { let url = step.url; assert_eq!(url.is_empty(), false); assert_eq!(url.starts_with(&query), true); From 32c30b775f09bb84e6b55eb93bcf8f68d730de76 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Wed, 11 Mar 2026 18:28:12 -0300 Subject: [PATCH 28/28] chore: upgrade release version to 0.9.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 1d9991a..69ff0c5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "solrcopy" -version = "0.8.1" +version = "0.9.0" edition = "2024" rust-version = "1.88"