From 68dd57e4a543007ccd19c3b67cdff4cabe38811d Mon Sep 17 00:00:00 2001 From: Asa-Henry Date: Mon, 13 Apr 2026 08:17:55 -0700 Subject: [PATCH 1/9] Added a check to detect when blank lines should have been skipped, but it doesn't solve the problem yet. --- src/fread.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/fread.c b/src/fread.c index 3df73156d..647c423fb 100644 --- a/src/fread.c +++ b/src/fread.c @@ -1946,6 +1946,10 @@ int freadMain(freadMainArgs _args) } } } + if (!prevStart && topStart && topSkip > 0) + { + DTWARN(_("The rows in this file appear to be separated by blank lines. This resulted in most rows being skipped. If this was not the intended outcome, please consider setting 'blank.lines.skip' to TRUE.\n")); + } if (!firstJumpEnd) { if (verbose) DTPRINT(_(" No sep and quote rule found a block of 2x2 or greater. Single column input.\n")); topNumFields = 1; From 0a317ae4d817b6b98d1dd3008550ad796b8bfe8e Mon Sep 17 00:00:00 2001 From: Asa-Henry Date: Mon, 13 Apr 2026 17:31:16 -0700 Subject: [PATCH 2/9] Fixed check added to only check if 'topSkip' is greater than 0. 'topSkip' is greater than 0 when blank lines are present, so I also check if blank lines should be skipped so I can throw a warning to let the user know. --- src/fread.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fread.c b/src/fread.c index 647c423fb..93aaa3cd6 100644 --- a/src/fread.c +++ b/src/fread.c @@ -1946,7 +1946,7 @@ int freadMain(freadMainArgs _args) } } } - if (!prevStart && topStart && topSkip > 0) + if (topSkip > 0 && !skipEmptyLines) { DTWARN(_("The rows in this file appear to be separated by blank lines. This resulted in most rows being skipped. 
If this was not the intended outcome, please consider setting 'blank.lines.skip' to TRUE.\n")); } From 9164f60265ffab76ac0413ac1e06efca6febea8b Mon Sep 17 00:00:00 2001 From: Asa-Henry Date: Mon, 20 Apr 2026 11:10:23 -0700 Subject: [PATCH 3/9] Updated check for blank lines to ask if 'topSkip' is greater than 1 to accommodate the situation where the header and data are separated by a blank line. --- src/fread.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/fread.c b/src/fread.c index 82d314505..2aca02110 100644 --- a/src/fread.c +++ b/src/fread.c @@ -1843,6 +1843,7 @@ int freadMain(freadMainArgs _args) int topNumFields = 1; // how many fields that was, to resolve ties enum quote_rule_t topQuoteRule = -1; // which quote rule that was int topSkip = 0; // how many rows to auto-skip + // #7707 'topSkip' accumulates as blank lines are encountered; can be used to differentiate between a file where the header and data are separated by a blank line and a file where block(s) of lines or each line is separated by a blank line const char *topStart = NULL; for (quoteRule = quote ? QUOTE_RULE_EMBEDDED_QUOTES_DOUBLED : QUOTE_RULE_IGNORE_QUOTES; quoteRule < QUOTE_RULE_COUNT; quoteRule++) { // #loop_counter_not_local_scope_ok @@ -1946,7 +1947,7 @@ int freadMain(freadMainArgs _args) } } } - if (topSkip > 0 && !skipEmptyLines) + if (topSkip > 1 && !skipEmptyLines) { DTWARN(_("The rows in this file appear to be separated by blank lines. This resulted in most rows being skipped. If this was not the intended outcome, please consider setting 'blank.lines.skip' to TRUE.\n")); } From 8a8056d00555efb6dcfac8c327fc0f9bf1c23baf Mon Sep 17 00:00:00 2001 From: Asa-Henry Date: Mon, 20 Apr 2026 13:11:54 -0700 Subject: [PATCH 4/9] Used the 'prevStart' variable to detect when each line is separated by a blank line. In the case of each line separated by a blank line, 'prevStart' is always NULL because each line could be a possible header. 
--- src/fread.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fread.c b/src/fread.c index 2aca02110..2c889097a 100644 --- a/src/fread.c +++ b/src/fread.c @@ -1947,7 +1947,7 @@ int freadMain(freadMainArgs _args) } } } - if (topSkip > 1 && !skipEmptyLines) + if (!prevStart && topSkip > 1 && !skipEmptyLines) { DTWARN(_("The rows in this file appear to be separated by blank lines. This resulted in most rows being skipped. If this was not the intended outcome, please consider setting 'blank.lines.skip' to TRUE.\n")); } From 03e482d316492ff4c486f0733f6b0221daabe025 Mon Sep 17 00:00:00 2001 From: Asa-Henry Date: Tue, 21 Apr 2026 16:22:16 -0700 Subject: [PATCH 5/9] Added test '1578.10' for initial case which issue #3339 pointed out. Causes an error in test 1578.1? --- inst/tests/tests.Rraw | 2 ++ 1 file changed, 2 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 443487c6a..4b35ab027 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -8073,6 +8073,8 @@ test(1578.6, fread(f, skip=47L, verbose=TRUE), data.table(V1=1:2, V2=3:4), outpu test(1578.7, fread(f, skip=49L), data.table(V1=1:2, V2=3:4)) test(1578.8, fread(f, skip=47L, blank.lines.skip=TRUE), data.table(a=1:2, b=3:4)) test(1578.9, fread(f, skip=48L), data.table(V1=1:2, V2=3:4)) # start on blank line 49 and skip="auto" to first data row on line 50 +input = "x y\n\n1 a\n\n2 b\n\n3 c" +test(1578.10, fread(input), data.table(V1=3L, V2="c"), warning="The rows in this file appear to be separated by blank lines. This resulted in most rows being skipped. If this was not the intended outcome, please consider setting 'blank.lines.skip' to TRUE.\n") # test 1579 moved to optimize.Rraw From 218d0bb26bfbe10dd7b918bd666724aeebf31d5b Mon Sep 17 00:00:00 2001 From: Asa-Henry Date: Wed, 22 Apr 2026 18:11:09 -0700 Subject: [PATCH 6/9] Changed the number of the test written to verify fix for #3339. 
--- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 4b35ab027..76b0ba5f9 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -8074,7 +8074,7 @@ test(1578.7, fread(f, skip=49L), data.table(V1=1:2, V2=3:4)) test(1578.8, fread(f, skip=47L, blank.lines.skip=TRUE), data.table(a=1:2, b=3:4)) test(1578.9, fread(f, skip=48L), data.table(V1=1:2, V2=3:4)) # start on blank line 49 and skip="auto" to first data row on line 50 input = "x y\n\n1 a\n\n2 b\n\n3 c" -test(1578.10, fread(input), data.table(V1=3L, V2="c"), warning="The rows in this file appear to be separated by blank lines. This resulted in most rows being skipped. If this was not the intended outcome, please consider setting 'blank.lines.skip' to TRUE.\n") +test(1578.91, fread(input), data.table(V1=3L, V2="c"), warning="The rows in this file appear to be separated by blank lines. This resulted in most rows being skipped. If this was not the intended outcome, please consider setting 'blank.lines.skip' to TRUE.\n") # test 1579 moved to optimize.Rraw From a647937205ce9f055a8e627720003a42a4724255 Mon Sep 17 00:00:00 2001 From: Asa-Henry Date: Thu, 23 Apr 2026 12:14:09 -0700 Subject: [PATCH 7/9] Removed newline from end of expected warning message. 
--- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 76b0ba5f9..ce3223c21 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -8074,7 +8074,7 @@ test(1578.7, fread(f, skip=49L), data.table(V1=1:2, V2=3:4)) test(1578.8, fread(f, skip=47L, blank.lines.skip=TRUE), data.table(a=1:2, b=3:4)) test(1578.9, fread(f, skip=48L), data.table(V1=1:2, V2=3:4)) # start on blank line 49 and skip="auto" to first data row on line 50 input = "x y\n\n1 a\n\n2 b\n\n3 c" -test(1578.91, fread(input), data.table(V1=3L, V2="c"), warning="The rows in this file appear to be separated by blank lines. This resulted in most rows being skipped. If this was not the intended outcome, please consider setting 'blank.lines.skip' to TRUE.\n") +test(1578.91, fread(input), data.table(V1=3L, V2="c"), warning="The rows in this file appear to be separated by blank lines. This resulted in most rows being skipped. If this was not the intended outcome, please consider setting 'blank.lines.skip' to TRUE.") # test 1579 moved to optimize.Rraw From 7da8059f5a1ef2c03e5c821142a9bca2ed566738 Mon Sep 17 00:00:00 2001 From: Asa-Henry Date: Thu, 23 Apr 2026 14:28:02 -0700 Subject: [PATCH 8/9] Updated NEWS.md with news of the new warning. --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 021ddbbb6..f2e417cbe 100644 --- a/NEWS.md +++ b/NEWS.md @@ -50,6 +50,8 @@ 9. `fread()` no longer replaces a literal header column name `"NA"` with an auto-generated `Vn` name when `na.strings` includes `"NA"`, [#5124](https://github.com/Rdatatable/data.table/issues/5124). Data rows still continue to parse `"NA"` as missing. Thanks @Mashin6 for the report and @shrektan for the fix. +10. `fread()` would not give a warning when every second line of input was empty, [#3339](https://github.com/Rdatatable/data.table/issues/3339). 
Now, a warning message 'The rows in this file appear to be separated by blank lines.' is given, suggesting that `blank.lines.skip` be set to `TRUE`. Thanks to @Henrik-P for the report and @Asa-Henry for the fix. + ### Notes 1. {data.table} now depends on R 3.5.0 (2018). From 219258aa2d4697a6e02f10ed5902e39ac3cfcaa3 Mon Sep 17 00:00:00 2001 From: Asa-Henry Date: Thu, 23 Apr 2026 14:38:17 -0700 Subject: [PATCH 9/9] Updated sub-test numbers for test '1578' to match the format established where 0s are prepended when there is more than one significant digit following the decimal. --- inst/tests/tests.Rraw | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ce3223c21..54e3abea3 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -8059,22 +8059,22 @@ test(1577.3, levels(X$b), character(0)) # FR #530, skip blank lines input = "Header not 2 columns\n\n1,3\n2,4" -test(1578.1, fread(input), data.table(V1=1:2, V2=3:4)) +test(1578.01, fread(input), data.table(V1=1:2, V2=3:4)) input = "a,b\n\n1,3\n2,4" -test(1578.2, fread(input), data.table(V1=1:2, V2=3:4)) # the block of 2x2 dominates the one line with sep in auto-removed header section -test(1578.3, fread(input, blank.lines.skip=TRUE), data.table( a=1:2, b=3:4)) +test(1578.02, fread(input), data.table(V1=1:2, V2=3:4)) # the block of 2x2 dominates the one line with sep in auto-removed header section +test(1578.03, fread(input, blank.lines.skip=TRUE), data.table( a=1:2, b=3:4)) input = "a,b\n\n\n1,3\n2,4" -test(1578.4, fread(input, blank.lines.skip=TRUE), data.table( a=1:2, b=3:4)) +test(1578.04, fread(input, blank.lines.skip=TRUE), data.table( a=1:2, b=3:4)) input = "a,b\n\n\n1,3\n\n2,4\n\n" -test(1578.5, fread(input, blank.lines.skip=TRUE), data.table( a=1:2, b=3:4)) +test(1578.05, fread(input, blank.lines.skip=TRUE), data.table( a=1:2, b=3:4)) f = testDir("530_fread.txt") -test(1578.6, fread(f, skip=47L, verbose=TRUE), 
data.table(V1=1:2, V2=3:4), output="Positioned on line 48 starting: <>") -test(1578.7, fread(f, skip=49L), data.table(V1=1:2, V2=3:4)) -test(1578.8, fread(f, skip=47L, blank.lines.skip=TRUE), data.table(a=1:2, b=3:4)) -test(1578.9, fread(f, skip=48L), data.table(V1=1:2, V2=3:4)) # start on blank line 49 and skip="auto" to first data row on line 50 +test(1578.06, fread(f, skip=47L, verbose=TRUE), data.table(V1=1:2, V2=3:4), output="Positioned on line 48 starting: <>") +test(1578.07, fread(f, skip=49L), data.table(V1=1:2, V2=3:4)) +test(1578.08, fread(f, skip=47L, blank.lines.skip=TRUE), data.table(a=1:2, b=3:4)) +test(1578.09, fread(f, skip=48L), data.table(V1=1:2, V2=3:4)) # start on blank line 49 and skip="auto" to first data row on line 50 input = "x y\n\n1 a\n\n2 b\n\n3 c" -test(1578.91, fread(input), data.table(V1=3L, V2="c"), warning="The rows in this file appear to be separated by blank lines. This resulted in most rows being skipped. If this was not the intended outcome, please consider setting 'blank.lines.skip' to TRUE.") +test(1578.10, fread(input), data.table(V1=3L, V2="c"), warning="The rows in this file appear to be separated by blank lines. This resulted in most rows being skipped. If this was not the intended outcome, please consider setting 'blank.lines.skip' to TRUE.") # test 1579 moved to optimize.Rraw