diff --git a/CHANGELOG.md b/CHANGELOG.md index 04d31f5bb1..63f81fc8ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ### Changed - **GFQL Cypher parse memoization (perf)**: `parse_cypher` now memoizes its result (LRU over the deterministic lark parse+transform → immutable frozen AST). Repeated identical Cypher queries skip the ~15 ms parse — the dominant per-call cost of small queries (~50% of a Cypher call at 100k rows) — making end-to-end query latency ~1.3–1.7× faster at small/interactive sizes across pandas/polars/cuDF. Safe to share the cached AST: every Cypher AST node is `@dataclass(frozen=True)` and `compile_cypher_query` does not mutate the parsed tree; validation errors still raise and are not cached. +- **GFQL structured whole-entity returns (#1650)**: Terminal Cypher `RETURN a` (whole node/edge) now emits **structured flattened columns** (`a.id`, `a.val`, `a.kind`, ...) instead of a single Cypher display string (`({id: 51, val: 51, kind: 'a'})`). The per-field columns already exist before projection, so this is "stop collapsing" rather than "rebuild": measured ~2–6.4× faster on pandas and ~2.7–4.3× on cuDF for whole-entity returns (the win grows with row count, since the old text render is O(rows) and the flat form is ~free), and the result is directly usable without re-parsing a string and survives JSON/CSV/Parquet/Arrow serialization and `plot()`. The human-readable Cypher display string remains available on demand via the `render_entity_text(result, alias)` presentation helper. OPTIONAL-MATCH / `WITH`-reentry / grouping paths that synthesize null/absent entities or still consume a single-column entity value are unchanged. Behavior change: callers that previously read the rendered display string from a terminal `RETURN a` column now receive flattened `a.*` columns. 
Edge case: a whole entity with NO fields to flatten — an entity with no id binding, no properties, and no type/label (in practice only an edge whose graph has no edge-id binding) — has no `{alias}.{field}` columns to emit, so it falls back to the single Cypher-display-text column under the bare alias (value is correct, e.g. `[]`); nodes always carry their id field and always flatten. ### Performance - **GFQL temporal-detection dtype gate (#1650)**: `order_detect_temporal_mode` now short-circuits for numeric/bool/complex columns, which can never hold temporal *text*, instead of running an `astype(str)` + multi-regex `fullmatch` scan on every comparison. Eliminates spurious row-wise stringification in `where_rows`/comparison paths whose output never contains entity-text. Byte-identical results; measured `where_rows` speedups ~3.1× (pandas) and ~4.4–13.3× (cuDF, scaling with row count). Does not address whole-entity `RETURN a` text rendering, which is tracked separately. diff --git a/docs/source/gfql/cypher.rst b/docs/source/gfql/cypher.rst index 2bf530dbfa..7bead15574 100644 --- a/docs/source/gfql/cypher.rst +++ b/docs/source/gfql/cypher.rst @@ -309,6 +309,33 @@ Row And Row-Pipeline Forms including connected suffix projections in the current supported row-binding subset. +Whole-Entity RETURN Output Shape +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A terminal ``RETURN`` of a whole node or relationship (``RETURN a`` rather than +``RETURN a.prop``) emits **structured flattened columns**, one per field, named +``<alias>.<field>``:: + + g.gfql("MATCH (a:Person) RETURN a") + # result._nodes columns: a.id, a.name, a.age, ... (one column per field) + +This is directly usable (no string to re-parse) and survives JSON / CSV / Parquet / +Arrow serialization and ``plot()``. 
To recover the human-readable Cypher display +string (``(:Person {name: 'Alice'})``) on demand, use the presentation helper:: + + from graphistry.compute.gfql.cypher.result_postprocess import render_entity_text + text_series = render_entity_text(result, "a") # nodes + text_series = render_entity_text(result, "r", table="edges") # relationships + +Notes: + +- An aliased property projection of the same field (``RETURN a, a.val``) is + de-duplicated — you get a single ``a.val`` column, not two. +- A whole entity with no fields to flatten (no id binding, no properties, no + type/label — in practice only an edge whose graph has no edge-id binding) has + nothing to flatten and falls back to a single Cypher-display-text column under the + bare alias. Nodes always carry an id field and always flatten. + Procedure And Multi-Branch Forms ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/graphistry/compute/chain.py b/graphistry/compute/chain.py index 685395411f..0edd42b6b1 100644 --- a/graphistry/compute/chain.py +++ b/graphistry/compute/chain.py @@ -1097,13 +1097,10 @@ def _chain_impl( ) if added_edge_index: final_edges_df = final_edges_df.drop(columns=[g._edge]) - # Rebuild from `self` to restore the ORIGINAL edge binding (`self._edge`, - # often None — `g` carries the internal edge-index binding instead), but - # explicitly carry the materialized node-id binding `g._node`: for an - # edges-only input `self._node is None`, so rebuilding from `self` alone - # drops it, leaving the endpoint-reconciliation concat below to synthesize - # a `None`-named column (corrupt result + a void-block concat crash on - # newer pandas). + # `self` restores the original edge binding, but carry the materialized + # `g._node` explicitly: an edges-only `self._node is None` would drop the + # node binding, making the reconciliation concat synthesize a corrupt + # `None`-named column (and a void-block concat crash on newer pandas). 
g_out = self.nodes(final_nodes_df, g._node).edges(final_edges_df, edge=original_edge) else: g_out = g.nodes(final_nodes_df).edges(final_edges_df) diff --git a/graphistry/compute/gfql/cypher/result_postprocess.py b/graphistry/compute/gfql/cypher/result_postprocess.py index d8b0103bc0..bf2ce497f0 100644 --- a/graphistry/compute/gfql/cypher/result_postprocess.py +++ b/graphistry/compute/gfql/cypher/result_postprocess.py @@ -1,7 +1,7 @@ from __future__ import annotations from dataclasses import replace -from typing import Any, Dict, Literal, Optional, TypedDict, cast +from typing import Any, Dict, List, Literal, Optional, Set, TypedDict, cast import pandas as pd @@ -116,6 +116,105 @@ def _format_edge_entities(df: DataFrameT, projection: ResultProjectionPlan) -> S ) +def _label_flag_columns(df: DataFrameT) -> list[str]: + return [ + str(col) + for col in df.columns + if str(col).startswith("label__") + and str(col).split("label__", 1)[1] not in {"", "None", "nan"} + ] + + +def _flat_entity_field_names( + source_rows_df: DataFrameT, projection: ResultProjectionPlan, id_column: Optional[str] +) -> list[str]: + """Ordered field names for a flattened whole-entity projection (#1650). + + Mirrors the renderer's column selection (``node_property_columns`` / + ``edge_property_columns`` honor ``exclude_columns`` so sibling aliases and + engine-internal columns are not pulled in), then prepends the entity id and + (nodes) appends ``label__*`` flags / (edges) the ``type`` column so + :func:`render_entity_text` can losslessly reconstruct the Cypher form. + """ + alias_col = projection.alias + if projection.table == "nodes": + prop_cols = node_property_columns(source_rows_df, alias_col, projection.exclude_columns) + # label sources for faithful reconstruction: label__* flags and/or the + # node ``type`` column (both consumed by _node_label_text). 
+ extra = _label_flag_columns(source_rows_df) + if "type" in source_rows_df.columns: + extra = [*extra, "type"] + else: + prop_cols = edge_property_columns(source_rows_df, alias_col, projection.exclude_columns) + extra = ["type"] if "type" in source_rows_df.columns else [] + + fields: list[str] = [] + for col in [id_column, *prop_cols, *extra]: + if col is not None and col in source_rows_df.columns and col not in fields: + fields.append(str(col)) + return fields + + +def _flat_entity_columns( + source_rows_df: DataFrameT, + projection: ResultProjectionPlan, + output_name: str, + id_column: Optional[str], +) -> Dict[str, SeriesT]: + """Structured (flattened) whole-entity projection (issue #1650). + + Emit one ``{output_name}.{field}`` column per aliased field instead of + collapsing the entity into a single Cypher display string. The per-field + columns already exist on ``source_rows_df`` (gathered by + ``_projection_alias_rows``), so this is "stop collapsing", not "rebuild": + near-free, lossless, and directly usable without re-parsing a string. + """ + return { + f"{output_name}.{field}": cast(SeriesT, source_rows_df[field]) + for field in _flat_entity_field_names(source_rows_df, projection, id_column) + } + + +def render_entity_text( + result: Plottable, alias: str, *, table: Literal["nodes", "edges"] = "nodes" +) -> SeriesT: + """Render a structured whole-entity projection back to Cypher display text. + + Presentation helper: given a result whose ``RETURN <alias>`` was emitted as + flattened ``{alias}.{field}`` columns (the default since #1650), reconstruct + the Cypher display string (``(:Label {..})`` / ``[:TYPE {..}]``). Used by the + conformance/TCK driver and by callers who want the human-readable form. The + structured data path itself never pays this cost. + """ + rows_df = cast(DataFrameT, result._nodes) + if rows_df is None: + raise ValueError("result has no _nodes frame to render") + prefix = f"{alias}." 
+ field_cols = [col for col in rows_df.columns if str(col).startswith(prefix)] + if not field_cols: + raise ValueError(f"no flattened columns found for alias {alias!r}") + frame = cast( + DataFrameT, + rows_df[field_cols].rename(columns={col: str(col)[len(prefix):] for col in field_cols}), + ) + # An OPTIONAL-MATCH miss flattens to a row whose fields are all null; such + # rows must render as null, not "()". Track presence (any field non-null). + present: Optional[SeriesT] = None + for field in frame.columns: + not_na = cast(SeriesT, frame[field].notna()) + present = not_na if present is None else cast(SeriesT, present | not_na) + # _format_*_entities anchors length/null on a bare alias column; render every + # row, then null absent rows below. + frame = cast(DataFrameT, frame.assign(**{alias: True})) + projection = ResultProjectionPlan(alias=alias, table=table, columns=(), exclude_columns=()) + rendered = _format_node_entities(frame, projection) if table == "nodes" else _format_edge_entities(frame, projection) + if present is not None and hasattr(rendered, "where"): + # Null absent rows. ``other=None`` fills NaN/None (valid pandas/cuDF); + # the pandas-stubs ``where`` overload is stricter than runtime here. + rendered = cast(SeriesT, rendered.where(present, None)) # type: ignore[call-overload] + return rendered + + def _project_property_column( rows_df: DataFrameT, *, @@ -124,10 +223,8 @@ def _project_property_column( if column.source_name is None or column.source_name not in rows_df.columns: raise ValueError(f"projection source column not found: {column.source_name!r}") series = cast(SeriesT, rows_df[column.source_name]) - # Temporal-constructor normalization only applies to STRING values; numeric/bool/ - # complex columns can never hold temporal text, so skip the (otherwise spurious) - # ``astype(str)`` + detection scan and return the column as-is — byte-identical, - # since the scan returns None for these dtypes. Mirrors the #1650/#1651 gate. 
+ # Temporal-constructor normalization only applies to strings; numeric/bool/complex + # can't hold temporal text, so skip the astype(str)+scan (byte-identical). #1650 gate. if is_non_textual_scalar_dtype(getattr(series, "dtype", None)): return series if hasattr(series, "astype") and hasattr(cast(SeriesT, series.astype(str)), "str"): @@ -185,7 +282,17 @@ def _projection_alias_rows( return None -def apply_result_projection(result: Plottable, projection: ResultProjectionPlan) -> Plottable: +def apply_result_projection( + result: Plottable, projection: ResultProjectionPlan, *, structured: bool = True +) -> Plottable: + """Project Cypher RETURN columns onto ``result._nodes``. + + ``structured=True`` (#1650 default) emits whole-entity returns as flattened + ``{alias}.{field}`` columns. ``structured=False`` keeps the legacy single + Cypher-display-string column; the reentry / OPTIONAL-MATCH null-fill machinery + (which still assumes a single-column entity value) opts out via this flag until + it is unified onto the structured path. 
+ """ rows_df = cast(DataFrameT, getattr(result, "_nodes", None)) if rows_df is None: return result @@ -194,6 +301,7 @@ def apply_result_projection(result: Plottable, projection: ResultProjectionPlan) return result projected_data: Dict[str, SeriesT] = {} projected_entity_meta: Dict[str, WholeRowProjectionMeta] = {} + output_columns: list[str] = [] for column in projection.columns: if column.kind == "whole_row": source_alias = column.source_name or projection.alias @@ -201,20 +309,46 @@ def apply_result_projection(result: Plottable, projection: ResultProjectionPlan) if source_rows_df is None or source_alias not in source_rows_df.columns: raise ValueError(f"whole-row projection source alias not found: {source_alias!r}") source_projection = projection if source_alias == projection.alias else replace(projection, alias=source_alias) - projected_data[column.output_name] = ( - _format_node_entities(source_rows_df, source_projection) - if projection.table == "nodes" - else _format_edge_entities(source_rows_df, source_projection) - ) id_column = getattr(result, "_node" if source_projection.table == "nodes" else "_edge", None) + flat_columns = ( + _flat_entity_columns(source_rows_df, source_projection, column.output_name, id_column) + if structured + else {} + ) + if structured and flat_columns: + # Structured (flattened) emission (#1650): one column per field; text + # stays available via render_entity_text(). + projected_data.update(flat_columns) + output_columns.extend(flat_columns.keys()) + elif structured: + # No fields to flatten: the synthesized absent-entity row (OPTIONAL miss + # / reentry no-match, a single ``{alias: None}`` column) or a field-less + # real entity. Emit the single-column text form (renders to None / []). 
+ projected_data[column.output_name] = ( + _format_node_entities(source_rows_df, source_projection) + if source_projection.table == "nodes" + else _format_edge_entities(source_rows_df, source_projection) + ) + output_columns.append(column.output_name) + else: + projected_data[column.output_name] = ( + _format_node_entities(source_rows_df, source_projection) + if source_projection.table == "nodes" + else _format_edge_entities(source_rows_df, source_projection) + ) + output_columns.append(column.output_name) if id_column is not None and id_column in source_rows_df.columns: projected_entity_meta[column.output_name] = { "table": source_projection.table, "alias": source_projection.alias, "id_column": id_column, + # Snapshot the id Series: the bounded-reentry path recovers + # carried node identities from this meta and must not alias the + # live working frame (see #1356). "ids": cast(SeriesT, source_rows_df[id_column]).copy(), } else: + output_columns.append(column.output_name) if column.kind == "property": property_rows_df = alias_rows_df if ( @@ -226,6 +360,18 @@ def apply_result_projection(result: Plottable, projection: ResultProjectionPlan) projected_data[column.output_name] = _project_property_column(property_rows_df, column=column) else: projected_data[column.output_name] = _project_expr_column(result, rows_df, column=column) + # De-dup output columns (#1650): a flattened whole entity `a` (-> a.id, a.val, ...) + # collides by name with an explicit property projection (`RETURN a, a.val`). Both + # read the same source field (dotted aliases are rejected), so values are identical + # — keep first occurrence; a duplicate name would drop data on to_dict/serialization. 
+ if len(set(output_columns)) != len(output_columns): + seen: Set[str] = set() + deduped: List[str] = [] + for c in output_columns: + if c not in seen: + seen.add(c) + deduped.append(c) + output_columns = deduped projected_rows = alias_rows_df if rows_df.__class__.__module__.startswith("cudf") and any(isinstance(value, pd.Series) for value in projected_data.values()): projected_rows = cast(DataFrameT, cast(Any, alias_rows_df).to_pandas()) @@ -233,7 +379,7 @@ def apply_result_projection(result: Plottable, projection: ResultProjectionPlan) key: cast(SeriesT, value.to_pandas() if hasattr(value, "to_pandas") else value) for key, value in projected_data.items() } - projected_nodes = cast(DataFrameT, projected_rows.assign(**projected_data)[[column.output_name for column in projection.columns]]) + projected_nodes = cast(DataFrameT, projected_rows.assign(**projected_data)[output_columns]) out = result.bind() out._nodes = projected_nodes diff --git a/graphistry/compute/gfql_unified.py b/graphistry/compute/gfql_unified.py index 3fb03c6803..d89cdc0c0c 100644 --- a/graphistry/compute/gfql_unified.py +++ b/graphistry/compute/gfql_unified.py @@ -856,7 +856,15 @@ def _execute_compiled_query_chain_non_union( empty_result_row=compiled_query.empty_result_row, ) if compiled_query.result_projection is not None: - result = apply_result_projection(result, compiled_query.result_projection) + # OPTIONAL null-fill / row-guard still consumes a single-column entity value, + # so those keep the legacy text form; plain terminal RETURN flattens (#1650). 
+ structured_projection = ( + compiled_query.optional_projection_row_guard is None + and compiled_query.optional_null_fill is None + ) + result = apply_result_projection( + result, compiled_query.result_projection, structured=structured_projection + ) if compiled_query.optional_projection_row_guard is not None: expected_rows = 1 for base_chain in compiled_query.optional_projection_row_guard.base_chains: @@ -892,6 +900,7 @@ def _execute_compiled_query_chain_non_union( context, ), compiled_query.optional_null_fill.alignment_projection, + structured=False, ) result = _apply_optional_null_fill( result, diff --git a/graphistry/tests/compute/gfql/cypher/_whole_entity_compat.py b/graphistry/tests/compute/gfql/cypher/_whole_entity_compat.py new file mode 100644 index 0000000000..8dd0089ffe --- /dev/null +++ b/graphistry/tests/compute/gfql/cypher/_whole_entity_compat.py @@ -0,0 +1,41 @@ +"""Test helper for the #1650 structured-return change. + +`RETURN a` (whole entity) now emits flattened `a.id, a.val, ...` columns instead +of a Cypher display string. This shim renders those flattened columns back to the +display string so existing conformance assertions (which encode the Cypher text +form) keep verifying render fidelity with minimal churn — the same role the +tck-gfql harness plays for the external suite. + +Use `entity_text_records(result, {"a": "nodes"})` and compare to the pre-#1650 +expected list-of-dicts. Scalar / expression output columns pass through unchanged. +""" +from __future__ import annotations + +from typing import Any, Dict, List + +import pandas as pd + +from graphistry.compute.gfql.cypher.result_postprocess import render_entity_text + + +def _to_pandas(series: Any) -> Any: + if hasattr(series, "to_pandas"): + series = series.to_pandas() + return series.reset_index(drop=True) + + +def entity_text_records(result: Any, entities: Dict[str, str]) -> List[Dict[str, Any]]: + """Records with whole-entity columns rendered to Cypher text, scalars passed through. 
+ + `entities` maps each whole-entity output name to its table ("nodes"/"edges"). + """ + nodes = result._nodes + prefixes = tuple(f"{name}." for name in entities) + data: Dict[str, Any] = { + name: _to_pandas(render_entity_text(result, name, table=table)) + for name, table in entities.items() + } + for col in nodes.columns: + if not str(col).startswith(prefixes): + data[str(col)] = _to_pandas(nodes[col]) + return pd.DataFrame(data).to_dict(orient="records") diff --git a/graphistry/tests/compute/gfql/cypher/test_lowering.py b/graphistry/tests/compute/gfql/cypher/test_lowering.py index bdbcdc49bc..e66c39f0d3 100644 --- a/graphistry/tests/compute/gfql/cypher/test_lowering.py +++ b/graphistry/tests/compute/gfql/cypher/test_lowering.py @@ -45,6 +45,7 @@ from graphistry.compute.gfql.ir.compilation import PlanContext from graphistry.compute.gfql.ir.logical_plan import CHILD_SLOTS, Filter, PatternMatch, ProcedureCall as LogicalProcedureCall from graphistry.tests.test_compute import CGFull +from graphistry.tests.compute.gfql.cypher._whole_entity_compat import entity_text_records class _CypherTestGraph(CGFull): @@ -1815,7 +1816,7 @@ def test_string_cypher_supports_cartesian_with_stage_identity_join_whole_row_pro pd.DataFrame({"s": [], "d": []}), ).gfql("MATCH (a), (b) WITH a, b WHERE a = b RETURN a, b ORDER BY a.id") - assert result._nodes.to_dict(orient="records") == [ + assert entity_text_records(result, {"a": "nodes", "b": "nodes"}) == [ {"a": "(:A)", "b": "(:A)"}, {"a": "(:B)", "b": "(:B)"}, ] @@ -1834,7 +1835,7 @@ def test_string_cypher_supports_cartesian_with_stage_property_join_whole_row_pro pd.DataFrame({"s": [], "d": []}), ).gfql("MATCH (a:A), (b:B) WITH a, b WHERE a.k = b.k RETURN a, b") - assert result._nodes.to_dict(orient="records") == [ + assert entity_text_records(result, {"a": "nodes", "b": "nodes"}) == [ {"a": "(:A {k: 2})", "b": "(:B {k: 2})"}, ] @@ -1851,7 +1852,7 @@ def test_string_cypher_supports_cartesian_with_stage_inequality_join_whole_row_p 
pd.DataFrame({"s": [], "d": []}), ).gfql("MATCH (a), (b) WITH a, b WHERE a <> b RETURN a, b ORDER BY a.id, b.id") - assert result._nodes.to_dict(orient="records") == [ + assert entity_text_records(result, {"a": "nodes", "b": "nodes"}) == [ {"a": "(:A)", "b": "(:B)"}, {"a": "(:B)", "b": "(:A)"}, ] @@ -1886,7 +1887,7 @@ def test_string_cypher_supports_cartesian_node_identity_join_with_whole_row_proj pd.DataFrame({"s": [], "d": []}), ).gfql("MATCH (a), (b) WHERE a = b RETURN a, b ORDER BY a.id") - assert result._nodes.to_dict(orient="records") == [ + assert entity_text_records(result, {"a": "nodes", "b": "nodes"}) == [ {"a": "(:A)", "b": "(:A)"}, {"a": "(:B)", "b": "(:B)"}, ] @@ -1910,7 +1911,7 @@ def test_string_cypher_supports_cartesian_node_property_join_with_whole_row_proj pd.DataFrame({"s": [], "d": []}), ).gfql("MATCH (a:A), (b:B) WHERE a.k = b.k RETURN a, b") - assert result._nodes.to_dict(orient="records") == [ + assert entity_text_records(result, {"a": "nodes", "b": "nodes"}) == [ {"a": "(:A {k: 2})", "b": "(:B {k: 2})"}, ] @@ -1927,7 +1928,7 @@ def test_string_cypher_supports_cartesian_whole_row_projection_aliases() -> None pd.DataFrame({"s": [], "d": []}), ).gfql("MATCH (a), (b) WHERE a = b RETURN a AS left, b AS right ORDER BY left.id") - assert result._nodes.to_dict(orient="records") == [ + assert entity_text_records(result, {"left": "nodes", "right": "nodes"}) == [ {"left": "(:A)", "right": "(:A)"}, {"left": "(:B)", "right": "(:B)"}, ] @@ -2359,7 +2360,7 @@ def test_string_cypher_formats_single_node_entity_projection() -> None: result = _mk_graph(nodes, edges).gfql("MATCH (p) RETURN p") - assert result._nodes.to_dict(orient="records") == [ + assert entity_text_records(result, {"p": "nodes"}) == [ {"p": "(:Person {name: 'Alice', score: 2})"} ] entity_meta = getattr(result, "_cypher_entity_projection_meta") @@ -2383,7 +2384,7 @@ def test_string_cypher_formats_single_edge_entity_projection() -> None: result = _mk_graph(nodes, edges).gfql("MATCH ()-[r]->() 
RETURN r") - assert result._nodes.to_dict(orient="records") == [ + assert entity_text_records(result, {"r": "edges"}) == [ {"r": "[:KNOWS {weight: 5}]"} ] @@ -2505,8 +2506,9 @@ def test_string_cypher_formats_filtered_edge_entity_projection_on_cudf() -> None engine="cudf", ) - pdf = _to_pandas_df(result._nodes).sort_values("r").reset_index(drop=True) - assert pdf.to_dict(orient="records") == [ + assert sorted( + entity_text_records(result, {"r": "edges"}), key=lambda row: row["r"] + ) == [ {"r": "[:HATES]"}, {"r": "[:KNOWS]"}, ] @@ -2548,7 +2550,7 @@ def test_string_cypher_formats_optional_match_projection_on_cudf() -> None: engine="cudf", ) - assert _to_pandas_df(result._nodes).to_dict(orient="records") == [ + assert entity_text_records(result, {"m": "nodes"}) == [ {"m": "(:A {num: 42})"} ] @@ -2571,7 +2573,7 @@ def test_string_cypher_formats_small_float_node_entity_projection_on_cudf() -> N result = _mk_graph(nodes, edges).gfql("MATCH (a) RETURN a", engine="cudf") - assert _to_pandas_df(result._nodes).to_dict(orient="records") == [ + assert entity_text_records(result, {"a": "nodes"}) == [ {"a": "(:B {num: 30.94857, num2: 0.00002})"} ] @@ -2582,7 +2584,7 @@ def test_string_cypher_formats_single_node_entity_projection_with_alias() -> Non result = _mk_graph(nodes, edges).gfql("MATCH (a) RETURN a AS ColumnName") - assert result._nodes.to_dict(orient="records") == [{"ColumnName": "(:A)"}] + assert entity_text_records(result, {"ColumnName": "nodes"}) == [{"ColumnName": "(:A)"}] entity_meta = getattr(result, "_cypher_entity_projection_meta") assert entity_meta["ColumnName"]["ids"].tolist() == ["a"] @@ -2694,7 +2696,7 @@ def test_issue_1411_connected_join_whole_row_projection_shape() -> None: "RETURN city" ) - assert result._nodes.to_dict(orient="records") == [ + assert entity_text_records(result, {"city": "nodes"}) == [ {"city": "(:Place {name: 'City'})"} ] entity_meta = getattr(result, "_cypher_entity_projection_meta") @@ -2791,7 +2793,7 @@ def 
test_string_cypher_formats_mixed_node_entity_projection() -> None: "MATCH (p:Person) RETURN p AS person, p.name AS person_name ORDER BY person_name DESC LIMIT 1" ) - assert result._nodes.to_dict(orient="records") == [ + assert entity_text_records(result, {"person": "nodes"}) == [ {"person": "(:Person {name: 'Bob', score: 9})", "person_name": "Bob"} ] @@ -2808,7 +2810,7 @@ def test_string_cypher_formats_mixed_node_entity_and_null_predicate_projection() result = _mk_graph(nodes, edges).gfql("MATCH (n:X) RETURN n, n.prop IS NULL AS b") - assert result._nodes.to_dict(orient="records") == [ + assert entity_text_records(result, {"n": "nodes"}) == [ {"n": "(:X {prop: 42})", "b": False}, {"n": "(:X)", "b": True}, ] @@ -2826,7 +2828,7 @@ def test_string_cypher_formats_mixed_node_entity_and_not_null_predicate_projecti result = _mk_graph(nodes, edges).gfql("MATCH (n:X) RETURN n, n.prop IS NOT NULL AS b") - assert result._nodes.to_dict(orient="records") == [ + assert entity_text_records(result, {"n": "nodes"}) == [ {"n": "(:X {prop: 42})", "b": True}, {"n": "(:X)", "b": False}, ] @@ -2846,7 +2848,7 @@ def test_string_cypher_formats_mixed_edge_entity_projection() -> None: result = _mk_graph(nodes, edges).gfql("MATCH ()-[r]->() RETURN r AS rel, type(r) AS rel_type") - assert result._nodes.to_dict(orient="records") == [ + assert entity_text_records(result, {"rel": "edges"}) == [ {"rel": "[:KNOWS {weight: 5}]", "rel_type": "KNOWS"} ] @@ -2990,7 +2992,7 @@ def test_string_cypher_orders_distinct_whole_row_by_missing_property() -> None: result = graph.gfql("MATCH (a)-->(b) RETURN DISTINCT b ORDER BY b.name") - assert result._nodes.to_dict(orient="records") == [{"b": "(:B)"}] + assert entity_text_records(result, {"b": "nodes"}) == [{"b": "(:B)"}] @pytest.mark.parametrize( @@ -3033,7 +3035,7 @@ def test_string_cypher_formats_match_node_without_null_type_label() -> None: result = graph.gfql("MATCH (n {name: 'bar'}) RETURN n") - assert result._nodes.to_dict(orient="records") == [{"n": 
"({name: 'bar'})"}] + assert entity_text_records(result, {"n": "nodes"}) == [{"n": "({name: 'bar'})"}] def test_string_cypher_ignores_placeholder_label_columns_in_entity_rendering() -> None: @@ -3050,7 +3052,7 @@ def test_string_cypher_ignores_placeholder_label_columns_in_entity_rendering() - result = graph.gfql("MATCH (n {name: 'bar'}) RETURN n") - assert result._nodes.to_dict(orient="records") == [{"n": "({name: 'bar'})"}] + assert entity_text_records(result, {"n": "nodes"}) == [{"n": "({name: 'bar'})"}] def test_string_cypher_formats_numeric_id_as_entity_property() -> None: @@ -3061,7 +3063,7 @@ def test_string_cypher_formats_numeric_id_as_entity_property() -> None: result = graph.gfql("MATCH (n) RETURN DISTINCT n ORDER BY n.id") - assert result._nodes.to_dict(orient="records") == [{"n": "({id: 1})"}, {"n": "({id: 10})"}] + assert entity_text_records(result, {"n": "nodes"}) == [{"n": "({id: 1})"}, {"n": "({id: 10})"}] def test_string_cypher_formats_small_float_entity_properties_without_scientific_notation() -> None: @@ -3077,7 +3079,7 @@ def test_string_cypher_formats_small_float_entity_properties_without_scientific_ result = graph.gfql("MATCH (n) RETURN n ORDER BY n.num DESC") - assert result._nodes.to_dict(orient="records") == [ + assert entity_text_records(result, {"n": "nodes"}) == [ {"n": "({num: 30.94857})"}, {"n": "({num: 0.00002})"}, {"n": "({num: -0.00002})"}, @@ -3092,7 +3094,7 @@ def test_string_cypher_supports_return_star_with_order_by() -> None: result = graph.gfql("MATCH (n) RETURN * ORDER BY n.id") - assert result._nodes.to_dict(orient="records") == [{"n": "({id: 1})"}, {"n": "({id: 10})"}] + assert entity_text_records(result, {"n": "nodes"}) == [{"n": "({id: 1})"}, {"n": "({id: 10})"}] def test_string_cypher_supports_return_label_predicate_expression() -> None: @@ -3192,7 +3194,7 @@ def test_string_cypher_supports_generic_match_where_boolean_expression() -> None result = graph.gfql("MATCH (n)\nWHERE NOT(n.name = 'apa' AND false)\nRETURN n") - 
assert result._nodes.to_dict(orient="records") == [{"n": "({name: 'a'})"}] + assert entity_text_records(result, {"n": "nodes"}) == [{"n": "({name: 'a'})"}] def test_string_cypher_executes_searched_case_projection() -> None: @@ -3567,6 +3569,8 @@ def test_string_cypher_supports_whole_row_grouping_with_post_aggregate_expressio result = graph.gfql("MATCH (a) RETURN a, count(a) + 3") + # Aggregate/grouping projection still renders the entity as a single text + # column via a separate path (not the structured #1650 terminal-RETURN path). assert result._nodes.to_dict(orient="records") == [{"a": "()", "count(a) + 3": 4}] @@ -3583,6 +3587,7 @@ def test_string_cypher_supports_whole_row_grouping_with_count_star() -> None: result = graph.gfql("MATCH (a:L) RETURN a, count(*)") + # Aggregate/grouping projection renders entity text via a separate path. assert result._nodes.to_dict(orient="records") == [{"a": "(:L)", "count(*)": 1}] @@ -3839,6 +3844,7 @@ def test_string_cypher_supports_with_whole_row_grouping_then_return() -> None: "RETURN x, c" ) + # Aggregate/grouping projection renders entity text via a separate path. assert result._nodes.to_dict(orient="records") == [{"x": "(:X)", "c": 1}] @@ -4042,7 +4048,7 @@ def test_string_cypher_supports_bare_label_predicate_in_with_where() -> None: result = graph.gfql("MATCH (:Root {name: 'x'})-->(i:TextNode) WITH i WHERE i.var > 'te' AND i:TextNode RETURN i") - assert result._nodes.to_dict(orient="records") == [{"i": "(:TextNode {var: 'tf'})"}] + assert entity_text_records(result, {"i": "nodes"}) == [{"i": "(:TextNode {var: 'tf'})"}] # OR/NOT WHERE shapes — Earley admits them where LALR rejected. 
Pandas @@ -4069,21 +4075,21 @@ def _or_where_graph() -> "_CypherTestGraph": def test_string_cypher_executes_disjunctive_property_predicate_returns_union() -> None: result = _or_where_graph().gfql("MATCH (n) WHERE n.p1 = 12 OR n.p2 = 13 RETURN n") - rendered = sorted(row["n"] for row in result._nodes.to_dict(orient="records")) + rendered = sorted(row["n"] for row in entity_text_records(result, {"n": "nodes"})) assert rendered == ["(:A {p1: 12})", "(:B {p2: 13})"] def test_string_cypher_executes_disjunctive_same_alias_property_predicate() -> None: result = _or_where_graph().gfql("MATCH (n) WHERE n.p1 = 12 OR n.p1 = 99 RETURN n") - rendered = sorted(row["n"] for row in result._nodes.to_dict(orient="records")) + rendered = sorted(row["n"] for row in entity_text_records(result, {"n": "nodes"})) assert rendered == ["(:A {p1: 12})"] def test_string_cypher_executes_negation_property_predicate_returns_complement() -> None: result = _or_where_graph().gfql("MATCH (n) WHERE NOT n.p1 = 12 RETURN n") - rendered = sorted(row["n"] for row in result._nodes.to_dict(orient="records")) + rendered = sorted(row["n"] for row in entity_text_records(result, {"n": "nodes"})) assert rendered == [] @@ -4092,7 +4098,7 @@ def test_string_cypher_executes_disjunctive_then_conjunction() -> None: "MATCH (n) WHERE (n.p1 = 12 OR n.p2 = 13) AND n.id = 'a' RETURN n" ) - rendered = [row["n"] for row in result._nodes.to_dict(orient="records")] + rendered = [row["n"] for row in entity_text_records(result, {"n": "nodes"})] assert rendered == ["(:A {p1: 12})"] @@ -6227,7 +6233,12 @@ def test_string_cypher_pattern_predicates_are_existence_checks_not_row_expansion if engine == "cudf": assert type(result._nodes).__module__.startswith("cudf") - assert _to_pandas_df(result._nodes).to_dict(orient="records") == expected_rows + _entities = { + name: ("edges" if str(val).startswith("[") else "nodes") + for name, val in (expected_rows[0].items() if expected_rows else []) + if str(val).startswith(("(", "[")) + } + 
assert entity_text_records(result, _entities) == expected_rows @pytest.mark.parametrize( @@ -6724,6 +6735,8 @@ def test_string_cypher_supports_optional_match_optional_alias_projection_when_al result = graph.gfql("MATCH (a:Single), (c:C) OPTIONAL MATCH (a)-->(b)-->(c) RETURN b") + # OPTIONAL row-guard path keeps whole entities as text (structured #1650 flip + # is gated off for the reentry/optional machinery; unify in follow-up). assert result._nodes.to_dict(orient="records") == [{"b": "(:A {num: 42})"}] @@ -6838,7 +6851,7 @@ def test_string_cypher_formats_temporal_constructor_properties_in_entity_project "MATCH (a) WITH a, a.date AS date WITH a, date ORDER BY date ASC LIMIT 2 RETURN a, date" ) - assert result._nodes.to_dict(orient="records") == [ + assert entity_text_records(result, {"a": "nodes"}) == [ {"a": "(:A {date: '1910-05-06'})", "date": "1910-05-06"}, {"a": "(:A {date: '1980-10-24'})", "date": "1980-10-24"}, ] @@ -6864,7 +6877,7 @@ def test_string_cypher_orders_temporal_constructor_time_properties() -> None: "MATCH (a) WITH a, a.time AS time WITH a, time ORDER BY time ASC LIMIT 3 RETURN a, time" ) - assert result._nodes.to_dict(orient="records") == [ + assert entity_text_records(result, {"a": "nodes"}) == [ {"a": "(:A {time: '12:35:15+05:00'})", "time": "12:35:15+05:00"}, {"a": "(:A {time: '12:30:14.645876123+01:01'})", "time": "12:30:14.645876123+01:01"}, {"a": "(:A {time: '12:31:14.645876123+01:00'})", "time": "12:31:14.645876123+01:00"}, @@ -6909,7 +6922,7 @@ def test_string_cypher_orders_time_plus_duration_expression() -> None: "MATCH (a) WITH a ORDER BY a.time + duration({minutes: 6}) ASC LIMIT 3 RETURN a" ) - assert result._nodes.to_dict(orient="records") == [ + assert entity_text_records(result, {"a": "nodes"}) == [ {"a": "(:A {time: '12:35:15+05:00'})"}, {"a": "(:A {time: '12:30:14.645876123+01:01'})"}, {"a": "(:A {time: '12:31:14.645876123+01:00'})"}, @@ -6936,7 +6949,7 @@ def test_string_cypher_orders_datetime_plus_duration_expression() -> None: 
"MATCH (a) WITH a ORDER BY a.datetime + duration({days: 4, minutes: 6}) ASC LIMIT 3 RETURN a" ) - assert result._nodes.to_dict(orient="records") == [ + assert entity_text_records(result, {"a": "nodes"}) == [ {"a": "(:A {datetime: '0001-01-01T01:01:01.000000001-11:59'})"}, {"a": "(:A {datetime: '1980-12-11T12:31:14-11:59'})"}, {"a": "(:A {datetime: '1984-10-11T12:31:14.645876123+00:17'})"}, @@ -6964,7 +6977,7 @@ def test_string_cypher_orders_date_plus_duration_expression() -> None: "MATCH (a) WITH a ORDER BY a.date + duration({months: 1, days: 2}) ASC LIMIT 2 RETURN a" ) - assert result._nodes.to_dict(orient="records") == [ + assert entity_text_records(result, {"a": "nodes"}) == [ {"a": "(:A {date: '1910-05-06'})"}, {"a": "(:A {date: '1980-10-24'})"}, ] @@ -6984,7 +6997,7 @@ def test_string_cypher_formats_list_literal_strings_in_entity_projection() -> No "MATCH (a) WITH a, a.list AS list WITH a, list ORDER BY list ASC LIMIT 2 RETURN a, list" ) - assert result._nodes.to_dict(orient="records") == [ + assert entity_text_records(result, {"a": "nodes"}) == [ {"a": "(:A {list: [1, 2]})", "list": "[1, 2]"}, {"a": "(:A {list: [2, -2]})", "list": "[2, -2]"}, ] @@ -7162,7 +7175,7 @@ def test_string_cypher_supports_labels_projection_and_relationship_label_predica ] rel_result = graph.gfql("MATCH ()-[r]->() RETURN r, r:T2 AS result ORDER BY result") - assert sorted(rel_result._nodes.to_dict(orient="records"), key=lambda row: row["r"]) == [ + assert sorted(entity_text_records(rel_result, {"r": "edges"}), key=lambda row: row["r"]) == [ {"r": "[:T1]", "result": False}, {"r": "[:T2]", "result": True}, ] @@ -7347,6 +7360,8 @@ def test_string_cypher_supports_bound_optional_match_whole_row_with_scalar_proje result = graph.gfql("MATCH (a) OPTIONAL MATCH (a)-[r:T]->(b) RETURN b, b.id + '!' AS label") + # OPTIONAL-MATCH null-fill path still renders whole entities as text (the + # structured #1650 flip is gated off for reentry; unification is a follow-up). 
assert result._nodes.to_dict(orient="records") == [ {"b": "()", "label": "b!"}, {"b": None, "label": None}, @@ -9181,7 +9196,7 @@ def test_string_cypher_executes_match_with_then_return_pipeline() -> None: "MATCH (a:A) WITH a ORDER BY a.score DESC LIMIT 2 RETURN a" ) - assert result._nodes.to_dict(orient="records") == [ + assert entity_text_records(result, {"a": "nodes"}) == [ {"a": "(:A {score: 9})"}, {"a": "(:A {score: 5})"}, ] @@ -9202,7 +9217,7 @@ def test_string_cypher_executes_match_with_expression_order_pipeline() -> None: "MATCH (a) WITH a ORDER BY NOT (a.bool AND a.bool2) LIMIT 2 RETURN a" ) - assert result._nodes.to_dict(orient="records") == [ + assert entity_text_records(result, {"a": "nodes"}) == [ {"a": "(:A {bool: true, bool2: true})"}, {"a": "(:D {bool: true, bool2: true})"}, ] @@ -9248,7 +9263,7 @@ def test_string_cypher_executes_match_with_constant_expression_order_pipeline() "RETURN a" ) - assert result._nodes.to_dict(orient="records") == [{"a": "({num: 1, text: 'a'})"}] + assert entity_text_records(result, {"a": "nodes"}) == [{"a": "({num: 1, text: 'a'})"}] def test_string_cypher_executes_match_with_mixed_whole_row_bool_alias_pipeline() -> None: @@ -9270,7 +9285,7 @@ def test_string_cypher_executes_match_with_mixed_whole_row_bool_alias_pipeline() "RETURN a, bool" ) - assert result._nodes.to_dict(orient="records") == [ + assert entity_text_records(result, {"a": "nodes"}) == [ {"a": "(:B {bool: false})", "bool": False}, {"a": "(:C {bool: false})", "bool": False}, {"a": "(:E {bool: false})", "bool": False}, @@ -9296,7 +9311,7 @@ def test_string_cypher_executes_match_with_mixed_whole_row_numeric_alias_pipelin "RETURN a, num" ) - assert result._nodes.to_dict(orient="records") == [ + assert entity_text_records(result, {"a": "nodes"}) == [ {"a": "(:E {num: 7054})", "num": 7054}, {"a": "(:C {num: 30})", "num": 30}, {"a": "(:A {num: 9})", "num": 9}, @@ -9321,7 +9336,7 @@ def test_string_cypher_executes_match_with_mixed_whole_row_computed_alias_pipeli 
"RETURN a, score" ) - assert result._nodes.to_dict(orient="records") == [ + assert entity_text_records(result, {"a": "nodes"}) == [ {"a": "(:C {num: 3})", "score": 4}, {"a": "(:B {num: 2})", "score": 3}, {"a": "(:A {num: 1})", "score": 2}, @@ -9347,7 +9362,7 @@ def test_string_cypher_executes_with_orderby4_style_mixed_whole_row_pipeline() - "RETURN a, sum, mod" ) - assert result._nodes.to_dict(orient="records") == [ + assert entity_text_records(result, {"a": "nodes"}) == [ {"a": "(:A {num: 4, num2: 1})", "sum": 5, "mod": 1}, {"a": "(:A {num: 3, num2: 4})", "sum": 7, "mod": 1}, {"a": "(:A {num: 2, num2: 2})", "sum": 4, "mod": 2}, @@ -9373,7 +9388,7 @@ def test_string_cypher_executes_with_match_reentry_limit_shape() -> None: "MATCH (a:A) WITH a ORDER BY a.name LIMIT 1 MATCH (a)-->(b) RETURN a" ) - assert result._nodes.to_dict(orient="records") == [{"a": "(:A {name: 'alpha'})"}] + assert entity_text_records(result, {"a": "nodes"}) == [{"a": "(:A {name: 'alpha'})"}] def test_string_cypher_executes_with_match_reentry_ordered_topk_multi_row_shape() -> None: @@ -9484,7 +9499,7 @@ def test_string_cypher_executes_with_match_reentry_parameterized_limit_shape() - "MATCH (a:A) WITH a ORDER BY a.name LIMIT $n MATCH (a)-->(b) RETURN a", params={"n": 1}, ) - assert result._nodes.to_dict(orient="records") == [{"a": "(:A {name: 'alpha'})"}] + assert entity_text_records(result, {"a": "nodes"}) == [{"a": "(:A {name: 'alpha'})"}] def test_string_cypher_rejects_reentry_with_parameterized_non_int_limit_and_order() -> None: @@ -9531,7 +9546,7 @@ def test_string_cypher_executes_with_match_reentry_limit_shape_on_cudf() -> None ) assert type(result._nodes).__module__.startswith("cudf") - assert _to_pandas_df(result._nodes).to_dict(orient="records") == [{"a": "(:A {name: 'alpha'})"}] + assert entity_text_records(result, {"a": "nodes"}) == [{"a": "(:A {name: 'alpha'})"}] def test_string_cypher_executes_with_match_reentry_ordered_topk_multi_row_shape_on_cudf() -> None: @@ -9602,7 +9617,7 @@ 
def test_string_cypher_executes_with_match_reentry_parameterized_limit_shape_on_ ) assert type(result._nodes).__module__.startswith("cudf") - assert _to_pandas_df(result._nodes).to_dict(orient="records") == [{"a": "(:A {name: 'alpha'})"}] + assert entity_text_records(result, {"a": "nodes"}) == [{"a": "(:A {name: 'alpha'})"}] def test_string_cypher_failfast_rejects_with_match_reentry_ordered_skip_shape() -> None: @@ -9721,7 +9736,12 @@ def test_compile_cypher_tracks_reentry_carried_scalar_columns( ) def test_string_cypher_executes_with_match_reentry_carried_scalar_shapes(query: str, expected: List[Dict[str, Any]]) -> None: result = _mk_reentry_carried_scalar_graph().gfql(query) - assert result._nodes.to_dict(orient="records") == expected + _entities = { + name: ("edges" if str(val).startswith("[") else "nodes") + for name, val in (expected[0].items() if expected else []) + if str(val).startswith(("(", "[")) + } + assert entity_text_records(result, _entities) == expected @pytest.mark.parametrize( @@ -9751,7 +9771,12 @@ def test_string_cypher_executes_with_match_reentry_carried_scalar_shapes_on_cudf result = _mk_reentry_carried_scalar_graph_cudf().gfql(query, engine="cudf") assert type(result._nodes).__module__.startswith("cudf") - assert _to_pandas_df(result._nodes).to_dict(orient="records") == expected + _entities = { + name: ("edges" if str(val).startswith("[") else "nodes") + for name, val in (expected[0].items() if expected else []) + if str(val).startswith(("(", "[")) + } + assert entity_text_records(result, _entities) == expected def test_string_cypher_executes_with_match_reentry_carried_scalars_from_connected_prefix_shape() -> None: @@ -11651,7 +11676,7 @@ def test_string_cypher_executes_connected_whole_row_plus_scalar_projection() -> "RETURN c, b.id AS bid" ) - assert result._nodes.to_dict(orient="records") == [{"c": "(:C)", "bid": "b"}] + assert entity_text_records(result, {"c": "nodes"}) == [{"c": "(:C)", "bid": "b"}] def 
test_string_cypher_executes_multi_stage_with_match_reentry_connected_shape() -> None: @@ -12675,7 +12700,7 @@ def test_multi_alias_with_stage_whole_row_projection_executes_for_joined_row_pro pd.DataFrame({"s": ["n1", "n2"], "d": ["x1", "x2"], "type": ["R", "R"]}), ) result = g.gfql("MATCH (n)-[rel]->(x) WITH n, x WHERE n.animal = x.animal RETURN n, x") - records = result._nodes.to_dict(orient="records") + records = entity_text_records(result, {"n": "nodes", "x": "nodes"}) assert len(records) == 1 assert "cat" in records[0]["n"] assert "cat" in records[0]["x"] @@ -12686,7 +12711,7 @@ def test_string_cypher_executes_connected_multi_pattern_multi_whole_row_joined_p "MATCH (b:B)-[:S]->(c:C), (c)-[:T]->(d:D) RETURN b, c, d.id AS did ORDER BY did" ) - records = result._nodes.to_dict(orient="records") + records = entity_text_records(result, {"b": "nodes", "c": "nodes"}) assert records == [ {"b": "(:B)", "c": "(:C)", "did": "d1"}, {"b": "(:B)", "c": "(:C)", "did": "d2"}, @@ -12704,7 +12729,7 @@ def test_multi_alias_connected_whole_row_return_with_cross_alias_where_executes_ pd.DataFrame({"s": ["n1", "n2"], "d": ["x1", "x2"], "type": ["R", "R"]}), ) result = g.gfql("MATCH (n)-[rel]->(x) WHERE n.animal = x.animal RETURN n, x") - assert result._nodes.to_dict(orient="records") == [{"n": "({animal: 'cat'})", "x": "({animal: 'cat'})"}] + assert entity_text_records(result, {"n": "nodes", "x": "nodes"}) == [{"n": "({animal: 'cat'})", "x": "({animal: 'cat'})"}] def test_multi_alias_connected_cross_alias_where_scalar_projection_remains_supported() -> None: @@ -12732,7 +12757,7 @@ def test_multi_alias_connected_cross_alias_where_single_whole_row_projection_rem pd.DataFrame({"s": ["n1", "n2"], "d": ["x1", "x2"], "type": ["R", "R"]}), ) result = g.gfql("MATCH (n)-[rel]->(x) WHERE n.animal = x.animal RETURN n, x.id AS x_id") - assert result._nodes.to_dict(orient="records") == [{"n": "({animal: 'cat'})", "x_id": "x1"}] + assert entity_text_records(result, {"n": "nodes"}) == [{"n": 
"({animal: 'cat'})", "x_id": "x1"}] def test_compile_cypher_tracks_seeded_top_level_row_query() -> None: @@ -12915,7 +12940,8 @@ def test_gfql_preserves_group_order_for_aggregate_order_ties_on_cudf() -> None: engine="cudf", ) - assert _to_pandas_df(result._nodes).to_dict(orient="records") == [ + # Aggregate/grouping projection renders entity text via a separate path. + assert result._nodes.to_pandas().to_dict(orient="records") == [ {"a": "(:L1)", "count(*)": 1}, {"a": "(:L2)", "count(*)": 1}, {"a": "(:L3)", "count(*)": 1}, @@ -13336,7 +13362,7 @@ def test_gfql_executes_with_where_or_short_circuit_over_mixed_type_compare() -> "ORDER BY i.id" ) - assert result._nodes.to_dict(orient="records") == [ + assert entity_text_records(result, {"i": "nodes"}) == [ {"i": "(:TextNode {var: 'text'})"}, {"i": "(:IntNode {var: 0})"}, ] diff --git a/graphistry/tests/compute/gfql/cypher/test_result_postprocess.py b/graphistry/tests/compute/gfql/cypher/test_result_postprocess.py index 8d7fb6e61a..58e368f397 100644 --- a/graphistry/tests/compute/gfql/cypher/test_result_postprocess.py +++ b/graphistry/tests/compute/gfql/cypher/test_result_postprocess.py @@ -1,13 +1,17 @@ from __future__ import annotations +from typing import Any + import pandas as pd import graphistry from graphistry.compute.gfql.cypher.lowering import ResultProjectionColumn, ResultProjectionPlan from graphistry.compute.gfql.cypher.result_postprocess import apply_result_projection +from graphistry.tests.compute.gfql.cypher._whole_entity_compat import entity_text_records -def test_apply_result_projection_preserves_prefixed_whole_row_metadata() -> None: + +def _project_alice() -> Any: rows = pd.DataFrame( { "id": ["a"], @@ -18,8 +22,7 @@ def test_apply_result_projection_preserves_prefixed_whole_row_metadata() -> None } ) result = graphistry.bind(node="id").nodes(rows) - - out = apply_result_projection( + return apply_result_projection( result, ResultProjectionPlan( alias="n", @@ -31,7 +34,26 @@ def 
test_apply_result_projection_preserves_prefixed_whole_row_metadata() -> None ), ) - assert out._nodes.to_dict(orient="records") == [{"node": "(:Person {name: 'Alice'})", "name": "Alice"}] + +def test_apply_result_projection_emits_flat_whole_row_columns() -> None: + # #1650: whole-entity projection is structured (one column per field), not a + # Cypher display string. The scalar `name` projection passes through alongside. + out = _project_alice() + assert out._nodes.to_dict(orient="records") == [ + {"node.id": "a", "node.name": "Alice", "node.label__Person": True, "name": "Alice"} + ] + + +def test_apply_result_projection_renders_whole_row_text_via_helper() -> None: + # The structured columns losslessly reconstruct the pre-#1650 Cypher text form. + out = _project_alice() + assert entity_text_records(out, {"node": "nodes"}) == [ + {"node": "(:Person {name: 'Alice'})", "name": "Alice"} + ] + + +def test_apply_result_projection_preserves_prefixed_whole_row_metadata() -> None: + out = _project_alice() assert out._cypher_entity_projection_meta["node"]["table"] == "nodes" assert out._cypher_entity_projection_meta["node"]["alias"] == "n" assert out._cypher_entity_projection_meta["node"]["id_column"] == "id" diff --git a/graphistry/tests/compute/test_gfql.py b/graphistry/tests/compute/test_gfql.py index 51dd50a6bf..002d127ee1 100644 --- a/graphistry/tests/compute/test_gfql.py +++ b/graphistry/tests/compute/test_gfql.py @@ -246,8 +246,10 @@ def test_gfql_with_cypher_string_defaults_language_to_cypher(self): result = g.gfql("MATCH (p:person) RETURN p LIMIT 1") + # structured whole-entity returns (#1650): RETURN p flattens to p.* columns + # (was the display string '(:person)' before the structured-returns change). 
assert len(result._nodes) == 1 - assert result._nodes.iloc[0]["p"] == "(:person)" + assert result._nodes.iloc[0]["p.type"] == "person" def test_gfql_string_invalid_syntax_surfaces_parser_error(self): g = _mk_people_company_graph3() @@ -401,11 +403,38 @@ def test_gfql_executes_cypher_union_of_whole_row_entity_outputs(self): result = g.gfql("MATCH (a:A) RETURN a AS a UNION MATCH (b:B) RETURN b AS a") + # structured whole-entity returns (#1650): RETURN a AS a flattens to a.* columns + # (was the display string '(:A)'/'(:B)' before the structured-returns change). assert result._nodes.to_dict(orient="records") == [ - {"a": "(:A)"}, - {"a": "(:B)"}, + {"a.id": "a", "a.type": "A"}, + {"a.id": "b", "a.type": "B"}, ] + def test_gfql_whole_entity_plus_property_projection_no_duplicate_column(self): + """#1650: `RETURN a, a.val` must not emit a duplicate `a.val` column. The + whole-entity flatten (`a` -> a.id, a.val, ...) shares the `{alias}.{field}` + namespace with the explicit `a.val` projection; the duplicate is collapsed + (identical data) so selection + serialization stay well-formed.""" + nodes_df = pd.DataFrame({"id": ["x", "y"], "val": [1, 2]}) + g = CGFull().nodes(nodes_df, "id").edges(pd.DataFrame({"s": ["x"], "d": ["y"]}), "s", "d") + for q in ("MATCH (a) RETURN a, a.val", "MATCH (a) RETURN a.val, a", "MATCH (a) RETURN a, a.id"): + cols = list(g.gfql(q)._nodes.columns) + assert len(cols) == len(set(cols)), f"duplicate column for {q!r}: {cols}" + assert "a.val" in cols and "a.id" in cols + # round-trips without the "columns are not unique" data-dropping warning + recs = g.gfql(q)._nodes.to_dict(orient="records") + assert recs[0]["a.val"] in (1, 2) + + def test_gfql_whole_entity_with_no_fields_falls_back_to_text(self): + """#1650 boundary: a whole entity with no flattenable field (edge with no + edge-id binding, no properties, no type) has nothing to flatten, so it emits + the single Cypher-display-text column under the bare alias. 
Value is correct + (`[]`); documented in CHANGELOG. Nodes always carry an id field and flatten.""" + g = CGFull().edges(pd.DataFrame({"s": ["x", "y"], "d": ["y", "z"]}), "s", "d") + out = g.gfql("MATCH (a)-[e]->(b) RETURN e")._nodes + assert list(out.columns) == ["e"] + assert out["e"].tolist() == ["[]", "[]"] + def test_gfql_rejects_cypher_union_with_mismatched_columns(self): with pytest.raises(GFQLValidationError) as exc_info: _mk_empty_graph().gfql("RETURN 1 AS a UNION RETURN 2 AS b") diff --git a/graphistry/tests/test_policy_hooks.py b/graphistry/tests/test_policy_hooks.py index 72db1e47cd..9ade653bc3 100644 --- a/graphistry/tests/test_policy_hooks.py +++ b/graphistry/tests/test_policy_hooks.py @@ -91,6 +91,29 @@ def postload_policy(context: PolicyContext) -> None: assert out._node is not None, f"{ops_label}: node-id binding lost" assert None not in list(out._nodes.columns), f"{ops_label}: corrupt None column" + @pytest.mark.skipif("TEST_CUDF" not in __import__("os").environ, reason="cuDF lane: set TEST_CUDF=1 (e.g. dgx-spark)") + @pytest.mark.parametrize("ops_label,ops", [ + ("node_only", [n()]), + ("single_hop", [n(), e(), n()]), + ]) + def test_fast_path_shapes_still_invoke_postload_cudf(self, ops_label, ops): + """cuDF lane for the policy-gate + edges-only node-binding fix (chain.py is on + the cuDF-pairing list). 
The void-block crash is pandas-specific, but cuDF must + still fire postload and return a valid, non-corrupt result on an edges-only graph.""" + cudf = pytest.importorskip("cudf") + called = {'postload': False} + + def postload_policy(context: PolicyContext) -> None: + called['postload'] = True + + df = cudf.DataFrame({'s': ['a', 'b', 'c'], 'd': ['b', 'c', 'd']}) + g = graphistry.edges(df, 's', 'd') + + out = g.gfql(ops, policy={'postload': postload_policy}) + assert called['postload'], f"postload must fire for {ops_label} (cuDF)" + assert out._node is not None, f"{ops_label} (cuDF): node-id binding lost" + assert None not in list(out._nodes.columns), f"{ops_label} (cuDF): corrupt None column" + def test_precall_hook_called(self): """Test that precall hook is called for call operations.""" from graphistry.compute.ast import call