From 8605075902c7d8afc10bcf1a308d9a76a28bd2a2 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 10 Apr 2026 22:17:15 +0000 Subject: [PATCH 01/39] refactor: deprecate and clean up multimodal blob APIs --- .../bigframes/bigquery/_operations/ai.py | 4 +- .../bigframes/bigframes/blob/_functions.py | 602 ---------- packages/bigframes/bigframes/dataframe.py | 53 +- packages/bigframes/bigframes/ml/llm.py | 4 +- .../bigframes/bigframes/operations/blob.py | 1029 +---------------- .../bigframes/bigframes/operations/strings.py | 29 +- packages/bigframes/bigframes/series.py | 77 +- .../bigframes/bigframes/session/__init__.py | 42 +- .../tests/system/large/blob/test_function.py | 853 -------------- .../sqlglot/expressions/test_blob_ops.py | 12 +- 10 files changed, 130 insertions(+), 2575 deletions(-) delete mode 100644 packages/bigframes/tests/system/large/blob/test_function.py diff --git a/packages/bigframes/bigframes/bigquery/_operations/ai.py b/packages/bigframes/bigframes/bigquery/_operations/ai.py index 7a509d4f95ff..6164c863b391 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/ai.py +++ b/packages/bigframes/bigframes/bigquery/_operations/ai.py @@ -1003,7 +1003,7 @@ def _separate_context_and_series( if isinstance(prompt, series.Series): if prompt.dtype == dtypes.OBJ_REF_DTYPE: # Multi-model support - return [None], [prompt.blob.read_url()] + return [None], [prompt._blob._read_url()] return [None], [prompt] prompt_context: List[str | None] = [] @@ -1040,7 +1040,7 @@ def _convert_series( if result.dtype == dtypes.OBJ_REF_DTYPE: # Support multimodel - return result.blob.read_url() + return result._blob._read_url() return result diff --git a/packages/bigframes/bigframes/blob/_functions.py b/packages/bigframes/bigframes/blob/_functions.py index 5114f60058c1..3869416d1244 100644 --- a/packages/bigframes/bigframes/blob/_functions.py +++ b/packages/bigframes/bigframes/blob/_functions.py @@ -124,605 +124,3 @@ def udf(self): # TODO(b/404605969): remove cleanups when 
UDF fixes dataset deletion. self._session._function_session._update_temp_artifacts(udf_name, "") return self._session.read_gbq_function(udf_name) - - -def exif_func(src_obj_ref_rt: str, verbose: bool) -> str: - try: - import io - import json - - import requests - from PIL import ExifTags, Image - from requests import adapters - - session = requests.Session() - session.mount("https://", adapters.HTTPAdapter(max_retries=3)) - - src_obj_ref_rt_json = json.loads(src_obj_ref_rt) - src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - - response = session.get(src_url, timeout=30) - response.raise_for_status() - bts = response.content - - image = Image.open(io.BytesIO(bts)) - exif_data = image.getexif() - exif_dict = {} - - if exif_data: - for tag, value in exif_data.items(): - tag_name = ExifTags.TAGS.get(tag, tag) - # Convert non-serializable types to strings - try: - json.dumps(value) - exif_dict[tag_name] = value - except (TypeError, ValueError): - exif_dict[tag_name] = str(value) - - if verbose: - return json.dumps({"status": "", "content": json.dumps(exif_dict)}) - else: - return json.dumps(exif_dict) - - except Exception as e: - # Return error as JSON with error field - error_result = {"status": f"{type(e).__name__}: {str(e)}", "content": "{}"} - if verbose: - return json.dumps(error_result) - else: - return "{}" - - -exif_func_def = FunctionDef(exif_func, ["pillow", "requests"]) - - -# Blur images. Takes ObjectRefRuntime as JSON string. Outputs ObjectRefRuntime JSON string. 
-def image_blur_func( - src_obj_ref_rt: str, - dst_obj_ref_rt: str, - ksize_x: int, - ksize_y: int, - ext: str, - verbose: bool, -) -> typing.Optional[str]: - try: - import json - - import cv2 as cv # type: ignore - import numpy as np - import requests - from requests import adapters - - session = requests.Session() - session.mount("https://", adapters.HTTPAdapter(max_retries=3)) - - ext = ext or ".jpeg" - - src_obj_ref_rt_json = json.loads(src_obj_ref_rt) - dst_obj_ref_rt_json = json.loads(dst_obj_ref_rt) - - src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - dst_url = dst_obj_ref_rt_json["access_urls"]["write_url"] - - response = session.get(src_url, timeout=30) - response.raise_for_status() # Raise exception for HTTP errors - bts = response.content - - nparr = np.frombuffer(bts, np.uint8) - img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED) - - if img is None: - raise ValueError( - "Failed to decode image - possibly corrupted or unsupported format" - ) - - img_blurred = cv.blur(img, ksize=(ksize_x, ksize_y)) - - success, encoded = cv.imencode(ext, img_blurred) - if not success: - raise ValueError(f"Failed to encode image with extension {ext}") - - bts = encoded.tobytes() - - ext = ext.replace(".", "") - ext_mappings = {"jpg": "jpeg", "tif": "tiff"} - ext = ext_mappings.get(ext, ext) - content_type = "image/" + ext - - put_response = session.put( - url=dst_url, - data=bts, - headers={"Content-Type": content_type}, - timeout=30, - ) - put_response.raise_for_status() - - if verbose: - return json.dumps({"status": "", "content": dst_obj_ref_rt}) - else: - return dst_obj_ref_rt - - except Exception as e: - if verbose: - error_result = { - "status": f"Error: {type(e).__name__}: {str(e)}", - "content": "", - } - return json.dumps(error_result) - else: - return None - - -image_blur_def = FunctionDef(image_blur_func, ["opencv-python", "numpy", "requests"]) - - -def image_blur_to_bytes_func( - src_obj_ref_rt: str, ksize_x: int, ksize_y: int, ext: str, verbose: bool -) 
-> str: - import base64 - import json - - try: - import cv2 as cv # type: ignore - import numpy as np - import requests - from requests import adapters - - session = requests.Session() - session.mount("https://", adapters.HTTPAdapter(max_retries=3)) - - ext = ext or ".jpeg" - - src_obj_ref_rt_json = json.loads(src_obj_ref_rt) - src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - - response = session.get(src_url, timeout=30) - response.raise_for_status() - bts = response.content - - nparr = np.frombuffer(bts, np.uint8) - img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED) - if img is None: - raise ValueError( - "Failed to decode image - possibly corrupted or unsupported format" - ) - img_blurred = cv.blur(img, ksize=(ksize_x, ksize_y)) - success, encoded = cv.imencode(ext, img_blurred) - if not success: - raise ValueError(f"Failed to encode image with extension {ext}") - content = encoded.tobytes() - - encoded_content = base64.b64encode(content).decode("utf-8") - result_dict = {"status": "", "content": encoded_content} - if verbose: - return json.dumps(result_dict) - else: - return result_dict["content"] - - except Exception as e: - status = f"Error: {type(e).__name__}: {str(e)}" - encoded_content = base64.b64encode(b"").decode("utf-8") - result_dict = {"status": status, "content": encoded_content} - if verbose: - return json.dumps(result_dict) - else: - return result_dict["content"] - - -image_blur_to_bytes_def = FunctionDef( - image_blur_to_bytes_func, ["opencv-python", "numpy", "requests"] -) - - -def image_resize_func( - src_obj_ref_rt: str, - dst_obj_ref_rt: str, - dsize_x: int, - dsize_y: int, - fx: float, - fy: float, - ext: str, - verbose: bool, -) -> typing.Optional[str]: - try: - import json - - import cv2 as cv # type: ignore - import numpy as np - import requests - from requests import adapters - - session = requests.Session() - session.mount("https://", adapters.HTTPAdapter(max_retries=3)) - - ext = ext or ".jpeg" - - src_obj_ref_rt_json = 
json.loads(src_obj_ref_rt) - dst_obj_ref_rt_json = json.loads(dst_obj_ref_rt) - - src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - dst_url = dst_obj_ref_rt_json["access_urls"]["write_url"] - - response = session.get(src_url, timeout=30) - response.raise_for_status() - bts = response.content - - nparr = np.frombuffer(bts, np.uint8) - img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED) - if img is None: - raise ValueError( - "Failed to decode image - possibly corrupted or unsupported format" - ) - img_resized = cv.resize(img, dsize=(dsize_x, dsize_y), fx=fx, fy=fy) - - success, encoded = cv.imencode(ext, img_resized) - if not success: - raise ValueError(f"Failed to encode image with extension {ext}") - bts = encoded.tobytes() - - ext = ext.replace(".", "") - ext_mappings = {"jpg": "jpeg", "tif": "tiff"} - ext = ext_mappings.get(ext, ext) - content_type = "image/" + ext - - put_response = session.put( - url=dst_url, - data=bts, - headers={ - "Content-Type": content_type, - }, - timeout=30, - ) - put_response.raise_for_status() - - if verbose: - return json.dumps({"status": "", "content": dst_obj_ref_rt}) - else: - return dst_obj_ref_rt - - except Exception as e: - if verbose: - error_result = { - "status": f"Error: {type(e).__name__}: {str(e)}", - "content": "", - } - return json.dumps(error_result) - else: - return None - - -image_resize_def = FunctionDef( - image_resize_func, ["opencv-python", "numpy", "requests"] -) - - -def image_resize_to_bytes_func( - src_obj_ref_rt: str, - dsize_x: int, - dsize_y: int, - fx: float, - fy: float, - ext: str, - verbose: bool, -) -> str: - import base64 - import json - - try: - import cv2 as cv # type: ignore - import numpy as np - import requests - from requests import adapters - - session = requests.Session() - session.mount("https://", adapters.HTTPAdapter(max_retries=3)) - - ext = ext or ".jpeg" - - src_obj_ref_rt_json = json.loads(src_obj_ref_rt) - src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - - response = 
session.get(src_url, timeout=30) - response.raise_for_status() - bts = response.content - - nparr = np.frombuffer(bts, np.uint8) - img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED) - if img is None: - raise ValueError( - "Failed to decode image - possibly corrupted or unsupported format" - ) - img_resized = cv.resize(img, dsize=(dsize_x, dsize_y), fx=fx, fy=fy) - success, encoded = cv.imencode(ext, img_resized) - if not success: - raise ValueError(f"Failed to encode image with extension {ext}") - content = encoded.tobytes() - - encoded_content = base64.b64encode(content).decode("utf-8") - result_dict = {"status": "", "content": encoded_content} - if verbose: - return json.dumps(result_dict) - else: - return result_dict["content"] - - except Exception as e: - status = f"Error: {type(e).__name__}: {str(e)}" - encoded_content = base64.b64encode(b"").decode("utf-8") - result_dict = {"status": status, "content": encoded_content} - if verbose: - return json.dumps(result_dict) - else: - return result_dict["content"] - - -image_resize_to_bytes_def = FunctionDef( - image_resize_to_bytes_func, ["opencv-python", "numpy", "requests"] -) - - -def image_normalize_func( - src_obj_ref_rt: str, - dst_obj_ref_rt: str, - alpha: float, - beta: float, - norm_type: str, - ext: str, - verbose: bool, -) -> typing.Optional[str]: - try: - import json - - import cv2 as cv # type: ignore - import numpy as np - import requests - from requests import adapters - - session = requests.Session() - session.mount("https://", adapters.HTTPAdapter(max_retries=3)) - - ext = ext or ".jpeg" - - norm_type_mapping = { - "inf": cv.NORM_INF, - "l1": cv.NORM_L1, - "l2": cv.NORM_L2, - "minmax": cv.NORM_MINMAX, - } - - src_obj_ref_rt_json = json.loads(src_obj_ref_rt) - dst_obj_ref_rt_json = json.loads(dst_obj_ref_rt) - - src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - dst_url = dst_obj_ref_rt_json["access_urls"]["write_url"] - - response = session.get(src_url, timeout=30) - response.raise_for_status() - 
bts = response.content - - nparr = np.frombuffer(bts, np.uint8) - img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED) - if img is None: - raise ValueError( - "Failed to decode image - possibly corrupted or unsupported format" - ) - img_normalized = cv.normalize( - img, None, alpha=alpha, beta=beta, norm_type=norm_type_mapping[norm_type] - ) - - success, encoded = cv.imencode(ext, img_normalized) - if not success: - raise ValueError(f"Failed to encode image with extension {ext}") - bts = encoded.tobytes() - - ext = ext.replace(".", "") - ext_mappings = {"jpg": "jpeg", "tif": "tiff"} - ext = ext_mappings.get(ext, ext) - content_type = "image/" + ext - - put_response = session.put( - url=dst_url, - data=bts, - headers={ - "Content-Type": content_type, - }, - timeout=30, - ) - put_response.raise_for_status() - - if verbose: - return json.dumps({"status": "", "content": dst_obj_ref_rt}) - else: - return dst_obj_ref_rt - - except Exception as e: - if verbose: - error_result = { - "status": f"Error: {type(e).__name__}: {str(e)}", - "content": "", - } - return json.dumps(error_result) - else: - return None - - -image_normalize_def = FunctionDef( - image_normalize_func, ["opencv-python", "numpy", "requests"] -) - - -def image_normalize_to_bytes_func( - src_obj_ref_rt: str, - alpha: float, - beta: float, - norm_type: str, - ext: str, - verbose: bool, -) -> str: - import base64 - import json - - try: - import cv2 as cv # type: ignore - import numpy as np - import requests - from requests import adapters - - session = requests.Session() - session.mount("https://", adapters.HTTPAdapter(max_retries=3)) - - ext = ext or ".jpeg" - - norm_type_mapping = { - "inf": cv.NORM_INF, - "l1": cv.NORM_L1, - "l2": cv.NORM_L2, - "minmax": cv.NORM_MINMAX, - } - - src_obj_ref_rt_json = json.loads(src_obj_ref_rt) - src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - - response = session.get(src_url, timeout=30) - response.raise_for_status() - bts = response.content - - nparr = 
np.frombuffer(bts, np.uint8) - img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED) - if img is None: - raise ValueError( - "Failed to decode image - possibly corrupted or unsupported format" - ) - img_normalized = cv.normalize( - img, None, alpha=alpha, beta=beta, norm_type=norm_type_mapping[norm_type] - ) - success, encoded = cv.imencode(ext, img_normalized) - if not success: - raise ValueError(f"Failed to encode image with extension {ext}") - content = encoded.tobytes() - - encoded_content = base64.b64encode(content).decode("utf-8") - result_dict = {"status": "", "content": encoded_content} - - if verbose: - return json.dumps(result_dict) - else: - return result_dict["content"] - - except Exception as e: - status = f"Error: {type(e).__name__}: {str(e)}" - encoded_content = base64.b64encode(b"").decode("utf-8") - result_dict = {"status": status, "content": encoded_content} - if verbose: - return json.dumps(result_dict) - else: - return result_dict["content"] - - -image_normalize_to_bytes_def = FunctionDef( - image_normalize_to_bytes_func, ["opencv-python", "numpy", "requests"] -) - - -# Extracts all text from a PDF url -def pdf_extract_func(src_obj_ref_rt: str, verbose: bool) -> str: - try: - import io - import json - - import requests - from pypdf import PdfReader # type: ignore - from requests import adapters - - session = requests.Session() - session.mount("https://", adapters.HTTPAdapter(max_retries=3)) - - src_obj_ref_rt_json = json.loads(src_obj_ref_rt) - src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - - response = session.get(src_url, timeout=30, stream=True) - response.raise_for_status() - pdf_bytes = response.content - - pdf_file = io.BytesIO(pdf_bytes) - reader = PdfReader(pdf_file, strict=False) - - all_text = "" - for page in reader.pages: - page_extract_text = page.extract_text() - if page_extract_text: - all_text += page_extract_text - - result_dict = {"status": "", "content": all_text} - - except Exception as e: - result_dict = {"status": 
str(e), "content": ""} - - if verbose: - return json.dumps(result_dict) - else: - return result_dict["content"] - - -pdf_extract_def = FunctionDef( - pdf_extract_func, ["pypdf>=5.3.1,<6.0.0", "requests", "cryptography==43.0.3"] -) - - -# Extracts text from a PDF url and chunks it simultaneously -def pdf_chunk_func( - src_obj_ref_rt: str, chunk_size: int, overlap_size: int, verbose: bool -) -> str: - try: - import io - import json - - import requests - from pypdf import PdfReader # type: ignore - from requests import adapters - - session = requests.Session() - session.mount("https://", adapters.HTTPAdapter(max_retries=3)) - - src_obj_ref_rt_json = json.loads(src_obj_ref_rt) - src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - - response = session.get(src_url, timeout=30, stream=True) - response.raise_for_status() - pdf_bytes = response.content - - pdf_file = io.BytesIO(pdf_bytes) - reader = PdfReader(pdf_file, strict=False) - # extract and chunk text simultaneously - all_text_chunks = [] - curr_chunk = "" - for page in reader.pages: - page_text = page.extract_text() - if page_text: - curr_chunk += page_text - # split the accumulated text into chunks of a specific size with overlaop - # this loop implements a sliding window approach to create chunks - while len(curr_chunk) >= chunk_size: - split_idx = curr_chunk.rfind(" ", 0, chunk_size) - if split_idx == -1: - split_idx = chunk_size - actual_chunk = curr_chunk[:split_idx] - all_text_chunks.append(actual_chunk) - overlap = curr_chunk[split_idx + 1 : split_idx + 1 + overlap_size] - curr_chunk = overlap + curr_chunk[split_idx + 1 + overlap_size :] - if curr_chunk: - all_text_chunks.append(curr_chunk) - - result_dict = {"status": "", "content": all_text_chunks} - - except Exception as e: - result_dict = {"status": str(e), "content": []} - - if verbose: - return json.dumps(result_dict) - else: - return json.dumps(result_dict["content"]) - - -pdf_chunk_def = FunctionDef( - pdf_chunk_func, ["pypdf>=5.3.1,<6.0.0", 
"requests", "cryptography==43.0.3"] -) diff --git a/packages/bigframes/bigframes/dataframe.py b/packages/bigframes/bigframes/dataframe.py index b89360c691d3..a98a44448737 100644 --- a/packages/bigframes/bigframes/dataframe.py +++ b/packages/bigframes/bigframes/dataframe.py @@ -833,7 +833,7 @@ def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]: df = self.copy() for col in blob_cols: # TODO(garrettwu): Not necessary to get access urls for all the rows. Update when having a to get URLs from local data. - df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True) + df[col] = df[col]._blob._get_runtime(mode="R", with_metadata=True) return df, blob_cols def _repr_mimebundle_(self, include=None, exclude=None): @@ -1611,7 +1611,8 @@ def to_pandas( # type: ignore[overload-overlap] ordered: bool = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.DataFrame: ... + ) -> pandas.DataFrame: + ... @overload def to_pandas( @@ -1623,7 +1624,8 @@ def to_pandas( ordered: bool = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... def to_pandas( self, @@ -1935,7 +1937,8 @@ def drop( columns: Union[blocks.Label, Sequence[blocks.Label]] = None, level: typing.Optional[LevelType] = None, inplace: Literal[False] = False, - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def drop( @@ -1947,7 +1950,8 @@ def drop( columns: Union[blocks.Label, Sequence[blocks.Label]] = None, level: typing.Optional[LevelType] = None, inplace: Literal[True], - ) -> None: ... + ) -> None: + ... def drop( self, @@ -2091,17 +2095,20 @@ def _resolve_levels(self, level: LevelsType) -> typing.Sequence[str]: return self._block.index.resolve_level(level) @overload - def rename(self, *, columns: Mapping[blocks.Label, blocks.Label]) -> DataFrame: ... + def rename(self, *, columns: Mapping[blocks.Label, blocks.Label]) -> DataFrame: + ... 
@overload def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: Literal[False] - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: Literal[True] - ) -> None: ... + ) -> None: + ... def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: bool = False @@ -2118,7 +2125,8 @@ def rename( def rename_axis( self, mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def rename_axis( @@ -2127,7 +2135,8 @@ def rename_axis( *, inplace: Literal[False], **kwargs, - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def rename_axis( @@ -2136,7 +2145,8 @@ def rename_axis( *, inplace: Literal[True], **kwargs, - ) -> None: ... + ) -> None: + ... def rename_axis( self, @@ -2332,7 +2342,8 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def reset_index( @@ -2344,7 +2355,8 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> None: ... + ) -> None: + ... @overload def reset_index( @@ -2356,7 +2368,8 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> Optional[DataFrame]: ... + ) -> Optional[DataFrame]: + ... def reset_index( self, @@ -2419,7 +2432,8 @@ def sort_index( ascending: bool = ..., inplace: Literal[False] = ..., na_position: Literal["first", "last"] = ..., - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def sort_index( @@ -2428,7 +2442,8 @@ def sort_index( ascending: bool = ..., inplace: Literal[True] = ..., na_position: Literal["first", "last"] = ..., - ) -> None: ... + ) -> None: + ... 
def sort_index( self, @@ -2474,7 +2489,8 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def sort_values( @@ -2485,7 +2501,8 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> None: ... + ) -> None: + ... def sort_values( self, diff --git a/packages/bigframes/bigframes/ml/llm.py b/packages/bigframes/bigframes/ml/llm.py index bcf59d591f8e..d9e228c90c9f 100644 --- a/packages/bigframes/bigframes/ml/llm.py +++ b/packages/bigframes/bigframes/ml/llm.py @@ -397,7 +397,7 @@ def predict( # TODO(garrettwu): remove transform to ObjRefRuntime when BQML supports ObjRef as input if X["content"].dtype == dtypes.OBJ_REF_DTYPE: - X["content"] = X["content"].blob._get_runtime("R", with_metadata=True) + X["content"] = X["content"]._blob._get_runtime("R", with_metadata=True) options: dict = {} @@ -731,7 +731,7 @@ def predict( isinstance(item, bigframes.series.Series) and item.dtype == dtypes.OBJ_REF_DTYPE ): - item = item.blob._get_runtime("R", with_metadata=True) + item = item._blob._get_runtime("R", with_metadata=True) df_prompt[label] = item df_prompt = df_prompt.drop(columns="bigframes_placeholder_col") diff --git a/packages/bigframes/bigframes/operations/blob.py b/packages/bigframes/bigframes/operations/blob.py index b9a33af2d1ed..d29d1a1202c0 100644 --- a/packages/bigframes/bigframes/operations/blob.py +++ b/packages/bigframes/bigframes/operations/blob.py @@ -33,134 +33,17 @@ @log_adapter.class_logger -class BlobAccessor: +class _BlobAccessor: """ - Blob functions for Series and Index. - - .. note:: - BigFrames Blob is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the - Service Specific Terms(https://cloud.google.com/terms/service-terms#1). 
Pre-GA products and features are available "as is" - and might have limited support. For more information, see the launch stage descriptions - (https://cloud.google.com/products#product-launch-stages). + Internal blob functions for Series and Index. """ def __init__(self, data: bigframes.series.Series): self._data = data - def uri(self) -> bigframes.series.Series: - """URIs of the Blob. - - Returns: - bigframes.series.Series: URIs as string.""" - s = bigframes.series.Series(self._data._block) - - return s.struct.field("uri") - - def authorizer(self) -> bigframes.series.Series: - """Authorizers of the Blob. - - Returns: - bigframes.series.Series: Autorithers(connection) as string.""" - s = bigframes.series.Series(self._data._block) - - return s.struct.field("authorizer") - - def version(self) -> bigframes.series.Series: - """Versions of the Blob. - - Returns: - bigframes.series.Series: Version as string.""" - # version must be retrieved after fetching metadata - return self._data._apply_unary_op(ops.obj_fetch_metadata_op).struct.field( - "version" - ) - - def metadata(self) -> bigframes.series.Series: - """Retrieve the metadata of the Blob. - - Returns: - bigframes.series.Series: JSON metadata of the Blob. Contains fields: content_type, md5_hash, size and updated(time). 
- """ - series_to_check = bigframes.series.Series(self._data._block) - # Check if it's a struct series from a verbose operation - if dtypes.is_struct_like(series_to_check.dtype): - pyarrow_dtype = series_to_check.dtype.pyarrow_dtype - if "content" in [field.name for field in pyarrow_dtype]: - content_field_type = pyarrow_dtype.field("content").type - content_bf_type = dtypes.arrow_dtype_to_bigframes_dtype( - content_field_type - ) - if content_bf_type == dtypes.OBJ_REF_DTYPE: - series_to_check = series_to_check.struct.field("content") - details_json = series_to_check._apply_unary_op( - ops.obj_fetch_metadata_op - ).struct.field("details") - import bigframes.bigquery as bbq - - return bbq.json_extract(details_json, "$.gcs_metadata").rename("metadata") - - def content_type(self) -> bigframes.series.Series: - """Retrieve the content type of the Blob. - - Returns: - bigframes.series.Series: string of the content type.""" - return ( - self.metadata() - ._apply_unary_op(ops.JSONValue(json_path="$.content_type")) - .rename("content_type") - ) - - def md5_hash(self) -> bigframes.series.Series: - """Retrieve the md5 hash of the Blob. - - Returns: - bigframes.series.Series: string of the md5 hash.""" - return ( - self.metadata() - ._apply_unary_op(ops.JSONValue(json_path="$.md5_hash")) - .rename("md5_hash") - ) - - def size(self) -> bigframes.series.Series: - """Retrieve the file size of the Blob. - - Returns: - bigframes.series.Series: file size in bytes.""" - return ( - self.metadata() - ._apply_unary_op(ops.JSONValue(json_path="$.size")) - .rename("size") - .astype("Int64") - ) - - def updated(self) -> bigframes.series.Series: - """Retrieve the updated time of the Blob. 
- - Returns: - bigframes.series.Series: updated time as UTC datetime.""" - import bigframes.pandas as bpd - - updated = ( - self.metadata() - ._apply_unary_op(ops.JSONValue(json_path="$.updated")) - .rename("updated") - .astype("Int64") - ) - - return bpd.to_datetime(updated, unit="us", utc=True) - def _get_runtime( self, mode: str, with_metadata: bool = False ) -> bigframes.series.Series: - """Retrieve the ObjectRefRuntime as JSON. - - Args: - mode (str): mode for the URLs, "R" for read, "RW" for read & write. - metadata (bool, default False): whether to fetch the metadata in the ObjectRefRuntime. - - Returns: - bigframes.series.Series: ObjectRefRuntime JSON. - """ s = ( self._data._apply_unary_op(ops.obj_fetch_metadata_op) if with_metadata @@ -169,913 +52,7 @@ def _get_runtime( return s._apply_unary_op(ops.ObjGetAccessUrl(mode=mode)) - def _df_apply_udf( - self, df: bigframes.dataframe.DataFrame, udf - ) -> bigframes.series.Series: - # Catch and rethrow function axis=1 warning to be more user-friendly. 
- with warnings.catch_warnings(record=True) as catched_warnings: - s = df.apply(udf, axis=1) - for w in catched_warnings: - if isinstance(w.message, bfe.FunctionAxisOnePreviewWarning): - warnings.warn( - "Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.", - category=w.category, - stacklevel=2, - ) - else: - warnings.warn_explicit( - message=w.message, - category=w.category, - filename=w.filename, - lineno=w.lineno, - source=w.source, - ) - - return s - - def _apply_udf_or_raise_error( - self, df: bigframes.dataframe.DataFrame, udf, operation_name: str - ) -> bigframes.series.Series: - """Helper to apply UDF with consistent error handling.""" - try: - res = self._df_apply_udf(df, udf) - except Exception as e: - raise RuntimeError(f"{operation_name} UDF execution failed: {e}") from e - - if res is None: - raise RuntimeError(f"{operation_name} returned None result") - - return res - - def read_url(self) -> bigframes.series.Series: - """Retrieve the read URL of the Blob. - - Returns: - bigframes.series.Series: Read only URLs.""" + def _read_url(self) -> bigframes.series.Series: return self._get_runtime(mode="R")._apply_unary_op( ops.JSONValue(json_path="$.access_urls.read_url") ) - - def write_url(self) -> bigframes.series.Series: - """Retrieve the write URL of the Blob. - - Returns: - bigframes.series.Series: Writable URLs.""" - return self._get_runtime(mode="RW")._apply_unary_op( - ops.JSONValue(json_path="$.access_urls.write_url") - ) - - def display( - self, - n: int = 3, - *, - content_type: str = "", - width: Optional[int] = None, - height: Optional[int] = None, - ): - """Display the blob content in the IPython Notebook environment. Only works for image type now. - - Args: - n (int, default 3): number of sample blob objects to display. - content_type (str, default ""): content type of the blob. If unset, use the blob metadata of the storage. Possible values are "image", "audio" and "video". 
- width (int or None, default None): width in pixels that the image/video are constrained to. If unset, use the global setting in bigframes.options.display.blob_display_width, otherwise image/video's original size or ratio is used. No-op for other content types. - height (int or None, default None): height in pixels that the image/video are constrained to. If unset, use the global setting in bigframes.options.display.blob_display_height, otherwise image/video's original size or ratio is used. No-op for other content types. - """ - import IPython.display as ipy_display - - width = width or bigframes.options.display.blob_display_width - height = height or bigframes.options.display.blob_display_height - - # col name doesn't matter here. Rename to avoid column name conflicts - df = bigframes.series.Series(self._data._block).rename("blob_col").to_frame() - - df["read_url"] = df["blob_col"].blob.read_url() - - if content_type: - df["content_type"] = content_type - else: - df["content_type"] = df["blob_col"].blob.content_type() - - pandas_df, _, query_job = df._block.retrieve_repr_request_results(n) - df._set_internal_query_job(query_job) - - def display_single_url( - read_url: Union[str, pd._libs.missing.NAType], - content_type: Union[str, pd._libs.missing.NAType], - ): - if pd.isna(read_url): - ipy_display.display("") - return - - if pd.isna(content_type): # display as raw data or error - response = requests.get(read_url) - ipy_display.display(response.content) - return - - content_type = cast(str, content_type).casefold() - - if content_type.startswith("image"): - ipy_display.display( - ipy_display.Image(url=read_url, width=width, height=height) - ) - elif content_type.startswith("audio"): - # using url somehow doesn't work with audios - response = requests.get(read_url) - ipy_display.display(ipy_display.Audio(response.content)) - elif content_type.startswith("video"): - ipy_display.display( - ipy_display.Video(read_url, width=width, height=height) - ) - else: # 
display as raw data - response = requests.get(read_url) - ipy_display.display(response.content) - - for _, row in pandas_df.iterrows(): - display_single_url(row["read_url"], row["content_type"]) - - @property - def session(self): - return self._data._block.session - - def _resolve_connection(self, connection: Optional[str] = None) -> str: - """Resovle the BigQuery connection. - - Args: - connection (str or None, default None): BQ connection used for - function internet transactions, and the output blob if "dst" is - str. If None, uses default connection of the session. - - Returns: - str: the resolved BigQuery connection string in the format: - "project.location.connection_id". - - Raises: - ValueError: If the connection cannot be resolved to a valid string. - """ - connection = connection or self._data._block.session.bq_connection - return clients.get_canonical_bq_connection_id( - connection, - default_project=self._data._block.session._project, - default_location=self._data._block.session._location, - ) - - def get_runtime_json_str( - self, mode: str = "R", *, with_metadata: bool = False - ) -> bigframes.series.Series: - """Get the runtime (contains signed URL to access gcs data) and apply the ToJSONSTring transformation. - - Args: - mode(str or str, default "R"): the mode for accessing the runtime. - Default to "R". Possible values are "R" (read-only) and - "RW" (read-write) - with_metadata (bool, default False): whether to include metadata - in the JSON string. Default to False. - - Returns: - str: the runtime object in the JSON string. - """ - runtime = self._get_runtime(mode=mode, with_metadata=with_metadata) - return runtime._apply_unary_op(ops.ToJSONString()) - - def exif( - self, - *, - engine: Literal[None, "pillow"] = None, - connection: Optional[str] = None, - max_batching_rows: int = 8192, - container_cpu: Union[float, int] = 0.33, - container_memory: str = "512Mi", - verbose: bool = False, - ) -> bigframes.series.Series: - """Extract EXIF data. 
Now only support image types. - - Args: - engine ('pillow' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified. - connection (str or None, default None): BQ connection used for function internet transactions, and the output blob if "dst" is str. If None, uses default connection of the session. - max_batching_rows (int, default 8,192): Max number of rows per batch send to cloud run to execute the function. - container_cpu (int or float, default 0.33): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to intergers. - container_memory (str, default "512Mi"): container memory size. String of the format . Possible values are from 512Mi to 32Gi. - verbose (bool, default False): If True, returns a struct with status and content fields. If False, returns only the content. - - Returns: - bigframes.series.Series: JSON series of key-value pairs if verbose=False, or struct with status and content if verbose=True. - - Raises: - ValueError: If engine is not 'pillow'. - RuntimeError: If EXIF extraction fails or returns invalid structure. 
- """ - if engine is None or engine.casefold() != "pillow": - raise ValueError("Must specify the engine, supported value is 'pillow'.") - - import bigframes.bigquery as bbq - import bigframes.blob._functions as blob_func - import bigframes.pandas as bpd - - connection = self._resolve_connection(connection) - df = self.get_runtime_json_str(mode="R").to_frame() - df["verbose"] = verbose - - exif_udf = blob_func.TransformFunction( - blob_func.exif_func_def, - session=self._data._block.session, - connection=connection, - max_batching_rows=max_batching_rows, - container_cpu=container_cpu, - container_memory=container_memory, - ).udf() - - res = self._apply_udf_or_raise_error(df, exif_udf, "EXIF extraction") - - if verbose: - try: - exif_content_series = bbq.parse_json( - res._apply_unary_op(ops.JSONValue(json_path="$.content")) - ).rename("exif_content") - exif_status_series = res._apply_unary_op( - ops.JSONValue(json_path="$.status") - ) - except Exception as e: - raise RuntimeError(f"Failed to parse EXIF JSON result: {e}") from e - results_df = bpd.DataFrame( - {"status": exif_status_series, "content": exif_content_series} - ) - results_struct = bbq.struct(results_df).rename("exif_results") - return results_struct - else: - try: - return bbq.parse_json(res) - except Exception as e: - raise RuntimeError(f"Failed to parse EXIF JSON result: {e}") from e - - def image_blur( - self, - ksize: tuple[int, int], - *, - engine: Literal[None, "opencv"] = None, - dst: Optional[Union[str, bigframes.series.Series]] = None, - connection: Optional[str] = None, - max_batching_rows: int = 8192, - container_cpu: Union[float, int] = 0.33, - container_memory: str = "512Mi", - verbose: bool = False, - ) -> bigframes.series.Series: - """Blurs images. - - Args: - ksize (tuple(int, int)): Kernel size. - engine ('opencv' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified. 
- dst (str or bigframes.series.Series or None, default None): Output destination. Can be one of: - str: GCS folder str. The output filenames are the same as the input files. - blob Series: The output file paths are determined by the uris of the blob Series. - None: Output to BQ as bytes. - Encoding is determined by the extension of the output filenames (or input filenames if doesn't have output filenames). If filename doesn't have an extension, use ".jpeg" for encoding. - connection (str or None, default None): BQ connection used for function internet transactions, and the output blob if "dst" is str. If None, uses default connection of the session. - max_batching_rows (int, default 8,192): Max number of rows per batch send to cloud run to execute the function. - container_cpu (int or float, default 0.33): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to intergers. - container_memory (str, default "512Mi"): container memory size. String of the format . Possible values are from 512Mi to 32Gi. - verbose (bool, default False): If True, returns a struct with status and content fields. If False, returns only the content. - - Returns: - bigframes.series.Series: blob Series if destination is GCS. Or bytes Series if destination is BQ. If verbose=True, returns struct with status and content. - - Raises: - ValueError: If engine is not 'opencv' or parameters are invalid. - RuntimeError: If image blur operation fails. 
- """ - if engine is None or engine.casefold() != "opencv": - raise ValueError("Must specify the engine, supported value is 'opencv'.") - - import bigframes.bigquery as bbq - import bigframes.blob._functions as blob_func - import bigframes.pandas as bpd - - connection = self._resolve_connection(connection) - df = self.get_runtime_json_str(mode="R").to_frame() - - if dst is None: - ext = self.uri().str.extract(FILE_EXT_REGEX) - - image_blur_udf = blob_func.TransformFunction( - blob_func.image_blur_to_bytes_def, - session=self._data._block.session, - connection=connection, - max_batching_rows=max_batching_rows, - container_cpu=container_cpu, - container_memory=container_memory, - ).udf() - - df["ksize_x"], df["ksize_y"] = ksize - df["ext"] = ext # type: ignore - df["verbose"] = verbose - res = self._apply_udf_or_raise_error(df, image_blur_udf, "Image blur") - - if verbose: - blurred_content_b64_series = res._apply_unary_op( - ops.JSONValue(json_path="$.content") - ) - blurred_content_series = bbq.sql_scalar( - "FROM_BASE64({0})", columns=[blurred_content_b64_series] - ) - blurred_status_series = res._apply_unary_op( - ops.JSONValue(json_path="$.status") - ) - results_df = bpd.DataFrame( - {"status": blurred_status_series, "content": blurred_content_series} - ) - results_struct = bbq.struct(results_df).rename("blurred_results") - return results_struct - else: - blurred_bytes = bbq.sql_scalar( - "FROM_BASE64({0})", columns=[res] - ).rename("blurred_bytes") - return blurred_bytes - - if isinstance(dst, str): - dst = os.path.join(dst, "") - # Replace src folder with dst folder, keep the file names. 
- dst_uri = self.uri().str.replace(FILE_FOLDER_REGEX, rf"{dst}\1", regex=True) - dst = cast( - bigframes.series.Series, dst_uri.str.to_blob(connection=connection) - ) - - ext = dst.blob.uri().str.extract(FILE_EXT_REGEX) - - image_blur_udf = blob_func.TransformFunction( - blob_func.image_blur_def, - session=self._data._block.session, - connection=connection, - max_batching_rows=max_batching_rows, - container_cpu=container_cpu, - container_memory=container_memory, - ).udf() - - dst_rt = dst.blob.get_runtime_json_str(mode="RW") - - df = df.join(dst_rt, how="outer") - df["ksize_x"], df["ksize_y"] = ksize - df["ext"] = ext # type: ignore - df["verbose"] = verbose - - res = self._apply_udf_or_raise_error(df, image_blur_udf, "Image blur") - res.cache() # to execute the udf - - if verbose: - blurred_status_series = res._apply_unary_op( - ops.JSONValue(json_path="$.status") - ) - results_df = bpd.DataFrame( - { - "status": blurred_status_series, - "content": dst.blob.uri().str.to_blob( - connection=self._resolve_connection(connection) - ), - } - ) - results_struct = bbq.struct(results_df).rename("blurred_results") - return results_struct - else: - return dst - - def image_resize( - self, - dsize: tuple[int, int] = (0, 0), - *, - engine: Literal[None, "opencv"] = None, - fx: float = 0.0, - fy: float = 0.0, - dst: Optional[Union[str, bigframes.series.Series]] = None, - connection: Optional[str] = None, - max_batching_rows: int = 8192, - container_cpu: Union[float, int] = 0.33, - container_memory: str = "512Mi", - verbose: bool = False, - ): - """Resize images. - - Args: - dsize (tuple(int, int), default (0, 0)): Destination size. If set to 0, fx and fy parameters determine the size. - engine ('opencv' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified. - fx (float, default 0.0): scale factor along the horizontal axis. If set to 0.0, dsize parameter determines the output size. 
- fy (float, defalut 0.0): scale factor along the vertical axis. If set to 0.0, dsize parameter determines the output size. - dst (str or bigframes.series.Series or None, default None): Output destination. Can be one of: - str: GCS folder str. The output filenames are the same as the input files. - blob Series: The output file paths are determined by the uris of the blob Series. - None: Output to BQ as bytes. - Encoding is determined by the extension of the output filenames (or input filenames if doesn't have output filenames). If filename doesn't have an extension, use ".jpeg" for encoding. - connection (str or None, default None): BQ connection used for function internet transactions, and the output blob if "dst" is str. If None, uses default connection of the session. - max_batching_rows (int, default 8,192): Max number of rows per batch send to cloud run to execute the function. - container_cpu (int or float, default 0.33): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to intergers. - container_memory (str, default "512Mi"): container memory size. String of the format . Possible values are from 512Mi to 32Gi. - verbose (bool, default False): If True, returns a struct with status and content fields. If False, returns only the content. - - Returns: - bigframes.series.Series: blob Series if destination is GCS. Or bytes Series if destination is BQ. If verbose=True, returns struct with status and content. - - Raises: - ValueError: If engine is not 'opencv' or parameters are invalid. - RuntimeError: If image resize operation fails. - """ - if engine is None or engine.casefold() != "opencv": - raise ValueError("Must specify the engine, supported value is 'opencv'.") - - dsize_set = dsize[0] > 0 and dsize[1] > 0 - fsize_set = fx > 0.0 and fy > 0.0 - if not dsize_set ^ fsize_set: - raise ValueError( - "Only one of dsize or (fx, fy) parameters must be set. And the set values must be positive. 
" - ) - - import bigframes.bigquery as bbq - import bigframes.blob._functions as blob_func - import bigframes.pandas as bpd - - connection = self._resolve_connection(connection) - df = self.get_runtime_json_str(mode="R").to_frame() - - if dst is None: - ext = self.uri().str.extract(FILE_EXT_REGEX) - - image_resize_udf = blob_func.TransformFunction( - blob_func.image_resize_to_bytes_def, - session=self._data._block.session, - connection=connection, - max_batching_rows=max_batching_rows, - container_cpu=container_cpu, - container_memory=container_memory, - ).udf() - - df["dsize_x"], df["dsize_y"] = dsize - df["fx"], df["fy"] = fx, fy - df["ext"] = ext # type: ignore - df["verbose"] = verbose - res = self._apply_udf_or_raise_error(df, image_resize_udf, "Image resize") - - if verbose: - resized_content_b64_series = res._apply_unary_op( - ops.JSONValue(json_path="$.content") - ) - resized_content_series = bbq.sql_scalar( - "FROM_BASE64({0})", columns=[resized_content_b64_series] - ) - - resized_status_series = res._apply_unary_op( - ops.JSONValue(json_path="$.status") - ) - results_df = bpd.DataFrame( - {"status": resized_status_series, "content": resized_content_series} - ) - results_struct = bbq.struct(results_df).rename("resized_results") - return results_struct - else: - resized_bytes = bbq.sql_scalar( - "FROM_BASE64({0})", columns=[res] - ).rename("resized_bytes") - return resized_bytes - - if isinstance(dst, str): - dst = os.path.join(dst, "") - # Replace src folder with dst folder, keep the file names. 
- dst_uri = self.uri().str.replace(FILE_FOLDER_REGEX, rf"{dst}\1", regex=True) - dst = cast( - bigframes.series.Series, dst_uri.str.to_blob(connection=connection) - ) - - ext = dst.blob.uri().str.extract(FILE_EXT_REGEX) - - image_resize_udf = blob_func.TransformFunction( - blob_func.image_resize_def, - session=self._data._block.session, - connection=connection, - max_batching_rows=max_batching_rows, - container_cpu=container_cpu, - container_memory=container_memory, - ).udf() - - dst_rt = dst.blob.get_runtime_json_str(mode="RW") - - df = df.join(dst_rt, how="outer") - df["dsize_x"], df["dsize_y"] = dsize - df["fx"], df["fy"] = fx, fy - df["ext"] = ext # type: ignore - df["verbose"] = verbose - - res = self._apply_udf_or_raise_error(df, image_resize_udf, "Image resize") - res.cache() # to execute the udf - - if verbose: - resized_status_series = res._apply_unary_op( - ops.JSONValue(json_path="$.status") - ) - results_df = bpd.DataFrame( - { - "status": resized_status_series, - "content": dst.blob.uri().str.to_blob( - connection=self._resolve_connection(connection) - ), - } - ) - results_struct = bbq.struct(results_df).rename("resized_results") - return results_struct - else: - return dst - - def image_normalize( - self, - *, - engine: Literal[None, "opencv"] = None, - alpha: float = 1.0, - beta: float = 0.0, - norm_type: str = "l2", - dst: Optional[Union[str, bigframes.series.Series]] = None, - connection: Optional[str] = None, - max_batching_rows: int = 8192, - container_cpu: Union[float, int] = 0.33, - container_memory: str = "512Mi", - verbose: bool = False, - ) -> bigframes.series.Series: - """Normalize images. - - Args: - engine ('opencv' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified. - alpha (float, default 1.0): Norm value to normalize to or the lower range boundary in case of the range normalization. 
- beta (float, default 0.0): Upper range boundary in case of the range normalization; it is not used for the norm normalization. - norm_type (str, default "l2"): Normalization type. Accepted values are "inf", "l1", "l2" and "minmax". - dst (str or bigframes.series.Series or None, default None): Output destination. Can be one of: - str: GCS folder str. The output filenames are the same as the input files. - blob Series: The output file paths are determined by the uris of the blob Series. - None: Output to BQ as bytes. - Encoding is determined by the extension of the output filenames (or input filenames if doesn't have output filenames). If filename doesn't have an extension, use ".jpeg" for encoding. - connection (str or None, default None): BQ connection used for function internet transactions, and the output blob if "dst" is str. If None, uses default connection of the session. - max_batching_rows (int, default 8,192): Max number of rows per batch send to cloud run to execute the function. - container_cpu (int or float, default 0.33): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to intergers. - container_memory (str, default "512Mi"): container memory size. String of the format . Possible values are from 512Mi to 32Gi. - verbose (bool, default False): If True, returns a struct with status and content fields. If False, returns only the content. - - Returns: - bigframes.series.Series: blob Series if destination is GCS. Or bytes Series if destination is BQ. If verbose=True, returns struct with status and content. - - Raises: - ValueError: If engine is not 'opencv' or parameters are invalid. - RuntimeError: If image normalize operation fails. 
- """ - if engine is None or engine.casefold() != "opencv": - raise ValueError("Must specify the engine, supported value is 'opencv'.") - - import bigframes.bigquery as bbq - import bigframes.blob._functions as blob_func - import bigframes.pandas as bpd - - connection = self._resolve_connection(connection) - df = self.get_runtime_json_str(mode="R").to_frame() - - if dst is None: - ext = self.uri().str.extract(FILE_EXT_REGEX) - - image_normalize_udf = blob_func.TransformFunction( - blob_func.image_normalize_to_bytes_def, - session=self._data._block.session, - connection=connection, - max_batching_rows=max_batching_rows, - container_cpu=container_cpu, - container_memory=container_memory, - ).udf() - - df["alpha"] = alpha - df["beta"] = beta - df["norm_type"] = norm_type - df["ext"] = ext # type: ignore - df["verbose"] = verbose - res = self._apply_udf_or_raise_error( - df, image_normalize_udf, "Image normalize" - ) - - if verbose: - normalized_content_b64_series = res._apply_unary_op( - ops.JSONValue(json_path="$.content") - ) - normalized_bytes = bbq.sql_scalar( - "FROM_BASE64({0})", columns=[normalized_content_b64_series] - ) - normalized_status_series = res._apply_unary_op( - ops.JSONValue(json_path="$.status") - ) - results_df = bpd.DataFrame( - {"status": normalized_status_series, "content": normalized_bytes} - ) - results_struct = bbq.struct(results_df).rename("normalized_results") - return results_struct - else: - normalized_bytes = bbq.sql_scalar( - "FROM_BASE64({0})", columns=[res] - ).rename("normalized_bytes") - return normalized_bytes - - if isinstance(dst, str): - dst = os.path.join(dst, "") - # Replace src folder with dst folder, keep the file names. 
- dst_uri = self.uri().str.replace(FILE_FOLDER_REGEX, rf"{dst}\1", regex=True) - dst = cast( - bigframes.series.Series, dst_uri.str.to_blob(connection=connection) - ) - - ext = dst.blob.uri().str.extract(FILE_EXT_REGEX) - - image_normalize_udf = blob_func.TransformFunction( - blob_func.image_normalize_def, - session=self._data._block.session, - connection=connection, - max_batching_rows=max_batching_rows, - container_cpu=container_cpu, - container_memory=container_memory, - ).udf() - - dst_rt = dst.blob.get_runtime_json_str(mode="RW") - - df = df.join(dst_rt, how="outer") - df["alpha"] = alpha - df["beta"] = beta - df["norm_type"] = norm_type - df["ext"] = ext # type: ignore - df["verbose"] = verbose - - res = self._apply_udf_or_raise_error(df, image_normalize_udf, "Image normalize") - res.cache() # to execute the udf - - if verbose: - normalized_status_series = res._apply_unary_op( - ops.JSONValue(json_path="$.status") - ) - results_df = bpd.DataFrame( - { - "status": normalized_status_series, - "content": dst.blob.uri().str.to_blob( - connection=self._resolve_connection(connection) - ), - } - ) - results_struct = bbq.struct(results_df).rename("normalized_results") - return results_struct - else: - return dst - - def pdf_extract( - self, - *, - engine: Literal[None, "pypdf"] = None, - connection: Optional[str] = None, - max_batching_rows: int = 1, - container_cpu: Union[float, int] = 2, - container_memory: str = "1Gi", - verbose: bool = False, - ) -> bigframes.series.Series: - """Extracts text from PDF URLs and saves the text as string. - - Args: - engine ('pypdf' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified. - connection (str or None, default None): BQ connection used for - function internet transactions, and the output blob if "dst" - is str. If None, uses default connection of the session. 
- max_batching_rows (int, default 1): Max number of rows per batch - send to cloud run to execute the function. - container_cpu (int or float, default 2): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to intergers. - container_memory (str, default "1Gi"): container memory size. String of the format . Possible values are from 512Mi to 32Gi. - verbose (bool, default "False"): controls the verbosity of the output. - When set to True, both error messages and the extracted content - are displayed. Conversely, when set to False, only the extracted - content is presented, suppressing error messages. - - Returns: - bigframes.series.Series: str or struct[str, str], - depend on the "verbose" parameter. - Contains the extracted text from the PDF file. - Includes error messages if verbosity is enabled. - - Raises: - ValueError: If engine is not 'pypdf'. - RuntimeError: If PDF extraction fails or returns invalid structure. - """ - if engine is None or engine.casefold() != "pypdf": - raise ValueError("Must specify the engine, supported value is 'pypdf'.") - - import bigframes.bigquery as bbq - import bigframes.blob._functions as blob_func - import bigframes.pandas as bpd - - connection = self._resolve_connection(connection) - - pdf_extract_udf = blob_func.TransformFunction( - blob_func.pdf_extract_def, - session=self._data._block.session, - connection=connection, - max_batching_rows=max_batching_rows, - container_cpu=container_cpu, - container_memory=container_memory, - ).udf() - - df = self.get_runtime_json_str(mode="R").to_frame() - df["verbose"] = verbose - - res = self._apply_udf_or_raise_error(df, pdf_extract_udf, "PDF extraction") - - if verbose: - # Extract content with error handling - try: - content_series = res._apply_unary_op( - ops.JSONValue(json_path="$.content") - ) - except Exception as e: - raise RuntimeError( - f"Failed to extract content field from PDF result: {e}" - ) from e - try: - status_series = 
res._apply_unary_op(ops.JSONValue(json_path="$.status")) - except Exception as e: - raise RuntimeError( - f"Failed to extract status field from PDF result: {e}" - ) from e - - res_df = bpd.DataFrame({"status": status_series, "content": content_series}) - struct_series = bbq.struct(res_df).rename("extracted_results") - return struct_series - else: - return res.rename("extracted_content") - - def pdf_chunk( - self, - *, - engine: Literal[None, "pypdf"] = None, - connection: Optional[str] = None, - chunk_size: int = 2000, - overlap_size: int = 200, - max_batching_rows: int = 1, - container_cpu: Union[float, int] = 2, - container_memory: str = "1Gi", - verbose: bool = False, - ) -> bigframes.series.Series: - """Extracts and chunks text from PDF URLs and saves the text as - arrays of strings. - - Args: - engine ('pypdf' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified. - connection (str or None, default None): BQ connection used for - function internet transactions, and the output blob if "dst" - is str. If None, uses default connection of the session. - chunk_size (int, default 2000): the desired size of each text chunk - (number of characters). - overlap_size (int, default 200): the number of overlapping characters - between consective chunks. The helps to ensure context is - perserved across chunk boundaries. - max_batching_rows (int, default 1): Max number of rows per batch - send to cloud run to execute the function. - container_cpu (int or float, default 2): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to intergers. - container_memory (str, default "1Gi"): container memory size. String of the format . Possible values are from 512Mi to 32Gi. - verbose (bool, default "False"): controls the verbosity of the output. - When set to True, both error messages and the extracted content - are displayed. 
Conversely, when set to False, only the extracted - content is presented, suppressing error messages. - - Returns: - bigframe.series.Series: array[str] or struct[str, array[str]], - depend on the "verbose" parameter. - where each string is a chunk of text extracted from PDF. - Includes error messages if verbosity is enabled. - - Raises: - ValueError: If engine is not 'pypdf'. - RuntimeError: If PDF chunking fails or returns invalid structure. - """ - if engine is None or engine.casefold() != "pypdf": - raise ValueError("Must specify the engine, supported value is 'pypdf'.") - - import bigframes.bigquery as bbq - import bigframes.blob._functions as blob_func - import bigframes.pandas as bpd - - connection = self._resolve_connection(connection) - - if chunk_size <= 0: - raise ValueError("chunk_size must be a positive integer.") - if overlap_size < 0: - raise ValueError("overlap_size must be a non-negative integer.") - if overlap_size >= chunk_size: - raise ValueError("overlap_size must be smaller than chunk_size.") - - pdf_chunk_udf = blob_func.TransformFunction( - blob_func.pdf_chunk_def, - session=self._data._block.session, - connection=connection, - max_batching_rows=max_batching_rows, - container_cpu=container_cpu, - container_memory=container_memory, - ).udf() - - df = self.get_runtime_json_str(mode="R").to_frame() - df["chunk_size"] = chunk_size - df["overlap_size"] = overlap_size - df["verbose"] = verbose - - res = self._apply_udf_or_raise_error(df, pdf_chunk_udf, "PDF chunking") - - try: - content_series = bbq.json_extract_string_array(res, "$.content") - except Exception as e: - raise RuntimeError( - f"Failed to extract content array from PDF chunk result: {e}" - ) from e - - if verbose: - try: - status_series = res._apply_unary_op(ops.JSONValue(json_path="$.status")) - except Exception as e: - raise RuntimeError( - f"Failed to extract status field from PDF chunk result: {e}" - ) from e - - results_df = bpd.DataFrame( - {"status": status_series, "content": 
content_series} - ) - resultes_struct = bbq.struct(results_df).rename("chunked_results") - return resultes_struct - else: - return bbq.json_extract_string_array(res, "$").rename("chunked_content") - - def audio_transcribe( - self, - *, - engine: Literal["bigquery"] = "bigquery", - connection: Optional[str] = None, - model_name: Optional[ - Literal[ - "gemini-2.0-flash-001", - "gemini-2.0-flash-lite-001", - ] - ] = None, - verbose: bool = False, - ) -> bigframes.series.Series: - """ - Transcribe audio content using a Gemini multimodal model. - - Args: - engine ('bigquery'): The engine (bigquery or third party library) used for the function. - connection (str or None, default None): BQ connection used for - function internet transactions, and the output blob if "dst" - is str. If None, uses default connection of the session. - model_name (str): The model for natural language tasks. Accepted - values are "gemini-2.0-flash-lite-001", and "gemini-2.0-flash-001". - See "https://ai.google.dev/gemini-api/docs/models" for model choices. - verbose (bool, default "False"): controls the verbosity of the output. - When set to True, both error messages and the transcribed content - are displayed. Conversely, when set to False, only the transcribed - content is presented, suppressing error messages. - - Returns: - bigframes.series.Series: str or struct[str, str], - depend on the "verbose" parameter. - Contains the transcribed text from the audio file. - Includes error messages if verbosity is enabled. - - Raises: - ValueError: If engine is not 'bigquery'. - RuntimeError: If the transcription result structure is invalid. - """ - if engine.casefold() != "bigquery": - raise ValueError("Must specify the engine, supported value is 'bigquery'.") - - import bigframes.bigquery as bbq - import bigframes.pandas as bpd - - # col name doesn't matter here. 
Rename to avoid column name conflicts - audio_series = bigframes.series.Series(self._data._block) - - prompt_text = "**Task:** Transcribe the provided audio. **Instructions:** - Your response must contain only the verbatim transcription of the audio. - Do not include any introductory text, summaries, or conversational filler in your response. The output should begin directly with the first word of the audio." - - # Convert the audio series to the runtime representation required by the model. - audio_runtime = audio_series.blob._get_runtime("R", with_metadata=True) - - transcribed_results = bbq.ai.generate( - prompt=(prompt_text, audio_runtime), - connection_id=connection, - endpoint=model_name, - model_params={"generationConfig": {"temperature": 0.0}}, - ) - - # Validate that the result is not None - if transcribed_results is None: - raise RuntimeError("Transcription returned None result") - - transcribed_content_series = transcribed_results.struct.field("result").rename( - "transcribed_content" - ) - - if verbose: - transcribed_status_series = transcribed_results.struct.field("status") - results_df = bpd.DataFrame( - { - "status": transcribed_status_series, - "content": transcribed_content_series, - } - ) - results_struct = bbq.struct(results_df).rename("transcription_results") - return results_struct - else: - return transcribed_content_series.rename("transcribed_content") diff --git a/packages/bigframes/bigframes/operations/strings.py b/packages/bigframes/bigframes/operations/strings.py index 26ff2616a1b7..7cc93d34c07a 100644 --- a/packages/bigframes/bigframes/operations/strings.py +++ b/packages/bigframes/bigframes/operations/strings.py @@ -305,6 +305,18 @@ def join(self, sep: str) -> T: ops.ArrayReduceOp(aggregation=agg_ops.StringAggOp(sep=sep)) ) + def _to_blob(self, connection: Optional[str] = None) -> T: + import bigframes.core.blocks + + if hasattr(self._data, "_block") and isinstance( + self._data._block, bigframes.core.blocks.Block + ): + session = 
self._data._block.session + else: + raise ValueError("to_blob is only supported via Series.str") + connection = session._create_bq_connection(connection=connection) + return self._data._apply_binary_op(connection, ops.obj_make_ref_op) + def to_blob(self, connection: Optional[str] = None) -> T: """Create a BigFrames Blob series from a series of URIs. @@ -325,16 +337,15 @@ def to_blob(self, connection: Optional[str] = None) -> T: bigframes.series.Series: Blob Series. """ - import bigframes.core.blocks + import warnings + import bigframes.exceptions as bfe - if hasattr(self._data, "_block") and isinstance( - self._data._block, bigframes.core.blocks.Block - ): - session = self._data._block.session - else: - raise ValueError("to_blob is only supported via Series.str") - connection = session._create_bq_connection(connection=connection) - return self._data._apply_binary_op(connection, ops.obj_make_ref_op) + warnings.warn( + "Series.str.to_blob is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.", + category=bfe.ApiDeprecationWarning, + stacklevel=2, + ) + return self._to_blob(connection) def _parse_flags(flags: int) -> Optional[str]: diff --git a/packages/bigframes/bigframes/series.py b/packages/bigframes/bigframes/series.py index fbcc949855c2..3d29c19b9e41 100644 --- a/packages/bigframes/bigframes/series.py +++ b/packages/bigframes/bigframes/series.py @@ -321,16 +321,8 @@ def list(self) -> lists.ListAccessor: return lists.ListAccessor(self) @property - def blob(self) -> blob.BlobAccessor: - """ - Accessor for Blob operations. - """ - warnings.warn( - "The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.", - category=bfe.ApiDeprecationWarning, - stacklevel=2, - ) - return blob.BlobAccessor(self) + def _blob(self) -> blob._BlobAccessor: + return blob._BlobAccessor(self) @property @validations.requires_ordering() @@ -383,7 +375,8 @@ def copy(self) -> Series: def rename( self, index: Union[blocks.Label, Mapping[Any, Any]] = None, - ) -> Series: ... + ) -> Series: + ... @overload def rename( @@ -392,7 +385,8 @@ def rename( *, inplace: Literal[False], **kwargs, - ) -> Series: ... + ) -> Series: + ... @overload def rename( @@ -401,7 +395,8 @@ def rename( *, inplace: Literal[True], **kwargs, - ) -> None: ... + ) -> None: + ... def rename( self, @@ -462,7 +457,8 @@ def rename( def rename_axis( self, mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], - ) -> Series: ... + ) -> Series: + ... @overload def rename_axis( @@ -471,7 +467,8 @@ def rename_axis( *, inplace: Literal[False], **kwargs, - ) -> Series: ... + ) -> Series: + ... @overload def rename_axis( @@ -480,7 +477,8 @@ def rename_axis( *, inplace: Literal[True], **kwargs, - ) -> None: ... + ) -> None: + ... @validations.requires_index def rename_axis( @@ -524,7 +522,8 @@ def reset_index( drop: Literal[False] = ..., inplace: Literal[False] = ..., allow_duplicates: Optional[bool] = ..., - ) -> bigframes.dataframe.DataFrame: ... + ) -> bigframes.dataframe.DataFrame: + ... @overload def reset_index( @@ -535,7 +534,8 @@ def reset_index( drop: Literal[True] = ..., inplace: Literal[False] = ..., allow_duplicates: Optional[bool] = ..., - ) -> Series: ... + ) -> Series: + ... @overload def reset_index( @@ -546,7 +546,8 @@ def reset_index( drop: bool = ..., inplace: Literal[True] = ..., allow_duplicates: Optional[bool] = ..., - ) -> None: ... + ) -> None: + ... 
@validations.requires_ordering() def reset_index( @@ -1539,9 +1540,9 @@ def ne(self, other: object) -> Series: def items(self): for batch_df in self._block.to_pandas_batches(): - assert batch_df.shape[1] == 1, ( - f"Expected 1 column in the dataframe, but got {batch_df.shape[1]}." - ) + assert ( + batch_df.shape[1] == 1 + ), f"Expected 1 column in the dataframe, but got {batch_df.shape[1]}." for item in batch_df.squeeze(axis=1).items(): yield item @@ -1771,7 +1772,8 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> None: ... + ) -> None: + ... @typing.overload def sort_values( @@ -1782,7 +1784,8 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> Series: ... + ) -> Series: + ... def sort_values( self, @@ -1813,12 +1816,14 @@ def sort_values( @typing.overload # type: ignore[override] def sort_index( self, *, axis=..., inplace: Literal[False] = ..., ascending=..., na_position=... - ) -> Series: ... + ) -> Series: + ... @typing.overload def sort_index( self, *, axis=0, inplace: Literal[True] = ..., ascending=..., na_position=... - ) -> None: ... + ) -> None: + ... @validations.requires_index def sort_index( @@ -2693,28 +2698,18 @@ def _apply_binary_aggregation( @typing.overload def _align( self, other: Series, how="outer" - ) -> tuple[ - ex.DerefOp, - ex.DerefOp, - blocks.Block, - ]: ... + ) -> tuple[ex.DerefOp, ex.DerefOp, blocks.Block,]: + ... @typing.overload def _align( self, other: typing.Union[Series, scalars.Scalar], how="outer" - ) -> tuple[ - ex.DerefOp, - AlignedExprT, - blocks.Block, - ]: ... + ) -> tuple[ex.DerefOp, AlignedExprT, blocks.Block,]: + ... 
def _align( self, other: typing.Union[Series, scalars.Scalar], how="outer" - ) -> tuple[ - ex.DerefOp, - AlignedExprT, - blocks.Block, - ]: + ) -> tuple[ex.DerefOp, AlignedExprT, blocks.Block,]: """Aligns the series value with another scalar or series object. Returns new left column id, right column id and joined tabled expression.""" values, block = self._align_n( [ diff --git a/packages/bigframes/bigframes/session/__init__.py b/packages/bigframes/bigframes/session/__init__.py index a6bb3041764c..ea36cc1925f1 100644 --- a/packages/bigframes/bigframes/session/__init__.py +++ b/packages/bigframes/bigframes/session/__init__.py @@ -432,7 +432,8 @@ def read_gbq( # type: ignore[overload-overlap] col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... @overload def read_gbq( @@ -448,7 +449,8 @@ def read_gbq( col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... def read_gbq( self, @@ -520,7 +522,8 @@ def _read_gbq_colab( *, pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... @overload def _read_gbq_colab( @@ -529,7 +532,8 @@ def _read_gbq_colab( *, pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[True] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... @log_adapter.log_name_override("read_gbq_colab") def _read_gbq_colab( @@ -590,7 +594,8 @@ def read_gbq_query( # type: ignore[overload-overlap] filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... 
@overload def read_gbq_query( @@ -606,7 +611,8 @@ def read_gbq_query( filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... def read_gbq_query( self, @@ -753,7 +759,8 @@ def read_gbq_table( # type: ignore[overload-overlap] use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... @overload def read_gbq_table( @@ -767,7 +774,8 @@ def read_gbq_table( use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... def read_gbq_table( self, @@ -918,7 +926,8 @@ def read_pandas( pandas_dataframe: pandas.Index, *, write_engine: constants.WriteEngineType = "default", - ) -> bigframes.core.indexes.Index: ... + ) -> bigframes.core.indexes.Index: + ... @typing.overload def read_pandas( @@ -926,7 +935,8 @@ def read_pandas( pandas_dataframe: pandas.Series, *, write_engine: constants.WriteEngineType = "default", - ) -> bigframes.series.Series: ... + ) -> bigframes.series.Series: + ... @typing.overload def read_pandas( @@ -934,7 +944,8 @@ def read_pandas( pandas_dataframe: pandas.DataFrame, *, write_engine: constants.WriteEngineType = "default", - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... def read_pandas( self, @@ -2248,12 +2259,17 @@ def from_glob_path( bigframes.pandas.DataFrame: Result BigFrames DataFrame. """ + warnings.warn( + "from_glob_path is deprecated and will be removed in a future release. Use read_gbq with 'ref' column instead.", + category=bfe.ApiDeprecationWarning, + stacklevel=2, + ) # TODO(garrettwu): switch to pseudocolumn when b/374988109 is done. 
connection = self._create_bq_connection(connection=connection) table = self._create_object_table(path, connection) - s = self._loader.read_gbq_table(table)["uri"].str.to_blob(connection) + s = self._loader.read_gbq_table(table)["uri"].str._to_blob(connection) return s.rename(name).to_frame() def _create_bq_connection( @@ -2312,7 +2328,7 @@ def read_gbq_object_table( table = self.bqclient.get_table(object_table) connection = table._properties["externalDataConfiguration"]["connectionId"] - s = self._loader.read_gbq_table(object_table)["uri"].str.to_blob(connection) + s = self._loader.read_gbq_table(object_table)["uri"].str._to_blob(connection) return s.rename(name).to_frame() # ========================================================================= diff --git a/packages/bigframes/tests/system/large/blob/test_function.py b/packages/bigframes/tests/system/large/blob/test_function.py deleted file mode 100644 index bc09baf268d1..000000000000 --- a/packages/bigframes/tests/system/large/blob/test_function.py +++ /dev/null @@ -1,853 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging -import os -import traceback -import uuid -from typing import Generator - -import pandas as pd -import pytest -from google.cloud import storage - -import bigframes -import bigframes.pandas as bpd -from bigframes import dtypes - -pytest.skip("Skipping blob tests due to b/481790217", allow_module_level=True) - - -@pytest.fixture(scope="function") -def images_output_folder() -> Generator[str, None, None]: - id = uuid.uuid4().hex - folder = os.path.join("gs://bigframes_blob_test/output/", id) - yield folder - - # clean up - try: - cloud_storage_client = storage.Client() - bucket = cloud_storage_client.bucket("bigframes_blob_test") - blobs = bucket.list_blobs(prefix="output/" + id) - for blob in blobs: - blob.delete() - except Exception as exc: - traceback.print_exception(type(exc), exc, None) - - -@pytest.fixture(scope="function") -def images_output_uris(images_output_folder: str) -> list[str]: - return [ - os.path.join(images_output_folder, "img0.jpg"), - os.path.join(images_output_folder, "img1.jpg"), - ] - - -def test_blob_exif( - bq_connection: str, - session: bigframes.Session, -): - exif_image_df = session.from_glob_path( - "gs://bigframes_blob_test/images_exif/*", - name="blob_col", - connection=bq_connection, - ) - - actual = exif_image_df["blob_col"].blob.exif( - engine="pillow", connection=bq_connection, verbose=False - ) - expected = bpd.Series( - ['{"ExifOffset": 47, "Make": "MyCamera"}'], - session=session, - dtype=dtypes.JSON_DTYPE, - ) - pd.testing.assert_series_equal( - actual.to_pandas(), - expected.to_pandas(), - check_dtype=False, - check_index_type=False, - ) - - -def test_blob_exif_verbose( - bq_connection: str, - session: bigframes.Session, -): - exif_image_df = session.from_glob_path( - "gs://bigframes_blob_test/images_exif/*", - name="blob_col", - connection=bq_connection, - ) - - actual = exif_image_df["blob_col"].blob.exif( - engine="pillow", connection=bq_connection, verbose=True - ) - assert hasattr(actual, "struct") - 
actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - assert content_series.dtype == dtypes.JSON_DTYPE - - -def test_blob_image_blur_to_series( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), dst=series, connection=bq_connection, engine="opencv", verbose=False - ) - - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_blur_to_series_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), dst=series, connection=bq_connection, engine="opencv", verbose=True - ) - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - # Content should be blob objects for GCS destination - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_blur_to_folder( - 
images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=False, - ) - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_blur_to_folder_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=True, - ) - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - # Content should be blob objects for GCS destination - assert hasattr(content_series, "blob") - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_blur_to_bq(images_mm_df: bpd.DataFrame, bq_connection: str): - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), connection=bq_connection, engine="opencv", verbose=False - ) - - assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - assert actual.dtype == dtypes.BYTES_DTYPE - - -def test_blob_image_blur_to_bq_verbose(images_mm_df: bpd.DataFrame, bq_connection: str): - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), connection=bq_connection, 
engine="opencv", verbose=True - ) - - assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - assert content_series.dtype == dtypes.BYTES_DTYPE - - -def test_blob_image_resize_to_series( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), - dst=series, - connection=bq_connection, - engine="opencv", - verbose=False, - ) - - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_resize_to_series_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), - dst=series, - connection=bq_connection, - engine="opencv", - verbose=True, - ) - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - 
content_series = actual_exploded["content"] - # Content should be blob objects for GCS destination - assert hasattr(content_series, "blob") - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_resize_to_folder( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=False, - ) - - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_resize_to_folder_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=True, - ) - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - # Content should be blob objects for GCS destination - assert hasattr(content_series, "blob") - - # verify the files exist - assert not content_series.blob.size().isna().any() - - -def test_blob_image_resize_to_bq(images_mm_df: bpd.DataFrame, bq_connection: str): - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), connection=bq_connection, engine="opencv", verbose=False - ) - 
- assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - assert actual.dtype == dtypes.BYTES_DTYPE - - -def test_blob_image_resize_to_bq_verbose( - images_mm_df: bpd.DataFrame, bq_connection: str -): - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), connection=bq_connection, engine="opencv", verbose=True - ) - - assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - assert content_series.dtype == dtypes.BYTES_DTYPE - - -def test_blob_image_normalize_to_series( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, - beta=150.0, - norm_type="minmax", - dst=series, - connection=bq_connection, - engine="opencv", - verbose=False, - ) - - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_normalize_to_series_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, 
- beta=150.0, - norm_type="minmax", - dst=series, - connection=bq_connection, - engine="opencv", - verbose=True, - ) - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - # Content should be blob objects for GCS destination - assert hasattr(content_series, "blob") - - -def test_blob_image_normalize_to_folder( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, - beta=150.0, - norm_type="minmax", - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=False, - ) - - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_normalize_to_folder_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, - beta=150.0, - norm_type="minmax", - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=True, - ) - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = 
actual_exploded["content"] - # Content should be blob objects for GCS destination - assert hasattr(content_series, "blob") - - -def test_blob_image_normalize_to_bq(images_mm_df: bpd.DataFrame, bq_connection: str): - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, - beta=150.0, - norm_type="minmax", - connection=bq_connection, - engine="opencv", - verbose=False, - ) - - assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - assert actual.dtype == dtypes.BYTES_DTYPE - - -def test_blob_image_normalize_to_bq_verbose( - images_mm_df: bpd.DataFrame, bq_connection: str -): - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, - beta=150.0, - norm_type="minmax", - connection=bq_connection, - engine="opencv", - verbose=True, - ) - - assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - assert content_series.dtype == dtypes.BYTES_DTYPE - - -def test_blob_pdf_extract( - pdf_mm_df: bpd.DataFrame, - bq_connection: str, -): - actual = ( - pdf_mm_df["pdf"] - .blob.pdf_extract(connection=bq_connection, verbose=False, engine="pypdf") - .explode() - .to_pandas() - ) - - # check relative length - expected_text = "Sample PDF This is a testing file. Some dummy messages are used for testing purposes." 
- expected_len = len(expected_text) - - actual_text = actual[actual != ""].iloc[0] - actual_len = len(actual_text) - - relative_length_tolerance = 0.25 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=False): Extracted text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. " - ) - - # check for major keywords - major_keywords = ["Sample", "PDF", "testing", "dummy", "messages"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=False): Expected keyword '{keyword}' not found in extracted text. " - ) - - -def test_blob_pdf_extract_verbose( - pdf_mm_df: bpd.DataFrame, - bq_connection: str, -): - actual = ( - pdf_mm_df["pdf"] - .blob.pdf_extract(connection=bq_connection, verbose=True, engine="pypdf") - .explode() - .to_pandas() - ) - - # check relative length - expected_text = "Sample PDF This is a testing file. Some dummy messages are used for testing purposes." - expected_len = len(expected_text) - - # The first entry is for a file that doesn't exist, so we check the second one - successful_results = actual[actual.apply(lambda x: x["status"] == "")] - actual_text = successful_results.apply(lambda x: x["content"]).iloc[0] - actual_len = len(actual_text) - - relative_length_tolerance = 0.25 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=True): Extracted text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. 
" - ) - - # check for major keywords - major_keywords = ["Sample", "PDF", "testing", "dummy", "messages"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=True): Expected keyword '{keyword}' not found in extracted text. " - ) - - -def test_blob_pdf_chunk(pdf_mm_df: bpd.DataFrame, bq_connection: str): - actual = ( - pdf_mm_df["pdf"] - .blob.pdf_chunk( - connection=bq_connection, - chunk_size=50, - overlap_size=10, - verbose=False, - engine="pypdf", - ) - .explode() - .to_pandas() - ) - - # check relative length - expected_text = "Sample PDF This is a testing file. Some dummy messages are used for testing purposes." - expected_len = len(expected_text) - - # First entry is NA - actual_text = "".join(actual.dropna()) - actual_len = len(actual_text) - - relative_length_tolerance = 0.25 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=False): Extracted text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. " - ) - - # check for major keywords - major_keywords = ["Sample", "PDF", "testing", "dummy", "messages"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=False): Expected keyword '{keyword}' not found in extracted text. " - ) - - -def test_blob_pdf_chunk_verbose(pdf_mm_df: bpd.DataFrame, bq_connection: str): - actual = ( - pdf_mm_df["pdf"] - .blob.pdf_chunk( - connection=bq_connection, - chunk_size=50, - overlap_size=10, - verbose=True, - engine="pypdf", - ) - .explode() - .to_pandas() - ) - - # check relative length - expected_text = "Sample PDF This is a testing file. Some dummy messages are used for testing purposes." 
- expected_len = len(expected_text) - - # The first entry is for a file that doesn't exist, so we check the second one - successful_results = actual[actual.apply(lambda x: x["status"] == "")] - actual_text = "".join(successful_results.apply(lambda x: x["content"]).iloc[0]) - actual_len = len(actual_text) - - relative_length_tolerance = 0.25 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=True): Extracted text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. " - ) - - # check for major keywords - major_keywords = ["Sample", "PDF", "testing", "dummy", "messages"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=True): Expected keyword '{keyword}' not found in extracted text. " - ) - - -@pytest.mark.parametrize( - "model_name", - [ - "gemini-2.0-flash-001", - "gemini-2.0-flash-lite-001", - ], -) -def test_blob_transcribe( - audio_mm_df: bpd.DataFrame, - model_name: str, -): - actual = ( - audio_mm_df["audio"] - .blob.audio_transcribe( - model_name=model_name, # type: ignore - verbose=False, - ) - .to_pandas() - ) - - # check relative length - expected_text = "Now, as all books not primarily intended as picture-books consist principally of types composed to form letterpress" - expected_len = len(expected_text) - - actual_text = actual[0] - - if pd.isna(actual_text) or actual_text == "": - # Ensure the tests are robust to flakes in the model, which isn't - # particularly useful information for the bigframes team. 
- logging.warning(f"blob_transcribe() model {model_name} verbose=False failure") - return - - actual_len = len(actual_text) - - relative_length_tolerance = 0.2 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=False): Transcribed text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. " - ) - - # check for major keywords - major_keywords = ["book", "picture"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=False): Expected keyword '{keyword}' not found in transcribed text. " - ) - - -@pytest.mark.parametrize( - "model_name", - [ - "gemini-2.0-flash-001", - "gemini-2.0-flash-lite-001", - ], -) -def test_blob_transcribe_verbose( - audio_mm_df: bpd.DataFrame, - model_name: str, -): - actual = ( - audio_mm_df["audio"] - .blob.audio_transcribe( - model_name=model_name, # type: ignore - verbose=True, - ) - .to_pandas() - ) - - # check relative length - expected_text = "Now, as all books not primarily intended as picture-books consist principally of types composed to form letterpress" - expected_len = len(expected_text) - - actual_text = actual[0]["content"] - - if pd.isna(actual_text) or actual_text == "": - # Ensure the tests are robust to flakes in the model, which isn't - # particularly useful information for the bigframes team. 
- logging.warning(f"blob_transcribe() model {model_name} verbose=True failure") - return - - actual_len = len(actual_text) - - relative_length_tolerance = 0.2 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=True): Transcribed text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. " - ) - - # check for major keywords - major_keywords = ["book", "picture"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=True): Expected keyword '{keyword}' not found in transcribed text. " - ) diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py index 4bfd50fef4ec..7130c7ac1610 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py @@ -21,15 +21,9 @@ pytest.importorskip("pytest_snapshot") -def test_obj_fetch_metadata(scalar_types_df: bpd.DataFrame, snapshot): - blob_s = scalar_types_df["string_col"].str.to_blob() - sql = blob_s.blob.version().to_frame().sql - snapshot.assert_match(sql, "out.sql") - - def test_obj_get_access_url(scalar_types_df: bpd.DataFrame, snapshot): - blob_s = scalar_types_df["string_col"].str.to_blob() - sql = blob_s.blob.read_url().to_frame().sql + blob_s = scalar_types_df["string_col"].str._to_blob() + sql = blob_s._blob._read_url().to_frame().sql snapshot.assert_match(sql, "out.sql") @@ -45,7 +39,7 @@ def test_obj_get_access_url_with_duration(scalar_types_df: bpd.DataFrame, snapsh def test_obj_make_ref(scalar_types_df: bpd.DataFrame, snapshot): - blob_df = 
scalar_types_df["string_col"].str.to_blob() + blob_df = scalar_types_df["string_col"].str._to_blob() snapshot.assert_match(blob_df.to_frame().sql, "out.sql") From 1624846cc7ca387773f29ae3f0d4c923eeab573a Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 10 Apr 2026 23:00:30 +0000 Subject: [PATCH 02/39] refactor: cleanup blob API references --- packages/bigframes/docs/templates/toc.yml | 3 - ...with-bigframes-over-national-jukebox.ipynb | 701 +++++++++--------- .../tests/system/small/blob/test_io.py | 132 ---- .../system/small/blob/test_properties.py | 119 --- .../tests/system/small/blob/test_urls.py | 31 - 5 files changed, 364 insertions(+), 622 deletions(-) delete mode 100644 packages/bigframes/tests/system/small/blob/test_io.py delete mode 100644 packages/bigframes/tests/system/small/blob/test_properties.py delete mode 100644 packages/bigframes/tests/system/small/blob/test_urls.py diff --git a/packages/bigframes/docs/templates/toc.yml b/packages/bigframes/docs/templates/toc.yml index 5d043fd85f2a..562b857fee5c 100644 --- a/packages/bigframes/docs/templates/toc.yml +++ b/packages/bigframes/docs/templates/toc.yml @@ -87,9 +87,6 @@ uid: bigframes.operations.lists.ListAccessor - name: PlotAccessor uid: bigframes.operations.plotting.PlotAccessor - - name: BlobAccessor - uid: bigframes.operations.blob.BlobAccessor - status: beta name: Series - name: Window uid: bigframes.core.window.Window diff --git a/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb b/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb index fe2d567d1b31..e70ddfe4a845 100644 --- a/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb +++ b/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb @@ -1,23 +1,8 @@ { "cells": [ { + "id": "c62e292f", "cell_type": "markdown", - "metadata": { - "@deathbeds/jupyterlab-fonts": { - "styles": { - "": 
{ - "body[data-jp-deck-mode='presenting'] &": { - "zoom": "194%" - } - } - } - }, - "editable": true, - "slideshow": { - "slide_type": "subslide" - }, - "tags": [] - }, "source": [ "# Creating a searchable index of the National Jukebox\n", "\n", @@ -35,42 +20,42 @@ "To follow along, you'll need a Google Cloud project\n", "\n", "* Go to https://cloud.google.com/free to start a free trial." - ] - }, - { - "cell_type": "markdown", + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { - "z-index": "0", - "zoom": "216%" + "body[data-jp-deck-mode='presenting'] \u0026": { + "zoom": "194%" } } } }, + "editable": true, "slideshow": { - "slide_type": "slide" - } + "slide_type": "subslide" + }, + "tags": [] }, + "execution_count": null + }, + { + "id": "7dc312a4", + "cell_type": "markdown", "source": [ "The National Jukebox is a project of the USA Library of Congress to provide access to thousands of acoustic sound recordings from the very earliest days of the commercial record industry.\n", "\n", "* Learn more at https://www.loc.gov/collections/national-jukebox/about-this-collection/\n", "\n", - "\"recording" - ] - }, - { - "cell_type": "markdown", + "\u003cimg src=\"https://www.loc.gov/static/collections/national-jukebox/images/acoustic-session.jpg\" alt=\"recording 100+ years ago\" width=\"400px\" /\u003e" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "z-index": "0", - "zoom": "181%" + "zoom": "216%" } } } @@ -79,11 +64,16 @@ "slide_type": "slide" } }, + "execution_count": null + }, + { + "id": "07dcae4b", + "cell_type": "markdown", "source": [ "\n", "To search the National Jukebox, we combine powerful features of BigQuery:\n", "\n", - "\"audio\n", + "\u003cimg 
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAALEAAAFdCAYAAABM2IyIAAAAAXNSR0IArs4c6QAAIABJREFUeF7tnQfYHUXZ/p+3p4ckpJBgQj4JUkLxjxQFCTUEBRSET4SACAIWOirlE+kgICJIU0GwUVR6UVQg+An4RRNAipKQUBNCSOEl9e3/65mdZ/aZZ2fL++aEc3Z3znWF8+6ePXvOzPzOzT33zM7W9fT09IB/+BrIcQ3UeYhz3Hr+q6sa8BB7EHJfAx7i3DehL4CH2DOQ+xooHcRr166FDz5YAaNGjcx94/kCBDVQKojfeOMNOOjgQ2DlylVw4w3XwV577ZnKwezZz8IXDvlvddxr8+emHu8P+PBrIJcQr1q1Co796vGqtr52wvGw++5TnDXX2toKJ3ztG+q1r3/tBFi5ciWceNIpavuEE46Ds878TmqNe4hTq6jqB+QSYqy1Qw75IsyaPRv2228a3HD9j50Veeedd8HZ53wXGhoa4J//+D9oaWlWUL+76F246abrYeLEiakN4CFOraKqH5BbiG+77ZdwwYUXQb9+/eDZ2f9Qz/IxffqX4amnn1a24eaf/aRPle0h7lO1fahvyi3E7733Huy08y6AA47XXnM1HHDA/lbF8devueZqOFC8HlfLy5e/D+idJ07cBIYOHQpZIEbbMn/+a7DxxuNg5Mj0DiPamnnz5sOGG46AcePGfagNXsQPyy3E2BiktPtN2xduuOE6q31uvfUXcOFFF8OAAQNg9qyZ0NLSAl1dXXDE9KPUcZdcfBF89KP/Zd7z8sv/hksuuQyefuYZs2/PPXaHk08+CT5/0BfUPtmxe+qpp+Hqq69RtoYem222GZxx+qkwdeo+EV6effY5+P7lV8DMmf8wr40ZMwaOOeZoOO6rxxaRrw+lTLmGmDyvy1Ic/IVDAaE56KDPww+vulJVJkK86aTN1d/333c3bLPNNurvl156WaUWHR0danv8+I/AsmXLVUdw8uSt4MUXX4pAfO+998HpZ3xb7a+rq4PJkydDc1OTAfqySy+Gww77omlEBP3II4+GNWvWqH0IOyp+W1ub2j7uuK/COWef+aE0etE+JNcQr169Grbdbnvo7OwEbhkWLlwIu+waJBa33XoLTJmyWyLERxxxlFJg7Ohd86OrYOutt1bAP/DAg3DW2f8D7e3tFsQI4ic/9WlAG/H5zx0I5577PzB8+HB1zA033gRXXnkV9O/fH/4x8xkYOHCgAnf3PfaCxYvfgz123x0uuuh8ZSPwvLfffqfy9vi45eafwp577lE0xtZ7eXINMdYORm2PP/4ETNt3Ktx44/Wqwq6/4Ub4wQ9+CCNGjID/+/tTKp2IU+J3310MO39yF/X6XXfeDjvuuINV6ZdfcSXcdNNPLYgffvgRFdWNHj0K/vrkE9Dc3Gze093dDftMnaY8MqYmmJ489NDDcNLJp8LgwYPh78/8TVkc/jjttDPgvvsfsMqw3lu+QB+Qe4jvu+9+OO30b0FjYyO88K9nVUoxbb/94ZVXXoGjjz4Kzvveuaa5XHbiz3/+Cxx/wtdVJ+65Z/8ZaVq0ARjn4YM8MXrnm2/5uVLfSZM2jbxnzpw5gB1EzKExj7744kvhlp/fCvvuOxVu0j80/iaCHH8Uf3/mqQLh9eEUJfcQo6dES4HPV199FXx8u21h9z32VrV33713w7bbBr43Tonv+u3v4KyzzoFNN/0o/PlPf4zU+oIFC2DXT+9uQXzGt74D99xzb2oLnXbqKXDyyScCHX/EEV+Ciy+6MPK+f86aBYceepjy1vPnzUk9rz/AroHcQ4zFwf9Vo5rtv/9nYdKmm8LVP7pGdc6enPG4VVqXEv/x0T/B17/+zV4pMSnrkdOPgAsvPD+VKa/EqVW0TgcUAmICcciQIfCRj2ys0gZSQV47Lojnzn0Vpu67nzrst3fdATvs8AmrQq+44gdw403BQAnZiQcefAhOOeU02Hzz
zeEPjzwYaQBU6UmbTYLJW22l1NV74nViNPXNhYAY0wm0FJhW0GPGjMdgwvjxqUqMB6D9wLhrk002gWuvvRq2njxZpRMPPvQwnHnm2ZF0Audu4EALPp9/3rnw5S8H2TM+fvObO+C7535PKftTf3vSpBO77Lqb8sm77fZpwPht7NixkXQCs27MvP2jdzVQCIixyNynbrfdtnDvPb+P1ERcTkxKSW+YMGECLF26VOXE+DcCzpUY/6aMGv/ebNIkGDZ8mIKaMuULLjgPjjpyuvkOPFem97z51luAU0PVD2n3KXDrz2/uXev5o1UNFAbiJ2bMgGOOOU4VChMJTCbkIw5iPA7nYlz5g6ssNd9ppx3hzO98G3DgREKM23/5y2Nw7Y+vhxdeeMF8FMZ5x3zlaDjnnLMin4/zOK699rrIiN306Yer2XgUBXo2e1cDhYG4d8V2H7169Rp4/vnn1YsbbTRG2YssD7QJ8+a9Ch0dnbDxxhsrX5708HMnstRq9mM8xNnryh9ZozXgIa7RhvFfK3sNeIiz15U/skZrwENcow3jv1b2GvAQZ68rf2SN1oCHuEYbxn+t7DXgIc5eV/7IGq0BD3GNNoz/WtlrwEOcva78kTVaAx7iGm0Y/7Wy14CHOHtd+SNrtAY8xDXaMP5rZa8BD3H2uvJH1mgNeIhrtGH818peAx7i7HXlj6zRGvAQ12jD+K+VvQY8xNnryh9ZozXgIa7RhvFfK3sNeIiz15U/skZrwENcow3jv1b2GvAQZ68rf2SN1oCHuEYbxn+t7DXgIc5eV/7IGq2BYkLc3Q3Q2Q7Q1YnLw0NPVyfUQQ9Aj+Mf7sfj8YGv8+PU37RfHRBsB//Rx9N2XbBdF76sjqHtOvyDPWjbeq7DZeeDg9Sz3sa/Xdt8f309QH0DAK7F3NAI0NgMgPtK8CgWxN1dAO1tAF0dAtju+G0FMYNXMYnHC6gltBbMBCvuNNRqmglKoolvS2gFvASzE+L6AOzIP9pfH8Dc3BLAXeBHcSBGeDvaNKwEbQK8UpWlUtO2pdCkxlqJCQyl4PxBMDvIkQqs1FZLtnmOUV6pzApghBbhZ/AasPW+ppYA5oI+igFx25rAOqAtUHDGPUs7wSCPQJygxAZaaSuSKHEoMkFLIBvLoKF0KrGEltSYK7Pj76ZmgObovf6KwHX+IUaAO8k+aChjYSaIBbzcTpBCO5VYQMsthaJBK7DyxtxWOLwwV2AOawRoZhksJZZ2QoBbrz01+mJS7MYmgJb+ReDWKkO+IUb70NEer8Cqw6Y7bpZ9kBBTx468sKOjxz0xKXFEkeMtcVDrZB303wSveZYdObIJpMx1AASntAwEqgteYzvqAluB9qJAj/xCjJ24NasC+6CU12EhSGEjyswUWXbsnEqs4Y5LJxTMskNHlOj9znSC++GEVCJTx04rrkokEHbhkfl2/4GF6uzlF+K1q4WN6A5SBgUz79Dp/ZYix9kJhxJbUJNloBiN2Yc0ZTMJm/DGsREb974uJSZoHbaBK6+EGrcbmgD62bchS/v6tfx6PiFGZV29IoRVKrGB2dXJo1yYd/JkisHzYg0u+V8eRBhbwYQ4rrVFTBzmwfoNHOY05VVKS8mE7NgxReYwY3rB1XjA4MLkyPmEuH0tAP6TNiETzDFgx0Vu5IUJXpkPG6/cWzvhgJescuLgRlyHLoudwGP0cdjBK0jslk+I16wMOnRkGwhmAzFLKeJshBXFuVILbRksBaYRO24nxAhdqhLH2QmXIouOnTWwIXPhGAW2vDFTY4zc+g+qZZeQ+bvlE+KVrQDdLBe2FBk9sOjk9SZy48PM3C7wv00HT9ezHOugqM08a7vBm8XYhzh4WZoRl0RY3pfsBYObWw4FM1NiHM0bNDQzKLV8YC4h7mldinfMCTtxlhLz/aKjlxi5sbkQ6txim/XpgjkWTIGtdEI2d1w6wSCliI18s4JTn0dFZno4Wnpc3JYpBNkF2i/hNfsbAIYEN1XP+yOXEEPr
0gBgA2+XPVIX8cYOZaZojeZJ8Mk+ul8Xwuoa5CDI9cSfNBJS0wkClSmzCh44xKA7YxngtWAWnTrq5A0dkfatc/F6PiFe/h5T4ThFZlGby17w2WsqT9ZQkgJbSky2gc1is1IKamueF9M+1uHjCYXLTrigpeNIkc1gBk8nuB92eWMGMVfmYSNzAWnal8wpxIuFEhOwIiuWubFz0CMGXt6hc9kHV9SWWtvaQrCniNLyyUBkK6xnbTUi8ZnwxAQrtxsGYK3kw0alfeNcvJ5PiJctsgc2ZDqRqaOXoLwuZTYemCcUwhsnNXmSndDjFWZY2igyAcs9Mf+b58VCgZ1emBRZPw8fnQtI075kPiFeuijBE3Ov7FJoitMYxARtqq3g0Mq4jSgn+yDshGvY2dWRc6kvnsrYCAm0wz5IxeUDHRzuEWPS+MjF6/mEeMlCPWdCd+ioI5fW0VOvx9kHlkiYdEKDqeYDJSiwnE8cN53YTL1kkRuHVtkMbhfStkmJmT9OtBHCG284NheQpn3JfEL8HkKsVdYCmE/FdCmyBpErL4/TrL9dkVsKzKZ/FzMV00CsX5f+1yhuAry8g2fyY51aGJ8sbIOJ4sR+D3Ha72M9vr74bXv2WgRk2cHjcyNA+2lSZIcCp9oKllYYr2z9wQrvyIld8JIPjjxrz6EGLiTcpMS035FCWMrMX28AGDluPTbSh3fqfCqxgpiUVj6zqZlWXqyvpeN2QqUXDo+cqsiuwQ7WaC47wTt2FsT6fbFJhIA3Von1RCDueQ3A2m5IoEcl30j9w8Nw3T4pnxC/+6ZW0y7x7ABbzTcWsJptrcZqW3plsc8MhiSM5FlXdtCFHiwc5h05itnkYEYczOrCZam8cptshbQTQoGVvWgAGP2RdaOnRt6dY4iTlJh3+BjAFqwId4yloI5cbGoR442NJxZuQm2yoeTMgxo6lTA2gqZd0n7HtrIdGtI4L0xpxejxNYLhun2NfEL8zhsJdkLCzdWWdeyM8jpshjUtUyi04pdUWnhjDjEbqDNNJOFVbGdMIwhOOj5u2+zX6uv0xPq1MRPWjZ4aeXdOIX5dRGykvPJZq3CifZAQx0GdNEFI0KsuFBV2wmUllD1gKm2lDVk6csxOWFDrETm1r0FPoNfPaCNIqT3EVfwZLnwtXYm5D6a/LfvgsBNxHb2kwRBSZl4dfEhadujkAIdrbgSHOZPysgEQSjFcCiz3jZ1YxUas3EfnU4ldEMuYLQIxRWtpyptkLxwDJU6IWTxhTfpxzEpLmx9hRWtxyss8chLE5IUJZg9x5X5JvT7TgvnxSmxgZtEZjdQZRRbZMHX4kpRYDorovp3xx6oQjgtHXbPV8FBrnjDzxk6vG6e0rv3UsXPEalyJ8e+x/9Xrqq/FN+RTiSMQu6I2DrHwxnHQcsixtWSa4cqPae4xLTJILFNrR7xwlo5cQpTGIVdX53NopSLzlAK9MOvseYir/Ht8e55WYtaR43bCshIynWB2IU2ZKaWQOfJGYwCGDGGrYlJK4YgnSIk7OgDeWgDQ1sZG3jjQ+u9M9sFhKywbwWHWlyQZFWYdvY0/WuWGrMzH51OJM0Es4OUJhQWvA2pSXNcgyWaTACb2MZpaswbgr0+r5WZNtGZNck9QVpUoZFFe/mOgK0D44AdLKcZ5iCvzM+rLWd5+Nbg0yagvKrLeloCaETvZsUtSZD3bzdgONvttzykAzc0ArR+EShy5UNRRqEEDAJqaAJ59AWDBQh17OaCMswcKYrIOvYGZKTGqsYrYNNQbb9qX2q+59+RUiTXEBlwGMR9mlhBK+xCnyNZQtYB/6l4ADfUAjz0JsHqNPVxt/DC7TInsxKd2ABi1IcCL/wZ4/c1AiVuaAIYNA1iyxFZmAywNI2toJcSWP5YKzLyyshI8J/YQV/+X+NZcpryowGkQp3Ts4uBWMLOhaTxu370DiP8yA2CVhJjNOTYdO92z22UHgNEjAf71cgBx/xaAnT8BsGgxwKvzQ2U2oIoO
W0SJEUSpyDFe2SgwdfQ0xB+ZVP22rMA3yKcSOyFG4BBmVE5uB9gEIJlKpCkzh5iOJYj/PCNUYhoMIS/tWm9i1x1DiN9ZBLDLTgCDBgLMnccgTuiwWRBzwJlXjvwA9OIruJ/bCOrkeYgr8BPq6ynemhPEX1yBZT5sjdKhWsd09HB/0uietBzTUIkbAP6ESrw6OvtNDn7QYMauOwGMGQnw7zkAG48FGKxX35mDEM9jfpdsgV5QME6ZY/e73ieiNeWJGwA8xH0lsALvM0qs82GnnUjqyDmUWqYXcSN+09ATC4j5e+XqQATxbjsFSow/NlRCeqAS4z+ljq6cN8HrJh7PzmcNcrCOnYe4AjD29RRvzknwxL2EN1N6waDfTyvxo0+ESiwHScyKQGwq5W47B0osH6jECuI0WB2Qx3X0XCrtshPjN+trC9TU+3LqidFOiCmXSXYiLRfuzesGYrITDs9NF46auAwAkiBGOxHXceuVnYjz1N5O1NSvTn0ZrsQqZkNbQVBnzYN70+FjnloqsctT44+C5g4ThFO0Ei9ZBrAhWwNtfSuxNUlezDH2SlxFtl0dO2ttNhaNOVVWvB5JKRLSjVglFh1ErB6e96InHjMK4LmXgnx4C/2/cvLEdKylvEkeuTce2itxFWmN+ehqKLGK7noA+uyJGcSvvQGw9RYAm04E8Eq8znzl1BPP1VkwtxF8nQl+GZK2DRFFTojcIqN+TLkT0wnHslYynXj+ZQCEGBV328nBHVBNx44PLSdFbWwkL5Nn5krMruzw6cQ6/4D6foI85sSf1hEbjtghxNTpGzoYYOVKnxP3nQZcq1quwbQOZ/uw3spH7Mz8CRp+7m3EljDC5xp2nrIrwMB1uPPQ32cBLH7PTiMieW+l82LHiB1mxxv7YecPC9no5yDE1vzh9T13gnX0hg4F2HpLgCGDe1f+zk6AV18DeOVVu8OXNqmdT8E0E360nejN3AnXBCBvJ3rXhhU92kzF5Fc3V3AqpjWPWE7Z5GtVyAVWxA1pKKFQz67lWdPWjxCvWxDLKzqS0grpif0story2KeTVXM+MV+vImlVeSpY0tXNHOwsV3T4+cROXPLpiV1XdnBv7Jp9Fju8zDxx3Dxi60qPhMVU6Do765J9TbELZrIKfHkq54Wi4pq71HnEPL0Qk+KVrdBK7K/s6JOGVuZNmS5PojnErhWAXIMZYt5w6gpBLpipeI5J8XIRwcjaEmLuhDXiFzM53tkhlLbCX2NXGegqfRbn1c5svWJrBhrCxoaNXYrsSiEs2+BY1xjLxK9+VtuOgqat/EOX7kfshPDQciRP3R3Jtd6EnGvMcmFa+YdmtY3zl+xXGs3s5/PrToRTNxPTDbE6pl93Ijtj6/3IpGWsIounxFzNLC87SrUPGZZ65W4i0rHTEYXTG6ctnp0hxbBgZjekkeDytSc28stYrXdWYz+gTxC7lq9yrU8sFl3BL6EuP9JDz04bwYabaSFBCTEt7VrNtdj8MlbVYzbyye/gqpjyjqKOO4zKa+3kehJ0zZ1cosq5P+uqmMIbq6uddUcvskI8LZ4Sd4svx/rEcVEcn4+svLL0wmQt2FXPflXMKkLt1ycO51rIVTNN/ObXJ64ioRk+Wt3ugKcR4n516po7fbtc17VycSv88P3GPvB7erBEgmyFeWY+wnkLsAwrxfNFuKXiyshNduisbdftDWTUVg/gV4rPANv6OiQX9+zQNy6nxVN0vy7TbQ/MSB5fVyLGcliw+3t2rC/kKn/e1LsnyZs0uu6SFHfPDscaxJE8uA93T3JCzNU5ZmX43s65MHcPRTXWK8Urv+xQYn/3pMqzmfmM/j520SVdlZ3w97HLzFDVD8Tb4jpXhnfdZZTd105O3qGVe/i6EbScq4nWEm40Q1OxsywoKG97QMpMdoM6aK5tbi/MKppyRE8rrQJZA50Gtb+jaBVR7s29nRWg3F7E2QXXvZ01nXm/t7NlMdgNzT3EVYR46SLH7Q4ojaCkQt5Z
VL9OSksXfkYGM2hQg93myygueWGXJ45LJ2ROrOvNKC6ba6zUmXlj3OZzK8y2WKuYK66aUxHjgS3LUQ8wYkwVG7FyH53PqZjLFun107h9iIOWDYKom5rTcTGK7LQRHFq28mVf7YTp5HFo9U5lHegGNWmDIWx42YKXQZwE9fDRlSOpimfKJ8TLF7tvUG5Byr0wh9ehyNaIXYaROQlvlssUedSWlFRYCp2WWGj/i2u8qvSB4E3wxNwzDxtVRfQq99E5hfi9wE6ooWeCkg87s8EPo7wcXgZ1nPLyG8pQx40v3co7c+ZvNo84vBujGHZ22AmXjUi0FVyBKZVg4GaBGaHfwLE2XOXY+tDOlE+IW5c4lJhBaiUXMfDSiB73yApG13VzZCG4ldDHsqfUVqO5E/gZPIUwSQW3FMIbG6hJcdUECZZEiIhN2ghXajF0ROpXzsMBuYS4p3UprjXA1NhlF4RKkyKr4WR9vAGY3ZNDQclApm0Oq9NOyOlr1Px6f1Y7wVWZd+zMlSAaYumBaTuTN9aX8A9ha8LlgdaY75hLiGFla7iIIJ/NZnlikVJE4OUgs7+lvSB4uUpTZcZ27ByTJ0xOnGAnLFshVZng1VduxEEbBzeN2NHrjY0AA4fmGN3wq+cT4tUrATrbHZ6YqbNTeV0KTO/RlzHxwQ4aHDEqvA52gg92xNkJglh6ZIrGjBqT/3U8UwfPCTPrADY1A/TXq9XnHOV8Qty+FgD/kS0gD2xmrqnLNphtEKpLk9xJuY2toDkWanQj4Ro6cYOZPqcT/F7PTHnVn1J5mQKbq6PXAeaWfgDN/XKOb/D18wkxQrt6hfbEPErj0y8JXPmsj1cdOAatBNm8rmU4zU7EWWJujTkyPEqLKLAGVl0NIu2DDpLN0HJGeyEHRAYMtm+7kGOc8wkxVvja1QCdHeG8YStuE1kwQk/KbD3rHC1JiSMws2yNR2+ZIda+InIVtIaTWwYLYgavUmK1hpUe0ZP5MFNol71oaALotw7rydUY8PmFGCe+r1llR21WbKbTiVhboUFXrztgJniVEOv0wtxURg4/u/JhLsEsUjO7CVqyEQS3S4X5Pn2pvuzYmUnxIr1weeP+A4PLlwryyC/E2AC4ti928CKemC7sTLAUpgMn7YWGljyxVGIOsjXIwa+tY+kEmzqhHZx+YtAaO0HWQHti443ldozN4B1AqcAEc3MLQFNLQfDNsyfmTdC2RtgKV4cuLpUg2NOUmL1u4HYsHkgXhOIPhF8g6vLCdPWzGtlj0yrNVdFp9iIucmMjeGQ7CO7GJoCW/oUCOL8dO9kMbWsBujocaUQCvGQ98Fzkmc2wMh/80J0/C15jhoNvotKJjKaYRu3M6J2c7MPgtZSY9ks7EdOxMzPW9PswUitIGiGbP992gpcGrUVHG+voyYk+rm1XpEbQSnj50LOYBR+J2ByDHZYXZtEaV2QDuPTFZCe4jeDw8kiOK7T+G+0D2oiCPooDsVLUrsAnoyqrwQ7mjTNHai54XR05JcFaiYkOAXfgE4LjzLAzTyfoGjvWyTPRm77raFJaQWorUwqyEQ2NAbwF6sS5fofFgphKiPYAO3xdnQBdXdDT1Ql1FsQsIzaRG9kC8RoftYtLJywvLBdPoUlsYtw5MgFI2og0T8xUmaBFWPGWvQhvY3NhcuC0/4EUE+K0UvvXC1UDHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7
F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C1NTEM+d90Y5W8GXep1qoKYgXqeS+DeXtgY8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xOuh6Xt6euDNt96CCePHr4ez+1PKGvAQryMTH9t8K2hvb4fbf/Mr+OQnd4auri74/EFfgBdffAlO/OY34IwzTlvHT/BvT6uBwkCM8Hz1uBNgzZo1MGLECLj+umvTyl6R1yXES5cuhU/ssLM693bbbQv33vP7inyOP0l8DRQG4idmzIBjjjnOlPT+++6GbbbZZr23vYQYP/DSyy6HBx98EC44/zyYOnWf9f4dyv4BhYH41FNPh/sfeBAGDhwIq1atgqOPPgrO+9656719XRCv9w/1H2DVQCEgXr16Nfy/7XeEtrY2uPDC8+F73zsfhgwZArNnzYSGhoaKNfnatWth7ty5yq6MHTtWnbcvEHd3d8Obb74JK1asgEmTJkG/fv0q9h3LeKJCQPy7398N3/nOWSoNeOyxP8H2n9gJWltb4eaf/QT22mtPq11/dvMt8Nhjj8Nuu30avvH1r1mv/eIXv4Q//PFR2HWXXeDEE79hXlu+fDlccun34f77H4DOzk61f+LYOIdFAAAf00lEQVTEiXDrz38GBx18CCxf/r7p2OFr5513AbwyZw4cfviX4MAD9jfnQd9+1VVXw5133aXegw/8kU2Zshuc+91zYJNNNikjg+tc5kJAfMQRR8HTzzxj0oCzzjoH7vrt7+Czn/0MXPfja6xKOud/zoU77rgTDj74ILjqB1dYr11w4UVw222/hM8deAD86Ec/VK9hR3Hafp+FN998S20PHToUBg0aBAsWLIBx48bBypUr1Q+G0gk85tBDD4N/zpoFZ591Jhx//FfV+zB2+/o3ToRHH/2T2h4woL9S81dfnae2hw8fDvfc8zsfy/UB6dxDvGjRIvjULrspSGY88ReYMGECPPX00zB9+pehsbERnn9uFgwYMMBUTW8hvvba6+DqH10D/fv3hyuv+D5Mm7avUk+E9JRTToeFCxeqc6dBfMvPb4WLL75UWYfLL78MPrPfNPX9EOLTT/8WvPDiizB58lbw4AP39aEZy/2W3EN8w403wZVXXgWbb745/OGRB43q7bjTp2DJkiVw2aUXw2GHfbHPEO+191SYP/81OO3UU+Dkk0+0aJkx40n4yjGB0qZBvPc++8K8efPh1FNOhlNOOck6zxtvvAG777G32odlwLL4R/YayD3E+0ydptTs298+w/K4ZA123HEHuOvO2/sEMaYck7feTr33kYcfhC22sOFCj4uvY4cvCeK08+D5p+67H8yd+ypceslF8KUvHZa9Bf2RkGuI8X/BBx54UGozPvP0/8KYMWPUcb2xE+iDp+wedAz/MfMZ2HDDDSOftduUPeCtt95OhBhfx+OSznPkUUfD3/72FJx+2qlw0knfTC2TPyCsgVxDTGqb1qAcjCSIzz7nu3DnnXeZjh122Lb7+Ce8EqdVcJVfzy3E+L/ynXbeBXCYd/oRh8NnPrNfpCrvuuu3agAEO3vY6cMHdq6wk7X33nvBz356k/Wefad9FubMmWOlE5g/
Y8TmUsgnn/wrHP2VY9U5vCeuHsm5hfjxx5+AY796vKo5SiVkNc6aPRsOOSTo1OEcBpzLgFnw+RdcpNIG7EQh4Pig9AD/5hEbqTMef9VVV8K+U/eB+vp6mD37WTjp5FMzpxM33fRTuPyKK1U6gZ3NAw7YX6Uc2Nk77bQzVDqx2WabwaN/fLh6NOT0k3MLMQL00EMPq/kROE8i7oHx2zvvvANHTj9CjeYtWvQu7LHn3qozNnjwYNhyyy3ULLTnnnsexo8fD5gUcIgxQsPkoKOjQ30E5sSDBw+Ct99eoPLipqbGyGCHKyfGz9j/gM+pzhs+8L2jRo1UyQc+EOg7bv817LBDYF/8I3sN5BJiPsz83e+eA8ce85XYEn//8ivgJz/5mTUMjdHYmWedDYsXv6fe19LSAkcdOR1GjR4Fl1xymQUxvj5z5j/gjG99W4FLD1TwSy6+EL533vkKxDQ7ge/DgZMf/vBHcPc990RG7M44/TT1g/KP3tdALiHufTGj78D5C6i6qMxbbbWlgjzpgcfPfvZZ6GjvgIEDB8DWW28NdXV1ffoqfu5En6ot9k2lhbiy1ejPVs0a8BBXs/b9Z1ekBjzEFalGf5Jq1oCHuJq17z+7IjXgIa5INfqTVLMGPMTVrH3/2RWpAQ9xRarRn6SaNeAhrmbt+8+uSA14iCtSjf4k1awBD3E1a99/dkVqwENckWr0J6lmDXiIq1n7/rMrUgMe4opUoz9JNWvAQ1zN2vefXZEa8BBXpBr9SapZAx7iata+/+yK1ICHuCLV6E9SzRrwEFez9v1nV6QGPMQVqUZ/kmrWgIe4mrXvP7siNeAhrkg1+pNUswY8xNWsff/ZFamBYkLc3Q7QtQKgaxVA92qArjYA6ATo6bL/AW53B/tAP+O2+ps9q797gv34bLZxn1pBW79ObYLb8nJ+vV1Xrw/C5zoAddk/PuN2vX7GbbxNA23jc0P4T+3n2/h3I0BDC0D9AICGgQCNgwHqmisCSa2fpFgQd68F6FgG0LkCQAFK4MYBzCHWfxuwJcwSYmxaPGYdICaAFZQIM4eWoEZANbROeAnmRgZ5I0B9A/Q0DIa6puEA9cW+J0hxIFbwLtfgSngZxAZuUuAugG6mxKTIUo25AmdSYtIvUmSHElsQkzILeAlspcwELELdGGzX8/0EMj6zvxuHASDMBX0UAOIegPbFgfoq5aV/LiXGm8Y4LEUSxMZWxNkJtBfaVijLIR8SYm0flN2QCsxthVZgBW6MEitLoWGmv82zABrtRfMoh83JP9n5h7j9XW0fOgG6NaTqWW5zqBnIFtRoD8hWkFIjpKjUzAtbSpwGMULCQDbqGwcxKqv2xBLeiPIyiJUiBzYieCalbtKKjZ4ZQR6df2pFCfINMVoI/GcpsEuNHd7Ypciq48c7eo5OXqRj11eImQpTx87q3PXWC3Pl1XArSyH+oa0omLXIL8TYiWtbEADcjcuucuXt0CmEA2iEV9kHUmaEFpWb0gmpxCKpSIQ4TeRkGuFSY925M+kEpRQIo/bC9KyUmeyE9MN6u56UuEl3/JoAWsYVqrOXW4h72hZCXecHbhW27ASDu1dpRZwi9wQWGPkL/6AdmZ/DFTVZh04pMSkw/c0VmcVqvOPGYVZQazvhUuK6piC16BfcEbUIj3xCjDnw2tcBelBxNaQKUL3t7OC5OnykyC6PzBUYjwsfph+ndxmmUxC2TkJhBd8Z8cCuPFhGajEKbADWCqwUmcHdf2JhcuR8Qty+BKBjcWAjXPDGKbHZj1Dy7Bj/ZpEbeWOdA+ONHlF6e6AH6vBZUMwhdmUTrqEP6uvh+YKBlIDqYMlj6thpYFWKwXNgZiNMB44psFOJEWbq8KGlGA3QNKIIQpzTW4CtfROgs5UpMfPEBmpUV/LG3COTFxYQm44ewRygpR40MGe2BdTOaI0O5pAGPwJ1Qr1At42w2R282ShzjI2w4jSCnHlfbidQibk6Nw4F6DfeQ1y1Glj1H4DuNQ4l
lvaCd+yknXBDrHjVSkuCa8Fcx2JhUwFaSfENdYFSK8+rt80zV1zOOE9vjc0I8A68c1aIWRJhvLGAl/Y3DAAYWIw7l+bTTqx4XqssQRtjK0iJI+kF5cjaVujBDg5rCHNgI5yKTEKthdXq5+nXOMuuHz2/Y0LgJDS8lr3QqhxJI3geTHZCKjGzEXUEND43AQzetmo6VMkPzifEH8xKhhi9b2xHL9rBI4/LYQ0gttg1yhrxxK6enXATCmZrvIwrrfTEbHiEoCbaeSrhTB+0Ghv7wMFF6Nn2kO0ryVLVzpVTiGeG+bBS2xRFVkqM+bDDI4fuwThbA6mAOJpK2DbCjtxcUsw8sVRadnjQteMdPd3XMz8MNjfCyoH5wAYpMKUSDpiH7Fg18Cr5wfmE+P2nbaVVcLpgjlFkPTxtUgfywMweBNY4lFi3Mtt5sUkvKMXQz5YGy2jN7udpDxx8LgUVxmVwuHF4OSYHDlIIAS3aB/TDylLg300AQz9ZSZaqdq6cQvy3AForYsNtyop5Xkz76TWdTnAFjoWY7ARTXD5z2N6d2IhmdjGbZsxnHJvBD0zX2JmMi2BphkrgFNkIMo/OHPBaMGt4FfxNABvsWjXwKvnB+YR4+ZMM4jg7IeENIY4ocMQ2BDuc6YSCOJpGZG2UAFbhgcPELYiI1cm4Zw6jNwM1KrWaEMejNQ2xyolFJ4621bP+N2xK1q9d08flFOLHbU8cayeYV1ZzK4Jb2xo4Dbw4iSfMhcPXwzyYv49aNPTI2cfsBMOh6hp1dacTEUXmiu2yDxxW8zoBjJA3Awzbo6bhzPrl8gnx0j8zD8xsRSLMHWakDeEjj0tRGsVr9n5mJyTkJKispg3KlA+LYWjeKPJmpFxhQzXmCmx39MzAnlFk1pEju0DeV2036xE7psQj9snKSU0fl1OIH3XYCeGRDdCkwN1MgXWYJsAkm8DTCQtqbQNcw85WK0thdk1Fj7MN2kmQLyavHCpx8El8fwA0DU2zjpsLXq7QI/ataTizfrl8QrzkkQSIox2+HjWYob2sUOHAEthQhxdqmOTYKLexFUqJbbthKt0OhK224MPO8R04HcUxyxBrJ+rUbI4AaqujxxTXKDJ17PRrG34mKyc1fVxOIX7IhtiyEQRx2OFTymnBG26HdiKc2OP0zAHrtqeWTSuHmSPDzix5EFEbdfiMwmZQZNWvo9SCfhGWF2bQuvZvuH9Nw5n1y+UT4vfud0PsgLkH0EbIjhtuB8PJyGWkIxeZOxEeryWdP5mwQQ6GBP/bD6dQuDxxRJmF+spBD67IAcRkL+hZx24ELYdX/j3yc1k5qenjcgrxfTojbo9R5PYwM9aQBrCK6Eyrc3S/tgk8P7b8M3UDbWVOauloR85OIcLsN4zYlEPQJ42zExEFp9gN0wduI/ggh9rfDDDy8zUNZ9Yvl0+IF9+tIdaWASfJm8EP2odpRLAuBELKO2iZYdYWgpyxtBlUyep8JLlmfjC9ag/JRaBkhwUdtHCkLrQLtkemOccKcmZLgm31rgBSJ8Rs/6gvZOWkpo/LKcS/F0qMEGMKYcNswDVe1mUfgv/dR22FiOEcSh5CbLexs18nPDAf1EBjI+Gl16PpRAC5K63g+4PBDp5UCKgR8lGH1DScWb9cPiFedFeYExO4SolDiFGFFbKiQ6eAlR00rdTOQQ6ZKXOYDcXaViTUukkirEGK6ESfdM/rUuoAahtuvPoDIXbASwo9+otZOanp43IK8Z0aYvS+aB+kN27XNoIUlqUR6k8BNymxA3reAQzTieSRPFeLhxAzL5zJ81KHLRzscKUSIcRsUIQsBXlgYy+aA8BHH1bTcGb9cjmF+HaHnQijtR41Sy3syHUzOxHsj9qHNM/MzyfnVPDZbnX1LdA8bFuob94AOlr/DZ2r3zZzJWiUTVtf9R0pnZCDF65Bjgi86keAcyiYZzaKTLkxKTFXZA3xmMOzclLTx+UU4t/ojhwpsHhGK8FtgMMuOD1wnDdOGKam
H4UCsmEADNnsBGjop1fZ6emGVW/fB21LZhoI7JRBDGoQlGbwwp4cb9IKA2+KN1ajeA54SZnHHFHTcGb9cvmE+J1fJUJMaQT3uDbU1GmLdvRsuLXXdaYbgS3g2fDA8QdBvw13hp6utdC55h1oGjQRero7YPlL34eejpWB8poOXiCZpM7hczA7jW+HnTx7LoXtgWWHj2a5aV9M/pg/b3RkVk5q+ricQvzLBIg7I3MkLJjpknumujxyS8qT8cQW5EqGzaA1bLDFqdDYfyNYMf830Lb8X7DBlqdBY/8xsGL+r6H9/Rc0xMyzxkAdRmWuDlu8Nw7g1z8O80Pgo3aoytpK4LOHuIo/zoW/YBCLzl1PV5ALJ9oJ1uGTgyEcbkUoG442x7qnbg7f6lvQ0G8ktM65Gdo/mAsbbP4NaBo0AVa8die0LXvWVmIxXOzqmIW5MV31rKF2wBrrqdUVIHpwQz5v9OUqNmLlPjqnSnxb2LGjdMKkFKEfthTWpBIBlNITu7YpnuPD1vY0TnsEkCB+f84t0P7BHBjmhFinExGI3XaA2w23QjuUmcdt5IuNjWAdPQ9x5X5JvT7TwltZOoFKHKpxkAOHcEl7YLaFnbDTCdsLq9PpH4ENcXgc/jV88regsd8oWI5K3DoHhm/xTaXEH8y/E9Yum619bhRWWl8i4nG5N9YjeUk5shty7NzFKPHYr/S66mvxDflUYoQYs2GjwiydwPUtJXQJI3ZGgTmkbDAk9kfgmDg0fPIZTohb5wd2QqqqcrCWalJklrEDp4eZzVRM7bEjUZwZghae2ENcxd/kgp+zAQ6txPpqZ1JimfsmwyrtBWXJ2vs6Jgq5OoAjtg6UeNkrgRKP2DJQYoR47dLZGtgwlQiVM/C8sWmDM1KLDj/TOVTfzsqLqXOHnTpmJ8YeU8VGrNxH51SJNcQRJQ6uoeuzEiuvbM8rjrMf+DlNgzdTI1/tK+ZBd+caGIFK3H80LHvlFmhrfUVB3DxoArw//w5oW/ocNA+eAA1Ng1T81t2+TEOb7GnjOmz1ep5PAK7o+Ml5xrF2wkNcuZ9Sb8+04BaRTgR2go/URTpqCWlFN/fQfFg6VoEDjzxsy5OhccA4eP/VX8PaZf+CYR87FlqGfgzWLJkFa1vnwNBNDoL6hn6w5KVroHP1Athw8ukqcmud90voeP8lk1a4ojEnvMx6uLyxsRHWcbihr7GTWfG4Y3tb8zV5fE6V+BZHOhHMZMtsG4QHTvO+9sShwH4MmXgEtAzfBla98wSseOsP0DJsMgybdJTV0O0r34BlL18P9Y39YOTHz4O6ugZY+sJl0NP+fjBjkqUU0svGpxHafohBEZqaGR2e1lc3G0XWtsLbiSr+KGOUWEGs04nYCC3tdWVHpBd2pxXNw7aCoR89So3QLX35OuhY/S70H7kDDBgVrKzT3dEKra/fA90dK2DYRw+HfiM+Dh0rX4f3/3ODfaFn1txXeF05JTPeEzMltjyxV+LqUbywNpQYWR+2+dfU8HJX2zJoff0+aGv9j1n+CiFraOoPgz+yv4IbH++/8hPoWDnfdOKSJv70VYnxc7jCB8taiWQCt70SV49hsJSY5hFrO5GmtM4ozTH4oa/BU/lwQjrRNHA8DJ10DNQ3DlAV0r5iPrR/MA+62luhedAm0LLB5lDfNEi9tua9mbDijbvZ1cnhHOBEmKmjpuxHUO/RNCPcF00nvJ2oIq0xH70Q0wk+CR4B1rmxYxiZT8VMtBn03l4MliDk9S3Dla1oHLCR8wv39HTBqrcfgdXv/s2OvsQVGnEdvMTozRnN0SX8NJFID3aYSfJ6/oRX4iqyjRDL4WZ9VUdlcmIWtcVMFMLSB8PXFMnVqWSiZYMtoN+I7aGuoRk6Vr6p8uG1y1+EHrzTk2PRk/gRuOhsNuuaugi8lA3bE4CC9SjkiJ2HuIr06o9WI3ZSifW2nh6ZlO/G2wMxvJww0hdALK/DC674GL7lqUqVW+f9CtqWv2iiNAWx/k8w
mT39WjnnIEhkODomJ9YjesG1djqRMFd7oCc+uvptWYFvkNOI7Tb3LLbuDpDrTLjnUrjTBrUOhfbApLQcVvsaPMd6FXqZqyjENmQEs2UfCO4keyCHlWUe7Bx2rg+vtZM5sYe4Aj+hvp7iHT4VUyoyTsV0QJo05TLVA4fnI6jtZz3hSJdnGCpx/43gA5xHvDyYR0wLSJj5vgw4YzP4PAqHUkdyZGlPnPOJcSomm0PMI7aN7Ey7r81R7fflVIlxUjy/OJTPn8CsOG0+ccLwsuncKUyhh6cUurVc61YER9OqwvRX2rJV4RwH1yX6IbTh4EYAfHTyezTdCH45dbQ6vDUJSHtkD3EVf3/m8iR9pbN1xXN7NiUWs9DkPGF+WX94Yah7Mjy/UBTBCX4i9AjmD5t1JfTu8BKkYO4DvS47evzSJFJsk1bwiUExdsSa8CM7eGOmV7ERK/fR+VTid37NVvyJXrYvV/5x2gs+R0J20uiaOhm5abnVboWtJE/+Jb5hzHJToa8QUzOjly2FV3YkT80085EjHT7ywzHzif2FopX7JfX6TItud0DMVTmYQxF7ZUbEA0c9b/B+YTvMOQO74LwVmCgMKit+XHiBaCjFctjYeGOZHztWv3QOdkRmr9FtDxwQo08e86VeV30tviGfSrzoDrGQoLxkH9dhY4ukxIy4WZBLe2FtC8ipJSliM1459MTaJNC9FVnHLjg4y6X7USvBvDB5Y3WiuIiNw0vLWrFO3hi/eEr1fpTv/tadE5tFBRHq6MrwzqueHfCGcEcVmjpw6hXjIpid0BY4tMRciuk+du4byQAtguKC3BW9mSHoKNxmwW2zxKueDM9XARr939Vrwwp+cj6V+N3fuZWY7mWnRu/0jDbHQoDuVTKjk+HJTtCl+tphmOo3d2EimplCs55doLz0Gimn3mPZDEodHHYi2vGjgRN2hQfr6AVziB1KzJe2Gn1oBVGq3qnyCfHie4QS06rwcolXXFRQD0okdeT4GmxmODn01GZRQssTR29UHkZsYYOSMBPEBG3ItO7QadLD49wdPfcqmWIwRV3hTCti0g1n5FKvuCrmwdUjr4KfnFOI701R4gDqHnV/5+Srn7m60oidvY86eNw+kM2Q0ht2+BA2usNomCBrGxHGyPaKQMLbBt459Lz2haYJkEfuniQXUNGAjzqogihV71T5hPi9B5gSI6gUs4ULbIe3yU3yxvGeN/DP4eLcEnb1P3N1UABr1ge/GSNBye2Gc+V3PrpH3piUm64MIQtiqbBco5jZC1TqkQdm/do1fVxOIdY3nlGjdnJxbQ212k+KHCqknIppwck7aix5CAc7wglCvFXp9aSWDj1xcJTzLkqU8zojNZp77LAflhem+9lxG8HvY8dshb/xTBV/nEv+EK5PbCAOoTUqbO79TEu9RpevsiFmcyAsiIXiyrENRzghvbCpLUoU9A5LmcNhPraooAnrYhcaDGwH/ocW1pYKLCHWk+RH7FfFRqzcR+dTiZf+Ud9Yhisxuzl55MbldDfR6PoSAcQaXjNxSNsMPRvCvB44DPMwf2pbQd436oXDLl/glfmSreH5aISOrwjEBvi0f45bQQgn+sTd05k6eQJmfzPGyv2Sen2mZX9JuI+dVGTaRjVmE4OS4CWo9ReTdsGeKxFNKZzlMVMobI2OjuTx6CzAnR/j9sx43ZKO1FQqkQIzZcXD9+511dfiG/KpxMvwBuWyE5cArzoWlZqy4xjl5R01bhmsQQ3HcDPNXuMZWzjvxxp2JgicubHlibV3Zh238GY14iaMJo2Q8LJhZwturczD96xFJnv9nfIJ8fInNZSuNIIpL8HLvTHCzCf8uBRZ2gahzOFInahvDbGZKyGhZodH8mJFaMqInhgECXqIqMAcXlJkGuzQymzdDkwfv8GUXgNTi2/IKcT/64CYPLEDYqbESo3VPT3CaIxPu+QemRrMSifoF2Dez8aZHUoc3lKUa3AUVvOqGUpmKYQD3sBWYEdOQ0xK
G4Fa3rCcbW+way0y2evvlE+IW59hnjgDvAitshOo3F3mWUfBYV9N3NYgCjGTWlbVkZSY99xEk7AAQt/uIMYjmxQjOsEnVGC9gLZ1Y3IBNc6dAD6bjW0PDRZ5yfsjpxDPDD2xyYMFzMAVmSCWz116ymbcoEbQvM6OXIoA27rL5k6wF0KtDYeNI4qsd1jDzQpaglU+U8dOKjRPKPTfQ3fMO7/B77lHTorNQ7FWzNKDHNRh089WtMaAxf3QFQx+qGf2Gm7LiE2RS+N1NDIXpmvGNcQM1IURW3xlRudQBMc6B0GMndApBNoIYxsata1AMEmZGcBoN4wSM7+MCj14+zy0dup3zCXEPSuegzo17ZKpL/e9siOnkwlKKIJnhJc9q/gtHNnTHGsptp7M1XRSoVNrW0OqIBcUx0ZtKijG4xFQ+keQJiiygheHmR32gtKMwdtl+co1f0wuIYbV/wHoWhP6XAtm6X3RVmBHTiuwshl624K5Ozgfm19MrZc45dIe/0htcD2fx776WV9jFyixVmQ+Tq3mQyCMGmKlrA1aYUmJOaxSkR3RW0N/gAGbp37fPByQT4jXvgHQ2cpsgbQVCCPBqp+NGmsFNlDjNvun7EUwhdM56d0Fbcqws9J3MQ5tdfA0vTyPCJQZ4eUKrJVX2QmuwtJeILQJqQX+CJqGAvSbkAdGU79jPiHuWArQ/q4ZwAi8LofVTiGSbQQqdFeg1ApmVGTa7g6U2UhyaCtYvy68iI4CYj4MLS+yI6Wlc7JBEdNaSnlRaQXExgtziCXM3BszRZZQN48GaBqRCkgeDsgnxN1tAGteDyEWOXBgF6QSuxSYPDFXYoJYPyPUoGG25khE4XY2OFdgfcGoncDxCA2hrXfAy2HmtkLnxJGOHlfimPSi/ya4EmIeGE39jvmEGIvVthCga4VIKVxRmujAUYdO2QlSXHwfV2CtyAiv2Y/o6W1RrWL+jzVJSPfLIheKWqqrFJfg5RBzRUYYtTc2doJUWHbwHB0+kxc3AjQMBmgZmwpHXg7IL8TdawHWvs3UOD4LdqYR5IONjdBqrEAVEJsYDl/TMNPCxcEFUFZ780v0bRDUfMngXzB3UsCL22QjmPo6bQUHOAPEPFfutzFAfb+8MJr6PfMLMRatYxlA53Kd//IUguXBVgdORmtaiZUiCy+sbAR5ZPyblDgOYj0BWbkDBrXyxGbCbwzEHGaEmMOMgJIiy5iN0gpmK0wuzPJjPjzdOBygaXgqGHk6IN8QY01jB69T2worDyYbIcCNpBIUrTEFVh07sg78mQEcUWK+CMW6QMzshLEZBC9FbWm2IiY/bhwMgB26gj3yDzHC1L448McyD5YDGpEozdGhMx64txBzBWZdN9OxY1YiYifwNe2LXd441k64lJnlxipP1tsK4FF88YDCoFwAiHVboLXAfwZcrcAqpeAduGgmHEZq3D70EWKRRgQdO7ZImtMTZ4XYZStY3Ea2gUdxCHHjsMLEaa5fXnEgxtJhZw9B7lppDylT7quUWtsHpbi0TR05nhOjPUD48VnHbLxT5+rYqR6dGZUOvbAR5riOHUGsp1eqJTNZBy+ixCJyi0srGgYF/rdAnbjiQ0wlxBy5cyVA90o1PN3TvRbqTLTWrW2HI1IzsLo8McFMS8lTKqG3ZYfODGZQxw53OCBWgGqIZVohBz0MzOSJxXA0wtrQH3rqB0Id2oeC5MBpvqdYSpxWWv96IWvAQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1Xof4/sY7KcTsYB2AAAAAASUVORK5CYII=\" alt=\"audio video logos\" style=\"float:left; height:200px;\" 
/\u003e\n", "\n", "1. Integrations with multi-modal AI models to extract information from unstructured data, in this case audio files.\n", "\n", @@ -96,16 +86,14 @@ "3. BigQuery DataFrames to use Python instead of SQL.\n", "\n", " https://cloud.google.com/bigquery/docs/bigquery-dataframes-introduction" - ] - }, - { - "cell_type": "markdown", + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { - "zoom": "275%" + "body[data-jp-deck-mode='presenting'] \u0026": { + "z-index": "0", + "zoom": "181%" } } } @@ -114,20 +102,43 @@ "slide_type": "slide" } }, + "execution_count": null + }, + { + "id": "8dd2ddab", + "cell_type": "markdown", "source": [ "## Getting started with BigQuery DataFrames (bigframes)\n", "\n", "Install the bigframes package." - ] + ], + "metadata": { + "@deathbeds/jupyterlab-fonts": { + "styles": { + "": { + "body[data-jp-deck-mode='presenting'] \u0026": { + "zoom": "275%" + } + } + } + }, + "slideshow": { + "slide_type": "slide" + } + }, + "execution_count": null }, { + "id": "96cda443", "cell_type": "code", - "execution_count": null, + "source": [ + "%pip install --upgrade bigframes google-cloud-automl google-cloud-translate google-ai-generativelanguage tensorflow " + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "214%" } } @@ -142,18 +153,21 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "%pip install --upgrade bigframes google-cloud-automl google-cloud-translate google-ai-generativelanguage tensorflow " - ] + "execution_count": null }, { + "id": "acf12472", "cell_type": "markdown", + "source": [ + "**Important:** restart the kernel by going to \"Run -\u003e Restart \u0026 clear cell outputs\" before continuing.\n", + "\n", + "Configure bigframes to use your GCP project. First, go to \"Add-ons -\u003e Google Cloud SDK\" and click the \"Attach\" button. 
Then," + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "z-index": "4", "zoom": "236%" } @@ -161,15 +175,17 @@ } } }, - "source": [ - "**Important:** restart the kernel by going to \"Run -> Restart & clear cell outputs\" before continuing.\n", - "\n", - "Configure bigframes to use your GCP project. First, go to \"Add-ons -> Google Cloud SDK\" and click the \"Attach\" button. Then," - ] + "execution_count": null }, { + "id": "fd321077", "cell_type": "code", - "execution_count": null, + "source": [ + "from kaggle_secrets import UserSecretsClient\n", + "user_secrets = UserSecretsClient()\n", + "user_credential = user_secrets.get_gcloud_credential()\n", + "user_secrets.set_tensorflow_credential(user_credential)" + ], "metadata": { "execution": { "iopub.execute_input": "2025-08-14T15:53:08.494636Z", @@ -180,22 +196,25 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "from kaggle_secrets import UserSecretsClient\n", - "user_secrets = UserSecretsClient()\n", - "user_credential = user_secrets.get_gcloud_credential()\n", - "user_secrets.set_tensorflow_credential(user_credential)" - ] + "execution_count": null }, { + "id": "4d837a34", "cell_type": "code", - "execution_count": null, + "source": [ + "import bigframes._config\n", + "import bigframes.pandas as bpd\n", + "\n", + "bpd.options.bigquery.location = \"US\"\n", + "\n", + "# Set to your GCP project ID.\n", + "bpd.options.bigquery.project = \"swast-scratch\"" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "193%" } } @@ -210,24 +229,21 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "import bigframes._config\n", - "import bigframes.pandas as bpd\n", - "\n", - "bpd.options.bigquery.location = \"US\"\n", - "\n", - "# Set to your GCP project ID.\n", - 
"bpd.options.bigquery.project = \"swast-scratch\"" - ] + "execution_count": null }, { + "id": "008f0a87", "cell_type": "markdown", + "source": [ + "## Reading data\n", + "\n", + "BigQuery DataFrames can read data from BigQuery, GCS, or even local sources. With `engine=\"bigquery\"`, BigQuery's distributed processing reads the file without it ever having to reach your local Python environment." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "207%" } } @@ -237,20 +253,24 @@ "slide_type": "slide" } }, - "source": [ - "## Reading data\n", - "\n", - "BigQuery DataFrames can read data from BigQuery, GCS, or even local sources. With `engine=\"bigquery\"`, BigQuery's distributed processing reads the file without it ever having to reach your local Python environment." - ] + "execution_count": null }, { + "id": "9a4b35ab", "cell_type": "code", - "execution_count": null, + "source": [ + "df = bpd.read_json(\n", + " \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/jukebox.jsonl\",\n", + " engine=\"bigquery\",\n", + " orient=\"records\",\n", + " lines=True,\n", + ")" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "225%" } } @@ -265,24 +285,20 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "df = bpd.read_json(\n", - " \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/jukebox.jsonl\",\n", - " engine=\"bigquery\",\n", - " orient=\"records\",\n", - " lines=True,\n", - ")" - ] + "execution_count": null }, { + "id": "e00dcb01", "cell_type": "code", - "execution_count": null, + "source": [ + "# Use `peek()` instead of `head()` to see arbitrary rows rather than the \"first\" rows.\n", + "df.peek()" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - 
"body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "122%" } } @@ -300,20 +316,19 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "# Use `peek()` instead of `head()` to see arbitrary rows rather than the \"first\" rows.\n", - "df.peek()" - ] + "execution_count": null }, { + "id": "335511be", "cell_type": "code", - "execution_count": null, + "source": [ + "df.shape" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "134%" } } @@ -328,14 +343,17 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "df.shape" - ] + "execution_count": null }, { + "id": "595126a1", "cell_type": "code", - "execution_count": null, + "source": [ + "# For the purposes of a demo, select only a subset of rows.\n", + "df = df.sample(n=250)\n", + "df.cache()\n", + "df.shape" + ], "metadata": { "execution": { "iopub.execute_input": "2025-08-14T15:55:55.448664Z", @@ -346,22 +364,36 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "# For the purposes of a demo, select only a subset of rows.\n", - "df = df.sample(n=250)\n", - "df.cache()\n", - "df.shape" - ] + "execution_count": null }, { + "id": "cbd59dd9", "cell_type": "code", - "execution_count": null, + "source": [ + "# As a side effect of how I extracted the song information from the HTML DOM,\n", + "# we ended up with lists in places where we only expect one item.\n", + "#\n", + "# We can \"explode\" to flatten these lists.\n", + "flattened = df.explode([\n", + " \"Recording Repository\",\n", + " \"Recording Label\",\n", + " \"Recording Take Number\",\n", + " \"Recording Date\",\n", + " \"Recording Matrix Number\",\n", + " \"Recording Catalog Number\",\n", + " \"Media Size\",\n", + " \"Recording Location\",\n", + " \"Summary\",\n", + " \"Rights Advisory\",\n", + " \"Title\",\n", + "])\n", + "flattened.peek()" + ], "metadata": { 
"@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "161%" } } @@ -379,31 +411,14 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "# As a side effect of how I extracted the song information from the HTML DOM,\n", - "# we ended up with lists in places where we only expect one item.\n", - "#\n", - "# We can \"explode\" to flatten these lists.\n", - "flattened = df.explode([\n", - " \"Recording Repository\",\n", - " \"Recording Label\",\n", - " \"Recording Take Number\",\n", - " \"Recording Date\",\n", - " \"Recording Matrix Number\",\n", - " \"Recording Catalog Number\",\n", - " \"Media Size\",\n", - " \"Recording Location\",\n", - " \"Summary\",\n", - " \"Rights Advisory\",\n", - " \"Title\",\n", - "])\n", - "flattened.peek()" - ] + "execution_count": null }, { + "id": "84548649", "cell_type": "code", - "execution_count": null, + "source": [ + "flattened.shape" + ], "metadata": { "execution": { "iopub.execute_input": "2025-08-14T15:56:06.546531Z", @@ -414,18 +429,19 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "flattened.shape" - ] + "execution_count": null }, { + "id": "8be3127f", "cell_type": "markdown", + "source": [ + "To access unstructured data from BigQuery, create a URI pointing to a file in Google Cloud Storage (GCS). Then, construct a \"blob\" (also known as an \"Object Ref\" in BigQuery terms) so that BigQuery can read from GCS." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "216%" } } @@ -437,18 +453,25 @@ }, "tags": [] }, - "source": [ - "To access unstructured data from BigQuery, create a URI pointing to a file in Google Cloud Storage (GCS). Then, construct a \"blob\" (also known as an \"Object Ref\" in BigQuery terms) so that BigQuery can read from GCS." 
- ] + "execution_count": null }, { + "id": "31277e21", "cell_type": "code", - "execution_count": null, + "source": [ + "flattened = flattened.assign(**{\n", + " \"GCS Prefix\": \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/\",\n", + " \"GCS Stub\": flattened['URL'].str.extract(r'/(jukebox-[0-9]+)/'),\n", + "})\n", + "flattened[\"GCS URI\"] = flattened[\"GCS Prefix\"] + flattened[\"GCS Stub\"] + \".mp3\"\n", + "# Note: str.to_blob is deprecated.\n", + "flattened[\"GCS Blob\"] = flattened[\"GCS URI\"].str.to_blob()" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "211%" } } @@ -468,23 +491,19 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "flattened = flattened.assign(**{\n", - " \"GCS Prefix\": \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/\",\n", - " \"GCS Stub\": flattened['URL'].str.extract(r'/(jukebox-[0-9]+)/'),\n", - "})\n", - "flattened[\"GCS URI\"] = flattened[\"GCS Prefix\"] + flattened[\"GCS Stub\"] + \".mp3\"\n", - "flattened[\"GCS Blob\"] = flattened[\"GCS URI\"].str.to_blob()" - ] + "execution_count": null }, { + "id": "d27756f5", "cell_type": "markdown", + "source": [ + "BigQuery (and BigQuery DataFrames) provide access to powerful models and multimodal capabilities. Here, we transcribe audio to text." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "317%" } } @@ -496,13 +515,20 @@ }, "tags": [] }, - "source": [ - "BigQuery (and BigQuery DataFrames) provide access to powerful models and multimodal capabilities. Here, we transcribe audio to text." - ] + "execution_count": null }, { + "id": "d1f7ad46", "cell_type": "code", - "execution_count": null, + "source": [ + "# Note: .blob.audio_transcribe is removed. 
This cell will fail.\n", + "# Use bigframes.bigquery.ai.generate instead.\n", + "flattened[\"Transcription\"] = flattened[\"GCS Blob\"].blob.audio_transcribe(\n", + " model_name=\"gemini-2.0-flash-001\",\n", + " verbose=True,\n", + ")\n", + "flattened[\"Transcription\"]" + ], "metadata": { "editable": true, "execution": { @@ -518,22 +544,19 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "flattened[\"Transcription\"] = flattened[\"GCS Blob\"].blob.audio_transcribe(\n", - " model_name=\"gemini-2.0-flash-001\",\n", - " verbose=True,\n", - ")\n", - "flattened[\"Transcription\"]" - ] + "execution_count": null }, { + "id": "1575c468", "cell_type": "markdown", + "source": [ + "Sometimes the model has transient errors. Check the status column to see if there are errors." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "229%" } } @@ -543,18 +566,21 @@ "slide_type": "slide" } }, - "source": [ - "Sometimes the model has transient errors. Check the status column to see if there are errors." 
- ] + "execution_count": null }, { + "id": "e53c7a0b", "cell_type": "code", - "execution_count": null, + "source": [ + "print(f\"Successful rows: {(flattened['Transcription'].struct.field('status') == '').sum()}\")\n", + "print(f\"Failed rows: {(flattened['Transcription'].struct.field('status') != '').sum()}\")\n", + "flattened.shape" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "177%" } } @@ -574,21 +600,20 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "print(f\"Successful rows: {(flattened['Transcription'].struct.field('status') == '').sum()}\")\n", - "print(f\"Failed rows: {(flattened['Transcription'].struct.field('status') != '').sum()}\")\n", - "flattened.shape" - ] + "execution_count": null }, { + "id": "3629f4af", "cell_type": "code", - "execution_count": null, + "source": [ + "# Show transcribed lyrics.\n", + "flattened[\"Transcription\"].struct.field(\"content\")" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "141%" } } @@ -603,20 +628,23 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "# Show transcribed lyrics.\n", - "flattened[\"Transcription\"].struct.field(\"content\")" - ] + "execution_count": null }, { + "id": "09ef6c3d", "cell_type": "code", - "execution_count": null, + "source": [ + "# Find all instrumentatal songs\n", + "instrumental = flattened[flattened[\"Transcription\"].struct.field(\"content\") == \"\"]\n", + "print(instrumental.shape)\n", + "song = instrumental.peek(1)\n", + "song" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "152%" } } @@ -634,23 +662,26 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "# Find all 
instrumentatal songs\n", - "instrumental = flattened[flattened[\"Transcription\"].struct.field(\"content\") == \"\"]\n", - "print(instrumental.shape)\n", - "song = instrumental.peek(1)\n", - "song" - ] + "execution_count": null }, { + "id": "cf15986a", "cell_type": "code", - "execution_count": null, + "source": [ + "import gcsfs\n", + "import IPython.display\n", + "\n", + "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", + "with fs.open(song[\"GCS URI\"].iloc[0]) as song_file:\n", + " song_bytes = song_file.read()\n", + "\n", + "IPython.display.Audio(song_bytes)" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "152%" } } @@ -670,25 +701,23 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "import gcsfs\n", - "import IPython.display\n", - "\n", - "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", - "with fs.open(song[\"GCS URI\"].iloc[0]) as song_file:\n", - " song_bytes = song_file.read()\n", - "\n", - "IPython.display.Audio(song_bytes)" - ] + "execution_count": null }, { + "id": "778d0ac3", "cell_type": "markdown", + "source": [ + "## Creating a searchable index\n", + "\n", + "To be able to search by semantics rather than just text, generate embeddings and then create an index to efficiently search these.\n", + "\n", + "See also, this example: https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/bq_dataframes_llm_vector_search.ipynb" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "181%" } } @@ -698,22 +727,21 @@ "slide_type": "slide" } }, - "source": [ - "## Creating a searchable index\n", - "\n", - "To be able to search by semantics rather than just text, generate embeddings and then create an index to efficiently search these.\n", - "\n", - "See 
also, this example: https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/bq_dataframes_llm_vector_search.ipynb" - ] + "execution_count": null }, { + "id": "de7e4e11", "cell_type": "code", - "execution_count": null, + "source": [ + "from bigframes.ml.llm import TextEmbeddingGenerator\n", + "\n", + "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "163%" } } @@ -728,21 +756,25 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "from bigframes.ml.llm import TextEmbeddingGenerator\n", - "\n", - "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")" - ] + "execution_count": null }, { + "id": "4acfb495", "cell_type": "code", - "execution_count": null, + "source": [ + "df_to_index = (\n", + " flattened\n", + " .assign(content=flattened[\"Transcription\"].struct.field(\"content\"))\n", + " [flattened[\"Transcription\"].struct.field(\"content\") != \"\"]\n", + ")\n", + "embedding = text_model.predict(df_to_index)\n", + "embedding.peek(1)" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "125%" } } @@ -757,25 +789,22 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "df_to_index = (\n", - " flattened\n", - " .assign(content=flattened[\"Transcription\"].struct.field(\"content\"))\n", - " [flattened[\"Transcription\"].struct.field(\"content\") != \"\"]\n", - ")\n", - "embedding = text_model.predict(df_to_index)\n", - "embedding.peek(1)" - ] + "execution_count": null }, { + "id": "a49d1dde", "cell_type": "code", - "execution_count": null, + "source": [ + "# Check the status column to look for errors.\n", + "print(f\"Successful rows: 
{(embedding['ml_generate_embedding_status'] == '').sum()}\")\n", + "print(f\"Failed rows: {(embedding['ml_generate_embedding_status'] != '').sum()}\")\n", + "embedding.shape" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "178%" } } @@ -795,39 +824,39 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "# Check the status column to look for errors.\n", - "print(f\"Successful rows: {(embedding['ml_generate_embedding_status'] == '').sum()}\")\n", - "print(f\"Failed rows: {(embedding['ml_generate_embedding_status'] != '').sum()}\")\n", - "embedding.shape" - ] + "execution_count": null }, { + "id": "15a5bfd3", "cell_type": "markdown", + "source": [ + "We're now ready to save this to a table." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "224%" } } } } }, - "source": [ - "We're now ready to save this to a table." - ] + "execution_count": null }, { + "id": "8b49384c", "cell_type": "code", - "execution_count": null, + "source": [ + "embedding_table_id = f\"{bpd.options.bigquery.project}.kaggle.national_jukebox\"\n", + "embedding.to_gbq(embedding_table_id, if_exists=\"replace\")" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "172%" } } @@ -842,19 +871,24 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "embedding_table_id = f\"{bpd.options.bigquery.project}.kaggle.national_jukebox\"\n", - "embedding.to_gbq(embedding_table_id, if_exists=\"replace\")" - ] + "execution_count": null }, { + "id": "810c77d5", "cell_type": "markdown", + "source": [ + "## Searching the database\n", + "\n", + "To search by semantics, we:\n", + "\n", + "1. 
Turn our search string into an embedding using the same model as our index.\n", + "2. Find the closest matches to the search string." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "183%" } } @@ -864,23 +898,22 @@ "slide_type": "slide" } }, - "source": [ - "## Searching the database\n", - "\n", - "To search by semantics, we:\n", - "\n", - "1. Turn our search string into an embedding using the same model as our index.\n", - "2. Find the closest matches to the search string." - ] + "execution_count": null }, { + "id": "fb63ad94", "cell_type": "code", - "execution_count": null, + "source": [ + "import bigframes.pandas as bpd\n", + "\n", + "df_written = bpd.read_gbq(embedding_table_id)\n", + "df_written.peek(1)" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "92%" } } @@ -898,22 +931,26 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "import bigframes.pandas as bpd\n", - "\n", - "df_written = bpd.read_gbq(embedding_table_id)\n", - "df_written.peek(1)" - ] + "execution_count": null }, { + "id": "f19c88d3", "cell_type": "code", - "execution_count": null, + "source": [ + "from bigframes.ml.llm import TextEmbeddingGenerator\n", + "\n", + "search_string = \"walking home\"\n", + "\n", + "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")\n", + "search_df = bpd.DataFrame([search_string], columns=['search_string'])\n", + "search_embedding = text_model.predict(search_df)\n", + "search_embedding" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "127%" } } @@ -928,26 +965,28 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "from bigframes.ml.llm import 
TextEmbeddingGenerator\n", - "\n", - "search_string = \"walking home\"\n", - "\n", - "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")\n", - "search_df = bpd.DataFrame([search_string], columns=['search_string'])\n", - "search_embedding = text_model.predict(search_df)\n", - "search_embedding" - ] + "execution_count": null }, { + "id": "06f0312e", "cell_type": "code", - "execution_count": null, + "source": [ + "import bigframes.bigquery as bbq\n", + "\n", + "vector_search_results = bbq.vector_search(\n", + " base_table=f\"swast-scratch.scipy2025.national_jukebox\",\n", + " column_to_search=\"ml_generate_embedding_result\",\n", + " query=search_embedding,\n", + " distance_type=\"COSINE\",\n", + " query_column_to_search=\"ml_generate_embedding_result\",\n", + " top_k=5,\n", + ")" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "175%" } } @@ -967,23 +1006,14 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "import bigframes.bigquery as bbq\n", - "\n", - "vector_search_results = bbq.vector_search(\n", - " base_table=f\"swast-scratch.scipy2025.national_jukebox\",\n", - " column_to_search=\"ml_generate_embedding_result\",\n", - " query=search_embedding,\n", - " distance_type=\"COSINE\",\n", - " query_column_to_search=\"ml_generate_embedding_result\",\n", - " top_k=5,\n", - ")" - ] + "execution_count": null }, { + "id": "fae3fcae", "cell_type": "code", - "execution_count": null, + "source": [ + "vector_search_results.dtypes" + ], "metadata": { "execution": { "iopub.execute_input": "2025-08-14T16:05:50.566930Z", @@ -994,19 +1024,20 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "vector_search_results.dtypes" - ] + "execution_count": null }, { + "id": "38423dde", "cell_type": "code", - "execution_count": null, + "source": [ + "results = vector_search_results[[\"Title\", \"Summary\", 
\"Names\", \"GCS URI\", \"Transcription\", \"distance\"]].sort_values(\"distance\").to_pandas()\n", + "results" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "158%" } } @@ -1024,20 +1055,19 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "results = vector_search_results[[\"Title\", \"Summary\", \"Names\", \"GCS URI\", \"Transcription\", \"distance\"]].sort_values(\"distance\").to_pandas()\n", - "results" - ] + "execution_count": null }, { + "id": "37a1dfbd", "cell_type": "code", - "execution_count": null, + "source": [ + "print(results[\"Transcription\"].struct.field(\"content\").iloc[0])" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "138%" } } @@ -1052,14 +1082,21 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "print(results[\"Transcription\"].struct.field(\"content\").iloc[0])" - ] + "execution_count": null }, { + "id": "a4748e0f", "cell_type": "code", - "execution_count": null, + "source": [ + "import gcsfs\n", + "import IPython.display\n", + "\n", + "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", + "with fs.open(results[\"GCS URI\"].iloc[0]) as song_file:\n", + " song_bytes = song_file.read()\n", + "\n", + "IPython.display.Audio(song_bytes)" + ], "metadata": { "editable": true, "execution": { @@ -1076,26 +1113,16 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "import gcsfs\n", - "import IPython.display\n", - "\n", - "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", - "with fs.open(results[\"GCS URI\"].iloc[0]) as song_file:\n", - " song_bytes = song_file.read()\n", - "\n", - "IPython.display.Audio(song_bytes)" - ] + "execution_count": null }, { + "id": "ff22e7eb", "cell_type": "code", - "execution_count": null, + "source": [], "metadata": { "trusted": 
true }, - "outputs": [], - "source": [] + "execution_count": null } ], "metadata": { @@ -1132,6 +1159,6 @@ "version": "3.11.13" } }, - "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 4, + "nbformat": 4 } diff --git a/packages/bigframes/tests/system/small/blob/test_io.py b/packages/bigframes/tests/system/small/blob/test_io.py deleted file mode 100644 index c89fb4c6e6ed..000000000000 --- a/packages/bigframes/tests/system/small/blob/test_io.py +++ /dev/null @@ -1,132 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from unittest import mock - -import pandas as pd -import pytest - -import bigframes -import bigframes.pandas as bpd - -pytest.skip("Skipping blob tests due to b/481790217", allow_module_level=True) - - -idisplay = pytest.importorskip("IPython.display") - - -def test_blob_create_from_uri_str( - bq_connection: str, session: bigframes.Session, images_uris -): - uri_series = bpd.Series(images_uris, session=session) - blob_series = uri_series.str.to_blob(connection=bq_connection) - - pd_blob_df = blob_series.struct.explode().to_pandas() - expected_pd_df = pd.DataFrame( - { - "uri": images_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - - pd.testing.assert_frame_equal( - pd_blob_df, expected_pd_df, check_dtype=False, check_index_type=False - ) - - -def test_blob_create_from_glob_path( - bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris -): - blob_df = session.from_glob_path( - images_gcs_path, connection=bq_connection, name="blob_col" - ) - pd_blob_df = ( - blob_df["blob_col"] - .struct.explode() - .to_pandas() - .sort_values("uri") - .reset_index(drop=True) - ) - - expected_df = pd.DataFrame( - { - "uri": images_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - - pd.testing.assert_frame_equal( - pd_blob_df, expected_df, check_dtype=False, check_index_type=False - ) - - -def test_blob_create_read_gbq_object_table( - bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris -): - obj_table = session._create_object_table(images_gcs_path, bq_connection) - - blob_df = session.read_gbq_object_table(obj_table, name="blob_col") - pd_blob_df = ( - blob_df["blob_col"] - .struct.explode() - .to_pandas() - .sort_values("uri") - .reset_index(drop=True) - ) - expected_df = pd.DataFrame( - { - "uri": images_uris, - "version": [None, None], - "authorizer": 
[bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - - pd.testing.assert_frame_equal( - pd_blob_df, expected_df, check_dtype=False, check_index_type=False - ) - - -def test_display_images(monkeypatch, images_mm_df: bpd.DataFrame): - mock_display = mock.Mock() - monkeypatch.setattr(idisplay, "display", mock_display) - - images_mm_df["blob_col"].blob.display() - - for call in mock_display.call_args_list: - args, _ = call - arg = args[0] - assert isinstance(arg, idisplay.Image) - - -def test_display_nulls( - monkeypatch, - bq_connection: str, - session: bigframes.Session, -): - uri_series = bpd.Series([None, None, None], dtype="string", session=session) - blob_series = uri_series.str.to_blob(connection=bq_connection) - mock_display = mock.Mock() - monkeypatch.setattr(idisplay, "display", mock_display) - - blob_series.blob.display() - - for call in mock_display.call_args_list: - args, _ = call - arg = args[0] - assert arg == "" diff --git a/packages/bigframes/tests/system/small/blob/test_properties.py b/packages/bigframes/tests/system/small/blob/test_properties.py deleted file mode 100644 index f63de38a8ce9..000000000000 --- a/packages/bigframes/tests/system/small/blob/test_properties.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pandas as pd -import pytest - -import bigframes.dtypes as dtypes -import bigframes.pandas as bpd - -pytest.skip("Skipping blob tests due to b/481790217", allow_module_level=True) - - -def test_blob_uri(images_uris: list[str], images_mm_df: bpd.DataFrame): - actual = images_mm_df["blob_col"].blob.uri().to_pandas() - expected = pd.Series(images_uris, name="uri") - - pd.testing.assert_series_equal( - actual, expected, check_dtype=False, check_index_type=False - ) - - -def test_blob_authorizer(images_mm_df: bpd.DataFrame, bq_connection: str): - actual = images_mm_df["blob_col"].blob.authorizer().to_pandas() - expected = pd.Series( - [bq_connection.casefold(), bq_connection.casefold()], name="authorizer" - ) - - pd.testing.assert_series_equal( - actual, expected, check_dtype=False, check_index_type=False - ) - - -def test_blob_version(images_mm_df: bpd.DataFrame): - actual = images_mm_df["blob_col"].blob.version().to_pandas() - expected = pd.Series(["1753907851152593", "1753907851111538"], name="version") - - pd.testing.assert_series_equal( - actual, expected, check_dtype=False, check_index_type=False - ) - - -def test_blob_metadata(images_mm_df: bpd.DataFrame): - actual = images_mm_df["blob_col"].blob.metadata().to_pandas() - expected = pd.Series( - [ - ( - '{"content_type":"image/jpeg",' - '"md5_hash":"e130ad042261a1883cd2cc06831cf748",' - '"size":338390,' - '"updated":1753907851000000}' - ), - ( - '{"content_type":"image/jpeg",' - '"md5_hash":"e2ae3191ff2b809fd0935f01a537c650",' - '"size":43333,' - '"updated":1753907851000000}' - ), - ], - name="metadata", - dtype=dtypes.JSON_DTYPE, - ) - expected.index = expected.index.astype(dtypes.INT_DTYPE) - pd.testing.assert_series_equal(actual, expected) - - -def test_blob_content_type(images_mm_df: bpd.DataFrame): - actual = images_mm_df["blob_col"].blob.content_type().to_pandas() - expected = pd.Series(["image/jpeg", "image/jpeg"], name="content_type") - - pd.testing.assert_series_equal( - actual, expected, 
check_dtype=False, check_index_type=False - ) - - -def test_blob_md5_hash(images_mm_df: bpd.DataFrame): - actual = images_mm_df["blob_col"].blob.md5_hash().to_pandas() - expected = pd.Series( - ["e130ad042261a1883cd2cc06831cf748", "e2ae3191ff2b809fd0935f01a537c650"], - name="md5_hash", - ) - - pd.testing.assert_series_equal( - actual, expected, check_dtype=False, check_index_type=False - ) - - -def test_blob_size(images_mm_df: bpd.DataFrame): - actual = images_mm_df["blob_col"].blob.size().to_pandas() - expected = pd.Series([338390, 43333], name="size") - - pd.testing.assert_series_equal( - actual, expected, check_dtype=False, check_index_type=False - ) - - -def test_blob_updated(images_mm_df: bpd.DataFrame): - actual = images_mm_df["blob_col"].blob.updated().to_pandas() - expected = pd.Series( - [ - pd.Timestamp("2025-07-30 20:37:31", tz="UTC"), - pd.Timestamp("2025-07-30 20:37:31", tz="UTC"), - ], - name="updated", - ) - - pd.testing.assert_series_equal( - actual, expected, check_dtype=False, check_index_type=False - ) diff --git a/packages/bigframes/tests/system/small/blob/test_urls.py b/packages/bigframes/tests/system/small/blob/test_urls.py deleted file mode 100644 index b2dd6604343e..000000000000 --- a/packages/bigframes/tests/system/small/blob/test_urls.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest - -import bigframes.pandas as bpd - -pytest.skip("Skipping blob tests due to b/481790217", allow_module_level=True) - - -def test_blob_read_url(images_mm_df: bpd.DataFrame): - urls = images_mm_df["blob_col"].blob.read_url() - - assert urls.str.startswith("https://storage.googleapis.com/").all() - - -def test_blob_write_url(images_mm_df: bpd.DataFrame): - urls = images_mm_df["blob_col"].blob.write_url() - - assert urls.str.startswith("https://storage.googleapis.com/").all() From b9476e683f180374e85d805ab9b2be641e7f6380 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 10 Apr 2026 23:06:16 +0000 Subject: [PATCH 03/39] style: fix formatting and clean up imports --- packages/bigframes/bigframes/operations/blob.py | 8 -------- packages/bigframes/bigframes/session/polars_executor.py | 2 +- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/packages/bigframes/bigframes/operations/blob.py b/packages/bigframes/bigframes/operations/blob.py index d29d1a1202c0..9cd7dd0db291 100644 --- a/packages/bigframes/bigframes/operations/blob.py +++ b/packages/bigframes/bigframes/operations/blob.py @@ -14,18 +14,10 @@ from __future__ import annotations -import os -import warnings -from typing import Literal, Optional, Union, cast - -import pandas as pd -import requests import bigframes.dataframe -import bigframes.exceptions as bfe import bigframes.operations as ops import bigframes.series -from bigframes import clients, dtypes from bigframes.core.logging import log_adapter FILE_FOLDER_REGEX = r"^.*\/(.*)$" diff --git a/packages/bigframes/bigframes/session/polars_executor.py b/packages/bigframes/bigframes/session/polars_executor.py index 43e3609ac3c1..06c7fcb925c4 100644 --- a/packages/bigframes/bigframes/session/polars_executor.py +++ b/packages/bigframes/bigframes/session/polars_executor.py @@ -122,7 +122,7 @@ def _is_node_polars_executable(node: nodes.BigFrameNode): return False for expr in node._node_expressions: if isinstance(expr, 
agg_expressions.Aggregation): - if not type(expr.op) in _COMPATIBLE_AGG_OPS: + if type(expr.op) not in _COMPATIBLE_AGG_OPS: return False if isinstance(expr, expression.Expression): if not set(map(type, _get_expr_ops(expr))).issubset(_COMPATIBLE_SCALAR_OPS): From d84ec947309ea54eec3ddc74d2678e2262e7e4af Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 10 Apr 2026 23:11:51 +0000 Subject: [PATCH 04/39] style: enforce ruff formatting --- packages/bigframes/.python-version | 1 + packages/bigframes/bigframes/dataframe.py | 51 +++++---------- packages/bigframes/bigframes/series.py | 65 +++++++++---------- .../bigframes/bigframes/session/__init__.py | 33 ++++------ 4 files changed, 60 insertions(+), 90 deletions(-) create mode 100644 packages/bigframes/.python-version diff --git a/packages/bigframes/.python-version b/packages/bigframes/.python-version new file mode 100644 index 000000000000..95ed564f82b7 --- /dev/null +++ b/packages/bigframes/.python-version @@ -0,0 +1 @@ +3.14.2 diff --git a/packages/bigframes/bigframes/dataframe.py b/packages/bigframes/bigframes/dataframe.py index a98a44448737..b0ea81e003e1 100644 --- a/packages/bigframes/bigframes/dataframe.py +++ b/packages/bigframes/bigframes/dataframe.py @@ -1611,8 +1611,7 @@ def to_pandas( # type: ignore[overload-overlap] ordered: bool = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.DataFrame: - ... + ) -> pandas.DataFrame: ... @overload def to_pandas( @@ -1624,8 +1623,7 @@ def to_pandas( ordered: bool = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... def to_pandas( self, @@ -1937,8 +1935,7 @@ def drop( columns: Union[blocks.Label, Sequence[blocks.Label]] = None, level: typing.Optional[LevelType] = None, inplace: Literal[False] = False, - ) -> DataFrame: - ... + ) -> DataFrame: ... 
@overload def drop( @@ -1950,8 +1947,7 @@ def drop( columns: Union[blocks.Label, Sequence[blocks.Label]] = None, level: typing.Optional[LevelType] = None, inplace: Literal[True], - ) -> None: - ... + ) -> None: ... def drop( self, @@ -2095,20 +2091,17 @@ def _resolve_levels(self, level: LevelsType) -> typing.Sequence[str]: return self._block.index.resolve_level(level) @overload - def rename(self, *, columns: Mapping[blocks.Label, blocks.Label]) -> DataFrame: - ... + def rename(self, *, columns: Mapping[blocks.Label, blocks.Label]) -> DataFrame: ... @overload def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: Literal[False] - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: Literal[True] - ) -> None: - ... + ) -> None: ... def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: bool = False @@ -2125,8 +2118,7 @@ def rename( def rename_axis( self, mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def rename_axis( @@ -2135,8 +2127,7 @@ def rename_axis( *, inplace: Literal[False], **kwargs, - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def rename_axis( @@ -2145,8 +2136,7 @@ def rename_axis( *, inplace: Literal[True], **kwargs, - ) -> None: - ... + ) -> None: ... def rename_axis( self, @@ -2342,8 +2332,7 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def reset_index( @@ -2355,8 +2344,7 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> None: - ... + ) -> None: ... 
@overload def reset_index( @@ -2368,8 +2356,7 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> Optional[DataFrame]: - ... + ) -> Optional[DataFrame]: ... def reset_index( self, @@ -2432,8 +2419,7 @@ def sort_index( ascending: bool = ..., inplace: Literal[False] = ..., na_position: Literal["first", "last"] = ..., - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def sort_index( @@ -2442,8 +2428,7 @@ def sort_index( ascending: bool = ..., inplace: Literal[True] = ..., na_position: Literal["first", "last"] = ..., - ) -> None: - ... + ) -> None: ... def sort_index( self, @@ -2489,8 +2474,7 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def sort_values( @@ -2501,8 +2485,7 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> None: - ... + ) -> None: ... def sort_values( self, diff --git a/packages/bigframes/bigframes/series.py b/packages/bigframes/bigframes/series.py index 3d29c19b9e41..17addef1ab0a 100644 --- a/packages/bigframes/bigframes/series.py +++ b/packages/bigframes/bigframes/series.py @@ -375,8 +375,7 @@ def copy(self) -> Series: def rename( self, index: Union[blocks.Label, Mapping[Any, Any]] = None, - ) -> Series: - ... + ) -> Series: ... @overload def rename( @@ -385,8 +384,7 @@ def rename( *, inplace: Literal[False], **kwargs, - ) -> Series: - ... + ) -> Series: ... @overload def rename( @@ -395,8 +393,7 @@ def rename( *, inplace: Literal[True], **kwargs, - ) -> None: - ... + ) -> None: ... def rename( self, @@ -457,8 +454,7 @@ def rename( def rename_axis( self, mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], - ) -> Series: - ... + ) -> Series: ... 
@overload def rename_axis( @@ -467,8 +463,7 @@ def rename_axis( *, inplace: Literal[False], **kwargs, - ) -> Series: - ... + ) -> Series: ... @overload def rename_axis( @@ -477,8 +472,7 @@ def rename_axis( *, inplace: Literal[True], **kwargs, - ) -> None: - ... + ) -> None: ... @validations.requires_index def rename_axis( @@ -522,8 +516,7 @@ def reset_index( drop: Literal[False] = ..., inplace: Literal[False] = ..., allow_duplicates: Optional[bool] = ..., - ) -> bigframes.dataframe.DataFrame: - ... + ) -> bigframes.dataframe.DataFrame: ... @overload def reset_index( @@ -534,8 +527,7 @@ def reset_index( drop: Literal[True] = ..., inplace: Literal[False] = ..., allow_duplicates: Optional[bool] = ..., - ) -> Series: - ... + ) -> Series: ... @overload def reset_index( @@ -546,8 +538,7 @@ def reset_index( drop: bool = ..., inplace: Literal[True] = ..., allow_duplicates: Optional[bool] = ..., - ) -> None: - ... + ) -> None: ... @validations.requires_ordering() def reset_index( @@ -1540,9 +1531,9 @@ def ne(self, other: object) -> Series: def items(self): for batch_df in self._block.to_pandas_batches(): - assert ( - batch_df.shape[1] == 1 - ), f"Expected 1 column in the dataframe, but got {batch_df.shape[1]}." + assert batch_df.shape[1] == 1, ( + f"Expected 1 column in the dataframe, but got {batch_df.shape[1]}." + ) for item in batch_df.squeeze(axis=1).items(): yield item @@ -1772,8 +1763,7 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> None: - ... + ) -> None: ... @typing.overload def sort_values( @@ -1784,8 +1774,7 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> Series: - ... + ) -> Series: ... 
def sort_values( self, @@ -1816,14 +1805,12 @@ def sort_values( @typing.overload # type: ignore[override] def sort_index( self, *, axis=..., inplace: Literal[False] = ..., ascending=..., na_position=... - ) -> Series: - ... + ) -> Series: ... @typing.overload def sort_index( self, *, axis=0, inplace: Literal[True] = ..., ascending=..., na_position=... - ) -> None: - ... + ) -> None: ... @validations.requires_index def sort_index( @@ -2698,18 +2685,28 @@ def _apply_binary_aggregation( @typing.overload def _align( self, other: Series, how="outer" - ) -> tuple[ex.DerefOp, ex.DerefOp, blocks.Block,]: - ... + ) -> tuple[ + ex.DerefOp, + ex.DerefOp, + blocks.Block, + ]: ... @typing.overload def _align( self, other: typing.Union[Series, scalars.Scalar], how="outer" - ) -> tuple[ex.DerefOp, AlignedExprT, blocks.Block,]: - ... + ) -> tuple[ + ex.DerefOp, + AlignedExprT, + blocks.Block, + ]: ... def _align( self, other: typing.Union[Series, scalars.Scalar], how="outer" - ) -> tuple[ex.DerefOp, AlignedExprT, blocks.Block,]: + ) -> tuple[ + ex.DerefOp, + AlignedExprT, + blocks.Block, + ]: """Aligns the series value with another scalar or series object. Returns new left column id, right column id and joined tabled expression.""" values, block = self._align_n( [ diff --git a/packages/bigframes/bigframes/session/__init__.py b/packages/bigframes/bigframes/session/__init__.py index ea36cc1925f1..a025256f2b1e 100644 --- a/packages/bigframes/bigframes/session/__init__.py +++ b/packages/bigframes/bigframes/session/__init__.py @@ -432,8 +432,7 @@ def read_gbq( # type: ignore[overload-overlap] col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... @overload def read_gbq( @@ -449,8 +448,7 @@ def read_gbq( col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... 
def read_gbq( self, @@ -522,8 +520,7 @@ def _read_gbq_colab( *, pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... @overload def _read_gbq_colab( @@ -532,8 +529,7 @@ def _read_gbq_colab( *, pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[True] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... @log_adapter.log_name_override("read_gbq_colab") def _read_gbq_colab( @@ -594,8 +590,7 @@ def read_gbq_query( # type: ignore[overload-overlap] filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... @overload def read_gbq_query( @@ -611,8 +606,7 @@ def read_gbq_query( filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... def read_gbq_query( self, @@ -759,8 +753,7 @@ def read_gbq_table( # type: ignore[overload-overlap] use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... @overload def read_gbq_table( @@ -774,8 +767,7 @@ def read_gbq_table( use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... def read_gbq_table( self, @@ -926,8 +918,7 @@ def read_pandas( pandas_dataframe: pandas.Index, *, write_engine: constants.WriteEngineType = "default", - ) -> bigframes.core.indexes.Index: - ... + ) -> bigframes.core.indexes.Index: ... @typing.overload def read_pandas( @@ -935,8 +926,7 @@ def read_pandas( pandas_dataframe: pandas.Series, *, write_engine: constants.WriteEngineType = "default", - ) -> bigframes.series.Series: - ... + ) -> bigframes.series.Series: ... 
@typing.overload def read_pandas( @@ -944,8 +934,7 @@ def read_pandas( pandas_dataframe: pandas.DataFrame, *, write_engine: constants.WriteEngineType = "default", - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... def read_pandas( self, From 9c3bc9e3ff851917dd6aba3fb71a18aae6143a52 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 10 Apr 2026 23:43:04 +0000 Subject: [PATCH 05/39] docs: fix notebook outputs --- ...with-bigframes-over-national-jukebox.ipynb | 153 ++++++++++-------- 1 file changed, 90 insertions(+), 63 deletions(-) diff --git a/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb b/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb index e70ddfe4a845..3fd66abcbb44 100644 --- a/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb +++ b/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb @@ -25,7 +25,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "194%" } } @@ -47,13 +47,13 @@ "\n", "* Learn more at https://www.loc.gov/collections/national-jukebox/about-this-collection/\n", "\n", - "\u003cimg src=\"https://www.loc.gov/static/collections/national-jukebox/images/acoustic-session.jpg\" alt=\"recording 100+ years ago\" width=\"400px\" /\u003e" + "\"recording" ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "z-index": "0", "zoom": "216%" } @@ -73,7 +73,7 @@ "\n", "To search the National Jukebox, we combine powerful features of BigQuery:\n", "\n", - "\u003cimg 
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAALEAAAFdCAYAAABM2IyIAAAAAXNSR0IArs4c6QAAIABJREFUeF7tnQfYHUXZ/p+3p4ckpJBgQj4JUkLxjxQFCTUEBRSET4SACAIWOirlE+kgICJIU0GwUVR6UVQg+An4RRNAipKQUBNCSOEl9e3/65mdZ/aZZ2fL++aEc3Z3znWF8+6ePXvOzPzOzT33zM7W9fT09IB/+BrIcQ3UeYhz3Hr+q6sa8BB7EHJfAx7i3DehL4CH2DOQ+xooHcRr166FDz5YAaNGjcx94/kCBDVQKojfeOMNOOjgQ2DlylVw4w3XwV577ZnKwezZz8IXDvlvddxr8+emHu8P+PBrIJcQr1q1Co796vGqtr52wvGw++5TnDXX2toKJ3ztG+q1r3/tBFi5ciWceNIpavuEE46Ds878TmqNe4hTq6jqB+QSYqy1Qw75IsyaPRv2228a3HD9j50Veeedd8HZ53wXGhoa4J//+D9oaWlWUL+76F246abrYeLEiakN4CFOraKqH5BbiG+77ZdwwYUXQb9+/eDZ2f9Qz/IxffqX4amnn1a24eaf/aRPle0h7lO1fahvyi3E7733Huy08y6AA47XXnM1HHDA/lbF8devueZqOFC8HlfLy5e/D+idJ07cBIYOHQpZIEbbMn/+a7DxxuNg5Mj0DiPamnnz5sOGG46AcePGfagNXsQPyy3E2BiktPtN2xduuOE6q31uvfUXcOFFF8OAAQNg9qyZ0NLSAl1dXXDE9KPUcZdcfBF89KP/Zd7z8sv/hksuuQyefuYZs2/PPXaHk08+CT5/0BfUPtmxe+qpp+Hqq69RtoYem222GZxx+qkwdeo+EV6effY5+P7lV8DMmf8wr40ZMwaOOeZoOO6rxxaRrw+lTLmGmDyvy1Ic/IVDAaE56KDPww+vulJVJkK86aTN1d/333c3bLPNNurvl156WaUWHR0danv8+I/AsmXLVUdw8uSt4MUXX4pAfO+998HpZ3xb7a+rq4PJkydDc1OTAfqySy+Gww77omlEBP3II4+GNWvWqH0IOyp+W1ub2j7uuK/COWef+aE0etE+JNcQr169Grbdbnvo7OwEbhkWLlwIu+waJBa33XoLTJmyWyLERxxxlFJg7Ohd86OrYOutt1bAP/DAg3DW2f8D7e3tFsQI4ic/9WlAG/H5zx0I5577PzB8+HB1zA033gRXXnkV9O/fH/4x8xkYOHCgAnf3PfaCxYvfgz123x0uuuh8ZSPwvLfffqfy9vi45eafwp577lE0xtZ7eXINMdYORm2PP/4ETNt3Ktx44/Wqwq6/4Ub4wQ9+CCNGjID/+/tTKp2IU+J3310MO39yF/X6XXfeDjvuuINV6ZdfcSXcdNNPLYgffvgRFdWNHj0K/vrkE9Dc3Gze093dDftMnaY8MqYmmJ489NDDcNLJp8LgwYPh78/8TVkc/jjttDPgvvsfsMqw3lu+QB+Qe4jvu+9+OO30b0FjYyO88K9nVUoxbb/94ZVXXoGjjz4Kzvveuaa5XHbiz3/+Cxx/wtdVJ+65Z/8ZaVq0ARjn4YM8MXrnm2/5uVLfSZM2jbxnzpw5gB1EzKExj7744kvhlp/fCvvuOxVu0j80/iaCHH8Uf3/mqQLh9eEUJfcQo6dES4HPV199FXx8u21h9z32VrV33713w7bbBr43Tonv+u3v4KyzzoFNN/0o/PlPf4zU+oIFC2DXT+9uQXzGt74D99xzb2oLnXbqKXDyyScCHX/EEV+Ciy+6MPK+f86aBYceepjy1vPnzUk9rz/AroHcQ4zFwf9Vo5rtv/9nYdKmm8LVP7pGdc6enPG4VVqXEv/x0T/B17/+zV4pMSnrkdOPgAsvPD+VKa/EqVW0TgcUAmICcciQIfCRj2ys0gZSQV47Lojnzn0Vpu67nzrst3fdATvs8AmrQq+44gdw403BQAnZiQcefAhOOeU02Hzz
zeEPjzwYaQBU6UmbTYLJW22l1NV74nViNPXNhYAY0wm0FJhW0GPGjMdgwvjxqUqMB6D9wLhrk002gWuvvRq2njxZpRMPPvQwnHnm2ZF0Audu4EALPp9/3rnw5S8H2TM+fvObO+C7535PKftTf3vSpBO77Lqb8sm77fZpwPht7NixkXQCs27MvP2jdzVQCIixyNynbrfdtnDvPb+P1ERcTkxKSW+YMGECLF26VOXE+DcCzpUY/6aMGv/ebNIkGDZ8mIKaMuULLjgPjjpyuvkOPFem97z51luAU0PVD2n3KXDrz2/uXev5o1UNFAbiJ2bMgGOOOU4VChMJTCbkIw5iPA7nYlz5g6ssNd9ppx3hzO98G3DgREKM23/5y2Nw7Y+vhxdeeMF8FMZ5x3zlaDjnnLMin4/zOK699rrIiN306Yer2XgUBXo2e1cDhYG4d8V2H7169Rp4/vnn1YsbbTRG2YssD7QJ8+a9Ch0dnbDxxhsrX5708HMnstRq9mM8xNnryh9ZozXgIa7RhvFfK3sNeIiz15U/skZrwENcow3jv1b2GvAQZ68rf2SN1oCHuEYbxn+t7DXgIc5eV/7IGq0BD3GNNoz/WtlrwEOcva78kTVaAx7iGm0Y/7Wy14CHOHtd+SNrtAY8xDXaMP5rZa8BD3H2uvJH1mgNeIhrtGH818peAx7i7HXlj6zRGvAQ12jD+K+VvQY8xNnryh9ZozXgIa7RhvFfK3sNeIiz15U/skZrwENcow3jv1b2GvAQZ68rf2SN1oCHuEYbxn+t7DXgIc5eV/7IGq2BYkLc3Q3Q2Q7Q1YnLw0NPVyfUQQ9Aj+Mf7sfj8YGv8+PU37RfHRBsB//Rx9N2XbBdF76sjqHtOvyDPWjbeq7DZeeDg9Sz3sa/Xdt8f309QH0DAK7F3NAI0NgMgPtK8CgWxN1dAO1tAF0dAtju+G0FMYNXMYnHC6gltBbMBCvuNNRqmglKoolvS2gFvASzE+L6AOzIP9pfH8Dc3BLAXeBHcSBGeDvaNKwEbQK8UpWlUtO2pdCkxlqJCQyl4PxBMDvIkQqs1FZLtnmOUV6pzApghBbhZ/AasPW+ppYA5oI+igFx25rAOqAtUHDGPUs7wSCPQJygxAZaaSuSKHEoMkFLIBvLoKF0KrGEltSYK7Pj76ZmgObovf6KwHX+IUaAO8k+aChjYSaIBbzcTpBCO5VYQMsthaJBK7DyxtxWOLwwV2AOawRoZhksJZZ2QoBbrz01+mJS7MYmgJb+ReDWKkO+IUb70NEer8Cqw6Y7bpZ9kBBTx468sKOjxz0xKXFEkeMtcVDrZB303wSveZYdObIJpMx1AASntAwEqgteYzvqAluB9qJAj/xCjJ24NasC+6CU12EhSGEjyswUWXbsnEqs4Y5LJxTMskNHlOj9znSC++GEVCJTx04rrkokEHbhkfl2/4GF6uzlF+K1q4WN6A5SBgUz79Dp/ZYix9kJhxJbUJNloBiN2Yc0ZTMJm/DGsREb974uJSZoHbaBK6+EGrcbmgD62bchS/v6tfx6PiFGZV29IoRVKrGB2dXJo1yYd/JkisHzYg0u+V8eRBhbwYQ4rrVFTBzmwfoNHOY05VVKS8mE7NgxReYwY3rB1XjA4MLkyPmEuH0tAP6TNiETzDFgx0Vu5IUJXpkPG6/cWzvhgJescuLgRlyHLoudwGP0cdjBK0jslk+I16wMOnRkGwhmAzFLKeJshBXFuVILbRksBaYRO24nxAhdqhLH2QmXIouOnTWwIXPhGAW2vDFTY4zc+g+qZZeQ+bvlE+KVrQDdLBe2FBk9sOjk9SZy48PM3C7wv00HT9ezHOugqM08a7vBm8XYhzh4WZoRl0RY3pfsBYObWw4FM1NiHM0bNDQzKLV8YC4h7mldinfMCTtxlhLz/aKjlxi5sbkQ6txim/XpgjkWTIGtdEI2d1w6wSCliI18s4JTn0dFZno4Wnpc3JYpBNkF2i/hNfsbAIYEN1XP+yOXEEPr
0gBgA2+XPVIX8cYOZaZojeZJ8Mk+ul8Xwuoa5CDI9cSfNBJS0wkClSmzCh44xKA7YxngtWAWnTrq5A0dkfatc/F6PiFe/h5T4ThFZlGby17w2WsqT9ZQkgJbSky2gc1is1IKamueF9M+1uHjCYXLTrigpeNIkc1gBk8nuB92eWMGMVfmYSNzAWnal8wpxIuFEhOwIiuWubFz0CMGXt6hc9kHV9SWWtvaQrCniNLyyUBkK6xnbTUi8ZnwxAQrtxsGYK3kw0alfeNcvJ5PiJctsgc2ZDqRqaOXoLwuZTYemCcUwhsnNXmSndDjFWZY2igyAcs9Mf+b58VCgZ1emBRZPw8fnQtI075kPiFeuijBE3Ov7FJoitMYxARtqq3g0Mq4jSgn+yDshGvY2dWRc6kvnsrYCAm0wz5IxeUDHRzuEWPS+MjF6/mEeMlCPWdCd+ioI5fW0VOvx9kHlkiYdEKDqeYDJSiwnE8cN53YTL1kkRuHVtkMbhfStkmJmT9OtBHCG284NheQpn3JfEL8HkKsVdYCmE/FdCmyBpErL4/TrL9dkVsKzKZ/FzMV00CsX5f+1yhuAry8g2fyY51aGJ8sbIOJ4sR+D3Ha72M9vr74bXv2WgRk2cHjcyNA+2lSZIcCp9oKllYYr2z9wQrvyIld8JIPjjxrz6EGLiTcpMS035FCWMrMX28AGDluPTbSh3fqfCqxgpiUVj6zqZlWXqyvpeN2QqUXDo+cqsiuwQ7WaC47wTt2FsT6fbFJhIA3Von1RCDueQ3A2m5IoEcl30j9w8Nw3T4pnxC/+6ZW0y7x7ABbzTcWsJptrcZqW3plsc8MhiSM5FlXdtCFHiwc5h05itnkYEYczOrCZam8cptshbQTQoGVvWgAGP2RdaOnRt6dY4iTlJh3+BjAFqwId4yloI5cbGoR442NJxZuQm2yoeTMgxo6lTA2gqZd0n7HtrIdGtI4L0xpxejxNYLhun2NfEL8zhsJdkLCzdWWdeyM8jpshjUtUyi04pdUWnhjDjEbqDNNJOFVbGdMIwhOOj5u2+zX6uv0xPq1MRPWjZ4aeXdOIX5dRGykvPJZq3CifZAQx0GdNEFI0KsuFBV2wmUllD1gKm2lDVk6csxOWFDrETm1r0FPoNfPaCNIqT3EVfwZLnwtXYm5D6a/LfvgsBNxHb2kwRBSZl4dfEhadujkAIdrbgSHOZPysgEQSjFcCiz3jZ1YxUas3EfnU4ldEMuYLQIxRWtpyptkLxwDJU6IWTxhTfpxzEpLmx9hRWtxyss8chLE5IUJZg9x5X5JvT7TgvnxSmxgZtEZjdQZRRbZMHX4kpRYDorovp3xx6oQjgtHXbPV8FBrnjDzxk6vG6e0rv3UsXPEalyJ8e+x/9Xrqq/FN+RTiSMQu6I2DrHwxnHQcsixtWSa4cqPae4xLTJILFNrR7xwlo5cQpTGIVdX53NopSLzlAK9MOvseYir/Ht8e55WYtaR43bCshIynWB2IU2ZKaWQOfJGYwCGDGGrYlJK4YgnSIk7OgDeWgDQ1sZG3jjQ+u9M9sFhKywbwWHWlyQZFWYdvY0/WuWGrMzH51OJM0Es4OUJhQWvA2pSXNcgyWaTACb2MZpaswbgr0+r5WZNtGZNck9QVpUoZFFe/mOgK0D44AdLKcZ5iCvzM+rLWd5+Nbg0yagvKrLeloCaETvZsUtSZD3bzdgONvttzykAzc0ArR+EShy5UNRRqEEDAJqaAJ59AWDBQh17OaCMswcKYrIOvYGZKTGqsYrYNNQbb9qX2q+59+RUiTXEBlwGMR9mlhBK+xCnyNZQtYB/6l4ADfUAjz0JsHqNPVxt/DC7TInsxKd2ABi1IcCL/wZ4/c1AiVuaAIYNA1iyxFZmAywNI2toJcSWP5YKzLyyshI8J/YQV/+X+NZcpryowGkQp3Ts4uBWMLOhaTxu370DiP8yA2CVhJjNOTYdO92z22UHgNEjAf71cgBx/xaAnT8BsGgxwKvzQ2U2oIoO
W0SJEUSpyDFe2SgwdfQ0xB+ZVP22rMA3yKcSOyFG4BBmVE5uB9gEIJlKpCkzh5iOJYj/PCNUYhoMIS/tWm9i1x1DiN9ZBLDLTgCDBgLMnccgTuiwWRBzwJlXjvwA9OIruJ/bCOrkeYgr8BPq6ynemhPEX1yBZT5sjdKhWsd09HB/0uietBzTUIkbAP6ESrw6OvtNDn7QYMauOwGMGQnw7zkAG48FGKxX35mDEM9jfpdsgV5QME6ZY/e73ieiNeWJGwA8xH0lsALvM0qs82GnnUjqyDmUWqYXcSN+09ATC4j5e+XqQATxbjsFSow/NlRCeqAS4z+ljq6cN8HrJh7PzmcNcrCOnYe4AjD29RRvzknwxL2EN1N6waDfTyvxo0+ESiwHScyKQGwq5W47B0osH6jECuI0WB2Qx3X0XCrtshPjN+trC9TU+3LqidFOiCmXSXYiLRfuzesGYrITDs9NF46auAwAkiBGOxHXceuVnYjz1N5O1NSvTn0ZrsQqZkNbQVBnzYN70+FjnloqsctT44+C5g4ThFO0Ei9ZBrAhWwNtfSuxNUlezDH2SlxFtl0dO2ttNhaNOVVWvB5JKRLSjVglFh1ErB6e96InHjMK4LmXgnx4C/2/cvLEdKylvEkeuTce2itxFWmN+ehqKLGK7noA+uyJGcSvvQGw9RYAm04E8Eq8znzl1BPP1VkwtxF8nQl+GZK2DRFFTojcIqN+TLkT0wnHslYynXj+ZQCEGBV328nBHVBNx44PLSdFbWwkL5Nn5krMruzw6cQ6/4D6foI85sSf1hEbjtghxNTpGzoYYOVKnxP3nQZcq1quwbQOZ/uw3spH7Mz8CRp+7m3EljDC5xp2nrIrwMB1uPPQ32cBLH7PTiMieW+l82LHiB1mxxv7YecPC9no5yDE1vzh9T13gnX0hg4F2HpLgCGDe1f+zk6AV18DeOVVu8OXNqmdT8E0E360nejN3AnXBCBvJ3rXhhU92kzF5Fc3V3AqpjWPWE7Z5GtVyAVWxA1pKKFQz67lWdPWjxCvWxDLKzqS0grpif0story2KeTVXM+MV+vImlVeSpY0tXNHOwsV3T4+cROXPLpiV1XdnBv7Jp9Fju8zDxx3Dxi60qPhMVU6Do765J9TbELZrIKfHkq54Wi4pq71HnEPL0Qk+KVrdBK7K/s6JOGVuZNmS5PojnErhWAXIMZYt5w6gpBLpipeI5J8XIRwcjaEmLuhDXiFzM53tkhlLbCX2NXGegqfRbn1c5svWJrBhrCxoaNXYrsSiEs2+BY1xjLxK9+VtuOgqat/EOX7kfshPDQciRP3R3Jtd6EnGvMcmFa+YdmtY3zl+xXGs3s5/PrToRTNxPTDbE6pl93Ijtj6/3IpGWsIounxFzNLC87SrUPGZZ65W4i0rHTEYXTG6ctnp0hxbBgZjekkeDytSc28stYrXdWYz+gTxC7lq9yrU8sFl3BL6EuP9JDz04bwYabaSFBCTEt7VrNtdj8MlbVYzbyye/gqpjyjqKOO4zKa+3kehJ0zZ1cosq5P+uqmMIbq6uddUcvskI8LZ4Sd4svx/rEcVEcn4+svLL0wmQt2FXPflXMKkLt1ycO51rIVTNN/ObXJ64ioRk+Wt3ugKcR4n516po7fbtc17VycSv88P3GPvB7erBEgmyFeWY+wnkLsAwrxfNFuKXiyshNduisbdftDWTUVg/gV4rPANv6OiQX9+zQNy6nxVN0vy7TbQ/MSB5fVyLGcliw+3t2rC/kKn/e1LsnyZs0uu6SFHfPDscaxJE8uA93T3JCzNU5ZmX43s65MHcPRTXWK8Urv+xQYn/3pMqzmfmM/j520SVdlZ3w97HLzFDVD8Tb4jpXhnfdZZTd105O3qGVe/i6EbScq4nWEm40Q1OxsywoKG97QMpMdoM6aK5tbi/MKppyRE8rrQJZA50Gtb+jaBVR7s29nRWg3F7E2QXXvZ01nXm/t7NlMdgNzT3EVYR46SLH7Q4ojaCkQt5Z
VL9OSksXfkYGM2hQg93myygueWGXJ45LJ2ROrOvNKC6ba6zUmXlj3OZzK8y2WKuYK66aUxHjgS3LUQ8wYkwVG7FyH53PqZjLFun107h9iIOWDYKom5rTcTGK7LQRHFq28mVf7YTp5HFo9U5lHegGNWmDIWx42YKXQZwE9fDRlSOpimfKJ8TLF7tvUG5Byr0wh9ehyNaIXYaROQlvlssUedSWlFRYCp2WWGj/i2u8qvSB4E3wxNwzDxtVRfQq99E5hfi9wE6ooWeCkg87s8EPo7wcXgZ1nPLyG8pQx40v3co7c+ZvNo84vBujGHZ22AmXjUi0FVyBKZVg4GaBGaHfwLE2XOXY+tDOlE+IW5c4lJhBaiUXMfDSiB73yApG13VzZCG4ldDHsqfUVqO5E/gZPIUwSQW3FMIbG6hJcdUECZZEiIhN2ghXajF0ROpXzsMBuYS4p3UprjXA1NhlF4RKkyKr4WR9vAGY3ZNDQclApm0Oq9NOyOlr1Px6f1Y7wVWZd+zMlSAaYumBaTuTN9aX8A9ha8LlgdaY75hLiGFla7iIIJ/NZnlikVJE4OUgs7+lvSB4uUpTZcZ27ByTJ0xOnGAnLFshVZng1VduxEEbBzeN2NHrjY0AA4fmGN3wq+cT4tUrATrbHZ6YqbNTeV0KTO/RlzHxwQ4aHDEqvA52gg92xNkJglh6ZIrGjBqT/3U8UwfPCTPrADY1A/TXq9XnHOV8Qty+FgD/kS0gD2xmrqnLNphtEKpLk9xJuY2toDkWanQj4Ro6cYOZPqcT/F7PTHnVn1J5mQKbq6PXAeaWfgDN/XKOb/D18wkxQrt6hfbEPErj0y8JXPmsj1cdOAatBNm8rmU4zU7EWWJujTkyPEqLKLAGVl0NIu2DDpLN0HJGeyEHRAYMtm+7kGOc8wkxVvja1QCdHeG8YStuE1kwQk/KbD3rHC1JiSMws2yNR2+ZIda+InIVtIaTWwYLYgavUmK1hpUe0ZP5MFNol71oaALotw7rydUY8PmFGCe+r1llR21WbKbTiVhboUFXrztgJniVEOv0wtxURg4/u/JhLsEsUjO7CVqyEQS3S4X5Pn2pvuzYmUnxIr1weeP+A4PLlwryyC/E2AC4ti928CKemC7sTLAUpgMn7YWGljyxVGIOsjXIwa+tY+kEmzqhHZx+YtAaO0HWQHti443ldozN4B1AqcAEc3MLQFNLQfDNsyfmTdC2RtgKV4cuLpUg2NOUmL1u4HYsHkgXhOIPhF8g6vLCdPWzGtlj0yrNVdFp9iIucmMjeGQ7CO7GJoCW/oUCOL8dO9kMbWsBujocaUQCvGQ98Fzkmc2wMh/80J0/C15jhoNvotKJjKaYRu3M6J2c7MPgtZSY9ks7EdOxMzPW9PswUitIGiGbP992gpcGrUVHG+voyYk+rm1XpEbQSnj50LOYBR+J2ByDHZYXZtEaV2QDuPTFZCe4jeDw8kiOK7T+G+0D2oiCPooDsVLUrsAnoyqrwQ7mjTNHai54XR05JcFaiYkOAXfgE4LjzLAzTyfoGjvWyTPRm77raFJaQWorUwqyEQ2NAbwF6sS5fofFgphKiPYAO3xdnQBdXdDT1Ql1FsQsIzaRG9kC8RoftYtLJywvLBdPoUlsYtw5MgFI2og0T8xUmaBFWPGWvQhvY3NhcuC0/4EUE+K0UvvXC1UDHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7
F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C1NTEM+d90Y5W8GXep1qoKYgXqeS+DeXtgY8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xOuh6Xt6euDNt96CCePHr4ez+1PKGvAQryMTH9t8K2hvb4fbf/Mr+OQnd4auri74/EFfgBdffAlO/OY34IwzTlvHT/BvT6uBwkCM8Hz1uBNgzZo1MGLECLj+umvTyl6R1yXES5cuhU/ssLM693bbbQv33vP7inyOP0l8DRQG4idmzIBjjjnOlPT+++6GbbbZZr23vYQYP/DSyy6HBx98EC44/zyYOnWf9f4dyv4BhYH41FNPh/sfeBAGDhwIq1atgqOPPgrO+9656719XRCv9w/1H2DVQCEgXr16Nfy/7XeEtrY2uPDC8+F73zsfhgwZArNnzYSGhoaKNfnatWth7ty5yq6MHTtWnbcvEHd3d8Obb74JK1asgEmTJkG/fv0q9h3LeKJCQPy7398N3/nOWSoNeOyxP8H2n9gJWltb4eaf/QT22mtPq11/dvMt8Nhjj8Nuu30avvH1r1mv/eIXv4Q//PFR2HWXXeDEE79hXlu+fDlccun34f77H4DOzk61f+LYOIdFAAAf00lEQVTEiXDrz38GBx18CCxf/r7p2OFr5513AbwyZw4cfviX4MAD9jfnQd9+1VVXw5133aXegw/8kU2Zshuc+91zYJNNNikjg+tc5kJAfMQRR8HTzzxj0oCzzjoH7vrt7+Czn/0MXPfja6xKOud/zoU77rgTDj74ILjqB1dYr11w4UVw222/hM8deAD86Ec/VK9hR3Hafp+FN998S20PHToUBg0aBAsWLIBx48bBypUr1Q+G0gk85tBDD4N/zpoFZ591Jhx//FfV+zB2+/o3ToRHH/2T2h4woL9S81dfnae2hw8fDvfc8zsfy/UB6dxDvGjRIvjULrspSGY88ReYMGECPPX00zB9+pehsbERnn9uFgwYMMBUTW8hvvba6+DqH10D/fv3hyuv+D5Mm7avUk+E9JRTToeFCxeqc6dBfMvPb4WLL75UWYfLL78MPrPfNPX9EOLTT/8WvPDiizB58lbw4AP39aEZy/2W3EN8w403wZVXXgWbb745/OGRB43q7bjTp2DJkiVw2aUXw2GHfbHPEO+191SYP/81OO3UU+Dkk0+0aJkx40n4yjGB0qZBvPc++8K8efPh1FNOhlNOOck6zxtvvAG777G32odlwLL4R/YayD3E+0ydptTs298+w/K4ZA123HEHuOvO2/sEMaYck7feTr33kYcfhC22sOFCj4uvY4cvCeK08+D5p+67H8yd+ypceslF8KUvHZa9Bf2RkGuI8X/BBx54UGozPvP0/8KYMWPUcb2xE+iDp+wedAz/MfMZ2HDDDSOftduUPeCtt95OhBhfx+OSznPkUUfD3/72FJx+2qlw0knfTC2TPyCsgVxDTGqb1qAcjCSIzz7nu3DnnXeZjh122Lb7+Ce8EqdVcJVfzy3E+L/ynXbeBXCYd/oRh8NnPrNfpCrvuuu3agAEO3vY6cMHdq6wk7X33nvBz356k/Wefad9FubMmWOlE5g/
Y8TmUsgnn/wrHP2VY9U5vCeuHsm5hfjxx5+AY796vKo5SiVkNc6aPRsOOSTo1OEcBpzLgFnw+RdcpNIG7EQh4Pig9AD/5hEbqTMef9VVV8K+U/eB+vp6mD37WTjp5FMzpxM33fRTuPyKK1U6gZ3NAw7YX6Uc2Nk77bQzVDqx2WabwaN/fLh6NOT0k3MLMQL00EMPq/kROE8i7oHx2zvvvANHTj9CjeYtWvQu7LHn3qozNnjwYNhyyy3ULLTnnnsexo8fD5gUcIgxQsPkoKOjQ30E5sSDBw+Ct99eoPLipqbGyGCHKyfGz9j/gM+pzhs+8L2jRo1UyQc+EOg7bv817LBDYF/8I3sN5BJiPsz83e+eA8ce85XYEn//8ivgJz/5mTUMjdHYmWedDYsXv6fe19LSAkcdOR1GjR4Fl1xymQUxvj5z5j/gjG99W4FLD1TwSy6+EL533vkKxDQ7ge/DgZMf/vBHcPc990RG7M44/TT1g/KP3tdALiHufTGj78D5C6i6qMxbbbWlgjzpgcfPfvZZ6GjvgIEDB8DWW28NdXV1ffoqfu5En6ot9k2lhbiy1ejPVs0a8BBXs/b9Z1ekBjzEFalGf5Jq1oCHuJq17z+7IjXgIa5INfqTVLMGPMTVrH3/2RWpAQ9xRarRn6SaNeAhrmbt+8+uSA14iCtSjf4k1awBD3E1a99/dkVqwENckWr0J6lmDXiIq1n7/rMrUgMe4opUoz9JNWvAQ1zN2vefXZEa8BBXpBr9SapZAx7iata+/+yK1ICHuCLV6E9SzRrwEFez9v1nV6QGPMQVqUZ/kmrWgIe4mrXvP7siNeAhrkg1+pNUswY8xNWsff/ZFamBYkLc3Q7QtQKgaxVA92qArjYA6ATo6bL/AW53B/tAP+O2+ps9q797gv34bLZxn1pBW79ObYLb8nJ+vV1Xrw/C5zoAddk/PuN2vX7GbbxNA23jc0P4T+3n2/h3I0BDC0D9AICGgQCNgwHqmisCSa2fpFgQd68F6FgG0LkCQAFK4MYBzCHWfxuwJcwSYmxaPGYdICaAFZQIM4eWoEZANbROeAnmRgZ5I0B9A/Q0DIa6puEA9cW+J0hxIFbwLtfgSngZxAZuUuAugG6mxKTIUo25AmdSYtIvUmSHElsQkzILeAlspcwELELdGGzX8/0EMj6zvxuHASDMBX0UAOIegPbFgfoq5aV/LiXGm8Y4LEUSxMZWxNkJtBfaVijLIR8SYm0flN2QCsxthVZgBW6MEitLoWGmv82zABrtRfMoh83JP9n5h7j9XW0fOgG6NaTqWW5zqBnIFtRoD8hWkFIjpKjUzAtbSpwGMULCQDbqGwcxKqv2xBLeiPIyiJUiBzYieCalbtKKjZ4ZQR6df2pFCfINMVoI/GcpsEuNHd7Ypciq48c7eo5OXqRj11eImQpTx87q3PXWC3Pl1XArSyH+oa0omLXIL8TYiWtbEADcjcuucuXt0CmEA2iEV9kHUmaEFpWb0gmpxCKpSIQ4TeRkGuFSY925M+kEpRQIo/bC9KyUmeyE9MN6u56UuEl3/JoAWsYVqrOXW4h72hZCXecHbhW27ASDu1dpRZwi9wQWGPkL/6AdmZ/DFTVZh04pMSkw/c0VmcVqvOPGYVZQazvhUuK6piC16BfcEbUIj3xCjDnw2tcBelBxNaQKUL3t7OC5OnykyC6PzBUYjwsfph+ndxmmUxC2TkJhBd8Z8cCuPFhGajEKbADWCqwUmcHdf2JhcuR8Qty+BKBjcWAjXPDGKbHZj1Dy7Bj/ZpEbeWOdA+ONHlF6e6AH6vBZUMwhdmUTrqEP6uvh+YKBlIDqYMlj6thpYFWKwXNgZiNMB44psFOJEWbq8KGlGA3QNKIIQpzTW4CtfROgs5UpMfPEBmpUV/LG3COTFxYQm44ewRygpR40MGe2BdTOaI0O5pAGPwJ1Qr1At42w2R282ShzjI2w4jSCnHlfbidQibk6Nw4F6DfeQ1y1Glj1H4DuNQ4l
lvaCd+yknXBDrHjVSkuCa8Fcx2JhUwFaSfENdYFSK8+rt80zV1zOOE9vjc0I8A68c1aIWRJhvLGAl/Y3DAAYWIw7l+bTTqx4XqssQRtjK0iJI+kF5cjaVujBDg5rCHNgI5yKTEKthdXq5+nXOMuuHz2/Y0LgJDS8lr3QqhxJI3geTHZCKjGzEXUEND43AQzetmo6VMkPzifEH8xKhhi9b2xHL9rBI4/LYQ0gttg1yhrxxK6enXATCmZrvIwrrfTEbHiEoCbaeSrhTB+0Ghv7wMFF6Nn2kO0ryVLVzpVTiGeG+bBS2xRFVkqM+bDDI4fuwThbA6mAOJpK2DbCjtxcUsw8sVRadnjQteMdPd3XMz8MNjfCyoH5wAYpMKUSDpiH7Fg18Cr5wfmE+P2nbaVVcLpgjlFkPTxtUgfywMweBNY4lFi3Mtt5sUkvKMXQz5YGy2jN7udpDxx8LgUVxmVwuHF4OSYHDlIIAS3aB/TDylLg300AQz9ZSZaqdq6cQvy3AForYsNtyop5Xkz76TWdTnAFjoWY7ARTXD5z2N6d2IhmdjGbZsxnHJvBD0zX2JmMi2BphkrgFNkIMo/OHPBaMGt4FfxNABvsWjXwKvnB+YR4+ZMM4jg7IeENIY4ocMQ2BDuc6YSCOJpGZG2UAFbhgcPELYiI1cm4Zw6jNwM1KrWaEMejNQ2xyolFJ4621bP+N2xK1q9d08flFOLHbU8cayeYV1ZzK4Jb2xo4Dbw4iSfMhcPXwzyYv49aNPTI2cfsBMOh6hp1dacTEUXmiu2yDxxW8zoBjJA3Awzbo6bhzPrl8gnx0j8zD8xsRSLMHWakDeEjj0tRGsVr9n5mJyTkJKispg3KlA+LYWjeKPJmpFxhQzXmCmx39MzAnlFk1pEju0DeV2036xE7psQj9snKSU0fl1OIH3XYCeGRDdCkwN1MgXWYJsAkm8DTCQtqbQNcw85WK0thdk1Fj7MN2kmQLyavHCpx8El8fwA0DU2zjpsLXq7QI/ataTizfrl8QrzkkQSIox2+HjWYob2sUOHAEthQhxdqmOTYKLexFUqJbbthKt0OhK224MPO8R04HcUxyxBrJ+rUbI4AaqujxxTXKDJ17PRrG34mKyc1fVxOIX7IhtiyEQRx2OFTymnBG26HdiKc2OP0zAHrtqeWTSuHmSPDzix5EFEbdfiMwmZQZNWvo9SCfhGWF2bQuvZvuH9Nw5n1y+UT4vfud0PsgLkH0EbIjhtuB8PJyGWkIxeZOxEeryWdP5mwQQ6GBP/bD6dQuDxxRJmF+spBD67IAcRkL+hZx24ELYdX/j3yc1k5qenjcgrxfTojbo9R5PYwM9aQBrCK6Eyrc3S/tgk8P7b8M3UDbWVOauloR85OIcLsN4zYlEPQJ42zExEFp9gN0wduI/ggh9rfDDDy8zUNZ9Yvl0+IF9+tIdaWASfJm8EP2odpRLAuBELKO2iZYdYWgpyxtBlUyep8JLlmfjC9ag/JRaBkhwUdtHCkLrQLtkemOccKcmZLgm31rgBSJ8Rs/6gvZOWkpo/LKcS/F0qMEGMKYcNswDVe1mUfgv/dR22FiOEcSh5CbLexs18nPDAf1EBjI+Gl16PpRAC5K63g+4PBDp5UCKgR8lGH1DScWb9cPiFedFeYExO4SolDiFGFFbKiQ6eAlR00rdTOQQ6ZKXOYDcXaViTUukkirEGK6ESfdM/rUuoAahtuvPoDIXbASwo9+otZOanp43IK8Z0aYvS+aB+kN27XNoIUlqUR6k8BNymxA3reAQzTieSRPFeLhxAzL5zJ81KHLRzscKUSIcRsUIQsBXlgYy+aA8BHH1bTcGb9cjmF+HaHnQijtR41Sy3syHUzOxHsj9qHNM/MzyfnVPDZbnX1LdA8bFuob94AOlr/DZ2r3zZzJWiUTVtf9R0pnZCDF65Bjgi86keAcyiYZzaKTLkxKTFXZA3xmMOzclLTx+UU4t/ojhwpsHhGK8FtgMMuOD1wnDdOGKam
H4UCsmEADNnsBGjop1fZ6emGVW/fB21LZhoI7JRBDGoQlGbwwp4cb9IKA2+KN1ajeA54SZnHHFHTcGb9cvmE+J1fJUJMaQT3uDbU1GmLdvRsuLXXdaYbgS3g2fDA8QdBvw13hp6utdC55h1oGjQRero7YPlL34eejpWB8poOXiCZpM7hczA7jW+HnTx7LoXtgWWHj2a5aV9M/pg/b3RkVk5q+ricQvzLBIg7I3MkLJjpknumujxyS8qT8cQW5EqGzaA1bLDFqdDYfyNYMf830Lb8X7DBlqdBY/8xsGL+r6H9/Rc0xMyzxkAdRmWuDlu8Nw7g1z8O80Pgo3aoytpK4LOHuIo/zoW/YBCLzl1PV5ALJ9oJ1uGTgyEcbkUoG442x7qnbg7f6lvQ0G8ktM65Gdo/mAsbbP4NaBo0AVa8die0LXvWVmIxXOzqmIW5MV31rKF2wBrrqdUVIHpwQz5v9OUqNmLlPjqnSnxb2LGjdMKkFKEfthTWpBIBlNITu7YpnuPD1vY0TnsEkCB+f84t0P7BHBjmhFinExGI3XaA2w23QjuUmcdt5IuNjWAdPQ9x5X5JvT7TwltZOoFKHKpxkAOHcEl7YLaFnbDTCdsLq9PpH4ENcXgc/jV88regsd8oWI5K3DoHhm/xTaXEH8y/E9Yum619bhRWWl8i4nG5N9YjeUk5shty7NzFKPHYr/S66mvxDflUYoQYs2GjwiydwPUtJXQJI3ZGgTmkbDAk9kfgmDg0fPIZTohb5wd2QqqqcrCWalJklrEDp4eZzVRM7bEjUZwZghae2ENcxd/kgp+zAQ6txPpqZ1JimfsmwyrtBWXJ2vs6Jgq5OoAjtg6UeNkrgRKP2DJQYoR47dLZGtgwlQiVM/C8sWmDM1KLDj/TOVTfzsqLqXOHnTpmJ8YeU8VGrNxH51SJNcQRJQ6uoeuzEiuvbM8rjrMf+DlNgzdTI1/tK+ZBd+caGIFK3H80LHvlFmhrfUVB3DxoArw//w5oW/ocNA+eAA1Ng1T81t2+TEOb7GnjOmz1ep5PAK7o+Ml5xrF2wkNcuZ9Sb8+04BaRTgR2go/URTpqCWlFN/fQfFg6VoEDjzxsy5OhccA4eP/VX8PaZf+CYR87FlqGfgzWLJkFa1vnwNBNDoL6hn6w5KVroHP1Athw8ukqcmud90voeP8lk1a4ojEnvMx6uLyxsRHWcbihr7GTWfG4Y3tb8zV5fE6V+BZHOhHMZMtsG4QHTvO+9sShwH4MmXgEtAzfBla98wSseOsP0DJsMgybdJTV0O0r34BlL18P9Y39YOTHz4O6ugZY+sJl0NP+fjBjkqUU0svGpxHafohBEZqaGR2e1lc3G0XWtsLbiSr+KGOUWEGs04nYCC3tdWVHpBd2pxXNw7aCoR89So3QLX35OuhY/S70H7kDDBgVrKzT3dEKra/fA90dK2DYRw+HfiM+Dh0rX4f3/3ODfaFn1txXeF05JTPeEzMltjyxV+LqUbywNpQYWR+2+dfU8HJX2zJoff0+aGv9j1n+CiFraOoPgz+yv4IbH++/8hPoWDnfdOKSJv70VYnxc7jCB8taiWQCt70SV49hsJSY5hFrO5GmtM4ozTH4oa/BU/lwQjrRNHA8DJ10DNQ3DlAV0r5iPrR/MA+62luhedAm0LLB5lDfNEi9tua9mbDijbvZ1cnhHOBEmKmjpuxHUO/RNCPcF00nvJ2oIq0xH70Q0wk+CR4B1rmxYxiZT8VMtBn03l4MliDk9S3Dla1oHLCR8wv39HTBqrcfgdXv/s2OvsQVGnEdvMTozRnN0SX8NJFID3aYSfJ6/oRX4iqyjRDL4WZ9VUdlcmIWtcVMFMLSB8PXFMnVqWSiZYMtoN+I7aGuoRk6Vr6p8uG1y1+EHrzTk2PRk/gRuOhsNuuaugi8lA3bE4CC9SjkiJ2HuIr06o9WI3ZSifW2nh6ZlO/G2wMxvJww0hdALK/DC674GL7lqUqVW+f9CtqWv2iiNAWx/k8w
mT39WjnnIEhkODomJ9YjesG1djqRMFd7oCc+uvptWYFvkNOI7Tb3LLbuDpDrTLjnUrjTBrUOhfbApLQcVvsaPMd6FXqZqyjENmQEs2UfCO4keyCHlWUe7Bx2rg+vtZM5sYe4Aj+hvp7iHT4VUyoyTsV0QJo05TLVA4fnI6jtZz3hSJdnGCpx/43gA5xHvDyYR0wLSJj5vgw4YzP4PAqHUkdyZGlPnPOJcSomm0PMI7aN7Ey7r81R7fflVIlxUjy/OJTPn8CsOG0+ccLwsuncKUyhh6cUurVc61YER9OqwvRX2rJV4RwH1yX6IbTh4EYAfHTyezTdCH45dbQ6vDUJSHtkD3EVf3/m8iR9pbN1xXN7NiUWs9DkPGF+WX94Yah7Mjy/UBTBCX4i9AjmD5t1JfTu8BKkYO4DvS47evzSJFJsk1bwiUExdsSa8CM7eGOmV7ERK/fR+VTid37NVvyJXrYvV/5x2gs+R0J20uiaOhm5abnVboWtJE/+Jb5hzHJToa8QUzOjly2FV3YkT80085EjHT7ywzHzif2FopX7JfX6TItud0DMVTmYQxF7ZUbEA0c9b/B+YTvMOQO74LwVmCgMKit+XHiBaCjFctjYeGOZHztWv3QOdkRmr9FtDxwQo08e86VeV30tviGfSrzoDrGQoLxkH9dhY4ukxIy4WZBLe2FtC8ipJSliM1459MTaJNC9FVnHLjg4y6X7USvBvDB5Y3WiuIiNw0vLWrFO3hi/eEr1fpTv/tadE5tFBRHq6MrwzqueHfCGcEcVmjpw6hXjIpid0BY4tMRciuk+du4byQAtguKC3BW9mSHoKNxmwW2zxKueDM9XARr939Vrwwp+cj6V+N3fuZWY7mWnRu/0jDbHQoDuVTKjk+HJTtCl+tphmOo3d2EimplCs55doLz0Gimn3mPZDEodHHYi2vGjgRN2hQfr6AVziB1KzJe2Gn1oBVGq3qnyCfHie4QS06rwcolXXFRQD0okdeT4GmxmODn01GZRQssTR29UHkZsYYOSMBPEBG3ItO7QadLD49wdPfcqmWIwRV3hTCti0g1n5FKvuCrmwdUjr4KfnFOI701R4gDqHnV/5+Srn7m60oidvY86eNw+kM2Q0ht2+BA2usNomCBrGxHGyPaKQMLbBt459Lz2haYJkEfuniQXUNGAjzqogihV71T5hPi9B5gSI6gUs4ULbIe3yU3yxvGeN/DP4eLcEnb1P3N1UABr1ge/GSNBye2Gc+V3PrpH3piUm64MIQtiqbBco5jZC1TqkQdm/do1fVxOIdY3nlGjdnJxbQ212k+KHCqknIppwck7aix5CAc7wglCvFXp9aSWDj1xcJTzLkqU8zojNZp77LAflhem+9lxG8HvY8dshb/xTBV/nEv+EK5PbCAOoTUqbO79TEu9RpevsiFmcyAsiIXiyrENRzghvbCpLUoU9A5LmcNhPraooAnrYhcaDGwH/ocW1pYKLCHWk+RH7FfFRqzcR+dTiZf+Ud9Yhisxuzl55MbldDfR6PoSAcQaXjNxSNsMPRvCvB44DPMwf2pbQd436oXDLl/glfmSreH5aISOrwjEBvi0f45bQQgn+sTd05k6eQJmfzPGyv2Sen2mZX9JuI+dVGTaRjVmE4OS4CWo9ReTdsGeKxFNKZzlMVMobI2OjuTx6CzAnR/j9sx43ZKO1FQqkQIzZcXD9+511dfiG/KpxMvwBuWyE5cArzoWlZqy4xjl5R01bhmsQQ3HcDPNXuMZWzjvxxp2JgicubHlibV3Zh238GY14iaMJo2Q8LJhZwturczD96xFJnv9nfIJ8fInNZSuNIIpL8HLvTHCzCf8uBRZ2gahzOFInahvDbGZKyGhZodH8mJFaMqInhgECXqIqMAcXlJkGuzQymzdDkwfv8GUXgNTi2/IKcT/64CYPLEDYqbESo3VPT3CaIxPu+QemRrMSifoF2Dez8aZHUoc3lKUa3AUVvOqGUpmKYQD3sBWYEdOQ0xK
G4Fa3rCcbW+way0y2evvlE+IW59hnjgDvAitshOo3F3mWUfBYV9N3NYgCjGTWlbVkZSY99xEk7AAQt/uIMYjmxQjOsEnVGC9gLZ1Y3IBNc6dAD6bjW0PDRZ5yfsjpxDPDD2xyYMFzMAVmSCWz116ymbcoEbQvM6OXIoA27rL5k6wF0KtDYeNI4qsd1jDzQpaglU+U8dOKjRPKPTfQ3fMO7/B77lHTorNQ7FWzNKDHNRh089WtMaAxf3QFQx+qGf2Gm7LiE2RS+N1NDIXpmvGNcQM1IURW3xlRudQBMc6B0GMndApBNoIYxsata1AMEmZGcBoN4wSM7+MCj14+zy0dup3zCXEPSuegzo17ZKpL/e9siOnkwlKKIJnhJc9q/gtHNnTHGsptp7M1XRSoVNrW0OqIBcUx0ZtKijG4xFQ+keQJiiygheHmR32gtKMwdtl+co1f0wuIYbV/wHoWhP6XAtm6X3RVmBHTiuwshl624K5Ozgfm19MrZc45dIe/0htcD2fx776WV9jFyixVmQ+Tq3mQyCMGmKlrA1aYUmJOaxSkR3RW0N/gAGbp37fPByQT4jXvgHQ2cpsgbQVCCPBqp+NGmsFNlDjNvun7EUwhdM56d0Fbcqws9J3MQ5tdfA0vTyPCJQZ4eUKrJVX2QmuwtJeILQJqQX+CJqGAvSbkAdGU79jPiHuWArQ/q4ZwAi8LofVTiGSbQQqdFeg1ApmVGTa7g6U2UhyaCtYvy68iI4CYj4MLS+yI6Wlc7JBEdNaSnlRaQXExgtziCXM3BszRZZQN48GaBqRCkgeDsgnxN1tAGteDyEWOXBgF6QSuxSYPDFXYoJYPyPUoGG25khE4XY2OFdgfcGoncDxCA2hrXfAy2HmtkLnxJGOHlfimPSi/ya4EmIeGE39jvmEGIvVthCga4VIKVxRmujAUYdO2QlSXHwfV2CtyAiv2Y/o6W1RrWL+jzVJSPfLIheKWqqrFJfg5RBzRUYYtTc2doJUWHbwHB0+kxc3AjQMBmgZmwpHXg7IL8TdawHWvs3UOD4LdqYR5IONjdBqrEAVEJsYDl/TMNPCxcEFUFZ780v0bRDUfMngXzB3UsCL22QjmPo6bQUHOAPEPFfutzFAfb+8MJr6PfMLMRatYxlA53Kd//IUguXBVgdORmtaiZUiCy+sbAR5ZPyblDgOYj0BWbkDBrXyxGbCbwzEHGaEmMOMgJIiy5iN0gpmK0wuzPJjPjzdOBygaXgqGHk6IN8QY01jB69T2worDyYbIcCNpBIUrTEFVh07sg78mQEcUWK+CMW6QMzshLEZBC9FbWm2IiY/bhwMgB26gj3yDzHC1L448McyD5YDGpEozdGhMx64txBzBWZdN9OxY1YiYifwNe2LXd441k64lJnlxipP1tsK4FF88YDCoFwAiHVboLXAfwZcrcAqpeAduGgmHEZq3D70EWKRRgQdO7ZImtMTZ4XYZStY3Ea2gUdxCHHjsMLEaa5fXnEgxtJhZw9B7lppDylT7quUWtsHpbi0TR05nhOjPUD48VnHbLxT5+rYqR6dGZUOvbAR5riOHUGsp1eqJTNZBy+ixCJyi0srGgYF/rdAnbjiQ0wlxBy5cyVA90o1PN3TvRbqTLTWrW2HI1IzsLo8McFMS8lTKqG3ZYfODGZQxw53OCBWgGqIZVohBz0MzOSJxXA0wtrQH3rqB0Id2oeC5MBpvqdYSpxWWv96IWvAQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1Xof4/sY7KcTsYB2AAAAAASUVORK5CYII=\" alt=\"audio video logos\" style=\"float:left; height:200px;\" 
/\u003e\n", + "\"audio\n", "\n", "1. Integrations with multi-modal AI models to extract information from unstructured data, in this case audio files.\n", "\n", @@ -91,7 +91,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "z-index": "0", "zoom": "181%" } @@ -116,7 +116,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "275%" } } @@ -138,7 +138,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "214%" } } @@ -153,21 +153,22 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "acf12472", "cell_type": "markdown", "source": [ - "**Important:** restart the kernel by going to \"Run -\u003e Restart \u0026 clear cell outputs\" before continuing.\n", + "**Important:** restart the kernel by going to \"Run -> Restart & clear cell outputs\" before continuing.\n", "\n", - "Configure bigframes to use your GCP project. First, go to \"Add-ons -\u003e Google Cloud SDK\" and click the \"Attach\" button. Then," + "Configure bigframes to use your GCP project. First, go to \"Add-ons -> Google Cloud SDK\" and click the \"Attach\" button. 
Then," ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "z-index": "4", "zoom": "236%" } @@ -196,7 +197,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "4d837a34", @@ -214,7 +216,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "193%" } } @@ -229,7 +231,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "008f0a87", @@ -243,7 +246,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "207%" } } @@ -270,7 +273,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "225%" } } @@ -285,7 +288,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "e00dcb01", @@ -298,7 +302,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "122%" } } @@ -316,7 +320,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "335511be", @@ -328,7 +333,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "134%" } } @@ -343,7 +348,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "595126a1", @@ -364,7 +370,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "cbd59dd9", @@ -393,7 +400,7 @@ "@deathbeds/jupyterlab-fonts": { 
"styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "161%" } } @@ -411,7 +418,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "84548649", @@ -429,7 +437,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "8be3127f", @@ -441,7 +450,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "216%" } } @@ -471,7 +480,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "211%" } } @@ -491,7 +500,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "d27756f5", @@ -503,7 +513,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "317%" } } @@ -544,7 +554,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "1575c468", @@ -556,7 +567,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "229%" } } @@ -580,7 +591,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "177%" } } @@ -600,7 +611,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "3629f4af", @@ -613,7 +625,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "141%" } } @@ -628,7 +640,8 @@ }, "trusted": true }, - 
"execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "09ef6c3d", @@ -644,7 +657,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "152%" } } @@ -662,7 +675,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "cf15986a", @@ -681,7 +695,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "152%" } } @@ -701,7 +715,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "778d0ac3", @@ -717,7 +732,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "181%" } } @@ -741,7 +756,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "163%" } } @@ -756,7 +771,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "4acfb495", @@ -774,7 +790,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "125%" } } @@ -789,7 +805,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "a49d1dde", @@ -804,7 +821,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "178%" } } @@ -824,7 +841,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "15a5bfd3", @@ -836,7 +854,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - 
"body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "224%" } } @@ -856,7 +874,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "172%" } } @@ -871,7 +889,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "810c77d5", @@ -888,7 +907,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "183%" } } @@ -913,7 +932,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "92%" } } @@ -931,7 +950,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "f19c88d3", @@ -950,7 +970,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "127%" } } @@ -965,7 +985,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "06f0312e", @@ -986,7 +1007,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "175%" } } @@ -1006,7 +1027,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "fae3fcae", @@ -1024,7 +1046,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "38423dde", @@ -1037,7 +1060,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "158%" } } @@ -1055,7 +1078,8 @@ }, "trusted": true }, - "execution_count": null + 
"execution_count": null, + "outputs": [] }, { "id": "37a1dfbd", @@ -1067,7 +1091,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "138%" } } @@ -1082,7 +1106,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "a4748e0f", @@ -1113,7 +1138,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "ff22e7eb", @@ -1122,7 +1148,8 @@ "metadata": { "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] } ], "metadata": { From 11f0b0b5307c26a2da127b33ac7cc486f25d57d2 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 10 Apr 2026 16:43:50 -0700 Subject: [PATCH 06/39] Update packages/bigframes/bigframes/operations/strings.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- packages/bigframes/bigframes/operations/strings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/bigframes/bigframes/operations/strings.py b/packages/bigframes/bigframes/operations/strings.py index 7cc93d34c07a..a5b9944424b0 100644 --- a/packages/bigframes/bigframes/operations/strings.py +++ b/packages/bigframes/bigframes/operations/strings.py @@ -313,7 +313,7 @@ def _to_blob(self, connection: Optional[str] = None) -> T: ): session = self._data._block.session else: - raise ValueError("to_blob is only supported via Series.str") + raise ValueError(f"{self._to_blob.__name__} is only supported via Series.str") connection = session._create_bq_connection(connection=connection) return self._data._apply_binary_op(connection, ops.obj_make_ref_op) From 38a7820633ee33cbaa24e3cda755f5b1b9ccafc3 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 15 Apr 2026 21:18:50 +0000 Subject: [PATCH 07/39] fix lint in strings.py --- packages/bigframes/bigframes/operations/strings.py | 4 
+++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/bigframes/bigframes/operations/strings.py b/packages/bigframes/bigframes/operations/strings.py index a5b9944424b0..0833ab5df802 100644 --- a/packages/bigframes/bigframes/operations/strings.py +++ b/packages/bigframes/bigframes/operations/strings.py @@ -313,7 +313,9 @@ def _to_blob(self, connection: Optional[str] = None) -> T: ): session = self._data._block.session else: - raise ValueError(f"{self._to_blob.__name__} is only supported via Series.str") + raise ValueError( + f"{self._to_blob.__name__} is only supported via Series.str" + ) connection = session._create_bq_connection(connection=connection) return self._data._apply_binary_op(connection, ops.obj_make_ref_op) From e765ef09009b2db06f9a88ac452d78126f29df39 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 15 Apr 2026 23:34:29 +0000 Subject: [PATCH 08/39] Address review comments for Blob API deprecation --- packages/bigframes/.python-version | 1 - .../bigframes/bigframes/blob/_functions.py | 126 ------------------ .../bigframes/bigframes/operations/blob.py | 1 - .../bigframes/bigframes/operations/strings.py | 44 ------ .../bigframes/bigframes/pandas/__init__.py | 12 +- packages/bigframes/bigframes/pandas/io/api.py | 61 +++------ .../bigframes/bigframes/session/__init__.py | 104 +++------------ .../bigframes/bigframes/session/loader.py | 14 +- packages/bigframes/tests/system/conftest.py | 22 ++- .../test_blob_get_access_url/out.sql | 4 + .../out.sql | 2 +- .../test_blob_ops/test_blob_make_ref/out.sql | 4 + .../test_blob_make_ref_json/out.sql | 3 + .../test_obj_fetch_metadata/out.sql | 6 - .../test_obj_get_access_url/out.sql | 10 -- .../test_blob_ops/test_obj_make_ref/out.sql | 4 - .../test_obj_make_ref_json/out.sql | 3 - .../sqlglot/expressions/test_blob_ops.py | 18 +-- 18 files changed, 96 insertions(+), 343 deletions(-) delete mode 100644 packages/bigframes/.python-version delete mode 100644 
packages/bigframes/bigframes/blob/_functions.py create mode 100644 packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url/out.sql rename packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/{test_obj_get_access_url_with_duration => test_blob_get_access_url_with_duration}/out.sql (60%) create mode 100644 packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_make_ref/out.sql create mode 100644 packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_make_ref_json/out.sql delete mode 100644 packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_fetch_metadata/out.sql delete mode 100644 packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_get_access_url/out.sql delete mode 100644 packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_make_ref/out.sql delete mode 100644 packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_make_ref_json/out.sql diff --git a/packages/bigframes/.python-version b/packages/bigframes/.python-version deleted file mode 100644 index 95ed564f82b7..000000000000 --- a/packages/bigframes/.python-version +++ /dev/null @@ -1 +0,0 @@ -3.14.2 diff --git a/packages/bigframes/bigframes/blob/_functions.py b/packages/bigframes/bigframes/blob/_functions.py deleted file mode 100644 index 3869416d1244..000000000000 --- a/packages/bigframes/bigframes/blob/_functions.py +++ /dev/null @@ -1,126 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import inspect -import typing -from dataclasses import dataclass -from typing import Callable, Iterable, Union - -import google.cloud.bigquery as bigquery - -import bigframes.session -import bigframes.session._io.bigquery as bf_io_bigquery - -_PYTHON_TO_BQ_TYPES = { - int: "INT64", - float: "FLOAT64", - str: "STRING", - bytes: "BYTES", - bool: "BOOL", -} - - -@dataclass(frozen=True) -class FunctionDef: - """Definition of a Python UDF.""" - - func: Callable # function body - requirements: Iterable[str] # required packages - - -# TODO(garrettwu): migrate to bigframes UDF when it is available -class TransformFunction: - """Simple transform function class to deal with Python UDF.""" - - def __init__( - self, - func_def: FunctionDef, - session: bigframes.session.Session, - connection: str, - max_batching_rows: int, - container_cpu: Union[float, int], - container_memory: str, - ): - self._func = func_def.func - self._requirements = func_def.requirements - self._session = session - self._connection = connection - self._max_batching_rows = ( - int(max_batching_rows) if max_batching_rows > 1 else max_batching_rows - ) - self._container_cpu = container_cpu - self._container_memory = container_memory - - def _input_bq_signature(self): - sig = inspect.signature(self._func) - inputs = [] - for k, v in sig.parameters.items(): - inputs.append(f"{k} {_PYTHON_TO_BQ_TYPES[v.annotation]}") - return ", ".join(inputs) - - def _output_bq_type(self): - sig = inspect.signature(self._func) - return_annotation = sig.return_annotation - origin = 
typing.get_origin(return_annotation) - if origin is Union: - args = typing.get_args(return_annotation) - if len(args) == 2 and args[1] is type(None): - return _PYTHON_TO_BQ_TYPES[args[0]] - return _PYTHON_TO_BQ_TYPES[sig.return_annotation] - - def _create_udf(self): - """Create Python UDF in BQ. Return name of the UDF.""" - udf_name = str( - self._session._anon_dataset_manager.generate_unique_resource_id() - ) - - func_body = "import typing\n" + inspect.getsource(self._func) - func_name = self._func.__name__ - packages = str(list(self._requirements)) - - sql = f""" -CREATE OR REPLACE FUNCTION `{udf_name}`({self._input_bq_signature()}) -RETURNS {self._output_bq_type()} LANGUAGE python -WITH CONNECTION `{self._connection}` -OPTIONS (entry_point='{func_name}', runtime_version='python-3.11', packages={packages}, max_batching_rows={self._max_batching_rows}, container_cpu={self._container_cpu}, container_memory='{self._container_memory}') -AS r\"\"\" - - -{func_body} - - -\"\"\" - """ - - bf_io_bigquery.start_query_with_client( - self._session.bqclient, - sql, - job_config=bigquery.QueryJobConfig(), - metrics=self._session._metrics, - location=None, - project=None, - timeout=None, - query_with_job=True, - publisher=self._session._publisher, - ) - - return udf_name - - def udf(self): - """Create and return the UDF object.""" - udf_name = self._create_udf() - - # TODO(b/404605969): remove cleanups when UDF fixes dataset deletion. 
- self._session._function_session._update_temp_artifacts(udf_name, "") - return self._session.read_gbq_function(udf_name) diff --git a/packages/bigframes/bigframes/operations/blob.py b/packages/bigframes/bigframes/operations/blob.py index 9cd7dd0db291..3666ee66602d 100644 --- a/packages/bigframes/bigframes/operations/blob.py +++ b/packages/bigframes/bigframes/operations/blob.py @@ -14,7 +14,6 @@ from __future__ import annotations - import bigframes.dataframe import bigframes.operations as ops import bigframes.series diff --git a/packages/bigframes/bigframes/operations/strings.py b/packages/bigframes/bigframes/operations/strings.py index 0833ab5df802..ff211f1b77d1 100644 --- a/packages/bigframes/bigframes/operations/strings.py +++ b/packages/bigframes/bigframes/operations/strings.py @@ -305,50 +305,6 @@ def join(self, sep: str) -> T: ops.ArrayReduceOp(aggregation=agg_ops.StringAggOp(sep=sep)) ) - def _to_blob(self, connection: Optional[str] = None) -> T: - import bigframes.core.blocks - - if hasattr(self._data, "_block") and isinstance( - self._data._block, bigframes.core.blocks.Block - ): - session = self._data._block.session - else: - raise ValueError( - f"{self._to_blob.__name__} is only supported via Series.str" - ) - connection = session._create_bq_connection(connection=connection) - return self._data._apply_binary_op(connection, ops.obj_make_ref_op) - - def to_blob(self, connection: Optional[str] = None) -> T: - """Create a BigFrames Blob series from a series of URIs. - - .. note:: - BigFrames Blob is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the - Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" - and might have limited support. For more information, see the launch stage descriptions - (https://cloud.google.com/products#product-launch-stages). - - - Args: - connection (str or None, default None): - Connection to connect with remote service. 
str of the format ... - If None, use default connection in session context. BigQuery DataFrame will try to create the connection and attach - permission if the connection isn't fully set up. - - Returns: - bigframes.series.Series: Blob Series. - - """ - import warnings - import bigframes.exceptions as bfe - - warnings.warn( - "Series.str.to_blob is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.", - category=bfe.ApiDeprecationWarning, - stacklevel=2, - ) - return self._to_blob(connection) - def _parse_flags(flags: int) -> Optional[str]: re2flags = [] diff --git a/packages/bigframes/bigframes/pandas/__init__.py b/packages/bigframes/bigframes/pandas/__init__.py index 58833284ae96..c6f7500f9e89 100644 --- a/packages/bigframes/bigframes/pandas/__init__.py +++ b/packages/bigframes/bigframes/pandas/__init__.py @@ -99,14 +99,12 @@ from bigframes.pandas.core.api import to_timedelta from bigframes.pandas.io.api import ( _read_gbq_colab, - from_glob_path, read_arrow, read_avro, read_csv, read_gbq, read_gbq_function, read_gbq_model, - read_gbq_object_table, read_gbq_query, read_gbq_table, read_json, @@ -251,7 +249,8 @@ def to_datetime( utc: bool = False, format: Optional[str] = None, unit: Optional[str] = None, -) -> bigframes.series.Series: ... +) -> bigframes.series.Series: + ... @typing.overload @@ -261,7 +260,8 @@ def to_datetime( utc: bool = False, format: Optional[str] = None, unit: Optional[str] = None, -) -> Union[pandas.Timestamp, datetime.datetime]: ... +) -> Union[pandas.Timestamp, datetime.datetime]: + ... def to_datetime( @@ -455,7 +455,6 @@ def reset_session(): _read_gbq_colab, read_gbq_function, read_gbq_model, - read_gbq_object_table, read_gbq_query, read_gbq_table, read_json, @@ -466,7 +465,6 @@ def reset_session(): remote_function, to_datetime, to_timedelta, - from_glob_path, ] # Use __all__ to let type checkers know what is part of the public API. 
@@ -492,7 +490,6 @@ def reset_session(): "_read_gbq_colab", "read_gbq_function", "read_gbq_model", - "read_gbq_object_table", "read_gbq_query", "read_gbq_table", "read_json", @@ -503,7 +500,6 @@ def reset_session(): "remote_function", "to_datetime", "to_timedelta", - "from_glob_path", # Other names "api", # pandas dtype attributes diff --git a/packages/bigframes/bigframes/pandas/io/api.py b/packages/bigframes/bigframes/pandas/io/api.py index b7ed1a65d922..1cf818f5ddec 100644 --- a/packages/bigframes/bigframes/pandas/io/api.py +++ b/packages/bigframes/bigframes/pandas/io/api.py @@ -206,7 +206,8 @@ def read_gbq( # type: ignore[overload-overlap] col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., -) -> bigframes.dataframe.DataFrame: ... +) -> bigframes.dataframe.DataFrame: + ... @overload @@ -222,7 +223,8 @@ def read_gbq( col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., -) -> pandas.Series: ... +) -> pandas.Series: + ... def read_gbq( @@ -302,7 +304,8 @@ def _read_gbq_colab( # type: ignore[overload-overlap] *, pyformat_args: Optional[Dict[str, Any]] = ..., dry_run: Literal[False] = ..., -) -> bigframes.dataframe.DataFrame: ... +) -> bigframes.dataframe.DataFrame: + ... @overload @@ -311,7 +314,8 @@ def _read_gbq_colab( *, pyformat_args: Optional[Dict[str, Any]] = ..., dry_run: Literal[True] = ..., -) -> pandas.Series: ... +) -> pandas.Series: + ... 
def _read_gbq_colab( @@ -394,21 +398,6 @@ def read_gbq_model(model_name: str): read_gbq_model.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_model) -def read_gbq_object_table( - object_table: str, *, name: Optional[str] = None -) -> bigframes.dataframe.DataFrame: - return global_session.with_default_session( - bigframes.session.Session.read_gbq_object_table, - object_table, - name=name, - ) - - -read_gbq_object_table.__doc__ = inspect.getdoc( - bigframes.session.Session.read_gbq_object_table -) - - @overload def read_gbq_query( # type: ignore[overload-overlap] query: str, @@ -422,7 +411,8 @@ def read_gbq_query( # type: ignore[overload-overlap] filters: vendored_pandas_gbq.FiltersType = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., -) -> bigframes.dataframe.DataFrame: ... +) -> bigframes.dataframe.DataFrame: + ... @overload @@ -438,7 +428,8 @@ def read_gbq_query( filters: vendored_pandas_gbq.FiltersType = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., -) -> pandas.Series: ... +) -> pandas.Series: + ... def read_gbq_query( @@ -484,7 +475,8 @@ def read_gbq_table( # type: ignore[overload-overlap] use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., -) -> bigframes.dataframe.DataFrame: ... +) -> bigframes.dataframe.DataFrame: + ... @overload @@ -498,7 +490,8 @@ def read_gbq_table( use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., -) -> pandas.Series: ... +) -> pandas.Series: + ... def read_gbq_table( @@ -551,7 +544,8 @@ def read_pandas( pandas_dataframe: pandas.DataFrame, *, write_engine: constants.WriteEngineType = "default", -) -> bigframes.dataframe.DataFrame: ... +) -> bigframes.dataframe.DataFrame: + ... @typing.overload @@ -559,7 +553,8 @@ def read_pandas( pandas_dataframe: pandas.Series, *, write_engine: constants.WriteEngineType = "default", -) -> bigframes.series.Series: ... +) -> bigframes.series.Series: + ... 
@typing.overload @@ -567,7 +562,8 @@ def read_pandas( pandas_dataframe: pandas.Index, *, write_engine: constants.WriteEngineType = "default", -) -> bigframes.core.indexes.Index: ... +) -> bigframes.core.indexes.Index: + ... def read_pandas( @@ -635,19 +631,6 @@ def read_gbq_function( read_gbq_function.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_function) -def from_glob_path( - path: str, *, connection: Optional[str] = None, name: Optional[str] = None -) -> bigframes.dataframe.DataFrame: - return global_session.with_default_session( - bigframes.session.Session.from_glob_path, - path=path, - connection=connection, - name=name, - ) - - -from_glob_path.__doc__ = inspect.getdoc(bigframes.session.Session.from_glob_path) - _default_location_lock = threading.Lock() diff --git a/packages/bigframes/bigframes/session/__init__.py b/packages/bigframes/bigframes/session/__init__.py index eba179411c17..1099e5dc527a 100644 --- a/packages/bigframes/bigframes/session/__init__.py +++ b/packages/bigframes/bigframes/session/__init__.py @@ -432,7 +432,8 @@ def read_gbq( # type: ignore[overload-overlap] col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... @overload def read_gbq( @@ -448,7 +449,8 @@ def read_gbq( col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... def read_gbq( self, @@ -520,7 +522,8 @@ def _read_gbq_colab( *, pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... @overload def _read_gbq_colab( @@ -529,7 +532,8 @@ def _read_gbq_colab( *, pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[True] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... 
@log_adapter.log_name_override("read_gbq_colab") def _read_gbq_colab( @@ -590,7 +594,8 @@ def read_gbq_query( # type: ignore[overload-overlap] filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... @overload def read_gbq_query( @@ -606,7 +611,8 @@ def read_gbq_query( filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... def read_gbq_query( self, @@ -753,7 +759,8 @@ def read_gbq_table( # type: ignore[overload-overlap] use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... @overload def read_gbq_table( @@ -767,7 +774,8 @@ def read_gbq_table( use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... def read_gbq_table( self, @@ -918,7 +926,8 @@ def read_pandas( pandas_dataframe: pandas.Index, *, write_engine: constants.WriteEngineType = "default", - ) -> bigframes.core.indexes.Index: ... + ) -> bigframes.core.indexes.Index: + ... @typing.overload def read_pandas( @@ -926,7 +935,8 @@ def read_pandas( pandas_dataframe: pandas.Series, *, write_engine: constants.WriteEngineType = "default", - ) -> bigframes.series.Series: ... + ) -> bigframes.series.Series: + ... @typing.overload def read_pandas( @@ -934,7 +944,8 @@ def read_pandas( pandas_dataframe: pandas.DataFrame, *, write_engine: constants.WriteEngineType = "default", - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... 
def read_pandas( self, @@ -2303,45 +2314,6 @@ def _create_temp_table( schema=schema, cluster_cols=cluster_cols ) - def from_glob_path( - self, path: str, *, connection: Optional[str] = None, name: Optional[str] = None - ) -> dataframe.DataFrame: - r"""Create a BigFrames DataFrame that contains a BigFrames `ObjectRef column `_ from a global wildcard path. - This operation creates a temporary BQ Object Table under the hood and requires bigquery.connections.delegate permission or BigQuery Connection Admin role. - If you have an existing BQ Object Table, use read_gbq_object_table(). - - .. note:: - BigFrames ObjectRef is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the - Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" - and might have limited support. For more information, see the launch stage descriptions - (https://cloud.google.com/products#product-launch-stages). - - Args: - path (str): - The wildcard global path, such as "gs:////\*". - connection (str or None, default None): - Connection to connect with remote service. str of the format ... - If None, use default connection in session context. BigQuery DataFrame will try to create the connection and attach - permission if the connection isn't fully set up. - name (str): - The column name of the ObjectRef column. - Returns: - bigframes.pandas.DataFrame: - Result BigFrames DataFrame. - """ - warnings.warn( - "from_glob_path is deprecated and will be removed in a future release. Use read_gbq with 'ref' column instead.", - category=bfe.ApiDeprecationWarning, - stacklevel=2, - ) - # TODO(garrettwu): switch to pseudocolumn when b/374988109 is done. 
- connection = self._create_bq_connection(connection=connection) - - table = self._create_object_table(path, connection) - - s = self._loader.read_gbq_table(table)["uri"].str._to_blob(connection) - return s.rename(name).to_frame() - def _create_bq_connection( self, *, @@ -2369,38 +2341,6 @@ def _create_bq_connection( return connection - def read_gbq_object_table( - self, object_table: str, *, name: Optional[str] = None - ) -> dataframe.DataFrame: - """Read an existing object table to create a BigFrames `ObjectRef `_ DataFrame. Use the connection of the object table for the connection of the ObjectRef. - This function dosen't retrieve the object table data. If you want to read the data, use read_gbq() instead. - - .. note:: - BigFrames ObjectRef is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the - Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" - and might have limited support. For more information, see the launch stage descriptions - (https://cloud.google.com/products#product-launch-stages). - - Args: - object_table (str): name of the object table of form ... - name (str or None): the returned ObjectRef column name. - - Returns: - bigframes.pandas.DataFrame: - Result BigFrames DataFrame. - """ - warnings.warn( - "read_gbq_object_table is deprecated and will be removed in a future release. Use read_gbq with 'ref' column instead.", - category=bfe.ApiDeprecationWarning, - stacklevel=2, - ) - # TODO(garrettwu): switch to pseudocolumn when b/374988109 is done. 
- table = self.bqclient.get_table(object_table) - connection = table._properties["externalDataConfiguration"]["connectionId"] - - s = self._loader.read_gbq_table(object_table)["uri"].str._to_blob(connection) - return s.rename(name).to_frame() - # ========================================================================= # bigframes.pandas attributes # diff --git a/packages/bigframes/bigframes/session/loader.py b/packages/bigframes/bigframes/session/loader.py index 960208063105..e8efe83cba80 100644 --- a/packages/bigframes/bigframes/session/loader.py +++ b/packages/bigframes/bigframes/session/loader.py @@ -54,6 +54,8 @@ from google.cloud import bigquery_storage_v1 from google.cloud.bigquery_storage_v1 import ( types as bq_storage_types, +) +from google.cloud.bigquery_storage_v1 import ( writer as bq_storage_writer, ) @@ -642,7 +644,8 @@ def read_gbq_table( # type: ignore[overload-overlap] n_rows: Optional[int] = None, index_col_in_columns: bool = False, publish_execution: bool = True, - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... @overload def read_gbq_table( @@ -665,7 +668,8 @@ def read_gbq_table( n_rows: Optional[int] = None, index_col_in_columns: bool = False, publish_execution: bool = True, - ) -> pandas.Series: ... + ) -> pandas.Series: + ... def read_gbq_table( self, @@ -1149,7 +1153,8 @@ def read_gbq_query( # type: ignore[overload-overlap] dry_run: Literal[False] = ..., force_total_order: Optional[bool] = ..., allow_large_results: bool, - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... @overload def read_gbq_query( @@ -1165,7 +1170,8 @@ def read_gbq_query( dry_run: Literal[True] = ..., force_total_order: Optional[bool] = ..., allow_large_results: bool, - ) -> pandas.Series: ... + ) -> pandas.Series: + ... 
def read_gbq_query( self, diff --git a/packages/bigframes/tests/system/conftest.py b/packages/bigframes/tests/system/conftest.py index 361d9387bc77..f46f5dc3d371 100644 --- a/packages/bigframes/tests/system/conftest.py +++ b/packages/bigframes/tests/system/conftest.py @@ -1503,8 +1503,10 @@ def images_uris() -> list[str]: def images_mm_df( images_uris, session: bigframes.Session, bq_connection: str ) -> bpd.DataFrame: - blob_series = bpd.Series(images_uris, session=session).str.to_blob( - connection=bq_connection + import bigframes.bigquery.obj as obj + + blob_series = obj.make_ref( + bpd.Series(images_uris, session=session), authorizer=bq_connection ) return blob_series.rename("blob_col").to_frame() @@ -1527,7 +1529,12 @@ def pdf_gcs_path() -> str: def pdf_mm_df( pdf_gcs_path, session: bigframes.Session, bq_connection: str ) -> bpd.DataFrame: - return session.from_glob_path(pdf_gcs_path, name="pdf", connection=bq_connection) + import bigframes.bigquery.obj as obj + + table_id = session._create_object_table(pdf_gcs_path, bq_connection) + df = session.read_gbq(table_id) + blob_series = obj.make_ref(df["uri"], authorizer=bq_connection) + return blob_series.rename("pdf").to_frame() @pytest.fixture(scope="session") @@ -1539,6 +1546,9 @@ def audio_gcs_path() -> str: def audio_mm_df( audio_gcs_path, session: bigframes.Session, bq_connection: str ) -> bpd.DataFrame: - return session.from_glob_path( - audio_gcs_path, name="audio", connection=bq_connection - ) + import bigframes.bigquery.obj as obj + + table_id = session._create_object_table(audio_gcs_path, bq_connection) + df = session.read_gbq(table_id) + blob_series = obj.make_ref(df["uri"], authorizer=bq_connection) + return blob_series.rename("audio").to_frame() diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url/out.sql new 
file mode 100644 index 000000000000..78bd19c32483 --- /dev/null +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url/out.sql @@ -0,0 +1,4 @@ +SELECT + `rowindex`, + OBJ.GET_ACCESS_URL(OBJ.MAKE_REF(`string_col`, 'my-connection'), 'r') AS `string_col` +FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_get_access_url_with_duration/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url_with_duration/out.sql similarity index 60% rename from packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_get_access_url_with_duration/out.sql rename to packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url_with_duration/out.sql index 2e8b60230faa..ac2999e386d5 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_get_access_url_with_duration/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url_with_duration/out.sql @@ -1,3 +1,3 @@ SELECT OBJ.GET_ACCESS_URL(`string_col`, 'READ', INTERVAL 3600 MICROSECOND) AS `string_col` -FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file +FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_make_ref/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_make_ref/out.sql new file mode 100644 index 000000000000..3178a55cbfad --- /dev/null +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_make_ref/out.sql @@ -0,0 +1,4 @@ 
+SELECT + `rowindex`, + OBJ.MAKE_REF(`string_col`, 'my-connection') AS `string_col` +FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_make_ref_json/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_make_ref_json/out.sql new file mode 100644 index 000000000000..460675d3513b --- /dev/null +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_make_ref_json/out.sql @@ -0,0 +1,3 @@ +SELECT + OBJ.MAKE_REF(`string_col`) AS `string_col` +FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_fetch_metadata/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_fetch_metadata/out.sql deleted file mode 100644 index ca6f5842df1d..000000000000 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_fetch_metadata/out.sql +++ /dev/null @@ -1,6 +0,0 @@ -SELECT - `rowindex`, - OBJ.FETCH_METADATA( - OBJ.MAKE_REF(`string_col`, 'bigframes-dev.test-region.bigframes-default-connection') - ).`version` -FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_get_access_url/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_get_access_url/out.sql deleted file mode 100644 index 6d612ee6b87d..000000000000 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_get_access_url/out.sql +++ /dev/null @@ -1,10 +0,0 @@ -SELECT - `rowindex`, - JSON_VALUE( - OBJ.GET_ACCESS_URL( - OBJ.MAKE_REF(`string_col`, 
'bigframes-dev.test-region.bigframes-default-connection'), - 'R' - ), - '$.access_urls.read_url' - ) AS `string_col` -FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_make_ref/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_make_ref/out.sql deleted file mode 100644 index 74ca601cd5db..000000000000 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_make_ref/out.sql +++ /dev/null @@ -1,4 +0,0 @@ -SELECT - `rowindex`, - OBJ.MAKE_REF(`string_col`, 'bigframes-dev.test-region.bigframes-default-connection') AS `string_col` -FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_make_ref_json/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_make_ref_json/out.sql deleted file mode 100644 index dc84b3bec12e..000000000000 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_make_ref_json/out.sql +++ /dev/null @@ -1,3 +0,0 @@ -SELECT - OBJ.MAKE_REF(`string_col`) AS `string_col` -FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py index 7130c7ac1610..502490404db9 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py @@ -14,6 +14,7 @@ import pytest +import bigframes.bigquery.obj as obj import bigframes.pandas as bpd from bigframes import operations 
as ops from bigframes.testing import utils @@ -21,13 +22,14 @@ pytest.importorskip("pytest_snapshot") -def test_obj_get_access_url(scalar_types_df: bpd.DataFrame, snapshot): - blob_s = scalar_types_df["string_col"].str._to_blob() - sql = blob_s._blob._read_url().to_frame().sql +def test_blob_get_access_url(scalar_types_df: bpd.DataFrame, snapshot): + ref_s = obj.make_ref(scalar_types_df["string_col"], authorizer="my-connection") + url_s = obj.get_access_url(ref_s, mode="r") + sql = url_s.to_frame().sql snapshot.assert_match(sql, "out.sql") -def test_obj_get_access_url_with_duration(scalar_types_df: bpd.DataFrame, snapshot): +def test_blob_get_access_url_with_duration(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] sql = utils._apply_ops_to_sql( @@ -38,12 +40,12 @@ def test_obj_get_access_url_with_duration(scalar_types_df: bpd.DataFrame, snapsh snapshot.assert_match(sql, "out.sql") -def test_obj_make_ref(scalar_types_df: bpd.DataFrame, snapshot): - blob_df = scalar_types_df["string_col"].str._to_blob() - snapshot.assert_match(blob_df.to_frame().sql, "out.sql") +def test_blob_make_ref(scalar_types_df: bpd.DataFrame, snapshot): + ref_s = obj.make_ref(scalar_types_df["string_col"], authorizer="my-connection") + snapshot.assert_match(ref_s.to_frame().sql, "out.sql") -def test_obj_make_ref_json(scalar_types_df: bpd.DataFrame, snapshot): +def test_blob_make_ref_json(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] sql = utils._apply_ops_to_sql( From 8af25322270d0571dbec682f17114678f924ead0 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 15 Apr 2026 23:51:31 +0000 Subject: [PATCH 09/39] chore: remove unused multimodal fixtures and tests --- .../bigframes/bigframes/pandas/__init__.py | 6 +- packages/bigframes/bigframes/pandas/io/api.py | 33 ++---- .../bigframes/bigframes/session/__init__.py | 33 ++---- .../bigframes/bigframes/session/loader.py | 12 +- 
packages/bigframes/tests/system/conftest.py | 46 -------- .../system/large/ml/test_multimodal_llm.py | 106 ------------------ 6 files changed, 28 insertions(+), 208 deletions(-) delete mode 100644 packages/bigframes/tests/system/large/ml/test_multimodal_llm.py diff --git a/packages/bigframes/bigframes/pandas/__init__.py b/packages/bigframes/bigframes/pandas/__init__.py index c6f7500f9e89..11938a887785 100644 --- a/packages/bigframes/bigframes/pandas/__init__.py +++ b/packages/bigframes/bigframes/pandas/__init__.py @@ -249,8 +249,7 @@ def to_datetime( utc: bool = False, format: Optional[str] = None, unit: Optional[str] = None, -) -> bigframes.series.Series: - ... +) -> bigframes.series.Series: ... @typing.overload @@ -260,8 +259,7 @@ def to_datetime( utc: bool = False, format: Optional[str] = None, unit: Optional[str] = None, -) -> Union[pandas.Timestamp, datetime.datetime]: - ... +) -> Union[pandas.Timestamp, datetime.datetime]: ... def to_datetime( diff --git a/packages/bigframes/bigframes/pandas/io/api.py b/packages/bigframes/bigframes/pandas/io/api.py index 1cf818f5ddec..e2737fdbbd1a 100644 --- a/packages/bigframes/bigframes/pandas/io/api.py +++ b/packages/bigframes/bigframes/pandas/io/api.py @@ -206,8 +206,7 @@ def read_gbq( # type: ignore[overload-overlap] col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., -) -> bigframes.dataframe.DataFrame: - ... +) -> bigframes.dataframe.DataFrame: ... @overload @@ -223,8 +222,7 @@ def read_gbq( col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., -) -> pandas.Series: - ... +) -> pandas.Series: ... def read_gbq( @@ -304,8 +302,7 @@ def _read_gbq_colab( # type: ignore[overload-overlap] *, pyformat_args: Optional[Dict[str, Any]] = ..., dry_run: Literal[False] = ..., -) -> bigframes.dataframe.DataFrame: - ... +) -> bigframes.dataframe.DataFrame: ... 
@overload @@ -314,8 +311,7 @@ def _read_gbq_colab( *, pyformat_args: Optional[Dict[str, Any]] = ..., dry_run: Literal[True] = ..., -) -> pandas.Series: - ... +) -> pandas.Series: ... def _read_gbq_colab( @@ -411,8 +407,7 @@ def read_gbq_query( # type: ignore[overload-overlap] filters: vendored_pandas_gbq.FiltersType = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., -) -> bigframes.dataframe.DataFrame: - ... +) -> bigframes.dataframe.DataFrame: ... @overload @@ -428,8 +423,7 @@ def read_gbq_query( filters: vendored_pandas_gbq.FiltersType = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., -) -> pandas.Series: - ... +) -> pandas.Series: ... def read_gbq_query( @@ -475,8 +469,7 @@ def read_gbq_table( # type: ignore[overload-overlap] use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., -) -> bigframes.dataframe.DataFrame: - ... +) -> bigframes.dataframe.DataFrame: ... @overload @@ -490,8 +483,7 @@ def read_gbq_table( use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., -) -> pandas.Series: - ... +) -> pandas.Series: ... def read_gbq_table( @@ -544,8 +536,7 @@ def read_pandas( pandas_dataframe: pandas.DataFrame, *, write_engine: constants.WriteEngineType = "default", -) -> bigframes.dataframe.DataFrame: - ... +) -> bigframes.dataframe.DataFrame: ... @typing.overload @@ -553,8 +544,7 @@ def read_pandas( pandas_dataframe: pandas.Series, *, write_engine: constants.WriteEngineType = "default", -) -> bigframes.series.Series: - ... +) -> bigframes.series.Series: ... @typing.overload @@ -562,8 +552,7 @@ def read_pandas( pandas_dataframe: pandas.Index, *, write_engine: constants.WriteEngineType = "default", -) -> bigframes.core.indexes.Index: - ... +) -> bigframes.core.indexes.Index: ... 
def read_pandas( diff --git a/packages/bigframes/bigframes/session/__init__.py b/packages/bigframes/bigframes/session/__init__.py index 1099e5dc527a..872aff2291d2 100644 --- a/packages/bigframes/bigframes/session/__init__.py +++ b/packages/bigframes/bigframes/session/__init__.py @@ -432,8 +432,7 @@ def read_gbq( # type: ignore[overload-overlap] col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... @overload def read_gbq( @@ -449,8 +448,7 @@ def read_gbq( col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... def read_gbq( self, @@ -522,8 +520,7 @@ def _read_gbq_colab( *, pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... @overload def _read_gbq_colab( @@ -532,8 +529,7 @@ def _read_gbq_colab( *, pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[True] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... @log_adapter.log_name_override("read_gbq_colab") def _read_gbq_colab( @@ -594,8 +590,7 @@ def read_gbq_query( # type: ignore[overload-overlap] filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... @overload def read_gbq_query( @@ -611,8 +606,7 @@ def read_gbq_query( filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... def read_gbq_query( self, @@ -759,8 +753,7 @@ def read_gbq_table( # type: ignore[overload-overlap] use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... 
@overload def read_gbq_table( @@ -774,8 +767,7 @@ def read_gbq_table( use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... def read_gbq_table( self, @@ -926,8 +918,7 @@ def read_pandas( pandas_dataframe: pandas.Index, *, write_engine: constants.WriteEngineType = "default", - ) -> bigframes.core.indexes.Index: - ... + ) -> bigframes.core.indexes.Index: ... @typing.overload def read_pandas( @@ -935,8 +926,7 @@ def read_pandas( pandas_dataframe: pandas.Series, *, write_engine: constants.WriteEngineType = "default", - ) -> bigframes.series.Series: - ... + ) -> bigframes.series.Series: ... @typing.overload def read_pandas( @@ -944,8 +934,7 @@ def read_pandas( pandas_dataframe: pandas.DataFrame, *, write_engine: constants.WriteEngineType = "default", - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... def read_pandas( self, diff --git a/packages/bigframes/bigframes/session/loader.py b/packages/bigframes/bigframes/session/loader.py index e8efe83cba80..a07b6fd71ca7 100644 --- a/packages/bigframes/bigframes/session/loader.py +++ b/packages/bigframes/bigframes/session/loader.py @@ -644,8 +644,7 @@ def read_gbq_table( # type: ignore[overload-overlap] n_rows: Optional[int] = None, index_col_in_columns: bool = False, publish_execution: bool = True, - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... @overload def read_gbq_table( @@ -668,8 +667,7 @@ def read_gbq_table( n_rows: Optional[int] = None, index_col_in_columns: bool = False, publish_execution: bool = True, - ) -> pandas.Series: - ... + ) -> pandas.Series: ... def read_gbq_table( self, @@ -1153,8 +1151,7 @@ def read_gbq_query( # type: ignore[overload-overlap] dry_run: Literal[False] = ..., force_total_order: Optional[bool] = ..., allow_large_results: bool, - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... 
@overload def read_gbq_query( @@ -1170,8 +1167,7 @@ def read_gbq_query( dry_run: Literal[True] = ..., force_total_order: Optional[bool] = ..., allow_large_results: bool, - ) -> pandas.Series: - ... + ) -> pandas.Series: ... def read_gbq_query( self, diff --git a/packages/bigframes/tests/system/conftest.py b/packages/bigframes/tests/system/conftest.py index f46f5dc3d371..3fe81b6d95bd 100644 --- a/packages/bigframes/tests/system/conftest.py +++ b/packages/bigframes/tests/system/conftest.py @@ -1499,18 +1499,6 @@ def images_uris() -> list[str]: ] -@pytest.fixture(scope="session") -def images_mm_df( - images_uris, session: bigframes.Session, bq_connection: str -) -> bpd.DataFrame: - import bigframes.bigquery.obj as obj - - blob_series = obj.make_ref( - bpd.Series(images_uris, session=session), authorizer=bq_connection - ) - return blob_series.rename("blob_col").to_frame() - - @pytest.fixture() def reset_default_session_and_location(): bpd.close_session() @@ -1518,37 +1506,3 @@ def reset_default_session_and_location(): yield bpd.close_session() bpd.options.bigquery.location = None - - -@pytest.fixture(scope="session") -def pdf_gcs_path() -> str: - return "gs://bigframes_blob_test/pdfs/*" - - -@pytest.fixture(scope="session") -def pdf_mm_df( - pdf_gcs_path, session: bigframes.Session, bq_connection: str -) -> bpd.DataFrame: - import bigframes.bigquery.obj as obj - - table_id = session._create_object_table(pdf_gcs_path, bq_connection) - df = session.read_gbq(table_id) - blob_series = obj.make_ref(df["uri"], authorizer=bq_connection) - return blob_series.rename("pdf").to_frame() - - -@pytest.fixture(scope="session") -def audio_gcs_path() -> str: - return "gs://bigframes_blob_test/audio/*" - - -@pytest.fixture(scope="session") -def audio_mm_df( - audio_gcs_path, session: bigframes.Session, bq_connection: str -) -> bpd.DataFrame: - import bigframes.bigquery.obj as obj - - table_id = session._create_object_table(audio_gcs_path, bq_connection) - df = session.read_gbq(table_id) 
- blob_series = obj.make_ref(df["uri"], authorizer=bq_connection) - return blob_series.rename("audio").to_frame() diff --git a/packages/bigframes/tests/system/large/ml/test_multimodal_llm.py b/packages/bigframes/tests/system/large/ml/test_multimodal_llm.py deleted file mode 100644 index 69c316e3dacc..000000000000 --- a/packages/bigframes/tests/system/large/ml/test_multimodal_llm.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pandas as pd -import pyarrow as pa -import pytest - -import bigframes.pandas as bpd -from bigframes.ml import llm -from bigframes.testing import utils - - -@pytest.mark.parametrize( - "model_name", - ( - "gemini-2.0-flash-001", - "gemini-2.0-flash-lite-001", - ), -) -@pytest.mark.flaky(retries=2) -def test_gemini_text_generator_multimodal_input( - images_mm_df: bpd.DataFrame, model_name, session, bq_connection -): - gemini_text_generator_model = llm.GeminiTextGenerator( - model_name=model_name, connection_name=bq_connection, session=session - ) - pd_df = gemini_text_generator_model.predict( - images_mm_df, prompt=["Describe", images_mm_df["blob_col"]] - ).to_pandas() - utils.check_pandas_df_schema_and_index( - pd_df, - columns=utils.ML_GENERATE_TEXT_OUTPUT + ["blob_col"], - index=2, - col_exact=False, - ) - - -@pytest.mark.flaky(retries=2) -def test_multimodal_embedding_generator_predict_default_params_success( - images_mm_df, session, bq_connection -): - 
text_embedding_model = llm.MultimodalEmbeddingGenerator( - connection_name=bq_connection, session=session - ) - df = text_embedding_model.predict(images_mm_df).to_pandas() - utils.check_pandas_df_schema_and_index( - df, - columns=utils.ML_MULTIMODAL_GENERATE_EMBEDDING_OUTPUT, - index=2, - col_exact=False, - ) - assert len(df["ml_generate_embedding_result"][0]) == 1408 - - -@pytest.mark.parametrize( - "model_name", - ("gemini-2.0-flash-001",), -) -@pytest.mark.flaky(retries=2) -def test_gemini_text_generator_multimodal_structured_output( - images_mm_df: bpd.DataFrame, model_name, session, bq_connection -): - gemini_text_generator_model = llm.GeminiTextGenerator( - model_name=model_name, connection_name=bq_connection, session=session - ) - output_schema = { - "bool_output": "bool", - "int_output": "int64", - "float_output": "float64", - "str_output": "string", - "array_output": "array", - "struct_output": "struct", - } - df = gemini_text_generator_model.predict( - images_mm_df, - prompt=["Describe", images_mm_df["blob_col"]], - output_schema=output_schema, - ) - assert df["bool_output"].dtype == pd.BooleanDtype() - assert df["int_output"].dtype == pd.Int64Dtype() - assert df["float_output"].dtype == pd.Float64Dtype() - assert df["str_output"].dtype == pd.StringDtype(storage="pyarrow") - assert df["array_output"].dtype == pd.ArrowDtype(pa.list_(pa.int64())) - assert df["struct_output"].dtype == pd.ArrowDtype( - pa.struct([("number", pa.int64())]) - ) - - pd_df = df.to_pandas() - utils.check_pandas_df_schema_and_index( - pd_df, - columns=list(output_schema.keys()) - + ["blob_col", "prompt", "full_response", "status"], - index=2, - col_exact=False, - ) From c73abe73f13a2860551050040311e433cb949a5a Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Thu, 16 Apr 2026 20:19:59 +0000 Subject: [PATCH 10/39] fix tests related to blob api deprecation --- .../tests/system/small/bigquery/test_ai.py | 53 +++++++++++++------ .../system/small/pandas/test_describe.py | 27 ++++++++-- 
.../tests/system/small/test_dataframe.py | 31 +++++++++-- .../tests/system/small/test_dataframe_io.py | 36 ++++++++++--- .../sqlglot/expressions/test_blob_ops.py | 16 ------ 5 files changed, 118 insertions(+), 45 deletions(-) diff --git a/packages/bigframes/tests/system/small/bigquery/test_ai.py b/packages/bigframes/tests/system/small/bigquery/test_ai.py index 16e9cca9f136..f475f3780775 100644 --- a/packages/bigframes/tests/system/small/bigquery/test_ai.py +++ b/packages/bigframes/tests/system/small/bigquery/test_ai.py @@ -22,6 +22,31 @@ import bigframes.pandas as bpd from bigframes import dataframe, dtypes, series from bigframes.testing import utils as test_utils +import uuid +import google.cloud.bigquery + + +def _create_mock_obj_ref_df(session, uris, name="image"): + df = bpd.DataFrame({name: uris}, session=session) + table_id = f"bigframes-dev.bigframes_tests_sys.tmp_obj_ref_{uuid.uuid4().hex}" + df.to_gbq(table_id, if_exists="replace") + + client = session.bqclient + table = client.get_table(table_id) + schema = list(table.schema) + for i, field in enumerate(schema): + if field.name == name: + schema[i] = google.cloud.bigquery.SchemaField( + name=field.name, + field_type=field.field_type, + mode=field.mode, + description="bigframes_dtype: OBJ_REF_DTYPE", + ) + break + table.schema = schema + client.update_table(table, ["schema"]) + + return session.read_gbq(table_id) def test_ai_function_pandas_input(session): @@ -159,8 +184,8 @@ def test_ai_generate_bool(session): def test_ai_generate_bool_multi_model(session): - df = session.from_glob_path( - "gs://bigframes-dev-testing/a_multimodel/images/*", name="image" + df = _create_mock_obj_ref_df( + session, ["gs://cloud-samples-data/vision/ocr/sign.jpg"], name="image" ) result = bbq.ai.generate_bool((df["image"], " contains an animal")) @@ -196,8 +221,8 @@ def test_ai_generate_int(session): def test_ai_generate_int_multi_model(session): - df = session.from_glob_path( - 
"gs://bigframes-dev-testing/a_multimodel/images/*", name="image" + df = _create_mock_obj_ref_df( + session, ["gs://cloud-samples-data/vision/ocr/sign.jpg"], name="image" ) result = bbq.ai.generate_int( @@ -235,8 +260,8 @@ def test_ai_generate_double(session): def test_ai_generate_double_multi_model(session): - df = session.from_glob_path( - "gs://bigframes-dev-testing/a_multimodel/images/*", name="image" + df = _create_mock_obj_ref_df( + session, ["gs://cloud-samples-data/vision/ocr/sign.jpg"], name="image" ) result = bbq.ai.generate_double( @@ -267,10 +292,8 @@ def test_ai_if(session): def test_ai_if_multi_model(session, bq_connection): - df = session.from_glob_path( - "gs://bigframes-dev-testing/a_multimodel/images/*", - name="image", - connection=bq_connection, + df = _create_mock_obj_ref_df( + session, ["gs://cloud-samples-data/vision/ocr/sign.jpg"], name="image" ) result = bbq.ai.if_((df["image"], " contains an animal")) @@ -289,10 +312,8 @@ def test_ai_classify(session): def test_ai_classify_multi_model(session, bq_connection): - df = session.from_glob_path( - "gs://bigframes-dev-testing/a_multimodel/images/*", - name="image", - connection=bq_connection, + df = _create_mock_obj_ref_df( + session, ["gs://cloud-samples-data/vision/ocr/sign.jpg"], name="image" ) result = bbq.ai.classify(df["image"], ["photo", "cartoon"]) @@ -312,8 +333,8 @@ def test_ai_score(session): def test_ai_score_multi_model(session): - df = session.from_glob_path( - "gs://bigframes-dev-testing/a_multimodel/images/*", name="image" + df = _create_mock_obj_ref_df( + session, ["gs://cloud-samples-data/vision/ocr/sign.jpg"], name="image" ) prompt = ("Rank the liveliness of ", df["image"], "on the scale from 1 to 3") diff --git a/packages/bigframes/tests/system/small/pandas/test_describe.py b/packages/bigframes/tests/system/small/pandas/test_describe.py index b8e427c10ea8..0db25ecd60c9 100644 --- a/packages/bigframes/tests/system/small/pandas/test_describe.py +++ 
b/packages/bigframes/tests/system/small/pandas/test_describe.py @@ -358,15 +358,36 @@ def test_series_groupby_describe(scalars_dfs): def test_describe_json_and_obj_ref_returns_count(session): # Test describe() works on JSON and OBJ_REF types (without nunique, which fails) + import uuid + import google.cloud.bigquery + sql = """ SELECT PARSE_JSON('{"a": 1}') AS json_col, 'gs://cloud-samples-data/vision/ocr/sign.jpg' AS uri_col """ - df = session.read_gbq(sql) + df_init = session.read_gbq(sql) + + table_id = f"bigframes-dev.bigframes_tests_sys.tmp_obj_ref_{uuid.uuid4().hex}" + df_init.to_gbq(table_id, if_exists="replace") + + client = session.bqclient + table = client.get_table(table_id) + schema = list(table.schema) + for i, field in enumerate(schema): + if field.name == "uri_col": + schema[i] = google.cloud.bigquery.SchemaField( + name=field.name, + field_type=field.field_type, + mode=field.mode, + description="bigframes_dtype: OBJ_REF_DTYPE", + ) + break + table.schema = schema + client.update_table(table, ["schema"]) - df["obj_ref_col"] = df["uri_col"].str.to_blob() - df = df.drop(columns=["uri_col"]) + df = session.read_gbq(table_id) + df = df.rename(columns={"uri_col": "obj_ref_col"}) res = df.describe(include="all").to_pandas() diff --git a/packages/bigframes/tests/system/small/test_dataframe.py b/packages/bigframes/tests/system/small/test_dataframe.py index 8df13a5bcbda..a4b290a5d242 100644 --- a/packages/bigframes/tests/system/small/test_dataframe.py +++ b/packages/bigframes/tests/system/small/test_dataframe.py @@ -5919,9 +5919,34 @@ def test_to_gbq_table_labels(scalars_df_index): def test_to_gbq_obj_ref_persists(session): # Test that saving and loading an Object Reference retains its dtype - bdf = session.from_glob_path( - "gs://cloud-samples-data/vision/ocr/*.jpg", name="uris" - ).head(1) + import uuid + import google.cloud.bigquery + + sql = """ + SELECT STRUCT('gs://cloud-samples-data/vision/ocr/sign.jpg' AS uri, CAST(NULL AS STRING) AS version, 
CAST(NULL AS STRING) AS authorizer, PARSE_JSON('{}') AS details) AS uris + """ + df_init = session.read_gbq(sql) + + tmp_table_id = f"bigframes-dev.bigframes_tests_sys.tmp_obj_ref_{uuid.uuid4().hex}" + df_init.to_gbq(tmp_table_id, if_exists="replace") + + client = session.bqclient + table = client.get_table(tmp_table_id) + schema = list(table.schema) + for i, field in enumerate(schema): + if field.name == "uris": + schema[i] = google.cloud.bigquery.SchemaField( + name=field.name, + field_type=field.field_type, + mode=field.mode, + description="bigframes_dtype: OBJ_REF_DTYPE", + fields=field.fields, + ) + break + table.schema = schema + client.update_table(table, ["schema"]) + + bdf = session.read_gbq(tmp_table_id) destination_table = "bigframes-dev.bigframes_tests_sys.test_obj_ref_persistence" bdf.to_gbq(destination_table, if_exists="replace") diff --git a/packages/bigframes/tests/system/small/test_dataframe_io.py b/packages/bigframes/tests/system/small/test_dataframe_io.py index 4db606afb749..1771b6485a25 100644 --- a/packages/bigframes/tests/system/small/test_dataframe_io.py +++ b/packages/bigframes/tests/system/small/test_dataframe_io.py @@ -1010,16 +1010,38 @@ def test_to_gbq_timedelta_tag_ignored_when_appending(bigquery_client, dataset_id def test_to_gbq_obj_ref(session, dataset_id: str, bigquery_client): + import uuid + import google.cloud.bigquery + destination_table = f"{dataset_id}.test_to_gbq_obj_ref" sql = """ - SELECT - 'gs://cloud-samples-data/vision/ocr/sign.jpg' AS uri_col + SELECT STRUCT('gs://cloud-samples-data/vision/ocr/sign.jpg' AS uri, CAST(NULL AS STRING) AS version, CAST(NULL AS STRING) AS authorizer, PARSE_JSON('{}') AS details) AS uri_col """ - df = session.read_gbq(sql) - df["obj_ref_col"] = df["uri_col"].str.to_blob() - df = df.drop(columns=["uri_col"]) - - df.to_gbq(destination_table) + df_init = session.read_gbq(sql) + + tmp_table_id = f"{dataset_id}.tmp_obj_ref_{uuid.uuid4().hex}" + df_init.to_gbq(tmp_table_id, if_exists="replace") + + 
client = session.bqclient + table = client.get_table(tmp_table_id) + schema = list(table.schema) + for i, field in enumerate(schema): + if field.name == "uri_col": + schema[i] = google.cloud.bigquery.SchemaField( + name=field.name, + field_type=field.field_type, + mode=field.mode, + description="bigframes_dtype: OBJ_REF_DTYPE", + fields=field.fields, + ) + break + table.schema = schema + client.update_table(table, ["schema"]) + + df = session.read_gbq(tmp_table_id) + df = df.rename(columns={"uri_col": "obj_ref_col"}) + + df.to_gbq(destination_table, if_exists="replace") table = bigquery_client.get_table(destination_table) obj_ref_field = next(f for f in table.schema if f.name == "obj_ref_col") diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py index 502490404db9..33e656fd83b0 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py @@ -38,19 +38,3 @@ def test_blob_get_access_url_with_duration(scalar_types_df: bpd.DataFrame, snaps [col_name], ) snapshot.assert_match(sql, "out.sql") - - -def test_blob_make_ref(scalar_types_df: bpd.DataFrame, snapshot): - ref_s = obj.make_ref(scalar_types_df["string_col"], authorizer="my-connection") - snapshot.assert_match(ref_s.to_frame().sql, "out.sql") - - -def test_blob_make_ref_json(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = utils._apply_ops_to_sql( - bf_df, - [ops.obj_make_ref_json_op.as_expr(col_name)], - [col_name], - ) - snapshot.assert_match(sql, "out.sql") From 2ce7667fd6ff3da8d1fcf72ffe4293f852608fc9 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Thu, 16 Apr 2026 21:28:49 +0000 Subject: [PATCH 11/39] test: update blob ops snapshots --- .../snapshots/test_blob_ops/test_blob_get_access_url/out.sql | 2 +- 
.../test_blob_get_access_url_with_duration/out.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url/out.sql index 78bd19c32483..6b21b68cb617 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url/out.sql @@ -1,4 +1,4 @@ SELECT `rowindex`, OBJ.GET_ACCESS_URL(OBJ.MAKE_REF(`string_col`, 'my-connection'), 'r') AS `string_col` -FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` +FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url_with_duration/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url_with_duration/out.sql index ac2999e386d5..2e8b60230faa 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url_with_duration/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url_with_duration/out.sql @@ -1,3 +1,3 @@ SELECT OBJ.GET_ACCESS_URL(`string_col`, 'READ', INTERVAL 3600 MICROSECOND) AS `string_col` -FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` +FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file From 8cc3a1412db268b0889cea5c79e7e1ad894e4709 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 22 Apr 2026 19:45:17 +0000 Subject: [PATCH 12/39] refactor: remove blob references --- 
packages/bigframes/bigframes/dataframe.py | 17 ++------- packages/bigframes/bigframes/display/html.py | 36 +++----------------- packages/bigframes/bigframes/series.py | 5 --- 3 files changed, 7 insertions(+), 51 deletions(-) diff --git a/packages/bigframes/bigframes/dataframe.py b/packages/bigframes/bigframes/dataframe.py index a6fe9a3fe244..ee6bf9c863ff 100644 --- a/packages/bigframes/bigframes/dataframe.py +++ b/packages/bigframes/bigframes/dataframe.py @@ -820,21 +820,8 @@ def __repr__(self) -> str: ) def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]: - """Process ObjectRef columns for display.""" - df = self - blob_cols = [] - if bigframes.options.display.blob_display: - blob_cols = [ - series_name - for series_name, series in self.items() - if series.dtype == bigframes.dtypes.OBJ_REF_DTYPE - ] - if blob_cols: - df = self.copy() - for col in blob_cols: - # TODO(garrettwu): Not necessary to get access urls for all the rows. Update when having a to get URLs from local data. - df[col] = df[col]._blob._get_runtime(mode="R", with_metadata=True) - return df, blob_cols + """Process ObjectRef columns for display. (Deprecated)""" + return self, [] def _repr_mimebundle_(self, include=None, exclude=None): """ diff --git a/packages/bigframes/bigframes/display/html.py b/packages/bigframes/bigframes/display/html.py index e9b91afa8e77..7dfb34693cd5 100644 --- a/packages/bigframes/bigframes/display/html.py +++ b/packages/bigframes/bigframes/display/html.py @@ -189,7 +189,6 @@ def create_html_representation( pandas_df: pd.DataFrame, total_rows: int, total_columns: int, - blob_cols: list[str], ) -> str: """Create an HTML representation of the DataFrame or Series.""" from bigframes.series import Series @@ -197,12 +196,8 @@ def create_html_representation( opts = options.display with display_options.pandas_repr(opts): if isinstance(obj, Series): - # Some pandas objects may not have a _repr_html_ method, or it might - # fail in certain environments. 
We fall back to a pre-formatted - # string representation to ensure something is always displayed. pd_series = pandas_df.iloc[:, 0] try: - # TODO(b/464053870): Support rich display for blob Series. html_string = pd_series._repr_html_() except AttributeError: html_string = f"
{pd_series.to_string()}
" @@ -212,26 +207,8 @@ def create_html_representation( html_string += f"

[{total_rows} rows]

" return html_string else: - # It's a DataFrame - # TODO(shuowei, b/464053870): Escaping HTML would be useful, but - # `escape=False` is needed to show images. We may need to implement - # a full-fledged repr module to better support types not in pandas. - if options.display.blob_display and blob_cols: - formatters = {blob_col: _obj_ref_rt_to_html for blob_col in blob_cols} - - # set max_colwidth so not to truncate the image url - with pandas.option_context("display.max_colwidth", None): - html_string = pandas_df.to_html( - escape=False, - notebook=True, - max_rows=pandas.get_option("display.max_rows"), - max_cols=pandas.get_option("display.max_columns"), - show_dimensions=pandas.get_option("display.show_dimensions"), - formatters=formatters, # type: ignore - ) - else: - # _repr_html_ stub is missing so mypy thinks it's a Series. Ignore mypy. - html_string = pandas_df._repr_html_() # type:ignore + # _repr_html_ stub is missing so mypy thinks it's a Series. Ignore mypy. + html_string = pandas_df._repr_html_() # type:ignore html_string += f"[{total_rows} rows x {total_columns} columns in total]" return html_string @@ -265,7 +242,7 @@ def get_anywidget_bundle( if isinstance(obj, Series): df = obj.to_frame() else: - df, blob_cols = obj._get_display_df_and_blob_cols() + df, _ = obj._get_display_df_and_blob_cols() widget = display.TableWidget(df) widget_repr_result = widget._repr_mimebundle_(include=include, exclude=exclude) @@ -288,7 +265,6 @@ def get_anywidget_bundle( cached_pd, total_rows, total_columns, - blob_cols if "blob_cols" in locals() else [], ) is_series, has_index = _get_obj_metadata(obj) widget_repr["text/plain"] = plaintext.create_text_representation( @@ -317,14 +293,12 @@ def repr_mimebundle_head( from bigframes.series import Series opts = options.display - blob_cols: list[str] if isinstance(obj, Series): pandas_df, row_count, query_job = obj._block.retrieve_repr_request_results( opts.max_rows ) - blob_cols = [] else: - df, blob_cols = 
obj._get_display_df_and_blob_cols() + df, _ = obj._get_display_df_and_blob_cols() pandas_df, row_count, query_job = df._block.retrieve_repr_request_results( opts.max_rows ) @@ -333,7 +307,7 @@ def repr_mimebundle_head( column_count = len(pandas_df.columns) html_string = create_html_representation( - obj, pandas_df, row_count, column_count, blob_cols + obj, pandas_df, row_count, column_count ) is_series, has_index = _get_obj_metadata(obj) diff --git a/packages/bigframes/bigframes/series.py b/packages/bigframes/bigframes/series.py index 75d9300d2a9b..a5303a2002a0 100644 --- a/packages/bigframes/bigframes/series.py +++ b/packages/bigframes/bigframes/series.py @@ -68,7 +68,6 @@ import bigframes.functions import bigframes.operations as ops import bigframes.operations.aggregations as agg_ops -import bigframes.operations.blob as blob import bigframes.operations.lists as lists import bigframes.operations.plotting as plotting import bigframes.operations.python_op_maps as python_ops @@ -320,10 +319,6 @@ def struct(self) -> structs.StructAccessor: def list(self) -> lists.ListAccessor: return lists.ListAccessor(self) - @property - def _blob(self) -> blob._BlobAccessor: - return blob._BlobAccessor(self) - @property @validations.requires_ordering() def T(self) -> Series: From 7c92e4f2a23889a8f90e577c705a11dacd449ccf Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 24 Apr 2026 21:14:15 +0000 Subject: [PATCH 13/39] chore: update tests, ml, fix lint, and update hooks --- .pre-commit-config.yaml | 5 +- packages/bigframes/bigframes/display/html.py | 4 +- packages/bigframes/bigframes/ml/llm.py | 4 +- .../tests/system/large/bigquery/test_obj.py | 41 ------ .../tests/system/small/bigquery/test_ai.py | 4 +- .../system/small/pandas/test_describe.py | 1 + .../tests/system/small/test_dataframe.py | 1 + .../tests/system/small/test_dataframe_io.py | 1 + .../bigframes/tests/unit/bigquery/test_obj.py | 125 ------------------ .../sqlglot/expressions/test_blob_ops.py | 40 ------ 10 files 
changed, 11 insertions(+), 215 deletions(-) delete mode 100644 packages/bigframes/tests/system/large/bigquery/test_obj.py delete mode 100644 packages/bigframes/tests/unit/bigquery/test_obj.py delete mode 100644 packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5405cc8ff1f3..f0f656999078 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,10 +22,11 @@ repos: - id: end-of-file-fixer - id: check-yaml - repo: https://github.com/psf/black - rev: 22.3.0 + rev: 24.3.0 hooks: - id: black - repo: https://github.com/pycqa/flake8 - rev: 3.9.2 + rev: 7.0.0 hooks: - id: flake8 + args: ["--ignore=E501", "--ignore=W503"] diff --git a/packages/bigframes/bigframes/display/html.py b/packages/bigframes/bigframes/display/html.py index 7dfb34693cd5..f067a6e11f1e 100644 --- a/packages/bigframes/bigframes/display/html.py +++ b/packages/bigframes/bigframes/display/html.py @@ -306,9 +306,7 @@ def repr_mimebundle_head( obj._set_internal_query_job(query_job) column_count = len(pandas_df.columns) - html_string = create_html_representation( - obj, pandas_df, row_count, column_count - ) + html_string = create_html_representation(obj, pandas_df, row_count, column_count) is_series, has_index = _get_obj_metadata(obj) text_representation = plaintext.create_text_representation( diff --git a/packages/bigframes/bigframes/ml/llm.py b/packages/bigframes/bigframes/ml/llm.py index d9e228c90c9f..3887453a2239 100644 --- a/packages/bigframes/bigframes/ml/llm.py +++ b/packages/bigframes/bigframes/ml/llm.py @@ -397,7 +397,7 @@ def predict( # TODO(garrettwu): remove transform to ObjRefRuntime when BQML supports ObjRef as input if X["content"].dtype == dtypes.OBJ_REF_DTYPE: - X["content"] = X["content"]._blob._get_runtime("R", with_metadata=True) + X["content"] = bbq.obj.get_access_url(X["content"], mode="r") options: dict = {} @@ -731,7 +731,7 @@ def predict( isinstance(item, 
bigframes.series.Series) and item.dtype == dtypes.OBJ_REF_DTYPE ): - item = item._blob._get_runtime("R", with_metadata=True) + item = bbq.obj.get_access_url(item, mode="r") df_prompt[label] = item df_prompt = df_prompt.drop(columns="bigframes_placeholder_col") diff --git a/packages/bigframes/tests/system/large/bigquery/test_obj.py b/packages/bigframes/tests/system/large/bigquery/test_obj.py deleted file mode 100644 index dcca7580b143..000000000000 --- a/packages/bigframes/tests/system/large/bigquery/test_obj.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest - -import bigframes.bigquery as bbq - - -@pytest.fixture() -def objectrefs(bq_connection): - return bbq.obj.make_ref( - [ - "gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/tick-terminator-for-dogs.png" - ], - bq_connection, - ) - - -def test_obj_fetch_metadata(objectrefs): - metadata = bbq.obj.fetch_metadata(objectrefs) - - result = metadata.to_pandas() - assert len(result) == len(objectrefs) - - -def test_obj_get_access_url(objectrefs): - access = bbq.obj.get_access_url(objectrefs, "r") - - result = access.to_pandas() - assert len(result) == len(objectrefs) diff --git a/packages/bigframes/tests/system/small/bigquery/test_ai.py b/packages/bigframes/tests/system/small/bigquery/test_ai.py index f475f3780775..8563a894a7fa 100644 --- a/packages/bigframes/tests/system/small/bigquery/test_ai.py +++ b/packages/bigframes/tests/system/small/bigquery/test_ai.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import uuid from unittest import mock +import google.cloud.bigquery import pandas as pd import pyarrow as pa import pytest @@ -22,8 +24,6 @@ import bigframes.pandas as bpd from bigframes import dataframe, dtypes, series from bigframes.testing import utils as test_utils -import uuid -import google.cloud.bigquery def _create_mock_obj_ref_df(session, uris, name="image"): diff --git a/packages/bigframes/tests/system/small/pandas/test_describe.py b/packages/bigframes/tests/system/small/pandas/test_describe.py index 0db25ecd60c9..beb7a1968fc2 100644 --- a/packages/bigframes/tests/system/small/pandas/test_describe.py +++ b/packages/bigframes/tests/system/small/pandas/test_describe.py @@ -359,6 +359,7 @@ def test_series_groupby_describe(scalars_dfs): def test_describe_json_and_obj_ref_returns_count(session): # Test describe() works on JSON and OBJ_REF types (without nunique, which fails) import uuid + import google.cloud.bigquery sql = """ diff --git a/packages/bigframes/tests/system/small/test_dataframe.py b/packages/bigframes/tests/system/small/test_dataframe.py index 8555c2a84897..a109c33ffa66 100644 --- a/packages/bigframes/tests/system/small/test_dataframe.py +++ b/packages/bigframes/tests/system/small/test_dataframe.py @@ -5944,6 +5944,7 @@ def test_to_gbq_table_labels(scalars_df_index): def test_to_gbq_obj_ref_persists(session): # Test that saving and loading an Object Reference retains its dtype import uuid + import google.cloud.bigquery sql = """ diff --git a/packages/bigframes/tests/system/small/test_dataframe_io.py b/packages/bigframes/tests/system/small/test_dataframe_io.py index 1771b6485a25..ef21e929afa3 100644 --- a/packages/bigframes/tests/system/small/test_dataframe_io.py +++ b/packages/bigframes/tests/system/small/test_dataframe_io.py @@ -1011,6 +1011,7 @@ def test_to_gbq_timedelta_tag_ignored_when_appending(bigquery_client, dataset_id def test_to_gbq_obj_ref(session, dataset_id: str, bigquery_client): import uuid + import google.cloud.bigquery 
destination_table = f"{dataset_id}.test_to_gbq_obj_ref" diff --git a/packages/bigframes/tests/unit/bigquery/test_obj.py b/packages/bigframes/tests/unit/bigquery/test_obj.py deleted file mode 100644 index 9eac234b8bc3..000000000000 --- a/packages/bigframes/tests/unit/bigquery/test_obj.py +++ /dev/null @@ -1,125 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import datetime -from unittest import mock - -import bigframes.bigquery.obj as obj -import bigframes.operations as ops -import bigframes.series - - -def create_mock_series(): - result = mock.create_autospec(bigframes.series.Series, instance=True) - result.copy.return_value = result - return result - - -def test_fetch_metadata_op_structure(): - op = ops.obj_fetch_metadata_op - assert op.name == "obj_fetch_metadata" - - -def test_get_access_url_op_structure(): - op = ops.ObjGetAccessUrl(mode="r") - assert op.name == "obj_get_access_url" - assert op.mode == "r" - assert op.duration is None - - -def test_get_access_url_with_duration_op_structure(): - op = ops.ObjGetAccessUrl(mode="rw", duration=3600000000) - assert op.name == "obj_get_access_url" - assert op.mode == "rw" - assert op.duration == 3600000000 - - -def test_make_ref_op_structure(): - op = ops.obj_make_ref_op - assert op.name == "obj_make_ref" - - -def test_make_ref_json_op_structure(): - op = ops.obj_make_ref_json_op - assert op.name == "obj_make_ref_json" - - -def test_fetch_metadata_calls_apply_unary_op(): - series = 
create_mock_series() - - obj.fetch_metadata(series) - - series._apply_unary_op.assert_called_once() - args, _ = series._apply_unary_op.call_args - assert args[0] == ops.obj_fetch_metadata_op - - -def test_get_access_url_calls_apply_unary_op_without_duration(): - series = create_mock_series() - - obj.get_access_url(series, mode="r") - - series._apply_unary_op.assert_called_once() - args, _ = series._apply_unary_op.call_args - assert isinstance(args[0], ops.ObjGetAccessUrl) - assert args[0].mode == "r" - assert args[0].duration is None - - -def test_get_access_url_calls_apply_unary_op_with_duration(): - series = create_mock_series() - duration = datetime.timedelta(hours=1) - - obj.get_access_url(series, mode="rw", duration=duration) - - series._apply_unary_op.assert_called_once() - args, _ = series._apply_unary_op.call_args - assert isinstance(args[0], ops.ObjGetAccessUrl) - assert args[0].mode == "rw" - # 1 hour = 3600 seconds = 3600 * 1000 * 1000 microseconds - assert args[0].duration == 3600000000 - - -def test_make_ref_calls_apply_binary_op_with_authorizer(): - uri = create_mock_series() - auth = create_mock_series() - - obj.make_ref(uri, authorizer=auth) - - uri._apply_binary_op.assert_called_once() - args, _ = uri._apply_binary_op.call_args - assert args[0] == auth - assert args[1] == ops.obj_make_ref_op - - -def test_make_ref_calls_apply_binary_op_with_authorizer_string(): - uri = create_mock_series() - auth = "us.bigframes-test-connection" - - obj.make_ref(uri, authorizer=auth) - - uri._apply_binary_op.assert_called_once() - args, _ = uri._apply_binary_op.call_args - assert args[0] == auth - assert args[1] == ops.obj_make_ref_op - - -def test_make_ref_calls_apply_unary_op_without_authorizer(): - json_val = create_mock_series() - - obj.make_ref(json_val) - - json_val._apply_unary_op.assert_called_once() - args, _ = json_val._apply_unary_op.call_args - assert args[0] == ops.obj_make_ref_json_op diff --git 
a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py deleted file mode 100644 index 33e656fd83b0..000000000000 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest - -import bigframes.bigquery.obj as obj -import bigframes.pandas as bpd -from bigframes import operations as ops -from bigframes.testing import utils - -pytest.importorskip("pytest_snapshot") - - -def test_blob_get_access_url(scalar_types_df: bpd.DataFrame, snapshot): - ref_s = obj.make_ref(scalar_types_df["string_col"], authorizer="my-connection") - url_s = obj.get_access_url(ref_s, mode="r") - sql = url_s.to_frame().sql - snapshot.assert_match(sql, "out.sql") - - -def test_blob_get_access_url_with_duration(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = utils._apply_ops_to_sql( - bf_df, - [ops.ObjGetAccessUrl(mode="READ", duration=3600).as_expr(col_name)], - [col_name], - ) - snapshot.assert_match(sql, "out.sql") From 0ce4ea7cae285f757df7a0a74f579a6bb1e50cb3 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 24 Apr 2026 21:26:54 +0000 Subject: [PATCH 14/39] revert: restore .pre-commit-config.yaml to original state --- .pre-commit-config.yaml | 5 ++--- 1 file changed, 2 
insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f0f656999078..5405cc8ff1f3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,11 +22,10 @@ repos: - id: end-of-file-fixer - id: check-yaml - repo: https://github.com/psf/black - rev: 24.3.0 + rev: 22.3.0 hooks: - id: black - repo: https://github.com/pycqa/flake8 - rev: 7.0.0 + rev: 3.9.2 hooks: - id: flake8 - args: ["--ignore=E501", "--ignore=W503"] From 1ba46c0c794f2ad2b3d000604ed52c0211488353 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Mon, 27 Apr 2026 20:34:45 +0000 Subject: [PATCH 15/39] feat: add _from_glob_path and update tests --- packages/bigframes/bigframes/session/__init__.py | 10 ++++++++++ .../bigframes/tests/system/large/operations/test_ai.py | 4 ++-- .../tests/system/large/operations/test_semantics.py | 4 ++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/packages/bigframes/bigframes/session/__init__.py b/packages/bigframes/bigframes/session/__init__.py index af51bdc3680b..339ab165117e 100644 --- a/packages/bigframes/bigframes/session/__init__.py +++ b/packages/bigframes/bigframes/session/__init__.py @@ -2309,6 +2309,16 @@ def _start_query_ml_ddl( ) return iterator, query_job + def _from_glob_path( + self, path: str, *, connection: Optional[str] = None, name: Optional[str] = None + ) -> dataframe.DataFrame: + """Create a BigFrames DataFrame that contains a BigFrames ObjectRef column from a global wildcard path.""" + import bigframes.bigquery as bq + connection = self._create_bq_connection(connection=connection) + table = self._create_object_table(path, connection) + s = bq.obj.make_ref(self._loader.read_gbq_table(table)["uri"], authorizer=connection) + return s.rename(name).to_frame() + def _create_object_table(self, path: str, connection: str) -> str: """Create a random id Object Table from the input path and connection.""" table = str(self._anon_dataset_manager.generate_unique_resource_id()) diff --git 
a/packages/bigframes/tests/system/large/operations/test_ai.py b/packages/bigframes/tests/system/large/operations/test_ai.py index 57aa1966cbef..9c644e9c6b8a 100644 --- a/packages/bigframes/tests/system/large/operations/test_ai.py +++ b/packages/bigframes/tests/system/large/operations/test_ai.py @@ -64,7 +64,7 @@ def test_filter_multi_model(session, gemini_flash_model): THRESHOLD_OPTION, 10, ): - df = session.from_glob_path( + df = session._from_glob_path( "gs://bigframes-dev-testing/a_multimodel/images/*", name="image" ) df["prey"] = series.Series( @@ -238,7 +238,7 @@ def test_map_multimodel(session, gemini_flash_model): THRESHOLD_OPTION, 10, ): - df = session.from_glob_path( + df = session._from_glob_path( "gs://bigframes-dev-testing/a_multimodel/images/*", name="image" ) df["scenario"] = series.Series( diff --git a/packages/bigframes/tests/system/large/operations/test_semantics.py b/packages/bigframes/tests/system/large/operations/test_semantics.py index 56c90212e99c..983328f11b01 100644 --- a/packages/bigframes/tests/system/large/operations/test_semantics.py +++ b/packages/bigframes/tests/system/large/operations/test_semantics.py @@ -411,7 +411,7 @@ def test_filter_multi_model(session, gemini_flash_model): THRESHOLD_OPTION, 10, ): - df = session.from_glob_path( + df = session._from_glob_path( "gs://bigframes-dev-testing/a_multimodel/images/*", name="image" ) df["prey"] = series.Series( @@ -580,7 +580,7 @@ def test_map_multimodel(session, gemini_flash_model): THRESHOLD_OPTION, 10, ): - df = session.from_glob_path( + df = session._from_glob_path( "gs://bigframes-dev-testing/a_multimodel/images/*", name="image" ) df["scenario"] = series.Series( From 0ad748256fcd4a49d8a85ecb49f6c719137e5b7e Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 28 Apr 2026 22:36:41 +0000 Subject: [PATCH 16/39] revert change --- .../bigframes/bigframes/pandas/__init__.py | 6 + packages/bigframes/bigframes/pandas/io/api.py | 28 ++++ .../bigframes/bigframes/session/__init__.py | 62 
++++++++ .../tests/system/small/blob/test_io.py | 132 ++++++++++++++++++ .../system/small/blob/test_properties.py | 120 ++++++++++++++++ .../tests/system/small/blob/test_urls.py | 34 +++++ .../bigframes/tests/unit/bigquery/test_obj.py | 125 +++++++++++++++++ 7 files changed, 507 insertions(+) create mode 100644 packages/bigframes/tests/system/small/blob/test_io.py create mode 100644 packages/bigframes/tests/system/small/blob/test_properties.py create mode 100644 packages/bigframes/tests/system/small/blob/test_urls.py create mode 100644 packages/bigframes/tests/unit/bigquery/test_obj.py diff --git a/packages/bigframes/bigframes/pandas/__init__.py b/packages/bigframes/bigframes/pandas/__init__.py index 11938a887785..58833284ae96 100644 --- a/packages/bigframes/bigframes/pandas/__init__.py +++ b/packages/bigframes/bigframes/pandas/__init__.py @@ -99,12 +99,14 @@ from bigframes.pandas.core.api import to_timedelta from bigframes.pandas.io.api import ( _read_gbq_colab, + from_glob_path, read_arrow, read_avro, read_csv, read_gbq, read_gbq_function, read_gbq_model, + read_gbq_object_table, read_gbq_query, read_gbq_table, read_json, @@ -453,6 +455,7 @@ def reset_session(): _read_gbq_colab, read_gbq_function, read_gbq_model, + read_gbq_object_table, read_gbq_query, read_gbq_table, read_json, @@ -463,6 +466,7 @@ def reset_session(): remote_function, to_datetime, to_timedelta, + from_glob_path, ] # Use __all__ to let type checkers know what is part of the public API. 
@@ -488,6 +492,7 @@ def reset_session(): "_read_gbq_colab", "read_gbq_function", "read_gbq_model", + "read_gbq_object_table", "read_gbq_query", "read_gbq_table", "read_json", @@ -498,6 +503,7 @@ def reset_session(): "remote_function", "to_datetime", "to_timedelta", + "from_glob_path", # Other names "api", # pandas dtype attributes diff --git a/packages/bigframes/bigframes/pandas/io/api.py b/packages/bigframes/bigframes/pandas/io/api.py index e2737fdbbd1a..b7ed1a65d922 100644 --- a/packages/bigframes/bigframes/pandas/io/api.py +++ b/packages/bigframes/bigframes/pandas/io/api.py @@ -394,6 +394,21 @@ def read_gbq_model(model_name: str): read_gbq_model.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_model) +def read_gbq_object_table( + object_table: str, *, name: Optional[str] = None +) -> bigframes.dataframe.DataFrame: + return global_session.with_default_session( + bigframes.session.Session.read_gbq_object_table, + object_table, + name=name, + ) + + +read_gbq_object_table.__doc__ = inspect.getdoc( + bigframes.session.Session.read_gbq_object_table +) + + @overload def read_gbq_query( # type: ignore[overload-overlap] query: str, @@ -620,6 +635,19 @@ def read_gbq_function( read_gbq_function.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_function) +def from_glob_path( + path: str, *, connection: Optional[str] = None, name: Optional[str] = None +) -> bigframes.dataframe.DataFrame: + return global_session.with_default_session( + bigframes.session.Session.from_glob_path, + path=path, + connection=connection, + name=name, + ) + + +from_glob_path.__doc__ = inspect.getdoc(bigframes.session.Session.from_glob_path) + _default_location_lock = threading.Lock() diff --git a/packages/bigframes/bigframes/session/__init__.py b/packages/bigframes/bigframes/session/__init__.py index 339ab165117e..33745b6438ed 100644 --- a/packages/bigframes/bigframes/session/__init__.py +++ b/packages/bigframes/bigframes/session/__init__.py @@ -2349,6 +2349,68 @@ def 
_create_object_table(self, path: str, connection: str) -> str: return table + def from_glob_path( + self, path: str, *, connection: Optional[str] = None, name: Optional[str] = None + ) -> dataframe.DataFrame: + r"""Create a BigFrames DataFrame that contains a BigFrames `ObjectRef column `_ from a global wildcard path. + This operation creates a temporary BQ Object Table under the hood and requires bigquery.connections.delegate permission or BigQuery Connection Admin role. + If you have an existing BQ Object Table, use read_gbq_object_table(). + + .. note:: + BigFrames ObjectRef is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the + Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" + and might have limited support. For more information, see the launch stage descriptions + (https://cloud.google.com/products#product-launch-stages). + + Args: + path (str): + The wildcard global path, such as "gs:////\*". + connection (str or None, default None): + Connection to connect with remote service. str of the format ... + If None, use default connection in session context. BigQuery DataFrame will try to create the connection and attach + permission if the connection isn't fully set up. + name (str): + The column name of the ObjectRef column. + Returns: + bigframes.pandas.DataFrame: + Result BigFrames DataFrame. + """ + import bigframes.bigquery as bq + connection = self._create_bq_connection(connection=connection) + + table = self._create_object_table(path, connection) + + s = bq.obj.make_ref(self._loader.read_gbq_table(table)["uri"], authorizer=connection) + return s.rename(name).to_frame() + + def read_gbq_object_table( + self, object_table: str, *, name: Optional[str] = None + ) -> dataframe.DataFrame: + """Read an existing object table to create a BigFrames `ObjectRef `_ DataFrame. Use the connection of the object table for the connection of the ObjectRef. 
+ This function dosen't retrieve the object table data. If you want to read the data, use read_gbq() instead. + + .. note:: + BigFrames ObjectRef is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the + Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" + and might have limited support. For more information, see the launch stage descriptions + (https://cloud.google.com/products#product-launch-stages). + + Args: + object_table (str): name of the object table of form ... + name (str or None): the returned ObjectRef column name. + + Returns: + bigframes.pandas.DataFrame: + Result BigFrames DataFrame. + """ + import bigframes.bigquery as bq + # TODO(garrettwu): switch to pseudocolumn when b/374988109 is done. + table = self.bqclient.get_table(object_table) + connection = table._properties["externalDataConfiguration"]["connectionId"] + + s = bq.obj.make_ref(self._loader.read_gbq_table(object_table)["uri"], authorizer=connection) + return s.rename(name).to_frame() + def _create_temp_view(self, sql: str) -> bigquery.TableReference: """Create a random id view from the sql string.""" return self._anon_dataset_manager.create_temp_view(sql) diff --git a/packages/bigframes/tests/system/small/blob/test_io.py b/packages/bigframes/tests/system/small/blob/test_io.py new file mode 100644 index 000000000000..c89fb4c6e6ed --- /dev/null +++ b/packages/bigframes/tests/system/small/blob/test_io.py @@ -0,0 +1,132 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from unittest import mock + +import pandas as pd +import pytest + +import bigframes +import bigframes.pandas as bpd + +pytest.skip("Skipping blob tests due to b/481790217", allow_module_level=True) + + +idisplay = pytest.importorskip("IPython.display") + + +def test_blob_create_from_uri_str( + bq_connection: str, session: bigframes.Session, images_uris +): + uri_series = bpd.Series(images_uris, session=session) + blob_series = uri_series.str.to_blob(connection=bq_connection) + + pd_blob_df = blob_series.struct.explode().to_pandas() + expected_pd_df = pd.DataFrame( + { + "uri": images_uris, + "version": [None, None], + "authorizer": [bq_connection.casefold(), bq_connection.casefold()], + "details": [None, None], + } + ) + + pd.testing.assert_frame_equal( + pd_blob_df, expected_pd_df, check_dtype=False, check_index_type=False + ) + + +def test_blob_create_from_glob_path( + bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris +): + blob_df = session.from_glob_path( + images_gcs_path, connection=bq_connection, name="blob_col" + ) + pd_blob_df = ( + blob_df["blob_col"] + .struct.explode() + .to_pandas() + .sort_values("uri") + .reset_index(drop=True) + ) + + expected_df = pd.DataFrame( + { + "uri": images_uris, + "version": [None, None], + "authorizer": [bq_connection.casefold(), bq_connection.casefold()], + "details": [None, None], + } + ) + + pd.testing.assert_frame_equal( + pd_blob_df, expected_df, check_dtype=False, check_index_type=False + ) + + +def test_blob_create_read_gbq_object_table( + bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris +): + obj_table = session._create_object_table(images_gcs_path, bq_connection) + + blob_df = session.read_gbq_object_table(obj_table, name="blob_col") + pd_blob_df = ( + blob_df["blob_col"] + .struct.explode() + .to_pandas() + .sort_values("uri") + .reset_index(drop=True) 
+ ) + expected_df = pd.DataFrame( + { + "uri": images_uris, + "version": [None, None], + "authorizer": [bq_connection.casefold(), bq_connection.casefold()], + "details": [None, None], + } + ) + + pd.testing.assert_frame_equal( + pd_blob_df, expected_df, check_dtype=False, check_index_type=False + ) + + +def test_display_images(monkeypatch, images_mm_df: bpd.DataFrame): + mock_display = mock.Mock() + monkeypatch.setattr(idisplay, "display", mock_display) + + images_mm_df["blob_col"].blob.display() + + for call in mock_display.call_args_list: + args, _ = call + arg = args[0] + assert isinstance(arg, idisplay.Image) + + +def test_display_nulls( + monkeypatch, + bq_connection: str, + session: bigframes.Session, +): + uri_series = bpd.Series([None, None, None], dtype="string", session=session) + blob_series = uri_series.str.to_blob(connection=bq_connection) + mock_display = mock.Mock() + monkeypatch.setattr(idisplay, "display", mock_display) + + blob_series.blob.display() + + for call in mock_display.call_args_list: + args, _ = call + arg = args[0] + assert arg == "" diff --git a/packages/bigframes/tests/system/small/blob/test_properties.py b/packages/bigframes/tests/system/small/blob/test_properties.py new file mode 100644 index 000000000000..ec18f05462d3 --- /dev/null +++ b/packages/bigframes/tests/system/small/blob/test_properties.py @@ -0,0 +1,120 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pandas as pd +import pytest + +import bigframes.dtypes as dtypes +import bigframes.pandas as bpd +import bigframes.bigquery as bbq + +pytest.skip("Skipping blob tests due to b/481790217", allow_module_level=True) + + +def test_blob_uri(images_uris: list[str], images_mm_df: bpd.DataFrame): + actual = images_mm_df["blob_col"].struct.field("uri").to_pandas() + expected = pd.Series(images_uris, name="uri") + + pd.testing.assert_series_equal( + actual, expected, check_dtype=False, check_index_type=False + ) + + +def test_blob_authorizer(images_mm_df: bpd.DataFrame, bq_connection: str): + actual = images_mm_df["blob_col"].struct.field("authorizer").to_pandas() + expected = pd.Series( + [bq_connection.casefold(), bq_connection.casefold()], name="authorizer" + ) + + pd.testing.assert_series_equal( + actual, expected, check_dtype=False, check_index_type=False + ) + + +def test_blob_version(images_mm_df: bpd.DataFrame): + actual = bbq.json_value(images_mm_df["blob_col"].struct.field("details"), "$.version").to_pandas() + expected = pd.Series(["1753907851152593", "1753907851111538"], name="version") + + pd.testing.assert_series_equal( + actual, expected, check_dtype=False, check_index_type=False + ) + + +def test_blob_metadata(images_mm_df: bpd.DataFrame): + actual = images_mm_df["blob_col"].struct.field("details").to_pandas() + expected = pd.Series( + [ + ( + '{"content_type":"image/jpeg",' + '"md5_hash":"e130ad042261a1883cd2cc06831cf748",' + '"size":338390,' + '"updated":1753907851000000}' + ), + ( + '{"content_type":"image/jpeg",' + '"md5_hash":"e2ae3191ff2b809fd0935f01a537c650",' + '"size":43333,' + '"updated":1753907851000000}' + ), + ], + name="metadata", + dtype=dtypes.JSON_DTYPE, + ) + expected.index = expected.index.astype(dtypes.INT_DTYPE) + pd.testing.assert_series_equal(actual, expected) + + +def test_blob_content_type(images_mm_df: bpd.DataFrame): + actual = bbq.json_value(images_mm_df["blob_col"].struct.field("details"), "$.content_type").to_pandas() + 
expected = pd.Series(["image/jpeg", "image/jpeg"], name="content_type") + + pd.testing.assert_series_equal( + actual, expected, check_dtype=False, check_index_type=False + ) + + +def test_blob_md5_hash(images_mm_df: bpd.DataFrame): + actual = bbq.json_value(images_mm_df["blob_col"].struct.field("details"), "$.md5_hash").to_pandas() + expected = pd.Series( + ["e130ad042261a1883cd2cc06831cf748", "e2ae3191ff2b809fd0935f01a537c650"], + name="md5_hash", + ) + + pd.testing.assert_series_equal( + actual, expected, check_dtype=False, check_index_type=False + ) + + +def test_blob_size(images_mm_df: bpd.DataFrame): + actual = bbq.json_value(images_mm_df["blob_col"].struct.field("details"), "$.size").astype("Int64").to_pandas() + expected = pd.Series([338390, 43333], name="size") + + pd.testing.assert_series_equal( + actual, expected, check_dtype=False, check_index_type=False + ) + + +def test_blob_updated(images_mm_df: bpd.DataFrame): + actual = bbq.json_value(images_mm_df["blob_col"].struct.field("details"), "$.updated").to_pandas() + expected = pd.Series( + [ + pd.Timestamp("2025-07-30 20:37:31", tz="UTC"), + pd.Timestamp("2025-07-30 20:37:31", tz="UTC"), + ], + name="updated", + ) + + pd.testing.assert_series_equal( + actual, expected, check_dtype=False, check_index_type=False + ) diff --git a/packages/bigframes/tests/system/small/blob/test_urls.py b/packages/bigframes/tests/system/small/blob/test_urls.py new file mode 100644 index 000000000000..0a2db23db20d --- /dev/null +++ b/packages/bigframes/tests/system/small/blob/test_urls.py @@ -0,0 +1,34 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +import bigframes.pandas as bpd +import bigframes.bigquery as bbq + +pytest.skip("Skipping blob tests due to b/481790217", allow_module_level=True) + + +def test_blob_read_url(images_mm_df: bpd.DataFrame): + access_json = bbq.obj.get_access_url(images_mm_df["blob_col"], mode="r") + urls = bbq.json_value(access_json, "$.access_urls.read_url") + + assert urls.str.startswith("https://storage.googleapis.com/").all() + + +def test_blob_write_url(images_mm_df: bpd.DataFrame): + access_json = bbq.obj.get_access_url(images_mm_df["blob_col"], mode="rw") + urls = bbq.json_value(access_json, "$.access_urls.write_url") + + assert urls.str.startswith("https://storage.googleapis.com/").all() diff --git a/packages/bigframes/tests/unit/bigquery/test_obj.py b/packages/bigframes/tests/unit/bigquery/test_obj.py new file mode 100644 index 000000000000..9eac234b8bc3 --- /dev/null +++ b/packages/bigframes/tests/unit/bigquery/test_obj.py @@ -0,0 +1,125 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import datetime +from unittest import mock + +import bigframes.bigquery.obj as obj +import bigframes.operations as ops +import bigframes.series + + +def create_mock_series(): + result = mock.create_autospec(bigframes.series.Series, instance=True) + result.copy.return_value = result + return result + + +def test_fetch_metadata_op_structure(): + op = ops.obj_fetch_metadata_op + assert op.name == "obj_fetch_metadata" + + +def test_get_access_url_op_structure(): + op = ops.ObjGetAccessUrl(mode="r") + assert op.name == "obj_get_access_url" + assert op.mode == "r" + assert op.duration is None + + +def test_get_access_url_with_duration_op_structure(): + op = ops.ObjGetAccessUrl(mode="rw", duration=3600000000) + assert op.name == "obj_get_access_url" + assert op.mode == "rw" + assert op.duration == 3600000000 + + +def test_make_ref_op_structure(): + op = ops.obj_make_ref_op + assert op.name == "obj_make_ref" + + +def test_make_ref_json_op_structure(): + op = ops.obj_make_ref_json_op + assert op.name == "obj_make_ref_json" + + +def test_fetch_metadata_calls_apply_unary_op(): + series = create_mock_series() + + obj.fetch_metadata(series) + + series._apply_unary_op.assert_called_once() + args, _ = series._apply_unary_op.call_args + assert args[0] == ops.obj_fetch_metadata_op + + +def test_get_access_url_calls_apply_unary_op_without_duration(): + series = create_mock_series() + + obj.get_access_url(series, mode="r") + + series._apply_unary_op.assert_called_once() + args, _ = series._apply_unary_op.call_args + assert isinstance(args[0], ops.ObjGetAccessUrl) + assert args[0].mode == "r" + assert args[0].duration is None + + +def test_get_access_url_calls_apply_unary_op_with_duration(): + series = create_mock_series() + duration = datetime.timedelta(hours=1) + + obj.get_access_url(series, mode="rw", duration=duration) + + series._apply_unary_op.assert_called_once() + args, _ = series._apply_unary_op.call_args + assert isinstance(args[0], ops.ObjGetAccessUrl) + assert 
args[0].mode == "rw" + # 1 hour = 3600 seconds = 3600 * 1000 * 1000 microseconds + assert args[0].duration == 3600000000 + + +def test_make_ref_calls_apply_binary_op_with_authorizer(): + uri = create_mock_series() + auth = create_mock_series() + + obj.make_ref(uri, authorizer=auth) + + uri._apply_binary_op.assert_called_once() + args, _ = uri._apply_binary_op.call_args + assert args[0] == auth + assert args[1] == ops.obj_make_ref_op + + +def test_make_ref_calls_apply_binary_op_with_authorizer_string(): + uri = create_mock_series() + auth = "us.bigframes-test-connection" + + obj.make_ref(uri, authorizer=auth) + + uri._apply_binary_op.assert_called_once() + args, _ = uri._apply_binary_op.call_args + assert args[0] == auth + assert args[1] == ops.obj_make_ref_op + + +def test_make_ref_calls_apply_unary_op_without_authorizer(): + json_val = create_mock_series() + + obj.make_ref(json_val) + + json_val._apply_unary_op.assert_called_once() + args, _ = json_val._apply_unary_op.call_args + assert args[0] == ops.obj_make_ref_json_op From b84baf04d427253739aab90cd0c64dc81110b206 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 29 Apr 2026 00:02:49 +0000 Subject: [PATCH 17/39] remove apis --- .../bigframes/bigframes/pandas/__init__.py | 6 -- packages/bigframes/bigframes/pandas/io/api.py | 23 ------- .../bigframes/bigframes/session/__init__.py | 60 ------------------- 3 files changed, 89 deletions(-) diff --git a/packages/bigframes/bigframes/pandas/__init__.py b/packages/bigframes/bigframes/pandas/__init__.py index 58833284ae96..11938a887785 100644 --- a/packages/bigframes/bigframes/pandas/__init__.py +++ b/packages/bigframes/bigframes/pandas/__init__.py @@ -99,14 +99,12 @@ from bigframes.pandas.core.api import to_timedelta from bigframes.pandas.io.api import ( _read_gbq_colab, - from_glob_path, read_arrow, read_avro, read_csv, read_gbq, read_gbq_function, read_gbq_model, - read_gbq_object_table, read_gbq_query, read_gbq_table, read_json, @@ -455,7 +453,6 @@ def 
reset_session(): _read_gbq_colab, read_gbq_function, read_gbq_model, - read_gbq_object_table, read_gbq_query, read_gbq_table, read_json, @@ -466,7 +463,6 @@ def reset_session(): remote_function, to_datetime, to_timedelta, - from_glob_path, ] # Use __all__ to let type checkers know what is part of the public API. @@ -492,7 +488,6 @@ def reset_session(): "_read_gbq_colab", "read_gbq_function", "read_gbq_model", - "read_gbq_object_table", "read_gbq_query", "read_gbq_table", "read_json", @@ -503,7 +498,6 @@ def reset_session(): "remote_function", "to_datetime", "to_timedelta", - "from_glob_path", # Other names "api", # pandas dtype attributes diff --git a/packages/bigframes/bigframes/pandas/io/api.py b/packages/bigframes/bigframes/pandas/io/api.py index b7ed1a65d922..3c7692b39037 100644 --- a/packages/bigframes/bigframes/pandas/io/api.py +++ b/packages/bigframes/bigframes/pandas/io/api.py @@ -394,20 +394,8 @@ def read_gbq_model(model_name: str): read_gbq_model.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_model) -def read_gbq_object_table( - object_table: str, *, name: Optional[str] = None -) -> bigframes.dataframe.DataFrame: - return global_session.with_default_session( - bigframes.session.Session.read_gbq_object_table, - object_table, - name=name, - ) -read_gbq_object_table.__doc__ = inspect.getdoc( - bigframes.session.Session.read_gbq_object_table -) - @overload def read_gbq_query( # type: ignore[overload-overlap] @@ -635,18 +623,7 @@ def read_gbq_function( read_gbq_function.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_function) -def from_glob_path( - path: str, *, connection: Optional[str] = None, name: Optional[str] = None -) -> bigframes.dataframe.DataFrame: - return global_session.with_default_session( - bigframes.session.Session.from_glob_path, - path=path, - connection=connection, - name=name, - ) - -from_glob_path.__doc__ = inspect.getdoc(bigframes.session.Session.from_glob_path) _default_location_lock = threading.Lock() diff 
--git a/packages/bigframes/bigframes/session/__init__.py b/packages/bigframes/bigframes/session/__init__.py index 33745b6438ed..1bac3e8a08a5 100644 --- a/packages/bigframes/bigframes/session/__init__.py +++ b/packages/bigframes/bigframes/session/__init__.py @@ -2349,67 +2349,7 @@ def _create_object_table(self, path: str, connection: str) -> str: return table - def from_glob_path( - self, path: str, *, connection: Optional[str] = None, name: Optional[str] = None - ) -> dataframe.DataFrame: - r"""Create a BigFrames DataFrame that contains a BigFrames `ObjectRef column `_ from a global wildcard path. - This operation creates a temporary BQ Object Table under the hood and requires bigquery.connections.delegate permission or BigQuery Connection Admin role. - If you have an existing BQ Object Table, use read_gbq_object_table(). - - .. note:: - BigFrames ObjectRef is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the - Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" - and might have limited support. For more information, see the launch stage descriptions - (https://cloud.google.com/products#product-launch-stages). - Args: - path (str): - The wildcard global path, such as "gs:////\*". - connection (str or None, default None): - Connection to connect with remote service. str of the format ... - If None, use default connection in session context. BigQuery DataFrame will try to create the connection and attach - permission if the connection isn't fully set up. - name (str): - The column name of the ObjectRef column. - Returns: - bigframes.pandas.DataFrame: - Result BigFrames DataFrame. 
- """ - import bigframes.bigquery as bq - connection = self._create_bq_connection(connection=connection) - - table = self._create_object_table(path, connection) - - s = bq.obj.make_ref(self._loader.read_gbq_table(table)["uri"], authorizer=connection) - return s.rename(name).to_frame() - - def read_gbq_object_table( - self, object_table: str, *, name: Optional[str] = None - ) -> dataframe.DataFrame: - """Read an existing object table to create a BigFrames `ObjectRef `_ DataFrame. Use the connection of the object table for the connection of the ObjectRef. - This function dosen't retrieve the object table data. If you want to read the data, use read_gbq() instead. - - .. note:: - BigFrames ObjectRef is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the - Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" - and might have limited support. For more information, see the launch stage descriptions - (https://cloud.google.com/products#product-launch-stages). - - Args: - object_table (str): name of the object table of form ... - name (str or None): the returned ObjectRef column name. - - Returns: - bigframes.pandas.DataFrame: - Result BigFrames DataFrame. - """ - import bigframes.bigquery as bq - # TODO(garrettwu): switch to pseudocolumn when b/374988109 is done. 
- table = self.bqclient.get_table(object_table) - connection = table._properties["externalDataConfiguration"]["connectionId"] - - s = bq.obj.make_ref(self._loader.read_gbq_table(object_table)["uri"], authorizer=connection) - return s.rename(name).to_frame() def _create_temp_view(self, sql: str) -> bigquery.TableReference: """Create a random id view from the sql string.""" From 653a1b41fd9d47dff5ee0324bdd0e2f072a7f461 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 29 Apr 2026 00:05:01 +0000 Subject: [PATCH 18/39] remove tests --- .../tests/system/large/blob/test_function.py | 853 ------------------ .../tests/system/small/blob/test_io.py | 132 --- .../tests/system/small/blob/test_urls.py | 34 - 3 files changed, 1019 deletions(-) delete mode 100644 packages/bigframes/tests/system/large/blob/test_function.py delete mode 100644 packages/bigframes/tests/system/small/blob/test_io.py delete mode 100644 packages/bigframes/tests/system/small/blob/test_urls.py diff --git a/packages/bigframes/tests/system/large/blob/test_function.py b/packages/bigframes/tests/system/large/blob/test_function.py deleted file mode 100644 index e0996db4212a..000000000000 --- a/packages/bigframes/tests/system/large/blob/test_function.py +++ /dev/null @@ -1,853 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging -import os -import traceback -import uuid -from typing import Generator - -import pandas as pd -import pytest -from google.cloud import storage - -import bigframes -import bigframes.pandas as bpd -from bigframes import dtypes - -pytest.skip("Skipping blob tests due to b/481790217", allow_module_level=True) - - -@pytest.fixture(scope="function") -def images_output_folder() -> Generator[str, None, None]: - id = uuid.uuid4().hex - folder = os.path.join("gs://bigframes_blob_test/output/", id) - yield folder - - # clean up - try: - cloud_storage_client = storage.Client() - bucket = cloud_storage_client.bucket("bigframes_blob_test") - blobs = bucket.list_blobs(prefix="output/" + id) - for blob in blobs: - blob.delete() - except Exception as exc: - traceback.print_exception(type(exc), exc, None) - - -@pytest.fixture(scope="function") -def images_output_uris(images_output_folder: str) -> list[str]: - return [ - os.path.join(images_output_folder, "img0.jpg"), - os.path.join(images_output_folder, "img1.jpg"), - ] - - -def test_blob_exif( - bq_connection: str, - session: bigframes.Session, -): - exif_image_df = session.from_glob_path( - "gs://bigframes_blob_test/images_exif/*", - name="blob_col", - connection=bq_connection, - ) - - actual = exif_image_df["blob_col"].blob.exif( - engine="pillow", connection=bq_connection, verbose=False - ) - expected = bpd.Series( - ['{"ExifOffset": 47, "Make": "MyCamera"}'], - session=session, - dtype=dtypes.JSON_DTYPE, - ) - pd.testing.assert_series_equal( - actual.to_pandas(), - expected.to_pandas(), - check_dtype=False, - check_index_type=False, - ) - - -def test_blob_exif_verbose( - bq_connection: str, - session: bigframes.Session, -): - exif_image_df = session.from_glob_path( - "gs://bigframes_blob_test/images_exif/*", - name="blob_col", - connection=bq_connection, - ) - - actual = exif_image_df["blob_col"].blob.exif( - engine="pillow", connection=bq_connection, verbose=True - ) - assert hasattr(actual, "struct") - 
actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - assert content_series.dtype == dtypes.JSON_DTYPE - - -def test_blob_image_blur_to_series( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), dst=series, connection=bq_connection, engine="opencv", verbose=False - ) - - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_blur_to_series_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), dst=series, connection=bq_connection, engine="opencv", verbose=True - ) - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - # Content should be blob objects for GCS destination - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_blur_to_folder( - 
images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=False, - ) - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_blur_to_folder_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=True, - ) - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - # Content should be blob objects for GCS destination - assert hasattr(content_series, "blob") - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_blur_to_bq(images_mm_df: bpd.DataFrame, bq_connection: str): - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), connection=bq_connection, engine="opencv", verbose=False - ) - - assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - assert actual.dtype == dtypes.BYTES_DTYPE - - -def test_blob_image_blur_to_bq_verbose(images_mm_df: bpd.DataFrame, bq_connection: str): - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), connection=bq_connection, 
engine="opencv", verbose=True - ) - - assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - assert content_series.dtype == dtypes.BYTES_DTYPE - - -def test_blob_image_resize_to_series( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), - dst=series, - connection=bq_connection, - engine="opencv", - verbose=False, - ) - - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_resize_to_series_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), - dst=series, - connection=bq_connection, - engine="opencv", - verbose=True, - ) - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - 
content_series = actual_exploded["content"] - # Content should be blob objects for GCS destination - assert hasattr(content_series, "blob") - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_resize_to_folder( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=False, - ) - - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_resize_to_folder_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=True, - ) - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - # Content should be blob objects for GCS destination - assert hasattr(content_series, "blob") - - # verify the files exist - assert not content_series.blob.size().isna().any() - - -def test_blob_image_resize_to_bq(images_mm_df: bpd.DataFrame, bq_connection: str): - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), connection=bq_connection, engine="opencv", verbose=False - ) - 
- assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - assert actual.dtype == dtypes.BYTES_DTYPE - - -def test_blob_image_resize_to_bq_verbose( - images_mm_df: bpd.DataFrame, bq_connection: str -): - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), connection=bq_connection, engine="opencv", verbose=True - ) - - assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - assert content_series.dtype == dtypes.BYTES_DTYPE - - -def test_blob_image_normalize_to_series( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, - beta=150.0, - norm_type="minmax", - dst=series, - connection=bq_connection, - engine="opencv", - verbose=False, - ) - - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_normalize_to_series_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, 
- beta=150.0, - norm_type="minmax", - dst=series, - connection=bq_connection, - engine="opencv", - verbose=True, - ) - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - # Content should be blob objects for GCS destination - assert hasattr(content_series, "blob") - - -def test_blob_image_normalize_to_folder( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, - beta=150.0, - norm_type="minmax", - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=False, - ) - - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_normalize_to_folder_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, - beta=150.0, - norm_type="minmax", - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=True, - ) - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = 
actual_exploded["content"] - # Content should be blob objects for GCS destination - assert hasattr(content_series, "blob") - - -def test_blob_image_normalize_to_bq(images_mm_df: bpd.DataFrame, bq_connection: str): - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, - beta=150.0, - norm_type="minmax", - connection=bq_connection, - engine="opencv", - verbose=False, - ) - - assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - assert actual.dtype == dtypes.BYTES_DTYPE - - -def test_blob_image_normalize_to_bq_verbose( - images_mm_df: bpd.DataFrame, bq_connection: str -): - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, - beta=150.0, - norm_type="minmax", - connection=bq_connection, - engine="opencv", - verbose=True, - ) - - assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - assert content_series.dtype == dtypes.BYTES_DTYPE - - -def test_blob_pdf_extract( - pdf_mm_df: bpd.DataFrame, - bq_connection: str, -): - actual = ( - pdf_mm_df["pdf"] - .blob.pdf_extract(connection=bq_connection, verbose=False, engine="pypdf") - .explode() - .to_pandas() - ) - - # check relative length - expected_text = "Sample PDF This is a testing file. Some dummy messages are used for testing purposes." 
- expected_len = len(expected_text) - - actual_text = actual[actual != ""].iloc[0] - actual_len = len(actual_text) - - relative_length_tolerance = 0.25 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=False): Extracted text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. " - ) - - # check for major keywords - major_keywords = ["Sample", "PDF", "testing", "dummy", "messages"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=False): Expected keyword '{keyword}' not found in extracted text. " - ) - - -def test_blob_pdf_extract_verbose( - pdf_mm_df: bpd.DataFrame, - bq_connection: str, -): - actual = ( - pdf_mm_df["pdf"] - .blob.pdf_extract(connection=bq_connection, verbose=True, engine="pypdf") - .explode() - .to_pandas() - ) - - # check relative length - expected_text = "Sample PDF This is a testing file. Some dummy messages are used for testing purposes." - expected_len = len(expected_text) - - # The first entry is for a file that doesn't exist, so we check the second one - successful_results = actual[actual.apply(lambda x: x["status"] == "")] - actual_text = successful_results.apply(lambda x: x["content"]).iloc[0] - actual_len = len(actual_text) - - relative_length_tolerance = 0.25 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=True): Extracted text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. 
" - ) - - # check for major keywords - major_keywords = ["Sample", "PDF", "testing", "dummy", "messages"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=True): Expected keyword '{keyword}' not found in extracted text. " - ) - - -def test_blob_pdf_chunk(pdf_mm_df: bpd.DataFrame, bq_connection: str): - actual = ( - pdf_mm_df["pdf"] - .blob.pdf_chunk( - connection=bq_connection, - chunk_size=50, - overlap_size=10, - verbose=False, - engine="pypdf", - ) - .explode() - .to_pandas() - ) - - # check relative length - expected_text = "Sample PDF This is a testing file. Some dummy messages are used for testing purposes." - expected_len = len(expected_text) - - # First entry is NA - actual_text = "".join(actual.dropna()) - actual_len = len(actual_text) - - relative_length_tolerance = 0.25 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=False): Extracted text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. " - ) - - # check for major keywords - major_keywords = ["Sample", "PDF", "testing", "dummy", "messages"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=False): Expected keyword '{keyword}' not found in extracted text. " - ) - - -def test_blob_pdf_chunk_verbose(pdf_mm_df: bpd.DataFrame, bq_connection: str): - actual = ( - pdf_mm_df["pdf"] - .blob.pdf_chunk( - connection=bq_connection, - chunk_size=50, - overlap_size=10, - verbose=True, - engine="pypdf", - ) - .explode() - .to_pandas() - ) - - # check relative length - expected_text = "Sample PDF This is a testing file. Some dummy messages are used for testing purposes." 
- expected_len = len(expected_text) - - # The first entry is for a file that doesn't exist, so we check the second one - successful_results = actual[actual.apply(lambda x: x["status"] == "")] - actual_text = "".join(successful_results.apply(lambda x: x["content"]).iloc[0]) - actual_len = len(actual_text) - - relative_length_tolerance = 0.25 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=True): Extracted text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. " - ) - - # check for major keywords - major_keywords = ["Sample", "PDF", "testing", "dummy", "messages"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=True): Expected keyword '{keyword}' not found in extracted text. " - ) - - -@pytest.mark.parametrize( - "model_name", - [ - "gemini-2.5-flash", - "gemini-2.5-flash-lite", - ], -) -def test_blob_transcribe( - audio_mm_df: bpd.DataFrame, - model_name: str, -): - actual = ( - audio_mm_df["audio"] - .blob.audio_transcribe( - model_name=model_name, # type: ignore - verbose=False, - ) - .to_pandas() - ) - - # check relative length - expected_text = "Now, as all books not primarily intended as picture-books consist principally of types composed to form letterpress" - expected_len = len(expected_text) - - actual_text = actual[0] - - if pd.isna(actual_text) or actual_text == "": - # Ensure the tests are robust to flakes in the model, which isn't - # particularly useful information for the bigframes team. 
- logging.warning(f"blob_transcribe() model {model_name} verbose=False failure") - return - - actual_len = len(actual_text) - - relative_length_tolerance = 0.2 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=False): Transcribed text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. " - ) - - # check for major keywords - major_keywords = ["book", "picture"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=False): Expected keyword '{keyword}' not found in transcribed text. " - ) - - -@pytest.mark.parametrize( - "model_name", - [ - "gemini-2.5-flash", - "gemini-2.5-flash-lite", - ], -) -def test_blob_transcribe_verbose( - audio_mm_df: bpd.DataFrame, - model_name: str, -): - actual = ( - audio_mm_df["audio"] - .blob.audio_transcribe( - model_name=model_name, # type: ignore - verbose=True, - ) - .to_pandas() - ) - - # check relative length - expected_text = "Now, as all books not primarily intended as picture-books consist principally of types composed to form letterpress" - expected_len = len(expected_text) - - actual_text = actual[0]["content"] - - if pd.isna(actual_text) or actual_text == "": - # Ensure the tests are robust to flakes in the model, which isn't - # particularly useful information for the bigframes team. 
- logging.warning(f"blob_transcribe() model {model_name} verbose=True failure") - return - - actual_len = len(actual_text) - - relative_length_tolerance = 0.2 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=True): Transcribed text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. " - ) - - # check for major keywords - major_keywords = ["book", "picture"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=True): Expected keyword '{keyword}' not found in transcribed text. " - ) diff --git a/packages/bigframes/tests/system/small/blob/test_io.py b/packages/bigframes/tests/system/small/blob/test_io.py deleted file mode 100644 index c89fb4c6e6ed..000000000000 --- a/packages/bigframes/tests/system/small/blob/test_io.py +++ /dev/null @@ -1,132 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from unittest import mock - -import pandas as pd -import pytest - -import bigframes -import bigframes.pandas as bpd - -pytest.skip("Skipping blob tests due to b/481790217", allow_module_level=True) - - -idisplay = pytest.importorskip("IPython.display") - - -def test_blob_create_from_uri_str( - bq_connection: str, session: bigframes.Session, images_uris -): - uri_series = bpd.Series(images_uris, session=session) - blob_series = uri_series.str.to_blob(connection=bq_connection) - - pd_blob_df = blob_series.struct.explode().to_pandas() - expected_pd_df = pd.DataFrame( - { - "uri": images_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - - pd.testing.assert_frame_equal( - pd_blob_df, expected_pd_df, check_dtype=False, check_index_type=False - ) - - -def test_blob_create_from_glob_path( - bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris -): - blob_df = session.from_glob_path( - images_gcs_path, connection=bq_connection, name="blob_col" - ) - pd_blob_df = ( - blob_df["blob_col"] - .struct.explode() - .to_pandas() - .sort_values("uri") - .reset_index(drop=True) - ) - - expected_df = pd.DataFrame( - { - "uri": images_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - - pd.testing.assert_frame_equal( - pd_blob_df, expected_df, check_dtype=False, check_index_type=False - ) - - -def test_blob_create_read_gbq_object_table( - bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris -): - obj_table = session._create_object_table(images_gcs_path, bq_connection) - - blob_df = session.read_gbq_object_table(obj_table, name="blob_col") - pd_blob_df = ( - blob_df["blob_col"] - .struct.explode() - .to_pandas() - .sort_values("uri") - .reset_index(drop=True) - ) - expected_df = pd.DataFrame( - { - "uri": images_uris, - "version": [None, None], - "authorizer": 
[bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - - pd.testing.assert_frame_equal( - pd_blob_df, expected_df, check_dtype=False, check_index_type=False - ) - - -def test_display_images(monkeypatch, images_mm_df: bpd.DataFrame): - mock_display = mock.Mock() - monkeypatch.setattr(idisplay, "display", mock_display) - - images_mm_df["blob_col"].blob.display() - - for call in mock_display.call_args_list: - args, _ = call - arg = args[0] - assert isinstance(arg, idisplay.Image) - - -def test_display_nulls( - monkeypatch, - bq_connection: str, - session: bigframes.Session, -): - uri_series = bpd.Series([None, None, None], dtype="string", session=session) - blob_series = uri_series.str.to_blob(connection=bq_connection) - mock_display = mock.Mock() - monkeypatch.setattr(idisplay, "display", mock_display) - - blob_series.blob.display() - - for call in mock_display.call_args_list: - args, _ = call - arg = args[0] - assert arg == "" diff --git a/packages/bigframes/tests/system/small/blob/test_urls.py b/packages/bigframes/tests/system/small/blob/test_urls.py deleted file mode 100644 index 0a2db23db20d..000000000000 --- a/packages/bigframes/tests/system/small/blob/test_urls.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest - -import bigframes.pandas as bpd -import bigframes.bigquery as bbq - -pytest.skip("Skipping blob tests due to b/481790217", allow_module_level=True) - - -def test_blob_read_url(images_mm_df: bpd.DataFrame): - access_json = bbq.obj.get_access_url(images_mm_df["blob_col"], mode="r") - urls = bbq.json_value(access_json, "$.access_urls.read_url") - - assert urls.str.startswith("https://storage.googleapis.com/").all() - - -def test_blob_write_url(images_mm_df: bpd.DataFrame): - access_json = bbq.obj.get_access_url(images_mm_df["blob_col"], mode="rw") - urls = bbq.json_value(access_json, "$.access_urls.write_url") - - assert urls.str.startswith("https://storage.googleapis.com/").all() From a24ab2812644e30834c3009b2e5eb3bb8307f77a Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 29 Apr 2026 00:16:40 +0000 Subject: [PATCH 19/39] format files --- .../bigframes/bigquery/_operations/ml.py | 2 +- packages/bigframes/bigframes/pandas/io/api.py | 5 ---- .../bigframes/bigframes/session/__init__.py | 7 +++--- .../system/small/blob/test_properties.py | 24 ++++++++++++++----- 4 files changed, 23 insertions(+), 15 deletions(-) diff --git a/packages/bigframes/bigframes/bigquery/_operations/ml.py b/packages/bigframes/bigframes/bigquery/_operations/ml.py index 412b49b888f5..c6ef1f8bb7a7 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/ml.py +++ b/packages/bigframes/bigframes/bigquery/_operations/ml.py @@ -20,12 +20,12 @@ import google.cloud.bigquery import pandas as pd +import bigframes.core.col as col import bigframes.core.logging.log_adapter as log_adapter import bigframes.core.sql.ml import bigframes.dataframe as dataframe import bigframes.ml.base import bigframes.session -import bigframes.core.col as col from bigframes.bigquery._operations import utils diff --git a/packages/bigframes/bigframes/pandas/io/api.py b/packages/bigframes/bigframes/pandas/io/api.py index 3c7692b39037..e2737fdbbd1a 100644 --- a/packages/bigframes/bigframes/pandas/io/api.py 
+++ b/packages/bigframes/bigframes/pandas/io/api.py @@ -394,9 +394,6 @@ def read_gbq_model(model_name: str): read_gbq_model.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_model) - - - @overload def read_gbq_query( # type: ignore[overload-overlap] query: str, @@ -623,8 +620,6 @@ def read_gbq_function( read_gbq_function.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_function) - - _default_location_lock = threading.Lock() diff --git a/packages/bigframes/bigframes/session/__init__.py b/packages/bigframes/bigframes/session/__init__.py index 1bac3e8a08a5..565ea8503a4c 100644 --- a/packages/bigframes/bigframes/session/__init__.py +++ b/packages/bigframes/bigframes/session/__init__.py @@ -2314,9 +2314,12 @@ def _from_glob_path( ) -> dataframe.DataFrame: """Create a BigFrames DataFrame that contains a BigFrames ObjectRef column from a global wildcard path.""" import bigframes.bigquery as bq + connection = self._create_bq_connection(connection=connection) table = self._create_object_table(path, connection) - s = bq.obj.make_ref(self._loader.read_gbq_table(table)["uri"], authorizer=connection) + s = bq.obj.make_ref( + self._loader.read_gbq_table(table)["uri"], authorizer=connection + ) return s.rename(name).to_frame() def _create_object_table(self, path: str, connection: str) -> str: @@ -2349,8 +2352,6 @@ def _create_object_table(self, path: str, connection: str) -> str: return table - - def _create_temp_view(self, sql: str) -> bigquery.TableReference: """Create a random id view from the sql string.""" return self._anon_dataset_manager.create_temp_view(sql) diff --git a/packages/bigframes/tests/system/small/blob/test_properties.py b/packages/bigframes/tests/system/small/blob/test_properties.py index ec18f05462d3..c3597b37116d 100644 --- a/packages/bigframes/tests/system/small/blob/test_properties.py +++ b/packages/bigframes/tests/system/small/blob/test_properties.py @@ -15,9 +15,9 @@ import pandas as pd import pytest +import bigframes.bigquery as bbq 
import bigframes.dtypes as dtypes import bigframes.pandas as bpd -import bigframes.bigquery as bbq pytest.skip("Skipping blob tests due to b/481790217", allow_module_level=True) @@ -43,7 +43,9 @@ def test_blob_authorizer(images_mm_df: bpd.DataFrame, bq_connection: str): def test_blob_version(images_mm_df: bpd.DataFrame): - actual = bbq.json_value(images_mm_df["blob_col"].struct.field("details"), "$.version").to_pandas() + actual = bbq.json_value( + images_mm_df["blob_col"].struct.field("details"), "$.version" + ).to_pandas() expected = pd.Series(["1753907851152593", "1753907851111538"], name="version") pd.testing.assert_series_equal( @@ -76,7 +78,9 @@ def test_blob_metadata(images_mm_df: bpd.DataFrame): def test_blob_content_type(images_mm_df: bpd.DataFrame): - actual = bbq.json_value(images_mm_df["blob_col"].struct.field("details"), "$.content_type").to_pandas() + actual = bbq.json_value( + images_mm_df["blob_col"].struct.field("details"), "$.content_type" + ).to_pandas() expected = pd.Series(["image/jpeg", "image/jpeg"], name="content_type") pd.testing.assert_series_equal( @@ -85,7 +89,9 @@ def test_blob_content_type(images_mm_df: bpd.DataFrame): def test_blob_md5_hash(images_mm_df: bpd.DataFrame): - actual = bbq.json_value(images_mm_df["blob_col"].struct.field("details"), "$.md5_hash").to_pandas() + actual = bbq.json_value( + images_mm_df["blob_col"].struct.field("details"), "$.md5_hash" + ).to_pandas() expected = pd.Series( ["e130ad042261a1883cd2cc06831cf748", "e2ae3191ff2b809fd0935f01a537c650"], name="md5_hash", @@ -97,7 +103,11 @@ def test_blob_md5_hash(images_mm_df: bpd.DataFrame): def test_blob_size(images_mm_df: bpd.DataFrame): - actual = bbq.json_value(images_mm_df["blob_col"].struct.field("details"), "$.size").astype("Int64").to_pandas() + actual = ( + bbq.json_value(images_mm_df["blob_col"].struct.field("details"), "$.size") + .astype("Int64") + .to_pandas() + ) expected = pd.Series([338390, 43333], name="size") pd.testing.assert_series_equal( @@ -106,7 
+116,9 @@ def test_blob_size(images_mm_df: bpd.DataFrame): def test_blob_updated(images_mm_df: bpd.DataFrame): - actual = bbq.json_value(images_mm_df["blob_col"].struct.field("details"), "$.updated").to_pandas() + actual = bbq.json_value( + images_mm_df["blob_col"].struct.field("details"), "$.updated" + ).to_pandas() expected = pd.Series( [ pd.Timestamp("2025-07-30 20:37:31", tz="UTC"), From a72a4a53e2e7c008a631f5cc2c98c273aa068aea Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 29 Apr 2026 00:18:19 +0000 Subject: [PATCH 20/39] restore notebook from main --- ...with-bigframes-over-national-jukebox.ipynb | 662 ++++++++---------- 1 file changed, 304 insertions(+), 358 deletions(-) diff --git a/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb b/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb index 3fd66abcbb44..4faff4b8e768 100644 --- a/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb +++ b/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb @@ -1,8 +1,23 @@ { "cells": [ { - "id": "c62e292f", "cell_type": "markdown", + "metadata": { + "@deathbeds/jupyterlab-fonts": { + "styles": { + "": { + "body[data-jp-deck-mode='presenting'] &": { + "zoom": "194%" + } + } + } + }, + "editable": true, + "slideshow": { + "slide_type": "subslide" + }, + "tags": [] + }, "source": [ "# Creating a searchable index of the National Jukebox\n", "\n", @@ -20,42 +35,42 @@ "To follow along, you'll need a Google Cloud project\n", "\n", "* Go to https://cloud.google.com/free to start a free trial." 
- ], + ] + }, + { + "cell_type": "markdown", "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { "body[data-jp-deck-mode='presenting'] &": { - "zoom": "194%" + "z-index": "0", + "zoom": "216%" } } } }, - "editable": true, "slideshow": { - "slide_type": "subslide" - }, - "tags": [] + "slide_type": "slide" + } }, - "execution_count": null - }, - { - "id": "7dc312a4", - "cell_type": "markdown", "source": [ "The National Jukebox is a project of the USA Library of Congress to provide access to thousands of acoustic sound recordings from the very earliest days of the commercial record industry.\n", "\n", "* Learn more at https://www.loc.gov/collections/national-jukebox/about-this-collection/\n", "\n", "\"recording" - ], + ] + }, + { + "cell_type": "markdown", "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { "body[data-jp-deck-mode='presenting'] &": { "z-index": "0", - "zoom": "216%" + "zoom": "181%" } } } @@ -64,11 +79,6 @@ "slide_type": "slide" } }, - "execution_count": null - }, - { - "id": "07dcae4b", - "cell_type": "markdown", "source": [ "\n", "To search the National Jukebox, we combine powerful features of BigQuery:\n", @@ -86,32 +96,10 @@ "3. BigQuery DataFrames to use Python instead of SQL.\n", "\n", " https://cloud.google.com/bigquery/docs/bigquery-dataframes-introduction" - ], - "metadata": { - "@deathbeds/jupyterlab-fonts": { - "styles": { - "": { - "body[data-jp-deck-mode='presenting'] &": { - "z-index": "0", - "zoom": "181%" - } - } - } - }, - "slideshow": { - "slide_type": "slide" - } - }, - "execution_count": null + ] }, { - "id": "8dd2ddab", "cell_type": "markdown", - "source": [ - "## Getting started with BigQuery DataFrames (bigframes)\n", - "\n", - "Install the bigframes package." 
- ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -126,14 +114,15 @@ "slide_type": "slide" } }, - "execution_count": null + "source": [ + "## Getting started with BigQuery DataFrames (bigframes)\n", + "\n", + "Install the bigframes package." + ] }, { - "id": "96cda443", "cell_type": "code", - "source": [ - "%pip install --upgrade bigframes google-cloud-automl google-cloud-translate google-ai-generativelanguage tensorflow " - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -153,17 +142,13 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "%pip install --upgrade bigframes google-cloud-automl google-cloud-translate google-ai-generativelanguage tensorflow " + ] }, { - "id": "acf12472", "cell_type": "markdown", - "source": [ - "**Important:** restart the kernel by going to \"Run -> Restart & clear cell outputs\" before continuing.\n", - "\n", - "Configure bigframes to use your GCP project. First, go to \"Add-ons -> Google Cloud SDK\" and click the \"Attach\" button. Then," - ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -176,17 +161,15 @@ } } }, - "execution_count": null + "source": [ + "**Important:** restart the kernel by going to \"Run -> Restart & clear cell outputs\" before continuing.\n", + "\n", + "Configure bigframes to use your GCP project. First, go to \"Add-ons -> Google Cloud SDK\" and click the \"Attach\" button. 
Then," + ] }, { - "id": "fd321077", "cell_type": "code", - "source": [ - "from kaggle_secrets import UserSecretsClient\n", - "user_secrets = UserSecretsClient()\n", - "user_credential = user_secrets.get_gcloud_credential()\n", - "user_secrets.set_tensorflow_credential(user_credential)" - ], + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-08-14T15:53:08.494636Z", @@ -197,21 +180,17 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "from kaggle_secrets import UserSecretsClient\n", + "user_secrets = UserSecretsClient()\n", + "user_credential = user_secrets.get_gcloud_credential()\n", + "user_secrets.set_tensorflow_credential(user_credential)" + ] }, { - "id": "4d837a34", "cell_type": "code", - "source": [ - "import bigframes._config\n", - "import bigframes.pandas as bpd\n", - "\n", - "bpd.options.bigquery.location = \"US\"\n", - "\n", - "# Set to your GCP project ID.\n", - "bpd.options.bigquery.project = \"swast-scratch\"" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -231,17 +210,19 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "import bigframes._config\n", + "import bigframes.pandas as bpd\n", + "\n", + "bpd.options.bigquery.location = \"US\"\n", + "\n", + "# Set to your GCP project ID.\n", + "bpd.options.bigquery.project = \"swast-scratch\"" + ] }, { - "id": "008f0a87", "cell_type": "markdown", - "source": [ - "## Reading data\n", - "\n", - "BigQuery DataFrames can read data from BigQuery, GCS, or even local sources. With `engine=\"bigquery\"`, BigQuery's distributed processing reads the file without it ever having to reach your local Python environment." 
- ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -256,19 +237,15 @@ "slide_type": "slide" } }, - "execution_count": null + "source": [ + "## Reading data\n", + "\n", + "BigQuery DataFrames can read data from BigQuery, GCS, or even local sources. With `engine=\"bigquery\"`, BigQuery's distributed processing reads the file without it ever having to reach your local Python environment." + ] }, { - "id": "9a4b35ab", "cell_type": "code", - "source": [ - "df = bpd.read_json(\n", - " \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/jukebox.jsonl\",\n", - " engine=\"bigquery\",\n", - " orient=\"records\",\n", - " lines=True,\n", - ")" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -288,16 +265,19 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "df = bpd.read_json(\n", + " \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/jukebox.jsonl\",\n", + " engine=\"bigquery\",\n", + " orient=\"records\",\n", + " lines=True,\n", + ")" + ] }, { - "id": "e00dcb01", "cell_type": "code", - "source": [ - "# Use `peek()` instead of `head()` to see arbitrary rows rather than the \"first\" rows.\n", - "df.peek()" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -320,15 +300,15 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "# Use `peek()` instead of `head()` to see arbitrary rows rather than the \"first\" rows.\n", + "df.peek()" + ] }, { - "id": "335511be", "cell_type": "code", - "source": [ - "df.shape" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -348,18 +328,14 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "df.shape" + ] }, { - "id": "595126a1", "cell_type": "code", - "source": [ - "# For the purposes of a demo, select only a subset of rows.\n", - 
"df = df.sample(n=250)\n", - "df.cache()\n", - "df.shape" - ], + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-08-14T15:55:55.448664Z", @@ -370,32 +346,17 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "# For the purposes of a demo, select only a subset of rows.\n", + "df = df.sample(n=250)\n", + "df.cache()\n", + "df.shape" + ] }, { - "id": "cbd59dd9", "cell_type": "code", - "source": [ - "# As a side effect of how I extracted the song information from the HTML DOM,\n", - "# we ended up with lists in places where we only expect one item.\n", - "#\n", - "# We can \"explode\" to flatten these lists.\n", - "flattened = df.explode([\n", - " \"Recording Repository\",\n", - " \"Recording Label\",\n", - " \"Recording Take Number\",\n", - " \"Recording Date\",\n", - " \"Recording Matrix Number\",\n", - " \"Recording Catalog Number\",\n", - " \"Media Size\",\n", - " \"Recording Location\",\n", - " \"Summary\",\n", - " \"Rights Advisory\",\n", - " \"Title\",\n", - "])\n", - "flattened.peek()" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -418,15 +379,31 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "# As a side effect of how I extracted the song information from the HTML DOM,\n", + "# we ended up with lists in places where we only expect one item.\n", + "#\n", + "# We can \"explode\" to flatten these lists.\n", + "flattened = df.explode([\n", + " \"Recording Repository\",\n", + " \"Recording Label\",\n", + " \"Recording Take Number\",\n", + " \"Recording Date\",\n", + " \"Recording Matrix Number\",\n", + " \"Recording Catalog Number\",\n", + " \"Media Size\",\n", + " \"Recording Location\",\n", + " \"Summary\",\n", + " \"Rights Advisory\",\n", + " \"Title\",\n", + "])\n", + "flattened.peek()" + ] }, { - "id": "84548649", "cell_type": "code", - "source": [ - "flattened.shape" - ], + 
"execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-08-14T15:56:06.546531Z", @@ -437,15 +414,13 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "flattened.shape" + ] }, { - "id": "8be3127f", "cell_type": "markdown", - "source": [ - "To access unstructured data from BigQuery, create a URI pointing to a file in Google Cloud Storage (GCS). Then, construct a \"blob\" (also known as an \"Object Ref\" in BigQuery terms) so that BigQuery can read from GCS." - ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -462,20 +437,13 @@ }, "tags": [] }, - "execution_count": null + "source": [ + "To access unstructured data from BigQuery, create a URI pointing to a file in Google Cloud Storage (GCS). Then, construct a \"blob\" (also known as an \"Object Ref\" in BigQuery terms) so that BigQuery can read from GCS." + ] }, { - "id": "31277e21", "cell_type": "code", - "source": [ - "flattened = flattened.assign(**{\n", - " \"GCS Prefix\": \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/\",\n", - " \"GCS Stub\": flattened['URL'].str.extract(r'/(jukebox-[0-9]+)/'),\n", - "})\n", - "flattened[\"GCS URI\"] = flattened[\"GCS Prefix\"] + flattened[\"GCS Stub\"] + \".mp3\"\n", - "# Note: str.to_blob is deprecated.\n", - "flattened[\"GCS Blob\"] = flattened[\"GCS URI\"].str.to_blob()" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -500,15 +468,18 @@ "tags": [], "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "flattened = flattened.assign(**{\n", + " \"GCS Prefix\": \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/\",\n", + " \"GCS Stub\": flattened['URL'].str.extract(r'/(jukebox-[0-9]+)/'),\n", + "})\n", + "flattened[\"GCS URI\"] = flattened[\"GCS Prefix\"] + flattened[\"GCS Stub\"] + \".mp3\"\n", + "flattened[\"GCS Blob\"] = flattened[\"GCS URI\"].str.to_blob()" + ] }, { - 
"id": "d27756f5", "cell_type": "markdown", - "source": [ - "BigQuery (and BigQuery DataFrames) provide access to powerful models and multimodal capabilities. Here, we transcribe audio to text." - ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -525,20 +496,13 @@ }, "tags": [] }, - "execution_count": null + "source": [ + "BigQuery (and BigQuery DataFrames) provide access to powerful models and multimodal capabilities. Here, we transcribe audio to text." + ] }, { - "id": "d1f7ad46", "cell_type": "code", - "source": [ - "# Note: .blob.audio_transcribe is removed. This cell will fail.\n", - "# Use bigframes.bigquery.ai.generate instead.\n", - "flattened[\"Transcription\"] = flattened[\"GCS Blob\"].blob.audio_transcribe(\n", - " model_name=\"gemini-2.0-flash-001\",\n", - " verbose=True,\n", - ")\n", - "flattened[\"Transcription\"]" - ], + "execution_count": null, "metadata": { "editable": true, "execution": { @@ -554,15 +518,17 @@ "tags": [], "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "flattened[\"Transcription\"] = flattened[\"GCS Blob\"].blob.audio_transcribe(\n", + " model_name=\"gemini-2.5-flash\",\n", + " verbose=True,\n", + ")\n", + "flattened[\"Transcription\"]" + ] }, { - "id": "1575c468", "cell_type": "markdown", - "source": [ - "Sometimes the model has transient errors. Check the status column to see if there are errors." - ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -577,16 +543,13 @@ "slide_type": "slide" } }, - "execution_count": null + "source": [ + "Sometimes the model has transient errors. Check the status column to see if there are errors." 
+ ] }, { - "id": "e53c7a0b", "cell_type": "code", - "source": [ - "print(f\"Successful rows: {(flattened['Transcription'].struct.field('status') == '').sum()}\")\n", - "print(f\"Failed rows: {(flattened['Transcription'].struct.field('status') != '').sum()}\")\n", - "flattened.shape" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -611,16 +574,16 @@ "tags": [], "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "print(f\"Successful rows: {(flattened['Transcription'].struct.field('status') == '').sum()}\")\n", + "print(f\"Failed rows: {(flattened['Transcription'].struct.field('status') != '').sum()}\")\n", + "flattened.shape" + ] }, { - "id": "3629f4af", "cell_type": "code", - "source": [ - "# Show transcribed lyrics.\n", - "flattened[\"Transcription\"].struct.field(\"content\")" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -640,19 +603,15 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "# Show transcribed lyrics.\n", + "flattened[\"Transcription\"].struct.field(\"content\")" + ] }, { - "id": "09ef6c3d", "cell_type": "code", - "source": [ - "# Find all instrumentatal songs\n", - "instrumental = flattened[flattened[\"Transcription\"].struct.field(\"content\") == \"\"]\n", - "print(instrumental.shape)\n", - "song = instrumental.peek(1)\n", - "song" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -675,22 +634,18 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "# Find all instrumentatal songs\n", + "instrumental = flattened[flattened[\"Transcription\"].struct.field(\"content\") == \"\"]\n", + "print(instrumental.shape)\n", + "song = instrumental.peek(1)\n", + "song" + ] }, { - "id": "cf15986a", "cell_type": "code", - "source": [ - "import gcsfs\n", - "import IPython.display\n", - "\n", 
- "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", - "with fs.open(song[\"GCS URI\"].iloc[0]) as song_file:\n", - " song_bytes = song_file.read()\n", - "\n", - "IPython.display.Audio(song_bytes)" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -715,19 +670,20 @@ "tags": [], "trusted": true }, - "execution_count": null, - "outputs": [] - }, - { - "id": "778d0ac3", - "cell_type": "markdown", + "outputs": [], "source": [ - "## Creating a searchable index\n", + "import gcsfs\n", + "import IPython.display\n", "\n", - "To be able to search by semantics rather than just text, generate embeddings and then create an index to efficiently search these.\n", + "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", + "with fs.open(song[\"GCS URI\"].iloc[0]) as song_file:\n", + " song_bytes = song_file.read()\n", "\n", - "See also, this example: https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/bq_dataframes_llm_vector_search.ipynb" - ], + "IPython.display.Audio(song_bytes)" + ] + }, + { + "cell_type": "markdown", "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -742,16 +698,17 @@ "slide_type": "slide" } }, - "execution_count": null + "source": [ + "## Creating a searchable index\n", + "\n", + "To be able to search by semantics rather than just text, generate embeddings and then create an index to efficiently search these.\n", + "\n", + "See also, this example: https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/bq_dataframes_llm_vector_search.ipynb" + ] }, { - "id": "de7e4e11", "cell_type": "code", - "source": [ - "from bigframes.ml.llm import TextEmbeddingGenerator\n", - "\n", - "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -771,21 +728,16 @@ }, "trusted": true }, - "execution_count": null, - "outputs": 
[] + "outputs": [], + "source": [ + "from bigframes.ml.llm import TextEmbeddingGenerator\n", + "\n", + "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")" + ] }, { - "id": "4acfb495", "cell_type": "code", - "source": [ - "df_to_index = (\n", - " flattened\n", - " .assign(content=flattened[\"Transcription\"].struct.field(\"content\"))\n", - " [flattened[\"Transcription\"].struct.field(\"content\") != \"\"]\n", - ")\n", - "embedding = text_model.predict(df_to_index)\n", - "embedding.peek(1)" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -805,18 +757,20 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "df_to_index = (\n", + " flattened\n", + " .assign(content=flattened[\"Transcription\"].struct.field(\"content\"))\n", + " [flattened[\"Transcription\"].struct.field(\"content\") != \"\"]\n", + ")\n", + "embedding = text_model.predict(df_to_index)\n", + "embedding.peek(1)" + ] }, { - "id": "a49d1dde", "cell_type": "code", - "source": [ - "# Check the status column to look for errors.\n", - "print(f\"Successful rows: {(embedding['ml_generate_embedding_status'] == '').sum()}\")\n", - "print(f\"Failed rows: {(embedding['ml_generate_embedding_status'] != '').sum()}\")\n", - "embedding.shape" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -841,15 +795,16 @@ "tags": [], "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "# Check the status column to look for errors.\n", + "print(f\"Successful rows: {(embedding['ml_generate_embedding_status'] == '').sum()}\")\n", + "print(f\"Failed rows: {(embedding['ml_generate_embedding_status'] != '').sum()}\")\n", + "embedding.shape" + ] }, { - "id": "15a5bfd3", "cell_type": "markdown", - "source": [ - "We're now ready to save this to a table." 
- ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -861,15 +816,13 @@ } } }, - "execution_count": null + "source": [ + "We're now ready to save this to a table." + ] }, { - "id": "8b49384c", "cell_type": "code", - "source": [ - "embedding_table_id = f\"{bpd.options.bigquery.project}.kaggle.national_jukebox\"\n", - "embedding.to_gbq(embedding_table_id, if_exists=\"replace\")" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -889,20 +842,14 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "embedding_table_id = f\"{bpd.options.bigquery.project}.kaggle.national_jukebox\"\n", + "embedding.to_gbq(embedding_table_id, if_exists=\"replace\")" + ] }, { - "id": "810c77d5", "cell_type": "markdown", - "source": [ - "## Searching the database\n", - "\n", - "To search by semantics, we:\n", - "\n", - "1. Turn our search string into an embedding using the same model as our index.\n", - "2. Find the closest matches to the search string." - ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -917,17 +864,18 @@ "slide_type": "slide" } }, - "execution_count": null + "source": [ + "## Searching the database\n", + "\n", + "To search by semantics, we:\n", + "\n", + "1. Turn our search string into an embedding using the same model as our index.\n", + "2. Find the closest matches to the search string." 
+ ] }, { - "id": "fb63ad94", "cell_type": "code", - "source": [ - "import bigframes.pandas as bpd\n", - "\n", - "df_written = bpd.read_gbq(embedding_table_id)\n", - "df_written.peek(1)" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -950,22 +898,17 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "import bigframes.pandas as bpd\n", + "\n", + "df_written = bpd.read_gbq(embedding_table_id)\n", + "df_written.peek(1)" + ] }, { - "id": "f19c88d3", "cell_type": "code", - "source": [ - "from bigframes.ml.llm import TextEmbeddingGenerator\n", - "\n", - "search_string = \"walking home\"\n", - "\n", - "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")\n", - "search_df = bpd.DataFrame([search_string], columns=['search_string'])\n", - "search_embedding = text_model.predict(search_df)\n", - "search_embedding" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -985,24 +928,21 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "from bigframes.ml.llm import TextEmbeddingGenerator\n", + "\n", + "search_string = \"walking home\"\n", + "\n", + "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")\n", + "search_df = bpd.DataFrame([search_string], columns=['search_string'])\n", + "search_embedding = text_model.predict(search_df)\n", + "search_embedding" + ] }, { - "id": "06f0312e", "cell_type": "code", - "source": [ - "import bigframes.bigquery as bbq\n", - "\n", - "vector_search_results = bbq.vector_search(\n", - " base_table=f\"swast-scratch.scipy2025.national_jukebox\",\n", - " column_to_search=\"ml_generate_embedding_result\",\n", - " query=search_embedding,\n", - " distance_type=\"COSINE\",\n", - " query_column_to_search=\"ml_generate_embedding_result\",\n", - " top_k=5,\n", - ")" - ], + "execution_count": null, "metadata": { 
"@deathbeds/jupyterlab-fonts": { "styles": { @@ -1027,15 +967,23 @@ "tags": [], "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "import bigframes.bigquery as bbq\n", + "\n", + "vector_search_results = bbq.vector_search(\n", + " base_table=f\"swast-scratch.scipy2025.national_jukebox\",\n", + " column_to_search=\"ml_generate_embedding_result\",\n", + " query=search_embedding,\n", + " distance_type=\"COSINE\",\n", + " query_column_to_search=\"ml_generate_embedding_result\",\n", + " top_k=5,\n", + ")" + ] }, { - "id": "fae3fcae", "cell_type": "code", - "source": [ - "vector_search_results.dtypes" - ], + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-08-14T16:05:50.566930Z", @@ -1046,16 +994,14 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "vector_search_results.dtypes" + ] }, { - "id": "38423dde", "cell_type": "code", - "source": [ - "results = vector_search_results[[\"Title\", \"Summary\", \"Names\", \"GCS URI\", \"Transcription\", \"distance\"]].sort_values(\"distance\").to_pandas()\n", - "results" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -1078,15 +1024,15 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "results = vector_search_results[[\"Title\", \"Summary\", \"Names\", \"GCS URI\", \"Transcription\", \"distance\"]].sort_values(\"distance\").to_pandas()\n", + "results" + ] }, { - "id": "37a1dfbd", "cell_type": "code", - "source": [ - "print(results[\"Transcription\"].struct.field(\"content\").iloc[0])" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -1106,22 +1052,14 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "print(results[\"Transcription\"].struct.field(\"content\").iloc[0])" + ] }, { - "id": "a4748e0f", "cell_type": 
"code", - "source": [ - "import gcsfs\n", - "import IPython.display\n", - "\n", - "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", - "with fs.open(results[\"GCS URI\"].iloc[0]) as song_file:\n", - " song_bytes = song_file.read()\n", - "\n", - "IPython.display.Audio(song_bytes)" - ], + "execution_count": null, "metadata": { "editable": true, "execution": { @@ -1138,18 +1076,26 @@ "tags": [], "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "import gcsfs\n", + "import IPython.display\n", + "\n", + "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", + "with fs.open(results[\"GCS URI\"].iloc[0]) as song_file:\n", + " song_bytes = song_file.read()\n", + "\n", + "IPython.display.Audio(song_bytes)" + ] }, { - "id": "ff22e7eb", "cell_type": "code", - "source": [], + "execution_count": null, "metadata": { "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [] } ], "metadata": { @@ -1186,6 +1132,6 @@ "version": "3.11.13" } }, - "nbformat_minor": 4, - "nbformat": 4 + "nbformat": 4, + "nbformat_minor": 4 } From 983ef098fe9e013c398d8bcc16e9605ab7ef8c6a Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 29 Apr 2026 00:30:08 +0000 Subject: [PATCH 21/39] remove to_blob usage --- ...with-bigframes-over-national-jukebox.ipynb | 689 +++++++++--------- 1 file changed, 352 insertions(+), 337 deletions(-) diff --git a/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb b/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb index 4faff4b8e768..fe68d0107bfd 100644 --- a/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb +++ b/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb @@ -1,23 +1,8 @@ { "cells": [ { + "id": "f4ece66a", "cell_type": "markdown", - "metadata": { - "@deathbeds/jupyterlab-fonts": { - "styles": { - "": { - 
"body[data-jp-deck-mode='presenting'] &": { - "zoom": "194%" - } - } - } - }, - "editable": true, - "slideshow": { - "slide_type": "subslide" - }, - "tags": [] - }, "source": [ "# Creating a searchable index of the National Jukebox\n", "\n", @@ -35,42 +20,42 @@ "To follow along, you'll need a Google Cloud project\n", "\n", "* Go to https://cloud.google.com/free to start a free trial." - ] - }, - { - "cell_type": "markdown", + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { - "z-index": "0", - "zoom": "216%" + "body[data-jp-deck-mode='presenting'] \u0026": { + "zoom": "194%" } } } }, + "editable": true, "slideshow": { - "slide_type": "slide" - } + "slide_type": "subslide" + }, + "tags": [] }, + "execution_count": null + }, + { + "id": "bc01a1d3", + "cell_type": "markdown", "source": [ "The National Jukebox is a project of the USA Library of Congress to provide access to thousands of acoustic sound recordings from the very earliest days of the commercial record industry.\n", "\n", "* Learn more at https://www.loc.gov/collections/national-jukebox/about-this-collection/\n", "\n", - "\"recording" - ] - }, - { - "cell_type": "markdown", + "\u003cimg src=\"https://www.loc.gov/static/collections/national-jukebox/images/acoustic-session.jpg\" alt=\"recording 100+ years ago\" width=\"400px\" /\u003e" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "z-index": "0", - "zoom": "181%" + "zoom": "216%" } } } @@ -79,11 +64,16 @@ "slide_type": "slide" } }, + "execution_count": null + }, + { + "id": "4fc7c468", + "cell_type": "markdown", "source": [ "\n", "To search the National Jukebox, we combine powerful features of BigQuery:\n", "\n", - "\"audio\n", + "\u003cimg 
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAALEAAAFdCAYAAABM2IyIAAAAAXNSR0IArs4c6QAAIABJREFUeF7tnQfYHUXZ/p+3p4ckpJBgQj4JUkLxjxQFCTUEBRSET4SACAIWOirlE+kgICJIU0GwUVR6UVQg+An4RRNAipKQUBNCSOEl9e3/65mdZ/aZZ2fL++aEc3Z3znWF8+6ePXvOzPzOzT33zM7W9fT09IB/+BrIcQ3UeYhz3Hr+q6sa8BB7EHJfAx7i3DehL4CH2DOQ+xooHcRr166FDz5YAaNGjcx94/kCBDVQKojfeOMNOOjgQ2DlylVw4w3XwV577ZnKwezZz8IXDvlvddxr8+emHu8P+PBrIJcQr1q1Co796vGqtr52wvGw++5TnDXX2toKJ3ztG+q1r3/tBFi5ciWceNIpavuEE46Ds878TmqNe4hTq6jqB+QSYqy1Qw75IsyaPRv2228a3HD9j50Veeedd8HZ53wXGhoa4J//+D9oaWlWUL+76F246abrYeLEiakN4CFOraKqH5BbiG+77ZdwwYUXQb9+/eDZ2f9Qz/IxffqX4amnn1a24eaf/aRPle0h7lO1fahvyi3E7733Huy08y6AA47XXnM1HHDA/lbF8devueZqOFC8HlfLy5e/D+idJ07cBIYOHQpZIEbbMn/+a7DxxuNg5Mj0DiPamnnz5sOGG46AcePGfagNXsQPyy3E2BiktPtN2xduuOE6q31uvfUXcOFFF8OAAQNg9qyZ0NLSAl1dXXDE9KPUcZdcfBF89KP/Zd7z8sv/hksuuQyefuYZs2/PPXaHk08+CT5/0BfUPtmxe+qpp+Hqq69RtoYem222GZxx+qkwdeo+EV6effY5+P7lV8DMmf8wr40ZMwaOOeZoOO6rxxaRrw+lTLmGmDyvy1Ic/IVDAaE56KDPww+vulJVJkK86aTN1d/333c3bLPNNurvl156WaUWHR0danv8+I/AsmXLVUdw8uSt4MUXX4pAfO+998HpZ3xb7a+rq4PJkydDc1OTAfqySy+Gww77omlEBP3II4+GNWvWqH0IOyp+W1ub2j7uuK/COWef+aE0etE+JNcQr169Grbdbnvo7OwEbhkWLlwIu+waJBa33XoLTJmyWyLERxxxlFJg7Ohd86OrYOutt1bAP/DAg3DW2f8D7e3tFsQI4ic/9WlAG/H5zx0I5577PzB8+HB1zA033gRXXnkV9O/fH/4x8xkYOHCgAnf3PfaCxYvfgz123x0uuuh8ZSPwvLfffqfy9vi45eafwp577lE0xtZ7eXINMdYORm2PP/4ETNt3Ktx44/Wqwq6/4Ub4wQ9+CCNGjID/+/tTKp2IU+J3310MO39yF/X6XXfeDjvuuINV6ZdfcSXcdNNPLYgffvgRFdWNHj0K/vrkE9Dc3Gze093dDftMnaY8MqYmmJ489NDDcNLJp8LgwYPh78/8TVkc/jjttDPgvvsfsMqw3lu+QB+Qe4jvu+9+OO30b0FjYyO88K9nVUoxbb/94ZVXXoGjjz4Kzvveuaa5XHbiz3/+Cxx/wtdVJ+65Z/8ZaVq0ARjn4YM8MXrnm2/5uVLfSZM2jbxnzpw5gB1EzKExj7744kvhlp/fCvvuOxVu0j80/iaCHH8Uf3/mqQLh9eEUJfcQo6dES4HPV199FXx8u21h9z32VrV33713w7bbBr43Tonv+u3v4KyzzoFNN/0o/PlPf4zU+oIFC2DXT+9uQXzGt74D99xzb2oLnXbqKXDyyScCHX/EEV+Ciy+6MPK+f86aBYceepjy1vPnzUk9rz/AroHcQ4zFwf9Vo5rtv/9nYdKmm8LVP7pGdc6enPG4VVqXEv/x0T/B17/+zV4pMSnrkdOPgAsvPD+VKa/EqVW0TgcUAmICcciQIfCRj2ys0gZSQV47Lojnzn0Vpu67nzrst3fdATvs8AmrQq+44gdw403BQAnZiQcefAhOOeU02Hzz
zeEPjzwYaQBU6UmbTYLJW22l1NV74nViNPXNhYAY0wm0FJhW0GPGjMdgwvjxqUqMB6D9wLhrk002gWuvvRq2njxZpRMPPvQwnHnm2ZF0Audu4EALPp9/3rnw5S8H2TM+fvObO+C7535PKftTf3vSpBO77Lqb8sm77fZpwPht7NixkXQCs27MvP2jdzVQCIixyNynbrfdtnDvPb+P1ERcTkxKSW+YMGECLF26VOXE+DcCzpUY/6aMGv/ebNIkGDZ8mIKaMuULLjgPjjpyuvkOPFem97z51luAU0PVD2n3KXDrz2/uXev5o1UNFAbiJ2bMgGOOOU4VChMJTCbkIw5iPA7nYlz5g6ssNd9ppx3hzO98G3DgREKM23/5y2Nw7Y+vhxdeeMF8FMZ5x3zlaDjnnLMin4/zOK699rrIiN306Yer2XgUBXo2e1cDhYG4d8V2H7169Rp4/vnn1YsbbTRG2YssD7QJ8+a9Ch0dnbDxxhsrX5708HMnstRq9mM8xNnryh9ZozXgIa7RhvFfK3sNeIiz15U/skZrwENcow3jv1b2GvAQZ68rf2SN1oCHuEYbxn+t7DXgIc5eV/7IGq0BD3GNNoz/WtlrwEOcva78kTVaAx7iGm0Y/7Wy14CHOHtd+SNrtAY8xDXaMP5rZa8BD3H2uvJH1mgNeIhrtGH818peAx7i7HXlj6zRGvAQ12jD+K+VvQY8xNnryh9ZozXgIa7RhvFfK3sNeIiz15U/skZrwENcow3jv1b2GvAQZ68rf2SN1oCHuEYbxn+t7DXgIc5eV/7IGq2BYkLc3Q3Q2Q7Q1YnLw0NPVyfUQQ9Aj+Mf7sfj8YGv8+PU37RfHRBsB//Rx9N2XbBdF76sjqHtOvyDPWjbeq7DZeeDg9Sz3sa/Xdt8f309QH0DAK7F3NAI0NgMgPtK8CgWxN1dAO1tAF0dAtju+G0FMYNXMYnHC6gltBbMBCvuNNRqmglKoolvS2gFvASzE+L6AOzIP9pfH8Dc3BLAXeBHcSBGeDvaNKwEbQK8UpWlUtO2pdCkxlqJCQyl4PxBMDvIkQqs1FZLtnmOUV6pzApghBbhZ/AasPW+ppYA5oI+igFx25rAOqAtUHDGPUs7wSCPQJygxAZaaSuSKHEoMkFLIBvLoKF0KrGEltSYK7Pj76ZmgObovf6KwHX+IUaAO8k+aChjYSaIBbzcTpBCO5VYQMsthaJBK7DyxtxWOLwwV2AOawRoZhksJZZ2QoBbrz01+mJS7MYmgJb+ReDWKkO+IUb70NEer8Cqw6Y7bpZ9kBBTx468sKOjxz0xKXFEkeMtcVDrZB303wSveZYdObIJpMx1AASntAwEqgteYzvqAluB9qJAj/xCjJ24NasC+6CU12EhSGEjyswUWXbsnEqs4Y5LJxTMskNHlOj9znSC++GEVCJTx04rrkokEHbhkfl2/4GF6uzlF+K1q4WN6A5SBgUz79Dp/ZYix9kJhxJbUJNloBiN2Yc0ZTMJm/DGsREb974uJSZoHbaBK6+EGrcbmgD62bchS/v6tfx6PiFGZV29IoRVKrGB2dXJo1yYd/JkisHzYg0u+V8eRBhbwYQ4rrVFTBzmwfoNHOY05VVKS8mE7NgxReYwY3rB1XjA4MLkyPmEuH0tAP6TNiETzDFgx0Vu5IUJXpkPG6/cWzvhgJescuLgRlyHLoudwGP0cdjBK0jslk+I16wMOnRkGwhmAzFLKeJshBXFuVILbRksBaYRO24nxAhdqhLH2QmXIouOnTWwIXPhGAW2vDFTY4zc+g+qZZeQ+bvlE+KVrQDdLBe2FBk9sOjk9SZy48PM3C7wv00HT9ezHOugqM08a7vBm8XYhzh4WZoRl0RY3pfsBYObWw4FM1NiHM0bNDQzKLV8YC4h7mldinfMCTtxlhLz/aKjlxi5sbkQ6txim/XpgjkWTIGtdEI2d1w6wSCliI18s4JTn0dFZno4Wnpc3JYpBNkF2i/hNfsbAIYEN1XP+yOXEEPr
0gBgA2+XPVIX8cYOZaZojeZJ8Mk+ul8Xwuoa5CDI9cSfNBJS0wkClSmzCh44xKA7YxngtWAWnTrq5A0dkfatc/F6PiFe/h5T4ThFZlGby17w2WsqT9ZQkgJbSky2gc1is1IKamueF9M+1uHjCYXLTrigpeNIkc1gBk8nuB92eWMGMVfmYSNzAWnal8wpxIuFEhOwIiuWubFz0CMGXt6hc9kHV9SWWtvaQrCniNLyyUBkK6xnbTUi8ZnwxAQrtxsGYK3kw0alfeNcvJ5PiJctsgc2ZDqRqaOXoLwuZTYemCcUwhsnNXmSndDjFWZY2igyAcs9Mf+b58VCgZ1emBRZPw8fnQtI075kPiFeuijBE3Ov7FJoitMYxARtqq3g0Mq4jSgn+yDshGvY2dWRc6kvnsrYCAm0wz5IxeUDHRzuEWPS+MjF6/mEeMlCPWdCd+ioI5fW0VOvx9kHlkiYdEKDqeYDJSiwnE8cN53YTL1kkRuHVtkMbhfStkmJmT9OtBHCG284NheQpn3JfEL8HkKsVdYCmE/FdCmyBpErL4/TrL9dkVsKzKZ/FzMV00CsX5f+1yhuAry8g2fyY51aGJ8sbIOJ4sR+D3Ha72M9vr74bXv2WgRk2cHjcyNA+2lSZIcCp9oKllYYr2z9wQrvyIld8JIPjjxrz6EGLiTcpMS035FCWMrMX28AGDluPTbSh3fqfCqxgpiUVj6zqZlWXqyvpeN2QqUXDo+cqsiuwQ7WaC47wTt2FsT6fbFJhIA3Von1RCDueQ3A2m5IoEcl30j9w8Nw3T4pnxC/+6ZW0y7x7ABbzTcWsJptrcZqW3plsc8MhiSM5FlXdtCFHiwc5h05itnkYEYczOrCZam8cptshbQTQoGVvWgAGP2RdaOnRt6dY4iTlJh3+BjAFqwId4yloI5cbGoR442NJxZuQm2yoeTMgxo6lTA2gqZd0n7HtrIdGtI4L0xpxejxNYLhun2NfEL8zhsJdkLCzdWWdeyM8jpshjUtUyi04pdUWnhjDjEbqDNNJOFVbGdMIwhOOj5u2+zX6uv0xPq1MRPWjZ4aeXdOIX5dRGykvPJZq3CifZAQx0GdNEFI0KsuFBV2wmUllD1gKm2lDVk6csxOWFDrETm1r0FPoNfPaCNIqT3EVfwZLnwtXYm5D6a/LfvgsBNxHb2kwRBSZl4dfEhadujkAIdrbgSHOZPysgEQSjFcCiz3jZ1YxUas3EfnU4ldEMuYLQIxRWtpyptkLxwDJU6IWTxhTfpxzEpLmx9hRWtxyss8chLE5IUJZg9x5X5JvT7TgvnxSmxgZtEZjdQZRRbZMHX4kpRYDorovp3xx6oQjgtHXbPV8FBrnjDzxk6vG6e0rv3UsXPEalyJ8e+x/9Xrqq/FN+RTiSMQu6I2DrHwxnHQcsixtWSa4cqPae4xLTJILFNrR7xwlo5cQpTGIVdX53NopSLzlAK9MOvseYir/Ht8e55WYtaR43bCshIynWB2IU2ZKaWQOfJGYwCGDGGrYlJK4YgnSIk7OgDeWgDQ1sZG3jjQ+u9M9sFhKywbwWHWlyQZFWYdvY0/WuWGrMzH51OJM0Es4OUJhQWvA2pSXNcgyWaTACb2MZpaswbgr0+r5WZNtGZNck9QVpUoZFFe/mOgK0D44AdLKcZ5iCvzM+rLWd5+Nbg0yagvKrLeloCaETvZsUtSZD3bzdgONvttzykAzc0ArR+EShy5UNRRqEEDAJqaAJ59AWDBQh17OaCMswcKYrIOvYGZKTGqsYrYNNQbb9qX2q+59+RUiTXEBlwGMR9mlhBK+xCnyNZQtYB/6l4ADfUAjz0JsHqNPVxt/DC7TInsxKd2ABi1IcCL/wZ4/c1AiVuaAIYNA1iyxFZmAywNI2toJcSWP5YKzLyyshI8J/YQV/+X+NZcpryowGkQp3Ts4uBWMLOhaTxu370DiP8yA2CVhJjNOTYdO92z22UHgNEjAf71cgBx/xaAnT8BsGgxwKvzQ2U2oIoO
W0SJEUSpyDFe2SgwdfQ0xB+ZVP22rMA3yKcSOyFG4BBmVE5uB9gEIJlKpCkzh5iOJYj/PCNUYhoMIS/tWm9i1x1DiN9ZBLDLTgCDBgLMnccgTuiwWRBzwJlXjvwA9OIruJ/bCOrkeYgr8BPq6ynemhPEX1yBZT5sjdKhWsd09HB/0uietBzTUIkbAP6ESrw6OvtNDn7QYMauOwGMGQnw7zkAG48FGKxX35mDEM9jfpdsgV5QME6ZY/e73ieiNeWJGwA8xH0lsALvM0qs82GnnUjqyDmUWqYXcSN+09ATC4j5e+XqQATxbjsFSow/NlRCeqAS4z+ljq6cN8HrJh7PzmcNcrCOnYe4AjD29RRvzknwxL2EN1N6waDfTyvxo0+ESiwHScyKQGwq5W47B0osH6jECuI0WB2Qx3X0XCrtshPjN+trC9TU+3LqidFOiCmXSXYiLRfuzesGYrITDs9NF46auAwAkiBGOxHXceuVnYjz1N5O1NSvTn0ZrsQqZkNbQVBnzYN70+FjnloqsctT44+C5g4ThFO0Ei9ZBrAhWwNtfSuxNUlezDH2SlxFtl0dO2ttNhaNOVVWvB5JKRLSjVglFh1ErB6e96InHjMK4LmXgnx4C/2/cvLEdKylvEkeuTce2itxFWmN+ehqKLGK7noA+uyJGcSvvQGw9RYAm04E8Eq8znzl1BPP1VkwtxF8nQl+GZK2DRFFTojcIqN+TLkT0wnHslYynXj+ZQCEGBV328nBHVBNx44PLSdFbWwkL5Nn5krMruzw6cQ6/4D6foI85sSf1hEbjtghxNTpGzoYYOVKnxP3nQZcq1quwbQOZ/uw3spH7Mz8CRp+7m3EljDC5xp2nrIrwMB1uPPQ32cBLH7PTiMieW+l82LHiB1mxxv7YecPC9no5yDE1vzh9T13gnX0hg4F2HpLgCGDe1f+zk6AV18DeOVVu8OXNqmdT8E0E360nejN3AnXBCBvJ3rXhhU92kzF5Fc3V3AqpjWPWE7Z5GtVyAVWxA1pKKFQz67lWdPWjxCvWxDLKzqS0grpif0story2KeTVXM+MV+vImlVeSpY0tXNHOwsV3T4+cROXPLpiV1XdnBv7Jp9Fju8zDxx3Dxi60qPhMVU6Do765J9TbELZrIKfHkq54Wi4pq71HnEPL0Qk+KVrdBK7K/s6JOGVuZNmS5PojnErhWAXIMZYt5w6gpBLpipeI5J8XIRwcjaEmLuhDXiFzM53tkhlLbCX2NXGegqfRbn1c5svWJrBhrCxoaNXYrsSiEs2+BY1xjLxK9+VtuOgqat/EOX7kfshPDQciRP3R3Jtd6EnGvMcmFa+YdmtY3zl+xXGs3s5/PrToRTNxPTDbE6pl93Ijtj6/3IpGWsIounxFzNLC87SrUPGZZ65W4i0rHTEYXTG6ctnp0hxbBgZjekkeDytSc28stYrXdWYz+gTxC7lq9yrU8sFl3BL6EuP9JDz04bwYabaSFBCTEt7VrNtdj8MlbVYzbyye/gqpjyjqKOO4zKa+3kehJ0zZ1cosq5P+uqmMIbq6uddUcvskI8LZ4Sd4svx/rEcVEcn4+svLL0wmQt2FXPflXMKkLt1ycO51rIVTNN/ObXJ64ioRk+Wt3ugKcR4n516po7fbtc17VycSv88P3GPvB7erBEgmyFeWY+wnkLsAwrxfNFuKXiyshNduisbdftDWTUVg/gV4rPANv6OiQX9+zQNy6nxVN0vy7TbQ/MSB5fVyLGcliw+3t2rC/kKn/e1LsnyZs0uu6SFHfPDscaxJE8uA93T3JCzNU5ZmX43s65MHcPRTXWK8Urv+xQYn/3pMqzmfmM/j520SVdlZ3w97HLzFDVD8Tb4jpXhnfdZZTd105O3qGVe/i6EbScq4nWEm40Q1OxsywoKG97QMpMdoM6aK5tbi/MKppyRE8rrQJZA50Gtb+jaBVR7s29nRWg3F7E2QXXvZ01nXm/t7NlMdgNzT3EVYR46SLH7Q4ojaCkQt5Z
VL9OSksXfkYGM2hQg93myygueWGXJ45LJ2ROrOvNKC6ba6zUmXlj3OZzK8y2WKuYK66aUxHjgS3LUQ8wYkwVG7FyH53PqZjLFun107h9iIOWDYKom5rTcTGK7LQRHFq28mVf7YTp5HFo9U5lHegGNWmDIWx42YKXQZwE9fDRlSOpimfKJ8TLF7tvUG5Byr0wh9ehyNaIXYaROQlvlssUedSWlFRYCp2WWGj/i2u8qvSB4E3wxNwzDxtVRfQq99E5hfi9wE6ooWeCkg87s8EPo7wcXgZ1nPLyG8pQx40v3co7c+ZvNo84vBujGHZ22AmXjUi0FVyBKZVg4GaBGaHfwLE2XOXY+tDOlE+IW5c4lJhBaiUXMfDSiB73yApG13VzZCG4ldDHsqfUVqO5E/gZPIUwSQW3FMIbG6hJcdUECZZEiIhN2ghXajF0ROpXzsMBuYS4p3UprjXA1NhlF4RKkyKr4WR9vAGY3ZNDQclApm0Oq9NOyOlr1Px6f1Y7wVWZd+zMlSAaYumBaTuTN9aX8A9ha8LlgdaY75hLiGFla7iIIJ/NZnlikVJE4OUgs7+lvSB4uUpTZcZ27ByTJ0xOnGAnLFshVZng1VduxEEbBzeN2NHrjY0AA4fmGN3wq+cT4tUrATrbHZ6YqbNTeV0KTO/RlzHxwQ4aHDEqvA52gg92xNkJglh6ZIrGjBqT/3U8UwfPCTPrADY1A/TXq9XnHOV8Qty+FgD/kS0gD2xmrqnLNphtEKpLk9xJuY2toDkWanQj4Ro6cYOZPqcT/F7PTHnVn1J5mQKbq6PXAeaWfgDN/XKOb/D18wkxQrt6hfbEPErj0y8JXPmsj1cdOAatBNm8rmU4zU7EWWJujTkyPEqLKLAGVl0NIu2DDpLN0HJGeyEHRAYMtm+7kGOc8wkxVvja1QCdHeG8YStuE1kwQk/KbD3rHC1JiSMws2yNR2+ZIda+InIVtIaTWwYLYgavUmK1hpUe0ZP5MFNol71oaALotw7rydUY8PmFGCe+r1llR21WbKbTiVhboUFXrztgJniVEOv0wtxURg4/u/JhLsEsUjO7CVqyEQS3S4X5Pn2pvuzYmUnxIr1weeP+A4PLlwryyC/E2AC4ti928CKemC7sTLAUpgMn7YWGljyxVGIOsjXIwa+tY+kEmzqhHZx+YtAaO0HWQHti443ldozN4B1AqcAEc3MLQFNLQfDNsyfmTdC2RtgKV4cuLpUg2NOUmL1u4HYsHkgXhOIPhF8g6vLCdPWzGtlj0yrNVdFp9iIucmMjeGQ7CO7GJoCW/oUCOL8dO9kMbWsBujocaUQCvGQ98Fzkmc2wMh/80J0/C15jhoNvotKJjKaYRu3M6J2c7MPgtZSY9ks7EdOxMzPW9PswUitIGiGbP992gpcGrUVHG+voyYk+rm1XpEbQSnj50LOYBR+J2ByDHZYXZtEaV2QDuPTFZCe4jeDw8kiOK7T+G+0D2oiCPooDsVLUrsAnoyqrwQ7mjTNHai54XR05JcFaiYkOAXfgE4LjzLAzTyfoGjvWyTPRm77raFJaQWorUwqyEQ2NAbwF6sS5fofFgphKiPYAO3xdnQBdXdDT1Ql1FsQsIzaRG9kC8RoftYtLJywvLBdPoUlsYtw5MgFI2og0T8xUmaBFWPGWvQhvY3NhcuC0/4EUE+K0UvvXC1UDHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7
F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C1NTEM+d90Y5W8GXep1qoKYgXqeS+DeXtgY8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xOuh6Xt6euDNt96CCePHr4ez+1PKGvAQryMTH9t8K2hvb4fbf/Mr+OQnd4auri74/EFfgBdffAlO/OY34IwzTlvHT/BvT6uBwkCM8Hz1uBNgzZo1MGLECLj+umvTyl6R1yXES5cuhU/ssLM693bbbQv33vP7inyOP0l8DRQG4idmzIBjjjnOlPT+++6GbbbZZr23vYQYP/DSyy6HBx98EC44/zyYOnWf9f4dyv4BhYH41FNPh/sfeBAGDhwIq1atgqOPPgrO+9656719XRCv9w/1H2DVQCEgXr16Nfy/7XeEtrY2uPDC8+F73zsfhgwZArNnzYSGhoaKNfnatWth7ty5yq6MHTtWnbcvEHd3d8Obb74JK1asgEmTJkG/fv0q9h3LeKJCQPy7398N3/nOWSoNeOyxP8H2n9gJWltb4eaf/QT22mtPq11/dvMt8Nhjj8Nuu30avvH1r1mv/eIXv4Q//PFR2HWXXeDEE79hXlu+fDlccun34f77H4DOzk61f+LYOIdFAAAf00lEQVTEiXDrz38GBx18CCxf/r7p2OFr5513AbwyZw4cfviX4MAD9jfnQd9+1VVXw5133aXegw/8kU2Zshuc+91zYJNNNikjg+tc5kJAfMQRR8HTzzxj0oCzzjoH7vrt7+Czn/0MXPfja6xKOud/zoU77rgTDj74ILjqB1dYr11w4UVw222/hM8deAD86Ec/VK9hR3Hafp+FN998S20PHToUBg0aBAsWLIBx48bBypUr1Q+G0gk85tBDD4N/zpoFZ591Jhx//FfV+zB2+/o3ToRHH/2T2h4woL9S81dfnae2hw8fDvfc8zsfy/UB6dxDvGjRIvjULrspSGY88ReYMGECPPX00zB9+pehsbERnn9uFgwYMMBUTW8hvvba6+DqH10D/fv3hyuv+D5Mm7avUk+E9JRTToeFCxeqc6dBfMvPb4WLL75UWYfLL78MPrPfNPX9EOLTT/8WvPDiizB58lbw4AP39aEZy/2W3EN8w403wZVXXgWbb745/OGRB43q7bjTp2DJkiVw2aUXw2GHfbHPEO+191SYP/81OO3UU+Dkk0+0aJkx40n4yjGB0qZBvPc++8K8efPh1FNOhlNOOck6zxtvvAG777G32odlwLL4R/YayD3E+0ydptTs298+w/K4ZA123HEHuOvO2/sEMaYck7feTr33kYcfhC22sOFCj4uvY4cvCeK08+D5p+67H8yd+ypceslF8KUvHZa9Bf2RkGuI8X/BBx54UGozPvP0/8KYMWPUcb2xE+iDp+wedAz/MfMZ2HDDDSOftduUPeCtt95OhBhfx+OSznPkUUfD3/72FJx+2qlw0knfTC2TPyCsgVxDTGqb1qAcjCSIzz7nu3DnnXeZjh122Lb7+Ce8EqdVcJVfzy3E+L/ynXbeBXCYd/oRh8NnPrNfpCrvuuu3agAEO3vY6cMHdq6wk7X33nvBz356k/Wefad9FubMmWOlE5g/
Y8TmUsgnn/wrHP2VY9U5vCeuHsm5hfjxx5+AY796vKo5SiVkNc6aPRsOOSTo1OEcBpzLgFnw+RdcpNIG7EQh4Pig9AD/5hEbqTMef9VVV8K+U/eB+vp6mD37WTjp5FMzpxM33fRTuPyKK1U6gZ3NAw7YX6Uc2Nk77bQzVDqx2WabwaN/fLh6NOT0k3MLMQL00EMPq/kROE8i7oHx2zvvvANHTj9CjeYtWvQu7LHn3qozNnjwYNhyyy3ULLTnnnsexo8fD5gUcIgxQsPkoKOjQ30E5sSDBw+Ct99eoPLipqbGyGCHKyfGz9j/gM+pzhs+8L2jRo1UyQc+EOg7bv817LBDYF/8I3sN5BJiPsz83e+eA8ce85XYEn//8ivgJz/5mTUMjdHYmWedDYsXv6fe19LSAkcdOR1GjR4Fl1xymQUxvj5z5j/gjG99W4FLD1TwSy6+EL533vkKxDQ7ge/DgZMf/vBHcPc990RG7M44/TT1g/KP3tdALiHufTGj78D5C6i6qMxbbbWlgjzpgcfPfvZZ6GjvgIEDB8DWW28NdXV1ffoqfu5En6ot9k2lhbiy1ejPVs0a8BBXs/b9Z1ekBjzEFalGf5Jq1oCHuJq17z+7IjXgIa5INfqTVLMGPMTVrH3/2RWpAQ9xRarRn6SaNeAhrmbt+8+uSA14iCtSjf4k1awBD3E1a99/dkVqwENckWr0J6lmDXiIq1n7/rMrUgMe4opUoz9JNWvAQ1zN2vefXZEa8BBXpBr9SapZAx7iata+/+yK1ICHuCLV6E9SzRrwEFez9v1nV6QGPMQVqUZ/kmrWgIe4mrXvP7siNeAhrkg1+pNUswY8xNWsff/ZFamBYkLc3Q7QtQKgaxVA92qArjYA6ATo6bL/AW53B/tAP+O2+ps9q797gv34bLZxn1pBW79ObYLb8nJ+vV1Xrw/C5zoAddk/PuN2vX7GbbxNA23jc0P4T+3n2/h3I0BDC0D9AICGgQCNgwHqmisCSa2fpFgQd68F6FgG0LkCQAFK4MYBzCHWfxuwJcwSYmxaPGYdICaAFZQIM4eWoEZANbROeAnmRgZ5I0B9A/Q0DIa6puEA9cW+J0hxIFbwLtfgSngZxAZuUuAugG6mxKTIUo25AmdSYtIvUmSHElsQkzILeAlspcwELELdGGzX8/0EMj6zvxuHASDMBX0UAOIegPbFgfoq5aV/LiXGm8Y4LEUSxMZWxNkJtBfaVijLIR8SYm0flN2QCsxthVZgBW6MEitLoWGmv82zABrtRfMoh83JP9n5h7j9XW0fOgG6NaTqWW5zqBnIFtRoD8hWkFIjpKjUzAtbSpwGMULCQDbqGwcxKqv2xBLeiPIyiJUiBzYieCalbtKKjZ4ZQR6df2pFCfINMVoI/GcpsEuNHd7Ypciq48c7eo5OXqRj11eImQpTx87q3PXWC3Pl1XArSyH+oa0omLXIL8TYiWtbEADcjcuucuXt0CmEA2iEV9kHUmaEFpWb0gmpxCKpSIQ4TeRkGuFSY925M+kEpRQIo/bC9KyUmeyE9MN6u56UuEl3/JoAWsYVqrOXW4h72hZCXecHbhW27ASDu1dpRZwi9wQWGPkL/6AdmZ/DFTVZh04pMSkw/c0VmcVqvOPGYVZQazvhUuK6piC16BfcEbUIj3xCjDnw2tcBelBxNaQKUL3t7OC5OnykyC6PzBUYjwsfph+ndxmmUxC2TkJhBd8Z8cCuPFhGajEKbADWCqwUmcHdf2JhcuR8Qty+BKBjcWAjXPDGKbHZj1Dy7Bj/ZpEbeWOdA+ONHlF6e6AH6vBZUMwhdmUTrqEP6uvh+YKBlIDqYMlj6thpYFWKwXNgZiNMB44psFOJEWbq8KGlGA3QNKIIQpzTW4CtfROgs5UpMfPEBmpUV/LG3COTFxYQm44ewRygpR40MGe2BdTOaI0O5pAGPwJ1Qr1At42w2R282ShzjI2w4jSCnHlfbidQibk6Nw4F6DfeQ1y1Glj1H4DuNQ4l
lvaCd+yknXBDrHjVSkuCa8Fcx2JhUwFaSfENdYFSK8+rt80zV1zOOE9vjc0I8A68c1aIWRJhvLGAl/Y3DAAYWIw7l+bTTqx4XqssQRtjK0iJI+kF5cjaVujBDg5rCHNgI5yKTEKthdXq5+nXOMuuHz2/Y0LgJDS8lr3QqhxJI3geTHZCKjGzEXUEND43AQzetmo6VMkPzifEH8xKhhi9b2xHL9rBI4/LYQ0gttg1yhrxxK6enXATCmZrvIwrrfTEbHiEoCbaeSrhTB+0Ghv7wMFF6Nn2kO0ryVLVzpVTiGeG+bBS2xRFVkqM+bDDI4fuwThbA6mAOJpK2DbCjtxcUsw8sVRadnjQteMdPd3XMz8MNjfCyoH5wAYpMKUSDpiH7Fg18Cr5wfmE+P2nbaVVcLpgjlFkPTxtUgfywMweBNY4lFi3Mtt5sUkvKMXQz5YGy2jN7udpDxx8LgUVxmVwuHF4OSYHDlIIAS3aB/TDylLg300AQz9ZSZaqdq6cQvy3AForYsNtyop5Xkz76TWdTnAFjoWY7ARTXD5z2N6d2IhmdjGbZsxnHJvBD0zX2JmMi2BphkrgFNkIMo/OHPBaMGt4FfxNABvsWjXwKvnB+YR4+ZMM4jg7IeENIY4ocMQ2BDuc6YSCOJpGZG2UAFbhgcPELYiI1cm4Zw6jNwM1KrWaEMejNQ2xyolFJ4621bP+N2xK1q9d08flFOLHbU8cayeYV1ZzK4Jb2xo4Dbw4iSfMhcPXwzyYv49aNPTI2cfsBMOh6hp1dacTEUXmiu2yDxxW8zoBjJA3Awzbo6bhzPrl8gnx0j8zD8xsRSLMHWakDeEjj0tRGsVr9n5mJyTkJKispg3KlA+LYWjeKPJmpFxhQzXmCmx39MzAnlFk1pEju0DeV2036xE7psQj9snKSU0fl1OIH3XYCeGRDdCkwN1MgXWYJsAkm8DTCQtqbQNcw85WK0thdk1Fj7MN2kmQLyavHCpx8El8fwA0DU2zjpsLXq7QI/ataTizfrl8QrzkkQSIox2+HjWYob2sUOHAEthQhxdqmOTYKLexFUqJbbthKt0OhK224MPO8R04HcUxyxBrJ+rUbI4AaqujxxTXKDJ17PRrG34mKyc1fVxOIX7IhtiyEQRx2OFTymnBG26HdiKc2OP0zAHrtqeWTSuHmSPDzix5EFEbdfiMwmZQZNWvo9SCfhGWF2bQuvZvuH9Nw5n1y+UT4vfud0PsgLkH0EbIjhtuB8PJyGWkIxeZOxEeryWdP5mwQQ6GBP/bD6dQuDxxRJmF+spBD67IAcRkL+hZx24ELYdX/j3yc1k5qenjcgrxfTojbo9R5PYwM9aQBrCK6Eyrc3S/tgk8P7b8M3UDbWVOauloR85OIcLsN4zYlEPQJ42zExEFp9gN0wduI/ggh9rfDDDy8zUNZ9Yvl0+IF9+tIdaWASfJm8EP2odpRLAuBELKO2iZYdYWgpyxtBlUyep8JLlmfjC9ag/JRaBkhwUdtHCkLrQLtkemOccKcmZLgm31rgBSJ8Rs/6gvZOWkpo/LKcS/F0qMEGMKYcNswDVe1mUfgv/dR22FiOEcSh5CbLexs18nPDAf1EBjI+Gl16PpRAC5K63g+4PBDp5UCKgR8lGH1DScWb9cPiFedFeYExO4SolDiFGFFbKiQ6eAlR00rdTOQQ6ZKXOYDcXaViTUukkirEGK6ESfdM/rUuoAahtuvPoDIXbASwo9+otZOanp43IK8Z0aYvS+aB+kN27XNoIUlqUR6k8BNymxA3reAQzTieSRPFeLhxAzL5zJ81KHLRzscKUSIcRsUIQsBXlgYy+aA8BHH1bTcGb9cjmF+HaHnQijtR41Sy3syHUzOxHsj9qHNM/MzyfnVPDZbnX1LdA8bFuob94AOlr/DZ2r3zZzJWiUTVtf9R0pnZCDF65Bjgi86keAcyiYZzaKTLkxKTFXZA3xmMOzclLTx+UU4t/ojhwpsHhGK8FtgMMuOD1wnDdOGKam
H4UCsmEADNnsBGjop1fZ6emGVW/fB21LZhoI7JRBDGoQlGbwwp4cb9IKA2+KN1ajeA54SZnHHFHTcGb9cvmE+J1fJUJMaQT3uDbU1GmLdvRsuLXXdaYbgS3g2fDA8QdBvw13hp6utdC55h1oGjQRero7YPlL34eejpWB8poOXiCZpM7hczA7jW+HnTx7LoXtgWWHj2a5aV9M/pg/b3RkVk5q+ricQvzLBIg7I3MkLJjpknumujxyS8qT8cQW5EqGzaA1bLDFqdDYfyNYMf830Lb8X7DBlqdBY/8xsGL+r6H9/Rc0xMyzxkAdRmWuDlu8Nw7g1z8O80Pgo3aoytpK4LOHuIo/zoW/YBCLzl1PV5ALJ9oJ1uGTgyEcbkUoG442x7qnbg7f6lvQ0G8ktM65Gdo/mAsbbP4NaBo0AVa8die0LXvWVmIxXOzqmIW5MV31rKF2wBrrqdUVIHpwQz5v9OUqNmLlPjqnSnxb2LGjdMKkFKEfthTWpBIBlNITu7YpnuPD1vY0TnsEkCB+f84t0P7BHBjmhFinExGI3XaA2w23QjuUmcdt5IuNjWAdPQ9x5X5JvT7TwltZOoFKHKpxkAOHcEl7YLaFnbDTCdsLq9PpH4ENcXgc/jV88regsd8oWI5K3DoHhm/xTaXEH8y/E9Yum619bhRWWl8i4nG5N9YjeUk5shty7NzFKPHYr/S66mvxDflUYoQYs2GjwiydwPUtJXQJI3ZGgTmkbDAk9kfgmDg0fPIZTohb5wd2QqqqcrCWalJklrEDp4eZzVRM7bEjUZwZghae2ENcxd/kgp+zAQ6txPpqZ1JimfsmwyrtBWXJ2vs6Jgq5OoAjtg6UeNkrgRKP2DJQYoR47dLZGtgwlQiVM/C8sWmDM1KLDj/TOVTfzsqLqXOHnTpmJ8YeU8VGrNxH51SJNcQRJQ6uoeuzEiuvbM8rjrMf+DlNgzdTI1/tK+ZBd+caGIFK3H80LHvlFmhrfUVB3DxoArw//w5oW/ocNA+eAA1Ng1T81t2+TEOb7GnjOmz1ep5PAK7o+Ml5xrF2wkNcuZ9Sb8+04BaRTgR2go/URTpqCWlFN/fQfFg6VoEDjzxsy5OhccA4eP/VX8PaZf+CYR87FlqGfgzWLJkFa1vnwNBNDoL6hn6w5KVroHP1Athw8ukqcmud90voeP8lk1a4ojEnvMx6uLyxsRHWcbihr7GTWfG4Y3tb8zV5fE6V+BZHOhHMZMtsG4QHTvO+9sShwH4MmXgEtAzfBla98wSseOsP0DJsMgybdJTV0O0r34BlL18P9Y39YOTHz4O6ugZY+sJl0NP+fjBjkqUU0svGpxHafohBEZqaGR2e1lc3G0XWtsLbiSr+KGOUWEGs04nYCC3tdWVHpBd2pxXNw7aCoR89So3QLX35OuhY/S70H7kDDBgVrKzT3dEKra/fA90dK2DYRw+HfiM+Dh0rX4f3/3ODfaFn1txXeF05JTPeEzMltjyxV+LqUbywNpQYWR+2+dfU8HJX2zJoff0+aGv9j1n+CiFraOoPgz+yv4IbH++/8hPoWDnfdOKSJv70VYnxc7jCB8taiWQCt70SV49hsJSY5hFrO5GmtM4ozTH4oa/BU/lwQjrRNHA8DJ10DNQ3DlAV0r5iPrR/MA+62luhedAm0LLB5lDfNEi9tua9mbDijbvZ1cnhHOBEmKmjpuxHUO/RNCPcF00nvJ2oIq0xH70Q0wk+CR4B1rmxYxiZT8VMtBn03l4MliDk9S3Dla1oHLCR8wv39HTBqrcfgdXv/s2OvsQVGnEdvMTozRnN0SX8NJFID3aYSfJ6/oRX4iqyjRDL4WZ9VUdlcmIWtcVMFMLSB8PXFMnVqWSiZYMtoN+I7aGuoRk6Vr6p8uG1y1+EHrzTk2PRk/gRuOhsNuuaugi8lA3bE4CC9SjkiJ2HuIr06o9WI3ZSifW2nh6ZlO/G2wMxvJww0hdALK/DC674GL7lqUqVW+f9CtqWv2iiNAWx/k8w
mT39WjnnIEhkODomJ9YjesG1djqRMFd7oCc+uvptWYFvkNOI7Tb3LLbuDpDrTLjnUrjTBrUOhfbApLQcVvsaPMd6FXqZqyjENmQEs2UfCO4keyCHlWUe7Bx2rg+vtZM5sYe4Aj+hvp7iHT4VUyoyTsV0QJo05TLVA4fnI6jtZz3hSJdnGCpx/43gA5xHvDyYR0wLSJj5vgw4YzP4PAqHUkdyZGlPnPOJcSomm0PMI7aN7Ey7r81R7fflVIlxUjy/OJTPn8CsOG0+ccLwsuncKUyhh6cUurVc61YER9OqwvRX2rJV4RwH1yX6IbTh4EYAfHTyezTdCH45dbQ6vDUJSHtkD3EVf3/m8iR9pbN1xXN7NiUWs9DkPGF+WX94Yah7Mjy/UBTBCX4i9AjmD5t1JfTu8BKkYO4DvS47evzSJFJsk1bwiUExdsSa8CM7eGOmV7ERK/fR+VTid37NVvyJXrYvV/5x2gs+R0J20uiaOhm5abnVboWtJE/+Jb5hzHJToa8QUzOjly2FV3YkT80085EjHT7ywzHzif2FopX7JfX6TItud0DMVTmYQxF7ZUbEA0c9b/B+YTvMOQO74LwVmCgMKit+XHiBaCjFctjYeGOZHztWv3QOdkRmr9FtDxwQo08e86VeV30tviGfSrzoDrGQoLxkH9dhY4ukxIy4WZBLe2FtC8ipJSliM1459MTaJNC9FVnHLjg4y6X7USvBvDB5Y3WiuIiNw0vLWrFO3hi/eEr1fpTv/tadE5tFBRHq6MrwzqueHfCGcEcVmjpw6hXjIpid0BY4tMRciuk+du4byQAtguKC3BW9mSHoKNxmwW2zxKueDM9XARr939Vrwwp+cj6V+N3fuZWY7mWnRu/0jDbHQoDuVTKjk+HJTtCl+tphmOo3d2EimplCs55doLz0Gimn3mPZDEodHHYi2vGjgRN2hQfr6AVziB1KzJe2Gn1oBVGq3qnyCfHie4QS06rwcolXXFRQD0okdeT4GmxmODn01GZRQssTR29UHkZsYYOSMBPEBG3ItO7QadLD49wdPfcqmWIwRV3hTCti0g1n5FKvuCrmwdUjr4KfnFOI701R4gDqHnV/5+Srn7m60oidvY86eNw+kM2Q0ht2+BA2usNomCBrGxHGyPaKQMLbBt459Lz2haYJkEfuniQXUNGAjzqogihV71T5hPi9B5gSI6gUs4ULbIe3yU3yxvGeN/DP4eLcEnb1P3N1UABr1ge/GSNBye2Gc+V3PrpH3piUm64MIQtiqbBco5jZC1TqkQdm/do1fVxOIdY3nlGjdnJxbQ212k+KHCqknIppwck7aix5CAc7wglCvFXp9aSWDj1xcJTzLkqU8zojNZp77LAflhem+9lxG8HvY8dshb/xTBV/nEv+EK5PbCAOoTUqbO79TEu9RpevsiFmcyAsiIXiyrENRzghvbCpLUoU9A5LmcNhPraooAnrYhcaDGwH/ocW1pYKLCHWk+RH7FfFRqzcR+dTiZf+Ud9Yhisxuzl55MbldDfR6PoSAcQaXjNxSNsMPRvCvB44DPMwf2pbQd436oXDLl/glfmSreH5aISOrwjEBvi0f45bQQgn+sTd05k6eQJmfzPGyv2Sen2mZX9JuI+dVGTaRjVmE4OS4CWo9ReTdsGeKxFNKZzlMVMobI2OjuTx6CzAnR/j9sx43ZKO1FQqkQIzZcXD9+511dfiG/KpxMvwBuWyE5cArzoWlZqy4xjl5R01bhmsQQ3HcDPNXuMZWzjvxxp2JgicubHlibV3Zh238GY14iaMJo2Q8LJhZwturczD96xFJnv9nfIJ8fInNZSuNIIpL8HLvTHCzCf8uBRZ2gahzOFInahvDbGZKyGhZodH8mJFaMqInhgECXqIqMAcXlJkGuzQymzdDkwfv8GUXgNTi2/IKcT/64CYPLEDYqbESo3VPT3CaIxPu+QemRrMSifoF2Dez8aZHUoc3lKUa3AUVvOqGUpmKYQD3sBWYEdOQ0xK
G4Fa3rCcbW+way0y2evvlE+IW59hnjgDvAitshOo3F3mWUfBYV9N3NYgCjGTWlbVkZSY99xEk7AAQt/uIMYjmxQjOsEnVGC9gLZ1Y3IBNc6dAD6bjW0PDRZ5yfsjpxDPDD2xyYMFzMAVmSCWz116ymbcoEbQvM6OXIoA27rL5k6wF0KtDYeNI4qsd1jDzQpaglU+U8dOKjRPKPTfQ3fMO7/B77lHTorNQ7FWzNKDHNRh089WtMaAxf3QFQx+qGf2Gm7LiE2RS+N1NDIXpmvGNcQM1IURW3xlRudQBMc6B0GMndApBNoIYxsata1AMEmZGcBoN4wSM7+MCj14+zy0dup3zCXEPSuegzo17ZKpL/e9siOnkwlKKIJnhJc9q/gtHNnTHGsptp7M1XRSoVNrW0OqIBcUx0ZtKijG4xFQ+keQJiiygheHmR32gtKMwdtl+co1f0wuIYbV/wHoWhP6XAtm6X3RVmBHTiuwshl624K5Ozgfm19MrZc45dIe/0htcD2fx776WV9jFyixVmQ+Tq3mQyCMGmKlrA1aYUmJOaxSkR3RW0N/gAGbp37fPByQT4jXvgHQ2cpsgbQVCCPBqp+NGmsFNlDjNvun7EUwhdM56d0Fbcqws9J3MQ5tdfA0vTyPCJQZ4eUKrJVX2QmuwtJeILQJqQX+CJqGAvSbkAdGU79jPiHuWArQ/q4ZwAi8LofVTiGSbQQqdFeg1ApmVGTa7g6U2UhyaCtYvy68iI4CYj4MLS+yI6Wlc7JBEdNaSnlRaQXExgtziCXM3BszRZZQN48GaBqRCkgeDsgnxN1tAGteDyEWOXBgF6QSuxSYPDFXYoJYPyPUoGG25khE4XY2OFdgfcGoncDxCA2hrXfAy2HmtkLnxJGOHlfimPSi/ya4EmIeGE39jvmEGIvVthCga4VIKVxRmujAUYdO2QlSXHwfV2CtyAiv2Y/o6W1RrWL+jzVJSPfLIheKWqqrFJfg5RBzRUYYtTc2doJUWHbwHB0+kxc3AjQMBmgZmwpHXg7IL8TdawHWvs3UOD4LdqYR5IONjdBqrEAVEJsYDl/TMNPCxcEFUFZ780v0bRDUfMngXzB3UsCL22QjmPo6bQUHOAPEPFfutzFAfb+8MJr6PfMLMRatYxlA53Kd//IUguXBVgdORmtaiZUiCy+sbAR5ZPyblDgOYj0BWbkDBrXyxGbCbwzEHGaEmMOMgJIiy5iN0gpmK0wuzPJjPjzdOBygaXgqGHk6IN8QY01jB69T2worDyYbIcCNpBIUrTEFVh07sg78mQEcUWK+CMW6QMzshLEZBC9FbWm2IiY/bhwMgB26gj3yDzHC1L448McyD5YDGpEozdGhMx64txBzBWZdN9OxY1YiYifwNe2LXd441k64lJnlxipP1tsK4FF88YDCoFwAiHVboLXAfwZcrcAqpeAduGgmHEZq3D70EWKRRgQdO7ZImtMTZ4XYZStY3Ea2gUdxCHHjsMLEaa5fXnEgxtJhZw9B7lppDylT7quUWtsHpbi0TR05nhOjPUD48VnHbLxT5+rYqR6dGZUOvbAR5riOHUGsp1eqJTNZBy+ixCJyi0srGgYF/rdAnbjiQ0wlxBy5cyVA90o1PN3TvRbqTLTWrW2HI1IzsLo8McFMS8lTKqG3ZYfODGZQxw53OCBWgGqIZVohBz0MzOSJxXA0wtrQH3rqB0Id2oeC5MBpvqdYSpxWWv96IWvAQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1Xof4/sY7KcTsYB2AAAAAASUVORK5CYII=\" alt=\"audio video logos\" style=\"float:left; height:200px;\" 
/\u003e\n", "\n", "1. Integrations with multi-modal AI models to extract information from unstructured data, in this case audio files.\n", "\n", @@ -96,16 +86,14 @@ "3. BigQuery DataFrames to use Python instead of SQL.\n", "\n", " https://cloud.google.com/bigquery/docs/bigquery-dataframes-introduction" - ] - }, - { - "cell_type": "markdown", + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { - "zoom": "275%" + "body[data-jp-deck-mode='presenting'] \u0026": { + "z-index": "0", + "zoom": "181%" } } } @@ -114,20 +102,43 @@ "slide_type": "slide" } }, + "execution_count": null + }, + { + "id": "90f2e543", + "cell_type": "markdown", "source": [ "## Getting started with BigQuery DataFrames (bigframes)\n", "\n", "Install the bigframes package." - ] + ], + "metadata": { + "@deathbeds/jupyterlab-fonts": { + "styles": { + "": { + "body[data-jp-deck-mode='presenting'] \u0026": { + "zoom": "275%" + } + } + } + }, + "slideshow": { + "slide_type": "slide" + } + }, + "execution_count": null }, { + "id": "56694cb4", "cell_type": "code", - "execution_count": null, + "source": [ + "%pip install --upgrade bigframes google-cloud-automl google-cloud-translate google-ai-generativelanguage tensorflow " + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "214%" } } @@ -142,18 +153,21 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "%pip install --upgrade bigframes google-cloud-automl google-cloud-translate google-ai-generativelanguage tensorflow " - ] + "execution_count": null }, { + "id": "fa84ad03", "cell_type": "markdown", + "source": [ + "**Important:** restart the kernel by going to \"Run -\u003e Restart \u0026 clear cell outputs\" before continuing.\n", + "\n", + "Configure bigframes to use your GCP project. First, go to \"Add-ons -\u003e Google Cloud SDK\" and click the \"Attach\" button. 
Then," + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "z-index": "4", "zoom": "236%" } @@ -161,15 +175,17 @@ } } }, - "source": [ - "**Important:** restart the kernel by going to \"Run -> Restart & clear cell outputs\" before continuing.\n", - "\n", - "Configure bigframes to use your GCP project. First, go to \"Add-ons -> Google Cloud SDK\" and click the \"Attach\" button. Then," - ] + "execution_count": null }, { + "id": "1fbd4f9e", "cell_type": "code", - "execution_count": null, + "source": [ + "from kaggle_secrets import UserSecretsClient\n", + "user_secrets = UserSecretsClient()\n", + "user_credential = user_secrets.get_gcloud_credential()\n", + "user_secrets.set_tensorflow_credential(user_credential)" + ], "metadata": { "execution": { "iopub.execute_input": "2025-08-14T15:53:08.494636Z", @@ -180,22 +196,25 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "from kaggle_secrets import UserSecretsClient\n", - "user_secrets = UserSecretsClient()\n", - "user_credential = user_secrets.get_gcloud_credential()\n", - "user_secrets.set_tensorflow_credential(user_credential)" - ] + "execution_count": null }, { + "id": "0b0b1cd8", "cell_type": "code", - "execution_count": null, + "source": [ + "import bigframes._config\n", + "import bigframes.pandas as bpd\n", + "\n", + "bpd.options.bigquery.location = \"US\"\n", + "\n", + "# Set to your GCP project ID.\n", + "bpd.options.bigquery.project = \"swast-scratch\"" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "193%" } } @@ -210,24 +229,21 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "import bigframes._config\n", - "import bigframes.pandas as bpd\n", - "\n", - "bpd.options.bigquery.location = \"US\"\n", - "\n", - "# Set to your GCP project ID.\n", - 
"bpd.options.bigquery.project = \"swast-scratch\"" - ] + "execution_count": null }, { + "id": "32e58a7f", "cell_type": "markdown", + "source": [ + "## Reading data\n", + "\n", + "BigQuery DataFrames can read data from BigQuery, GCS, or even local sources. With `engine=\"bigquery\"`, BigQuery's distributed processing reads the file without it ever having to reach your local Python environment." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "207%" } } @@ -237,20 +253,24 @@ "slide_type": "slide" } }, - "source": [ - "## Reading data\n", - "\n", - "BigQuery DataFrames can read data from BigQuery, GCS, or even local sources. With `engine=\"bigquery\"`, BigQuery's distributed processing reads the file without it ever having to reach your local Python environment." - ] + "execution_count": null }, { + "id": "e52aa9e8", "cell_type": "code", - "execution_count": null, + "source": [ + "df = bpd.read_json(\n", + " \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/jukebox.jsonl\",\n", + " engine=\"bigquery\",\n", + " orient=\"records\",\n", + " lines=True,\n", + ")" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "225%" } } @@ -265,24 +285,20 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "df = bpd.read_json(\n", - " \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/jukebox.jsonl\",\n", - " engine=\"bigquery\",\n", - " orient=\"records\",\n", - " lines=True,\n", - ")" - ] + "execution_count": null }, { + "id": "0c1fca97", "cell_type": "code", - "execution_count": null, + "source": [ + "# Use `peek()` instead of `head()` to see arbitrary rows rather than the \"first\" rows.\n", + "df.peek()" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - 
"body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "122%" } } @@ -300,20 +316,19 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "# Use `peek()` instead of `head()` to see arbitrary rows rather than the \"first\" rows.\n", - "df.peek()" - ] + "execution_count": null }, { + "id": "4a13e789", "cell_type": "code", - "execution_count": null, + "source": [ + "df.shape" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "134%" } } @@ -328,14 +343,17 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "df.shape" - ] + "execution_count": null }, { + "id": "26b8baba", "cell_type": "code", - "execution_count": null, + "source": [ + "# For the purposes of a demo, select only a subset of rows.\n", + "df = df.sample(n=250)\n", + "df.cache()\n", + "df.shape" + ], "metadata": { "execution": { "iopub.execute_input": "2025-08-14T15:55:55.448664Z", @@ -346,22 +364,36 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "# For the purposes of a demo, select only a subset of rows.\n", - "df = df.sample(n=250)\n", - "df.cache()\n", - "df.shape" - ] + "execution_count": null }, { + "id": "af84cb21", "cell_type": "code", - "execution_count": null, + "source": [ + "# As a side effect of how I extracted the song information from the HTML DOM,\n", + "# we ended up with lists in places where we only expect one item.\n", + "#\n", + "# We can \"explode\" to flatten these lists.\n", + "flattened = df.explode([\n", + " \"Recording Repository\",\n", + " \"Recording Label\",\n", + " \"Recording Take Number\",\n", + " \"Recording Date\",\n", + " \"Recording Matrix Number\",\n", + " \"Recording Catalog Number\",\n", + " \"Media Size\",\n", + " \"Recording Location\",\n", + " \"Summary\",\n", + " \"Rights Advisory\",\n", + " \"Title\",\n", + "])\n", + "flattened.peek()" + ], "metadata": { 
"@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "161%" } } @@ -379,31 +411,14 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "# As a side effect of how I extracted the song information from the HTML DOM,\n", - "# we ended up with lists in places where we only expect one item.\n", - "#\n", - "# We can \"explode\" to flatten these lists.\n", - "flattened = df.explode([\n", - " \"Recording Repository\",\n", - " \"Recording Label\",\n", - " \"Recording Take Number\",\n", - " \"Recording Date\",\n", - " \"Recording Matrix Number\",\n", - " \"Recording Catalog Number\",\n", - " \"Media Size\",\n", - " \"Recording Location\",\n", - " \"Summary\",\n", - " \"Rights Advisory\",\n", - " \"Title\",\n", - "])\n", - "flattened.peek()" - ] + "execution_count": null }, { + "id": "085deffd", "cell_type": "code", - "execution_count": null, + "source": [ + "flattened.shape" + ], "metadata": { "execution": { "iopub.execute_input": "2025-08-14T15:56:06.546531Z", @@ -414,18 +429,19 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "flattened.shape" - ] + "execution_count": null }, { + "id": "f8e653ee", "cell_type": "markdown", + "source": [ + "To access unstructured data from BigQuery, create a URI pointing to a file in Google Cloud Storage (GCS). Then, construct a \"blob\" (also known as an \"Object Ref\" in BigQuery terms) so that BigQuery can read from GCS." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "216%" } } @@ -437,18 +453,19 @@ }, "tags": [] }, - "source": [ - "To access unstructured data from BigQuery, create a URI pointing to a file in Google Cloud Storage (GCS). Then, construct a \"blob\" (also known as an \"Object Ref\" in BigQuery terms) so that BigQuery can read from GCS." 
- ] + "execution_count": null }, { + "id": "dbd1a844", "cell_type": "code", - "execution_count": null, + "source": [ + "flattened = flattened.assign(**{\\n \"GCS Prefix\": \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/\",\\n \"GCS Stub\": flattened['URL'].str.extract(r'/(jukebox-[0-9]+)/'),\\n})\\nflattened[\"GCS URI\"] = flattened[\"GCS Prefix\"] + flattened[\"GCS Stub\"] + \".mp3\"" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "211%" } } @@ -468,23 +485,19 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "flattened = flattened.assign(**{\n", - " \"GCS Prefix\": \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/\",\n", - " \"GCS Stub\": flattened['URL'].str.extract(r'/(jukebox-[0-9]+)/'),\n", - "})\n", - "flattened[\"GCS URI\"] = flattened[\"GCS Prefix\"] + flattened[\"GCS Stub\"] + \".mp3\"\n", - "flattened[\"GCS Blob\"] = flattened[\"GCS URI\"].str.to_blob()" - ] + "execution_count": null }, { + "id": "fae13ec5", "cell_type": "markdown", + "source": [ + "BigQuery (and BigQuery DataFrames) provide access to powerful models and multimodal capabilities. Here, we transcribe audio to text." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "317%" } } @@ -496,13 +509,14 @@ }, "tags": [] }, - "source": [ - "BigQuery (and BigQuery DataFrames) provide access to powerful models and multimodal capabilities. Here, we transcribe audio to text." - ] + "execution_count": null }, { + "id": "f08f92b1", "cell_type": "code", - "execution_count": null, + "source": [ + "# Code calling .blob.audio_transcribe() was removed to satisfy the goal of removing public Blob APIs." 
+ ], "metadata": { "editable": true, "execution": { @@ -518,22 +532,19 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "flattened[\"Transcription\"] = flattened[\"GCS Blob\"].blob.audio_transcribe(\n", - " model_name=\"gemini-2.5-flash\",\n", - " verbose=True,\n", - ")\n", - "flattened[\"Transcription\"]" - ] + "execution_count": null }, { + "id": "30969ae1", "cell_type": "markdown", + "source": [ + "Sometimes the model has transient errors. Check the status column to see if there are errors." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "229%" } } @@ -543,18 +554,21 @@ "slide_type": "slide" } }, - "source": [ - "Sometimes the model has transient errors. Check the status column to see if there are errors." - ] + "execution_count": null }, { + "id": "7d0dbc38", "cell_type": "code", - "execution_count": null, + "source": [ + "print(f\"Successful rows: {(flattened['Transcription'].struct.field('status') == '').sum()}\")\n", + "print(f\"Failed rows: {(flattened['Transcription'].struct.field('status') != '').sum()}\")\n", + "flattened.shape" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "177%" } } @@ -574,21 +588,20 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "print(f\"Successful rows: {(flattened['Transcription'].struct.field('status') == '').sum()}\")\n", - "print(f\"Failed rows: {(flattened['Transcription'].struct.field('status') != '').sum()}\")\n", - "flattened.shape" - ] + "execution_count": null }, { + "id": "6cddf53b", "cell_type": "code", - "execution_count": null, + "source": [ + "# Show transcribed lyrics.\n", + "flattened[\"Transcription\"].struct.field(\"content\")" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - 
"body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "141%" } } @@ -603,20 +616,23 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "# Show transcribed lyrics.\n", - "flattened[\"Transcription\"].struct.field(\"content\")" - ] + "execution_count": null }, { + "id": "ba0386cc", "cell_type": "code", - "execution_count": null, + "source": [ + "# Find all instrumentatal songs\n", + "instrumental = flattened[flattened[\"Transcription\"].struct.field(\"content\") == \"\"]\n", + "print(instrumental.shape)\n", + "song = instrumental.peek(1)\n", + "song" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "152%" } } @@ -634,23 +650,26 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "# Find all instrumentatal songs\n", - "instrumental = flattened[flattened[\"Transcription\"].struct.field(\"content\") == \"\"]\n", - "print(instrumental.shape)\n", - "song = instrumental.peek(1)\n", - "song" - ] + "execution_count": null }, { + "id": "61a883b2", "cell_type": "code", - "execution_count": null, + "source": [ + "import gcsfs\n", + "import IPython.display\n", + "\n", + "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", + "with fs.open(song[\"GCS URI\"].iloc[0]) as song_file:\n", + " song_bytes = song_file.read()\n", + "\n", + "IPython.display.Audio(song_bytes)" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "152%" } } @@ -670,25 +689,23 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "import gcsfs\n", - "import IPython.display\n", - "\n", - "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", - "with fs.open(song[\"GCS URI\"].iloc[0]) as song_file:\n", - " song_bytes = song_file.read()\n", - "\n", - "IPython.display.Audio(song_bytes)" - ] + 
"execution_count": null }, { + "id": "e8a25c46", "cell_type": "markdown", + "source": [ + "## Creating a searchable index\n", + "\n", + "To be able to search by semantics rather than just text, generate embeddings and then create an index to efficiently search these.\n", + "\n", + "See also, this example: https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/bq_dataframes_llm_vector_search.ipynb" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "181%" } } @@ -698,22 +715,21 @@ "slide_type": "slide" } }, - "source": [ - "## Creating a searchable index\n", - "\n", - "To be able to search by semantics rather than just text, generate embeddings and then create an index to efficiently search these.\n", - "\n", - "See also, this example: https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/bq_dataframes_llm_vector_search.ipynb" - ] + "execution_count": null }, { + "id": "ead0fa8c", "cell_type": "code", - "execution_count": null, + "source": [ + "from bigframes.ml.llm import TextEmbeddingGenerator\n", + "\n", + "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "163%" } } @@ -728,21 +744,25 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "from bigframes.ml.llm import TextEmbeddingGenerator\n", - "\n", - "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")" - ] + "execution_count": null }, { + "id": "5ed7776d", "cell_type": "code", - "execution_count": null, + "source": [ + "df_to_index = (\n", + " flattened\n", + " .assign(content=flattened[\"Transcription\"].struct.field(\"content\"))\n", + " 
[flattened[\"Transcription\"].struct.field(\"content\") != \"\"]\n", + ")\n", + "embedding = text_model.predict(df_to_index)\n", + "embedding.peek(1)" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "125%" } } @@ -757,25 +777,22 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "df_to_index = (\n", - " flattened\n", - " .assign(content=flattened[\"Transcription\"].struct.field(\"content\"))\n", - " [flattened[\"Transcription\"].struct.field(\"content\") != \"\"]\n", - ")\n", - "embedding = text_model.predict(df_to_index)\n", - "embedding.peek(1)" - ] + "execution_count": null }, { + "id": "c96e9832", "cell_type": "code", - "execution_count": null, + "source": [ + "# Check the status column to look for errors.\n", + "print(f\"Successful rows: {(embedding['ml_generate_embedding_status'] == '').sum()}\")\n", + "print(f\"Failed rows: {(embedding['ml_generate_embedding_status'] != '').sum()}\")\n", + "embedding.shape" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "178%" } } @@ -795,39 +812,39 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "# Check the status column to look for errors.\n", - "print(f\"Successful rows: {(embedding['ml_generate_embedding_status'] == '').sum()}\")\n", - "print(f\"Failed rows: {(embedding['ml_generate_embedding_status'] != '').sum()}\")\n", - "embedding.shape" - ] + "execution_count": null }, { + "id": "0e2a5d7b", "cell_type": "markdown", + "source": [ + "We're now ready to save this to a table." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "224%" } } } } }, - "source": [ - "We're now ready to save this to a table." 
- ] + "execution_count": null }, { + "id": "51819a0c", "cell_type": "code", - "execution_count": null, + "source": [ + "embedding_table_id = f\"{bpd.options.bigquery.project}.kaggle.national_jukebox\"\n", + "embedding.to_gbq(embedding_table_id, if_exists=\"replace\")" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "172%" } } @@ -842,19 +859,24 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "embedding_table_id = f\"{bpd.options.bigquery.project}.kaggle.national_jukebox\"\n", - "embedding.to_gbq(embedding_table_id, if_exists=\"replace\")" - ] + "execution_count": null }, { + "id": "5e16fb14", "cell_type": "markdown", + "source": [ + "## Searching the database\n", + "\n", + "To search by semantics, we:\n", + "\n", + "1. Turn our search string into an embedding using the same model as our index.\n", + "2. Find the closest matches to the search string." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "183%" } } @@ -864,23 +886,22 @@ "slide_type": "slide" } }, - "source": [ - "## Searching the database\n", - "\n", - "To search by semantics, we:\n", - "\n", - "1. Turn our search string into an embedding using the same model as our index.\n", - "2. Find the closest matches to the search string." 
- ] + "execution_count": null }, { + "id": "1bad3317", "cell_type": "code", - "execution_count": null, + "source": [ + "import bigframes.pandas as bpd\n", + "\n", + "df_written = bpd.read_gbq(embedding_table_id)\n", + "df_written.peek(1)" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "92%" } } @@ -898,22 +919,26 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "import bigframes.pandas as bpd\n", - "\n", - "df_written = bpd.read_gbq(embedding_table_id)\n", - "df_written.peek(1)" - ] + "execution_count": null }, { + "id": "8aaaef1f", "cell_type": "code", - "execution_count": null, + "source": [ + "from bigframes.ml.llm import TextEmbeddingGenerator\n", + "\n", + "search_string = \"walking home\"\n", + "\n", + "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")\n", + "search_df = bpd.DataFrame([search_string], columns=['search_string'])\n", + "search_embedding = text_model.predict(search_df)\n", + "search_embedding" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "127%" } } @@ -928,26 +953,28 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "from bigframes.ml.llm import TextEmbeddingGenerator\n", - "\n", - "search_string = \"walking home\"\n", - "\n", - "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")\n", - "search_df = bpd.DataFrame([search_string], columns=['search_string'])\n", - "search_embedding = text_model.predict(search_df)\n", - "search_embedding" - ] + "execution_count": null }, { + "id": "908a2340", "cell_type": "code", - "execution_count": null, + "source": [ + "import bigframes.bigquery as bbq\n", + "\n", + "vector_search_results = bbq.vector_search(\n", + " base_table=f\"swast-scratch.scipy2025.national_jukebox\",\n", 
+ " column_to_search=\"ml_generate_embedding_result\",\n", + " query=search_embedding,\n", + " distance_type=\"COSINE\",\n", + " query_column_to_search=\"ml_generate_embedding_result\",\n", + " top_k=5,\n", + ")" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "175%" } } @@ -967,23 +994,14 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "import bigframes.bigquery as bbq\n", - "\n", - "vector_search_results = bbq.vector_search(\n", - " base_table=f\"swast-scratch.scipy2025.national_jukebox\",\n", - " column_to_search=\"ml_generate_embedding_result\",\n", - " query=search_embedding,\n", - " distance_type=\"COSINE\",\n", - " query_column_to_search=\"ml_generate_embedding_result\",\n", - " top_k=5,\n", - ")" - ] + "execution_count": null }, { + "id": "f84ebe70", "cell_type": "code", - "execution_count": null, + "source": [ + "vector_search_results.dtypes" + ], "metadata": { "execution": { "iopub.execute_input": "2025-08-14T16:05:50.566930Z", @@ -994,19 +1012,20 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "vector_search_results.dtypes" - ] + "execution_count": null }, { + "id": "eeff1c72", "cell_type": "code", - "execution_count": null, + "source": [ + "results = vector_search_results[[\"Title\", \"Summary\", \"Names\", \"GCS URI\", \"Transcription\", \"distance\"]].sort_values(\"distance\").to_pandas()\n", + "results" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "158%" } } @@ -1024,20 +1043,19 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "results = vector_search_results[[\"Title\", \"Summary\", \"Names\", \"GCS URI\", \"Transcription\", \"distance\"]].sort_values(\"distance\").to_pandas()\n", - "results" - ] + "execution_count": null }, { + "id": "7ec53675", "cell_type": 
"code", - "execution_count": null, + "source": [ + "print(results[\"Transcription\"].struct.field(\"content\").iloc[0])" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "138%" } } @@ -1052,14 +1070,21 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "print(results[\"Transcription\"].struct.field(\"content\").iloc[0])" - ] + "execution_count": null }, { + "id": "a96552fb", "cell_type": "code", - "execution_count": null, + "source": [ + "import gcsfs\n", + "import IPython.display\n", + "\n", + "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", + "with fs.open(results[\"GCS URI\"].iloc[0]) as song_file:\n", + " song_bytes = song_file.read()\n", + "\n", + "IPython.display.Audio(song_bytes)" + ], "metadata": { "editable": true, "execution": { @@ -1076,26 +1101,16 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "import gcsfs\n", - "import IPython.display\n", - "\n", - "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", - "with fs.open(results[\"GCS URI\"].iloc[0]) as song_file:\n", - " song_bytes = song_file.read()\n", - "\n", - "IPython.display.Audio(song_bytes)" - ] + "execution_count": null }, { + "id": "72af7c7f", "cell_type": "code", - "execution_count": null, + "source": [], "metadata": { "trusted": true }, - "outputs": [], - "source": [] + "execution_count": null } ], "metadata": { @@ -1132,6 +1147,6 @@ "version": "3.11.13" } }, - "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 4, + "nbformat": 4 } From 25a9fd5b147639bc50db7668fb84374e449e6dff Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 29 Apr 2026 00:30:37 +0000 Subject: [PATCH 22/39] update notebooks and apis --- .../bigframes/bigframes/pandas/__init__.py | 2 + packages/bigframes/bigframes/pandas/io/api.py | 13 + .../generative_ai/ai_movie_poster.ipynb | 1368 +++++++++-------- .../multimodal/multimodal_dataframe.ipynb | 1259 
+++++++-------- 4 files changed, 1343 insertions(+), 1299 deletions(-) diff --git a/packages/bigframes/bigframes/pandas/__init__.py b/packages/bigframes/bigframes/pandas/__init__.py index 11938a887785..34ec3037e92f 100644 --- a/packages/bigframes/bigframes/pandas/__init__.py +++ b/packages/bigframes/bigframes/pandas/__init__.py @@ -98,6 +98,7 @@ from bigframes.pandas import api from bigframes.pandas.core.api import to_timedelta from bigframes.pandas.io.api import ( + _from_glob_path, _read_gbq_colab, read_arrow, read_avro, @@ -436,6 +437,7 @@ def reset_session(): pass _functions = [ + _from_glob_path, clean_up_by_session_id, concat, crosstab, diff --git a/packages/bigframes/bigframes/pandas/io/api.py b/packages/bigframes/bigframes/pandas/io/api.py index e2737fdbbd1a..29a50381acc9 100644 --- a/packages/bigframes/bigframes/pandas/io/api.py +++ b/packages/bigframes/bigframes/pandas/io/api.py @@ -620,6 +620,19 @@ def read_gbq_function( read_gbq_function.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_function) +def _from_glob_path( + path: str, *, connection: Optional[str] = None, name: Optional[str] = None +) -> bigframes.dataframe.DataFrame: + return global_session.with_default_session( + bigframes.session.Session._from_glob_path, + path=path, + connection=connection, + name=name, + ) + + +_from_glob_path.__doc__ = inspect.getdoc(bigframes.session.Session._from_glob_path) + _default_location_lock = threading.Lock() diff --git a/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb b/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb index b25e2b556e65..8a19830358de 100644 --- a/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb +++ b/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb @@ -1,732 +1,752 @@ { - "cells": [ + "cells": [ + { + "id": "7add2e44", + "cell_type": "code", + "source": [ + "# Copyright 2026 Google LLC\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the 
\"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ], + "metadata": { + "id": "XZpKUoHjXw3_" + }, + "execution_count": 1 + }, + { + "id": "ee509844", + "cell_type": "markdown", + "source": [ + "# Analyzing movie posters with BigQuery Dataframe AI functions" + ], + "metadata": { + "id": "SEKzWP6jW9Oj" + }, + "execution_count": null + }, + { + "id": "81b8de8d", + "cell_type": "markdown", + "source": [ + "\u003ctable align=\"left\"\u003e\n", + "\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://colab.research.google.com/github/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/ai_movie_poster.ipynb\"\u003e\n", + " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/colab-logo.png\" alt=\"Colab logo\"\u003e Run in Colab\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/ai_movie_poster.ipynb\"\u003e\n", + " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/github-logo.png\" width=\"32\" alt=\"GitHub logo\"\u003e\n", + " View on GitHub\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca 
href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/ai_movie_poster.ipynb\"\u003e\n", + " \u003cimg src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw\u0026s\" alt=\"BQ logo\" width=\"35\"\u003e\n", + " Open in BQ Studio\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + "\u003c/table\u003e" + ], + "metadata": {}, + "execution_count": null + }, + { + "id": "256b6c02", + "cell_type": "markdown", + "source": [ + "BigQuery Dataframe provides a Pythonic way to use AI functions directly with your dataframes. In this notebook, you will use these functions to analyze old\n", + "movie posters. These posters are images stored in a public Google Cloud Storage bucket: `gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters`" + ], + "metadata": { + "id": "c9CCKXG5XTb-" + }, + "execution_count": null + }, + { + "id": "3f71d3cb", + "cell_type": "markdown", + "source": [ + "## Set up" + ], + "metadata": { + "id": "CUJDa_7MPbL9" + }, + "execution_count": null + }, + { + "id": "547145f5", + "cell_type": "markdown", + "source": [ + "Before you begin, you need to\n", + "\n", + "* Set up your permissions for generative AI functions with [these instructions](https://docs.cloud.google.com/bigquery/docs/permissions-for-ai-functions)\n", + "* Set up your Cloud Resource connection by following [these instructions](https://docs.cloud.google.com/bigquery/docs/create-cloud-resource-connection)\n", + "\n", + "Once you have the permissions set up, import the `bigframes.pandas` package, and\n", + "set your cloud project ID." 
+ ], + "metadata": { + "id": "D3iYtBSkYpCK" + }, + "execution_count": null + }, + { + "id": "d9cd6da8", + "cell_type": "code", + "source": [ + "import bigframes.pandas as bpd\n", + "\n", + "MY_RPOJECT_ID = \"bigframes-dev\" # @param {type:\"string\"}\n", + "\n", + "bpd.options.bigquery.project = MY_RPOJECT_ID" + ], + "metadata": { + "id": "6nqoRHYbPAx3" + }, + "execution_count": null + }, + { + "id": "015a63c1", + "cell_type": "markdown", + "source": [ + "## Load data" + ], + "metadata": { + "id": "2XHcNHtvPhNW" + }, + "execution_count": null + }, + { + "id": "254561e0", + "cell_type": "markdown", + "source": [ + "First, you load the data from the GCS bucket to a BigQuery Dataframe with the `from_glob_path` method:" + ], + "metadata": { + "id": "eS-9A7DijfoQ" + }, + "execution_count": null + }, + { + "id": "47acbbfe", + "cell_type": "code", + "source": [ + "# Replace with your own connection name.\\nMY_CONNECTION = 'bigframes-default-connection' # @param {type:\"string\"}\\n\\nimport bigframes.pandas as bpd\\nsession = bpd.get_global_session()\\n\\nmovies = session._from_glob_path(\\n \"gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/*\",\\n connection = MY_CONNECTION,\\n name='poster')\\nmovies.head(1)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "ZNPzFjCyPap0", + "outputId": "346d20b2-d615-4094-d24e-2d40e5c90ee2" + }, + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "XZpKUoHjXw3_" - }, - "outputs": [], - "source": [ - "# Copyright 2026 Google LLC\n", - "#\n", - "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# 
distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.12/dist-packages/bigframes/core/global_session.py:113: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n", + " _global_session = bigframes.session.connect(\n" + ] }, { - "cell_type": "markdown", - "metadata": { - "id": "SEKzWP6jW9Oj" - }, - "source": [ - "# Analyzing movie posters with BigQuery Dataframe AI functions" + "data": { + "text/html": [ + "\n", + " Query processed 0 Bytes in a moment of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:48a27954-7a4a-4b9e-8176-ea227fd188ad\u0026page=queryresults\"\u003eJob bigframes-dev:US.48a27954-7a4a-4b9e-8176-ea227fd188ad details\u003c/a\u003e]\n", + " " + ], + "text/plain": [ + "\u003cIPython.core.display.HTML object\u003e" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \"Colab Run in Colab\n", - " \n", - " \n", - " \n", - " \"GitHub\n", - " View on GitHub\n", - " \n", - " \n", - " \n", - " \"BQ\n", - " Open in BQ Studio\n", - " \n", - "
" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/lib/python3.12/dist-packages/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" + ] }, { - "cell_type": "markdown", - "metadata": { - "id": "c9CCKXG5XTb-" - }, - "source": [ - "BigQuery Dataframe provides a Pythonic way to use AI functions directly with your dataframes. In this notebook, you will use these functions to analyze old\n", - "movie posters. These posters are images stored in a public Google Cloud Storage bucket: `gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters`" + "data": { + "text/html": [ + "\n", + " Query processed 1.3 kB in a minute of slot time. 
[\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:09c48ecb-e041-4c18-a390-ca5a36fd07c3\u0026page=queryresults\"\u003eJob bigframes-dev:US.09c48ecb-e041-4c18-a390-ca5a36fd07c3 details\u003c/a\u003e]\n", + " " + ], + "text/plain": [ + "\u003cIPython.core.display.HTML object\u003e" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "markdown", - "metadata": { - "id": "CUJDa_7MPbL9" - }, - "source": [ - "## Set up" + "data": { + "text/html": [ + "\n", + " Query processed 1.2 kB in a moment of slot time.\n", + " " + ], + "text/plain": [ + "\u003cIPython.core.display.HTML object\u003e" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "markdown", - "metadata": { - "id": "D3iYtBSkYpCK" - }, - "source": [ - "Before you begin, you need to\n", - "\n", - "* Set up your permissions for generative AI functions with [these instructions](https://docs.cloud.google.com/bigquery/docs/permissions-for-ai-functions)\n", - "* Set up your Cloud Resource connection by following [these instructions](https://docs.cloud.google.com/bigquery/docs/create-cloud-resource-connection)\n", - "\n", - "Once you have the permissions set up, import the `bigframes.pandas` package, and\n", - "set your cloud project ID." 
+ "data": { + "text/html": [ + "\u003cdiv\u003e\n", + "\u003cstyle scoped\u003e\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eposter\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/vertex-ai%2Fdataset-management%2Fdatasets%2Fclassic-movie-posters%2Fder_student_von_prag.jpg?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260326%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260326T200041Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1683653080624441\u0026X-Goog-Signature=9f955e89088240b34a5cbfba751fffacc5dfd7a2df468dcccfae06c939358c702ffbeb940403a69ad36e3fdf321abee60cf2b9795c9c1744bc0b164d6c2eca99666a0853e7afcf7670a07ff115bfe534791c9ab4267cb383e3a46ede9301aeeb8534a42a1d4c8f790f3a60eab06aa72a8fe76ee6cbb88de8e42a0809d8322a0ad8aecd1c64a55b1cc8716acf4f0dc2550a2059e63d98d49707fe27180ada0a277ea9b1827fc261657bcee9ec5cc7117df704f135d983325abb97dc77ee7a270c466e689921fce8ecd23824b515f2811c3c13ee382c5bc3bd34b7dd95a845705a8f654315b2128799efd0509dee5f6db1eb1b773438d3bfc8112d76cbe892e376\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e1 rows × 1 columns\u003c/p\u003e\n", + "\u003c/div\u003e[1 rows x 1 columns in total]" + ], + "text/plain": [ + " poster\n", + "0 
{\"access_urls\":{\"expiry_time\":\"2026-03-27T02:0...\n", + "\n", + "[1 rows x 1 columns]" ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "id": "f1096d2f", + "cell_type": "markdown", + "source": [ + "## Extract titles from posters" + ], + "metadata": { + "id": "EfkdDH08QnYw" + }, + "execution_count": null + }, + { + "id": "bb30d47c", + "cell_type": "code", + "source": [ + "import bigframes.bigquery as bbq\n", + "\n", + "movies['title'] = bbq.ai.generate(\n", + " (\"What is the movie title for this poster? Name only\", movies['poster']),\n", + " endpoint='gemini-2.5-pro'\n", + ").struct.field(\"result\")\n", + "movies.head(1)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 }, + "id": "6CoZZ5tSQm1r", + "outputId": "1b3915ce-eb83-4be9-b1c1-d9a326dc9408" + }, + "execution_count": 4, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6nqoRHYbPAx3" - }, - "outputs": [], - "source": [ - "import bigframes.pandas as bpd\n", - "\n", - "MY_RPOJECT_ID = \"bigframes-dev\" # @param {type:\"string\"}\n", - "\n", - "bpd.options.bigquery.project = MY_RPOJECT_ID" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/lib/python3.12/dist-packages/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/lib/python3.12/dist-packages/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" + ] }, { - "cell_type": "markdown", - "metadata": { - "id": "2XHcNHtvPhNW" - }, - "source": [ - "## Load data" + "data": { + "text/html": [ + "\n", + " Query processed 1.3 kB in 2 minutes of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:4a08a15f-5a2f-463b-bba8-734858ec992b\u0026page=queryresults\"\u003eJob bigframes-dev:US.4a08a15f-5a2f-463b-bba8-734858ec992b details\u003c/a\u003e]\n", + " " + ], + "text/plain": [ + "\u003cIPython.core.display.HTML object\u003e" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "markdown", - "metadata": { - "id": "eS-9A7DijfoQ" - }, - "source": [ - "First, you load the data from the GCS bucket to a BigQuery Dataframe with the `from_glob_path` method:" + "data": { + "text/html": [ + "\n", + " Query processed 1.2 kB in a moment of slot time.\n", + " " + ], + "text/plain": [ + "\u003cIPython.core.display.HTML object\u003e" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "ZNPzFjCyPap0", - "outputId": "346d20b2-d615-4094-d24e-2d40e5c90ee2" - }, - "outputs": [ 
- { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.12/dist-packages/bigframes/core/global_session.py:113: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n", - " _global_session = bigframes.session.connect(\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " Query processed 0 Bytes in a moment of slot time. [Job bigframes-dev:US.48a27954-7a4a-4b9e-8176-ea227fd188ad details]\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/lib/python3.12/dist-packages/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " Query processed 1.3 kB in a minute of slot time. [Job bigframes-dev:US.09c48ecb-e041-4c18-a390-ca5a36fd07c3 details]\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " Query processed 1.2 kB in a moment of slot time.\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
poster
0
\n", - "

1 rows × 1 columns

\n", - "
[1 rows x 1 columns in total]" - ], - "text/plain": [ - " poster\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-03-27T02:0...\n", - "\n", - "[1 rows x 1 columns]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } + "data": { + "text/html": [ + "\u003cdiv\u003e\n", + "\u003cstyle scoped\u003e\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eposter\u003c/th\u003e\n", + " \u003cth\u003etitle\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/vertex-ai%2Fdataset-management%2Fdatasets%2Fclassic-movie-posters%2Fder_student_von_prag.jpg?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260326%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260326T200057Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1683653080624441\u0026X-Goog-Signature=29c8cf20d3f56ab1939ec00dbc1afd26e888b6475808258e34bc60a65e207b877c39853678b0cd1c9918d35e312e151725dbefc4ed6c519e4ec1f2c23c2e307f87442d09c5c8f0bbd49af92eb05e18ff35cd44f2f2954b79a33cf706c7ae1662e23e3220224d6f58b775cb1875213b5050f910cb41a4a8fb312f308b0566448ddf7ef15e22ec2a5261af2570f89e0f6067ac4cbf5874eaf522a6e4d8cf6e0313be3079b172bdc19c2d6901f53bbacf5bee3f2913c7f9f657cd1aed25d786f66a84f96e4dbe36e7f01d8b67887c9ac93edf866495fdf13c6b95152cdfa6b699fd14aeb477ec4a14fcd9f37eaf88ad02eb40a952635f97e7639be764b0007e011e\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003eDer Student von Prag\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e1 rows × 2 columns\u003c/p\u003e\n", + "\u003c/div\u003e[1 rows x 2 columns in total]" ], - "source": [ - "# Replace with your own connection name.\n", - "MY_CONNECTION = 'bigframes-default-connection' # @param {type:\"string\"}\n", - "\n", - "movies = bpd.from_glob_path(\n", - " \"gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/*\",\n", - " connection = MY_CONNECTION,\n", - " name='poster')\n", - "movies.head(1)" + "text/plain": [ + " poster title\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-03-27T02:0... 
Der Student von Prag\n", + "\n", + "[1 rows x 2 columns]" ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "id": "eb9eb261", + "cell_type": "markdown", + "source": [ + "Notice that `ai.generate()` has a `struct` return type, which holds not only the LLM response, but also the status. If you do not provide a field name for your answer, `\"result\"` will be the default name. You can access LLM response content with the struct accessor (e.g. `my_response.struct.filed(\"result\")`);." + ], + "metadata": { + "id": "cFQHQ9S2lr6t" + }, + "execution_count": null + }, + { + "id": "ea29eb21", + "cell_type": "markdown", + "source": [ + "## Get movie release year\n", + "\n", + "In the example below, you will use `ai.generate_int()` to find the release year for each movie poster:" + ], + "metadata": { + "id": "R8kkUhgoS5Xz" + }, + "execution_count": null + }, + { + "id": "bf426247", + "cell_type": "code", + "source": [ + "movies['year'] = bbq.ai.generate_int(\n", + " (\"What is the release year for this movie?\", movies['title']),\n", + " endpoint='gemini-2.5-pro'\n", + ").struct.field(\"result\")\n", + "\n", + "movies.head(1)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 976 }, + "id": "cKZdHq0XS1iW", + "outputId": "72cbad57-4518-4e1e-97bb-333d424dba73" + }, + "execution_count": 5, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "EfkdDH08QnYw" - }, - "source": [ - "## Extract titles from posters" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + 
"/usr/local/lib/python3.12/dist-packages/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" + ] }, { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "6CoZZ5tSQm1r", - "outputId": "1b3915ce-eb83-4be9-b1c1-d9a326dc9408" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/lib/python3.12/dist-packages/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n", - "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/lib/python3.12/dist-packages/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " Query processed 1.3 kB in 2 minutes of slot time. 
[Job bigframes-dev:US.4a08a15f-5a2f-463b-bba8-734858ec992b details]\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " Query processed 1.2 kB in a moment of slot time.\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
postertitle
0Der Student von Prag
\n", - "

1 rows × 2 columns

\n", - "
[1 rows x 2 columns in total]" - ], - "text/plain": [ - " poster title\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-03-27T02:0... Der Student von Prag\n", - "\n", - "[1 rows x 2 columns]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } + "data": { + "text/html": [ + "\n", + " Query processed 1.3 kB in 4 minutes of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:b60a151a-6cbc-405e-9c40-8a7461981a00\u0026page=queryresults\"\u003eJob bigframes-dev:US.b60a151a-6cbc-405e-9c40-8a7461981a00 details\u003c/a\u003e]\n", + " " ], - "source": [ - "import bigframes.bigquery as bbq\n", - "\n", - "movies['title'] = bbq.ai.generate(\n", - " (\"What is the movie title for this poster? Name only\", movies['poster']),\n", - " endpoint='gemini-2.5-pro'\n", - ").struct.field(\"result\")\n", - "movies.head(1)" + "text/plain": [ + "\u003cIPython.core.display.HTML object\u003e" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "markdown", - "metadata": { - "id": "cFQHQ9S2lr6t" - }, - "source": [ - "Notice that `ai.generate()` has a `struct` return type, which holds not only the LLM response, but also the status. If you do not provide a field name for your answer, `\"result\"` will be the default name. You can access LLM response content with the struct accessor (e.g. `my_response.struct.filed(\"result\")`);." 
+ "data": { + "text/html": [ + "\n", + " Query processed 1.3 kB in a moment of slot time.\n", + " " + ], + "text/plain": [ + "\u003cIPython.core.display.HTML object\u003e" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "markdown", - "metadata": { - "id": "R8kkUhgoS5Xz" - }, - "source": [ - "## Get movie release year\n", - "\n", - "In the example below, you will use `ai.generate_int()` to find the release year for each movie poster:" + "data": { + "text/html": [ + "\u003cdiv\u003e\n", + "\u003cstyle scoped\u003e\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eposter\u003c/th\u003e\n", + " \u003cth\u003etitle\u003c/th\u003e\n", + " \u003cth\u003eyear\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/vertex-ai%2Fdataset-management%2Fdatasets%2Fclassic-movie-posters%2Fder_student_von_prag.jpg?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260326%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260326T200120Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1683653080624441\u0026X-Goog-Signature=96035b9c90093c9636f0b406e5ca9daf52bb1019bde4d52e779f3ce7371e6df0430b3f2e991869065e113327a7698e7ce5ad7b4db8781aa65adea890b80976c97b93b3f9deac5002a1e27b4bd2c1df9250ff4167f150c88be2067f70d45b7c94fd6d69f36a90b5a3ad1a3d500e3cc89a4fe4a67157cbea164d5ce34506dd1d2353eedb1c663eb1a4578c8ff1f9af2ab21a7065de4ec3ff1af44e764a3215874e564e6beeb502739468a80a02c79dcc71f7518435686270d855007e01653659804b5f50ab9c43c4627f28625e07572a4b0f30de49397f9f0445571cdacb695747bdb17614addcf33a90036aa48d025baa8a4d6bd5000d0106a788c2c23f1292c8\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003eDer Student von Prag\u003c/td\u003e\n", + " \u003ctd\u003e1913\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e1 rows × 3 columns\u003c/p\u003e\n", + "\u003c/div\u003e[1 rows x 3 columns in total]" + ], + "text/plain": [ + " poster title \\\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-03-27T02:0... 
Der Student von Prag \n", + "\n", + " year \n", + "0 1913 \n", + "\n", + "[1 rows x 3 columns]" ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "id": "8bf12352", + "cell_type": "code", + "source": [ + "movies.dtypes" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 250 + }, + "id": "yqRiNRY8_8fs", + "outputId": "efa60107-6883-4f5c-8e40-43c7287ea7fb" + }, + "execution_count": 6, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] }, { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 976 - }, - "id": "cKZdHq0XS1iW", - "outputId": "72cbad57-4518-4e1e-97bb-333d424dba73" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/lib/python3.12/dist-packages/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " Query processed 1.3 kB in 4 minutes of slot time. 
[Job bigframes-dev:US.b60a151a-6cbc-405e-9c40-8a7461981a00 details]\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " Query processed 1.3 kB in a moment of slot time.\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
postertitleyear
0Der Student von Prag1913
\n", - "

1 rows × 3 columns

\n", - "
[1 rows x 3 columns in total]" - ], - "text/plain": [ - " poster title \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-03-27T02:0... Der Student von Prag \n", - "\n", - " year \n", - "0 1913 \n", - "\n", - "[1 rows x 3 columns]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } + "data": { + "text/html": [ + "\u003cdiv\u003e\n", + "\u003cstyle scoped\u003e\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003eposter\u003c/th\u003e\n", + " \u003ctd\u003estruct\u0026lt;uri: string, version: string, authorize...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003etitle\u003c/th\u003e\n", + " \u003ctd\u003estring[pyarrow]\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003eyear\u003c/th\u003e\n", + " \u003ctd\u003eInt64\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003c/div\u003e\u003cbr\u003e\u003clabel\u003e\u003cb\u003edtype:\u003c/b\u003e object\u003c/label\u003e" ], - "source": [ - "movies['year'] = bbq.ai.generate_int(\n", - " (\"What is the release year for this movie?\", movies['title']),\n", - " endpoint='gemini-2.5-pro'\n", - ").struct.field(\"result\")\n", - "\n", - "movies.head(1)" + "text/plain": [ + "poster struct\u003curi: string, version: string, authorize...\n", + "title string[pyarrow]\n", + "year Int64\n", + "dtype: object" ] + }, + "execution_count": 
6, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "id": "a7b7bbf1", + "cell_type": "markdown", + "source": [ + "## Filter movie by production country\n", + "\n", + "In the next example, you will use `ai.if_()` to find the movies that were produced in the USA." + ], + "metadata": { + "id": "0WwbiMtdTXt5" + }, + "execution_count": null + }, + { + "id": "0a1dec99", + "cell_type": "code", + "source": [ + "us_movies = movies[bbq.ai.if_(\n", + " (\"The movie \", movies['title'], \" was made in US\")\n", + ")]\n", + "us_movies.head(1)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "xTE8dj3LThy6", + "outputId": "941e04d8-9f24-4309-a59e-35e8740c9c54" + }, + "execution_count": 7, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/lib/python3.12/dist-packages/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" + ] }, { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 250 - }, - "id": "yqRiNRY8_8fs", - "outputId": "efa60107-6883-4f5c-8e40-43c7287ea7fb" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
0
posterstruct<uri: string, version: string, authorize...
titlestring[pyarrow]
yearInt64
\n", - "

" - ], - "text/plain": [ - "poster structJob bigframes-dev:US.c9bb23f0-5ceb-4d6c-8241-960c496274ae details]\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " Query processed 1.2 kB in a moment of slot time.\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
postertitleyear
8Shoulder Arms1918
\n", - "

1 rows × 3 columns

\n", - "
[1 rows x 3 columns in total]" - ], - "text/plain": [ - " poster title year\n", - "8 {\"access_urls\":{\"expiry_time\":\"2026-03-27T02:0... Shoulder Arms 1918\n", - "\n", - "[1 rows x 3 columns]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } + "data": { + "text/html": [ + "\u003cdiv\u003e\n", + "\u003cstyle scoped\u003e\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eposter\u003c/th\u003e\n", + " \u003cth\u003etitle\u003c/th\u003e\n", + " \u003cth\u003eyear\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e8\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/vertex-ai%2Fdataset-management%2Fdatasets%2Fclassic-movie-posters%2Fshoulder_arms.jpeg?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260326%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260326T200210Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1683653082560296\u0026X-Goog-Signature=64c1fb48cc9830dd4153bca15d05d8703c770e12a4df99abf4cab9dec02d13c66adf4d1223ffda9a30763ad2b286086dfc8cc9b8d20875b29d0c1639983c3ba08a02364bf49361b4a24c3a6830def8d6d3561eeb04d01604b5bae86e48457dc368fee538d0beea2228fdf5e94b5862e1097f58545d7449fa5df0e93fb9c3c0a32943ca9970911f183adf71a7e13e9275efd41c1f69b8f8453b853a30cbb5e8859d72b95ca653204b5ae8f96a12d88d59e988349f74e3f6db6ef277c066d92a28c50335d494beead9a3c0c796c97ca48c497328ae7ad278161c28743193233b28ac0fcafab2431179f7f6321345d8a67e6af39d7339697a5892f0441a266262ab\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003eShoulder Arms\u003c/td\u003e\n", + " \u003ctd\u003e1918\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e1 rows × 3 columns\u003c/p\u003e\n", + "\u003c/div\u003e[1 rows x 3 columns in total]" ], - "source": [ - "us_movies = movies[bbq.ai.if_(\n", - " (\"The movie \", movies['title'], \" was made in US\")\n", - ")]\n", - "us_movies.head(1)" + "text/plain": [ + " poster title year\n", + "8 {\"access_urls\":{\"expiry_time\":\"2026-03-27T02:0... 
Shoulder Arms 1918\n", + "\n", + "[1 rows x 3 columns]" ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" - } + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 0 + "language_info": { + "name": "python" + } + }, + "nbformat_minor": 0, + "nbformat": 4 } diff --git a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb index 8f3241259d5f..ebc2cb6bcd30 100644 --- a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb @@ -1,10 +1,8 @@ { "cells": [ { + "id": "9edad7a6", "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], "source": [ "# Copyright 2025 Google LLC\n", "#\n", @@ -19,41 +17,45 @@ "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License." - ] + ], + "metadata": {}, + "execution_count": 1 }, { + "id": "816ab253", "cell_type": "markdown", - "metadata": { - "id": "YOrUAvz6DMw-" - }, "source": [ "# BigFrames Multimodal DataFrame\n", "\n", - "\n", + "\u003ctable align=\"left\"\u003e\n", "\n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \"Colab Run in Colab\n", - " \n", - " \n", - " \n", - " \"GitHub\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://colab.research.google.com/github/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", + " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/colab-logo.png\" alt=\"Colab logo\"\u003e Run in Colab\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", + " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/github-logo.png\" width=\"32\" alt=\"GitHub logo\"\u003e\n", " View on GitHub\n", - " \n", - " \n", - " \n", - " \"BQ\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", + " \u003cimg src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw\u0026s\" alt=\"BQ logo\" width=\"35\"\u003e\n", " Open in BQ Studio\n", - " \n", - "
\n" - ] + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + "\u003c/table\u003e\n" + ], + "metadata": { + "id": "YOrUAvz6DMw-" + }, + "execution_count": null }, { + "id": "77d821d4", "cell_type": "markdown", - "metadata": {}, "source": [ "This notebook is introducing BigFrames Multimodal features:\n", "1. Create Multimodal DataFrame\n", @@ -63,44 +65,42 @@ "5. PDF chunking function\n", "6. Transcribe audio\n", "7. Extract EXIF metadata from images" - ] + ], + "metadata": {}, + "execution_count": null }, { + "id": "75ab1c13", "cell_type": "markdown", + "source": [ + "## Setup" + ], "metadata": { "id": "PEAJQQ6AFg-n" }, - "source": [ - "## Setup" - ] + "execution_count": null }, { + "id": "750954c4", "cell_type": "markdown", - "metadata": {}, "source": [ - "Install the latest bigframes package if bigframes version < 2.4.0" - ] + "Install the latest bigframes package if bigframes version \u003c 2.4.0" + ], + "metadata": {}, + "execution_count": null }, { + "id": "2a6fafb1", "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], "source": [ "# !pip install bigframes --upgrade" - ] + ], + "metadata": {}, + "execution_count": 2 }, { + "id": "df561d04", "cell_type": "code", - "execution_count": 3, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "bGyhLnfEeB0X", - "outputId": "83ac8b64-3f44-4d43-d089-28a5026cbb42" - }, - "outputs": [], "source": [ "PROJECT = \"bigframes-dev\" # replace with your project. 
\n", "# Refer to https://cloud.google.com/bigquery/docs/multimodal-data-dataframes-tutorial#required_roles for your required permissions\n", @@ -126,13 +126,19 @@ "\n", "import bigframes.pandas as bpd\n", "import bigframes.bigquery as bbq" - ] + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bGyhLnfEeB0X", + "outputId": "83ac8b64-3f44-4d43-d089-28a5026cbb42" + }, + "execution_count": 3 }, { + "id": "35bd6e6e", "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], "source": [ "import bigframes.bigquery as bbq\n", "\n", @@ -172,21 +178,27 @@ "\n", "def get_updated(series):\n", " return bpd.to_datetime(bbq.json_value(get_metadata(series), \"$.updated\").astype(\"Int64\"), unit=\"us\", utc=True)" - ] + ], + "metadata": {}, + "execution_count": 4 }, { + "id": "be9ce892", "cell_type": "markdown", + "source": [ + "import bigframes.pandas as bpd\\nsession = bpd.get_global_session()\\ndf_image = session._from_glob_path(\\n \"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\",\\n connection=FULL_CONNECTION_ID,\\n name=\"image\"\\n)" + ], "metadata": { "id": "ifKOq7VZGtZy" }, - "source": [ - "### 1. Create Multimodal DataFrame\n", - "There are several ways to create Multimodal DataFrame. The easiest way is from the wildcard paths." 
- ] + "execution_count": null }, { + "id": "871d02f4", "cell_type": "code", - "execution_count": 5, + "source": [ + "# Create blob columns from wildcard path.\\ndf_image = session._from_glob_path(\\n \"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\", name=\"image\"\\n)" + ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -194,17 +206,16 @@ "id": "fx6YcZJbeYru", "outputId": "d707954a-0dd0-4c50-b7bf-36b140cf76cf" }, - "outputs": [], - "source": [ - "# Create blob columns from wildcard path.\n", - "df_image = bpd.from_glob_path(\n", - " \"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\", name=\"image\"\n", - ")" - ] + "execution_count": 5 }, { + "id": "2e0436b0", "cell_type": "code", - "execution_count": 6, + "source": [ + "# Take only the 5 images to deal with. Preview the content of the Mutimodal DataFrame\n", + "df_image = df_image.head(5)\n", + "df_image" + ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -213,6 +224,7 @@ "id": "HhCb8jRsLe9B", "outputId": "03081cf9-3a22-42c9-b38f-649f592fdada" }, + "execution_count": 6, "outputs": [ { "name": "stderr", @@ -229,8 +241,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
image
0
1
2
3
4
\n", - "

5 rows × 1 columns

\n", - "
[5 rows x 1 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eimage\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193621Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=060fd285eaaa0d1b8888e9e3c648c0c9452df5d33b6c6d7ddea1d5fc3e4bf1f243ff0719ed4e3faaa3ea29e60da198daf1168dfd0efd64f9612f7c032753cfa2842ef88469a49ab23cf261e2b5da052224a33eedddd7c4699d584ec85704b18010ce8f4216f200d3cc8d0301b7aeb6bef37ae9e83a832ea38703f19b5b85e2e34f772420f5436afeb874487b3fb0ae4a17fb8f277a3d233a6d5e6e0d063e37e651061bf4ae33b8ec8b879c5db93e3ce97513054822d1867e28de4f03585da2edd8f4f51e177a4e3d37f5e3ca2f5be0990dd9f58135ec9223da3af7bcd1e67a8d279282d4d245eb7faff094903a82cf7a8b866fe848230a6668b6eaef6a683292\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193621Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=34826bd5824786f809b740fd64e154a9feb43b467250c64b9b6686134133e18aabd7213472cb00ae4ef59e5567b8897828aae6ff22e29c4a5d28214fcd2a6f486e69d548d0e482707c4dcd67894feb716ee68e1863e02e36d7e0c6c008b1b989c2a798feb2c4bc3638c6c80069165b2bf51c6028ea2f0a09c1555981b8935435ec1c596975b77942f603e65414328f3f1d180f772015452bfc97e96ffb9a0a016a4dc365a4531d0e37e491f4066df87f9a8f2374d30d5f62d639f10252c471570b9e24d88a9f8816007099fc39e6f846c68ea5a4069ecc785e67101f664da4999037fcbbd93e00b1b85a31528492e8610d75af96a1c8a879865dd36da2b15465\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193621Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=6476c6b9b8b23f4ac79973d8088424c6d8018857d0d8c1bf586057a7bd2f4cf00bfc53c79757b35401f05dadee9bc1aeeaed7b669659afa16696062db7d9da45e5fe17a0aaaa33c2394632a8dae6ca3c7f705ce0e7179e44fa245dc34080d87bdd0f41638c2840eba87b178dc43db16ca1a621224a1d991960eb821a99dc34aab25ed7e8457f161bd09fa9efc0eb0642709e3fba6ba412ff68ffe698592c235054ae0d08fd27909150beaf69b93dee3496d2f9254c2f801890fe072442fa2ffe389eeb689b8eb4daf08b4701a535ac6606c7de856761d008da479138abc3e941e0ab682f19fe86cd3f56df63f788c92824aed76fafaae0a546fa796266f26e2e\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193621Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=22cdbcce477c983c914de1edcef7742fd253a6830e961c2932d5dbb11730c1fe0035579c4158a140cd6ebcdc95e1212ec60a2d54679af8dd662cb7c1ac6249042bb5a95fb324397599bfa4e8a8bf8e4313d14a5ca34e40c677d91f1853b4b7450d3ad043404058db5c1dbab56b8968eab4e3550aa1de20c789084527f8abd67a32aa62788b70975ea828bb312f5a123463c2088a4bad7a0c20d299f59fc0674ed32d36b3f78a1bf2fc6fcd367bf2056e132fbd50e0a597a7da7518be8c9597de6365097490857caba47e84b57940bdc6cca130b6c23ede91c51140cf0672ebdda0957e525416c636c368d1cf04e5efecbba3f052f61bb95d951e52e0f5d31e8c\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193621Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=335aafc718f8a89dc2b5d2e75ff750ac302cfacf4238ad91c2a4b140f59dd666d6520fafe885b82706453c2e820c82f0461488ae01e3210a20c555a9ac1242ddd54e17a92d7873211a4dcd69a7fca76c16ad9cd754f6245a8b9f047e9ef8bc428ec243fbde7af59a2b308968a165662e50d4a08740d196d02182d99650e79673e167164dc2869a434159ba3a15c68ddc9e17f5a7234c478ac4ae55a9686740ef260e6c1ab834ca3df361161c8d689acc72b143a6a3345640b2b94aadd1070d3e90a6572d63ae74cf803304b798ea4df61e5f4494f078f565f0d59f57bd6eee0618936a16617455d785ced3ac467b964b5eb9049749fe4cf8f2bf2c72ed72fc79\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 1 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 1 columns in total]" ], "text/plain": [ " image\n", @@ -291,37 +303,46 @@ "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "# Take only the 5 images to deal with. Preview the content of the Mutimodal DataFrame\n", - "df_image = df_image.head(5)\n", - "df_image" ] }, { + "id": "429b0117", "cell_type": "markdown", + "source": [ + "### 2. Combine unstructured data with structured data" + ], "metadata": { "id": "b6RRZb3qPi_T" }, - "source": [ - "### 2. Combine unstructured data with structured data" - ] + "execution_count": null }, { + "id": "991fa065", "cell_type": "markdown", + "source": [ + "Now you can put more information into the table to describe the files. Such as author info from inputs, or other metadata from the gcs object itself." 
+ ], "metadata": { "id": "4YJCdmLtR-qu" }, - "source": [ - "Now you can put more information into the table to describe the files. Such as author info from inputs, or other metadata from the gcs object itself." - ] + "execution_count": null }, { + "id": "08722ec5", "cell_type": "code", - "execution_count": 7, + "source": [ + "# Combine unstructured data with structured data\n", + "df_image = df_image.head(5)\n", + "df_image[\"author\"] = [\"alice\", \"bob\", \"bob\", \"alice\", \"bob\"] # type: ignore\n", + "df_image[\"content_type\"] = get_content_type(df_image[\"image\"])\n", + "df_image[\"size\"] = get_size(df_image[\"image\"])\n", + "df_image[\"updated\"] = get_updated(df_image[\"image\"])\n", + "df_image" + ], "metadata": { "id": "YYYVn7NDH0Me" }, + "execution_count": 7, "outputs": [ { "name": "stderr", @@ -338,8 +359,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
imageauthorcontent_typesizeupdated
0aliceimage/png15912402025-03-20 17:45:04+00:00
1bobimage/png11829512025-03-20 17:45:02+00:00
2bobimage/png15208842025-03-20 17:44:55+00:00
3aliceimage/png12354012025-03-20 17:45:19+00:00
4bobimage/png15919232025-03-20 17:44:47+00:00
\n", - "

5 rows × 5 columns

\n", - "
[5 rows x 5 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eimage\u003c/th\u003e\n", + " \u003cth\u003eauthor\u003c/th\u003e\n", + " \u003cth\u003econtent_type\u003c/th\u003e\n", + " \u003cth\u003esize\u003c/th\u003e\n", + " \u003cth\u003eupdated\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193635Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=0f96a5054a9d9d8f14259a2a9155c8124d0b3b6af21d5ba59f61a789cb5a770d1d70e9d7b04094140495675e9eb97ef30d5539ec822bfc7f857fc0be3a3ff193aef72578ffccf7231633d42fbf53752b50a6ab3c4180dd86f62a2c350492239d44b2d5a079d000aa60d99e5656dca1fdc85b2a5b1cea0d6567d47641167ee08bd41bff06e93b35e34c4b8f82e73d589bf29f9ae73b640b8c90b751ca2829f99b2a2fa5a463990660e87e4c4220a8bee9ff9bea44eb621d8a00335892833a0e33cc95560a803df5a13fb710abfe813d11a37610c870c82986b4275831e2ed57cf022af8927cd4a9fc5aea88b54c597b51ee456ab5f22cdcfe8972a0a2c56d9702\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003ealice\u003c/td\u003e\n", + " \u003ctd\u003eimage/png\u003c/td\u003e\n", + " \u003ctd\u003e1591240\u003c/td\u003e\n", + " \u003ctd\u003e2025-03-20 17:45:04+00:00\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193635Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=50b820b125f2b52c6a205178676d153512a5d62ba04f7f399de86cecaf167492f4db9322e289f8ec077e74e09ba67509bedda9b68b9eb7290a7f52bb78a567139d0a2bb9266764ed941bfa19ba4278bf00647e79c85877e3111dbe3c49cc572d4f47739db1030d6ce0298965d08f2f992b59a0f452ab8d52ea5d783cf13ce6d3b8349ce3fce9c4337ddd00b746ec2e9e8fa6fa0361a644d82c46b7f0860a2404f3fbf17de24cbfd8744a098fdde367d5a4cb918a325ae3fb96abfb187e26bee7bdc267b81ba23949ca4feaf676864431641f3747477293b34541e7c48ca06bbcc45e94ae839d0ce85e3222c38dd39014821881a31b15f3efd06ca426cc4769fb\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003ebob\u003c/td\u003e\n", + " \u003ctd\u003eimage/png\u003c/td\u003e\n", + " \u003ctd\u003e1182951\u003c/td\u003e\n", + " \u003ctd\u003e2025-03-20 17:45:02+00:00\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193635Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=0d659c9d71b34023e6a7531dc99620cd1543f4a8ec0262cda29549e9f4f604e929be102b44e0a11ef2c148c6a24c3fe9a92257892ca1aa23d7b963551d95093fa74bce067ec12c0237a96a15da0741f76e732d04f85b03d98696c7dc9bcd1bd9de1f2799ceb878e8f87ddfff5609fc6a6db4f27dcc410ad05cdc1066c8dcdae880a950a160189673eba7f3de59cc0e1aa754ebdf7f745a8572216af6f7934b1e96c9aa7cb7cacb6aa836f8e731668b76d0942e04e7e4b81a5972a75ab2ecb0b1859dc4ac4f6a45239dd2b76cc7af4643fb24c64731f28711e6cab8433fe239f0cc2462ae9260b1ae5ae85517300075188d270957eecb31ee8db20ac3ddd97c4e\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003ebob\u003c/td\u003e\n", + " \u003ctd\u003eimage/png\u003c/td\u003e\n", + " \u003ctd\u003e1520884\u003c/td\u003e\n", + " \u003ctd\u003e2025-03-20 17:44:55+00:00\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193635Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=1124d977a75b85634f2afeb223fba3028c9ad85fd9a2f6f8ce483c9e015a48a242f6810742e0279a46b3388371a9b7d37ca96d9a53d3939f6e0484a474491bece22272b89176e01866fdc8845b75cef28b9ff36a7b2f875e452002a7dd8d13ce38b078ea2aacd76ce8cb560faf078dad6462b3a69130f333ab9119fb6f1d8410a70de76018d2c84f01c6d70e1aa60498b4eb88a35a77a8173d11e545a8f5a7dfa542ec3effcafcd3a9c84934d605ed06107df98032738415e6ef1ed9331796aa802712c2cb4bd733881833bf4ed5d590846db97c7591a2d84acdf87e38752b15a39b711aec5bbe4dcca25a1edfb60626e68497c1fbb8cb0bd707938db378a01e\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003ealice\u003c/td\u003e\n", + " \u003ctd\u003eimage/png\u003c/td\u003e\n", + " \u003ctd\u003e1235401\u003c/td\u003e\n", + " \u003ctd\u003e2025-03-20 17:45:19+00:00\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193635Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=2f60becdf01864a381c2283fbceaaea023a1011554a61221b5be902cdd08b102d60d0da0275ed491b3396baeef517b8e2336eeb3e3b07da0398cdf4b190ac0ea667e4cd1d1d19e41046824d55ffcef47a2db3aeb4e82da71a655264f14ad5ee553329aa9b32c8c2200f3b66c9a9bb5aa8e5b91795e8d6b6129935f46522fb8dab9ce3a2ba5af019c2410f709472791730ab9ebdf9f901a5bfaf4dcc2c78e07c79743d35eceac59999d841adb60ce15313a70526d98b83e90f2240800c5b96b1b9a032d530fb15bec86425afca0c6fcc1d35d1560ef996cae5411feb67addd1b726026f3d097318b0577a84dab72cae328bb186fc8c97001ff720a43e6fc27610\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003ebob\u003c/td\u003e\n", + " \u003ctd\u003eimage/png\u003c/td\u003e\n", + " \u003ctd\u003e1591923\u003c/td\u003e\n", + " \u003ctd\u003e2025-03-20 17:44:47+00:00\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 5 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 5 columns in total]" ], "text/plain": [ " image author content_type \\\n", @@ -431,140 +452,29 @@ "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "# Combine unstructured data with structured data\n", - "df_image = df_image.head(5)\n", - "df_image[\"author\"] = [\"alice\", \"bob\", \"bob\", \"alice\", \"bob\"] # type: ignore\n", - "df_image[\"content_type\"] = get_content_type(df_image[\"image\"])\n", - "df_image[\"size\"] = get_size(df_image[\"image\"])\n", - "df_image[\"updated\"] = get_updated(df_image[\"image\"])\n", - "df_image" ] }, { + "id": "f90826f6", "cell_type": "markdown", - "metadata": {}, 
"source": [ "### 3. Conduct image transformations" - ] + ], + "metadata": {}, + "execution_count": null }, { + "id": "e24c9f8c", "cell_type": "markdown", - "metadata": {}, "source": [ "This section demonstrates how to perform image transformations like blur, resize, and normalize using custom BigQuery Python UDFs and the `opencv-python` library." - ] + ], + "metadata": {}, + "execution_count": null }, { + "id": "db665049", "cell_type": "code", - "execution_count": 8, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 487 - }, - "id": "HhCb8jRsLe9B", - "outputId": "03081cf9-3a22-42c9-b38f-649f592fdada" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", - " return global_session.with_default_session(\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4655: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", - " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
imageblurred
0
1
2
3
4
\n", - "

5 rows × 2 columns

\n", - "
[5 rows x 2 columns in total]" - ], - "text/plain": [ - " image \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "\n", - " blurred \n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "\n", - "[5 rows x 2 columns]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ "# Construct the canonical connection ID\n", "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", @@ -577,7 +487,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"opencv-python\", \"numpy\", \"requests\"],\n", ")\n", - "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -> str:\n", + "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -\u003e str:\n", " import json\n", " import cv2 as cv\n", " import numpy as np\n", @@ -638,23 +548,137 @@ " image_blur, 20, 20\n", ")\n", "df_image[[\"image\", \"blurred\"]]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 487 + }, + "id": "HhCb8jRsLe9B", + "outputId": "03081cf9-3a22-42c9-b38f-649f592fdada" + }, + "execution_count": 8, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", + " return global_session.with_default_session(\n", + 
"/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4655: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", + " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" + ] + }, + { + "data": { + "text/html": [ + "\u003cdiv\u003e\n", + "\u003cstyle scoped\u003e\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eimage\u003c/th\u003e\n", + " \u003cth\u003eblurred\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=2bb786ea7fda0a778b591d0f85b1d1df003726a26585490c1c8e1cc632bc90e418bc2762993da32c60017b96b36a9bf03ca123f7e74a34e5da98d4a8ae65e340c64872e1ab7c6442435253669103a157ee61c793da79ad0e6ae2a7bdbf54e8d67dce61de37d9dd2f54b0c994f8135d88af27f08eeb99e713b385b37fd0762503244cd0c597479d6925dcf111db4605842c797aabb307736028a5dd802ef08a2697a4bd1c96b5bf8bbecdb8a8f384028cec03bc9d51c1d2dfee1760756e0c6e54d9d753b373115e308cf45de2c082f30f263285af5d5e43fd9bf8c3dae32ceee389ef9427f3e2b06646d48aa570ba1ca0713d0f270ea2f4996e8ad3d03a3e39a6\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334353324\u0026X-Goog-Signature=6f16236d50a82c7ede3c23366389344149faba103443acf1c1a2b0d3b0d91948c8c15a3ad990382ab9b61badf8acee3c4b9e5ec6f96b72d395cd2e18227ad5c051ad189c1af48030fb44c1b9cc84a44a74fec04d576a6d699c1f0e133b2fc5d583872b5577c60a01f58d163de6e95591d260e9246fae0243d7472748e854ccaf4b4c3de80fb4e085f428427fe9914d22fba7416ae8b9e33beb0dfbcb91273609d0333aa1135b835bc69f9e15e1097452133e376b9b3f1bb7b4d7d7e9cb860677ce786e49239e8a8af86f83b7e27e76846212daddd2859f8dec4942b7351c69d396d4fd9a529fd0c48677aa4923063a752f6845801f22d3de227a640946f1e212\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=31ed02660dcead7471a7b47e2f471f4cf515e37525c14027f8d87ba1479982f244df02979b7da03676d4bbbd21221b3f70cefea4501eca3a5a64acb187ef5a2b03b19116729059dc666c2e295fc1ac831cd38864c1673344e04855db279a3c96409550dfade09631b0818e6cb576ff02afba6a87624f7c33077f7f19c891f6413b317cabb834182585d445ce97aa0294beca4de0edd2f9a0f6429747e93db53df875249a9c0543a329dac0f46ef3ed4e64a7e51648d0820ba52b7845b6f98371aca7deb165b4f7b7537be0d659cf33bb34893417666d7c9e9f611332ccba6bae6024be6862350185886cf61354eda3591e96e344c59350ca6f68ce50670fa869\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334573735\u0026X-Goog-Signature=209b62c9c3e2657b9270ed126a6c1e3979471f9e308670ce1595e9c18a0ec7c627a313b1f1c87108355602dc11b804ff609ba3394fada2b4fd186c6fef4138e22222045d0fd0660f103e6378ac83c6bd7d8da27c1a3d306dbfd778f6738c2e492bd8527ba9112e086f15334b7ab7795b88212d3825b3be325606cc507e5504406bc4e9dc41b29fb99e1207fe74c11053e7332ad9fdd65587f55cd10cc1bfd903672f43de38a18cb346977d8498fd751ab8728ca580261a1a0367421437d56df7f5f7dda895ddd370954aee632258304da31fd8e7c33619dd5cf9ee0ebe9e60eba3cc416e871e181525f23e525489ef6dffced9211f7d3681ec24b3ac7c0b05ab\" 
width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=8d32ad0137f3b10393aeaf635732f7fe337149323c0a33b634f9dcc592e662d4d268223f5c5cc89956787c5a3c061fa69c2de4d1a3225497dfad466548fcd4df7ee6000c4e60ff0661d0f9b451efa2701df5de55c322b3585c09210171403d47c358e6f41281f245711afd63eed3157e6674e9958ebffc5516f6ccb06b9af1830400e1d6a9bed44538b7212eeb257665aa43eecfa2af7482863f6032002de57f7a01af83cf5051541178297ab4a256710c04fd082cb51af4725dbafb288ee79caa630853dcbf35c49595b2b2e552804cabdbdf955806d57b97451e87035bd3ea89dbb5560a1c14170c531c3222a1d0b59d79d4c45b641887699f0bf5830612cb\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334233994\u0026X-Goog-Signature=6eed60a846158365a0c54d4c96516bb8c4011b497da203479fa9066bc2b7c2c7c683aa351d5249e05df64b8f86549f251150327013e41231ee06f22e9a51e1061c577d7b2da4704b80f684776702da13be6122ad6652463f210c53d9149da9e32a253fde4851723cbb616f18374a8f9b7b5cd36a82e66a2e5677ae51b906e6c4649ea27fcbbcba3ef24bf163a4b0f469c10eca8da464f58161ead8371bdb5b0d452ee713b0ec498ca9aaefd521a1d8240d82885a19af58ddefb167146ab9d411b5d0712457b126104b5f42f57ab4ede2da5ae9af74e708840b507a5538571804c60e9c166516e0118aa555a13169401e21f0a5ec303f3b801d6a972ef94426ee\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=472d88289a3910bdde93dd468e6ae9e66c0821fb0d72a356ef887771baa2a80f356b0ac67183873c0e89c87afb3080b3a78122fa7c3e37ad8b54d8aa216e9e04cb6f2af4f200784beba8de4eea7cf3a39faa8d800b7cc6bbf03df55beaeb69727d413266e2e59fb900bc25b6859d7c14db3c22aa6a0aeaa71c331ae0a2bd27c9e2c246fe931a2562210411491bc83bc34e7bef60901189949348eee909e64c90e3aab7b03d7c274ee84c03cca6dd624169f40323a785bcc1a8357810f738b45a637c09267530fbddc413a75622d793876c74c9128b11d8b4ea2fe2b959b34b96e06a9c384f72522ad357557d0a3b1c8b5792c2e94631139eae178fa660a5e8b2\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334678692\u0026X-Goog-Signature=239a77d1c63c4074b2ddc3dd21a5bc77de8ea993fdd911a199b6cca207fef9286307a8321d91d02cffda17060a217f50b2a1d8017bbcb29511339a1187b38332a39795fb1b2c754988948be8849a371fd66a7698f8e35a0f1a3430b6837ede73c37c90da49a062586828f19595d4246e4eabf3e3d629f251b066fb20e0f34ddad80599f5946aa76e0694b859f7d575dc781a850d56e9846d456d285b0023e90862154008154fa13ef1f95096a6161e6a1b314b82ca23bc44fc1b4b7799a9f936dca09ab09830446089d8defe11a2c0e066aa8a65204d9027140f314001c59187593290ae22c4092ee5d0293a6e5da1e1da3d42c32c0aced7db7ad4f774575deb\" 
width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=1077450c755044fd6121e0dd67ec7de75a1785c4bacede635b78dd64d6b982ba02bba7a00065035fa13f003ef03c14d7f622b6a5c9ef19cadc956470e5a62e0f1855fb8cae0bbe723c0352224d152c85e173b234e3f5a1bcc2e1ddd4da065c184e82487eb4aa17464d330144b3b7c17ba357af1fb267f97730b3a798ff42a514ea47b83aa9f2560cfd428778638287c96ed17f95f0327e1cade380d046120d79479aca927d627ba65f3bc009c75d1662cbf93c6d33cea4b030a7906d3689922bac13ca3fab45536f115ae6253715cc9c24769e57e83657bf0393ce55e45f90d9666513a19a939044d3b3ad528757e022624d6601dacd326ae8ab3c9ce4ae6d52\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334302924\u0026X-Goog-Signature=3a25e70e59647f427ae55d72168c576fdb59b05f9ed6b72885a9e9c11f655e6bf10ded55744adcef330659731f0effdda7550ddca99c309cefe46ba864b088f155a5243e579d182a55f916d21d9dd0a83534a5e2efb2955a9146db12b22b4321e3f36b69f8d89d663507d7db83ac96bb2419a2baa0787fd0c6e6079d06652b8a2ac364a0a0a5d8d9de6331658b798abddbaaae1ede3026a4f0d955e74782afe240d31e6748bea8ab332ed945f541ca20c587b8c1449643f4748a3b059aae857334b6249bdf86794d307340b6a07d0dc47d3980e234be9c0549f52636d33776b7474fb95ebc014656c3cc217a777d438612a08849ac498baba0ce4716ac4ea432\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 2 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 2 columns in total]" + ], + "text/plain": [ + " image \\\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "\n", + " blurred \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... 
\n", + "\n", + "[5 rows x 2 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } ] }, { + "id": "11fcc6ec", "cell_type": "markdown", + "source": [ + "### 4. Use LLM models to ask questions and generate embeddings on images" + ], "metadata": { "id": "Euk5saeVVdTP" }, - "source": [ - "### 4. Use LLM models to ask questions and generate embeddings on images" - ] + "execution_count": null }, { + "id": "793b2f45", "cell_type": "code", - "execution_count": 9, + "source": [ + "from bigframes.ml import llm\n", + "gemini = llm.GeminiTextGenerator()" + ], "metadata": { "id": "mRUGfcaFVW-3" }, + "execution_count": 9, "outputs": [ { "name": "stderr", @@ -666,15 +690,16 @@ " return method(*args, **kwargs)\n" ] } - ], - "source": [ - "from bigframes.ml import llm\n", - "gemini = llm.GeminiTextGenerator()" ] }, { + "id": "13d7cb93", "cell_type": "code", - "execution_count": 10, + "source": [ + "# Ask the same question on the images\n", + "answer = gemini.predict(df_image, prompt=[\"what item is it?\", df_image[\"image\"]])\n", + "answer[[\"ml_generate_text_llm_result\", \"image\"]]" + ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -683,6 +708,7 @@ "id": "DNFP7CbjWdR9", "outputId": "3f90a062-0abc-4bce-f53c-db57b06a14b9" }, + "execution_count": 10, "outputs": [ { "name": "stderr", @@ -705,8 +731,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ml_generate_text_llm_resultimage
0The item is a container of K9 Guard Dog Paw Balm.
1The item is K9 Guard Dog Hot Spot Spray.
2The image contains three bags of food, likely for small animals like rabbits or guinea pigs. They are labeled \"Timoth Hay Lend Variety Plend\", \"Herbal Greeıs Mix Variety Blend\", and \"Berry & Blossom Treat Blend\", all under the brand \"Fluffy Buns.\" The bags are yellow, green, and purple, respectively. Each bag has a pile of its contents beneath it.
3The item is a cat tree.\\n
4The item is a bag of bird seed. Specifically, it's labeled \"Chirpy Seed\", \"Deluxe Bird Food\".\\n
\n", - "

5 rows × 2 columns

\n", - "
[5 rows x 2 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eml_generate_text_llm_result\u003c/th\u003e\n", + " \u003cth\u003eimage\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003eThe item is a container of K9 Guard Dog Paw Balm.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=1b184636c15ee0a74b099df1903a0a79f2d0832d2cf829c84f9344269dc70408ea48dfaddce2f284cb4862cef857e8a0420627e25ca8c7ecc08d34bc69c695bc030ca8d90417860eecad65826160cbbf1cabe6c90d5e51a09c8b582bc542dfc5b309908be7b945d615eabba73b79912987306b3b110a5b0a9e52ccf900a3d2c490bb4e4572f3108f3acabf9a332e0fa503f74265f44d3b9ff40745afa4c59a3a0f3366ca4e4a800a09a5df0b363909a01705433e31bf2c9b7ccc0cc1e75f3e8ec323907140f29e4033238385eb83815b6d95ec54945cbf829d966510ae2504187f9c167fe70ac9e77231d4b38df380a7f6ec5f973828f21e51f4a95626ea0504\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003eThe item is K9 Guard Dog Hot Spot Spray.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=7e3adbf71385c63b494609546b7b2a3ef41dc437772a35d579bfed25594b3dd4716f8170dae6e4c9afcabce49c2af584561111983494d6dff504ff5eed611c3c7712cb667e6f1f3451aff5ac6911c4da9aeb2ba5c9227f9459f7c54085268a515eeb28add1b384a4724159f3d1408278e37716465f11a18f823dd4058004e43f9bdd4cab28fc97e97043adddc53c4b5e3059cf1641f1300bedded5a679f6922a45c021055e413953e2f0b000a6fd5047a0aa2fa5fce5f0ca08b6f93411704c9b10c6534894130c11f8ee9a8ee70d26a6455c0cbaf2088c1b5205328858e22026d1c4efc9e558a33082169f7eec5e4fb406de7de13e3ee72a48421e9896cca6f7\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003eThe image contains three bags of food, likely for small animals like rabbits or guinea pigs. They are labeled \"Timoth Hay Lend Variety Plend\", \"Herbal Greeıs Mix Variety Blend\", and \"Berry \u0026 Blossom Treat Blend\", all under the brand \"Fluffy Buns.\" The bags are yellow, green, and purple, respectively. 
Each bag has a pile of its contents beneath it.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=380d369f6fc8a0e4274420f7bb038aaf748111cd654f7dc20818a97d80da8e6b93125884982c656009cdfdc7e70496c1defa42b2de69e1b73d99e0b925953bd5e0e5ae20ddfac13a619f8c6b1bf6c3fe9cdb195910eb08efd96a193dda55488eacc2d0deac4d0a777fd7b7916a0cc1707f22463e1678c83b8ae51d082b8365c2643f85c19c59a9ec34c89fcc87b9c510cf6dbcdda5d0648d6602bd023b0a09e6b4b02c4cea9f6b10f563a14097e86a5cd9898ea3b3606a6c847a23ce97aed46b3154c1c1791da19edf172d7f57cd4e604bb2774ae3dc02d902c826ca9be17972ff17d612106b9ac61e734837646e5d0b40f8162798bf7695dccb0d320e6a58a9\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003eThe item is a cat tree.\\n\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=712a70543cff388ba937bc867b4d94e1bfb09579bc7ecf998b8fb5194937cd15f491643f76925582ed5b7c853a9845b77e3c6c248126e211d45c3f6ebe751cc06193ae052999bf9bd827acbb204d2a64ad5d6eae1101fc5b2518f16ae29469ee7213cae403a3a6a29d7081561decc6b189593beb4b649bc7169828f4570a929d8b15c8dd0b3f259bfa4e2680b9d5b88653068357c7aefa0b1f26e10dc309b743da4164d9a5abd1761b00cc9a12380ba6fb3786f141b8e536fdc27e869b632c3e1a130f312ad5185362b0f9b30f473387a02905f22956992278d94fc2ef387a87cb855d35cfcabe9ad5d82c1b4dd85c56152e28438f6631322a4c229a9520adb5\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003eThe item is a bag of bird seed. 
Specifically, it's labeled \"Chirpy Seed\", \"Deluxe Bird Food\".\\n\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=70d2709b3b655fb6add8616767e7886e7e304cc96fc891df927085d1e4d90ee9bb13b370762c6c5a8dd43baefa163312267203bc1b371954320bc27c32d0831f7f8937f288da999e506bf6f47d101cd2e49a870f3d5be428d321149f7e1c7d1146569d22f19640d62325665b6d08e7254a89535c021c8b464d65e754312dd47dde08be9ca58856a97d3c3f243030ccfbd8c1bda5ddca2b3618b113f6c1640afa14936b8c16c59d77c44139fe75f3719e2a83924fed36514c61787b02ace0d439f8d3c4fea81c9bf01684f8c06a39f7ec626e93d59262db87f2eea30dd0f849a3436d8dd36d2188f2e52826e8b96bf72614c256cda9867b1905a1d1cd3edebd18\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 2 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 2 columns in total]" ], "text/plain": [ " ml_generate_text_llm_result \\\n", @@ -780,20 +806,11 @@ "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "# Ask the same question on the images\n", - "answer = gemini.predict(df_image, prompt=[\"what item is it?\", df_image[\"image\"]])\n", - "answer[[\"ml_generate_text_llm_result\", \"image\"]]" ] }, { + "id": "68857305", "cell_type": "code", - "execution_count": 11, - "metadata": { - "id": "IG3J3HsKhyBY" - }, - "outputs": [], "source": [ "# Ask different questions\n", "df_image[\"question\"] = [\n", @@ -803,11 +820,19 @@ " \"is it for pets?\",\n", " \"what is the weight of the product?\",\n", "]" - ] + ], + "metadata": { + "id": 
"IG3J3HsKhyBY" + }, + "execution_count": 11 }, { + "id": "829afc69", "cell_type": "code", - "execution_count": 12, + "source": [ + "answer_alt = gemini.predict(df_image, prompt=[df_image[\"question\"], df_image[\"image\"]])\n", + "answer_alt[[\"ml_generate_text_llm_result\", \"image\"]]" + ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -816,6 +841,7 @@ "id": "qKOb765IiVuD", "outputId": "731bafad-ea29-463f-c8c1-cb7acfd70e5d" }, + "execution_count": 12, "outputs": [ { "name": "stderr", @@ -838,8 +864,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ml_generate_text_llm_resultimage
0The item is a container of Dog Paw Balm.
1The picture contains many colors, including white, black, green, and a bright blue. The product label predominantly features a bright blue hue. The background is a solid gray.
2Here are the product names from the image:\\n\\n* **Timoth Hay Lend Variety Plend** is the product in the yellow bag.\\n* **Herbal Greeıs Mix Variety Blend** is the product in the green bag.\\n* **Berry & Blossom Treat Blend** is the product in the purple bag.
3Yes, it is for pets. It appears to be a cat tree or scratching post.\\n
4The image shows that the weight of the product is 15 oz/ 257g.
\n", - "

5 rows × 2 columns

\n", - "
[5 rows x 2 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eml_generate_text_llm_result\u003c/th\u003e\n", + " \u003cth\u003eimage\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003eThe item is a container of Dog Paw Balm.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=988967bbfa1e4c70be10a6faa407fde8edbe0e93a7a1c9f9fbe8e81fa55c11bcc27b9673ae4d91b13edbc56ad5e50051a81d1639cafd38946f693f73e81e86a0bf6e945c3a8edf9b3b2d275203caba770fcb9a9fa272b969023fabda363efc8d90ae4b2bbc9a4f420699f3604d0a13607f96694687529b38bd80b211f5998ef0a11ab0f3a0b936a4e6068a9289ec43a9536416b5782ca2a0645c2b43f94ac6b8e31632c62b3714b56f28dce7e5675a66ded7bcc9d1c1a154f5a83f826b3f4b1363b1316536549d959d664931e6cd462a9d83362257e5226ad5b35b5e6c0e6a155fd62d2890b2fc071b59e6e4fa796a22e346bceebc4fb131eee66793f6748699\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003eThe picture contains many colors, including white, black, green, and a bright blue. The product label predominantly features a bright blue hue. 
The background is a solid gray.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=6098f2a2487364290c54d4f6bcc98f29e5097702ab78da4ff9ef97d13c03a7fa3a17bd22324d61ae8264d3a59a6bdb2bf4be55bb7efffcb00c68f0c9b69f413f8b33b2089697456ef919690d59a68548e95ebd68526de53ab9062e18009ab73452fc1934c43f99ad75a45931fb250ef1e78a7ced3e0bfc9d9468ef300a63d750b8e249e7d20afa00895b7e586b16686141799603bcdf731b48237323de166d0a1a4f310519671a4ce6ae56b5d4ebd1887361cc21130b3e8fa57a6107d50239a671319ecbef623719554a9642489d2ea083c2cc466f5d73c55084023567e9db291b40a335e7f65b20da018a70cbf5ef9654aa86ec500aa4df35c1db81116abc83\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003eHere are the product names from the image:\\n\\n* **Timoth Hay Lend Variety Plend** is the product in the yellow bag.\\n* **Herbal Greeıs Mix Variety Blend** is the product in the green bag.\\n* **Berry \u0026 Blossom Treat Blend** is the product in the purple bag.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=9672e4d0fa6ea5f4bcf1605e35a7642cab21e7decfcfe55e37616c99a7774445cecf69c848ccd0dd92f5ae1925bfaa4bbd62ba413ecfffc835aa9a4596d18036381d595ec3387d4200a08b8fdb02ddfdb7432ebd14d5d4b77abf4628e61899c4383899ddbe6465ce91b856e8a02f37391c850589a971e6045ed0c389133dd63d4bac98d5fc744f83a51b000bf29c51111589bd34e2d287cc835ccd8c009270cef818eb3ed9ee07f9978bf24ce994311af944fc2b7d7066c3c37613fe08e0542efd277ab77df0fe1f168e05d2f6d6749e5d8af573c22032b29c3c47ac2f3b7978a09139ddfd38ab9045b5ae86d8804f23db758a7602225bd749a5b7552bae4eb8\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003eYes, it is for pets. 
It appears to be a cat tree or scratching post.\\n\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=5c8e23c4c5f8017e09bf02b1b480200c74c1523aa25297141fe4abf226d7e18c546aed2ea5a2b5b5f8c5b90f2f4c569accf41a73ab8776285b458d230c6df43025eeb3066d40fe141089fa547b5a1f6a12d8c2eed0e614c8667be12c237d7ba4b29a530c16a6c8528cdfc8d8c4761aeae91c5f8452069f88de31cb637aaa34cbea60d8a8e9bd66b9d2f6f2c70a60d7791fc6bac13a4dd3736323da29fb52c5c90068c57c088407935255468513b780c07ea8e922f690645ee5ed8644240f894061c662b7bfe7ab660fd8466280e428424724f62a6c4eb6c781e3bf988059bd9f63fd6ffc2e75d296bca2ef068c6cdda0f6bb994f6799e04b33b4352f66a68224\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003eThe image shows that the weight of the product is 15 oz/ 257g.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=3be42f1f88c163e019f7496107a2f7d69b90a5482dd157a5d05e427fa117dd334a834dd9c72d0c84126e4f2c9e35903521ca8e81645afc945d8dbe47671f6b2f73ccbf494df6e5ad8e19c04ab4c6020859519c663b6fd57b19512fa94562106d01073b9122011ca602279a712fd761dc4ffaed3a8d7b76abcf1f42c2c1bce33228cff0e0454107cb84039a8981a2fb689191c43b7edf54e19354ff2e8c5deeb3a07944285b15db8a4fe474744f52852f8048c377708e2b3b85ee639b952e9292bc8d60d9b1a37c84ab83398ef295b8ba6b3c8e3a500714fb70e91b5fa7f19301fa4ce1bb1041f60a8ce84b3d863d5ba66f14614ed27689cd4daeb293b493172b\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 2 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 2 columns in total]" ], "text/plain": [ " ml_generate_text_llm_result \\\n", @@ -913,15 +939,17 @@ "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "answer_alt = gemini.predict(df_image, prompt=[df_image[\"question\"], df_image[\"image\"]])\n", - "answer_alt[[\"ml_generate_text_llm_result\", \"image\"]]" ] }, { + "id": "e75df430", "cell_type": "code", - "execution_count": 13, + "source": [ + "# Generate embeddings.\n", + "embed_model = llm.MultimodalEmbeddingGenerator()\n", + "embeddings = embed_model.predict(df_image[\"image\"])\n", + "embeddings" + ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -930,6 +958,7 @@ "id": "KATVv2CO5RT1", "outputId": "6ec01f27-70b6-4f69-c545-e5e3c879480c" }, + "execution_count": 13, "outputs": [ { "name": "stderr", @@ -954,8 +983,8 @@ { "data": { 
"text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ml_generate_embedding_resultml_generate_embedding_statusml_generate_embedding_start_secml_generate_embedding_end_seccontent
0[ 0.00638822 0.01666385 0.00451817 ... -0.02...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
1[ 0.00973976 0.02148137 0.0024429 ... 0.00...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
2[ 0.01195884 0.02139394 0.05968047 ... -0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
3[-0.02621161 0.02797648 0.04416926 ... -0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
4[ 0.05918628 0.0125137 0.01907336 ... 0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
\n", - "

5 rows × 5 columns

\n", - "
[5 rows x 5 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eml_generate_embedding_result\u003c/th\u003e\n", + " \u003cth\u003eml_generate_embedding_status\u003c/th\u003e\n", + " \u003cth\u003eml_generate_embedding_start_sec\u003c/th\u003e\n", + " \u003cth\u003eml_generate_embedding_end_sec\u003c/th\u003e\n", + " \u003cth\u003econtent\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e[ 0.00638822 0.01666385 0.00451817 ... -0.02...\u003c/td\u003e\n", + " \u003ctd\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003e[ 0.00973976 0.02148137 0.0024429 ... 0.00...\u003c/td\u003e\n", + " \u003ctd\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003e[ 0.01195884 0.02139394 0.05968047 ... -0.01...\u003c/td\u003e\n", + " \u003ctd\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003e[-0.02621161 0.02797648 0.04416926 ... 
-0.01...\u003c/td\u003e\n", + " \u003ctd\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003e[ 0.05918628 0.0125137 0.01907336 ... 0.01...\u003c/td\u003e\n", + " \u003ctd\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 5 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 5 columns in total]" ], "text/plain": [ " ml_generate_embedding_result \\\n", @@ -1034,18 +1063,18 @@ "4 [ 0.05918628 0.0125137 0.01907336 ... 0.01... \n", "\n", " ml_generate_embedding_status ml_generate_embedding_start_sec \\\n", - "0 \n", - "1 \n", - "2 \n", - "3 \n", - "4 \n", + "0 \u003cNA\u003e \n", + "1 \u003cNA\u003e \n", + "2 \u003cNA\u003e \n", + "3 \u003cNA\u003e \n", + "4 \u003cNA\u003e \n", "\n", " ml_generate_embedding_end_sec \\\n", - "0 \n", - "1 \n", - "2 \n", - "3 \n", - "4 \n", + "0 \u003cNA\u003e \n", + "1 \u003cNA\u003e \n", + "2 \u003cNA\u003e \n", + "3 \u003cNA\u003e \n", + "4 \u003cNA\u003e \n", "\n", " content \n", "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", @@ -1061,39 +1090,24 @@ "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "# Generate embeddings.\n", - "embed_model = llm.MultimodalEmbeddingGenerator()\n", - "embeddings = embed_model.predict(df_image[\"image\"])\n", - "embeddings" ] }, { + "id": "23892b0e", "cell_type": "markdown", - "metadata": { - "id": "iRUi8AjG7cIf" - }, "source": [ "### 5. 
PDF extraction and chunking function\n", "\n", "This section demonstrates how to extract text and chunk text from PDF files using custom BigQuery Python UDFs and the `pypdf` library." - ] + ], + "metadata": { + "id": "iRUi8AjG7cIf" + }, + "execution_count": null }, { + "id": "136a18b8", "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", - " return global_session.with_default_session(\n" - ] - } - ], "source": [ "# Construct the canonical connection ID\n", "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", @@ -1106,7 +1120,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"pypdf\", \"requests\", \"cryptography\"],\n", ")\n", - "def pdf_extract(src_obj_ref_rt: str) -> str:\n", + "def pdf_extract(src_obj_ref_rt: str) -\u003e str:\n", " import io\n", " import json\n", " from pypdf import PdfReader\n", @@ -1133,7 +1147,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"pypdf\", \"requests\", \"cryptography\"],\n", ")\n", - "def pdf_chunk(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -> list[str]:\n", + "def pdf_chunk(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -\u003e list[str]:\n", " import io\n", " import json\n", " from pypdf import PdfReader\n", @@ -1151,7 +1165,7 @@ " page_text = page.extract_text()\n", " if page_text:\n", " curr_chunk += page_text\n", - " while len(curr_chunk) >= chunk_size:\n", + " while len(curr_chunk) \u003e= chunk_size:\n", " split_idx = curr_chunk.rfind(\" \", 0, chunk_size)\n", " if split_idx == -1:\n", " split_idx = chunk_size\n", @@ -1162,18 +1176,34 @@ " if curr_chunk:\n", " all_text_chunks.append(curr_chunk)\n", " return all_text_chunks" + ], + "metadata": {}, + "execution_count": 14, + "outputs": [ + { + "name": "stderr", + 
"output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", + " return global_session.with_default_session(\n" + ] + } ] }, { + "id": "234a5f86", "cell_type": "code", - "execution_count": 15, + "source": [ + "df_pdf = session._from_glob_path(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*\", name=\"pdf\")\\n\\n# Generate a JSON string containing the runtime information (including signed read URLs)\\naccess_urls = get_runtime_json_str(df_pdf[\"pdf\"], mode=\"R\")\\n\\n# Apply PDF extraction\\ndf_pdf[\"extracted_text\"] = access_urls.apply(pdf_extract)\\n\\n# Apply PDF chunking\\ndf_pdf[\"chunked\"] = access_urls.apply(pdf_chunk, args=(2000, 200))\\n\\ndf_pdf[[\"extracted_text\", \"chunked\"]]" + ], "metadata": {}, + "execution_count": 15, "outputs": [ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
extracted_textchunked
0CritterCuisine Pro 5000 - Automatic Pet Feeder...[\"CritterCuisine Pro 5000 - Automatic Pet Feed...
\n", - "

1 rows × 2 columns

\n", - "
[1 rows x 2 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eextracted_text\u003c/th\u003e\n", + " \u003cth\u003echunked\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003eCritterCuisine Pro 5000 - Automatic Pet Feeder...\u003c/td\u003e\n", + " \u003ctd\u003e[\"CritterCuisine Pro 5000 - Automatic Pet Feed...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e1 rows × 2 columns\u003c/p\u003e\n", + "\u003c/div\u003e[1 rows x 2 columns in total]" ], "text/plain": [ " extracted_text \\\n", @@ -1219,37 +1249,29 @@ "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "df_pdf = bpd.from_glob_path(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*\", name=\"pdf\")\n", - "\n", - "# Generate a JSON string containing the runtime information (including signed read URLs)\n", - "access_urls = get_runtime_json_str(df_pdf[\"pdf\"], mode=\"R\")\n", - "\n", - "# Apply PDF extraction\n", - "df_pdf[\"extracted_text\"] = access_urls.apply(pdf_extract)\n", - "\n", - "# Apply PDF chunking\n", - "df_pdf[\"chunked\"] = access_urls.apply(pdf_chunk, args=(2000, 200))\n", - "\n", - "df_pdf[[\"extracted_text\", \"chunked\"]]" ] }, { + "id": "d80effbe", "cell_type": "code", - "execution_count": 16, + "source": [ + "# Explode the chunks to see each chunk as a separate row\n", + "chunked = df_pdf[\"chunked\"].explode()\n", + "chunked" + ], "metadata": {}, + "execution_count": 16, "outputs": [ { "data": { "text/html": [ - "
0    CritterCuisine Pro 5000 - Automatic Pet Feeder...\n",
+       "\u003cpre\u003e0    CritterCuisine Pro 5000 - Automatic Pet Feeder...\n",
        "0    on a level, stable surface to prevent tipping....\n",
        "0    included)\\nto maintain the schedule during pow...\n",
        "0    digits for Meal 1 will flash.\\n\u0000. Use the UP/D...\n",
        "0    paperclip) for 5\\nseconds. This will reset all...\n",
        "0    unit with a damp cloth. Do not immerse the bas...\n",
-       "0    continues,\\ncontact customer support.\\nE2: Foo...
" + "0 continues,\\ncontact customer support.\\nE2: Foo...\u003c/pre\u003e" ], "text/plain": [ "0 CritterCuisine Pro 5000 - Automatic Pet Feeder...\n", @@ -1266,60 +1288,29 @@ "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "# Explode the chunks to see each chunk as a separate row\n", - "chunked = df_pdf[\"chunked\"].explode()\n", - "chunked" ] }, { + "id": "118cf1c7", "cell_type": "markdown", - "metadata": {}, "source": [ "### 6. Audio transcribe" - ] + ], + "metadata": {}, + "execution_count": null }, { + "id": "1794c54f", "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], "source": [ - "audio_gcs_path = \"gs://bigframes_blob_test/audio/*\"\n", - "df = bpd.from_glob_path(audio_gcs_path, name=\"audio\")" - ] + "audio_gcs_path = \"gs://bigframes_blob_test/audio/*\"\\ndf = session._from_glob_path(audio_gcs_path, name=\"audio\")" + ], + "metadata": {}, + "execution_count": 17 }, { + "id": "c9f9d484", "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, - { - "data": { - "text/html": [ - "
0    Now, as all books, not primarily intended as p...
" - ], - "text/plain": [ - "0 Now, as all books, not primarily intended as p...\n", - "Name: transcribed_content, dtype: string" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ "# The audio_transcribe function is a convenience wrapper around bigframes.bigquery.ai.generate.\n", "# Here's how to perform the same operation directly:\n", @@ -1345,28 +1336,39 @@ "\n", "transcribed_series = transcribed_results.struct.field(\"result\").rename(\"transcribed_content\")\n", "transcribed_series" - ] - }, - { - "cell_type": "code", - "execution_count": 19, + ], "metadata": {}, + "execution_count": null, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, { "data": { "text/html": [ - "
0    {'status': '', 'content': 'Now, as all books, ...
" + "\u003cpre\u003e0 Now, as all books, not primarily intended as p...\u003c/pre\u003e" ], "text/plain": [ - "0 {'status': '', 'content': 'Now, as all books, ...\n", - "Name: transcription_results, dtype: struct[pyarrow]" + "0 Now, as all books, not primarily intended as p...\n", + "Name: transcribed_content, dtype: string" ] }, - "execution_count": 19, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } - ], + ] + }, + { + "id": "7209a62a", + "cell_type": "code", "source": [ "# To get verbose results (including status), we can extract both fields from the result struct.\n", "transcribed_content_series = transcribed_results.struct.field(\"result\")\n", @@ -1381,36 +1383,47 @@ "# Package as a struct for consistent display\n", "transcribed_series_verbose = bbq.struct(transcribed_series_verbose).rename(\"transcription_results\")\n", "transcribed_series_verbose" + ], + "metadata": {}, + "execution_count": 19, + "outputs": [ + { + "data": { + "text/html": [ + "\u003cpre\u003e0 {'status': '', 'content': 'Now, as all books, ...\u003c/pre\u003e" + ], + "text/plain": [ + "0 {'status': '', 'content': 'Now, as all books, ...\n", + "Name: transcription_results, dtype: struct\u003cstatus: string, content: string\u003e[pyarrow]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } ] }, { + "id": "c8351cc3", "cell_type": "markdown", - "metadata": {}, "source": [ "### 7. Extract EXIF metadata from images" - ] + ], + "metadata": {}, + "execution_count": null }, { + "id": "e59670b9", "cell_type": "markdown", - "metadata": {}, "source": [ "This section demonstrates how to extract EXIF metadata from images using a custom BigQuery Python UDF and the `Pillow` library." 
- ] + ], + "metadata": {}, + "execution_count": null }, { + "id": "fda362f4", "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", - " return global_session.with_default_session(\n" - ] - } - ], "source": [ "# Construct the canonical connection ID\n", "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", @@ -1426,7 +1439,7 @@ " container_cpu=0.33,\n", " container_memory=\"512Mi\"\n", ")\n", - "def extract_exif(src_obj_ref_rt: str) -> str:\n", + "def extract_exif(src_obj_ref_rt: str) -\u003e str:\n", " import io\n", " import json\n", " from PIL import ExifTags, Image\n", @@ -1443,12 +1456,28 @@ " tag_name = ExifTags.TAGS.get(tag, tag)\n", " exif_dict[tag_name] = value\n", " return json.dumps(exif_dict)" + ], + "metadata": {}, + "execution_count": 20, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", + " return global_session.with_default_session(\n" + ] + } ] }, { + "id": "40bb6bc9", "cell_type": "code", - "execution_count": 21, + "source": [ + "# Create a Multimodal DataFrame from the sample image URIs\\nexif_image_df = session._from_glob_path(\\n \"gs://bigframes_blob_test/images_exif/*\",\\n name=\"blob_col\",\\n)\\n\\n# Generate a JSON string containing the runtime information (including signed read URLs)\\n# This allows the UDF to download the images from Google Cloud Storage\\naccess_urls = get_runtime_json_str(exif_image_df[\"blob_col\"], mode=\"R\")\\n\\n# Apply the BigQuery Python UDF to the runtime JSON strings\\n# We cast to string to ensure the input matches the UDF's signature\\nexif_json = access_urls.astype(str).apply(extract_exif)\\n\\n# 
Parse the resulting JSON strings back into a structured JSON type for easier access\\nexif_data = bbq.parse_json(exif_json)\\n\\nexif_data" + ], "metadata": {}, + "execution_count": 21, "outputs": [ { "name": "stderr", @@ -1462,37 +1491,17 @@ { "data": { "text/html": [ - "
0    {\"ExifOffset\":47,\"Make\":\"MyCamera\"}
" + "\u003cpre\u003e0 {\"ExifOffset\":47,\"Make\":\"MyCamera\"}\u003c/pre\u003e" ], "text/plain": [ "0 {\"ExifOffset\":47,\"Make\":\"MyCamera\"}\n", - "Name: blob_col, dtype: extension>[pyarrow]" + "Name: blob_col, dtype: extension\u003cdbjson\u003cJSONArrowType\u003e\u003e[pyarrow]" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "# Create a Multimodal DataFrame from the sample image URIs\n", - "exif_image_df = bpd.from_glob_path(\n", - " \"gs://bigframes_blob_test/images_exif/*\",\n", - " name=\"blob_col\",\n", - ")\n", - "\n", - "# Generate a JSON string containing the runtime information (including signed read URLs)\n", - "# This allows the UDF to download the images from Google Cloud Storage\n", - "access_urls = get_runtime_json_str(exif_image_df[\"blob_col\"], mode=\"R\")\n", - "\n", - "# Apply the BigQuery Python UDF to the runtime JSON strings\n", - "# We cast to string to ensure the input matches the UDF's signature\n", - "exif_json = access_urls.astype(str).apply(extract_exif)\n", - "\n", - "# Parse the resulting JSON strings back into a structured JSON type for easier access\n", - "exif_data = bbq.parse_json(exif_json)\n", - "\n", - "exif_data" ] } ], @@ -1518,6 +1527,6 @@ "version": "3.13.0" } }, - "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 0, + "nbformat": 4 } From 991691afbed9adb419be1eae6ecee24254f04855 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 29 Apr 2026 04:58:18 +0000 Subject: [PATCH 23/39] fix notebook outputs field --- ...with-bigframes-over-national-jukebox.ipynb | 155 ++++++++++-------- 1 file changed, 91 insertions(+), 64 deletions(-) diff --git a/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb b/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb index fe68d0107bfd..ac10f68f639a 100644 --- a/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb 
+++ b/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb @@ -25,7 +25,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "194%" } } @@ -47,13 +47,13 @@ "\n", "* Learn more at https://www.loc.gov/collections/national-jukebox/about-this-collection/\n", "\n", - "\u003cimg src=\"https://www.loc.gov/static/collections/national-jukebox/images/acoustic-session.jpg\" alt=\"recording 100+ years ago\" width=\"400px\" /\u003e" + "\"recording" ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "z-index": "0", "zoom": "216%" } @@ -73,7 +73,7 @@ "\n", "To search the National Jukebox, we combine powerful features of BigQuery:\n", "\n", - "\u003cimg src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAALEAAAFdCAYAAABM2IyIAAAAAXNSR0IArs4c6QAAIABJREFUeF7tnQfYHUXZ/p+3p4ckpJBgQj4JUkLxjxQFCTUEBRSET4SACAIWOirlE+kgICJIU0GwUVR6UVQg+An4RRNAipKQUBNCSOEl9e3/65mdZ/aZZ2fL++aEc3Z3znWF8+6ePXvOzPzOzT33zM7W9fT09IB/+BrIcQ3UeYhz3Hr+q6sa8BB7EHJfAx7i3DehL4CH2DOQ+xooHcRr166FDz5YAaNGjcx94/kCBDVQKojfeOMNOOjgQ2DlylVw4w3XwV577ZnKwezZz8IXDvlvddxr8+emHu8P+PBrIJcQr1q1Co796vGqtr52wvGw++5TnDXX2toKJ3ztG+q1r3/tBFi5ciWceNIpavuEE46Ds878TmqNe4hTq6jqB+QSYqy1Qw75IsyaPRv2228a3HD9j50Veeedd8HZ53wXGhoa4J//+D9oaWlWUL+76F246abrYeLEiakN4CFOraKqH5BbiG+77ZdwwYUXQb9+/eDZ2f9Qz/IxffqX4amnn1a24eaf/aRPle0h7lO1fahvyi3E7733Huy08y6AA47XXnM1HHDA/lbF8devueZqOFC8HlfLy5e/D+idJ07cBIYOHQpZIEbbMn/+a7DxxuNg5Mj0DiPamnnz5sOGG46AcePGfagNXsQPyy3E2BiktPtN2xduuOE6q31uvfUXcOFFF8OAAQNg9qyZ0NLSAl1dXXDE9KPUcZdcfBF89KP/Zd7z8sv/hksuuQyefuYZs2/PPXaHk08+CT5/0BfUPtmxe+qpp+Hqq69RtoYem222GZxx+qkwdeo+EV6effY5+P7lV8DMmf8wr40ZMwaOOeZoOO6rxxaRrw+lTLmGmDyvy1Ic/IVDAaE56KDPww+vulJVJkK86aTN1d/333c3bLPNNurvl156WaUWHR0danv8+I/AsmXLVUdw8uSt4MUXX4pAfO+998HpZ3xb7a+rq4PJkydDc1OTAfqySy+Gww77omlEBP3II4+GNWvWqH0IOyp+
W1ub2j7uuK/COWef+aE0etE+JNcQr169Grbdbnvo7OwEbhkWLlwIu+waJBa33XoLTJmyWyLERxxxlFJg7Ohd86OrYOutt1bAP/DAg3DW2f8D7e3tFsQI4ic/9WlAG/H5zx0I5577PzB8+HB1zA033gRXXnkV9O/fH/4x8xkYOHCgAnf3PfaCxYvfgz123x0uuuh8ZSPwvLfffqfy9vi45eafwp577lE0xtZ7eXINMdYORm2PP/4ETNt3Ktx44/Wqwq6/4Ub4wQ9+CCNGjID/+/tTKp2IU+J3310MO39yF/X6XXfeDjvuuINV6ZdfcSXcdNNPLYgffvgRFdWNHj0K/vrkE9Dc3Gze093dDftMnaY8MqYmmJ489NDDcNLJp8LgwYPh78/8TVkc/jjttDPgvvsfsMqw3lu+QB+Qe4jvu+9+OO30b0FjYyO88K9nVUoxbb/94ZVXXoGjjz4Kzvveuaa5XHbiz3/+Cxx/wtdVJ+65Z/8ZaVq0ARjn4YM8MXrnm2/5uVLfSZM2jbxnzpw5gB1EzKExj7744kvhlp/fCvvuOxVu0j80/iaCHH8Uf3/mqQLh9eEUJfcQo6dES4HPV199FXx8u21h9z32VrV33713w7bbBr43Tonv+u3v4KyzzoFNN/0o/PlPf4zU+oIFC2DXT+9uQXzGt74D99xzb2oLnXbqKXDyyScCHX/EEV+Ciy+6MPK+f86aBYceepjy1vPnzUk9rz/AroHcQ4zFwf9Vo5rtv/9nYdKmm8LVP7pGdc6enPG4VVqXEv/x0T/B17/+zV4pMSnrkdOPgAsvPD+VKa/EqVW0TgcUAmICcciQIfCRj2ys0gZSQV47Lojnzn0Vpu67nzrst3fdATvs8AmrQq+44gdw403BQAnZiQcefAhOOeU02HzzzeEPjzwYaQBU6UmbTYLJW22l1NV74nViNPXNhYAY0wm0FJhW0GPGjMdgwvjxqUqMB6D9wLhrk002gWuvvRq2njxZpRMPPvQwnHnm2ZF0Audu4EALPp9/3rnw5S8H2TM+fvObO+C7535PKftTf3vSpBO77Lqb8sm77fZpwPht7NixkXQCs27MvP2jdzVQCIixyNynbrfdtnDvPb+P1ERcTkxKSW+YMGECLF26VOXE+DcCzpUY/6aMGv/ebNIkGDZ8mIKaMuULLjgPjjpyuvkOPFem97z51luAU0PVD2n3KXDrz2/uXev5o1UNFAbiJ2bMgGOOOU4VChMJTCbkIw5iPA7nYlz5g6ssNd9ppx3hzO98G3DgREKM23/5y2Nw7Y+vhxdeeMF8FMZ5x3zlaDjnnLMin4/zOK699rrIiN306Yer2XgUBXo2e1cDhYG4d8V2H7169Rp4/vnn1YsbbTRG2YssD7QJ8+a9Ch0dnbDxxhsrX5708HMnstRq9mM8xNnryh9ZozXgIa7RhvFfK3sNeIiz15U/skZrwENcow3jv1b2GvAQZ68rf2SN1oCHuEYbxn+t7DXgIc5eV/7IGq0BD3GNNoz/WtlrwEOcva78kTVaAx7iGm0Y/7Wy14CHOHtd+SNrtAY8xDXaMP5rZa8BD3H2uvJH1mgNeIhrtGH818peAx7i7HXlj6zRGvAQ12jD+K+VvQY8xNnryh9ZozXgIa7RhvFfK3sNeIiz15U/skZrwENcow3jv1b2GvAQZ68rf2SN1oCHuEYbxn+t7DXgIc5eV/7IGq2BYkLc3Q3Q2Q7Q1YnLw0NPVyfUQQ9Aj+Mf7sfj8YGv8+PU37RfHRBsB//Rx9N2XbBdF76sjqHtOvyDPWjbeq7DZeeDg9Sz3sa/Xdt8f309QH0DAK7F3NAI0NgMgPtK8CgWxN1dAO1tAF0dAtju+G0FMYNXMYnHC6gltBbMBCvuNNRqmglKoolvS2gFvASzE+L6AOzIP9pfH8Dc3BLAXeBHcSBGeDvaNKwEbQK8UpWlUtO2pdCkxlqJCQyl4PxBMDvIkQqs1FZLtnmOUV6pzApghBbhZ/AasPW+ppYA
5oI+igFx25rAOqAtUHDGPUs7wSCPQJygxAZaaSuSKHEoMkFLIBvLoKF0KrGEltSYK7Pj76ZmgObovf6KwHX+IUaAO8k+aChjYSaIBbzcTpBCO5VYQMsthaJBK7DyxtxWOLwwV2AOawRoZhksJZZ2QoBbrz01+mJS7MYmgJb+ReDWKkO+IUb70NEer8Cqw6Y7bpZ9kBBTx468sKOjxz0xKXFEkeMtcVDrZB303wSveZYdObIJpMx1AASntAwEqgteYzvqAluB9qJAj/xCjJ24NasC+6CU12EhSGEjyswUWXbsnEqs4Y5LJxTMskNHlOj9znSC++GEVCJTx04rrkokEHbhkfl2/4GF6uzlF+K1q4WN6A5SBgUz79Dp/ZYix9kJhxJbUJNloBiN2Yc0ZTMJm/DGsREb974uJSZoHbaBK6+EGrcbmgD62bchS/v6tfx6PiFGZV29IoRVKrGB2dXJo1yYd/JkisHzYg0u+V8eRBhbwYQ4rrVFTBzmwfoNHOY05VVKS8mE7NgxReYwY3rB1XjA4MLkyPmEuH0tAP6TNiETzDFgx0Vu5IUJXpkPG6/cWzvhgJescuLgRlyHLoudwGP0cdjBK0jslk+I16wMOnRkGwhmAzFLKeJshBXFuVILbRksBaYRO24nxAhdqhLH2QmXIouOnTWwIXPhGAW2vDFTY4zc+g+qZZeQ+bvlE+KVrQDdLBe2FBk9sOjk9SZy48PM3C7wv00HT9ezHOugqM08a7vBm8XYhzh4WZoRl0RY3pfsBYObWw4FM1NiHM0bNDQzKLV8YC4h7mldinfMCTtxlhLz/aKjlxi5sbkQ6txim/XpgjkWTIGtdEI2d1w6wSCliI18s4JTn0dFZno4Wnpc3JYpBNkF2i/hNfsbAIYEN1XP+yOXEEPr0gBgA2+XPVIX8cYOZaZojeZJ8Mk+ul8Xwuoa5CDI9cSfNBJS0wkClSmzCh44xKA7YxngtWAWnTrq5A0dkfatc/F6PiFe/h5T4ThFZlGby17w2WsqT9ZQkgJbSky2gc1is1IKamueF9M+1uHjCYXLTrigpeNIkc1gBk8nuB92eWMGMVfmYSNzAWnal8wpxIuFEhOwIiuWubFz0CMGXt6hc9kHV9SWWtvaQrCniNLyyUBkK6xnbTUi8ZnwxAQrtxsGYK3kw0alfeNcvJ5PiJctsgc2ZDqRqaOXoLwuZTYemCcUwhsnNXmSndDjFWZY2igyAcs9Mf+b58VCgZ1emBRZPw8fnQtI075kPiFeuijBE3Ov7FJoitMYxARtqq3g0Mq4jSgn+yDshGvY2dWRc6kvnsrYCAm0wz5IxeUDHRzuEWPS+MjF6/mEeMlCPWdCd+ioI5fW0VOvx9kHlkiYdEKDqeYDJSiwnE8cN53YTL1kkRuHVtkMbhfStkmJmT9OtBHCG284NheQpn3JfEL8HkKsVdYCmE/FdCmyBpErL4/TrL9dkVsKzKZ/FzMV00CsX5f+1yhuAry8g2fyY51aGJ8sbIOJ4sR+D3Ha72M9vr74bXv2WgRk2cHjcyNA+2lSZIcCp9oKllYYr2z9wQrvyIld8JIPjjxrz6EGLiTcpMS035FCWMrMX28AGDluPTbSh3fqfCqxgpiUVj6zqZlWXqyvpeN2QqUXDo+cqsiuwQ7WaC47wTt2FsT6fbFJhIA3Von1RCDueQ3A2m5IoEcl30j9w8Nw3T4pnxC/+6ZW0y7x7ABbzTcWsJptrcZqW3plsc8MhiSM5FlXdtCFHiwc5h05itnkYEYczOrCZam8cptshbQTQoGVvWgAGP2RdaOnRt6dY4iTlJh3+BjAFqwId4yloI5cbGoR442NJxZuQm2yoeTMgxo6lTA2gqZd0n7HtrIdGtI4L0xpxejxNYLhun2NfEL8zhsJdkLCzdWWdeyM8jpshjUtUyi04pdUWnhjDjEbqDNNJOFVbGdMIwhOOj5u2+zX6uv0xPq1MRPWjZ4aeXdOIX5dRGykvPJZq3CifZAQx0Gd
NEFI0KsuFBV2wmUllD1gKm2lDVk6csxOWFDrETm1r0FPoNfPaCNIqT3EVfwZLnwtXYm5D6a/LfvgsBNxHb2kwRBSZl4dfEhadujkAIdrbgSHOZPysgEQSjFcCiz3jZ1YxUas3EfnU4ldEMuYLQIxRWtpyptkLxwDJU6IWTxhTfpxzEpLmx9hRWtxyss8chLE5IUJZg9x5X5JvT7TgvnxSmxgZtEZjdQZRRbZMHX4kpRYDorovp3xx6oQjgtHXbPV8FBrnjDzxk6vG6e0rv3UsXPEalyJ8e+x/9Xrqq/FN+RTiSMQu6I2DrHwxnHQcsixtWSa4cqPae4xLTJILFNrR7xwlo5cQpTGIVdX53NopSLzlAK9MOvseYir/Ht8e55WYtaR43bCshIynWB2IU2ZKaWQOfJGYwCGDGGrYlJK4YgnSIk7OgDeWgDQ1sZG3jjQ+u9M9sFhKywbwWHWlyQZFWYdvY0/WuWGrMzH51OJM0Es4OUJhQWvA2pSXNcgyWaTACb2MZpaswbgr0+r5WZNtGZNck9QVpUoZFFe/mOgK0D44AdLKcZ5iCvzM+rLWd5+Nbg0yagvKrLeloCaETvZsUtSZD3bzdgONvttzykAzc0ArR+EShy5UNRRqEEDAJqaAJ59AWDBQh17OaCMswcKYrIOvYGZKTGqsYrYNNQbb9qX2q+59+RUiTXEBlwGMR9mlhBK+xCnyNZQtYB/6l4ADfUAjz0JsHqNPVxt/DC7TInsxKd2ABi1IcCL/wZ4/c1AiVuaAIYNA1iyxFZmAywNI2toJcSWP5YKzLyyshI8J/YQV/+X+NZcpryowGkQp3Ts4uBWMLOhaTxu370DiP8yA2CVhJjNOTYdO92z22UHgNEjAf71cgBx/xaAnT8BsGgxwKvzQ2U2oIoOW0SJEUSpyDFe2SgwdfQ0xB+ZVP22rMA3yKcSOyFG4BBmVE5uB9gEIJlKpCkzh5iOJYj/PCNUYhoMIS/tWm9i1x1DiN9ZBLDLTgCDBgLMnccgTuiwWRBzwJlXjvwA9OIruJ/bCOrkeYgr8BPq6ynemhPEX1yBZT5sjdKhWsd09HB/0uietBzTUIkbAP6ESrw6OvtNDn7QYMauOwGMGQnw7zkAG48FGKxX35mDEM9jfpdsgV5QME6ZY/e73ieiNeWJGwA8xH0lsALvM0qs82GnnUjqyDmUWqYXcSN+09ATC4j5e+XqQATxbjsFSow/NlRCeqAS4z+ljq6cN8HrJh7PzmcNcrCOnYe4AjD29RRvzknwxL2EN1N6waDfTyvxo0+ESiwHScyKQGwq5W47B0osH6jECuI0WB2Qx3X0XCrtshPjN+trC9TU+3LqidFOiCmXSXYiLRfuzesGYrITDs9NF46auAwAkiBGOxHXceuVnYjz1N5O1NSvTn0ZrsQqZkNbQVBnzYN70+FjnloqsctT44+C5g4ThFO0Ei9ZBrAhWwNtfSuxNUlezDH2SlxFtl0dO2ttNhaNOVVWvB5JKRLSjVglFh1ErB6e96InHjMK4LmXgnx4C/2/cvLEdKylvEkeuTce2itxFWmN+ehqKLGK7noA+uyJGcSvvQGw9RYAm04E8Eq8znzl1BPP1VkwtxF8nQl+GZK2DRFFTojcIqN+TLkT0wnHslYynXj+ZQCEGBV328nBHVBNx44PLSdFbWwkL5Nn5krMruzw6cQ6/4D6foI85sSf1hEbjtghxNTpGzoYYOVKnxP3nQZcq1quwbQOZ/uw3spH7Mz8CRp+7m3EljDC5xp2nrIrwMB1uPPQ32cBLH7PTiMieW+l82LHiB1mxxv7YecPC9no5yDE1vzh9T13gnX0hg4F2HpLgCGDe1f+zk6AV18DeOVVu8OXNqmdT8E0E360nejN3AnXBCBvJ3rXhhU92kzF5Fc3V3AqpjWPWE7Z5GtVyAVWxA1pKKFQz67lWdPWjxCvWxDLKzqS0grpif0story2KeTVXM+MV+vImlVeSpY0tXNHOwsV3T4+cRO
XPLpiV1XdnBv7Jp9Fju8zDxx3Dxi60qPhMVU6Do765J9TbELZrIKfHkq54Wi4pq71HnEPL0Qk+KVrdBK7K/s6JOGVuZNmS5PojnErhWAXIMZYt5w6gpBLpipeI5J8XIRwcjaEmLuhDXiFzM53tkhlLbCX2NXGegqfRbn1c5svWJrBhrCxoaNXYrsSiEs2+BY1xjLxK9+VtuOgqat/EOX7kfshPDQciRP3R3Jtd6EnGvMcmFa+YdmtY3zl+xXGs3s5/PrToRTNxPTDbE6pl93Ijtj6/3IpGWsIounxFzNLC87SrUPGZZ65W4i0rHTEYXTG6ctnp0hxbBgZjekkeDytSc28stYrXdWYz+gTxC7lq9yrU8sFl3BL6EuP9JDz04bwYabaSFBCTEt7VrNtdj8MlbVYzbyye/gqpjyjqKOO4zKa+3kehJ0zZ1cosq5P+uqmMIbq6uddUcvskI8LZ4Sd4svx/rEcVEcn4+svLL0wmQt2FXPflXMKkLt1ycO51rIVTNN/ObXJ64ioRk+Wt3ugKcR4n516po7fbtc17VycSv88P3GPvB7erBEgmyFeWY+wnkLsAwrxfNFuKXiyshNduisbdftDWTUVg/gV4rPANv6OiQX9+zQNy6nxVN0vy7TbQ/MSB5fVyLGcliw+3t2rC/kKn/e1LsnyZs0uu6SFHfPDscaxJE8uA93T3JCzNU5ZmX43s65MHcPRTXWK8Urv+xQYn/3pMqzmfmM/j520SVdlZ3w97HLzFDVD8Tb4jpXhnfdZZTd105O3qGVe/i6EbScq4nWEm40Q1OxsywoKG97QMpMdoM6aK5tbi/MKppyRE8rrQJZA50Gtb+jaBVR7s29nRWg3F7E2QXXvZ01nXm/t7NlMdgNzT3EVYR46SLH7Q4ojaCkQt5ZVL9OSksXfkYGM2hQg93myygueWGXJ45LJ2ROrOvNKC6ba6zUmXlj3OZzK8y2WKuYK66aUxHjgS3LUQ8wYkwVG7FyH53PqZjLFun107h9iIOWDYKom5rTcTGK7LQRHFq28mVf7YTp5HFo9U5lHegGNWmDIWx42YKXQZwE9fDRlSOpimfKJ8TLF7tvUG5Byr0wh9ehyNaIXYaROQlvlssUedSWlFRYCp2WWGj/i2u8qvSB4E3wxNwzDxtVRfQq99E5hfi9wE6ooWeCkg87s8EPo7wcXgZ1nPLyG8pQx40v3co7c+ZvNo84vBujGHZ22AmXjUi0FVyBKZVg4GaBGaHfwLE2XOXY+tDOlE+IW5c4lJhBaiUXMfDSiB73yApG13VzZCG4ldDHsqfUVqO5E/gZPIUwSQW3FMIbG6hJcdUECZZEiIhN2ghXajF0ROpXzsMBuYS4p3UprjXA1NhlF4RKkyKr4WR9vAGY3ZNDQclApm0Oq9NOyOlr1Px6f1Y7wVWZd+zMlSAaYumBaTuTN9aX8A9ha8LlgdaY75hLiGFla7iIIJ/NZnlikVJE4OUgs7+lvSB4uUpTZcZ27ByTJ0xOnGAnLFshVZng1VduxEEbBzeN2NHrjY0AA4fmGN3wq+cT4tUrATrbHZ6YqbNTeV0KTO/RlzHxwQ4aHDEqvA52gg92xNkJglh6ZIrGjBqT/3U8UwfPCTPrADY1A/TXq9XnHOV8Qty+FgD/kS0gD2xmrqnLNphtEKpLk9xJuY2toDkWanQj4Ro6cYOZPqcT/F7PTHnVn1J5mQKbq6PXAeaWfgDN/XKOb/D18wkxQrt6hfbEPErj0y8JXPmsj1cdOAatBNm8rmU4zU7EWWJujTkyPEqLKLAGVl0NIu2DDpLN0HJGeyEHRAYMtm+7kGOc8wkxVvja1QCdHeG8YStuE1kwQk/KbD3rHC1JiSMws2yNR2+ZIda+InIVtIaTWwYLYgavUmK1hpUe0ZP5MFNol71oaALotw7rydUY8PmFGCe+r1llR21WbKbTiVhboUFXrztgJniVEOv0wtxURg4/u/JhLsEsUjO7CVqyEQS3S4X5Pn2pvuzY
mUnxIr1weeP+A4PLlwryyC/E2AC4ti928CKemC7sTLAUpgMn7YWGljyxVGIOsjXIwa+tY+kEmzqhHZx+YtAaO0HWQHti443ldozN4B1AqcAEc3MLQFNLQfDNsyfmTdC2RtgKV4cuLpUg2NOUmL1u4HYsHkgXhOIPhF8g6vLCdPWzGtlj0yrNVdFp9iIucmMjeGQ7CO7GJoCW/oUCOL8dO9kMbWsBujocaUQCvGQ98Fzkmc2wMh/80J0/C15jhoNvotKJjKaYRu3M6J2c7MPgtZSY9ks7EdOxMzPW9PswUitIGiGbP992gpcGrUVHG+voyYk+rm1XpEbQSnj50LOYBR+J2ByDHZYXZtEaV2QDuPTFZCe4jeDw8kiOK7T+G+0D2oiCPooDsVLUrsAnoyqrwQ7mjTNHai54XR05JcFaiYkOAXfgE4LjzLAzTyfoGjvWyTPRm77raFJaQWorUwqyEQ2NAbwF6sS5fofFgphKiPYAO3xdnQBdXdDT1Ql1FsQsIzaRG9kC8RoftYtLJywvLBdPoUlsYtw5MgFI2og0T8xUmaBFWPGWvQhvY3NhcuC0/4EUE+K0UvvXC1UDHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C1NTEM+d90Y5W8GXep1qoKYgXqeS+DeXtgY8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xOuh6Xt6euDNt96CCePHr4ez+1PKGvAQryMTH9t8K2hvb4fbf/Mr+OQnd4auri74/EFfgBdffAlO/OY34IwzTlvHT/BvT6uBwkCM8Hz1uBNgzZo1MGLECLj+umvTyl6R1yXES5cuhU/ssLM693bbbQv33vP7inyOP0l8DRQG4idmzIBjjjnOlPT+++6GbbbZZr23vYQYP/DSyy6HBx98EC44/zyYOnWf9f4dyv4BhYH41FNPh/sfeBAGDhwIq1atgqOPPgrO+9656719XRCv9w/1H2DVQCEgXr16Nfy/7XeEtrY2uPDC8+F73zsfhgwZArNnzYSGhoaKNfnatWth7ty5yq6MHTtWnbcvEHd3d8Obb74JK1asgEmTJkG/fv0q9h3LeKJCQPy7398N3/nOWSoNeOyxP8H2n9gJWltb4eaf/QT22mtPq11/dvMt8Nhjj8Nuu30avvH1r1mv/eIXv4Q//PFR2HWXXeDEE79hXlu+fDlccun34f77H4DOzk61f+LYOIdFAAAf00lEQVTEiXDrz38GBx18CCxf/r7p2OFr5513AbwyZw4cfviX4MAD9jfnQd9+
1VVXw5133aXegw/8kU2Zshuc+91zYJNNNikjg+tc5kJAfMQRR8HTzzxj0oCzzjoH7vrt7+Czn/0MXPfja6xKOud/zoU77rgTDj74ILjqB1dYr11w4UVw222/hM8deAD86Ec/VK9hR3Hafp+FN998S20PHToUBg0aBAsWLIBx48bBypUr1Q+G0gk85tBDD4N/zpoFZ591Jhx//FfV+zB2+/o3ToRHH/2T2h4woL9S81dfnae2hw8fDvfc8zsfy/UB6dxDvGjRIvjULrspSGY88ReYMGECPPX00zB9+pehsbERnn9uFgwYMMBUTW8hvvba6+DqH10D/fv3hyuv+D5Mm7avUk+E9JRTToeFCxeqc6dBfMvPb4WLL75UWYfLL78MPrPfNPX9EOLTT/8WvPDiizB58lbw4AP39aEZy/2W3EN8w403wZVXXgWbb745/OGRB43q7bjTp2DJkiVw2aUXw2GHfbHPEO+191SYP/81OO3UU+Dkk0+0aJkx40n4yjGB0qZBvPc++8K8efPh1FNOhlNOOck6zxtvvAG777G32odlwLL4R/YayD3E+0ydptTs298+w/K4ZA123HEHuOvO2/sEMaYck7feTr33kYcfhC22sOFCj4uvY4cvCeK08+D5p+67H8yd+ypceslF8KUvHZa9Bf2RkGuI8X/BBx54UGozPvP0/8KYMWPUcb2xE+iDp+wedAz/MfMZ2HDDDSOftduUPeCtt95OhBhfx+OSznPkUUfD3/72FJx+2qlw0knfTC2TPyCsgVxDTGqb1qAcjCSIzz7nu3DnnXeZjh122Lb7+Ce8EqdVcJVfzy3E+L/ynXbeBXCYd/oRh8NnPrNfpCrvuuu3agAEO3vY6cMHdq6wk7X33nvBz356k/Wefad9FubMmWOlE5g/Y8TmUsgnn/wrHP2VY9U5vCeuHsm5hfjxx5+AY796vKo5SiVkNc6aPRsOOSTo1OEcBpzLgFnw+RdcpNIG7EQh4Pig9AD/5hEbqTMef9VVV8K+U/eB+vp6mD37WTjp5FMzpxM33fRTuPyKK1U6gZ3NAw7YX6Uc2Nk77bQzVDqx2WabwaN/fLh6NOT0k3MLMQL00EMPq/kROE8i7oHx2zvvvANHTj9CjeYtWvQu7LHn3qozNnjwYNhyyy3ULLTnnnsexo8fD5gUcIgxQsPkoKOjQ30E5sSDBw+Ct99eoPLipqbGyGCHKyfGz9j/gM+pzhs+8L2jRo1UyQc+EOg7bv817LBDYF/8I3sN5BJiPsz83e+eA8ce85XYEn//8ivgJz/5mTUMjdHYmWedDYsXv6fe19LSAkcdOR1GjR4Fl1xymQUxvj5z5j/gjG99W4FLD1TwSy6+EL533vkKxDQ7ge/DgZMf/vBHcPc990RG7M44/TT1g/KP3tdALiHufTGj78D5C6i6qMxbbbWlgjzpgcfPfvZZ6GjvgIEDB8DWW28NdXV1ffoqfu5En6ot9k2lhbiy1ejPVs0a8BBXs/b9Z1ekBjzEFalGf5Jq1oCHuJq17z+7IjXgIa5INfqTVLMGPMTVrH3/2RWpAQ9xRarRn6SaNeAhrmbt+8+uSA14iCtSjf4k1awBD3E1a99/dkVqwENckWr0J6lmDXiIq1n7/rMrUgMe4opUoz9JNWvAQ1zN2vefXZEa8BBXpBr9SapZAx7iata+/+yK1ICHuCLV6E9SzRrwEFez9v1nV6QGPMQVqUZ/kmrWgIe4mrXvP7siNeAhrkg1+pNUswY8xNWsff/ZFamBYkLc3Q7QtQKgaxVA92qArjYA6ATo6bL/AW53B/tAP+O2+ps9q797gv34bLZxn1pBW79ObYLb8nJ+vV1Xrw/C5zoAddk/PuN2vX7GbbxNA23jc0P4T+3n2/h3I0BDC0D9AICGgQCNgwHqmisCSa2fpFgQd68F6FgG0LkCQAFK4MYBzCHWfxuwJcwSYmxaPGYdICaAFZQIM4eWoEZANbROeAnmRgZ5I0B9A/Q0DIa6puEA
9cW+J0hxIFbwLtfgSngZxAZuUuAugG6mxKTIUo25AmdSYtIvUmSHElsQkzILeAlspcwELELdGGzX8/0EMj6zvxuHASDMBX0UAOIegPbFgfoq5aV/LiXGm8Y4LEUSxMZWxNkJtBfaVijLIR8SYm0flN2QCsxthVZgBW6MEitLoWGmv82zABrtRfMoh83JP9n5h7j9XW0fOgG6NaTqWW5zqBnIFtRoD8hWkFIjpKjUzAtbSpwGMULCQDbqGwcxKqv2xBLeiPIyiJUiBzYieCalbtKKjZ4ZQR6df2pFCfINMVoI/GcpsEuNHd7Ypciq48c7eo5OXqRj11eImQpTx87q3PXWC3Pl1XArSyH+oa0omLXIL8TYiWtbEADcjcuucuXt0CmEA2iEV9kHUmaEFpWb0gmpxCKpSIQ4TeRkGuFSY925M+kEpRQIo/bC9KyUmeyE9MN6u56UuEl3/JoAWsYVqrOXW4h72hZCXecHbhW27ASDu1dpRZwi9wQWGPkL/6AdmZ/DFTVZh04pMSkw/c0VmcVqvOPGYVZQazvhUuK6piC16BfcEbUIj3xCjDnw2tcBelBxNaQKUL3t7OC5OnykyC6PzBUYjwsfph+ndxmmUxC2TkJhBd8Z8cCuPFhGajEKbADWCqwUmcHdf2JhcuR8Qty+BKBjcWAjXPDGKbHZj1Dy7Bj/ZpEbeWOdA+ONHlF6e6AH6vBZUMwhdmUTrqEP6uvh+YKBlIDqYMlj6thpYFWKwXNgZiNMB44psFOJEWbq8KGlGA3QNKIIQpzTW4CtfROgs5UpMfPEBmpUV/LG3COTFxYQm44ewRygpR40MGe2BdTOaI0O5pAGPwJ1Qr1At42w2R282ShzjI2w4jSCnHlfbidQibk6Nw4F6DfeQ1y1Glj1H4DuNQ4llvaCd+yknXBDrHjVSkuCa8Fcx2JhUwFaSfENdYFSK8+rt80zV1zOOE9vjc0I8A68c1aIWRJhvLGAl/Y3DAAYWIw7l+bTTqx4XqssQRtjK0iJI+kF5cjaVujBDg5rCHNgI5yKTEKthdXq5+nXOMuuHz2/Y0LgJDS8lr3QqhxJI3geTHZCKjGzEXUEND43AQzetmo6VMkPzifEH8xKhhi9b2xHL9rBI4/LYQ0gttg1yhrxxK6enXATCmZrvIwrrfTEbHiEoCbaeSrhTB+0Ghv7wMFF6Nn2kO0ryVLVzpVTiGeG+bBS2xRFVkqM+bDDI4fuwThbA6mAOJpK2DbCjtxcUsw8sVRadnjQteMdPd3XMz8MNjfCyoH5wAYpMKUSDpiH7Fg18Cr5wfmE+P2nbaVVcLpgjlFkPTxtUgfywMweBNY4lFi3Mtt5sUkvKMXQz5YGy2jN7udpDxx8LgUVxmVwuHF4OSYHDlIIAS3aB/TDylLg300AQz9ZSZaqdq6cQvy3AForYsNtyop5Xkz76TWdTnAFjoWY7ARTXD5z2N6d2IhmdjGbZsxnHJvBD0zX2JmMi2BphkrgFNkIMo/OHPBaMGt4FfxNABvsWjXwKvnB+YR4+ZMM4jg7IeENIY4ocMQ2BDuc6YSCOJpGZG2UAFbhgcPELYiI1cm4Zw6jNwM1KrWaEMejNQ2xyolFJ4621bP+N2xK1q9d08flFOLHbU8cayeYV1ZzK4Jb2xo4Dbw4iSfMhcPXwzyYv49aNPTI2cfsBMOh6hp1dacTEUXmiu2yDxxW8zoBjJA3Awzbo6bhzPrl8gnx0j8zD8xsRSLMHWakDeEjj0tRGsVr9n5mJyTkJKispg3KlA+LYWjeKPJmpFxhQzXmCmx39MzAnlFk1pEju0DeV2036xE7psQj9snKSU0fl1OIH3XYCeGRDdCkwN1MgXWYJsAkm8DTCQtqbQNcw85WK0thdk1Fj7MN2kmQLyavHCpx8El8fwA0DU2zjpsLXq7QI/ataTizfrl8QrzkkQSIox2+HjWYob2sUOHAEthQhxdqmOTYKLexFUqJbbthKt0OhK224MPO8R04HcUxyxBr
J+rUbI4AaqujxxTXKDJ17PRrG34mKyc1fVxOIX7IhtiyEQRx2OFTymnBG26HdiKc2OP0zAHrtqeWTSuHmSPDzix5EFEbdfiMwmZQZNWvo9SCfhGWF2bQuvZvuH9Nw5n1y+UT4vfud0PsgLkH0EbIjhtuB8PJyGWkIxeZOxEeryWdP5mwQQ6GBP/bD6dQuDxxRJmF+spBD67IAcRkL+hZx24ELYdX/j3yc1k5qenjcgrxfTojbo9R5PYwM9aQBrCK6Eyrc3S/tgk8P7b8M3UDbWVOauloR85OIcLsN4zYlEPQJ42zExEFp9gN0wduI/ggh9rfDDDy8zUNZ9Yvl0+IF9+tIdaWASfJm8EP2odpRLAuBELKO2iZYdYWgpyxtBlUyep8JLlmfjC9ag/JRaBkhwUdtHCkLrQLtkemOccKcmZLgm31rgBSJ8Rs/6gvZOWkpo/LKcS/F0qMEGMKYcNswDVe1mUfgv/dR22FiOEcSh5CbLexs18nPDAf1EBjI+Gl16PpRAC5K63g+4PBDp5UCKgR8lGH1DScWb9cPiFedFeYExO4SolDiFGFFbKiQ6eAlR00rdTOQQ6ZKXOYDcXaViTUukkirEGK6ESfdM/rUuoAahtuvPoDIXbASwo9+otZOanp43IK8Z0aYvS+aB+kN27XNoIUlqUR6k8BNymxA3reAQzTieSRPFeLhxAzL5zJ81KHLRzscKUSIcRsUIQsBXlgYy+aA8BHH1bTcGb9cjmF+HaHnQijtR41Sy3syHUzOxHsj9qHNM/MzyfnVPDZbnX1LdA8bFuob94AOlr/DZ2r3zZzJWiUTVtf9R0pnZCDF65Bjgi86keAcyiYZzaKTLkxKTFXZA3xmMOzclLTx+UU4t/ojhwpsHhGK8FtgMMuOD1wnDdOGKamH4UCsmEADNnsBGjop1fZ6emGVW/fB21LZhoI7JRBDGoQlGbwwp4cb9IKA2+KN1ajeA54SZnHHFHTcGb9cvmE+J1fJUJMaQT3uDbU1GmLdvRsuLXXdaYbgS3g2fDA8QdBvw13hp6utdC55h1oGjQRero7YPlL34eejpWB8poOXiCZpM7hczA7jW+HnTx7LoXtgWWHj2a5aV9M/pg/b3RkVk5q+ricQvzLBIg7I3MkLJjpknumujxyS8qT8cQW5EqGzaA1bLDFqdDYfyNYMf830Lb8X7DBlqdBY/8xsGL+r6H9/Rc0xMyzxkAdRmWuDlu8Nw7g1z8O80Pgo3aoytpK4LOHuIo/zoW/YBCLzl1PV5ALJ9oJ1uGTgyEcbkUoG442x7qnbg7f6lvQ0G8ktM65Gdo/mAsbbP4NaBo0AVa8die0LXvWVmIxXOzqmIW5MV31rKF2wBrrqdUVIHpwQz5v9OUqNmLlPjqnSnxb2LGjdMKkFKEfthTWpBIBlNITu7YpnuPD1vY0TnsEkCB+f84t0P7BHBjmhFinExGI3XaA2w23QjuUmcdt5IuNjWAdPQ9x5X5JvT7TwltZOoFKHKpxkAOHcEl7YLaFnbDTCdsLq9PpH4ENcXgc/jV88regsd8oWI5K3DoHhm/xTaXEH8y/E9Yum619bhRWWl8i4nG5N9YjeUk5shty7NzFKPHYr/S66mvxDflUYoQYs2GjwiydwPUtJXQJI3ZGgTmkbDAk9kfgmDg0fPIZTohb5wd2QqqqcrCWalJklrEDp4eZzVRM7bEjUZwZghae2ENcxd/kgp+zAQ6txPpqZ1JimfsmwyrtBWXJ2vs6Jgq5OoAjtg6UeNkrgRKP2DJQYoR47dLZGtgwlQiVM/C8sWmDM1KLDj/TOVTfzsqLqXOHnTpmJ8YeU8VGrNxH51SJNcQRJQ6uoeuzEiuvbM8rjrMf+DlNgzdTI1/tK+ZBd+caGIFK3H80LHvlFmhrfUVB3DxoArw//w5oW/ocNA+eAA1Ng1T81t2+TEOb7GnjOmz1ep5PAK7o+Ml5xrF2wkNcuZ9Sb8+04BaRTgR2go/URTpqCWlFN/fQfFg6
VoEDjzxsy5OhccA4eP/VX8PaZf+CYR87FlqGfgzWLJkFa1vnwNBNDoL6hn6w5KVroHP1Athw8ukqcmud90voeP8lk1a4ojEnvMx6uLyxsRHWcbihr7GTWfG4Y3tb8zV5fE6V+BZHOhHMZMtsG4QHTvO+9sShwH4MmXgEtAzfBla98wSseOsP0DJsMgybdJTV0O0r34BlL18P9Y39YOTHz4O6ugZY+sJl0NP+fjBjkqUU0svGpxHafohBEZqaGR2e1lc3G0XWtsLbiSr+KGOUWEGs04nYCC3tdWVHpBd2pxXNw7aCoR89So3QLX35OuhY/S70H7kDDBgVrKzT3dEKra/fA90dK2DYRw+HfiM+Dh0rX4f3/3ODfaFn1txXeF05JTPeEzMltjyxV+LqUbywNpQYWR+2+dfU8HJX2zJoff0+aGv9j1n+CiFraOoPgz+yv4IbH++/8hPoWDnfdOKSJv70VYnxc7jCB8taiWQCt70SV49hsJSY5hFrO5GmtM4ozTH4oa/BU/lwQjrRNHA8DJ10DNQ3DlAV0r5iPrR/MA+62luhedAm0LLB5lDfNEi9tua9mbDijbvZ1cnhHOBEmKmjpuxHUO/RNCPcF00nvJ2oIq0xH70Q0wk+CR4B1rmxYxiZT8VMtBn03l4MliDk9S3Dla1oHLCR8wv39HTBqrcfgdXv/s2OvsQVGnEdvMTozRnN0SX8NJFID3aYSfJ6/oRX4iqyjRDL4WZ9VUdlcmIWtcVMFMLSB8PXFMnVqWSiZYMtoN+I7aGuoRk6Vr6p8uG1y1+EHrzTk2PRk/gRuOhsNuuaugi8lA3bE4CC9SjkiJ2HuIr06o9WI3ZSifW2nh6ZlO/G2wMxvJww0hdALK/DC674GL7lqUqVW+f9CtqWv2iiNAWx/k8wmT39WjnnIEhkODomJ9YjesG1djqRMFd7oCc+uvptWYFvkNOI7Tb3LLbuDpDrTLjnUrjTBrUOhfbApLQcVvsaPMd6FXqZqyjENmQEs2UfCO4keyCHlWUe7Bx2rg+vtZM5sYe4Aj+hvp7iHT4VUyoyTsV0QJo05TLVA4fnI6jtZz3hSJdnGCpx/43gA5xHvDyYR0wLSJj5vgw4YzP4PAqHUkdyZGlPnPOJcSomm0PMI7aN7Ey7r81R7fflVIlxUjy/OJTPn8CsOG0+ccLwsuncKUyhh6cUurVc61YER9OqwvRX2rJV4RwH1yX6IbTh4EYAfHTyezTdCH45dbQ6vDUJSHtkD3EVf3/m8iR9pbN1xXN7NiUWs9DkPGF+WX94Yah7Mjy/UBTBCX4i9AjmD5t1JfTu8BKkYO4DvS47evzSJFJsk1bwiUExdsSa8CM7eGOmV7ERK/fR+VTid37NVvyJXrYvV/5x2gs+R0J20uiaOhm5abnVboWtJE/+Jb5hzHJToa8QUzOjly2FV3YkT80085EjHT7ywzHzif2FopX7JfX6TItud0DMVTmYQxF7ZUbEA0c9b/B+YTvMOQO74LwVmCgMKit+XHiBaCjFctjYeGOZHztWv3QOdkRmr9FtDxwQo08e86VeV30tviGfSrzoDrGQoLxkH9dhY4ukxIy4WZBLe2FtC8ipJSliM1459MTaJNC9FVnHLjg4y6X7USvBvDB5Y3WiuIiNw0vLWrFO3hi/eEr1fpTv/tadE5tFBRHq6MrwzqueHfCGcEcVmjpw6hXjIpid0BY4tMRciuk+du4byQAtguKC3BW9mSHoKNxmwW2zxKueDM9XARr939Vrwwp+cj6V+N3fuZWY7mWnRu/0jDbHQoDuVTKjk+HJTtCl+tphmOo3d2EimplCs55doLz0Gimn3mPZDEodHHYi2vGjgRN2hQfr6AVziB1KzJe2Gn1oBVGq3qnyCfHie4QS06rwcolXXFRQD0okdeT4GmxmODn01GZRQssTR29UHkZsYYOSMBPEBG3ItO7QadLD49wdPfcqmWIwRV3hTCti0g1n5FKvuCrmwdUjr4KfnFOI701R
4gDqHnV/5+Srn7m60oidvY86eNw+kM2Q0ht2+BA2usNomCBrGxHGyPaKQMLbBt459Lz2haYJkEfuniQXUNGAjzqogihV71T5hPi9B5gSI6gUs4ULbIe3yU3yxvGeN/DP4eLcEnb1P3N1UABr1ge/GSNBye2Gc+V3PrpH3piUm64MIQtiqbBco5jZC1TqkQdm/do1fVxOIdY3nlGjdnJxbQ212k+KHCqknIppwck7aix5CAc7wglCvFXp9aSWDj1xcJTzLkqU8zojNZp77LAflhem+9lxG8HvY8dshb/xTBV/nEv+EK5PbCAOoTUqbO79TEu9RpevsiFmcyAsiIXiyrENRzghvbCpLUoU9A5LmcNhPraooAnrYhcaDGwH/ocW1pYKLCHWk+RH7FfFRqzcR+dTiZf+Ud9Yhisxuzl55MbldDfR6PoSAcQaXjNxSNsMPRvCvB44DPMwf2pbQd436oXDLl/glfmSreH5aISOrwjEBvi0f45bQQgn+sTd05k6eQJmfzPGyv2Sen2mZX9JuI+dVGTaRjVmE4OS4CWo9ReTdsGeKxFNKZzlMVMobI2OjuTx6CzAnR/j9sx43ZKO1FQqkQIzZcXD9+511dfiG/KpxMvwBuWyE5cArzoWlZqy4xjl5R01bhmsQQ3HcDPNXuMZWzjvxxp2JgicubHlibV3Zh238GY14iaMJo2Q8LJhZwturczD96xFJnv9nfIJ8fInNZSuNIIpL8HLvTHCzCf8uBRZ2gahzOFInahvDbGZKyGhZodH8mJFaMqInhgECXqIqMAcXlJkGuzQymzdDkwfv8GUXgNTi2/IKcT/64CYPLEDYqbESo3VPT3CaIxPu+QemRrMSifoF2Dez8aZHUoc3lKUa3AUVvOqGUpmKYQD3sBWYEdOQ0xKG4Fa3rCcbW+way0y2evvlE+IW59hnjgDvAitshOo3F3mWUfBYV9N3NYgCjGTWlbVkZSY99xEk7AAQt/uIMYjmxQjOsEnVGC9gLZ1Y3IBNc6dAD6bjW0PDRZ5yfsjpxDPDD2xyYMFzMAVmSCWz116ymbcoEbQvM6OXIoA27rL5k6wF0KtDYeNI4qsd1jDzQpaglU+U8dOKjRPKPTfQ3fMO7/B77lHTorNQ7FWzNKDHNRh089WtMaAxf3QFQx+qGf2Gm7LiE2RS+N1NDIXpmvGNcQM1IURW3xlRudQBMc6B0GMndApBNoIYxsata1AMEmZGcBoN4wSM7+MCj14+zy0dup3zCXEPSuegzo17ZKpL/e9siOnkwlKKIJnhJc9q/gtHNnTHGsptp7M1XRSoVNrW0OqIBcUx0ZtKijG4xFQ+keQJiiygheHmR32gtKMwdtl+co1f0wuIYbV/wHoWhP6XAtm6X3RVmBHTiuwshl624K5Ozgfm19MrZc45dIe/0htcD2fx776WV9jFyixVmQ+Tq3mQyCMGmKlrA1aYUmJOaxSkR3RW0N/gAGbp37fPByQT4jXvgHQ2cpsgbQVCCPBqp+NGmsFNlDjNvun7EUwhdM56d0Fbcqws9J3MQ5tdfA0vTyPCJQZ4eUKrJVX2QmuwtJeILQJqQX+CJqGAvSbkAdGU79jPiHuWArQ/q4ZwAi8LofVTiGSbQQqdFeg1ApmVGTa7g6U2UhyaCtYvy68iI4CYj4MLS+yI6Wlc7JBEdNaSnlRaQXExgtziCXM3BszRZZQN48GaBqRCkgeDsgnxN1tAGteDyEWOXBgF6QSuxSYPDFXYoJYPyPUoGG25khE4XY2OFdgfcGoncDxCA2hrXfAy2HmtkLnxJGOHlfimPSi/ya4EmIeGE39jvmEGIvVthCga4VIKVxRmujAUYdO2QlSXHwfV2CtyAiv2Y/o6W1RrWL+jzVJSPfLIheKWqqrFJfg5RBzRUYYtTc2doJUWHbwHB0+kxc3AjQMBmgZmwpHXg7IL8TdawHWvs3UOD4LdqYR5IONjdBqrEAVEJsYDl/TMNPCxcEFUFZ780v0bRDUfMng
XzB3UsCL22QjmPo6bQUHOAPEPFfutzFAfb+8MJr6PfMLMRatYxlA53Kd//IUguXBVgdORmtaiZUiCy+sbAR5ZPyblDgOYj0BWbkDBrXyxGbCbwzEHGaEmMOMgJIiy5iN0gpmK0wuzPJjPjzdOBygaXgqGHk6IN8QY01jB69T2worDyYbIcCNpBIUrTEFVh07sg78mQEcUWK+CMW6QMzshLEZBC9FbWm2IiY/bhwMgB26gj3yDzHC1L448McyD5YDGpEozdGhMx64txBzBWZdN9OxY1YiYifwNe2LXd441k64lJnlxipP1tsK4FF88YDCoFwAiHVboLXAfwZcrcAqpeAduGgmHEZq3D70EWKRRgQdO7ZImtMTZ4XYZStY3Ea2gUdxCHHjsMLEaa5fXnEgxtJhZw9B7lppDylT7quUWtsHpbi0TR05nhOjPUD48VnHbLxT5+rYqR6dGZUOvbAR5riOHUGsp1eqJTNZBy+ixCJyi0srGgYF/rdAnbjiQ0wlxBy5cyVA90o1PN3TvRbqTLTWrW2HI1IzsLo8McFMS8lTKqG3ZYfODGZQxw53OCBWgGqIZVohBz0MzOSJxXA0wtrQH3rqB0Id2oeC5MBpvqdYSpxWWv96IWvAQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1Xof4/sY7KcTsYB2AAAAAASUVORK5CYII=\" alt=\"audio video logos\" style=\"float:left; height:200px;\" /\u003e\n", + "\"audio\n", "\n", "1. Integrations with multi-modal AI models to extract information from unstructured data, in this case audio files.\n", "\n", @@ -91,7 +91,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "z-index": "0", "zoom": "181%" } @@ -116,7 +116,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "275%" } } @@ -138,7 +138,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "214%" } } @@ -153,21 +153,22 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "fa84ad03", "cell_type": "markdown", "source": [ - "**Important:** restart the kernel by going to \"Run -\u003e Restart \u0026 clear cell outputs\" before continuing.\n", + "**Important:** restart the kernel by going to \"Run -> Restart & clear 
cell outputs\" before continuing.\n", "\n", - "Configure bigframes to use your GCP project. First, go to \"Add-ons -\u003e Google Cloud SDK\" and click the \"Attach\" button. Then," + "Configure bigframes to use your GCP project. First, go to \"Add-ons -> Google Cloud SDK\" and click the \"Attach\" button. Then," ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "z-index": "4", "zoom": "236%" } @@ -196,7 +197,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "0b0b1cd8", @@ -214,7 +216,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "193%" } } @@ -229,7 +231,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "32e58a7f", @@ -243,7 +246,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "207%" } } @@ -270,7 +273,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "225%" } } @@ -285,7 +288,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "0c1fca97", @@ -298,7 +302,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "122%" } } @@ -316,7 +320,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "4a13e789", @@ -328,7 +333,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "134%" } } @@ -343,7 
+348,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "26b8baba", @@ -364,7 +370,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "af84cb21", @@ -393,7 +400,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "161%" } } @@ -411,7 +418,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "085deffd", @@ -429,7 +437,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "f8e653ee", @@ -441,7 +450,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "216%" } } @@ -465,7 +474,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "211%" } } @@ -485,7 +494,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "fae13ec5", @@ -497,7 +507,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "317%" } } @@ -532,7 +542,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "30969ae1", @@ -544,7 +555,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "229%" } } @@ -568,7 +579,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "177%" } } @@ -588,7 +599,8 @@ "tags": [], "trusted": true }, - 
"execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "6cddf53b", @@ -601,7 +613,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "141%" } } @@ -616,7 +628,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "ba0386cc", @@ -632,7 +645,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "152%" } } @@ -650,7 +663,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "61a883b2", @@ -669,7 +683,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "152%" } } @@ -689,7 +703,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "e8a25c46", @@ -705,7 +720,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "181%" } } @@ -729,7 +744,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "163%" } } @@ -744,7 +759,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "5ed7776d", @@ -762,7 +778,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "125%" } } @@ -777,7 +793,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "c96e9832", @@ -792,7 +809,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - 
"body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "178%" } } @@ -812,7 +829,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "0e2a5d7b", @@ -824,7 +842,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "224%" } } @@ -844,7 +862,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "172%" } } @@ -859,7 +877,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "5e16fb14", @@ -876,7 +895,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "183%" } } @@ -901,7 +920,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "92%" } } @@ -919,7 +938,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "8aaaef1f", @@ -938,7 +958,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "127%" } } @@ -953,7 +973,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "908a2340", @@ -974,7 +995,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "175%" } } @@ -994,7 +1015,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "f84ebe70", @@ -1012,7 +1034,8 @@ }, "trusted": true }, - "execution_count": null + 
"execution_count": null, + "outputs": [] }, { "id": "eeff1c72", @@ -1025,7 +1048,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "158%" } } @@ -1043,7 +1066,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "7ec53675", @@ -1055,7 +1079,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "138%" } } @@ -1070,7 +1094,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "a96552fb", @@ -1101,7 +1126,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "72af7c7f", @@ -1110,7 +1136,8 @@ "metadata": { "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] } ], "metadata": { @@ -1149,4 +1176,4 @@ }, "nbformat_minor": 4, "nbformat": 4 -} +} \ No newline at end of file From 2531f43a3e27e2d229712bce7cdd5575fedc6834 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 29 Apr 2026 18:45:11 +0000 Subject: [PATCH 24/39] fix: add missing outputs to notebook code cells --- .../generative_ai/ai_movie_poster.ipynb | 294 ++++---- .../multimodal/multimodal_dataframe.ipynb | 695 +++++++++--------- 2 files changed, 499 insertions(+), 490 deletions(-) diff --git a/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb b/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb index 8a19830358de..8df81706af47 100644 --- a/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb +++ b/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb @@ -21,7 +21,8 @@ "metadata": { "id": "XZpKUoHjXw3_" }, - "execution_count": 1 + "execution_count": 1, + "outputs": [] }, { "id": "ee509844", @@ -38,26 +39,26 @@ "id": "81b8de8d", "cell_type": 
"markdown", "source": [ - "\u003ctable align=\"left\"\u003e\n", + "\n", "\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://colab.research.google.com/github/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/ai_movie_poster.ipynb\"\u003e\n", - " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/colab-logo.png\" alt=\"Colab logo\"\u003e Run in Colab\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/ai_movie_poster.ipynb\"\u003e\n", - " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/github-logo.png\" width=\"32\" alt=\"GitHub logo\"\u003e\n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \"Colab Run in Colab\n", + " \n", + " \n", + " \n", + " \"GitHub\n", " View on GitHub\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/ai_movie_poster.ipynb\"\u003e\n", - " \u003cimg src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw\u0026s\" alt=\"BQ logo\" width=\"35\"\u003e\n", + " \n", + " \n", + " \n", + " \"BQ\n", " Open in BQ Studio\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e" + " \n", + "
" ], "metadata": {}, "execution_count": null @@ -115,7 +116,8 @@ "metadata": { "id": "6nqoRHYbPAx3" }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "015a63c1", @@ -167,11 +169,11 @@ "data": { "text/html": [ "\n", - " Query processed 0 Bytes in a moment of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:48a27954-7a4a-4b9e-8176-ea227fd188ad\u0026page=queryresults\"\u003eJob bigframes-dev:US.48a27954-7a4a-4b9e-8176-ea227fd188ad details\u003c/a\u003e]\n", + " Query processed 0 Bytes in a moment of slot time. [Job bigframes-dev:US.48a27954-7a4a-4b9e-8176-ea227fd188ad details]\n", " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -193,11 +195,11 @@ "data": { "text/html": [ "\n", - " Query processed 1.3 kB in a minute of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:09c48ecb-e041-4c18-a390-ca5a36fd07c3\u0026page=queryresults\"\u003eJob bigframes-dev:US.09c48ecb-e041-4c18-a390-ca5a36fd07c3 details\u003c/a\u003e]\n", + " Query processed 1.3 kB in a minute of slot time. [Job bigframes-dev:US.09c48ecb-e041-4c18-a390-ca5a36fd07c3 details]\n", " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -211,7 +213,7 @@ " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -220,8 +222,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
poster
0
\n", + "

1 rows × 1 columns

\n", + "
[1 rows x 1 columns in total]" ], "text/plain": [ " poster\n", @@ -319,11 +321,11 @@ "data": { "text/html": [ "\n", - " Query processed 1.3 kB in 2 minutes of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:4a08a15f-5a2f-463b-bba8-734858ec992b\u0026page=queryresults\"\u003eJob bigframes-dev:US.4a08a15f-5a2f-463b-bba8-734858ec992b details\u003c/a\u003e]\n", + " Query processed 1.3 kB in 2 minutes of slot time. [Job bigframes-dev:US.4a08a15f-5a2f-463b-bba8-734858ec992b details]\n", " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -337,7 +339,7 @@ " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -346,8 +348,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
postertitle
0Der Student von Prag
\n", + "

1 rows × 2 columns

\n", + "
[1 rows x 2 columns in total]" ], "text/plain": [ " poster title\n", @@ -453,11 +455,11 @@ "data": { "text/html": [ "\n", - " Query processed 1.3 kB in 4 minutes of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:b60a151a-6cbc-405e-9c40-8a7461981a00\u0026page=queryresults\"\u003eJob bigframes-dev:US.b60a151a-6cbc-405e-9c40-8a7461981a00 details\u003c/a\u003e]\n", + " Query processed 1.3 kB in 4 minutes of slot time. [Job bigframes-dev:US.b60a151a-6cbc-405e-9c40-8a7461981a00 details]\n", " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -471,7 +473,7 @@ " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -480,8 +482,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
postertitleyear
0Der Student von Prag1913
\n", + "

1 rows × 3 columns

\n", + "
[1 rows x 3 columns in total]" ], "text/plain": [ " poster title \\\n", @@ -560,8 +562,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
posterstruct<uri: string, version: string, authorize...
titlestring[pyarrow]
yearInt64
\n", + "

" ], "text/plain": [ - "poster struct\u003curi: string, version: string, authorize...\n", + "poster structJob bigframes-dev:US.c9bb23f0-5ceb-4d6c-8241-960c496274ae details]\n", " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -677,7 +679,7 @@ " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -686,8 +688,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
postertitleyear
8Shoulder Arms1918
\n", + "

1 rows × 3 columns

\n", + "
[1 rows x 3 columns in total]" ], "text/plain": [ " poster title year\n", diff --git a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb index ebc2cb6bcd30..9f36cfbf4f72 100644 --- a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb @@ -19,7 +19,8 @@ "# limitations under the License." ], "metadata": {}, - "execution_count": 1 + "execution_count": 1, + "outputs": [] }, { "id": "816ab253", @@ -27,26 +28,26 @@ "source": [ "# BigFrames Multimodal DataFrame\n", "\n", - "\u003ctable align=\"left\"\u003e\n", + "\n", "\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://colab.research.google.com/github/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", - " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/colab-logo.png\" alt=\"Colab logo\"\u003e Run in Colab\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", - " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/github-logo.png\" width=\"32\" alt=\"GitHub logo\"\u003e\n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \"Colab Run in Colab\n", + " \n", + " \n", + " \n", + " \"GitHub\n", " View on GitHub\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", - " \u003cimg src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw\u0026s\" alt=\"BQ logo\" width=\"35\"\u003e\n", + " \n", + " \n", + " \n", + " \"BQ\n", " Open in BQ Studio\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e\n" + " \n", + "
\n" ], "metadata": { "id": "YOrUAvz6DMw-" @@ -84,7 +85,7 @@ "id": "750954c4", "cell_type": "markdown", "source": [ - "Install the latest bigframes package if bigframes version \u003c 2.4.0" + "Install the latest bigframes package if bigframes version < 2.4.0" ], "metadata": {}, "execution_count": null @@ -96,7 +97,8 @@ "# !pip install bigframes --upgrade" ], "metadata": {}, - "execution_count": 2 + "execution_count": 2, + "outputs": [] }, { "id": "df561d04", @@ -134,7 +136,8 @@ "id": "bGyhLnfEeB0X", "outputId": "83ac8b64-3f44-4d43-d089-28a5026cbb42" }, - "execution_count": 3 + "execution_count": 3, + "outputs": [] }, { "id": "35bd6e6e", @@ -180,7 +183,8 @@ " return bpd.to_datetime(bbq.json_value(get_metadata(series), \"$.updated\").astype(\"Int64\"), unit=\"us\", utc=True)" ], "metadata": {}, - "execution_count": 4 + "execution_count": 4, + "outputs": [] }, { "id": "be9ce892", @@ -206,7 +210,8 @@ "id": "fx6YcZJbeYru", "outputId": "d707954a-0dd0-4c50-b7bf-36b140cf76cf" }, - "execution_count": 5 + "execution_count": 5, + "outputs": [] }, { "id": "2e0436b0", @@ -241,8 +246,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
image
0
1
2
3
4
\n", + "

5 rows × 1 columns

\n", + "
[5 rows x 1 columns in total]" ], "text/plain": [ " image\n", @@ -359,8 +364,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
imageauthorcontent_typesizeupdated
0aliceimage/png15912402025-03-20 17:45:04+00:00
1bobimage/png11829512025-03-20 17:45:02+00:00
2bobimage/png15208842025-03-20 17:44:55+00:00
3aliceimage/png12354012025-03-20 17:45:19+00:00
4bobimage/png15919232025-03-20 17:44:47+00:00
\n", + "

5 rows × 5 columns

\n", + "
[5 rows x 5 columns in total]" ], "text/plain": [ " image author content_type \\\n", @@ -487,7 +492,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"opencv-python\", \"numpy\", \"requests\"],\n", ")\n", - "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -\u003e str:\n", + "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -> str:\n", " import json\n", " import cv2 as cv\n", " import numpy as np\n", @@ -580,8 +585,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
imageblurred
0
1
2
3
4
\n", + "

5 rows × 2 columns

\n", + "
[5 rows x 2 columns in total]" ], "text/plain": [ " image \\\n", @@ -731,8 +736,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_text_llm_resultimage
0The item is a container of K9 Guard Dog Paw Balm.
1The item is K9 Guard Dog Hot Spot Spray.
2The image contains three bags of food, likely for small animals like rabbits or guinea pigs. They are labeled \"Timoth Hay Lend Variety Plend\", \"Herbal Greeıs Mix Variety Blend\", and \"Berry & Blossom Treat Blend\", all under the brand \"Fluffy Buns.\" The bags are yellow, green, and purple, respectively. Each bag has a pile of its contents beneath it.
3The item is a cat tree.\\n
4The item is a bag of bird seed. Specifically, it's labeled \"Chirpy Seed\", \"Deluxe Bird Food\".\\n
\n", + "

5 rows × 2 columns

\n", + "
[5 rows x 2 columns in total]" ], "text/plain": [ " ml_generate_text_llm_result \\\n", @@ -824,7 +829,8 @@ "metadata": { "id": "IG3J3HsKhyBY" }, - "execution_count": 11 + "execution_count": 11, + "outputs": [] }, { "id": "829afc69", @@ -864,8 +870,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_text_llm_resultimage
0The item is a container of Dog Paw Balm.
1The picture contains many colors, including white, black, green, and a bright blue. The product label predominantly features a bright blue hue. The background is a solid gray.
2Here are the product names from the image:\\n\\n* **Timoth Hay Lend Variety Plend** is the product in the yellow bag.\\n* **Herbal Greeıs Mix Variety Blend** is the product in the green bag.\\n* **Berry & Blossom Treat Blend** is the product in the purple bag.
3Yes, it is for pets. It appears to be a cat tree or scratching post.\\n
4The image shows that the weight of the product is 15 oz/ 257g.
\n", + "

5 rows × 2 columns

\n", + "
[5 rows x 2 columns in total]" ], "text/plain": [ " ml_generate_text_llm_result \\\n", @@ -983,8 +989,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_embedding_resultml_generate_embedding_statusml_generate_embedding_start_secml_generate_embedding_end_seccontent
0[ 0.00638822 0.01666385 0.00451817 ... -0.02...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
1[ 0.00973976 0.02148137 0.0024429 ... 0.00...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
2[ 0.01195884 0.02139394 0.05968047 ... -0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
3[-0.02621161 0.02797648 0.04416926 ... -0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
4[ 0.05918628 0.0125137 0.01907336 ... 0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
\n", + "

5 rows × 5 columns

\n", + "
[5 rows x 5 columns in total]" ], "text/plain": [ " ml_generate_embedding_result \\\n", @@ -1063,18 +1069,18 @@ "4 [ 0.05918628 0.0125137 0.01907336 ... 0.01... \n", "\n", " ml_generate_embedding_status ml_generate_embedding_start_sec \\\n", - "0 \u003cNA\u003e \n", - "1 \u003cNA\u003e \n", - "2 \u003cNA\u003e \n", - "3 \u003cNA\u003e \n", - "4 \u003cNA\u003e \n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", "\n", " ml_generate_embedding_end_sec \\\n", - "0 \u003cNA\u003e \n", - "1 \u003cNA\u003e \n", - "2 \u003cNA\u003e \n", - "3 \u003cNA\u003e \n", - "4 \u003cNA\u003e \n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", "\n", " content \n", "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", @@ -1120,7 +1126,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"pypdf\", \"requests\", \"cryptography\"],\n", ")\n", - "def pdf_extract(src_obj_ref_rt: str) -\u003e str:\n", + "def pdf_extract(src_obj_ref_rt: str) -> str:\n", " import io\n", " import json\n", " from pypdf import PdfReader\n", @@ -1147,7 +1153,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"pypdf\", \"requests\", \"cryptography\"],\n", ")\n", - "def pdf_chunk(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -\u003e list[str]:\n", + "def pdf_chunk(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -> list[str]:\n", " import io\n", " import json\n", " from pypdf import PdfReader\n", @@ -1165,7 +1171,7 @@ " page_text = page.extract_text()\n", " if page_text:\n", " curr_chunk += page_text\n", - " while len(curr_chunk) \u003e= chunk_size:\n", + " while len(curr_chunk) >= chunk_size:\n", " split_idx = curr_chunk.rfind(\" \", 0, chunk_size)\n", " if split_idx == -1:\n", " split_idx = chunk_size\n", @@ -1202,8 +1208,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
extracted_textchunked
0CritterCuisine Pro 5000 - Automatic Pet Feeder...[\"CritterCuisine Pro 5000 - Automatic Pet Feed...
\n", + "

1 rows × 2 columns

\n", + "
[1 rows x 2 columns in total]" ], "text/plain": [ " extracted_text \\\n", @@ -1265,13 +1271,13 @@ { "data": { "text/html": [ - "\u003cpre\u003e0 CritterCuisine Pro 5000 - Automatic Pet Feeder...\n", + "
0    CritterCuisine Pro 5000 - Automatic Pet Feeder...\n",
        "0    on a level, stable surface to prevent tipping....\n",
        "0    included)\\nto maintain the schedule during pow...\n",
        "0    digits for Meal 1 will flash.\\n\u0000. Use the UP/D...\n",
        "0    paperclip) for 5\\nseconds. This will reset all...\n",
        "0    unit with a damp cloth. Do not immerse the bas...\n",
-       "0    continues,\\ncontact customer support.\\nE2: Foo...\u003c/pre\u003e"
+       "0    continues,\\ncontact customer support.\\nE2: Foo...
" ], "text/plain": [ "0 CritterCuisine Pro 5000 - Automatic Pet Feeder...\n", @@ -1306,7 +1312,8 @@ "audio_gcs_path = \"gs://bigframes_blob_test/audio/*\"\\ndf = session._from_glob_path(audio_gcs_path, name=\"audio\")" ], "metadata": {}, - "execution_count": 17 + "execution_count": 17, + "outputs": [] }, { "id": "c9f9d484", @@ -1353,7 +1360,7 @@ { "data": { "text/html": [ - "\u003cpre\u003e0 Now, as all books, not primarily intended as p...\u003c/pre\u003e" + "
0    Now, as all books, not primarily intended as p...
" ], "text/plain": [ "0 Now, as all books, not primarily intended as p...\n", @@ -1390,11 +1397,11 @@ { "data": { "text/html": [ - "\u003cpre\u003e0 {'status': '', 'content': 'Now, as all books, ...\u003c/pre\u003e" + "
0    {'status': '', 'content': 'Now, as all books, ...
" ], "text/plain": [ "0 {'status': '', 'content': 'Now, as all books, ...\n", - "Name: transcription_results, dtype: struct\u003cstatus: string, content: string\u003e[pyarrow]" + "Name: transcription_results, dtype: struct[pyarrow]" ] }, "execution_count": 19, @@ -1439,7 +1446,7 @@ " container_cpu=0.33,\n", " container_memory=\"512Mi\"\n", ")\n", - "def extract_exif(src_obj_ref_rt: str) -\u003e str:\n", + "def extract_exif(src_obj_ref_rt: str) -> str:\n", " import io\n", " import json\n", " from PIL import ExifTags, Image\n", @@ -1491,11 +1498,11 @@ { "data": { "text/html": [ - "\u003cpre\u003e0 {\"ExifOffset\":47,\"Make\":\"MyCamera\"}\u003c/pre\u003e" + "
0    {\"ExifOffset\":47,\"Make\":\"MyCamera\"}
" ], "text/plain": [ "0 {\"ExifOffset\":47,\"Make\":\"MyCamera\"}\n", - "Name: blob_col, dtype: extension\u003cdbjson\u003cJSONArrowType\u003e\u003e[pyarrow]" + "Name: blob_col, dtype: extension>[pyarrow]" ] }, "execution_count": 21, From 7bdfcfcf51c928ec8e3556b4dfc01f2643d4e1fc Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 29 Apr 2026 21:41:50 +0000 Subject: [PATCH 25/39] fix: replace private _from_glob_path with public APIs in notebooks --- .../generative_ai/ai_movie_poster.ipynb | 315 +++---- .../multimodal/multimodal_dataframe.ipynb | 804 ++++++++++-------- 2 files changed, 614 insertions(+), 505 deletions(-) diff --git a/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb b/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb index 8df81706af47..c2889ad4f92e 100644 --- a/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb +++ b/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb @@ -21,8 +21,7 @@ "metadata": { "id": "XZpKUoHjXw3_" }, - "execution_count": 1, - "outputs": [] + "execution_count": 1 }, { "id": "ee509844", @@ -39,26 +38,26 @@ "id": "81b8de8d", "cell_type": "markdown", "source": [ - "\n", + "\u003ctable align=\"left\"\u003e\n", "\n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \"Colab Run in Colab\n", - " \n", - " \n", - " \n", - " \"GitHub\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://colab.research.google.com/github/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/ai_movie_poster.ipynb\"\u003e\n", + " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/colab-logo.png\" alt=\"Colab logo\"\u003e Run in Colab\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/ai_movie_poster.ipynb\"\u003e\n", + " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/github-logo.png\" width=\"32\" alt=\"GitHub logo\"\u003e\n", " View on GitHub\n", - " \n", - " \n", - " \n", - " \"BQ\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/ai_movie_poster.ipynb\"\u003e\n", + " \u003cimg src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw\u0026s\" alt=\"BQ logo\" width=\"35\"\u003e\n", " Open in BQ Studio\n", - " \n", - "
" + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + "\u003c/table\u003e" ], "metadata": {}, "execution_count": null @@ -116,8 +115,7 @@ "metadata": { "id": "6nqoRHYbPAx3" }, - "execution_count": null, - "outputs": [] + "execution_count": null }, { "id": "015a63c1", @@ -145,7 +143,26 @@ "id": "47acbbfe", "cell_type": "code", "source": [ - "# Replace with your own connection name.\\nMY_CONNECTION = 'bigframes-default-connection' # @param {type:\"string\"}\\n\\nimport bigframes.pandas as bpd\\nsession = bpd.get_global_session()\\n\\nmovies = session._from_glob_path(\\n \"gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/*\",\\n connection = MY_CONNECTION,\\n name='poster')\\nmovies.head(1)" + "# Replace with your own connection name.\n", + "MY_CONNECTION = 'bigframes-default-connection' # @param {type:\"string\"}\n", + "\n", + "import gcsfs\n", + "import bigframes.pandas as bpd\n", + "import bigframes.bigquery as bbq\n", + "\n", + "session = bpd.get_global_session()\n", + "\n", + "# List files using gcsfs\n", + "fs = gcsfs.GCSFileSystem(anon=True)\n", + "uris = fs.glob(\"gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/*\")\n", + "\n", + "# Read the URIs into a BigQuery DataFrame\n", + "movies = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", + "\n", + "# Create the object reference column\n", + "movies['poster'] = bbq.obj.make_ref(movies['uri'], authorizer=MY_CONNECTION)\n", + "movies = movies[['poster']]\n", + "movies.head(1)" ], "metadata": { "colab": { @@ -169,11 +186,11 @@ "data": { "text/html": [ "\n", - " Query processed 0 Bytes in a moment of slot time. [Job bigframes-dev:US.48a27954-7a4a-4b9e-8176-ea227fd188ad details]\n", + " Query processed 0 Bytes in a moment of slot time. 
[\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:48a27954-7a4a-4b9e-8176-ea227fd188ad\u0026page=queryresults\"\u003eJob bigframes-dev:US.48a27954-7a4a-4b9e-8176-ea227fd188ad details\u003c/a\u003e]\n", " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -195,11 +212,11 @@ "data": { "text/html": [ "\n", - " Query processed 1.3 kB in a minute of slot time. [Job bigframes-dev:US.09c48ecb-e041-4c18-a390-ca5a36fd07c3 details]\n", + " Query processed 1.3 kB in a minute of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:09c48ecb-e041-4c18-a390-ca5a36fd07c3\u0026page=queryresults\"\u003eJob bigframes-dev:US.09c48ecb-e041-4c18-a390-ca5a36fd07c3 details\u003c/a\u003e]\n", " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -213,7 +230,7 @@ " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -222,8 +239,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
poster
0
\n", - "

1 rows × 1 columns

\n", - "
[1 rows x 1 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eposter\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/vertex-ai%2Fdataset-management%2Fdatasets%2Fclassic-movie-posters%2Fder_student_von_prag.jpg?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260326%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260326T200041Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1683653080624441\u0026X-Goog-Signature=9f955e89088240b34a5cbfba751fffacc5dfd7a2df468dcccfae06c939358c702ffbeb940403a69ad36e3fdf321abee60cf2b9795c9c1744bc0b164d6c2eca99666a0853e7afcf7670a07ff115bfe534791c9ab4267cb383e3a46ede9301aeeb8534a42a1d4c8f790f3a60eab06aa72a8fe76ee6cbb88de8e42a0809d8322a0ad8aecd1c64a55b1cc8716acf4f0dc2550a2059e63d98d49707fe27180ada0a277ea9b1827fc261657bcee9ec5cc7117df704f135d983325abb97dc77ee7a270c466e689921fce8ecd23824b515f2811c3c13ee382c5bc3bd34b7dd95a845705a8f654315b2128799efd0509dee5f6db1eb1b773438d3bfc8112d76cbe892e376\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e1 rows × 1 columns\u003c/p\u003e\n", + "\u003c/div\u003e[1 rows x 1 columns in total]" ], "text/plain": [ " poster\n", @@ -321,11 +338,11 @@ "data": { "text/html": [ "\n", - " Query processed 1.3 kB in 2 minutes of slot time. [Job bigframes-dev:US.4a08a15f-5a2f-463b-bba8-734858ec992b details]\n", + " Query processed 1.3 kB in 2 minutes of slot time. 
[\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:4a08a15f-5a2f-463b-bba8-734858ec992b\u0026page=queryresults\"\u003eJob bigframes-dev:US.4a08a15f-5a2f-463b-bba8-734858ec992b details\u003c/a\u003e]\n", " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -339,7 +356,7 @@ " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -348,8 +365,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
postertitle
0Der Student von Prag
\n", - "

1 rows × 2 columns

\n", - "
[1 rows x 2 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eposter\u003c/th\u003e\n", + " \u003cth\u003etitle\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/vertex-ai%2Fdataset-management%2Fdatasets%2Fclassic-movie-posters%2Fder_student_von_prag.jpg?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260326%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260326T200057Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1683653080624441\u0026X-Goog-Signature=29c8cf20d3f56ab1939ec00dbc1afd26e888b6475808258e34bc60a65e207b877c39853678b0cd1c9918d35e312e151725dbefc4ed6c519e4ec1f2c23c2e307f87442d09c5c8f0bbd49af92eb05e18ff35cd44f2f2954b79a33cf706c7ae1662e23e3220224d6f58b775cb1875213b5050f910cb41a4a8fb312f308b0566448ddf7ef15e22ec2a5261af2570f89e0f6067ac4cbf5874eaf522a6e4d8cf6e0313be3079b172bdc19c2d6901f53bbacf5bee3f2913c7f9f657cd1aed25d786f66a84f96e4dbe36e7f01d8b67887c9ac93edf866495fdf13c6b95152cdfa6b699fd14aeb477ec4a14fcd9f37eaf88ad02eb40a952635f97e7639be764b0007e011e\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003eDer Student von Prag\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e1 rows × 2 columns\u003c/p\u003e\n", + "\u003c/div\u003e[1 rows x 2 columns in total]" ], "text/plain": [ " poster title\n", @@ -455,11 +472,11 @@ "data": { "text/html": [ "\n", - " Query processed 1.3 kB in 4 minutes of slot time. 
[Job bigframes-dev:US.b60a151a-6cbc-405e-9c40-8a7461981a00 details]\n", + " Query processed 1.3 kB in 4 minutes of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:b60a151a-6cbc-405e-9c40-8a7461981a00\u0026page=queryresults\"\u003eJob bigframes-dev:US.b60a151a-6cbc-405e-9c40-8a7461981a00 details\u003c/a\u003e]\n", " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -473,7 +490,7 @@ " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -482,8 +499,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
postertitleyear
0Der Student von Prag1913
\n", - "

1 rows × 3 columns

\n", - "
[1 rows x 3 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eposter\u003c/th\u003e\n", + " \u003cth\u003etitle\u003c/th\u003e\n", + " \u003cth\u003eyear\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/vertex-ai%2Fdataset-management%2Fdatasets%2Fclassic-movie-posters%2Fder_student_von_prag.jpg?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260326%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260326T200120Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1683653080624441\u0026X-Goog-Signature=96035b9c90093c9636f0b406e5ca9daf52bb1019bde4d52e779f3ce7371e6df0430b3f2e991869065e113327a7698e7ce5ad7b4db8781aa65adea890b80976c97b93b3f9deac5002a1e27b4bd2c1df9250ff4167f150c88be2067f70d45b7c94fd6d69f36a90b5a3ad1a3d500e3cc89a4fe4a67157cbea164d5ce34506dd1d2353eedb1c663eb1a4578c8ff1f9af2ab21a7065de4ec3ff1af44e764a3215874e564e6beeb502739468a80a02c79dcc71f7518435686270d855007e01653659804b5f50ab9c43c4627f28625e07572a4b0f30de49397f9f0445571cdacb695747bdb17614addcf33a90036aa48d025baa8a4d6bd5000d0106a788c2c23f1292c8\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003eDer Student von Prag\u003c/td\u003e\n", + " \u003ctd\u003e1913\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e1 rows × 3 columns\u003c/p\u003e\n", + "\u003c/div\u003e[1 rows x 3 columns in total]" ], "text/plain": [ " poster title \\\n", @@ -562,8 +579,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
0
posterstruct<uri: string, version: string, authorize...
titlestring[pyarrow]
yearInt64
\n", - "

" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003eposter\u003c/th\u003e\n", + " \u003ctd\u003estruct\u0026lt;uri: string, version: string, authorize...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003etitle\u003c/th\u003e\n", + " \u003ctd\u003estring[pyarrow]\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003eyear\u003c/th\u003e\n", + " \u003ctd\u003eInt64\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003c/div\u003e\u003cbr\u003e\u003clabel\u003e\u003cb\u003edtype:\u003c/b\u003e object\u003c/label\u003e" ], "text/plain": [ - "poster structJob bigframes-dev:US.c9bb23f0-5ceb-4d6c-8241-960c496274ae details]\n", + " Query processed 1.3 kB in 6 minutes of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:c9bb23f0-5ceb-4d6c-8241-960c496274ae\u0026page=queryresults\"\u003eJob bigframes-dev:US.c9bb23f0-5ceb-4d6c-8241-960c496274ae details\u003c/a\u003e]\n", " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -679,7 +696,7 @@ " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -688,8 +705,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
postertitleyear
8Shoulder Arms1918
\n", - "

1 rows × 3 columns

\n", - "
[1 rows x 3 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eposter\u003c/th\u003e\n", + " \u003cth\u003etitle\u003c/th\u003e\n", + " \u003cth\u003eyear\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e8\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/vertex-ai%2Fdataset-management%2Fdatasets%2Fclassic-movie-posters%2Fshoulder_arms.jpeg?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260326%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260326T200210Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1683653082560296\u0026X-Goog-Signature=64c1fb48cc9830dd4153bca15d05d8703c770e12a4df99abf4cab9dec02d13c66adf4d1223ffda9a30763ad2b286086dfc8cc9b8d20875b29d0c1639983c3ba08a02364bf49361b4a24c3a6830def8d6d3561eeb04d01604b5bae86e48457dc368fee538d0beea2228fdf5e94b5862e1097f58545d7449fa5df0e93fb9c3c0a32943ca9970911f183adf71a7e13e9275efd41c1f69b8f8453b853a30cbb5e8859d72b95ca653204b5ae8f96a12d88d59e988349f74e3f6db6ef277c066d92a28c50335d494beead9a3c0c796c97ca48c497328ae7ad278161c28743193233b28ac0fcafab2431179f7f6321345d8a67e6af39d7339697a5892f0441a266262ab\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003eShoulder Arms\u003c/td\u003e\n", + " \u003ctd\u003e1918\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e1 rows × 3 columns\u003c/p\u003e\n", + "\u003c/div\u003e[1 rows x 3 columns in total]" ], "text/plain": [ " poster title year\n", diff --git a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb 
index 9f36cfbf4f72..be81936f7eee 100644 --- a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb @@ -19,8 +19,7 @@ "# limitations under the License." ], "metadata": {}, - "execution_count": 1, - "outputs": [] + "execution_count": 1 }, { "id": "816ab253", @@ -28,26 +27,26 @@ "source": [ "# BigFrames Multimodal DataFrame\n", "\n", - "\n", + "\u003ctable align=\"left\"\u003e\n", "\n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \"Colab Run in Colab\n", - " \n", - " \n", - " \n", - " \"GitHub\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://colab.research.google.com/github/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", + " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/colab-logo.png\" alt=\"Colab logo\"\u003e Run in Colab\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", + " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/github-logo.png\" width=\"32\" alt=\"GitHub logo\"\u003e\n", " View on GitHub\n", - " \n", - " \n", - " \n", - " \"BQ\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", + " \u003cimg src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw\u0026s\" alt=\"BQ logo\" width=\"35\"\u003e\n", " Open in BQ Studio\n", - " \n", - "
\n" + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + "\u003c/table\u003e\n" ], "metadata": { "id": "YOrUAvz6DMw-" @@ -85,7 +84,7 @@ "id": "750954c4", "cell_type": "markdown", "source": [ - "Install the latest bigframes package if bigframes version < 2.4.0" + "Install the latest bigframes package if bigframes version \u003c 2.4.0" ], "metadata": {}, "execution_count": null @@ -97,8 +96,7 @@ "# !pip install bigframes --upgrade" ], "metadata": {}, - "execution_count": 2, - "outputs": [] + "execution_count": 2 }, { "id": "df561d04", @@ -117,6 +115,8 @@ "# Refer to https://cloud.google.com/bigquery/docs/multimodal-data-dataframes-tutorial#grant-permissions for setting up connection service account permissions.\n", "# In this Notebook it uses bigframes-default-connection by default. You can also bring in your own connections in each method.\n", "\n", + "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", + "\n", "import bigframes\n", "# Setup project\n", "bigframes.options.bigquery.project = PROJECT\n", @@ -136,8 +136,7 @@ "id": "bGyhLnfEeB0X", "outputId": "83ac8b64-3f44-4d43-d089-28a5026cbb42" }, - "execution_count": 3, - "outputs": [] + "execution_count": 3 }, { "id": "35bd6e6e", @@ -183,14 +182,30 @@ " return bpd.to_datetime(bbq.json_value(get_metadata(series), \"$.updated\").astype(\"Int64\"), unit=\"us\", utc=True)" ], "metadata": {}, - "execution_count": 4, - "outputs": [] + "execution_count": 4 }, { "id": "be9ce892", "cell_type": "markdown", "source": [ - "import bigframes.pandas as bpd\\nsession = bpd.get_global_session()\\ndf_image = session._from_glob_path(\\n \"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\",\\n connection=FULL_CONNECTION_ID,\\n name=\"image\"\\n)" + "To create a Multimodal DataFrame, you can use `bigframes.bigquery.obj.make_ref` on a series of URIs. 
You can get the URIs from a BigQuery table or by listing them from Cloud Storage.\n", + "\n", + "In this example, we use `gcsfs` to list the files from Cloud Storage, and then use `read_gbq` to load them into a BigQuery DataFrame before creating the object reference.\n", + "\n", + "```python\n", + "import gcsfs\n", + "import bigframes.bigquery as bbq\n", + "\n", + "fs = gcsfs.GCSFileSystem(anon=True)\n", + "uris = fs.glob(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\")\n", + "\n", + "# Read the URIs into a BigQuery DataFrame\n", + "df_image = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", + "\n", + "# Create the object reference column\n", + "df_image['image'] = bbq.obj.make_ref(df_image['uri'], authorizer=FULL_CONNECTION_ID)\n", + "df_image = df_image[['image']]\n", + "```" ], "metadata": { "id": "ifKOq7VZGtZy" @@ -201,7 +216,20 @@ "id": "871d02f4", "cell_type": "code", "source": [ - "# Create blob columns from wildcard path.\\ndf_image = session._from_glob_path(\\n \"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\", name=\"image\"\\n)" + "import gcsfs\n", + "import bigframes.bigquery as bbq\n", + "\n", + "# List files using gcsfs (public bucket)\n", + "fs = gcsfs.GCSFileSystem(anon=True)\n", + "uris = fs.glob(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\")\n", + "\n", + "# Read the URIs into a BigQuery DataFrame using UNNEST\n", + "# We take the first 5 for this example\n", + "df_image = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", + "\n", + "# Create the object reference column\n", + "df_image['image'] = bbq.obj.make_ref(df_image['uri'], authorizer=FULL_CONNECTION_ID)\n", + "df_image = df_image[['image']]" ], "metadata": { "colab": { @@ -210,8 +238,7 @@ "id": "fx6YcZJbeYru", "outputId": "d707954a-0dd0-4c50-b7bf-36b140cf76cf" }, - "execution_count": 5, - "outputs": [] + "execution_count": 5 }, { "id": "2e0436b0", @@ -246,8 +273,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
image
0
1
2
3
4
\n", - "

5 rows × 1 columns

\n", - "
[5 rows x 1 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eimage\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193621Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=060fd285eaaa0d1b8888e9e3c648c0c9452df5d33b6c6d7ddea1d5fc3e4bf1f243ff0719ed4e3faaa3ea29e60da198daf1168dfd0efd64f9612f7c032753cfa2842ef88469a49ab23cf261e2b5da052224a33eedddd7c4699d584ec85704b18010ce8f4216f200d3cc8d0301b7aeb6bef37ae9e83a832ea38703f19b5b85e2e34f772420f5436afeb874487b3fb0ae4a17fb8f277a3d233a6d5e6e0d063e37e651061bf4ae33b8ec8b879c5db93e3ce97513054822d1867e28de4f03585da2edd8f4f51e177a4e3d37f5e3ca2f5be0990dd9f58135ec9223da3af7bcd1e67a8d279282d4d245eb7faff094903a82cf7a8b866fe848230a6668b6eaef6a683292\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193621Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=34826bd5824786f809b740fd64e154a9feb43b467250c64b9b6686134133e18aabd7213472cb00ae4ef59e5567b8897828aae6ff22e29c4a5d28214fcd2a6f486e69d548d0e482707c4dcd67894feb716ee68e1863e02e36d7e0c6c008b1b989c2a798feb2c4bc3638c6c80069165b2bf51c6028ea2f0a09c1555981b8935435ec1c596975b77942f603e65414328f3f1d180f772015452bfc97e96ffb9a0a016a4dc365a4531d0e37e491f4066df87f9a8f2374d30d5f62d639f10252c471570b9e24d88a9f8816007099fc39e6f846c68ea5a4069ecc785e67101f664da4999037fcbbd93e00b1b85a31528492e8610d75af96a1c8a879865dd36da2b15465\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193621Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=6476c6b9b8b23f4ac79973d8088424c6d8018857d0d8c1bf586057a7bd2f4cf00bfc53c79757b35401f05dadee9bc1aeeaed7b669659afa16696062db7d9da45e5fe17a0aaaa33c2394632a8dae6ca3c7f705ce0e7179e44fa245dc34080d87bdd0f41638c2840eba87b178dc43db16ca1a621224a1d991960eb821a99dc34aab25ed7e8457f161bd09fa9efc0eb0642709e3fba6ba412ff68ffe698592c235054ae0d08fd27909150beaf69b93dee3496d2f9254c2f801890fe072442fa2ffe389eeb689b8eb4daf08b4701a535ac6606c7de856761d008da479138abc3e941e0ab682f19fe86cd3f56df63f788c92824aed76fafaae0a546fa796266f26e2e\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193621Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=22cdbcce477c983c914de1edcef7742fd253a6830e961c2932d5dbb11730c1fe0035579c4158a140cd6ebcdc95e1212ec60a2d54679af8dd662cb7c1ac6249042bb5a95fb324397599bfa4e8a8bf8e4313d14a5ca34e40c677d91f1853b4b7450d3ad043404058db5c1dbab56b8968eab4e3550aa1de20c789084527f8abd67a32aa62788b70975ea828bb312f5a123463c2088a4bad7a0c20d299f59fc0674ed32d36b3f78a1bf2fc6fcd367bf2056e132fbd50e0a597a7da7518be8c9597de6365097490857caba47e84b57940bdc6cca130b6c23ede91c51140cf0672ebdda0957e525416c636c368d1cf04e5efecbba3f052f61bb95d951e52e0f5d31e8c\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193621Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=335aafc718f8a89dc2b5d2e75ff750ac302cfacf4238ad91c2a4b140f59dd666d6520fafe885b82706453c2e820c82f0461488ae01e3210a20c555a9ac1242ddd54e17a92d7873211a4dcd69a7fca76c16ad9cd754f6245a8b9f047e9ef8bc428ec243fbde7af59a2b308968a165662e50d4a08740d196d02182d99650e79673e167164dc2869a434159ba3a15c68ddc9e17f5a7234c478ac4ae55a9686740ef260e6c1ab834ca3df361161c8d689acc72b143a6a3345640b2b94aadd1070d3e90a6572d63ae74cf803304b798ea4df61e5f4494f078f565f0d59f57bd6eee0618936a16617455d785ced3ac467b964b5eb9049749fe4cf8f2bf2c72ed72fc79\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 1 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 1 columns in total]" ], "text/plain": [ " image\n", @@ -364,8 +391,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
imageauthorcontent_typesizeupdated
0aliceimage/png15912402025-03-20 17:45:04+00:00
1bobimage/png11829512025-03-20 17:45:02+00:00
2bobimage/png15208842025-03-20 17:44:55+00:00
3aliceimage/png12354012025-03-20 17:45:19+00:00
4bobimage/png15919232025-03-20 17:44:47+00:00
\n", - "

5 rows × 5 columns

\n", - "
[5 rows x 5 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eimage\u003c/th\u003e\n", + " \u003cth\u003eauthor\u003c/th\u003e\n", + " \u003cth\u003econtent_type\u003c/th\u003e\n", + " \u003cth\u003esize\u003c/th\u003e\n", + " \u003cth\u003eupdated\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193635Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=0f96a5054a9d9d8f14259a2a9155c8124d0b3b6af21d5ba59f61a789cb5a770d1d70e9d7b04094140495675e9eb97ef30d5539ec822bfc7f857fc0be3a3ff193aef72578ffccf7231633d42fbf53752b50a6ab3c4180dd86f62a2c350492239d44b2d5a079d000aa60d99e5656dca1fdc85b2a5b1cea0d6567d47641167ee08bd41bff06e93b35e34c4b8f82e73d589bf29f9ae73b640b8c90b751ca2829f99b2a2fa5a463990660e87e4c4220a8bee9ff9bea44eb621d8a00335892833a0e33cc95560a803df5a13fb710abfe813d11a37610c870c82986b4275831e2ed57cf022af8927cd4a9fc5aea88b54c597b51ee456ab5f22cdcfe8972a0a2c56d9702\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003ealice\u003c/td\u003e\n", + " \u003ctd\u003eimage/png\u003c/td\u003e\n", + " \u003ctd\u003e1591240\u003c/td\u003e\n", + " \u003ctd\u003e2025-03-20 17:45:04+00:00\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193635Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=50b820b125f2b52c6a205178676d153512a5d62ba04f7f399de86cecaf167492f4db9322e289f8ec077e74e09ba67509bedda9b68b9eb7290a7f52bb78a567139d0a2bb9266764ed941bfa19ba4278bf00647e79c85877e3111dbe3c49cc572d4f47739db1030d6ce0298965d08f2f992b59a0f452ab8d52ea5d783cf13ce6d3b8349ce3fce9c4337ddd00b746ec2e9e8fa6fa0361a644d82c46b7f0860a2404f3fbf17de24cbfd8744a098fdde367d5a4cb918a325ae3fb96abfb187e26bee7bdc267b81ba23949ca4feaf676864431641f3747477293b34541e7c48ca06bbcc45e94ae839d0ce85e3222c38dd39014821881a31b15f3efd06ca426cc4769fb\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003ebob\u003c/td\u003e\n", + " \u003ctd\u003eimage/png\u003c/td\u003e\n", + " \u003ctd\u003e1182951\u003c/td\u003e\n", + " \u003ctd\u003e2025-03-20 17:45:02+00:00\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193635Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=0d659c9d71b34023e6a7531dc99620cd1543f4a8ec0262cda29549e9f4f604e929be102b44e0a11ef2c148c6a24c3fe9a92257892ca1aa23d7b963551d95093fa74bce067ec12c0237a96a15da0741f76e732d04f85b03d98696c7dc9bcd1bd9de1f2799ceb878e8f87ddfff5609fc6a6db4f27dcc410ad05cdc1066c8dcdae880a950a160189673eba7f3de59cc0e1aa754ebdf7f745a8572216af6f7934b1e96c9aa7cb7cacb6aa836f8e731668b76d0942e04e7e4b81a5972a75ab2ecb0b1859dc4ac4f6a45239dd2b76cc7af4643fb24c64731f28711e6cab8433fe239f0cc2462ae9260b1ae5ae85517300075188d270957eecb31ee8db20ac3ddd97c4e\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003ebob\u003c/td\u003e\n", + " \u003ctd\u003eimage/png\u003c/td\u003e\n", + " \u003ctd\u003e1520884\u003c/td\u003e\n", + " \u003ctd\u003e2025-03-20 17:44:55+00:00\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193635Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=1124d977a75b85634f2afeb223fba3028c9ad85fd9a2f6f8ce483c9e015a48a242f6810742e0279a46b3388371a9b7d37ca96d9a53d3939f6e0484a474491bece22272b89176e01866fdc8845b75cef28b9ff36a7b2f875e452002a7dd8d13ce38b078ea2aacd76ce8cb560faf078dad6462b3a69130f333ab9119fb6f1d8410a70de76018d2c84f01c6d70e1aa60498b4eb88a35a77a8173d11e545a8f5a7dfa542ec3effcafcd3a9c84934d605ed06107df98032738415e6ef1ed9331796aa802712c2cb4bd733881833bf4ed5d590846db97c7591a2d84acdf87e38752b15a39b711aec5bbe4dcca25a1edfb60626e68497c1fbb8cb0bd707938db378a01e\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003ealice\u003c/td\u003e\n", + " \u003ctd\u003eimage/png\u003c/td\u003e\n", + " \u003ctd\u003e1235401\u003c/td\u003e\n", + " \u003ctd\u003e2025-03-20 17:45:19+00:00\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193635Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=2f60becdf01864a381c2283fbceaaea023a1011554a61221b5be902cdd08b102d60d0da0275ed491b3396baeef517b8e2336eeb3e3b07da0398cdf4b190ac0ea667e4cd1d1d19e41046824d55ffcef47a2db3aeb4e82da71a655264f14ad5ee553329aa9b32c8c2200f3b66c9a9bb5aa8e5b91795e8d6b6129935f46522fb8dab9ce3a2ba5af019c2410f709472791730ab9ebdf9f901a5bfaf4dcc2c78e07c79743d35eceac59999d841adb60ce15313a70526d98b83e90f2240800c5b96b1b9a032d530fb15bec86425afca0c6fcc1d35d1560ef996cae5411feb67addd1b726026f3d097318b0577a84dab72cae328bb186fc8c97001ff720a43e6fc27610\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003ebob\u003c/td\u003e\n", + " \u003ctd\u003eimage/png\u003c/td\u003e\n", + " \u003ctd\u003e1591923\u003c/td\u003e\n", + " \u003ctd\u003e2025-03-20 17:44:47+00:00\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 5 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 5 columns in total]" ], "text/plain": [ " image author content_type \\\n", @@ -492,7 +519,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"opencv-python\", \"numpy\", \"requests\"],\n", ")\n", - "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -> str:\n", + "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -\u003e str:\n", " import json\n", " import cv2 as cv\n", " import numpy as np\n", @@ -585,8 +612,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
imageblurred
0
1
2
3
4
\n", - "

5 rows × 2 columns

\n", - "
[5 rows x 2 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eimage\u003c/th\u003e\n", + " \u003cth\u003eblurred\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=2bb786ea7fda0a778b591d0f85b1d1df003726a26585490c1c8e1cc632bc90e418bc2762993da32c60017b96b36a9bf03ca123f7e74a34e5da98d4a8ae65e340c64872e1ab7c6442435253669103a157ee61c793da79ad0e6ae2a7bdbf54e8d67dce61de37d9dd2f54b0c994f8135d88af27f08eeb99e713b385b37fd0762503244cd0c597479d6925dcf111db4605842c797aabb307736028a5dd802ef08a2697a4bd1c96b5bf8bbecdb8a8f384028cec03bc9d51c1d2dfee1760756e0c6e54d9d753b373115e308cf45de2c082f30f263285af5d5e43fd9bf8c3dae32ceee389ef9427f3e2b06646d48aa570ba1ca0713d0f270ea2f4996e8ad3d03a3e39a6\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334353324\u0026X-Goog-Signature=6f16236d50a82c7ede3c23366389344149faba103443acf1c1a2b0d3b0d91948c8c15a3ad990382ab9b61badf8acee3c4b9e5ec6f96b72d395cd2e18227ad5c051ad189c1af48030fb44c1b9cc84a44a74fec04d576a6d699c1f0e133b2fc5d583872b5577c60a01f58d163de6e95591d260e9246fae0243d7472748e854ccaf4b4c3de80fb4e085f428427fe9914d22fba7416ae8b9e33beb0dfbcb91273609d0333aa1135b835bc69f9e15e1097452133e376b9b3f1bb7b4d7d7e9cb860677ce786e49239e8a8af86f83b7e27e76846212daddd2859f8dec4942b7351c69d396d4fd9a529fd0c48677aa4923063a752f6845801f22d3de227a640946f1e212\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=31ed02660dcead7471a7b47e2f471f4cf515e37525c14027f8d87ba1479982f244df02979b7da03676d4bbbd21221b3f70cefea4501eca3a5a64acb187ef5a2b03b19116729059dc666c2e295fc1ac831cd38864c1673344e04855db279a3c96409550dfade09631b0818e6cb576ff02afba6a87624f7c33077f7f19c891f6413b317cabb834182585d445ce97aa0294beca4de0edd2f9a0f6429747e93db53df875249a9c0543a329dac0f46ef3ed4e64a7e51648d0820ba52b7845b6f98371aca7deb165b4f7b7537be0d659cf33bb34893417666d7c9e9f611332ccba6bae6024be6862350185886cf61354eda3591e96e344c59350ca6f68ce50670fa869\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334573735\u0026X-Goog-Signature=209b62c9c3e2657b9270ed126a6c1e3979471f9e308670ce1595e9c18a0ec7c627a313b1f1c87108355602dc11b804ff609ba3394fada2b4fd186c6fef4138e22222045d0fd0660f103e6378ac83c6bd7d8da27c1a3d306dbfd778f6738c2e492bd8527ba9112e086f15334b7ab7795b88212d3825b3be325606cc507e5504406bc4e9dc41b29fb99e1207fe74c11053e7332ad9fdd65587f55cd10cc1bfd903672f43de38a18cb346977d8498fd751ab8728ca580261a1a0367421437d56df7f5f7dda895ddd370954aee632258304da31fd8e7c33619dd5cf9ee0ebe9e60eba3cc416e871e181525f23e525489ef6dffced9211f7d3681ec24b3ac7c0b05ab\" 
width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=8d32ad0137f3b10393aeaf635732f7fe337149323c0a33b634f9dcc592e662d4d268223f5c5cc89956787c5a3c061fa69c2de4d1a3225497dfad466548fcd4df7ee6000c4e60ff0661d0f9b451efa2701df5de55c322b3585c09210171403d47c358e6f41281f245711afd63eed3157e6674e9958ebffc5516f6ccb06b9af1830400e1d6a9bed44538b7212eeb257665aa43eecfa2af7482863f6032002de57f7a01af83cf5051541178297ab4a256710c04fd082cb51af4725dbafb288ee79caa630853dcbf35c49595b2b2e552804cabdbdf955806d57b97451e87035bd3ea89dbb5560a1c14170c531c3222a1d0b59d79d4c45b641887699f0bf5830612cb\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334233994\u0026X-Goog-Signature=6eed60a846158365a0c54d4c96516bb8c4011b497da203479fa9066bc2b7c2c7c683aa351d5249e05df64b8f86549f251150327013e41231ee06f22e9a51e1061c577d7b2da4704b80f684776702da13be6122ad6652463f210c53d9149da9e32a253fde4851723cbb616f18374a8f9b7b5cd36a82e66a2e5677ae51b906e6c4649ea27fcbbcba3ef24bf163a4b0f469c10eca8da464f58161ead8371bdb5b0d452ee713b0ec498ca9aaefd521a1d8240d82885a19af58ddefb167146ab9d411b5d0712457b126104b5f42f57ab4ede2da5ae9af74e708840b507a5538571804c60e9c166516e0118aa555a13169401e21f0a5ec303f3b801d6a972ef94426ee\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=472d88289a3910bdde93dd468e6ae9e66c0821fb0d72a356ef887771baa2a80f356b0ac67183873c0e89c87afb3080b3a78122fa7c3e37ad8b54d8aa216e9e04cb6f2af4f200784beba8de4eea7cf3a39faa8d800b7cc6bbf03df55beaeb69727d413266e2e59fb900bc25b6859d7c14db3c22aa6a0aeaa71c331ae0a2bd27c9e2c246fe931a2562210411491bc83bc34e7bef60901189949348eee909e64c90e3aab7b03d7c274ee84c03cca6dd624169f40323a785bcc1a8357810f738b45a637c09267530fbddc413a75622d793876c74c9128b11d8b4ea2fe2b959b34b96e06a9c384f72522ad357557d0a3b1c8b5792c2e94631139eae178fa660a5e8b2\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334678692\u0026X-Goog-Signature=239a77d1c63c4074b2ddc3dd21a5bc77de8ea993fdd911a199b6cca207fef9286307a8321d91d02cffda17060a217f50b2a1d8017bbcb29511339a1187b38332a39795fb1b2c754988948be8849a371fd66a7698f8e35a0f1a3430b6837ede73c37c90da49a062586828f19595d4246e4eabf3e3d629f251b066fb20e0f34ddad80599f5946aa76e0694b859f7d575dc781a850d56e9846d456d285b0023e90862154008154fa13ef1f95096a6161e6a1b314b82ca23bc44fc1b4b7799a9f936dca09ab09830446089d8defe11a2c0e066aa8a65204d9027140f314001c59187593290ae22c4092ee5d0293a6e5da1e1da3d42c32c0aced7db7ad4f774575deb\" 
width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=1077450c755044fd6121e0dd67ec7de75a1785c4bacede635b78dd64d6b982ba02bba7a00065035fa13f003ef03c14d7f622b6a5c9ef19cadc956470e5a62e0f1855fb8cae0bbe723c0352224d152c85e173b234e3f5a1bcc2e1ddd4da065c184e82487eb4aa17464d330144b3b7c17ba357af1fb267f97730b3a798ff42a514ea47b83aa9f2560cfd428778638287c96ed17f95f0327e1cade380d046120d79479aca927d627ba65f3bc009c75d1662cbf93c6d33cea4b030a7906d3689922bac13ca3fab45536f115ae6253715cc9c24769e57e83657bf0393ce55e45f90d9666513a19a939044d3b3ad528757e022624d6601dacd326ae8ab3c9ce4ae6d52\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334302924\u0026X-Goog-Signature=3a25e70e59647f427ae55d72168c576fdb59b05f9ed6b72885a9e9c11f655e6bf10ded55744adcef330659731f0effdda7550ddca99c309cefe46ba864b088f155a5243e579d182a55f916d21d9dd0a83534a5e2efb2955a9146db12b22b4321e3f36b69f8d89d663507d7db83ac96bb2419a2baa0787fd0c6e6079d06652b8a2ac364a0a0a5d8d9de6331658b798abddbaaae1ede3026a4f0d955e74782afe240d31e6748bea8ab332ed945f541ca20c587b8c1449643f4748a3b059aae857334b6249bdf86794d307340b6a07d0dc47d3980e234be9c0549f52636d33776b7474fb95ebc014656c3cc217a777d438612a08849ac498baba0ce4716ac4ea432\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 2 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 2 columns in total]" ], "text/plain": [ " image \\\n", @@ -736,8 +763,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ml_generate_text_llm_resultimage
0The item is a container of K9 Guard Dog Paw Balm.
1The item is K9 Guard Dog Hot Spot Spray.
2The image contains three bags of food, likely for small animals like rabbits or guinea pigs. They are labeled \"Timoth Hay Lend Variety Plend\", \"Herbal Greeıs Mix Variety Blend\", and \"Berry & Blossom Treat Blend\", all under the brand \"Fluffy Buns.\" The bags are yellow, green, and purple, respectively. Each bag has a pile of its contents beneath it.
3The item is a cat tree.\\n
4The item is a bag of bird seed. Specifically, it's labeled \"Chirpy Seed\", \"Deluxe Bird Food\".\\n
\n", - "

5 rows × 2 columns

\n", - "
[5 rows x 2 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eml_generate_text_llm_result\u003c/th\u003e\n", + " \u003cth\u003eimage\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003eThe item is a container of K9 Guard Dog Paw Balm.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=1b184636c15ee0a74b099df1903a0a79f2d0832d2cf829c84f9344269dc70408ea48dfaddce2f284cb4862cef857e8a0420627e25ca8c7ecc08d34bc69c695bc030ca8d90417860eecad65826160cbbf1cabe6c90d5e51a09c8b582bc542dfc5b309908be7b945d615eabba73b79912987306b3b110a5b0a9e52ccf900a3d2c490bb4e4572f3108f3acabf9a332e0fa503f74265f44d3b9ff40745afa4c59a3a0f3366ca4e4a800a09a5df0b363909a01705433e31bf2c9b7ccc0cc1e75f3e8ec323907140f29e4033238385eb83815b6d95ec54945cbf829d966510ae2504187f9c167fe70ac9e77231d4b38df380a7f6ec5f973828f21e51f4a95626ea0504\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003eThe item is K9 Guard Dog Hot Spot Spray.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=7e3adbf71385c63b494609546b7b2a3ef41dc437772a35d579bfed25594b3dd4716f8170dae6e4c9afcabce49c2af584561111983494d6dff504ff5eed611c3c7712cb667e6f1f3451aff5ac6911c4da9aeb2ba5c9227f9459f7c54085268a515eeb28add1b384a4724159f3d1408278e37716465f11a18f823dd4058004e43f9bdd4cab28fc97e97043adddc53c4b5e3059cf1641f1300bedded5a679f6922a45c021055e413953e2f0b000a6fd5047a0aa2fa5fce5f0ca08b6f93411704c9b10c6534894130c11f8ee9a8ee70d26a6455c0cbaf2088c1b5205328858e22026d1c4efc9e558a33082169f7eec5e4fb406de7de13e3ee72a48421e9896cca6f7\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003eThe image contains three bags of food, likely for small animals like rabbits or guinea pigs. They are labeled \"Timoth Hay Lend Variety Plend\", \"Herbal Greeıs Mix Variety Blend\", and \"Berry \u0026 Blossom Treat Blend\", all under the brand \"Fluffy Buns.\" The bags are yellow, green, and purple, respectively. 
Each bag has a pile of its contents beneath it.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=380d369f6fc8a0e4274420f7bb038aaf748111cd654f7dc20818a97d80da8e6b93125884982c656009cdfdc7e70496c1defa42b2de69e1b73d99e0b925953bd5e0e5ae20ddfac13a619f8c6b1bf6c3fe9cdb195910eb08efd96a193dda55488eacc2d0deac4d0a777fd7b7916a0cc1707f22463e1678c83b8ae51d082b8365c2643f85c19c59a9ec34c89fcc87b9c510cf6dbcdda5d0648d6602bd023b0a09e6b4b02c4cea9f6b10f563a14097e86a5cd9898ea3b3606a6c847a23ce97aed46b3154c1c1791da19edf172d7f57cd4e604bb2774ae3dc02d902c826ca9be17972ff17d612106b9ac61e734837646e5d0b40f8162798bf7695dccb0d320e6a58a9\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003eThe item is a cat tree.\\n\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=712a70543cff388ba937bc867b4d94e1bfb09579bc7ecf998b8fb5194937cd15f491643f76925582ed5b7c853a9845b77e3c6c248126e211d45c3f6ebe751cc06193ae052999bf9bd827acbb204d2a64ad5d6eae1101fc5b2518f16ae29469ee7213cae403a3a6a29d7081561decc6b189593beb4b649bc7169828f4570a929d8b15c8dd0b3f259bfa4e2680b9d5b88653068357c7aefa0b1f26e10dc309b743da4164d9a5abd1761b00cc9a12380ba6fb3786f141b8e536fdc27e869b632c3e1a130f312ad5185362b0f9b30f473387a02905f22956992278d94fc2ef387a87cb855d35cfcabe9ad5d82c1b4dd85c56152e28438f6631322a4c229a9520adb5\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003eThe item is a bag of bird seed. 
Specifically, it's labeled \"Chirpy Seed\", \"Deluxe Bird Food\".\\n\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=70d2709b3b655fb6add8616767e7886e7e304cc96fc891df927085d1e4d90ee9bb13b370762c6c5a8dd43baefa163312267203bc1b371954320bc27c32d0831f7f8937f288da999e506bf6f47d101cd2e49a870f3d5be428d321149f7e1c7d1146569d22f19640d62325665b6d08e7254a89535c021c8b464d65e754312dd47dde08be9ca58856a97d3c3f243030ccfbd8c1bda5ddca2b3618b113f6c1640afa14936b8c16c59d77c44139fe75f3719e2a83924fed36514c61787b02ace0d439f8d3c4fea81c9bf01684f8c06a39f7ec626e93d59262db87f2eea30dd0f849a3436d8dd36d2188f2e52826e8b96bf72614c256cda9867b1905a1d1cd3edebd18\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 2 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 2 columns in total]" ], "text/plain": [ " ml_generate_text_llm_result \\\n", @@ -829,8 +856,7 @@ "metadata": { "id": "IG3J3HsKhyBY" }, - "execution_count": 11, - "outputs": [] + "execution_count": 11 }, { "id": "829afc69", @@ -870,8 +896,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ml_generate_text_llm_resultimage
0The item is a container of Dog Paw Balm.
1The picture contains many colors, including white, black, green, and a bright blue. The product label predominantly features a bright blue hue. The background is a solid gray.
2Here are the product names from the image:\\n\\n* **Timoth Hay Lend Variety Plend** is the product in the yellow bag.\\n* **Herbal Greeıs Mix Variety Blend** is the product in the green bag.\\n* **Berry & Blossom Treat Blend** is the product in the purple bag.
3Yes, it is for pets. It appears to be a cat tree or scratching post.\\n
4The image shows that the weight of the product is 15 oz/ 257g.
\n", - "

5 rows × 2 columns

\n", - "
[5 rows x 2 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eml_generate_text_llm_result\u003c/th\u003e\n", + " \u003cth\u003eimage\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003eThe item is a container of Dog Paw Balm.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=988967bbfa1e4c70be10a6faa407fde8edbe0e93a7a1c9f9fbe8e81fa55c11bcc27b9673ae4d91b13edbc56ad5e50051a81d1639cafd38946f693f73e81e86a0bf6e945c3a8edf9b3b2d275203caba770fcb9a9fa272b969023fabda363efc8d90ae4b2bbc9a4f420699f3604d0a13607f96694687529b38bd80b211f5998ef0a11ab0f3a0b936a4e6068a9289ec43a9536416b5782ca2a0645c2b43f94ac6b8e31632c62b3714b56f28dce7e5675a66ded7bcc9d1c1a154f5a83f826b3f4b1363b1316536549d959d664931e6cd462a9d83362257e5226ad5b35b5e6c0e6a155fd62d2890b2fc071b59e6e4fa796a22e346bceebc4fb131eee66793f6748699\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003eThe picture contains many colors, including white, black, green, and a bright blue. The product label predominantly features a bright blue hue. 
The background is a solid gray.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=6098f2a2487364290c54d4f6bcc98f29e5097702ab78da4ff9ef97d13c03a7fa3a17bd22324d61ae8264d3a59a6bdb2bf4be55bb7efffcb00c68f0c9b69f413f8b33b2089697456ef919690d59a68548e95ebd68526de53ab9062e18009ab73452fc1934c43f99ad75a45931fb250ef1e78a7ced3e0bfc9d9468ef300a63d750b8e249e7d20afa00895b7e586b16686141799603bcdf731b48237323de166d0a1a4f310519671a4ce6ae56b5d4ebd1887361cc21130b3e8fa57a6107d50239a671319ecbef623719554a9642489d2ea083c2cc466f5d73c55084023567e9db291b40a335e7f65b20da018a70cbf5ef9654aa86ec500aa4df35c1db81116abc83\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003eHere are the product names from the image:\\n\\n* **Timoth Hay Lend Variety Plend** is the product in the yellow bag.\\n* **Herbal Greeıs Mix Variety Blend** is the product in the green bag.\\n* **Berry \u0026 Blossom Treat Blend** is the product in the purple bag.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=9672e4d0fa6ea5f4bcf1605e35a7642cab21e7decfcfe55e37616c99a7774445cecf69c848ccd0dd92f5ae1925bfaa4bbd62ba413ecfffc835aa9a4596d18036381d595ec3387d4200a08b8fdb02ddfdb7432ebd14d5d4b77abf4628e61899c4383899ddbe6465ce91b856e8a02f37391c850589a971e6045ed0c389133dd63d4bac98d5fc744f83a51b000bf29c51111589bd34e2d287cc835ccd8c009270cef818eb3ed9ee07f9978bf24ce994311af944fc2b7d7066c3c37613fe08e0542efd277ab77df0fe1f168e05d2f6d6749e5d8af573c22032b29c3c47ac2f3b7978a09139ddfd38ab9045b5ae86d8804f23db758a7602225bd749a5b7552bae4eb8\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003eYes, it is for pets. 
It appears to be a cat tree or scratching post.\\n\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=5c8e23c4c5f8017e09bf02b1b480200c74c1523aa25297141fe4abf226d7e18c546aed2ea5a2b5b5f8c5b90f2f4c569accf41a73ab8776285b458d230c6df43025eeb3066d40fe141089fa547b5a1f6a12d8c2eed0e614c8667be12c237d7ba4b29a530c16a6c8528cdfc8d8c4761aeae91c5f8452069f88de31cb637aaa34cbea60d8a8e9bd66b9d2f6f2c70a60d7791fc6bac13a4dd3736323da29fb52c5c90068c57c088407935255468513b780c07ea8e922f690645ee5ed8644240f894061c662b7bfe7ab660fd8466280e428424724f62a6c4eb6c781e3bf988059bd9f63fd6ffc2e75d296bca2ef068c6cdda0f6bb994f6799e04b33b4352f66a68224\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003eThe image shows that the weight of the product is 15 oz/ 257g.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=3be42f1f88c163e019f7496107a2f7d69b90a5482dd157a5d05e427fa117dd334a834dd9c72d0c84126e4f2c9e35903521ca8e81645afc945d8dbe47671f6b2f73ccbf494df6e5ad8e19c04ab4c6020859519c663b6fd57b19512fa94562106d01073b9122011ca602279a712fd761dc4ffaed3a8d7b76abcf1f42c2c1bce33228cff0e0454107cb84039a8981a2fb689191c43b7edf54e19354ff2e8c5deeb3a07944285b15db8a4fe474744f52852f8048c377708e2b3b85ee639b952e9292bc8d60d9b1a37c84ab83398ef295b8ba6b3c8e3a500714fb70e91b5fa7f19301fa4ce1bb1041f60a8ce84b3d863d5ba66f14614ed27689cd4daeb293b493172b\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 2 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 2 columns in total]" ], "text/plain": [ " ml_generate_text_llm_result \\\n", @@ -989,8 +1015,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ml_generate_embedding_resultml_generate_embedding_statusml_generate_embedding_start_secml_generate_embedding_end_seccontent
0[ 0.00638822 0.01666385 0.00451817 ... -0.02...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
1[ 0.00973976 0.02148137 0.0024429 ... 0.00...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
2[ 0.01195884 0.02139394 0.05968047 ... -0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
3[-0.02621161 0.02797648 0.04416926 ... -0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
4[ 0.05918628 0.0125137 0.01907336 ... 0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
\n", - "

5 rows × 5 columns

\n", - "
[5 rows x 5 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eml_generate_embedding_result\u003c/th\u003e\n", + " \u003cth\u003eml_generate_embedding_status\u003c/th\u003e\n", + " \u003cth\u003eml_generate_embedding_start_sec\u003c/th\u003e\n", + " \u003cth\u003eml_generate_embedding_end_sec\u003c/th\u003e\n", + " \u003cth\u003econtent\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e[ 0.00638822 0.01666385 0.00451817 ... -0.02...\u003c/td\u003e\n", + " \u003ctd\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003e[ 0.00973976 0.02148137 0.0024429 ... 0.00...\u003c/td\u003e\n", + " \u003ctd\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003e[ 0.01195884 0.02139394 0.05968047 ... -0.01...\u003c/td\u003e\n", + " \u003ctd\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003e[-0.02621161 0.02797648 0.04416926 ... 
-0.01...\u003c/td\u003e\n", + " \u003ctd\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003e[ 0.05918628 0.0125137 0.01907336 ... 0.01...\u003c/td\u003e\n", + " \u003ctd\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 5 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 5 columns in total]" ], "text/plain": [ " ml_generate_embedding_result \\\n", @@ -1069,18 +1095,18 @@ "4 [ 0.05918628 0.0125137 0.01907336 ... 0.01... \n", "\n", " ml_generate_embedding_status ml_generate_embedding_start_sec \\\n", - "0 \n", - "1 \n", - "2 \n", - "3 \n", - "4 \n", + "0 \u003cNA\u003e \n", + "1 \u003cNA\u003e \n", + "2 \u003cNA\u003e \n", + "3 \u003cNA\u003e \n", + "4 \u003cNA\u003e \n", "\n", " ml_generate_embedding_end_sec \\\n", - "0 \n", - "1 \n", - "2 \n", - "3 \n", - "4 \n", + "0 \u003cNA\u003e \n", + "1 \u003cNA\u003e \n", + "2 \u003cNA\u003e \n", + "3 \u003cNA\u003e \n", + "4 \u003cNA\u003e \n", "\n", " content \n", "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... 
\n", @@ -1126,7 +1152,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"pypdf\", \"requests\", \"cryptography\"],\n", ")\n", - "def pdf_extract(src_obj_ref_rt: str) -> str:\n", + "def pdf_extract(src_obj_ref_rt: str) -\u003e str:\n", " import io\n", " import json\n", " from pypdf import PdfReader\n", @@ -1153,7 +1179,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"pypdf\", \"requests\", \"cryptography\"],\n", ")\n", - "def pdf_chunk(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -> list[str]:\n", + "def pdf_chunk(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -\u003e list[str]:\n", " import io\n", " import json\n", " from pypdf import PdfReader\n", @@ -1171,7 +1197,7 @@ " page_text = page.extract_text()\n", " if page_text:\n", " curr_chunk += page_text\n", - " while len(curr_chunk) >= chunk_size:\n", + " while len(curr_chunk) \u003e= chunk_size:\n", " split_idx = curr_chunk.rfind(\" \", 0, chunk_size)\n", " if split_idx == -1:\n", " split_idx = chunk_size\n", @@ -1200,7 +1226,30 @@ "id": "234a5f86", "cell_type": "code", "source": [ - "df_pdf = session._from_glob_path(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*\", name=\"pdf\")\\n\\n# Generate a JSON string containing the runtime information (including signed read URLs)\\naccess_urls = get_runtime_json_str(df_pdf[\"pdf\"], mode=\"R\")\\n\\n# Apply PDF extraction\\ndf_pdf[\"extracted_text\"] = access_urls.apply(pdf_extract)\\n\\n# Apply PDF chunking\\ndf_pdf[\"chunked\"] = access_urls.apply(pdf_chunk, args=(2000, 200))\\n\\ndf_pdf[[\"extracted_text\", \"chunked\"]]" + "import gcsfs\n", + "import bigframes.bigquery as bbq\n", + "\n", + "# List files using gcsfs\n", + "fs = gcsfs.GCSFileSystem(anon=True)\n", + "uris = fs.glob(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*\")\n", + "\n", + "# Read the URIs into a BigQuery DataFrame\n", + "df_pdf = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", + "\n", + 
"# Create the object reference column\n", + "df_pdf['pdf'] = bbq.obj.make_ref(df_pdf['uri'], authorizer=FULL_CONNECTION_ID)\n", + "df_pdf = df_pdf[['pdf']]\n", + "\n", + "# Generate a JSON string containing the runtime information (including signed read URLs)\n", + "access_urls = get_runtime_json_str(df_pdf[\"pdf\"], mode=\"R\")\n", + "\n", + "# Apply PDF extraction\n", + "df_pdf[\"extracted_text\"] = access_urls.apply(pdf_extract)\n", + "\n", + "# Apply PDF chunking\n", + "df_pdf[\"chunked\"] = access_urls.apply(pdf_chunk, args=(2000, 200))\n", + "\n", + "df_pdf[[\"extracted_text\", \"chunked\"]]" ], "metadata": {}, "execution_count": 15, @@ -1208,8 +1257,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
extracted_textchunked
0CritterCuisine Pro 5000 - Automatic Pet Feeder...[\"CritterCuisine Pro 5000 - Automatic Pet Feed...
\n", - "

1 rows × 2 columns

\n", - "
[1 rows x 2 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eextracted_text\u003c/th\u003e\n", + " \u003cth\u003echunked\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003eCritterCuisine Pro 5000 - Automatic Pet Feeder...\u003c/td\u003e\n", + " \u003ctd\u003e[\"CritterCuisine Pro 5000 - Automatic Pet Feed...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e1 rows × 2 columns\u003c/p\u003e\n", + "\u003c/div\u003e[1 rows x 2 columns in total]" ], "text/plain": [ " extracted_text \\\n", @@ -1271,13 +1320,13 @@ { "data": { "text/html": [ - "
0    CritterCuisine Pro 5000 - Automatic Pet Feeder...\n",
+       "\u003cpre\u003e0    CritterCuisine Pro 5000 - Automatic Pet Feeder...\n",
        "0    on a level, stable surface to prevent tipping....\n",
        "0    included)\\nto maintain the schedule during pow...\n",
        "0    digits for Meal 1 will flash.\\n\u0000. Use the UP/D...\n",
        "0    paperclip) for 5\\nseconds. This will reset all...\n",
        "0    unit with a damp cloth. Do not immerse the bas...\n",
-       "0    continues,\\ncontact customer support.\\nE2: Foo...
" + "0 continues,\\ncontact customer support.\\nE2: Foo...\u003c/pre\u003e" ], "text/plain": [ "0 CritterCuisine Pro 5000 - Automatic Pet Feeder...\n", @@ -1309,11 +1358,29 @@ "id": "1794c54f", "cell_type": "code", "source": [ - "audio_gcs_path = \"gs://bigframes_blob_test/audio/*\"\\ndf = session._from_glob_path(audio_gcs_path, name=\"audio\")" + "import gcsfs\n", + "import bigframes.bigquery as bbq\n", + "\n", + "audio_gcs_path = \"gs://bigframes_blob_test/audio/*\"\n", + "\n", + "# List files using gcsfs\n", + "fs = gcsfs.GCSFileSystem()\n", + "uris = fs.glob(audio_gcs_path)\n", + "\n", + "# Read the URIs into a BigQuery DataFrame\n", + "# If the bucket is empty or doesn't exist, this will result in an empty DataFrame\n", + "if not uris:\n", + " # Fallback to a dummy list or just let it be empty\n", + " uris = [\"gs://bigframes_blob_test/audio/dummy.mp3\"]\n", + "\n", + "df = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", + "\n", + "# Create the object reference column\n", + "df['audio'] = bbq.obj.make_ref(df['uri'], authorizer=FULL_CONNECTION_ID)\n", + "df = df[['audio']]" ], "metadata": {}, - "execution_count": 17, - "outputs": [] + "execution_count": 17 }, { "id": "c9f9d484", @@ -1360,7 +1427,7 @@ { "data": { "text/html": [ - "
0    Now, as all books, not primarily intended as p...
" + "\u003cpre\u003e0 Now, as all books, not primarily intended as p...\u003c/pre\u003e" ], "text/plain": [ "0 Now, as all books, not primarily intended as p...\n", @@ -1397,11 +1464,11 @@ { "data": { "text/html": [ - "
0    {'status': '', 'content': 'Now, as all books, ...
" + "\u003cpre\u003e0 {'status': '', 'content': 'Now, as all books, ...\u003c/pre\u003e" ], "text/plain": [ "0 {'status': '', 'content': 'Now, as all books, ...\n", - "Name: transcription_results, dtype: struct[pyarrow]" + "Name: transcription_results, dtype: struct\u003cstatus: string, content: string\u003e[pyarrow]" ] }, "execution_count": 19, @@ -1446,7 +1513,7 @@ " container_cpu=0.33,\n", " container_memory=\"512Mi\"\n", ")\n", - "def extract_exif(src_obj_ref_rt: str) -> str:\n", + "def extract_exif(src_obj_ref_rt: str) -\u003e str:\n", " import io\n", " import json\n", " from PIL import ExifTags, Image\n", @@ -1481,7 +1548,32 @@ "id": "40bb6bc9", "cell_type": "code", "source": [ - "# Create a Multimodal DataFrame from the sample image URIs\\nexif_image_df = session._from_glob_path(\\n \"gs://bigframes_blob_test/images_exif/*\",\\n name=\"blob_col\",\\n)\\n\\n# Generate a JSON string containing the runtime information (including signed read URLs)\\n# This allows the UDF to download the images from Google Cloud Storage\\naccess_urls = get_runtime_json_str(exif_image_df[\"blob_col\"], mode=\"R\")\\n\\n# Apply the BigQuery Python UDF to the runtime JSON strings\\n# We cast to string to ensure the input matches the UDF's signature\\nexif_json = access_urls.astype(str).apply(extract_exif)\\n\\n# Parse the resulting JSON strings back into a structured JSON type for easier access\\nexif_data = bbq.parse_json(exif_json)\\n\\nexif_data" + "import gcsfs\n", + "import bigframes.bigquery as bbq\n", + "\n", + "# Create a Multimodal DataFrame from the sample image URIs\n", + "fs = gcsfs.GCSFileSystem()\n", + "uris = fs.glob(\"gs://bigframes_blob_test/images_exif/*\")\n", + "\n", + "if not uris:\n", + " uris = [\"gs://bigframes_blob_test/images_exif/dummy.jpg\"]\n", + "\n", + "exif_image_df = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", + "exif_image_df['blob_col'] = bbq.obj.make_ref(exif_image_df['uri'], authorizer=FULL_CONNECTION_ID)\n", + "exif_image_df 
= exif_image_df[['blob_col']]\n", + "\n", + "# Generate a JSON string containing the runtime information (including signed read URLs)\n", + "# This allows the UDF to download the images from Google Cloud Storage\n", + "access_urls = get_runtime_json_str(exif_image_df[\"blob_col\"], mode=\"R\")\n", + "\n", + "# Apply the BigQuery Python UDF to the runtime JSON strings\n", + "# We cast to string to ensure the input matches the UDF's signature\n", + "exif_json = access_urls.astype(str).apply(extract_exif)\n", + "\n", + "# Parse the resulting JSON strings back into a structured JSON type for easier access\n", + "exif_data = bbq.parse_json(exif_json)\n", + "\n", + "exif_data" ], "metadata": {}, "execution_count": 21, @@ -1498,11 +1590,11 @@ { "data": { "text/html": [ - "
0    {\"ExifOffset\":47,\"Make\":\"MyCamera\"}
" + "\u003cpre\u003e0 {\"ExifOffset\":47,\"Make\":\"MyCamera\"}\u003c/pre\u003e" ], "text/plain": [ "0 {\"ExifOffset\":47,\"Make\":\"MyCamera\"}\n", - "Name: blob_col, dtype: extension>[pyarrow]" + "Name: blob_col, dtype: extension\u003cdbjson\u003cJSONArrowType\u003e\u003e[pyarrow]" ] }, "execution_count": 21, From 05384eca1a32e84fdab9ab5b76246ceb8721b8b5 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 29 Apr 2026 23:26:55 +0000 Subject: [PATCH 26/39] fix: replace private _from_glob_path and update notebook format --- .../generative_ai/ai_movie_poster.ipynb | 3 + .../multimodal/multimodal_dataframe.ipynb | 1386 ++++++++--------- 2 files changed, 691 insertions(+), 698 deletions(-) diff --git a/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb b/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb index c2889ad4f92e..20617012487c 100644 --- a/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb +++ b/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb @@ -156,6 +156,9 @@ "fs = gcsfs.GCSFileSystem(anon=True)\n", "uris = fs.glob(\"gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/*\")\n", "\n", + "# Ensure URIs have gs:// prefix\n", + "uris = [u if u.startswith(\"gs://\") else f\"gs://{u}\" for u in uris]\n", + "\n", "# Read the URIs into a BigQuery DataFrame\n", "movies = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", "\n", diff --git a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb index be81936f7eee..56a59305cfa6 100644 --- a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb @@ -1,8 +1,11 @@ { "cells": [ { - "id": "9edad7a6", "cell_type": "code", + "execution_count": 1, + "id": "9edad7a6", + "metadata": {}, + "outputs": [], "source": [ "# Copyright 2025 Google LLC\n", "#\n", @@ 
-17,45 +20,43 @@ "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License." - ], - "metadata": {}, - "execution_count": 1 + ] }, { - "id": "816ab253", "cell_type": "markdown", + "id": "816ab253", + "metadata": { + "id": "YOrUAvz6DMw-" + }, "source": [ "# BigFrames Multimodal DataFrame\n", "\n", - "\u003ctable align=\"left\"\u003e\n", + "\n", "\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://colab.research.google.com/github/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", - " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/colab-logo.png\" alt=\"Colab logo\"\u003e Run in Colab\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", - " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/github-logo.png\" width=\"32\" alt=\"GitHub logo\"\u003e\n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \"Colab Run in Colab\n", + " \n", + " \n", + " \n", + " \"GitHub\n", " View on GitHub\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", - " \u003cimg src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw\u0026s\" alt=\"BQ logo\" width=\"35\"\u003e\n", + " \n", + " \n", + " \n", + " \"BQ\n", " Open in BQ Studio\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e\n" - ], - "metadata": { - "id": "YOrUAvz6DMw-" - }, - "execution_count": null + " \n", + "
\n" + ] }, { - "id": "77d821d4", "cell_type": "markdown", + "id": "77d821d4", + "metadata": {}, "source": [ "This notebook is introducing BigFrames Multimodal features:\n", "1. Create Multimodal DataFrame\n", @@ -65,42 +66,48 @@ "5. PDF chunking function\n", "6. Transcribe audio\n", "7. Extract EXIF metadata from images" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "75ab1c13", "cell_type": "markdown", - "source": [ - "## Setup" - ], + "id": "75ab1c13", "metadata": { "id": "PEAJQQ6AFg-n" }, - "execution_count": null + "source": [ + "## Setup" + ] }, { - "id": "750954c4", "cell_type": "markdown", - "source": [ - "Install the latest bigframes package if bigframes version \u003c 2.4.0" - ], + "id": "750954c4", "metadata": {}, - "execution_count": null + "source": [ + "Install the latest bigframes package if bigframes version < 2.4.0" + ] }, { - "id": "2a6fafb1", "cell_type": "code", + "execution_count": 2, + "id": "2a6fafb1", + "metadata": {}, + "outputs": [], "source": [ "# !pip install bigframes --upgrade" - ], - "metadata": {}, - "execution_count": 2 + ] }, { - "id": "df561d04", "cell_type": "code", + "execution_count": 3, + "id": "df561d04", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bGyhLnfEeB0X", + "outputId": "83ac8b64-3f44-4d43-d089-28a5026cbb42" + }, + "outputs": [], "source": [ "PROJECT = \"bigframes-dev\" # replace with your project. 
\n", "# Refer to https://cloud.google.com/bigquery/docs/multimodal-data-dataframes-tutorial#required_roles for your required permissions\n", @@ -128,19 +135,14 @@ "\n", "import bigframes.pandas as bpd\n", "import bigframes.bigquery as bbq" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "bGyhLnfEeB0X", - "outputId": "83ac8b64-3f44-4d43-d089-28a5026cbb42" - }, - "execution_count": 3 + ] }, { - "id": "35bd6e6e", "cell_type": "code", + "execution_count": 4, + "id": "35bd6e6e", + "metadata": {}, + "outputs": [], "source": [ "import bigframes.bigquery as bbq\n", "\n", @@ -180,41 +182,32 @@ "\n", "def get_updated(series):\n", " return bpd.to_datetime(bbq.json_value(get_metadata(series), \"$.updated\").astype(\"Int64\"), unit=\"us\", utc=True)" - ], - "metadata": {}, - "execution_count": 4 + ] }, { - "id": "be9ce892", "cell_type": "markdown", - "source": [ - "To create a Multimodal DataFrame, you can use `bigframes.bigquery.obj.make_ref` on a series of URIs. 
You can get the URIs from a BigQuery table or by listing them from Cloud Storage.\n", - "\n", - "In this example, we use `gcsfs` to list the files from Cloud Storage, and then use `read_gbq` to load them into a BigQuery DataFrame before creating the object reference.\n", - "\n", - "```python\n", - "import gcsfs\n", - "import bigframes.bigquery as bbq\n", - "\n", - "fs = gcsfs.GCSFileSystem(anon=True)\n", - "uris = fs.glob(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\")\n", - "\n", - "# Read the URIs into a BigQuery DataFrame\n", - "df_image = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", - "\n", - "# Create the object reference column\n", - "df_image['image'] = bbq.obj.make_ref(df_image['uri'], authorizer=FULL_CONNECTION_ID)\n", - "df_image = df_image[['image']]\n", - "```" - ], + "id": "be9ce892", "metadata": { "id": "ifKOq7VZGtZy" }, - "execution_count": null + "source": [ + "To create a Multimodal DataFrame, you can use `bigframes.bigquery.obj.make_ref` on a series of URIs. You can get the URIs from a BigQuery table or by listing them from Cloud Storage.\n", + "\n", + "In this example, we use `gcsfs` to list the files from Cloud Storage, and then use `read_gbq` to load them into a BigQuery DataFrame before creating the object reference." 
+ ] }, { - "id": "871d02f4", "cell_type": "code", + "execution_count": 5, + "id": "871d02f4", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fx6YcZJbeYru", + "outputId": "d707954a-0dd0-4c50-b7bf-36b140cf76cf" + }, + "outputs": [], "source": [ "import gcsfs\n", "import bigframes.bigquery as bbq\n", @@ -223,6 +216,9 @@ "fs = gcsfs.GCSFileSystem(anon=True)\n", "uris = fs.glob(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\")\n", "\n", + "# Ensure URIs have gs:// prefix\n", + "uris = [u if u.startswith(\"gs://\") else f\"gs://{u}\" for u in uris]\n", + "\n", "# Read the URIs into a BigQuery DataFrame using UNNEST\n", "# We take the first 5 for this example\n", "df_image = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", @@ -230,24 +226,12 @@ "# Create the object reference column\n", "df_image['image'] = bbq.obj.make_ref(df_image['uri'], authorizer=FULL_CONNECTION_ID)\n", "df_image = df_image[['image']]" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "fx6YcZJbeYru", - "outputId": "d707954a-0dd0-4c50-b7bf-36b140cf76cf" - }, - "execution_count": 5 + ] }, { - "id": "2e0436b0", "cell_type": "code", - "source": [ - "# Take only the 5 images to deal with. Preview the content of the Mutimodal DataFrame\n", - "df_image = df_image.head(5)\n", - "df_image" - ], + "execution_count": 6, + "id": "2e0436b0", "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -256,7 +240,6 @@ "id": "HhCb8jRsLe9B", "outputId": "03081cf9-3a22-42c9-b38f-649f592fdada" }, - "execution_count": 6, "outputs": [ { "name": "stderr", @@ -273,8 +256,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
image
0
1
2
3
4
\n", + "

5 rows × 1 columns

\n", + "
[5 rows x 1 columns in total]" ], "text/plain": [ " image\n", @@ -335,46 +318,40 @@ "metadata": {}, "output_type": "execute_result" } + ], + "source": [ + "# Take only the 5 images to deal with. Preview the content of the Mutimodal DataFrame\n", + "df_image = df_image.head(5)\n", + "df_image" ] }, { - "id": "429b0117", "cell_type": "markdown", - "source": [ - "### 2. Combine unstructured data with structured data" - ], + "id": "429b0117", "metadata": { "id": "b6RRZb3qPi_T" }, - "execution_count": null + "source": [ + "### 2. Combine unstructured data with structured data" + ] }, { - "id": "991fa065", "cell_type": "markdown", - "source": [ - "Now you can put more information into the table to describe the files. Such as author info from inputs, or other metadata from the gcs object itself." - ], + "id": "991fa065", "metadata": { "id": "4YJCdmLtR-qu" }, - "execution_count": null + "source": [ + "Now you can put more information into the table to describe the files. Such as author info from inputs, or other metadata from the gcs object itself." + ] }, { - "id": "08722ec5", "cell_type": "code", - "source": [ - "# Combine unstructured data with structured data\n", - "df_image = df_image.head(5)\n", - "df_image[\"author\"] = [\"alice\", \"bob\", \"bob\", \"alice\", \"bob\"] # type: ignore\n", - "df_image[\"content_type\"] = get_content_type(df_image[\"image\"])\n", - "df_image[\"size\"] = get_size(df_image[\"image\"])\n", - "df_image[\"updated\"] = get_updated(df_image[\"image\"])\n", - "df_image" - ], + "execution_count": 7, + "id": "08722ec5", "metadata": { "id": "YYYVn7NDH0Me" }, - "execution_count": 7, "outputs": [ { "name": "stderr", @@ -391,8 +368,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
imageauthorcontent_typesizeupdated
0aliceimage/png15912402025-03-20 17:45:04+00:00
1bobimage/png11829512025-03-20 17:45:02+00:00
2bobimage/png15208842025-03-20 17:44:55+00:00
3aliceimage/png12354012025-03-20 17:45:19+00:00
4bobimage/png15919232025-03-20 17:44:47+00:00
\n", + "

5 rows × 5 columns

\n", + "
[5 rows x 5 columns in total]" ], "text/plain": [ " image author content_type \\\n", @@ -484,29 +461,143 @@ "metadata": {}, "output_type": "execute_result" } + ], + "source": [ + "# Combine unstructured data with structured data\n", + "df_image = df_image.head(5)\n", + "df_image[\"author\"] = [\"alice\", \"bob\", \"bob\", \"alice\", \"bob\"] # type: ignore\n", + "df_image[\"content_type\"] = get_content_type(df_image[\"image\"])\n", + "df_image[\"size\"] = get_size(df_image[\"image\"])\n", + "df_image[\"updated\"] = get_updated(df_image[\"image\"])\n", + "df_image" ] }, { - "id": "f90826f6", "cell_type": "markdown", + "id": "f90826f6", + "metadata": {}, "source": [ "### 3. Conduct image transformations" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "e24c9f8c", "cell_type": "markdown", + "id": "e24c9f8c", + "metadata": {}, "source": [ "This section demonstrates how to perform image transformations like blur, resize, and normalize using custom BigQuery Python UDFs and the `opencv-python` library." 
- ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "db665049", "cell_type": "code", + "execution_count": 8, + "id": "db665049", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 487 + }, + "id": "HhCb8jRsLe9B", + "outputId": "03081cf9-3a22-42c9-b38f-649f592fdada" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", + " return global_session.with_default_session(\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4655: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", + " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
imageblurred
0
1
2
3
4
\n", + "

5 rows × 2 columns

\n", + "
[5 rows x 2 columns in total]" + ], + "text/plain": [ + " image \\\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "\n", + " blurred \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "\n", + "[5 rows x 2 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Construct the canonical connection ID\n", "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", @@ -519,7 +610,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"opencv-python\", \"numpy\", \"requests\"],\n", ")\n", - "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -\u003e str:\n", + "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -> str:\n", " import json\n", " import cv2 as cv\n", " import numpy as np\n", @@ -580,137 +671,25 @@ " image_blur, 20, 20\n", ")\n", "df_image[[\"image\", \"blurred\"]]" - ], + ] + }, + { + "cell_type": "markdown", + "id": "11fcc6ec", "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 487 - }, - "id": "HhCb8jRsLe9B", - "outputId": "03081cf9-3a22-42c9-b38f-649f592fdada" + "id": "Euk5saeVVdTP" }, - "execution_count": 8, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", - " return 
global_session.with_default_session(\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4655: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", - " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n" - ] - }, - { - "data": { - "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "\u003c/style\u003e\n", - "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", - " \u003cthead\u003e\n", - " \u003ctr style=\"text-align: right;\"\u003e\n", - " \u003cth\u003e\u003c/th\u003e\n", - " \u003cth\u003eimage\u003c/th\u003e\n", - " \u003cth\u003eblurred\u003c/th\u003e\n", - " \u003c/tr\u003e\n", - " \u003c/thead\u003e\n", - " \u003ctbody\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e0\u003c/th\u003e\n", - " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=2bb786ea7fda0a778b591d0f85b1d1df003726a26585490c1c8e1cc632bc90e418bc2762993da32c60017b96b36a9bf03ca123f7e74a34e5da98d4a8ae65e340c64872e1ab7c6442435253669103a157ee61c793da79ad0e6ae2a7bdbf54e8d67dce61de37d9dd2f54b0c994f8135d88af27f08eeb99e713b385b37fd0762503244cd0c597479d6925dcf111db4605842c797aabb307736028a5dd802ef08a2697a4bd1c96b5bf8bbecdb8a8f384028cec03bc9d51c1d2dfee1760756e0c6e54d9d753b373115e308cf45de2c082f30f263285af5d5e43fd9bf8c3dae32ceee389ef9427f3e2b06646d48aa570ba1ca0713d0f270ea2f4996e8ad3d03a3e39a6\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334353324\u0026X-Goog-Signature=6f16236d50a82c7ede3c23366389344149faba103443acf1c1a2b0d3b0d91948c8c15a3ad990382ab9b61badf8acee3c4b9e5ec6f96b72d395cd2e18227ad5c051ad189c1af48030fb44c1b9cc84a44a74fec04d576a6d699c1f0e133b2fc5d583872b5577c60a01f58d163de6e95591d260e9246fae0243d7472748e854ccaf4b4c3de80fb4e085f428427fe9914d22fba7416ae8b9e33beb0dfbcb91273609d0333aa1135b835bc69f9e15e1097452133e376b9b3f1bb7b4d7d7e9cb860677ce786e49239e8a8af86f83b7e27e76846212daddd2859f8dec4942b7351c69d396d4fd9a529fd0c48677aa4923063a752f6845801f22d3de227a640946f1e212\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e1\u003c/th\u003e\n", - " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=31ed02660dcead7471a7b47e2f471f4cf515e37525c14027f8d87ba1479982f244df02979b7da03676d4bbbd21221b3f70cefea4501eca3a5a64acb187ef5a2b03b19116729059dc666c2e295fc1ac831cd38864c1673344e04855db279a3c96409550dfade09631b0818e6cb576ff02afba6a87624f7c33077f7f19c891f6413b317cabb834182585d445ce97aa0294beca4de0edd2f9a0f6429747e93db53df875249a9c0543a329dac0f46ef3ed4e64a7e51648d0820ba52b7845b6f98371aca7deb165b4f7b7537be0d659cf33bb34893417666d7c9e9f611332ccba6bae6024be6862350185886cf61354eda3591e96e344c59350ca6f68ce50670fa869\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334573735\u0026X-Goog-Signature=209b62c9c3e2657b9270ed126a6c1e3979471f9e308670ce1595e9c18a0ec7c627a313b1f1c87108355602dc11b804ff609ba3394fada2b4fd186c6fef4138e22222045d0fd0660f103e6378ac83c6bd7d8da27c1a3d306dbfd778f6738c2e492bd8527ba9112e086f15334b7ab7795b88212d3825b3be325606cc507e5504406bc4e9dc41b29fb99e1207fe74c11053e7332ad9fdd65587f55cd10cc1bfd903672f43de38a18cb346977d8498fd751ab8728ca580261a1a0367421437d56df7f5f7dda895ddd370954aee632258304da31fd8e7c33619dd5cf9ee0ebe9e60eba3cc416e871e181525f23e525489ef6dffced9211f7d3681ec24b3ac7c0b05ab\" 
width=\"300\"\u003e\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e2\u003c/th\u003e\n", - " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=8d32ad0137f3b10393aeaf635732f7fe337149323c0a33b634f9dcc592e662d4d268223f5c5cc89956787c5a3c061fa69c2de4d1a3225497dfad466548fcd4df7ee6000c4e60ff0661d0f9b451efa2701df5de55c322b3585c09210171403d47c358e6f41281f245711afd63eed3157e6674e9958ebffc5516f6ccb06b9af1830400e1d6a9bed44538b7212eeb257665aa43eecfa2af7482863f6032002de57f7a01af83cf5051541178297ab4a256710c04fd082cb51af4725dbafb288ee79caa630853dcbf35c49595b2b2e552804cabdbdf955806d57b97451e87035bd3ea89dbb5560a1c14170c531c3222a1d0b59d79d4c45b641887699f0bf5830612cb\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334233994\u0026X-Goog-Signature=6eed60a846158365a0c54d4c96516bb8c4011b497da203479fa9066bc2b7c2c7c683aa351d5249e05df64b8f86549f251150327013e41231ee06f22e9a51e1061c577d7b2da4704b80f684776702da13be6122ad6652463f210c53d9149da9e32a253fde4851723cbb616f18374a8f9b7b5cd36a82e66a2e5677ae51b906e6c4649ea27fcbbcba3ef24bf163a4b0f469c10eca8da464f58161ead8371bdb5b0d452ee713b0ec498ca9aaefd521a1d8240d82885a19af58ddefb167146ab9d411b5d0712457b126104b5f42f57ab4ede2da5ae9af74e708840b507a5538571804c60e9c166516e0118aa555a13169401e21f0a5ec303f3b801d6a972ef94426ee\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e3\u003c/th\u003e\n", - " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=472d88289a3910bdde93dd468e6ae9e66c0821fb0d72a356ef887771baa2a80f356b0ac67183873c0e89c87afb3080b3a78122fa7c3e37ad8b54d8aa216e9e04cb6f2af4f200784beba8de4eea7cf3a39faa8d800b7cc6bbf03df55beaeb69727d413266e2e59fb900bc25b6859d7c14db3c22aa6a0aeaa71c331ae0a2bd27c9e2c246fe931a2562210411491bc83bc34e7bef60901189949348eee909e64c90e3aab7b03d7c274ee84c03cca6dd624169f40323a785bcc1a8357810f738b45a637c09267530fbddc413a75622d793876c74c9128b11d8b4ea2fe2b959b34b96e06a9c384f72522ad357557d0a3b1c8b5792c2e94631139eae178fa660a5e8b2\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334678692\u0026X-Goog-Signature=239a77d1c63c4074b2ddc3dd21a5bc77de8ea993fdd911a199b6cca207fef9286307a8321d91d02cffda17060a217f50b2a1d8017bbcb29511339a1187b38332a39795fb1b2c754988948be8849a371fd66a7698f8e35a0f1a3430b6837ede73c37c90da49a062586828f19595d4246e4eabf3e3d629f251b066fb20e0f34ddad80599f5946aa76e0694b859f7d575dc781a850d56e9846d456d285b0023e90862154008154fa13ef1f95096a6161e6a1b314b82ca23bc44fc1b4b7799a9f936dca09ab09830446089d8defe11a2c0e066aa8a65204d9027140f314001c59187593290ae22c4092ee5d0293a6e5da1e1da3d42c32c0aced7db7ad4f774575deb\" 
width=\"300\"\u003e\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e4\u003c/th\u003e\n", - " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=1077450c755044fd6121e0dd67ec7de75a1785c4bacede635b78dd64d6b982ba02bba7a00065035fa13f003ef03c14d7f622b6a5c9ef19cadc956470e5a62e0f1855fb8cae0bbe723c0352224d152c85e173b234e3f5a1bcc2e1ddd4da065c184e82487eb4aa17464d330144b3b7c17ba357af1fb267f97730b3a798ff42a514ea47b83aa9f2560cfd428778638287c96ed17f95f0327e1cade380d046120d79479aca927d627ba65f3bc009c75d1662cbf93c6d33cea4b030a7906d3689922bac13ca3fab45536f115ae6253715cc9c24769e57e83657bf0393ce55e45f90d9666513a19a939044d3b3ad528757e022624d6601dacd326ae8ab3c9ce4ae6d52\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334302924\u0026X-Goog-Signature=3a25e70e59647f427ae55d72168c576fdb59b05f9ed6b72885a9e9c11f655e6bf10ded55744adcef330659731f0effdda7550ddca99c309cefe46ba864b088f155a5243e579d182a55f916d21d9dd0a83534a5e2efb2955a9146db12b22b4321e3f36b69f8d89d663507d7db83ac96bb2419a2baa0787fd0c6e6079d06652b8a2ac364a0a0a5d8d9de6331658b798abddbaaae1ede3026a4f0d955e74782afe240d31e6748bea8ab332ed945f541ca20c587b8c1449643f4748a3b059aae857334b6249bdf86794d307340b6a07d0dc47d3980e234be9c0549f52636d33776b7474fb95ebc014656c3cc217a777d438612a08849ac498baba0ce4716ac4ea432\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003c/tbody\u003e\n", - "\u003c/table\u003e\n", - "\u003cp\u003e5 rows × 2 columns\u003c/p\u003e\n", - "\u003c/div\u003e[5 rows x 2 columns in total]" - ], - "text/plain": [ - " image \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "\n", - " blurred \n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... 
\n", - "\n", - "[5 rows x 2 columns]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ] - }, - { - "id": "11fcc6ec", - "cell_type": "markdown", "source": [ "### 4. Use LLM models to ask questions and generate embeddings on images" - ], - "metadata": { - "id": "Euk5saeVVdTP" - }, - "execution_count": null + ] }, { - "id": "793b2f45", "cell_type": "code", - "source": [ - "from bigframes.ml import llm\n", - "gemini = llm.GeminiTextGenerator()" - ], + "execution_count": 9, + "id": "793b2f45", "metadata": { "id": "mRUGfcaFVW-3" }, - "execution_count": 9, "outputs": [ { "name": "stderr", @@ -722,16 +701,16 @@ " return method(*args, **kwargs)\n" ] } + ], + "source": [ + "from bigframes.ml import llm\n", + "gemini = llm.GeminiTextGenerator()" ] }, { - "id": "13d7cb93", "cell_type": "code", - "source": [ - "# Ask the same question on the images\n", - "answer = gemini.predict(df_image, prompt=[\"what item is it?\", df_image[\"image\"]])\n", - "answer[[\"ml_generate_text_llm_result\", \"image\"]]" - ], + "execution_count": 10, + "id": "13d7cb93", "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -740,7 +719,6 @@ "id": "DNFP7CbjWdR9", "outputId": "3f90a062-0abc-4bce-f53c-db57b06a14b9" }, - "execution_count": 10, "outputs": [ { "name": "stderr", @@ -763,8 +741,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_text_llm_resultimage
0The item is a container of K9 Guard Dog Paw Balm.
1The item is K9 Guard Dog Hot Spot Spray.
2The image contains three bags of food, likely for small animals like rabbits or guinea pigs. They are labeled \"Timoth Hay Lend Variety Plend\", \"Herbal Greeıs Mix Variety Blend\", and \"Berry & Blossom Treat Blend\", all under the brand \"Fluffy Buns.\" The bags are yellow, green, and purple, respectively. Each bag has a pile of its contents beneath it.
3The item is a cat tree.\\n
4The item is a bag of bird seed. Specifically, it's labeled \"Chirpy Seed\", \"Deluxe Bird Food\".\\n
\n", + "

5 rows × 2 columns

\n", + "
[5 rows x 2 columns in total]" ], "text/plain": [ " ml_generate_text_llm_result \\\n", @@ -838,11 +816,21 @@ "metadata": {}, "output_type": "execute_result" } + ], + "source": [ + "# Ask the same question on the images\n", + "answer = gemini.predict(df_image, prompt=[\"what item is it?\", df_image[\"image\"]])\n", + "answer[[\"ml_generate_text_llm_result\", \"image\"]]" ] }, { - "id": "68857305", "cell_type": "code", + "execution_count": 11, + "id": "68857305", + "metadata": { + "id": "IG3J3HsKhyBY" + }, + "outputs": [], "source": [ "# Ask different questions\n", "df_image[\"question\"] = [\n", @@ -852,19 +840,12 @@ " \"is it for pets?\",\n", " \"what is the weight of the product?\",\n", "]" - ], - "metadata": { - "id": "IG3J3HsKhyBY" - }, - "execution_count": 11 + ] }, { - "id": "829afc69", "cell_type": "code", - "source": [ - "answer_alt = gemini.predict(df_image, prompt=[df_image[\"question\"], df_image[\"image\"]])\n", - "answer_alt[[\"ml_generate_text_llm_result\", \"image\"]]" - ], + "execution_count": 12, + "id": "829afc69", "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -873,7 +854,6 @@ "id": "qKOb765IiVuD", "outputId": "731bafad-ea29-463f-c8c1-cb7acfd70e5d" }, - "execution_count": 12, "outputs": [ { "name": "stderr", @@ -896,8 +876,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_text_llm_resultimage
0The item is a container of Dog Paw Balm.
1The picture contains many colors, including white, black, green, and a bright blue. The product label predominantly features a bright blue hue. The background is a solid gray.
2Here are the product names from the image:\\n\\n* **Timoth Hay Lend Variety Plend** is the product in the yellow bag.\\n* **Herbal Greeıs Mix Variety Blend** is the product in the green bag.\\n* **Berry & Blossom Treat Blend** is the product in the purple bag.
3Yes, it is for pets. It appears to be a cat tree or scratching post.\\n
4The image shows that the weight of the product is 15 oz/ 257g.
\n", + "

5 rows × 2 columns

\n", + "
[5 rows x 2 columns in total]" ], "text/plain": [ " ml_generate_text_llm_result \\\n", @@ -971,17 +951,16 @@ "metadata": {}, "output_type": "execute_result" } + ], + "source": [ + "answer_alt = gemini.predict(df_image, prompt=[df_image[\"question\"], df_image[\"image\"]])\n", + "answer_alt[[\"ml_generate_text_llm_result\", \"image\"]]" ] }, { - "id": "e75df430", "cell_type": "code", - "source": [ - "# Generate embeddings.\n", - "embed_model = llm.MultimodalEmbeddingGenerator()\n", - "embeddings = embed_model.predict(df_image[\"image\"])\n", - "embeddings" - ], + "execution_count": 13, + "id": "e75df430", "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -990,7 +969,6 @@ "id": "KATVv2CO5RT1", "outputId": "6ec01f27-70b6-4f69-c545-e5e3c879480c" }, - "execution_count": 13, "outputs": [ { "name": "stderr", @@ -1015,8 +993,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_embedding_resultml_generate_embedding_statusml_generate_embedding_start_secml_generate_embedding_end_seccontent
0[ 0.00638822 0.01666385 0.00451817 ... -0.02...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
1[ 0.00973976 0.02148137 0.0024429 ... 0.00...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
2[ 0.01195884 0.02139394 0.05968047 ... -0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
3[-0.02621161 0.02797648 0.04416926 ... -0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
4[ 0.05918628 0.0125137 0.01907336 ... 0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
\n", + "

5 rows × 5 columns

\n", + "
[5 rows x 5 columns in total]" ], "text/plain": [ " ml_generate_embedding_result \\\n", @@ -1095,18 +1073,18 @@ "4 [ 0.05918628 0.0125137 0.01907336 ... 0.01... \n", "\n", " ml_generate_embedding_status ml_generate_embedding_start_sec \\\n", - "0 \u003cNA\u003e \n", - "1 \u003cNA\u003e \n", - "2 \u003cNA\u003e \n", - "3 \u003cNA\u003e \n", - "4 \u003cNA\u003e \n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", "\n", " ml_generate_embedding_end_sec \\\n", - "0 \u003cNA\u003e \n", - "1 \u003cNA\u003e \n", - "2 \u003cNA\u003e \n", - "3 \u003cNA\u003e \n", - "4 \u003cNA\u003e \n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", "\n", " content \n", "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", @@ -1122,24 +1100,41 @@ "metadata": {}, "output_type": "execute_result" } + ], + "source": [ + "# Generate embeddings.\n", + "embed_model = llm.MultimodalEmbeddingGenerator()\n", + "embeddings = embed_model.predict(df_image[\"image\"])\n", + "embeddings" ] }, { - "id": "23892b0e", "cell_type": "markdown", + "id": "23892b0e", + "metadata": { + "id": "iRUi8AjG7cIf" + }, "source": [ "### 5. PDF extraction and chunking function\n", "\n", "This section demonstrates how to extract text and chunk text from PDF files using custom BigQuery Python UDFs and the `pypdf` library." 
- ], - "metadata": { - "id": "iRUi8AjG7cIf" - }, - "execution_count": null + ] }, { - "id": "136a18b8", "cell_type": "code", + "execution_count": 14, + "id": "136a18b8", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", + " return global_session.with_default_session(\n" + ] + } + ], "source": [ "# Construct the canonical connection ID\n", "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", @@ -1152,7 +1147,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"pypdf\", \"requests\", \"cryptography\"],\n", ")\n", - "def pdf_extract(src_obj_ref_rt: str) -\u003e str:\n", + "def pdf_extract(src_obj_ref_rt: str) -> str:\n", " import io\n", " import json\n", " from pypdf import PdfReader\n", @@ -1179,7 +1174,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"pypdf\", \"requests\", \"cryptography\"],\n", ")\n", - "def pdf_chunk(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -\u003e list[str]:\n", + "def pdf_chunk(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -> list[str]:\n", " import io\n", " import json\n", " from pypdf import PdfReader\n", @@ -1197,7 +1192,7 @@ " page_text = page.extract_text()\n", " if page_text:\n", " curr_chunk += page_text\n", - " while len(curr_chunk) \u003e= chunk_size:\n", + " while len(curr_chunk) >= chunk_size:\n", " split_idx = curr_chunk.rfind(\" \", 0, chunk_size)\n", " if split_idx == -1:\n", " split_idx = chunk_size\n", @@ -1208,57 +1203,19 @@ " if curr_chunk:\n", " all_text_chunks.append(curr_chunk)\n", " return all_text_chunks" - ], - "metadata": {}, - "execution_count": 14, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in 
preview.\n", - " return global_session.with_default_session(\n" - ] - } ] }, { - "id": "234a5f86", "cell_type": "code", - "source": [ - "import gcsfs\n", - "import bigframes.bigquery as bbq\n", - "\n", - "# List files using gcsfs\n", - "fs = gcsfs.GCSFileSystem(anon=True)\n", - "uris = fs.glob(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*\")\n", - "\n", - "# Read the URIs into a BigQuery DataFrame\n", - "df_pdf = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", - "\n", - "# Create the object reference column\n", - "df_pdf['pdf'] = bbq.obj.make_ref(df_pdf['uri'], authorizer=FULL_CONNECTION_ID)\n", - "df_pdf = df_pdf[['pdf']]\n", - "\n", - "# Generate a JSON string containing the runtime information (including signed read URLs)\n", - "access_urls = get_runtime_json_str(df_pdf[\"pdf\"], mode=\"R\")\n", - "\n", - "# Apply PDF extraction\n", - "df_pdf[\"extracted_text\"] = access_urls.apply(pdf_extract)\n", - "\n", - "# Apply PDF chunking\n", - "df_pdf[\"chunked\"] = access_urls.apply(pdf_chunk, args=(2000, 200))\n", - "\n", - "df_pdf[[\"extracted_text\", \"chunked\"]]" - ], - "metadata": {}, "execution_count": 15, + "id": "234a5f86", + "metadata": {}, "outputs": [ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
extracted_textchunked
0CritterCuisine Pro 5000 - Automatic Pet Feeder...[\"CritterCuisine Pro 5000 - Automatic Pet Feed...
\n", + "

1 rows × 2 columns

\n", + "
[1 rows x 2 columns in total]" ], "text/plain": [ " extracted_text \\\n", @@ -1304,29 +1261,53 @@ "metadata": {}, "output_type": "execute_result" } + ], + "source": [ + "import gcsfs\n", + "import bigframes.bigquery as bbq\n", + "\n", + "# List files using gcsfs\n", + "fs = gcsfs.GCSFileSystem(anon=True)\n", + "uris = fs.glob(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*\")\n", + "\n", + "# Ensure URIs have gs:// prefix\n", + "uris = [u if u.startswith(\"gs://\") else f\"gs://{u}\" for u in uris]\n", + "\n", + "# Read the URIs into a BigQuery DataFrame\n", + "df_pdf = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", + "\n", + "# Create the object reference column\n", + "df_pdf['pdf'] = bbq.obj.make_ref(df_pdf['uri'], authorizer=FULL_CONNECTION_ID)\n", + "df_pdf = df_pdf[['pdf']]\n", + "\n", + "# Generate a JSON string containing the runtime information (including signed read URLs)\n", + "access_urls = get_runtime_json_str(df_pdf[\"pdf\"], mode=\"R\")\n", + "\n", + "# Apply PDF extraction\n", + "df_pdf[\"extracted_text\"] = access_urls.apply(pdf_extract)\n", + "\n", + "# Apply PDF chunking\n", + "df_pdf[\"chunked\"] = access_urls.apply(pdf_chunk, args=(2000, 200))\n", + "\n", + "df_pdf[[\"extracted_text\", \"chunked\"]]" ] }, { - "id": "d80effbe", "cell_type": "code", - "source": [ - "# Explode the chunks to see each chunk as a separate row\n", - "chunked = df_pdf[\"chunked\"].explode()\n", - "chunked" - ], - "metadata": {}, "execution_count": 16, + "id": "d80effbe", + "metadata": {}, "outputs": [ { "data": { "text/html": [ - "\u003cpre\u003e0 CritterCuisine Pro 5000 - Automatic Pet Feeder...\n", + "
0    CritterCuisine Pro 5000 - Automatic Pet Feeder...\n",
        "0    on a level, stable surface to prevent tipping....\n",
        "0    included)\\nto maintain the schedule during pow...\n",
        "0    digits for Meal 1 will flash.\\n\u0000. Use the UP/D...\n",
        "0    paperclip) for 5\\nseconds. This will reset all...\n",
        "0    unit with a damp cloth. Do not immerse the bas...\n",
-       "0    continues,\\ncontact customer support.\\nE2: Foo...\u003c/pre\u003e"
+       "0    continues,\\ncontact customer support.\\nE2: Foo...
" ], "text/plain": [ "0 CritterCuisine Pro 5000 - Automatic Pet Feeder...\n", @@ -1343,20 +1324,27 @@ "metadata": {}, "output_type": "execute_result" } + ], + "source": [ + "# Explode the chunks to see each chunk as a separate row\n", + "chunked = df_pdf[\"chunked\"].explode()\n", + "chunked" ] }, { - "id": "118cf1c7", "cell_type": "markdown", + "id": "118cf1c7", + "metadata": {}, "source": [ "### 6. Audio transcribe" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "1794c54f", "cell_type": "code", + "execution_count": 17, + "id": "1794c54f", + "metadata": {}, + "outputs": [], "source": [ "import gcsfs\n", "import bigframes.bigquery as bbq\n", @@ -1367,6 +1355,9 @@ "fs = gcsfs.GCSFileSystem()\n", "uris = fs.glob(audio_gcs_path)\n", "\n", + "# Ensure URIs have gs:// prefix\n", + "uris = [u if u.startswith(\"gs://\") else f\"gs://{u}\" for u in uris]\n", + "\n", "# Read the URIs into a BigQuery DataFrame\n", "# If the bucket is empty or doesn't exist, this will result in an empty DataFrame\n", "if not uris:\n", @@ -1378,13 +1369,39 @@ "# Create the object reference column\n", "df['audio'] = bbq.obj.make_ref(df['uri'], authorizer=FULL_CONNECTION_ID)\n", "df = df[['audio']]" - ], - "metadata": {}, - "execution_count": 17 + ] }, { - "id": "c9f9d484", "cell_type": "code", + "execution_count": null, + "id": "c9f9d484", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
0    Now, as all books, not primarily intended as p...
" + ], + "text/plain": [ + "0 Now, as all books, not primarily intended as p...\n", + "Name: transcribed_content, dtype: string" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# The audio_transcribe function is a convenience wrapper around bigframes.bigquery.ai.generate.\n", "# Here's how to perform the same operation directly:\n", @@ -1410,39 +1427,29 @@ "\n", "transcribed_series = transcribed_results.struct.field(\"result\").rename(\"transcribed_content\")\n", "transcribed_series" - ], + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "7209a62a", "metadata": {}, - "execution_count": null, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, { "data": { "text/html": [ - "\u003cpre\u003e0 Now, as all books, not primarily intended as p...\u003c/pre\u003e" + "
0    {'status': '', 'content': 'Now, as all books, ...
" ], "text/plain": [ - "0 Now, as all books, not primarily intended as p...\n", - "Name: transcribed_content, dtype: string" + "0 {'status': '', 'content': 'Now, as all books, ...\n", + "Name: transcription_results, dtype: struct[pyarrow]" ] }, - "execution_count": 18, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } - ] - }, - { - "id": "7209a62a", - "cell_type": "code", + ], "source": [ "# To get verbose results (including status), we can extract both fields from the result struct.\n", "transcribed_content_series = transcribed_results.struct.field(\"result\")\n", @@ -1457,47 +1464,39 @@ "# Package as a struct for consistent display\n", "transcribed_series_verbose = bbq.struct(transcribed_series_verbose).rename(\"transcription_results\")\n", "transcribed_series_verbose" - ], - "metadata": {}, - "execution_count": 19, - "outputs": [ - { - "data": { - "text/html": [ - "\u003cpre\u003e0 {'status': '', 'content': 'Now, as all books, ...\u003c/pre\u003e" - ], - "text/plain": [ - "0 {'status': '', 'content': 'Now, as all books, ...\n", - "Name: transcription_results, dtype: struct\u003cstatus: string, content: string\u003e[pyarrow]" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } ] }, { - "id": "c8351cc3", "cell_type": "markdown", + "id": "c8351cc3", + "metadata": {}, "source": [ "### 7. Extract EXIF metadata from images" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "e59670b9", "cell_type": "markdown", + "id": "e59670b9", + "metadata": {}, "source": [ "This section demonstrates how to extract EXIF metadata from images using a custom BigQuery Python UDF and the `Pillow` library." 
- ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "fda362f4", "cell_type": "code", + "execution_count": 20, + "id": "fda362f4", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", + " return global_session.with_default_session(\n" + ] + } + ], "source": [ "# Construct the canonical connection ID\n", "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", @@ -1513,7 +1512,7 @@ " container_cpu=0.33,\n", " container_memory=\"512Mi\"\n", ")\n", - "def extract_exif(src_obj_ref_rt: str) -\u003e str:\n", + "def extract_exif(src_obj_ref_rt: str) -> str:\n", " import io\n", " import json\n", " from PIL import ExifTags, Image\n", @@ -1530,23 +1529,38 @@ " tag_name = ExifTags.TAGS.get(tag, tag)\n", " exif_dict[tag_name] = value\n", " return json.dumps(exif_dict)" - ], + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "40bb6bc9", "metadata": {}, - "execution_count": 20, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", - " return global_session.with_default_session(\n" + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/utils.py:228: PreviewWarning: The JSON-related API `parse_json` is in preview. Its behavior may\n", + "change in future versions.\n", + " warnings.warn(bfe.format_message(msg), category=bfe.PreviewWarning)\n" ] + }, + { + "data": { + "text/html": [ + "
0    {\"ExifOffset\":47,\"Make\":\"MyCamera\"}
" + ], + "text/plain": [ + "0 {\"ExifOffset\":47,\"Make\":\"MyCamera\"}\n", + "Name: blob_col, dtype: extension>[pyarrow]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" } - ] - }, - { - "id": "40bb6bc9", - "cell_type": "code", + ], "source": [ "import gcsfs\n", "import bigframes.bigquery as bbq\n", @@ -1555,6 +1569,9 @@ "fs = gcsfs.GCSFileSystem()\n", "uris = fs.glob(\"gs://bigframes_blob_test/images_exif/*\")\n", "\n", + "# Ensure URIs have gs:// prefix\n", + "uris = [u if u.startswith(\"gs://\") else f\"gs://{u}\" for u in uris]\n", + "\n", "if not uris:\n", " uris = [\"gs://bigframes_blob_test/images_exif/dummy.jpg\"]\n", "\n", @@ -1574,33 +1591,6 @@ "exif_data = bbq.parse_json(exif_json)\n", "\n", "exif_data" - ], - "metadata": {}, - "execution_count": 21, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/utils.py:228: PreviewWarning: The JSON-related API `parse_json` is in preview. 
Its behavior may\n", - "change in future versions.\n", - " warnings.warn(bfe.format_message(msg), category=bfe.PreviewWarning)\n" - ] - }, - { - "data": { - "text/html": [ - "\u003cpre\u003e0 {\"ExifOffset\":47,\"Make\":\"MyCamera\"}\u003c/pre\u003e" - ], - "text/plain": [ - "0 {\"ExifOffset\":47,\"Make\":\"MyCamera\"}\n", - "Name: blob_col, dtype: extension\u003cdbjson\u003cJSONArrowType\u003e\u003e[pyarrow]" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } ] } ], @@ -1626,6 +1616,6 @@ "version": "3.13.0" } }, - "nbformat_minor": 0, - "nbformat": 4 + "nbformat": 4, + "nbformat_minor": 0 } From 46c95ab6e3b45480952dc1a7c444aca1c8bc1062 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 29 Apr 2026 23:34:16 +0000 Subject: [PATCH 27/39] fix: replace private _read_gbq_colab and update notebook format --- .../notebooks/dataframes/anywidget_mode.ipynb | 747 ++++----- .../multimodal/multimodal_dataframe.ipynb | 1385 +++++++++-------- 2 files changed, 1075 insertions(+), 1057 deletions(-) diff --git a/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb b/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb index a0efa571a7d7..5289aee1ed77 100644 --- a/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb +++ b/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb @@ -1,11 +1,8 @@ { "cells": [ { - "cell_type": "code", - "execution_count": 1, "id": "d10bfca4", - "metadata": {}, - "outputs": [], + "cell_type": "code", "source": [ "# Copyright 2025 Google LLC\n", "#\n", @@ -20,74 +17,82 @@ "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License." 
- ] + ], + "metadata": {}, + "execution_count": 1 }, { - "cell_type": "markdown", "id": "acca43ae", - "metadata": {}, + "cell_type": "markdown", "source": [ "# Demo to Show Anywidget mode" - ] + ], + "metadata": {}, + "execution_count": null }, { - "cell_type": "code", - "execution_count": 2, "id": "ca22f059", - "metadata": {}, - "outputs": [], + "cell_type": "code", "source": [ "import bigframes.pandas as bpd" - ] + ], + "metadata": {}, + "execution_count": 2 }, { - "cell_type": "markdown", "id": "04406a4d", - "metadata": {}, + "cell_type": "markdown", "source": [ "This notebook demonstrates the **anywidget** display mode for BigQuery DataFrames. This mode provides an interactive table experience for exploring your data directly within the notebook.\n", "\n", "**Key features:**\n", - "- **Rich DataFrames & Series:** Both DataFrames and Series are displayed as interactive widgets.\n", + "- **Rich DataFrames \u0026 Series:** Both DataFrames and Series are displayed as interactive widgets.\n", "- **Pagination:** Navigate through large datasets page by page without overwhelming the output.\n", "- **Column Sorting:** Click column headers to toggle between ascending, descending, and unsorted views. Use **Shift + Click** to sort by multiple columns.\n", "- **Column Resizing:** Drag the dividers between column headers to adjust their width.\n", "- **Max Columns Control:** Limit the number of displayed columns to improve performance and readability for wide datasets." 
- ] + ], + "metadata": {}, + "execution_count": null }, { - "cell_type": "code", - "execution_count": 3, "id": "1bc5aaf3", - "metadata": {}, - "outputs": [], + "cell_type": "code", "source": [ "bpd.options.bigquery.ordering_mode = \"partial\"\n", "bpd.options.display.render_mode = \"anywidget\"" - ] + ], + "metadata": {}, + "execution_count": 3 }, { - "cell_type": "markdown", "id": "0a354c69", - "metadata": {}, + "cell_type": "markdown", "source": [ "Load Sample Data" - ] + ], + "metadata": {}, + "execution_count": null }, { - "cell_type": "markdown", "id": "interactive-df-header", - "metadata": {}, + "cell_type": "markdown", "source": [ "## 1. Interactive DataFrame Display\n", "Loading a dataset from BigQuery automatically renders the interactive widget." - ] + ], + "metadata": {}, + "execution_count": null }, { - "cell_type": "code", - "execution_count": 4, "id": "f289d250", + "cell_type": "code", + "source": [ + "df = bpd.read_gbq(\"bigquery-public-data.usa_names.usa_1910_2013\")\n", + "print(df)" + ], "metadata": {}, + "execution_count": 4, "outputs": [ { "data": { @@ -97,7 +102,7 @@ " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -123,17 +128,16 @@ "[5552452 rows x 5 columns]\n" ] } - ], - "source": [ - "df = bpd.read_gbq(\"bigquery-public-data.usa_names.usa_1910_2013\")\n", - "print(df)" ] }, { - "cell_type": "code", - "execution_count": 5, "id": "220340b0", + "cell_type": "code", + "source": [ + "df" + ], "metadata": {}, + "execution_count": 5, "outputs": [ { "data": { @@ -143,8 +147,8 @@ "version_minor": 1 }, "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
stategenderyearnamenumber
0ALF1910Annie482
1ALF1910Myrtle104
2ARF1910Lillian56
3CTF1910Anne38
4CTF1910Frances45
5FLF1910Margaret53
6GAF1910Mae73
7GAF1910Beatrice96
8GAF1910Lola47
9IAF1910Viola49
\n", - "

10 rows × 5 columns

\n", - "
[5552452 rows x 5 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003estate\u003c/th\u003e\n", + " \u003cth\u003egender\u003c/th\u003e\n", + " \u003cth\u003eyear\u003c/th\u003e\n", + " \u003cth\u003ename\u003c/th\u003e\n", + " \u003cth\u003enumber\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003eAL\u003c/td\u003e\n", + " \u003ctd\u003eF\u003c/td\u003e\n", + " \u003ctd\u003e1910\u003c/td\u003e\n", + " \u003ctd\u003eAnnie\u003c/td\u003e\n", + " \u003ctd\u003e482\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003eAL\u003c/td\u003e\n", + " \u003ctd\u003eF\u003c/td\u003e\n", + " \u003ctd\u003e1910\u003c/td\u003e\n", + " \u003ctd\u003eMyrtle\u003c/td\u003e\n", + " \u003ctd\u003e104\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003eAR\u003c/td\u003e\n", + " \u003ctd\u003eF\u003c/td\u003e\n", + " \u003ctd\u003e1910\u003c/td\u003e\n", + " \u003ctd\u003eLillian\u003c/td\u003e\n", + " \u003ctd\u003e56\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003eCT\u003c/td\u003e\n", + " \u003ctd\u003eF\u003c/td\u003e\n", + " \u003ctd\u003e1910\u003c/td\u003e\n", + " \u003ctd\u003eAnne\u003c/td\u003e\n", + " \u003ctd\u003e38\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003eCT\u003c/td\u003e\n", + " \u003ctd\u003eF\u003c/td\u003e\n", + " \u003ctd\u003e1910\u003c/td\u003e\n", + " \u003ctd\u003eFrances\u003c/td\u003e\n", + " \u003ctd\u003e45\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " 
\u003ctr\u003e\n", + " \u003cth\u003e5\u003c/th\u003e\n", + " \u003ctd\u003eFL\u003c/td\u003e\n", + " \u003ctd\u003eF\u003c/td\u003e\n", + " \u003ctd\u003e1910\u003c/td\u003e\n", + " \u003ctd\u003eMargaret\u003c/td\u003e\n", + " \u003ctd\u003e53\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e6\u003c/th\u003e\n", + " \u003ctd\u003eGA\u003c/td\u003e\n", + " \u003ctd\u003eF\u003c/td\u003e\n", + " \u003ctd\u003e1910\u003c/td\u003e\n", + " \u003ctd\u003eMae\u003c/td\u003e\n", + " \u003ctd\u003e73\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e7\u003c/th\u003e\n", + " \u003ctd\u003eGA\u003c/td\u003e\n", + " \u003ctd\u003eF\u003c/td\u003e\n", + " \u003ctd\u003e1910\u003c/td\u003e\n", + " \u003ctd\u003eBeatrice\u003c/td\u003e\n", + " \u003ctd\u003e96\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e8\u003c/th\u003e\n", + " \u003ctd\u003eGA\u003c/td\u003e\n", + " \u003ctd\u003eF\u003c/td\u003e\n", + " \u003ctd\u003e1910\u003c/td\u003e\n", + " \u003ctd\u003eLola\u003c/td\u003e\n", + " \u003ctd\u003e47\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e9\u003c/th\u003e\n", + " \u003ctd\u003eIA\u003c/td\u003e\n", + " \u003ctd\u003eF\u003c/td\u003e\n", + " \u003ctd\u003e1910\u003c/td\u003e\n", + " \u003ctd\u003eViola\u003c/td\u003e\n", + " \u003ctd\u003e49\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e10 rows × 5 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5552452 rows x 5 columns in total]" ], "text/plain": [ "state gender year name number\n", @@ -275,35 +279,38 @@ "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "df" ] }, { - "cell_type": "markdown", "id": "3a73e472", - "metadata": {}, + "cell_type": "markdown", "source": [ "## 2. 
Interactive Series Display\n", "BigQuery DataFrames `Series` objects now also support the full interactive widget experience, including pagination and formatting." - ] + ], + "metadata": {}, + "execution_count": null }, { - "cell_type": "code", - "execution_count": 6, "id": "42bb02ab", + "cell_type": "code", + "source": [ + "test_series = df[\"year\"]\n", + "# Displaying the series triggers the interactive widget\n", + "print(test_series)" + ], "metadata": {}, + "execution_count": 6, "outputs": [ { "data": { "text/html": [ "\n", - " Query processed 171.4 MB in 46 seconds of slot time. [Job bigframes-dev:US.dcf260e0-eaad-4979-9ec6-12f2436698e4 details]\n", + " Query processed 171.4 MB in 46 seconds of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:dcf260e0-eaad-4979-9ec6-12f2436698e4\u0026page=queryresults\"\u003eJob bigframes-dev:US.dcf260e0-eaad-4979-9ec6-12f2436698e4 details\u003c/a\u003e]\n", " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -317,7 +324,7 @@ " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -343,26 +350,25 @@ "[5552452 rows]\n" ] } - ], - "source": [ - "test_series = df[\"year\"]\n", - "# Displaying the series triggers the interactive widget\n", - "print(test_series)" ] }, { - "cell_type": "markdown", "id": "7bcf1bb7", - "metadata": {}, + "cell_type": "markdown", "source": [ "Display with Pagination" - ] + ], + "metadata": {}, + "execution_count": null }, { - "cell_type": "code", - "execution_count": 7, "id": "da23e0f3", + "cell_type": "code", + "source": [ + "test_series" + ], "metadata": {}, + "execution_count": 7, "outputs": [ { "data": { @@ -372,7 +378,7 @@ "version_minor": 1 }, "text/html": [ - "
0    1910\n",
+       "\u003cpre\u003e0    1910\n",
        "1    1910\n",
        "2    1910\n",
        "3    1910\n",
@@ -381,7 +387,7 @@
        "6    1910\n",
        "7    1910\n",
        "8    1910\n",
-       "9    1910

[5552452 rows]

" + "9 1910\u003c/pre\u003e\u003cp\u003e[5552452 rows]\u003c/p\u003e" ], "text/plain": [ "1910\n", @@ -404,15 +410,11 @@ "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "test_series" ] }, { - "cell_type": "markdown", "id": "sorting-intro", - "metadata": {}, + "cell_type": "markdown", "source": [ "### Sorting by Column(s)\n", "You can sort the table by clicking on the headers of columns that have orderable data types (like numbers, strings, and dates). Non-orderable columns (like arrays or structs) do not have sorting controls.\n", @@ -428,42 +430,58 @@ "- **Shift + Click:** Hold the `Shift` key while clicking additional column headers to add them to the sort order. \n", "- Each column in a multi-sort also cycles through the three states (Ascending, Descending, Unsorted).\n", "- **Indicator visibility:** Sorting indicators (▲, ▼) are always visible for all columns currently included in the sort. The unsorted indicator (●) is only visible when you hover over an unsorted column header." - ] + ], + "metadata": {}, + "execution_count": null }, { - "cell_type": "markdown", "id": "adjustable-width-intro", - "metadata": {}, + "cell_type": "markdown", "source": [ "### Adjustable Column Widths\n", "You can easily adjust the width of any column in the table. Simply hover your mouse over the vertical dividers between column headers. When the cursor changes to a resize icon, click and drag to expand or shrink the column to your desired width. This allows for better readability and customization of your table view.\n", "\n", "### Control Maximum Columns\n", "You can control the number of columns displayed in the widget using the **Max columns** dropdown in the footer. This is useful for wide DataFrames where you want to focus on a subset of columns or improve rendering performance. Options include 3, 5, 7, 10, 20, or All." 
- ] + ], + "metadata": {}, + "execution_count": null }, { - "cell_type": "markdown", "id": "bb15bab6", - "metadata": {}, + "cell_type": "markdown", "source": [ "Programmatic Navigation Demo" - ] + ], + "metadata": {}, + "execution_count": null }, { - "cell_type": "markdown", "id": "programmatic-header", - "metadata": {}, + "cell_type": "markdown", "source": [ "## 3. Programmatic Widget Control\n", "You can also instantiate the `TableWidget` directly for more control, such as checking page counts or driving navigation programmatically." - ] + ], + "metadata": {}, + "execution_count": null }, { - "cell_type": "code", - "execution_count": 8, "id": "6920d49b", + "cell_type": "code", + "source": [ + "from bigframes.display.anywidget import TableWidget\n", + "import math\n", + " \n", + "# Create widget programmatically \n", + "widget = TableWidget(df)\n", + "print(f\"Total pages: {math.ceil(widget.row_count / widget.page_size)}\")\n", + " \n", + "# Display the widget\n", + "widget" + ], "metadata": {}, + "execution_count": 8, "outputs": [ { "name": "stdout", @@ -480,50 +498,27 @@ "version_minor": 1 }, "text/plain": [ - "" + "\u003cbigframes.display.anywidget.TableWidget object at 0x7f50500e2ad0\u003e" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "from bigframes.display.anywidget import TableWidget\n", - "import math\n", - " \n", - "# Create widget programmatically \n", - "widget = TableWidget(df)\n", - "print(f\"Total pages: {math.ceil(widget.row_count / widget.page_size)}\")\n", - " \n", - "# Display the widget\n", - "widget" ] }, { - "cell_type": "markdown", "id": "02cbd1be", - "metadata": {}, + "cell_type": "markdown", "source": [ "Test Navigation Programmatically" - ] + ], + "metadata": {}, + "execution_count": null }, { - "cell_type": "code", - "execution_count": 9, "id": "12b68f15", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Current page: 0\n", - "After 
next: 1\n", - "After prev: 0\n" - ] - } - ], + "cell_type": "code", "source": [ "# Simulate button clicks programmatically\n", "print(\"Current page:\", widget.page)\n", @@ -535,22 +530,43 @@ "# Go to previous page\n", "widget.page = 0\n", "print(\"After prev:\", widget.page)" + ], + "metadata": {}, + "execution_count": 9, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Current page: 0\n", + "After next: 1\n", + "After prev: 0\n" + ] + } ] }, { - "cell_type": "markdown", "id": "9d310138", - "metadata": {}, + "cell_type": "markdown", "source": [ "## 4. Edge Cases\n", "The widget handles small datasets gracefully, disabling unnecessary pagination controls." - ] + ], + "metadata": {}, + "execution_count": null }, { - "cell_type": "code", - "execution_count": 10, "id": "a9d5d13a", + "cell_type": "code", + "source": [ + "# Test with very small dataset\n", + "small_df = df.sort_values([\"name\", \"year\", \"state\"]).head(5)\n", + "small_widget = TableWidget(small_df)\n", + "print(f\"Small dataset pages: {math.ceil(small_widget.row_count / small_widget.page_size)}\")\n", + "small_widget" + ], "metadata": {}, + "execution_count": 10, "outputs": [ { "name": "stdout", @@ -567,45 +583,52 @@ "version_minor": 1 }, "text/plain": [ - "" + "\u003cbigframes.display.anywidget.TableWidget object at 0x7f505016e190\u003e" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "# Test with very small dataset\n", - "small_df = df.sort_values([\"name\", \"year\", \"state\"]).head(5)\n", - "small_widget = TableWidget(small_df)\n", - "print(f\"Small dataset pages: {math.ceil(small_widget.row_count / small_widget.page_size)}\")\n", - "small_widget" ] }, { - "cell_type": "markdown", "id": "added-cell-2", - "metadata": {}, + "cell_type": "markdown", "source": [ "### Displaying Generative AI results containing JSON\n", "The `AI.GENERATE` function in BigQuery returns results in a JSON column. 
While BigQuery's JSON type is not natively supported by the underlying Arrow `to_pandas_batches()` method used in anywidget mode ([Apache Arrow issue #45262](https://github.com/apache/arrow/issues/45262)), BigQuery Dataframes automatically converts JSON columns to strings for display. This allows you to view the results of generative AI functions seamlessly." - ] + ], + "metadata": {}, + "execution_count": null }, { - "cell_type": "markdown", "id": "ai-header", - "metadata": {}, + "cell_type": "markdown", "source": [ "## 5. Advanced Data Types (JSON/Structs)\n", "The `AI.GENERATE` function in BigQuery returns results in a JSON column. BigQuery Dataframes automatically handles complex types like JSON strings for display, allowing you to view generative AI results seamlessly." - ] + ], + "metadata": {}, + "execution_count": null }, { - "cell_type": "code", - "execution_count": 11, "id": "added-cell-1", + "cell_type": "code", + "source": [ + "bpd.read_gbq(\"\"\"\n", + " SELECT\n", + " AI.GENERATE(\n", + " prompt=\u003e(\"Extract the values.\", OBJ.GET_ACCESS_URL(OBJ.FETCH_METADATA(OBJ.MAKE_REF(gcs_path, \"us.conn\")), \"r\")),\n", + " connection_id=\u003e\"bigframes-dev.us.bigframes-default-connection\",\n", + " output_schema=\u003e\"publication_date string, class_international string, application_number string, filing_date string\") AS result,\n", + " *\n", + " FROM `bigquery-public-data.labeled_patents.extracted_data`\n", + " LIMIT 5;\n", + "\"\"\")" + ], "metadata": {}, + "execution_count": 11, "outputs": [ { "data": { @@ -615,7 +638,7 @@ " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -629,8 +652,8 @@ "version_minor": 1 }, "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
resultgcs_pathissuerlanguagepublication_dateclass_internationalclass_usapplication_numberfiling_datepriority_date_eurepresentative_line_1_euapplicant_line_1inventor_line_1title_line_1number
0{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE29.08.018E04H 6/12<NA>18157874.121.02.201822.02.2017Liedtke & Partner PatentanwälteSHB Hebezeugbau GmbHVOLGER, AlexanderSTEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSEREP 3 366 869 A1
1{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018G06F 11/30<NA>18157347.819.02.201831.03.2017Hoffmann EitleFUJITSU LIMITEDKukihara, KensukeMETHOD EXECUTED BY A COMPUTER, INFORMATION PRO...EP 3 382 553 A1
2{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018A01K 31/00<NA>18171005.405.02.201505.02.2014Stork Bamberger PatentanwälteLinco Food Systems A/SThrane, UffeMASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER E...EP 3 381 276 A1
3{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018H05B 6/12<NA>18165514.303.04.201830.03.2017<NA>BSH Hausger√§te GmbHAcero Acero, JesusVORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNGEP 3 383 141 A2
4{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018H01L 21/20<NA>18166536.516.02.2016<NA>Scheider, Sascha et alEV Group E. Thallner GmbHKurz, FlorianVORRICHTUNG ZUM BONDEN VON SUBSTRATENEP 3 382 744 A1
\n", - "

5 rows × 15 columns

\n", - "
[5 rows x 15 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eresult\u003c/th\u003e\n", + " \u003cth\u003egcs_path\u003c/th\u003e\n", + " \u003cth\u003eissuer\u003c/th\u003e\n", + " \u003cth\u003elanguage\u003c/th\u003e\n", + " \u003cth\u003epublication_date\u003c/th\u003e\n", + " \u003cth\u003eclass_international\u003c/th\u003e\n", + " \u003cth\u003eclass_us\u003c/th\u003e\n", + " \u003cth\u003eapplication_number\u003c/th\u003e\n", + " \u003cth\u003efiling_date\u003c/th\u003e\n", + " \u003cth\u003epriority_date_eu\u003c/th\u003e\n", + " \u003cth\u003erepresentative_line_1_eu\u003c/th\u003e\n", + " \u003cth\u003eapplicant_line_1\u003c/th\u003e\n", + " \u003cth\u003einventor_line_1\u003c/th\u003e\n", + " \u003cth\u003etitle_line_1\u003c/th\u003e\n", + " \u003cth\u003enumber\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e{'application_number': None, 'class_internatio...\u003c/td\u003e\n", + " \u003ctd\u003egs://gcs-public-data--labeled-patents/espacene...\u003c/td\u003e\n", + " \u003ctd\u003eEU\u003c/td\u003e\n", + " \u003ctd\u003eDE\u003c/td\u003e\n", + " \u003ctd\u003e29.08.018\u003c/td\u003e\n", + " \u003ctd\u003eE04H 6/12\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e18157874.1\u003c/td\u003e\n", + " \u003ctd\u003e21.02.2018\u003c/td\u003e\n", + " \u003ctd\u003e22.02.2017\u003c/td\u003e\n", + " \u003ctd\u003eLiedtke \u0026amp; Partner Patentanw√§lte\u003c/td\u003e\n", + " \u003ctd\u003eSHB Hebezeugbau GmbH\u003c/td\u003e\n", + " \u003ctd\u003eVOLGER, Alexander\u003c/td\u003e\n", + " \u003ctd\u003eSTEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER\u003c/td\u003e\n", + " \u003ctd\u003eEP 3 366 869 
A1\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003e{'application_number': None, 'class_internatio...\u003c/td\u003e\n", + " \u003ctd\u003egs://gcs-public-data--labeled-patents/espacene...\u003c/td\u003e\n", + " \u003ctd\u003eEU\u003c/td\u003e\n", + " \u003ctd\u003eDE\u003c/td\u003e\n", + " \u003ctd\u003e03.10.2018\u003c/td\u003e\n", + " \u003ctd\u003eG06F 11/30\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e18157347.8\u003c/td\u003e\n", + " \u003ctd\u003e19.02.2018\u003c/td\u003e\n", + " \u003ctd\u003e31.03.2017\u003c/td\u003e\n", + " \u003ctd\u003eHoffmann Eitle\u003c/td\u003e\n", + " \u003ctd\u003eFUJITSU LIMITED\u003c/td\u003e\n", + " \u003ctd\u003eKukihara, Kensuke\u003c/td\u003e\n", + " \u003ctd\u003eMETHOD EXECUTED BY A COMPUTER, INFORMATION PRO...\u003c/td\u003e\n", + " \u003ctd\u003eEP 3 382 553 A1\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003e{'application_number': None, 'class_internatio...\u003c/td\u003e\n", + " \u003ctd\u003egs://gcs-public-data--labeled-patents/espacene...\u003c/td\u003e\n", + " \u003ctd\u003eEU\u003c/td\u003e\n", + " \u003ctd\u003eDE\u003c/td\u003e\n", + " \u003ctd\u003e03.10.2018\u003c/td\u003e\n", + " \u003ctd\u003eA01K 31/00\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e18171005.4\u003c/td\u003e\n", + " \u003ctd\u003e05.02.2015\u003c/td\u003e\n", + " \u003ctd\u003e05.02.2014\u003c/td\u003e\n", + " \u003ctd\u003eStork Bamberger Patentanw√§lte\u003c/td\u003e\n", + " \u003ctd\u003eLinco Food Systems A/S\u003c/td\u003e\n", + " \u003ctd\u003eThrane, Uffe\u003c/td\u003e\n", + " \u003ctd\u003eMASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER E...\u003c/td\u003e\n", + " \u003ctd\u003eEP 3 381 276 A1\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " 
\u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003e{'application_number': None, 'class_internatio...\u003c/td\u003e\n", + " \u003ctd\u003egs://gcs-public-data--labeled-patents/espacene...\u003c/td\u003e\n", + " \u003ctd\u003eEU\u003c/td\u003e\n", + " \u003ctd\u003eDE\u003c/td\u003e\n", + " \u003ctd\u003e03.10.2018\u003c/td\u003e\n", + " \u003ctd\u003eH05B 6/12\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e18165514.3\u003c/td\u003e\n", + " \u003ctd\u003e03.04.2018\u003c/td\u003e\n", + " \u003ctd\u003e30.03.2017\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003eBSH Hausger√§te GmbH\u003c/td\u003e\n", + " \u003ctd\u003eAcero Acero, Jesus\u003c/td\u003e\n", + " \u003ctd\u003eVORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG\u003c/td\u003e\n", + " \u003ctd\u003eEP 3 383 141 A2\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003e{'application_number': None, 'class_internatio...\u003c/td\u003e\n", + " \u003ctd\u003egs://gcs-public-data--labeled-patents/espacene...\u003c/td\u003e\n", + " \u003ctd\u003eEU\u003c/td\u003e\n", + " \u003ctd\u003eDE\u003c/td\u003e\n", + " \u003ctd\u003e03.10.2018\u003c/td\u003e\n", + " \u003ctd\u003eH01L 21/20\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e18166536.5\u003c/td\u003e\n", + " \u003ctd\u003e16.02.2016\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003eScheider, Sascha et al\u003c/td\u003e\n", + " \u003ctd\u003eEV Group E. 
Thallner GmbH\u003c/td\u003e\n", + " \u003ctd\u003eKurz, Florian\u003c/td\u003e\n", + " \u003ctd\u003eVORRICHTUNG ZUM BONDEN VON SUBSTRATEN\u003c/td\u003e\n", + " \u003ctd\u003eEP 3 382 744 A1\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 15 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 15 columns in total]" ], "text/plain": [ " result \\\n", @@ -776,18 +799,18 @@ "4 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", "\n", " publication_date class_international class_us application_number \\\n", - "0 29.08.018 E04H 6/12 18157874.1 \n", - "1 03.10.2018 G06F 11/30 18157347.8 \n", - "2 03.10.2018 A01K 31/00 18171005.4 \n", - "3 03.10.2018 H05B 6/12 18165514.3 \n", - "4 03.10.2018 H01L 21/20 18166536.5 \n", + "0 29.08.018 E04H 6/12 \u003cNA\u003e 18157874.1 \n", + "1 03.10.2018 G06F 11/30 \u003cNA\u003e 18157347.8 \n", + "2 03.10.2018 A01K 31/00 \u003cNA\u003e 18171005.4 \n", + "3 03.10.2018 H05B 6/12 \u003cNA\u003e 18165514.3 \n", + "4 03.10.2018 H01L 21/20 \u003cNA\u003e 18166536.5 \n", "\n", " filing_date priority_date_eu representative_line_1_eu \\\n", - "0 21.02.2018 22.02.2017 Liedtke & Partner Patentanw√§lte \n", + "0 21.02.2018 22.02.2017 Liedtke \u0026 Partner Patentanw√§lte \n", "1 19.02.2018 31.03.2017 Hoffmann Eitle \n", "2 05.02.2015 05.02.2014 Stork Bamberger Patentanw√§lte \n", - "3 03.04.2018 30.03.2017 \n", - "4 16.02.2016 Scheider, Sascha et al \n", + "3 03.04.2018 30.03.2017 \u003cNA\u003e \n", + "4 16.02.2016 \u003cNA\u003e Scheider, Sascha et al \n", "\n", " applicant_line_1 inventor_line_1 \\\n", "0 SHB Hebezeugbau GmbH VOLGER, Alexander \n", @@ -810,18 +833,6 @@ "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "bpd._read_gbq_colab(\"\"\"\n", - " SELECT\n", - " AI.GENERATE(\n", - " prompt=>(\\\"Extract the values.\\\", OBJ.GET_ACCESS_URL(OBJ.FETCH_METADATA(OBJ.MAKE_REF(gcs_path, \\\"us.conn\\\")), \\\"r\\\")),\n", - " 
connection_id=>\\\"bigframes-dev.us.bigframes-default-connection\\\",\n", - " output_schema=>\\\"publication_date string, class_international string, application_number string, filing_date string\\\") AS result,\n", - " *\n", - " FROM `bigquery-public-data.labeled_patents.extracted_data`\n", - " LIMIT 5;\n", - "\"\"\")" ] } ], @@ -844,6 +855,6 @@ "version": "3.13.0" } }, - "nbformat": 4, - "nbformat_minor": 5 + "nbformat_minor": 5, + "nbformat": 4 } diff --git a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb index 56a59305cfa6..edea6eba7bf7 100644 --- a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb @@ -1,11 +1,8 @@ { "cells": [ { - "cell_type": "code", - "execution_count": 1, "id": "9edad7a6", - "metadata": {}, - "outputs": [], + "cell_type": "code", "source": [ "# Copyright 2025 Google LLC\n", "#\n", @@ -20,43 +17,45 @@ "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License." - ] + ], + "metadata": {}, + "execution_count": 1 }, { - "cell_type": "markdown", "id": "816ab253", - "metadata": { - "id": "YOrUAvz6DMw-" - }, + "cell_type": "markdown", "source": [ "# BigFrames Multimodal DataFrame\n", "\n", - "\n", + "\u003ctable align=\"left\"\u003e\n", "\n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \"Colab Run in Colab\n", - " \n", - " \n", - " \n", - " \"GitHub\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://colab.research.google.com/github/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", + " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/colab-logo.png\" alt=\"Colab logo\"\u003e Run in Colab\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", + " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/github-logo.png\" width=\"32\" alt=\"GitHub logo\"\u003e\n", " View on GitHub\n", - " \n", - " \n", - " \n", - " \"BQ\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", + " \u003cimg src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw\u0026s\" alt=\"BQ logo\" width=\"35\"\u003e\n", " Open in BQ Studio\n", - " \n", - "
\n" - ] + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + "\u003c/table\u003e\n" + ], + "metadata": { + "id": "YOrUAvz6DMw-" + }, + "execution_count": null }, { - "cell_type": "markdown", "id": "77d821d4", - "metadata": {}, + "cell_type": "markdown", "source": [ "This notebook is introducing BigFrames Multimodal features:\n", "1. Create Multimodal DataFrame\n", @@ -66,48 +65,42 @@ "5. PDF chunking function\n", "6. Transcribe audio\n", "7. Extract EXIF metadata from images" - ] + ], + "metadata": {}, + "execution_count": null }, { - "cell_type": "markdown", "id": "75ab1c13", + "cell_type": "markdown", + "source": [ + "## Setup" + ], "metadata": { "id": "PEAJQQ6AFg-n" }, - "source": [ - "## Setup" - ] + "execution_count": null }, { - "cell_type": "markdown", "id": "750954c4", - "metadata": {}, + "cell_type": "markdown", "source": [ - "Install the latest bigframes package if bigframes version < 2.4.0" - ] + "Install the latest bigframes package if bigframes version \u003c 2.4.0" + ], + "metadata": {}, + "execution_count": null }, { - "cell_type": "code", - "execution_count": 2, "id": "2a6fafb1", - "metadata": {}, - "outputs": [], + "cell_type": "code", "source": [ "# !pip install bigframes --upgrade" - ] + ], + "metadata": {}, + "execution_count": 2 }, { - "cell_type": "code", - "execution_count": 3, "id": "df561d04", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "bGyhLnfEeB0X", - "outputId": "83ac8b64-3f44-4d43-d089-28a5026cbb42" - }, - "outputs": [], + "cell_type": "code", "source": [ "PROJECT = \"bigframes-dev\" # replace with your project. 
\n", "# Refer to https://cloud.google.com/bigquery/docs/multimodal-data-dataframes-tutorial#required_roles for your required permissions\n", @@ -135,14 +128,19 @@ "\n", "import bigframes.pandas as bpd\n", "import bigframes.bigquery as bbq" - ] + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bGyhLnfEeB0X", + "outputId": "83ac8b64-3f44-4d43-d089-28a5026cbb42" + }, + "execution_count": 3 }, { - "cell_type": "code", - "execution_count": 4, "id": "35bd6e6e", - "metadata": {}, - "outputs": [], + "cell_type": "code", "source": [ "import bigframes.bigquery as bbq\n", "\n", @@ -182,32 +180,26 @@ "\n", "def get_updated(series):\n", " return bpd.to_datetime(bbq.json_value(get_metadata(series), \"$.updated\").astype(\"Int64\"), unit=\"us\", utc=True)" - ] + ], + "metadata": {}, + "execution_count": 4 }, { - "cell_type": "markdown", "id": "be9ce892", - "metadata": { - "id": "ifKOq7VZGtZy" - }, + "cell_type": "markdown", "source": [ "To create a Multimodal DataFrame, you can use `bigframes.bigquery.obj.make_ref` on a series of URIs. You can get the URIs from a BigQuery table or by listing them from Cloud Storage.\n", "\n", "In this example, we use `gcsfs` to list the files from Cloud Storage, and then use `read_gbq` to load them into a BigQuery DataFrame before creating the object reference." 
- ] + ], + "metadata": { + "id": "ifKOq7VZGtZy" + }, + "execution_count": null }, { - "cell_type": "code", - "execution_count": 5, "id": "871d02f4", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "fx6YcZJbeYru", - "outputId": "d707954a-0dd0-4c50-b7bf-36b140cf76cf" - }, - "outputs": [], + "cell_type": "code", "source": [ "import gcsfs\n", "import bigframes.bigquery as bbq\n", @@ -226,12 +218,24 @@ "# Create the object reference column\n", "df_image['image'] = bbq.obj.make_ref(df_image['uri'], authorizer=FULL_CONNECTION_ID)\n", "df_image = df_image[['image']]" - ] + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fx6YcZJbeYru", + "outputId": "d707954a-0dd0-4c50-b7bf-36b140cf76cf" + }, + "execution_count": 5 }, { - "cell_type": "code", - "execution_count": 6, "id": "2e0436b0", + "cell_type": "code", + "source": [ + "# Take only the 5 images to deal with. Preview the content of the Mutimodal DataFrame\n", + "df_image = df_image.head(5)\n", + "df_image" + ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -240,6 +244,7 @@ "id": "HhCb8jRsLe9B", "outputId": "03081cf9-3a22-42c9-b38f-649f592fdada" }, + "execution_count": 6, "outputs": [ { "name": "stderr", @@ -256,8 +261,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
image
0
1
2
3
4
\n", - "

5 rows × 1 columns

\n", - "
[5 rows x 1 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eimage\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193621Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=060fd285eaaa0d1b8888e9e3c648c0c9452df5d33b6c6d7ddea1d5fc3e4bf1f243ff0719ed4e3faaa3ea29e60da198daf1168dfd0efd64f9612f7c032753cfa2842ef88469a49ab23cf261e2b5da052224a33eedddd7c4699d584ec85704b18010ce8f4216f200d3cc8d0301b7aeb6bef37ae9e83a832ea38703f19b5b85e2e34f772420f5436afeb874487b3fb0ae4a17fb8f277a3d233a6d5e6e0d063e37e651061bf4ae33b8ec8b879c5db93e3ce97513054822d1867e28de4f03585da2edd8f4f51e177a4e3d37f5e3ca2f5be0990dd9f58135ec9223da3af7bcd1e67a8d279282d4d245eb7faff094903a82cf7a8b866fe848230a6668b6eaef6a683292\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193621Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=34826bd5824786f809b740fd64e154a9feb43b467250c64b9b6686134133e18aabd7213472cb00ae4ef59e5567b8897828aae6ff22e29c4a5d28214fcd2a6f486e69d548d0e482707c4dcd67894feb716ee68e1863e02e36d7e0c6c008b1b989c2a798feb2c4bc3638c6c80069165b2bf51c6028ea2f0a09c1555981b8935435ec1c596975b77942f603e65414328f3f1d180f772015452bfc97e96ffb9a0a016a4dc365a4531d0e37e491f4066df87f9a8f2374d30d5f62d639f10252c471570b9e24d88a9f8816007099fc39e6f846c68ea5a4069ecc785e67101f664da4999037fcbbd93e00b1b85a31528492e8610d75af96a1c8a879865dd36da2b15465\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193621Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=6476c6b9b8b23f4ac79973d8088424c6d8018857d0d8c1bf586057a7bd2f4cf00bfc53c79757b35401f05dadee9bc1aeeaed7b669659afa16696062db7d9da45e5fe17a0aaaa33c2394632a8dae6ca3c7f705ce0e7179e44fa245dc34080d87bdd0f41638c2840eba87b178dc43db16ca1a621224a1d991960eb821a99dc34aab25ed7e8457f161bd09fa9efc0eb0642709e3fba6ba412ff68ffe698592c235054ae0d08fd27909150beaf69b93dee3496d2f9254c2f801890fe072442fa2ffe389eeb689b8eb4daf08b4701a535ac6606c7de856761d008da479138abc3e941e0ab682f19fe86cd3f56df63f788c92824aed76fafaae0a546fa796266f26e2e\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193621Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=22cdbcce477c983c914de1edcef7742fd253a6830e961c2932d5dbb11730c1fe0035579c4158a140cd6ebcdc95e1212ec60a2d54679af8dd662cb7c1ac6249042bb5a95fb324397599bfa4e8a8bf8e4313d14a5ca34e40c677d91f1853b4b7450d3ad043404058db5c1dbab56b8968eab4e3550aa1de20c789084527f8abd67a32aa62788b70975ea828bb312f5a123463c2088a4bad7a0c20d299f59fc0674ed32d36b3f78a1bf2fc6fcd367bf2056e132fbd50e0a597a7da7518be8c9597de6365097490857caba47e84b57940bdc6cca130b6c23ede91c51140cf0672ebdda0957e525416c636c368d1cf04e5efecbba3f052f61bb95d951e52e0f5d31e8c\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193621Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=335aafc718f8a89dc2b5d2e75ff750ac302cfacf4238ad91c2a4b140f59dd666d6520fafe885b82706453c2e820c82f0461488ae01e3210a20c555a9ac1242ddd54e17a92d7873211a4dcd69a7fca76c16ad9cd754f6245a8b9f047e9ef8bc428ec243fbde7af59a2b308968a165662e50d4a08740d196d02182d99650e79673e167164dc2869a434159ba3a15c68ddc9e17f5a7234c478ac4ae55a9686740ef260e6c1ab834ca3df361161c8d689acc72b143a6a3345640b2b94aadd1070d3e90a6572d63ae74cf803304b798ea4df61e5f4494f078f565f0d59f57bd6eee0618936a16617455d785ced3ac467b964b5eb9049749fe4cf8f2bf2c72ed72fc79\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 1 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 1 columns in total]" ], "text/plain": [ " image\n", @@ -318,40 +323,46 @@ "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "# Take only the 5 images to deal with. Preview the content of the Mutimodal DataFrame\n", - "df_image = df_image.head(5)\n", - "df_image" ] }, { - "cell_type": "markdown", "id": "429b0117", + "cell_type": "markdown", + "source": [ + "### 2. Combine unstructured data with structured data" + ], "metadata": { "id": "b6RRZb3qPi_T" }, - "source": [ - "### 2. Combine unstructured data with structured data" - ] + "execution_count": null }, { - "cell_type": "markdown", "id": "991fa065", + "cell_type": "markdown", + "source": [ + "Now you can put more information into the table to describe the files. Such as author info from inputs, or other metadata from the gcs object itself." 
+ ], "metadata": { "id": "4YJCdmLtR-qu" }, - "source": [ - "Now you can put more information into the table to describe the files. Such as author info from inputs, or other metadata from the gcs object itself." - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": 7, "id": "08722ec5", + "cell_type": "code", + "source": [ + "# Combine unstructured data with structured data\n", + "df_image = df_image.head(5)\n", + "df_image[\"author\"] = [\"alice\", \"bob\", \"bob\", \"alice\", \"bob\"] # type: ignore\n", + "df_image[\"content_type\"] = get_content_type(df_image[\"image\"])\n", + "df_image[\"size\"] = get_size(df_image[\"image\"])\n", + "df_image[\"updated\"] = get_updated(df_image[\"image\"])\n", + "df_image" + ], "metadata": { "id": "YYYVn7NDH0Me" }, + "execution_count": 7, "outputs": [ { "name": "stderr", @@ -368,8 +379,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
imageauthorcontent_typesizeupdated
0aliceimage/png15912402025-03-20 17:45:04+00:00
1bobimage/png11829512025-03-20 17:45:02+00:00
2bobimage/png15208842025-03-20 17:44:55+00:00
3aliceimage/png12354012025-03-20 17:45:19+00:00
4bobimage/png15919232025-03-20 17:44:47+00:00
\n", - "

5 rows × 5 columns

\n", - "
[5 rows x 5 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eimage\u003c/th\u003e\n", + " \u003cth\u003eauthor\u003c/th\u003e\n", + " \u003cth\u003econtent_type\u003c/th\u003e\n", + " \u003cth\u003esize\u003c/th\u003e\n", + " \u003cth\u003eupdated\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193635Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=0f96a5054a9d9d8f14259a2a9155c8124d0b3b6af21d5ba59f61a789cb5a770d1d70e9d7b04094140495675e9eb97ef30d5539ec822bfc7f857fc0be3a3ff193aef72578ffccf7231633d42fbf53752b50a6ab3c4180dd86f62a2c350492239d44b2d5a079d000aa60d99e5656dca1fdc85b2a5b1cea0d6567d47641167ee08bd41bff06e93b35e34c4b8f82e73d589bf29f9ae73b640b8c90b751ca2829f99b2a2fa5a463990660e87e4c4220a8bee9ff9bea44eb621d8a00335892833a0e33cc95560a803df5a13fb710abfe813d11a37610c870c82986b4275831e2ed57cf022af8927cd4a9fc5aea88b54c597b51ee456ab5f22cdcfe8972a0a2c56d9702\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003ealice\u003c/td\u003e\n", + " \u003ctd\u003eimage/png\u003c/td\u003e\n", + " \u003ctd\u003e1591240\u003c/td\u003e\n", + " \u003ctd\u003e2025-03-20 17:45:04+00:00\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193635Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=50b820b125f2b52c6a205178676d153512a5d62ba04f7f399de86cecaf167492f4db9322e289f8ec077e74e09ba67509bedda9b68b9eb7290a7f52bb78a567139d0a2bb9266764ed941bfa19ba4278bf00647e79c85877e3111dbe3c49cc572d4f47739db1030d6ce0298965d08f2f992b59a0f452ab8d52ea5d783cf13ce6d3b8349ce3fce9c4337ddd00b746ec2e9e8fa6fa0361a644d82c46b7f0860a2404f3fbf17de24cbfd8744a098fdde367d5a4cb918a325ae3fb96abfb187e26bee7bdc267b81ba23949ca4feaf676864431641f3747477293b34541e7c48ca06bbcc45e94ae839d0ce85e3222c38dd39014821881a31b15f3efd06ca426cc4769fb\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003ebob\u003c/td\u003e\n", + " \u003ctd\u003eimage/png\u003c/td\u003e\n", + " \u003ctd\u003e1182951\u003c/td\u003e\n", + " \u003ctd\u003e2025-03-20 17:45:02+00:00\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193635Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=0d659c9d71b34023e6a7531dc99620cd1543f4a8ec0262cda29549e9f4f604e929be102b44e0a11ef2c148c6a24c3fe9a92257892ca1aa23d7b963551d95093fa74bce067ec12c0237a96a15da0741f76e732d04f85b03d98696c7dc9bcd1bd9de1f2799ceb878e8f87ddfff5609fc6a6db4f27dcc410ad05cdc1066c8dcdae880a950a160189673eba7f3de59cc0e1aa754ebdf7f745a8572216af6f7934b1e96c9aa7cb7cacb6aa836f8e731668b76d0942e04e7e4b81a5972a75ab2ecb0b1859dc4ac4f6a45239dd2b76cc7af4643fb24c64731f28711e6cab8433fe239f0cc2462ae9260b1ae5ae85517300075188d270957eecb31ee8db20ac3ddd97c4e\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003ebob\u003c/td\u003e\n", + " \u003ctd\u003eimage/png\u003c/td\u003e\n", + " \u003ctd\u003e1520884\u003c/td\u003e\n", + " \u003ctd\u003e2025-03-20 17:44:55+00:00\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193635Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=1124d977a75b85634f2afeb223fba3028c9ad85fd9a2f6f8ce483c9e015a48a242f6810742e0279a46b3388371a9b7d37ca96d9a53d3939f6e0484a474491bece22272b89176e01866fdc8845b75cef28b9ff36a7b2f875e452002a7dd8d13ce38b078ea2aacd76ce8cb560faf078dad6462b3a69130f333ab9119fb6f1d8410a70de76018d2c84f01c6d70e1aa60498b4eb88a35a77a8173d11e545a8f5a7dfa542ec3effcafcd3a9c84934d605ed06107df98032738415e6ef1ed9331796aa802712c2cb4bd733881833bf4ed5d590846db97c7591a2d84acdf87e38752b15a39b711aec5bbe4dcca25a1edfb60626e68497c1fbb8cb0bd707938db378a01e\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003ealice\u003c/td\u003e\n", + " \u003ctd\u003eimage/png\u003c/td\u003e\n", + " \u003ctd\u003e1235401\u003c/td\u003e\n", + " \u003ctd\u003e2025-03-20 17:45:19+00:00\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193635Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=2f60becdf01864a381c2283fbceaaea023a1011554a61221b5be902cdd08b102d60d0da0275ed491b3396baeef517b8e2336eeb3e3b07da0398cdf4b190ac0ea667e4cd1d1d19e41046824d55ffcef47a2db3aeb4e82da71a655264f14ad5ee553329aa9b32c8c2200f3b66c9a9bb5aa8e5b91795e8d6b6129935f46522fb8dab9ce3a2ba5af019c2410f709472791730ab9ebdf9f901a5bfaf4dcc2c78e07c79743d35eceac59999d841adb60ce15313a70526d98b83e90f2240800c5b96b1b9a032d530fb15bec86425afca0c6fcc1d35d1560ef996cae5411feb67addd1b726026f3d097318b0577a84dab72cae328bb186fc8c97001ff720a43e6fc27610\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003ebob\u003c/td\u003e\n", + " \u003ctd\u003eimage/png\u003c/td\u003e\n", + " \u003ctd\u003e1591923\u003c/td\u003e\n", + " \u003ctd\u003e2025-03-20 17:44:47+00:00\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 5 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 5 columns in total]" ], "text/plain": [ " image author content_type \\\n", @@ -461,143 +472,29 @@ "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "# Combine unstructured data with structured data\n", - "df_image = df_image.head(5)\n", - "df_image[\"author\"] = [\"alice\", \"bob\", \"bob\", \"alice\", \"bob\"] # type: ignore\n", - "df_image[\"content_type\"] = get_content_type(df_image[\"image\"])\n", - "df_image[\"size\"] = get_size(df_image[\"image\"])\n", - "df_image[\"updated\"] = get_updated(df_image[\"image\"])\n", - "df_image" ] }, { - "cell_type": "markdown", "id": "f90826f6", - "metadata": {}, + 
"cell_type": "markdown", "source": [ "### 3. Conduct image transformations" - ] + ], + "metadata": {}, + "execution_count": null }, { - "cell_type": "markdown", "id": "e24c9f8c", - "metadata": {}, + "cell_type": "markdown", "source": [ "This section demonstrates how to perform image transformations like blur, resize, and normalize using custom BigQuery Python UDFs and the `opencv-python` library." - ] + ], + "metadata": {}, + "execution_count": null }, { - "cell_type": "code", - "execution_count": 8, "id": "db665049", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 487 - }, - "id": "HhCb8jRsLe9B", - "outputId": "03081cf9-3a22-42c9-b38f-649f592fdada" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", - " return global_session.with_default_session(\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4655: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", - " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
imageblurred
0
1
2
3
4
\n", - "

5 rows × 2 columns

\n", - "
[5 rows x 2 columns in total]" - ], - "text/plain": [ - " image \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "\n", - " blurred \n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "\n", - "[5 rows x 2 columns]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "cell_type": "code", "source": [ "# Construct the canonical connection ID\n", "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", @@ -610,7 +507,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"opencv-python\", \"numpy\", \"requests\"],\n", ")\n", - "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -> str:\n", + "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -\u003e str:\n", " import json\n", " import cv2 as cv\n", " import numpy as np\n", @@ -655,7 +552,7 @@ " metadata = bbq.obj.fetch_metadata(series)\n", " current_uri = metadata.struct.field(\"uri\")\n", " dst_uri = current_uri.str.replace(r\"^.*\\/(.*)$\", rf\"{dst_folder}\\1\", regex=True)\n", - " dst_blob = dst_uri.str.to_blob(connection=FULL_CONNECTION_ID)\n", + " dst_blob = bbq.obj.make_ref(dst_uri, authorizer=FULL_CONNECTION_ID)\n", " df_transform = bpd.DataFrame({\n", " \"src_rt\": get_runtime_json_str(series, mode=\"R\"),\n", " \"dst_rt\": get_runtime_json_str(dst_blob, mode=\"RW\"),\n", @@ -663,7 +560,7 @@ " res = df_transform[[\"src_rt\", \"dst_rt\"]].apply(\n", " udf, axis=1, 
args=args\n", " )\n", - " return res if verbose else res.str.to_blob(connection=FULL_CONNECTION_ID)\n", + " return res if verbose else bbq.obj.make_ref(res, authorizer=FULL_CONNECTION_ID)\n", "\n", "# Apply transformations\n", "df_image[\"blurred\"] = apply_transformation(\n", @@ -671,46 +568,158 @@ " image_blur, 20, 20\n", ")\n", "df_image[[\"image\", \"blurred\"]]" - ] - }, - { - "cell_type": "markdown", - "id": "11fcc6ec", - "metadata": { - "id": "Euk5saeVVdTP" - }, - "source": [ - "### 4. Use LLM models to ask questions and generate embeddings on images" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "793b2f45", + ], "metadata": { - "id": "mRUGfcaFVW-3" + "colab": { + "base_uri": "https://localhost:8080/", + "height": 487 + }, + "id": "HhCb8jRsLe9B", + "outputId": "03081cf9-3a22-42c9-b38f-649f592fdada" }, + "execution_count": 8, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:183: FutureWarning: Since upgrading the default model can cause unintended breakages, the\n", - "default model will be removed in BigFrames 3.0. 
Please supply an\n", - "explicit model to avoid this message.\n", - " return method(*args, **kwargs)\n" - ] + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", + " return global_session.with_default_session(\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4655: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", + " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" + ] + }, + { + "data": { + "text/html": [ + "\u003cdiv\u003e\n", + "\u003cstyle scoped\u003e\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eimage\u003c/th\u003e\n", + " \u003cth\u003eblurred\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=2bb786ea7fda0a778b591d0f85b1d1df003726a26585490c1c8e1cc632bc90e418bc2762993da32c60017b96b36a9bf03ca123f7e74a34e5da98d4a8ae65e340c64872e1ab7c6442435253669103a157ee61c793da79ad0e6ae2a7bdbf54e8d67dce61de37d9dd2f54b0c994f8135d88af27f08eeb99e713b385b37fd0762503244cd0c597479d6925dcf111db4605842c797aabb307736028a5dd802ef08a2697a4bd1c96b5bf8bbecdb8a8f384028cec03bc9d51c1d2dfee1760756e0c6e54d9d753b373115e308cf45de2c082f30f263285af5d5e43fd9bf8c3dae32ceee389ef9427f3e2b06646d48aa570ba1ca0713d0f270ea2f4996e8ad3d03a3e39a6\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334353324\u0026X-Goog-Signature=6f16236d50a82c7ede3c23366389344149faba103443acf1c1a2b0d3b0d91948c8c15a3ad990382ab9b61badf8acee3c4b9e5ec6f96b72d395cd2e18227ad5c051ad189c1af48030fb44c1b9cc84a44a74fec04d576a6d699c1f0e133b2fc5d583872b5577c60a01f58d163de6e95591d260e9246fae0243d7472748e854ccaf4b4c3de80fb4e085f428427fe9914d22fba7416ae8b9e33beb0dfbcb91273609d0333aa1135b835bc69f9e15e1097452133e376b9b3f1bb7b4d7d7e9cb860677ce786e49239e8a8af86f83b7e27e76846212daddd2859f8dec4942b7351c69d396d4fd9a529fd0c48677aa4923063a752f6845801f22d3de227a640946f1e212\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=31ed02660dcead7471a7b47e2f471f4cf515e37525c14027f8d87ba1479982f244df02979b7da03676d4bbbd21221b3f70cefea4501eca3a5a64acb187ef5a2b03b19116729059dc666c2e295fc1ac831cd38864c1673344e04855db279a3c96409550dfade09631b0818e6cb576ff02afba6a87624f7c33077f7f19c891f6413b317cabb834182585d445ce97aa0294beca4de0edd2f9a0f6429747e93db53df875249a9c0543a329dac0f46ef3ed4e64a7e51648d0820ba52b7845b6f98371aca7deb165b4f7b7537be0d659cf33bb34893417666d7c9e9f611332ccba6bae6024be6862350185886cf61354eda3591e96e344c59350ca6f68ce50670fa869\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334573735\u0026X-Goog-Signature=209b62c9c3e2657b9270ed126a6c1e3979471f9e308670ce1595e9c18a0ec7c627a313b1f1c87108355602dc11b804ff609ba3394fada2b4fd186c6fef4138e22222045d0fd0660f103e6378ac83c6bd7d8da27c1a3d306dbfd778f6738c2e492bd8527ba9112e086f15334b7ab7795b88212d3825b3be325606cc507e5504406bc4e9dc41b29fb99e1207fe74c11053e7332ad9fdd65587f55cd10cc1bfd903672f43de38a18cb346977d8498fd751ab8728ca580261a1a0367421437d56df7f5f7dda895ddd370954aee632258304da31fd8e7c33619dd5cf9ee0ebe9e60eba3cc416e871e181525f23e525489ef6dffced9211f7d3681ec24b3ac7c0b05ab\" 
width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=8d32ad0137f3b10393aeaf635732f7fe337149323c0a33b634f9dcc592e662d4d268223f5c5cc89956787c5a3c061fa69c2de4d1a3225497dfad466548fcd4df7ee6000c4e60ff0661d0f9b451efa2701df5de55c322b3585c09210171403d47c358e6f41281f245711afd63eed3157e6674e9958ebffc5516f6ccb06b9af1830400e1d6a9bed44538b7212eeb257665aa43eecfa2af7482863f6032002de57f7a01af83cf5051541178297ab4a256710c04fd082cb51af4725dbafb288ee79caa630853dcbf35c49595b2b2e552804cabdbdf955806d57b97451e87035bd3ea89dbb5560a1c14170c531c3222a1d0b59d79d4c45b641887699f0bf5830612cb\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334233994\u0026X-Goog-Signature=6eed60a846158365a0c54d4c96516bb8c4011b497da203479fa9066bc2b7c2c7c683aa351d5249e05df64b8f86549f251150327013e41231ee06f22e9a51e1061c577d7b2da4704b80f684776702da13be6122ad6652463f210c53d9149da9e32a253fde4851723cbb616f18374a8f9b7b5cd36a82e66a2e5677ae51b906e6c4649ea27fcbbcba3ef24bf163a4b0f469c10eca8da464f58161ead8371bdb5b0d452ee713b0ec498ca9aaefd521a1d8240d82885a19af58ddefb167146ab9d411b5d0712457b126104b5f42f57ab4ede2da5ae9af74e708840b507a5538571804c60e9c166516e0118aa555a13169401e21f0a5ec303f3b801d6a972ef94426ee\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=472d88289a3910bdde93dd468e6ae9e66c0821fb0d72a356ef887771baa2a80f356b0ac67183873c0e89c87afb3080b3a78122fa7c3e37ad8b54d8aa216e9e04cb6f2af4f200784beba8de4eea7cf3a39faa8d800b7cc6bbf03df55beaeb69727d413266e2e59fb900bc25b6859d7c14db3c22aa6a0aeaa71c331ae0a2bd27c9e2c246fe931a2562210411491bc83bc34e7bef60901189949348eee909e64c90e3aab7b03d7c274ee84c03cca6dd624169f40323a785bcc1a8357810f738b45a637c09267530fbddc413a75622d793876c74c9128b11d8b4ea2fe2b959b34b96e06a9c384f72522ad357557d0a3b1c8b5792c2e94631139eae178fa660a5e8b2\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334678692\u0026X-Goog-Signature=239a77d1c63c4074b2ddc3dd21a5bc77de8ea993fdd911a199b6cca207fef9286307a8321d91d02cffda17060a217f50b2a1d8017bbcb29511339a1187b38332a39795fb1b2c754988948be8849a371fd66a7698f8e35a0f1a3430b6837ede73c37c90da49a062586828f19595d4246e4eabf3e3d629f251b066fb20e0f34ddad80599f5946aa76e0694b859f7d575dc781a850d56e9846d456d285b0023e90862154008154fa13ef1f95096a6161e6a1b314b82ca23bc44fc1b4b7799a9f936dca09ab09830446089d8defe11a2c0e066aa8a65204d9027140f314001c59187593290ae22c4092ee5d0293a6e5da1e1da3d42c32c0aced7db7ad4f774575deb\" 
width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=1077450c755044fd6121e0dd67ec7de75a1785c4bacede635b78dd64d6b982ba02bba7a00065035fa13f003ef03c14d7f622b6a5c9ef19cadc956470e5a62e0f1855fb8cae0bbe723c0352224d152c85e173b234e3f5a1bcc2e1ddd4da065c184e82487eb4aa17464d330144b3b7c17ba357af1fb267f97730b3a798ff42a514ea47b83aa9f2560cfd428778638287c96ed17f95f0327e1cade380d046120d79479aca927d627ba65f3bc009c75d1662cbf93c6d33cea4b030a7906d3689922bac13ca3fab45536f115ae6253715cc9c24769e57e83657bf0393ce55e45f90d9666513a19a939044d3b3ad528757e022624d6601dacd326ae8ab3c9ce4ae6d52\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334302924\u0026X-Goog-Signature=3a25e70e59647f427ae55d72168c576fdb59b05f9ed6b72885a9e9c11f655e6bf10ded55744adcef330659731f0effdda7550ddca99c309cefe46ba864b088f155a5243e579d182a55f916d21d9dd0a83534a5e2efb2955a9146db12b22b4321e3f36b69f8d89d663507d7db83ac96bb2419a2baa0787fd0c6e6079d06652b8a2ac364a0a0a5d8d9de6331658b798abddbaaae1ede3026a4f0d955e74782afe240d31e6748bea8ab332ed945f541ca20c587b8c1449643f4748a3b059aae857334b6249bdf86794d307340b6a07d0dc47d3980e234be9c0549f52636d33776b7474fb95ebc014656c3cc217a777d438612a08849ac498baba0ce4716ac4ea432\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 2 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 2 columns in total]" + ], + "text/plain": [ + " image \\\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "\n", + " blurred \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... 
\n", + "\n", + "[5 rows x 2 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" } + ] + }, + { + "id": "11fcc6ec", + "cell_type": "markdown", + "source": [ + "### 4. Use LLM models to ask questions and generate embeddings on images" ], + "metadata": { + "id": "Euk5saeVVdTP" + }, + "execution_count": null + }, + { + "id": "793b2f45", + "cell_type": "code", "source": [ "from bigframes.ml import llm\n", "gemini = llm.GeminiTextGenerator()" + ], + "metadata": { + "id": "mRUGfcaFVW-3" + }, + "execution_count": 9, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:183: FutureWarning: Since upgrading the default model can cause unintended breakages, the\n", + "default model will be removed in BigFrames 3.0. Please supply an\n", + "explicit model to avoid this message.\n", + " return method(*args, **kwargs)\n" + ] + } ] }, { - "cell_type": "code", - "execution_count": 10, "id": "13d7cb93", + "cell_type": "code", + "source": [ + "# Ask the same question on the images\n", + "answer = gemini.predict(df_image, prompt=[\"what item is it?\", df_image[\"image\"]])\n", + "answer[[\"ml_generate_text_llm_result\", \"image\"]]" + ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -719,6 +728,7 @@ "id": "DNFP7CbjWdR9", "outputId": "3f90a062-0abc-4bce-f53c-db57b06a14b9" }, + "execution_count": 10, "outputs": [ { "name": "stderr", @@ -741,8 +751,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ml_generate_text_llm_resultimage
0The item is a container of K9 Guard Dog Paw Balm.
1The item is K9 Guard Dog Hot Spot Spray.
2The image contains three bags of food, likely for small animals like rabbits or guinea pigs. They are labeled \"Timoth Hay Lend Variety Plend\", \"Herbal Greeıs Mix Variety Blend\", and \"Berry & Blossom Treat Blend\", all under the brand \"Fluffy Buns.\" The bags are yellow, green, and purple, respectively. Each bag has a pile of its contents beneath it.
3The item is a cat tree.\\n
4The item is a bag of bird seed. Specifically, it's labeled \"Chirpy Seed\", \"Deluxe Bird Food\".\\n
\n", - "

5 rows × 2 columns

\n", - "
[5 rows x 2 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eml_generate_text_llm_result\u003c/th\u003e\n", + " \u003cth\u003eimage\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003eThe item is a container of K9 Guard Dog Paw Balm.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=1b184636c15ee0a74b099df1903a0a79f2d0832d2cf829c84f9344269dc70408ea48dfaddce2f284cb4862cef857e8a0420627e25ca8c7ecc08d34bc69c695bc030ca8d90417860eecad65826160cbbf1cabe6c90d5e51a09c8b582bc542dfc5b309908be7b945d615eabba73b79912987306b3b110a5b0a9e52ccf900a3d2c490bb4e4572f3108f3acabf9a332e0fa503f74265f44d3b9ff40745afa4c59a3a0f3366ca4e4a800a09a5df0b363909a01705433e31bf2c9b7ccc0cc1e75f3e8ec323907140f29e4033238385eb83815b6d95ec54945cbf829d966510ae2504187f9c167fe70ac9e77231d4b38df380a7f6ec5f973828f21e51f4a95626ea0504\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003eThe item is K9 Guard Dog Hot Spot Spray.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=7e3adbf71385c63b494609546b7b2a3ef41dc437772a35d579bfed25594b3dd4716f8170dae6e4c9afcabce49c2af584561111983494d6dff504ff5eed611c3c7712cb667e6f1f3451aff5ac6911c4da9aeb2ba5c9227f9459f7c54085268a515eeb28add1b384a4724159f3d1408278e37716465f11a18f823dd4058004e43f9bdd4cab28fc97e97043adddc53c4b5e3059cf1641f1300bedded5a679f6922a45c021055e413953e2f0b000a6fd5047a0aa2fa5fce5f0ca08b6f93411704c9b10c6534894130c11f8ee9a8ee70d26a6455c0cbaf2088c1b5205328858e22026d1c4efc9e558a33082169f7eec5e4fb406de7de13e3ee72a48421e9896cca6f7\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003eThe image contains three bags of food, likely for small animals like rabbits or guinea pigs. They are labeled \"Timoth Hay Lend Variety Plend\", \"Herbal Greeıs Mix Variety Blend\", and \"Berry \u0026 Blossom Treat Blend\", all under the brand \"Fluffy Buns.\" The bags are yellow, green, and purple, respectively. 
Each bag has a pile of its contents beneath it.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=380d369f6fc8a0e4274420f7bb038aaf748111cd654f7dc20818a97d80da8e6b93125884982c656009cdfdc7e70496c1defa42b2de69e1b73d99e0b925953bd5e0e5ae20ddfac13a619f8c6b1bf6c3fe9cdb195910eb08efd96a193dda55488eacc2d0deac4d0a777fd7b7916a0cc1707f22463e1678c83b8ae51d082b8365c2643f85c19c59a9ec34c89fcc87b9c510cf6dbcdda5d0648d6602bd023b0a09e6b4b02c4cea9f6b10f563a14097e86a5cd9898ea3b3606a6c847a23ce97aed46b3154c1c1791da19edf172d7f57cd4e604bb2774ae3dc02d902c826ca9be17972ff17d612106b9ac61e734837646e5d0b40f8162798bf7695dccb0d320e6a58a9\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003eThe item is a cat tree.\\n\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=712a70543cff388ba937bc867b4d94e1bfb09579bc7ecf998b8fb5194937cd15f491643f76925582ed5b7c853a9845b77e3c6c248126e211d45c3f6ebe751cc06193ae052999bf9bd827acbb204d2a64ad5d6eae1101fc5b2518f16ae29469ee7213cae403a3a6a29d7081561decc6b189593beb4b649bc7169828f4570a929d8b15c8dd0b3f259bfa4e2680b9d5b88653068357c7aefa0b1f26e10dc309b743da4164d9a5abd1761b00cc9a12380ba6fb3786f141b8e536fdc27e869b632c3e1a130f312ad5185362b0f9b30f473387a02905f22956992278d94fc2ef387a87cb855d35cfcabe9ad5d82c1b4dd85c56152e28438f6631322a4c229a9520adb5\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003eThe item is a bag of bird seed. 
Specifically, it's labeled \"Chirpy Seed\", \"Deluxe Bird Food\".\\n\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=70d2709b3b655fb6add8616767e7886e7e304cc96fc891df927085d1e4d90ee9bb13b370762c6c5a8dd43baefa163312267203bc1b371954320bc27c32d0831f7f8937f288da999e506bf6f47d101cd2e49a870f3d5be428d321149f7e1c7d1146569d22f19640d62325665b6d08e7254a89535c021c8b464d65e754312dd47dde08be9ca58856a97d3c3f243030ccfbd8c1bda5ddca2b3618b113f6c1640afa14936b8c16c59d77c44139fe75f3719e2a83924fed36514c61787b02ace0d439f8d3c4fea81c9bf01684f8c06a39f7ec626e93d59262db87f2eea30dd0f849a3436d8dd36d2188f2e52826e8b96bf72614c256cda9867b1905a1d1cd3edebd18\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 2 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 2 columns in total]" ], "text/plain": [ " ml_generate_text_llm_result \\\n", @@ -816,21 +826,11 @@ "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "# Ask the same question on the images\n", - "answer = gemini.predict(df_image, prompt=[\"what item is it?\", df_image[\"image\"]])\n", - "answer[[\"ml_generate_text_llm_result\", \"image\"]]" ] }, { - "cell_type": "code", - "execution_count": 11, "id": "68857305", - "metadata": { - "id": "IG3J3HsKhyBY" - }, - "outputs": [], + "cell_type": "code", "source": [ "# Ask different questions\n", "df_image[\"question\"] = [\n", @@ -840,12 +840,19 @@ " \"is it for pets?\",\n", " \"what is the weight of the product?\",\n", "]" - ] + ], + 
"metadata": { + "id": "IG3J3HsKhyBY" + }, + "execution_count": 11 }, { - "cell_type": "code", - "execution_count": 12, "id": "829afc69", + "cell_type": "code", + "source": [ + "answer_alt = gemini.predict(df_image, prompt=[df_image[\"question\"], df_image[\"image\"]])\n", + "answer_alt[[\"ml_generate_text_llm_result\", \"image\"]]" + ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -854,6 +861,7 @@ "id": "qKOb765IiVuD", "outputId": "731bafad-ea29-463f-c8c1-cb7acfd70e5d" }, + "execution_count": 12, "outputs": [ { "name": "stderr", @@ -876,8 +884,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ml_generate_text_llm_resultimage
0The item is a container of Dog Paw Balm.
1The picture contains many colors, including white, black, green, and a bright blue. The product label predominantly features a bright blue hue. The background is a solid gray.
2Here are the product names from the image:\\n\\n* **Timoth Hay Lend Variety Plend** is the product in the yellow bag.\\n* **Herbal Greeıs Mix Variety Blend** is the product in the green bag.\\n* **Berry & Blossom Treat Blend** is the product in the purple bag.
3Yes, it is for pets. It appears to be a cat tree or scratching post.\\n
4The image shows that the weight of the product is 15 oz/ 257g.
\n", - "

5 rows × 2 columns

\n", - "
[5 rows x 2 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eml_generate_text_llm_result\u003c/th\u003e\n", + " \u003cth\u003eimage\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003eThe item is a container of Dog Paw Balm.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=988967bbfa1e4c70be10a6faa407fde8edbe0e93a7a1c9f9fbe8e81fa55c11bcc27b9673ae4d91b13edbc56ad5e50051a81d1639cafd38946f693f73e81e86a0bf6e945c3a8edf9b3b2d275203caba770fcb9a9fa272b969023fabda363efc8d90ae4b2bbc9a4f420699f3604d0a13607f96694687529b38bd80b211f5998ef0a11ab0f3a0b936a4e6068a9289ec43a9536416b5782ca2a0645c2b43f94ac6b8e31632c62b3714b56f28dce7e5675a66ded7bcc9d1c1a154f5a83f826b3f4b1363b1316536549d959d664931e6cd462a9d83362257e5226ad5b35b5e6c0e6a155fd62d2890b2fc071b59e6e4fa796a22e346bceebc4fb131eee66793f6748699\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003eThe picture contains many colors, including white, black, green, and a bright blue. The product label predominantly features a bright blue hue. 
The background is a solid gray.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=6098f2a2487364290c54d4f6bcc98f29e5097702ab78da4ff9ef97d13c03a7fa3a17bd22324d61ae8264d3a59a6bdb2bf4be55bb7efffcb00c68f0c9b69f413f8b33b2089697456ef919690d59a68548e95ebd68526de53ab9062e18009ab73452fc1934c43f99ad75a45931fb250ef1e78a7ced3e0bfc9d9468ef300a63d750b8e249e7d20afa00895b7e586b16686141799603bcdf731b48237323de166d0a1a4f310519671a4ce6ae56b5d4ebd1887361cc21130b3e8fa57a6107d50239a671319ecbef623719554a9642489d2ea083c2cc466f5d73c55084023567e9db291b40a335e7f65b20da018a70cbf5ef9654aa86ec500aa4df35c1db81116abc83\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003eHere are the product names from the image:\\n\\n* **Timoth Hay Lend Variety Plend** is the product in the yellow bag.\\n* **Herbal Greeıs Mix Variety Blend** is the product in the green bag.\\n* **Berry \u0026 Blossom Treat Blend** is the product in the purple bag.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=9672e4d0fa6ea5f4bcf1605e35a7642cab21e7decfcfe55e37616c99a7774445cecf69c848ccd0dd92f5ae1925bfaa4bbd62ba413ecfffc835aa9a4596d18036381d595ec3387d4200a08b8fdb02ddfdb7432ebd14d5d4b77abf4628e61899c4383899ddbe6465ce91b856e8a02f37391c850589a971e6045ed0c389133dd63d4bac98d5fc744f83a51b000bf29c51111589bd34e2d287cc835ccd8c009270cef818eb3ed9ee07f9978bf24ce994311af944fc2b7d7066c3c37613fe08e0542efd277ab77df0fe1f168e05d2f6d6749e5d8af573c22032b29c3c47ac2f3b7978a09139ddfd38ab9045b5ae86d8804f23db758a7602225bd749a5b7552bae4eb8\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003eYes, it is for pets. 
It appears to be a cat tree or scratching post.\\n\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=5c8e23c4c5f8017e09bf02b1b480200c74c1523aa25297141fe4abf226d7e18c546aed2ea5a2b5b5f8c5b90f2f4c569accf41a73ab8776285b458d230c6df43025eeb3066d40fe141089fa547b5a1f6a12d8c2eed0e614c8667be12c237d7ba4b29a530c16a6c8528cdfc8d8c4761aeae91c5f8452069f88de31cb637aaa34cbea60d8a8e9bd66b9d2f6f2c70a60d7791fc6bac13a4dd3736323da29fb52c5c90068c57c088407935255468513b780c07ea8e922f690645ee5ed8644240f894061c662b7bfe7ab660fd8466280e428424724f62a6c4eb6c781e3bf988059bd9f63fd6ffc2e75d296bca2ef068c6cdda0f6bb994f6799e04b33b4352f66a68224\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003eThe image shows that the weight of the product is 15 oz/ 257g.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=3be42f1f88c163e019f7496107a2f7d69b90a5482dd157a5d05e427fa117dd334a834dd9c72d0c84126e4f2c9e35903521ca8e81645afc945d8dbe47671f6b2f73ccbf494df6e5ad8e19c04ab4c6020859519c663b6fd57b19512fa94562106d01073b9122011ca602279a712fd761dc4ffaed3a8d7b76abcf1f42c2c1bce33228cff0e0454107cb84039a8981a2fb689191c43b7edf54e19354ff2e8c5deeb3a07944285b15db8a4fe474744f52852f8048c377708e2b3b85ee639b952e9292bc8d60d9b1a37c84ab83398ef295b8ba6b3c8e3a500714fb70e91b5fa7f19301fa4ce1bb1041f60a8ce84b3d863d5ba66f14614ed27689cd4daeb293b493172b\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 2 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 2 columns in total]" ], "text/plain": [ " ml_generate_text_llm_result \\\n", @@ -951,16 +959,17 @@ "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "answer_alt = gemini.predict(df_image, prompt=[df_image[\"question\"], df_image[\"image\"]])\n", - "answer_alt[[\"ml_generate_text_llm_result\", \"image\"]]" ] }, { - "cell_type": "code", - "execution_count": 13, "id": "e75df430", + "cell_type": "code", + "source": [ + "# Generate embeddings.\n", + "embed_model = llm.MultimodalEmbeddingGenerator()\n", + "embeddings = embed_model.predict(df_image[\"image\"])\n", + "embeddings" + ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -969,6 +978,7 @@ "id": "KATVv2CO5RT1", "outputId": "6ec01f27-70b6-4f69-c545-e5e3c879480c" }, + "execution_count": 13, "outputs": [ { "name": "stderr", @@ 
-993,8 +1003,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ml_generate_embedding_resultml_generate_embedding_statusml_generate_embedding_start_secml_generate_embedding_end_seccontent
0[ 0.00638822 0.01666385 0.00451817 ... -0.02...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
1[ 0.00973976 0.02148137 0.0024429 ... 0.00...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
2[ 0.01195884 0.02139394 0.05968047 ... -0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
3[-0.02621161 0.02797648 0.04416926 ... -0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
4[ 0.05918628 0.0125137 0.01907336 ... 0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
\n", - "

5 rows × 5 columns

\n", - "
[5 rows x 5 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eml_generate_embedding_result\u003c/th\u003e\n", + " \u003cth\u003eml_generate_embedding_status\u003c/th\u003e\n", + " \u003cth\u003eml_generate_embedding_start_sec\u003c/th\u003e\n", + " \u003cth\u003eml_generate_embedding_end_sec\u003c/th\u003e\n", + " \u003cth\u003econtent\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e[ 0.00638822 0.01666385 0.00451817 ... -0.02...\u003c/td\u003e\n", + " \u003ctd\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003e[ 0.00973976 0.02148137 0.0024429 ... 0.00...\u003c/td\u003e\n", + " \u003ctd\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003e[ 0.01195884 0.02139394 0.05968047 ... -0.01...\u003c/td\u003e\n", + " \u003ctd\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003e[-0.02621161 0.02797648 0.04416926 ... 
-0.01...\u003c/td\u003e\n", + " \u003ctd\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003e[ 0.05918628 0.0125137 0.01907336 ... 0.01...\u003c/td\u003e\n", + " \u003ctd\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 5 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 5 columns in total]" ], "text/plain": [ " ml_generate_embedding_result \\\n", @@ -1073,18 +1083,18 @@ "4 [ 0.05918628 0.0125137 0.01907336 ... 0.01... \n", "\n", " ml_generate_embedding_status ml_generate_embedding_start_sec \\\n", - "0 \n", - "1 \n", - "2 \n", - "3 \n", - "4 \n", + "0 \u003cNA\u003e \n", + "1 \u003cNA\u003e \n", + "2 \u003cNA\u003e \n", + "3 \u003cNA\u003e \n", + "4 \u003cNA\u003e \n", "\n", " ml_generate_embedding_end_sec \\\n", - "0 \n", - "1 \n", - "2 \n", - "3 \n", - "4 \n", + "0 \u003cNA\u003e \n", + "1 \u003cNA\u003e \n", + "2 \u003cNA\u003e \n", + "3 \u003cNA\u003e \n", + "4 \u003cNA\u003e \n", "\n", " content \n", "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", @@ -1100,41 +1110,24 @@ "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "# Generate embeddings.\n", - "embed_model = llm.MultimodalEmbeddingGenerator()\n", - "embeddings = embed_model.predict(df_image[\"image\"])\n", - "embeddings" ] }, { - "cell_type": "markdown", "id": "23892b0e", - "metadata": { - "id": "iRUi8AjG7cIf" - }, + "cell_type": "markdown", "source": [ "### 5. 
PDF extraction and chunking function\n", "\n", "This section demonstrates how to extract text and chunk text from PDF files using custom BigQuery Python UDFs and the `pypdf` library." - ] + ], + "metadata": { + "id": "iRUi8AjG7cIf" + }, + "execution_count": null }, { - "cell_type": "code", - "execution_count": 14, "id": "136a18b8", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", - " return global_session.with_default_session(\n" - ] - } - ], + "cell_type": "code", "source": [ "# Construct the canonical connection ID\n", "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", @@ -1147,7 +1140,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"pypdf\", \"requests\", \"cryptography\"],\n", ")\n", - "def pdf_extract(src_obj_ref_rt: str) -> str:\n", + "def pdf_extract(src_obj_ref_rt: str) -\u003e str:\n", " import io\n", " import json\n", " from pypdf import PdfReader\n", @@ -1174,7 +1167,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"pypdf\", \"requests\", \"cryptography\"],\n", ")\n", - "def pdf_chunk(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -> list[str]:\n", + "def pdf_chunk(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -\u003e list[str]:\n", " import io\n", " import json\n", " from pypdf import PdfReader\n", @@ -1192,7 +1185,7 @@ " page_text = page.extract_text()\n", " if page_text:\n", " curr_chunk += page_text\n", - " while len(curr_chunk) >= chunk_size:\n", + " while len(curr_chunk) \u003e= chunk_size:\n", " split_idx = curr_chunk.rfind(\" \", 0, chunk_size)\n", " if split_idx == -1:\n", " split_idx = chunk_size\n", @@ -1203,19 +1196,60 @@ " if curr_chunk:\n", " all_text_chunks.append(curr_chunk)\n", " return all_text_chunks" + ], + "metadata": {}, + "execution_count": 14, + "outputs": [ + { + 
"name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", + " return global_session.with_default_session(\n" + ] + } ] }, { - "cell_type": "code", - "execution_count": 15, "id": "234a5f86", + "cell_type": "code", + "source": [ + "import gcsfs\n", + "import bigframes.bigquery as bbq\n", + "\n", + "# List files using gcsfs\n", + "fs = gcsfs.GCSFileSystem(anon=True)\n", + "uris = fs.glob(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*\")\n", + "\n", + "# Ensure URIs have gs:// prefix\n", + "uris = [u if u.startswith(\"gs://\") else f\"gs://{u}\" for u in uris]\n", + "\n", + "# Read the URIs into a BigQuery DataFrame\n", + "df_pdf = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", + "\n", + "# Create the object reference column\n", + "df_pdf['pdf'] = bbq.obj.make_ref(df_pdf['uri'], authorizer=FULL_CONNECTION_ID)\n", + "df_pdf = df_pdf[['pdf']]\n", + "\n", + "# Generate a JSON string containing the runtime information (including signed read URLs)\n", + "access_urls = get_runtime_json_str(df_pdf[\"pdf\"], mode=\"R\")\n", + "\n", + "# Apply PDF extraction\n", + "df_pdf[\"extracted_text\"] = access_urls.apply(pdf_extract)\n", + "\n", + "# Apply PDF chunking\n", + "df_pdf[\"chunked\"] = access_urls.apply(pdf_chunk, args=(2000, 200))\n", + "\n", + "df_pdf[[\"extracted_text\", \"chunked\"]]" + ], "metadata": {}, + "execution_count": 15, "outputs": [ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
extracted_textchunked
0CritterCuisine Pro 5000 - Automatic Pet Feeder...[\"CritterCuisine Pro 5000 - Automatic Pet Feed...
\n", - "

1 rows × 2 columns

\n", - "
[1 rows x 2 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eextracted_text\u003c/th\u003e\n", + " \u003cth\u003echunked\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003eCritterCuisine Pro 5000 - Automatic Pet Feeder...\u003c/td\u003e\n", + " \u003ctd\u003e[\"CritterCuisine Pro 5000 - Automatic Pet Feed...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e1 rows × 2 columns\u003c/p\u003e\n", + "\u003c/div\u003e[1 rows x 2 columns in total]" ], "text/plain": [ " extracted_text \\\n", @@ -1261,53 +1295,29 @@ "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "import gcsfs\n", - "import bigframes.bigquery as bbq\n", - "\n", - "# List files using gcsfs\n", - "fs = gcsfs.GCSFileSystem(anon=True)\n", - "uris = fs.glob(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*\")\n", - "\n", - "# Ensure URIs have gs:// prefix\n", - "uris = [u if u.startswith(\"gs://\") else f\"gs://{u}\" for u in uris]\n", - "\n", - "# Read the URIs into a BigQuery DataFrame\n", - "df_pdf = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", - "\n", - "# Create the object reference column\n", - "df_pdf['pdf'] = bbq.obj.make_ref(df_pdf['uri'], authorizer=FULL_CONNECTION_ID)\n", - "df_pdf = df_pdf[['pdf']]\n", - "\n", - "# Generate a JSON string containing the runtime information (including signed read URLs)\n", - "access_urls = get_runtime_json_str(df_pdf[\"pdf\"], mode=\"R\")\n", - "\n", - "# Apply PDF extraction\n", - "df_pdf[\"extracted_text\"] = access_urls.apply(pdf_extract)\n", - "\n", - "# Apply PDF chunking\n", - "df_pdf[\"chunked\"] = access_urls.apply(pdf_chunk, 
args=(2000, 200))\n", - "\n", - "df_pdf[[\"extracted_text\", \"chunked\"]]" ] }, { - "cell_type": "code", - "execution_count": 16, "id": "d80effbe", + "cell_type": "code", + "source": [ + "# Explode the chunks to see each chunk as a separate row\n", + "chunked = df_pdf[\"chunked\"].explode()\n", + "chunked" + ], "metadata": {}, + "execution_count": 16, "outputs": [ { "data": { "text/html": [ - "
0    CritterCuisine Pro 5000 - Automatic Pet Feeder...\n",
+       "\u003cpre\u003e0    CritterCuisine Pro 5000 - Automatic Pet Feeder...\n",
        "0    on a level, stable surface to prevent tipping....\n",
        "0    included)\\nto maintain the schedule during pow...\n",
        "0    digits for Meal 1 will flash.\\n\u0000. Use the UP/D...\n",
        "0    paperclip) for 5\\nseconds. This will reset all...\n",
        "0    unit with a damp cloth. Do not immerse the bas...\n",
-       "0    continues,\\ncontact customer support.\\nE2: Foo...
" + "0 continues,\\ncontact customer support.\\nE2: Foo...\u003c/pre\u003e" ], "text/plain": [ "0 CritterCuisine Pro 5000 - Automatic Pet Feeder...\n", @@ -1324,27 +1334,20 @@ "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "# Explode the chunks to see each chunk as a separate row\n", - "chunked = df_pdf[\"chunked\"].explode()\n", - "chunked" ] }, { - "cell_type": "markdown", "id": "118cf1c7", - "metadata": {}, + "cell_type": "markdown", "source": [ "### 6. Audio transcribe" - ] + ], + "metadata": {}, + "execution_count": null }, { - "cell_type": "code", - "execution_count": 17, "id": "1794c54f", - "metadata": {}, - "outputs": [], + "cell_type": "code", "source": [ "import gcsfs\n", "import bigframes.bigquery as bbq\n", @@ -1369,39 +1372,13 @@ "# Create the object reference column\n", "df['audio'] = bbq.obj.make_ref(df['uri'], authorizer=FULL_CONNECTION_ID)\n", "df = df[['audio']]" - ] + ], + "metadata": {}, + "execution_count": 17 }, { - "cell_type": "code", - "execution_count": null, "id": "c9f9d484", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, - { - "data": { - "text/html": [ - "
0    Now, as all books, not primarily intended as p...
" - ], - "text/plain": [ - "0 Now, as all books, not primarily intended as p...\n", - "Name: transcribed_content, dtype: string" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "cell_type": "code", "source": [ "# The audio_transcribe function is a convenience wrapper around bigframes.bigquery.ai.generate.\n", "# Here's how to perform the same operation directly:\n", @@ -1427,29 +1404,39 @@ "\n", "transcribed_series = transcribed_results.struct.field(\"result\").rename(\"transcribed_content\")\n", "transcribed_series" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "7209a62a", + ], "metadata": {}, + "execution_count": null, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, { "data": { "text/html": [ - "
0    {'status': '', 'content': 'Now, as all books, ...
" + "\u003cpre\u003e0 Now, as all books, not primarily intended as p...\u003c/pre\u003e" ], "text/plain": [ - "0 {'status': '', 'content': 'Now, as all books, ...\n", - "Name: transcription_results, dtype: struct[pyarrow]" + "0 Now, as all books, not primarily intended as p...\n", + "Name: transcribed_content, dtype: string" ] }, - "execution_count": 19, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } - ], + ] + }, + { + "id": "7209a62a", + "cell_type": "code", "source": [ "# To get verbose results (including status), we can extract both fields from the result struct.\n", "transcribed_content_series = transcribed_results.struct.field(\"result\")\n", @@ -1464,39 +1451,47 @@ "# Package as a struct for consistent display\n", "transcribed_series_verbose = bbq.struct(transcribed_series_verbose).rename(\"transcription_results\")\n", "transcribed_series_verbose" + ], + "metadata": {}, + "execution_count": 19, + "outputs": [ + { + "data": { + "text/html": [ + "\u003cpre\u003e0 {'status': '', 'content': 'Now, as all books, ...\u003c/pre\u003e" + ], + "text/plain": [ + "0 {'status': '', 'content': 'Now, as all books, ...\n", + "Name: transcription_results, dtype: struct\u003cstatus: string, content: string\u003e[pyarrow]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } ] }, { - "cell_type": "markdown", "id": "c8351cc3", - "metadata": {}, + "cell_type": "markdown", "source": [ "### 7. Extract EXIF metadata from images" - ] + ], + "metadata": {}, + "execution_count": null }, { - "cell_type": "markdown", "id": "e59670b9", - "metadata": {}, + "cell_type": "markdown", "source": [ "This section demonstrates how to extract EXIF metadata from images using a custom BigQuery Python UDF and the `Pillow` library." 
- ] + ], + "metadata": {}, + "execution_count": null }, { - "cell_type": "code", - "execution_count": 20, "id": "fda362f4", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", - " return global_session.with_default_session(\n" - ] - } - ], + "cell_type": "code", "source": [ "# Construct the canonical connection ID\n", "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", @@ -1512,7 +1507,7 @@ " container_cpu=0.33,\n", " container_memory=\"512Mi\"\n", ")\n", - "def extract_exif(src_obj_ref_rt: str) -> str:\n", + "def extract_exif(src_obj_ref_rt: str) -\u003e str:\n", " import io\n", " import json\n", " from PIL import ExifTags, Image\n", @@ -1529,38 +1524,23 @@ " tag_name = ExifTags.TAGS.get(tag, tag)\n", " exif_dict[tag_name] = value\n", " return json.dumps(exif_dict)" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "40bb6bc9", + ], "metadata": {}, + "execution_count": 20, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/utils.py:228: PreviewWarning: The JSON-related API `parse_json` is in preview. Its behavior may\n", - "change in future versions.\n", - " warnings.warn(bfe.format_message(msg), category=bfe.PreviewWarning)\n" + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", + " return global_session.with_default_session(\n" ] - }, - { - "data": { - "text/html": [ - "
0    {\"ExifOffset\":47,\"Make\":\"MyCamera\"}
" - ], - "text/plain": [ - "0 {\"ExifOffset\":47,\"Make\":\"MyCamera\"}\n", - "Name: blob_col, dtype: extension>[pyarrow]" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" } - ], + ] + }, + { + "id": "40bb6bc9", + "cell_type": "code", "source": [ "import gcsfs\n", "import bigframes.bigquery as bbq\n", @@ -1591,6 +1571,33 @@ "exif_data = bbq.parse_json(exif_json)\n", "\n", "exif_data" + ], + "metadata": {}, + "execution_count": 21, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/utils.py:228: PreviewWarning: The JSON-related API `parse_json` is in preview. Its behavior may\n", + "change in future versions.\n", + " warnings.warn(bfe.format_message(msg), category=bfe.PreviewWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "\u003cpre\u003e0 {\"ExifOffset\":47,\"Make\":\"MyCamera\"}\u003c/pre\u003e" + ], + "text/plain": [ + "0 {\"ExifOffset\":47,\"Make\":\"MyCamera\"}\n", + "Name: blob_col, dtype: extension\u003cdbjson\u003cJSONArrowType\u003e\u003e[pyarrow]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } ] } ], @@ -1616,6 +1623,6 @@ "version": "3.13.0" } }, - "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 0, + "nbformat": 4 } From 5ecdec9872b123b25aa9499d35cc4aeb3ada42ac Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Thu, 30 Apr 2026 22:05:26 +0000 Subject: [PATCH 28/39] fix: revert opencv-python to opencv-python-headless and restore missing notebook outputs --- .../notebooks/dataframes/anywidget_mode.ipynb | 487 ++++---- .../generative_ai/ai_movie_poster.ipynb | 294 ++--- .../multimodal/multimodal_dataframe.ipynb | 1054 +++++------------ 3 files changed, 711 insertions(+), 1124 deletions(-) diff --git a/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb b/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb index 5289aee1ed77..e92e4b295bdb 100644 
--- a/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb +++ b/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb @@ -19,7 +19,8 @@ "# limitations under the License." ], "metadata": {}, - "execution_count": 1 + "execution_count": 1, + "outputs": [] }, { "id": "acca43ae", @@ -37,7 +38,8 @@ "import bigframes.pandas as bpd" ], "metadata": {}, - "execution_count": 2 + "execution_count": 2, + "outputs": [] }, { "id": "04406a4d", @@ -46,7 +48,7 @@ "This notebook demonstrates the **anywidget** display mode for BigQuery DataFrames. This mode provides an interactive table experience for exploring your data directly within the notebook.\n", "\n", "**Key features:**\n", - "- **Rich DataFrames \u0026 Series:** Both DataFrames and Series are displayed as interactive widgets.\n", + "- **Rich DataFrames & Series:** Both DataFrames and Series are displayed as interactive widgets.\n", "- **Pagination:** Navigate through large datasets page by page without overwhelming the output.\n", "- **Column Sorting:** Click column headers to toggle between ascending, descending, and unsorted views. Use **Shift + Click** to sort by multiple columns.\n", "- **Column Resizing:** Drag the dividers between column headers to adjust their width.\n", @@ -63,7 +65,8 @@ "bpd.options.display.render_mode = \"anywidget\"" ], "metadata": {}, - "execution_count": 3 + "execution_count": 3, + "outputs": [] }, { "id": "0a354c69", @@ -102,7 +105,7 @@ " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -147,8 +150,8 @@ "version_minor": 1 }, "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stategenderyearnamenumber
0ALF1910Annie482
1ALF1910Myrtle104
2ARF1910Lillian56
3CTF1910Anne38
4CTF1910Frances45
5FLF1910Margaret53
6GAF1910Mae73
7GAF1910Beatrice96
8GAF1910Lola47
9IAF1910Viola49
\n", + "

10 rows × 5 columns

\n", + "
[5552452 rows x 5 columns in total]" ], "text/plain": [ "state gender year name number\n", @@ -306,11 +309,11 @@ "data": { "text/html": [ "\n", - " Query processed 171.4 MB in 46 seconds of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:dcf260e0-eaad-4979-9ec6-12f2436698e4\u0026page=queryresults\"\u003eJob bigframes-dev:US.dcf260e0-eaad-4979-9ec6-12f2436698e4 details\u003c/a\u003e]\n", + " Query processed 171.4 MB in 46 seconds of slot time. [Job bigframes-dev:US.dcf260e0-eaad-4979-9ec6-12f2436698e4 details]\n", " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -324,7 +327,7 @@ " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -378,7 +381,7 @@ "version_minor": 1 }, "text/html": [ - "\u003cpre\u003e0 1910\n", + "
0    1910\n",
        "1    1910\n",
        "2    1910\n",
        "3    1910\n",
@@ -387,7 +390,7 @@
        "6    1910\n",
        "7    1910\n",
        "8    1910\n",
-       "9    1910\u003c/pre\u003e\u003cp\u003e[5552452 rows]\u003c/p\u003e"
+       "9    1910

[5552452 rows]

" ], "text/plain": [ "1910\n", @@ -498,7 +501,7 @@ "version_minor": 1 }, "text/plain": [ - "\u003cbigframes.display.anywidget.TableWidget object at 0x7f50500e2ad0\u003e" + "" ] }, "execution_count": 8, @@ -583,7 +586,7 @@ "version_minor": 1 }, "text/plain": [ - "\u003cbigframes.display.anywidget.TableWidget object at 0x7f505016e190\u003e" + "" ] }, "execution_count": 10, @@ -619,9 +622,9 @@ "bpd.read_gbq(\"\"\"\n", " SELECT\n", " AI.GENERATE(\n", - " prompt=\u003e(\"Extract the values.\", OBJ.GET_ACCESS_URL(OBJ.FETCH_METADATA(OBJ.MAKE_REF(gcs_path, \"us.conn\")), \"r\")),\n", - " connection_id=\u003e\"bigframes-dev.us.bigframes-default-connection\",\n", - " output_schema=\u003e\"publication_date string, class_international string, application_number string, filing_date string\") AS result,\n", + " prompt=>(\"Extract the values.\", OBJ.GET_ACCESS_URL(OBJ.FETCH_METADATA(OBJ.MAKE_REF(gcs_path, \"us.conn\")), \"r\")),\n", + " connection_id=>\"bigframes-dev.us.bigframes-default-connection\",\n", + " output_schema=>\"publication_date string, class_international string, application_number string, filing_date string\") AS result,\n", " *\n", " FROM `bigquery-public-data.labeled_patents.extracted_data`\n", " LIMIT 5;\n", @@ -638,7 +641,7 @@ " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -652,8 +655,8 @@ "version_minor": 1 }, "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
resultgcs_pathissuerlanguagepublication_dateclass_internationalclass_usapplication_numberfiling_datepriority_date_eurepresentative_line_1_euapplicant_line_1inventor_line_1title_line_1number
0{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE29.08.018E04H 6/12<NA>18157874.121.02.201822.02.2017Liedtke & Partner PatentanwälteSHB Hebezeugbau GmbHVOLGER, AlexanderSTEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSEREP 3 366 869 A1
1{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018G06F 11/30<NA>18157347.819.02.201831.03.2017Hoffmann EitleFUJITSU LIMITEDKukihara, KensukeMETHOD EXECUTED BY A COMPUTER, INFORMATION PRO...EP 3 382 553 A1
2{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018A01K 31/00<NA>18171005.405.02.201505.02.2014Stork Bamberger PatentanwälteLinco Food Systems A/SThrane, UffeMASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER E...EP 3 381 276 A1
3{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018H05B 6/12<NA>18165514.303.04.201830.03.2017<NA>BSH Hausger√§te GmbHAcero Acero, JesusVORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNGEP 3 383 141 A2
4{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018H01L 21/20<NA>18166536.516.02.2016<NA>Scheider, Sascha et alEV Group E. Thallner GmbHKurz, FlorianVORRICHTUNG ZUM BONDEN VON SUBSTRATENEP 3 382 744 A1
\n", + "

5 rows × 15 columns

\n", + "
[5 rows x 15 columns in total]" ], "text/plain": [ " result \\\n", @@ -799,18 +802,18 @@ "4 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", "\n", " publication_date class_international class_us application_number \\\n", - "0 29.08.018 E04H 6/12 \u003cNA\u003e 18157874.1 \n", - "1 03.10.2018 G06F 11/30 \u003cNA\u003e 18157347.8 \n", - "2 03.10.2018 A01K 31/00 \u003cNA\u003e 18171005.4 \n", - "3 03.10.2018 H05B 6/12 \u003cNA\u003e 18165514.3 \n", - "4 03.10.2018 H01L 21/20 \u003cNA\u003e 18166536.5 \n", + "0 29.08.018 E04H 6/12 18157874.1 \n", + "1 03.10.2018 G06F 11/30 18157347.8 \n", + "2 03.10.2018 A01K 31/00 18171005.4 \n", + "3 03.10.2018 H05B 6/12 18165514.3 \n", + "4 03.10.2018 H01L 21/20 18166536.5 \n", "\n", " filing_date priority_date_eu representative_line_1_eu \\\n", - "0 21.02.2018 22.02.2017 Liedtke \u0026 Partner Patentanw√§lte \n", + "0 21.02.2018 22.02.2017 Liedtke & Partner Patentanw√§lte \n", "1 19.02.2018 31.03.2017 Hoffmann Eitle \n", "2 05.02.2015 05.02.2014 Stork Bamberger Patentanw√§lte \n", - "3 03.04.2018 30.03.2017 \u003cNA\u003e \n", - "4 16.02.2016 \u003cNA\u003e Scheider, Sascha et al \n", + "3 03.04.2018 30.03.2017 \n", + "4 16.02.2016 Scheider, Sascha et al \n", "\n", " applicant_line_1 inventor_line_1 \\\n", "0 SHB Hebezeugbau GmbH VOLGER, Alexander \n", diff --git a/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb b/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb index 20617012487c..672d4f237780 100644 --- a/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb +++ b/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb @@ -21,7 +21,8 @@ "metadata": { "id": "XZpKUoHjXw3_" }, - "execution_count": 1 + "execution_count": 1, + "outputs": [] }, { "id": "ee509844", @@ -38,26 +39,26 @@ "id": "81b8de8d", "cell_type": "markdown", "source": [ - "\u003ctable align=\"left\"\u003e\n", + "\n", "\n", - " \u003ctd\u003e\n", - " \u003ca 
href=\"https://colab.research.google.com/github/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/ai_movie_poster.ipynb\"\u003e\n", - " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/colab-logo.png\" alt=\"Colab logo\"\u003e Run in Colab\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/ai_movie_poster.ipynb\"\u003e\n", - " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/github-logo.png\" width=\"32\" alt=\"GitHub logo\"\u003e\n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \"Colab Run in Colab\n", + " \n", + " \n", + " \n", + " \"GitHub\n", " View on GitHub\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/ai_movie_poster.ipynb\"\u003e\n", - " \u003cimg src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw\u0026s\" alt=\"BQ logo\" width=\"35\"\u003e\n", + " \n", + " \n", + " \n", + " \"BQ\n", " Open in BQ Studio\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e" + " \n", + "
" ], "metadata": {}, "execution_count": null @@ -115,7 +116,8 @@ "metadata": { "id": "6nqoRHYbPAx3" }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "015a63c1", @@ -189,11 +191,11 @@ "data": { "text/html": [ "\n", - " Query processed 0 Bytes in a moment of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:48a27954-7a4a-4b9e-8176-ea227fd188ad\u0026page=queryresults\"\u003eJob bigframes-dev:US.48a27954-7a4a-4b9e-8176-ea227fd188ad details\u003c/a\u003e]\n", + " Query processed 0 Bytes in a moment of slot time. [Job bigframes-dev:US.48a27954-7a4a-4b9e-8176-ea227fd188ad details]\n", " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -215,11 +217,11 @@ "data": { "text/html": [ "\n", - " Query processed 1.3 kB in a minute of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:09c48ecb-e041-4c18-a390-ca5a36fd07c3\u0026page=queryresults\"\u003eJob bigframes-dev:US.09c48ecb-e041-4c18-a390-ca5a36fd07c3 details\u003c/a\u003e]\n", + " Query processed 1.3 kB in a minute of slot time. [Job bigframes-dev:US.09c48ecb-e041-4c18-a390-ca5a36fd07c3 details]\n", " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -233,7 +235,7 @@ " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -242,8 +244,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
poster
0
\n", + "

1 rows × 1 columns

\n", + "
[1 rows x 1 columns in total]" ], "text/plain": [ " poster\n", @@ -341,11 +343,11 @@ "data": { "text/html": [ "\n", - " Query processed 1.3 kB in 2 minutes of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:4a08a15f-5a2f-463b-bba8-734858ec992b\u0026page=queryresults\"\u003eJob bigframes-dev:US.4a08a15f-5a2f-463b-bba8-734858ec992b details\u003c/a\u003e]\n", + " Query processed 1.3 kB in 2 minutes of slot time. [Job bigframes-dev:US.4a08a15f-5a2f-463b-bba8-734858ec992b details]\n", " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -359,7 +361,7 @@ " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -368,8 +370,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
postertitle
0Der Student von Prag
\n", + "

1 rows × 2 columns

\n", + "
[1 rows x 2 columns in total]" ], "text/plain": [ " poster title\n", @@ -475,11 +477,11 @@ "data": { "text/html": [ "\n", - " Query processed 1.3 kB in 4 minutes of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:b60a151a-6cbc-405e-9c40-8a7461981a00\u0026page=queryresults\"\u003eJob bigframes-dev:US.b60a151a-6cbc-405e-9c40-8a7461981a00 details\u003c/a\u003e]\n", + " Query processed 1.3 kB in 4 minutes of slot time. [Job bigframes-dev:US.b60a151a-6cbc-405e-9c40-8a7461981a00 details]\n", " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -493,7 +495,7 @@ " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -502,8 +504,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
postertitleyear
0Der Student von Prag1913
\n", + "

1 rows × 3 columns

\n", + "
[1 rows x 3 columns in total]" ], "text/plain": [ " poster title \\\n", @@ -582,8 +584,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
posterstruct<uri: string, version: string, authorize...
titlestring[pyarrow]
yearInt64
\n", + "

" ], "text/plain": [ - "poster struct\u003curi: string, version: string, authorize...\n", + "poster structJob bigframes-dev:US.c9bb23f0-5ceb-4d6c-8241-960c496274ae details]\n", " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -699,7 +701,7 @@ " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -708,8 +710,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
postertitleyear
8Shoulder Arms1918
\n", + "

1 rows × 3 columns

\n", + "
[1 rows x 3 columns in total]" ], "text/plain": [ " poster title year\n", diff --git a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb index edea6eba7bf7..09a7bfc2e6b5 100644 --- a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb @@ -19,7 +19,8 @@ "# limitations under the License." ], "metadata": {}, - "execution_count": 1 + "execution_count": 1, + "outputs": [] }, { "id": "816ab253", @@ -27,26 +28,26 @@ "source": [ "# BigFrames Multimodal DataFrame\n", "\n", - "\u003ctable align=\"left\"\u003e\n", + "\n", "\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://colab.research.google.com/github/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", - " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/colab-logo.png\" alt=\"Colab logo\"\u003e Run in Colab\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", - " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/github-logo.png\" width=\"32\" alt=\"GitHub logo\"\u003e\n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \"Colab Run in Colab\n", + " \n", + " \n", + " \n", + " \"GitHub\n", " View on GitHub\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", - " \u003cimg src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw\u0026s\" alt=\"BQ logo\" width=\"35\"\u003e\n", + " \n", + " \n", + " \n", + " \"BQ\n", " Open in BQ Studio\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e\n" + " \n", + "
\n" ], "metadata": { "id": "YOrUAvz6DMw-" @@ -84,7 +85,7 @@ "id": "750954c4", "cell_type": "markdown", "source": [ - "Install the latest bigframes package if bigframes version \u003c 2.4.0" + "Install the latest bigframes package if bigframes version < 2.4.0" ], "metadata": {}, "execution_count": null @@ -96,7 +97,8 @@ "# !pip install bigframes --upgrade" ], "metadata": {}, - "execution_count": 2 + "execution_count": 2, + "outputs": [] }, { "id": "df561d04", @@ -136,7 +138,8 @@ "id": "bGyhLnfEeB0X", "outputId": "83ac8b64-3f44-4d43-d089-28a5026cbb42" }, - "execution_count": 3 + "execution_count": 3, + "outputs": [] }, { "id": "35bd6e6e", @@ -182,7 +185,8 @@ " return bpd.to_datetime(bbq.json_value(get_metadata(series), \"$.updated\").astype(\"Int64\"), unit=\"us\", utc=True)" ], "metadata": {}, - "execution_count": 4 + "execution_count": 4, + "outputs": [] }, { "id": "be9ce892", @@ -226,7 +230,8 @@ "id": "fx6YcZJbeYru", "outputId": "d707954a-0dd0-4c50-b7bf-36b140cf76cf" }, - "execution_count": 5 + "execution_count": 5, + "outputs": [] }, { "id": "2e0436b0", @@ -246,23 +251,11 @@ }, "execution_count": 6, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n" - ] - }, { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
image
0{'uri': 'gs://cloud-samples-data/bigquery/tuto...
1{'uri': 'gs://cloud-samples-data/bigquery/tuto...
2{'uri': 'gs://cloud-samples-data/bigquery/tuto...
3{'uri': 'gs://cloud-samples-data/bigquery/tuto...
4{'uri': 'gs://cloud-samples-data/bigquery/tuto...
\n", + "

5 rows × 1 columns

\n", + "
[5 rows x 1 columns in total]" ], "text/plain": [ " image\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3...\n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3...\n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3...\n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3...\n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3...\n", + "0 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n", + "1 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n", + "2 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n", + "3 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n", + "4 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n", "\n", "[5 rows x 1 columns]" ] @@ -364,23 +357,11 @@ }, "execution_count": 7, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n" - ] - }, { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
imageauthorcontent_typesizeupdated
0{'uri': 'gs://cloud-samples-data/bigquery/tuto...aliceimage/png7157662025-03-20 17:44:38+00:00
1{'uri': 'gs://cloud-samples-data/bigquery/tuto...bobimage/png11674062025-03-20 17:44:38+00:00
2{'uri': 'gs://cloud-samples-data/bigquery/tuto...bobimage/png11508922025-03-20 17:44:39+00:00
3{'uri': 'gs://cloud-samples-data/bigquery/tuto...aliceimage/png17365332025-03-20 17:44:39+00:00
4{'uri': 'gs://cloud-samples-data/bigquery/tuto...bobimage/png4397402025-03-20 17:44:39+00:00
\n", + "

5 rows × 5 columns

\n", + "
[5 rows x 5 columns in total]" ], "text/plain": [ " image author content_type \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... alice image/png \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... bob image/png \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... bob image/png \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... alice image/png \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... bob image/png \n", + "0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n", + "1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", + "2 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", + "3 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n", + "4 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", "\n", " size updated \n", - "0 1591240 2025-03-20 17:45:04+00:00 \n", - "1 1182951 2025-03-20 17:45:02+00:00 \n", - "2 1520884 2025-03-20 17:44:55+00:00 \n", - "3 1235401 2025-03-20 17:45:19+00:00 \n", - "4 1591923 2025-03-20 17:44:47+00:00 \n", + "0 715766 2025-03-20 17:44:38+00:00 \n", + "1 1167406 2025-03-20 17:44:38+00:00 \n", + "2 1150892 2025-03-20 17:44:39+00:00 \n", + "3 1736533 2025-03-20 17:44:39+00:00 \n", + "4 439740 2025-03-20 17:44:39+00:00 \n", "\n", "[5 rows x 5 columns]" ] @@ -505,9 +486,9 @@ " dataset=DATASET_ID,\n", " name=\"image_blur\",\n", " bigquery_connection=FULL_CONNECTION_ID,\n", - " packages=[\"opencv-python\", \"numpy\", \"requests\"],\n", + " packages=[\"opencv-python-headless\", \"numpy\", \"requests\"],\n", ")\n", - "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -\u003e str:\n", + "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -> str:\n", " import json\n", " import cv2 as cv\n", " import numpy as np\n", @@ -577,103 +558,121 @@ "id": "HhCb8jRsLe9B", "outputId": "03081cf9-3a22-42c9-b38f-649f592fdada" }, - "execution_count": 8, + "execution_count": null, "outputs": [ { 
"name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", + "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/pandas/__init__.py:211: PreviewWarning: udf is in preview.\n", " return global_session.with_default_session(\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4655: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", + "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dataframe.py:4695: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n" + "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:457: UserWarning: Compiler ID eb39002044ce: BadRequest on sqlglot. Falling back to ibis.\n", + "Details: GET\n", + "https://bigquery.googleapis.com/bigquery/v2/projects/bigframes-\n", + "dev/queries/db11d8fe-58e7-44ab-\n", + "aa57-662980a78596?maxResults=0&location=US&prettyPrint=false:\n", + "Encounter an error when invoking the function: Flight error: Encounter\n", + "an error when executing the function No module named 'cv2'. Traceback\n", + "(most recent call last): File \"/srv/grpc/bq_udf_server.py\", line\n", + "119, in call_udf result = [ ^ File\n", + "\"/srv/grpc/bq_udf_server.py\", line 120, in func(*row)\n", + "File\n", + "\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\",\n", + "line 43, in bigframes_handler return image_blur(*args)\n", + "^^^^^^^^^^^^^^^^^ File\n", + "\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\",\n", + "line 5, in image_blur import cv2 as cv ModuleNotFoundError: No\n", + "module named 'cv2' Location: US Job ID: db11d8fe-58e7-44ab-\n", + "aa57-662980a78596\n", + " warnings.warn(msg, category=UserWarning)\n" ] }, { - "data": { - "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "\u003c/style\u003e\n", - "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", - " \u003cthead\u003e\n", - " \u003ctr style=\"text-align: right;\"\u003e\n", - " \u003cth\u003e\u003c/th\u003e\n", - " \u003cth\u003eimage\u003c/th\u003e\n", - " \u003cth\u003eblurred\u003c/th\u003e\n", - " \u003c/tr\u003e\n", - " \u003c/thead\u003e\n", - " 
\u003ctbody\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e0\u003c/th\u003e\n", - " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=2bb786ea7fda0a778b591d0f85b1d1df003726a26585490c1c8e1cc632bc90e418bc2762993da32c60017b96b36a9bf03ca123f7e74a34e5da98d4a8ae65e340c64872e1ab7c6442435253669103a157ee61c793da79ad0e6ae2a7bdbf54e8d67dce61de37d9dd2f54b0c994f8135d88af27f08eeb99e713b385b37fd0762503244cd0c597479d6925dcf111db4605842c797aabb307736028a5dd802ef08a2697a4bd1c96b5bf8bbecdb8a8f384028cec03bc9d51c1d2dfee1760756e0c6e54d9d753b373115e308cf45de2c082f30f263285af5d5e43fd9bf8c3dae32ceee389ef9427f3e2b06646d48aa570ba1ca0713d0f270ea2f4996e8ad3d03a3e39a6\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334353324\u0026X-Goog-Signature=6f16236d50a82c7ede3c23366389344149faba103443acf1c1a2b0d3b0d91948c8c15a3ad990382ab9b61badf8acee3c4b9e5ec6f96b72d395cd2e18227ad5c051ad189c1af48030fb44c1b9cc84a44a74fec04d576a6d699c1f0e133b2fc5d583872b5577c60a01f58d163de6e95591d260e9246fae0243d7472748e854ccaf4b4c3de80fb4e085f428427fe9914d22fba7416ae8b9e33beb0dfbcb91273609d0333aa1135b835bc69f9e15e1097452133e376b9b3f1bb7b4d7d7e9cb860677ce786e49239e8a8af86f83b7e27e76846212daddd2859f8dec4942b7351c69d396d4fd9a529fd0c48677aa4923063a752f6845801f22d3de227a640946f1e212\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e1\u003c/th\u003e\n", - " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=31ed02660dcead7471a7b47e2f471f4cf515e37525c14027f8d87ba1479982f244df02979b7da03676d4bbbd21221b3f70cefea4501eca3a5a64acb187ef5a2b03b19116729059dc666c2e295fc1ac831cd38864c1673344e04855db279a3c96409550dfade09631b0818e6cb576ff02afba6a87624f7c33077f7f19c891f6413b317cabb834182585d445ce97aa0294beca4de0edd2f9a0f6429747e93db53df875249a9c0543a329dac0f46ef3ed4e64a7e51648d0820ba52b7845b6f98371aca7deb165b4f7b7537be0d659cf33bb34893417666d7c9e9f611332ccba6bae6024be6862350185886cf61354eda3591e96e344c59350ca6f68ce50670fa869\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334573735\u0026X-Goog-Signature=209b62c9c3e2657b9270ed126a6c1e3979471f9e308670ce1595e9c18a0ec7c627a313b1f1c87108355602dc11b804ff609ba3394fada2b4fd186c6fef4138e22222045d0fd0660f103e6378ac83c6bd7d8da27c1a3d306dbfd778f6738c2e492bd8527ba9112e086f15334b7ab7795b88212d3825b3be325606cc507e5504406bc4e9dc41b29fb99e1207fe74c11053e7332ad9fdd65587f55cd10cc1bfd903672f43de38a18cb346977d8498fd751ab8728ca580261a1a0367421437d56df7f5f7dda895ddd370954aee632258304da31fd8e7c33619dd5cf9ee0ebe9e60eba3cc416e871e181525f23e525489ef6dffced9211f7d3681ec24b3ac7c0b05ab\" 
width=\"300\"\u003e\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e2\u003c/th\u003e\n", - " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=8d32ad0137f3b10393aeaf635732f7fe337149323c0a33b634f9dcc592e662d4d268223f5c5cc89956787c5a3c061fa69c2de4d1a3225497dfad466548fcd4df7ee6000c4e60ff0661d0f9b451efa2701df5de55c322b3585c09210171403d47c358e6f41281f245711afd63eed3157e6674e9958ebffc5516f6ccb06b9af1830400e1d6a9bed44538b7212eeb257665aa43eecfa2af7482863f6032002de57f7a01af83cf5051541178297ab4a256710c04fd082cb51af4725dbafb288ee79caa630853dcbf35c49595b2b2e552804cabdbdf955806d57b97451e87035bd3ea89dbb5560a1c14170c531c3222a1d0b59d79d4c45b641887699f0bf5830612cb\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334233994\u0026X-Goog-Signature=6eed60a846158365a0c54d4c96516bb8c4011b497da203479fa9066bc2b7c2c7c683aa351d5249e05df64b8f86549f251150327013e41231ee06f22e9a51e1061c577d7b2da4704b80f684776702da13be6122ad6652463f210c53d9149da9e32a253fde4851723cbb616f18374a8f9b7b5cd36a82e66a2e5677ae51b906e6c4649ea27fcbbcba3ef24bf163a4b0f469c10eca8da464f58161ead8371bdb5b0d452ee713b0ec498ca9aaefd521a1d8240d82885a19af58ddefb167146ab9d411b5d0712457b126104b5f42f57ab4ede2da5ae9af74e708840b507a5538571804c60e9c166516e0118aa555a13169401e21f0a5ec303f3b801d6a972ef94426ee\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e3\u003c/th\u003e\n", - " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=472d88289a3910bdde93dd468e6ae9e66c0821fb0d72a356ef887771baa2a80f356b0ac67183873c0e89c87afb3080b3a78122fa7c3e37ad8b54d8aa216e9e04cb6f2af4f200784beba8de4eea7cf3a39faa8d800b7cc6bbf03df55beaeb69727d413266e2e59fb900bc25b6859d7c14db3c22aa6a0aeaa71c331ae0a2bd27c9e2c246fe931a2562210411491bc83bc34e7bef60901189949348eee909e64c90e3aab7b03d7c274ee84c03cca6dd624169f40323a785bcc1a8357810f738b45a637c09267530fbddc413a75622d793876c74c9128b11d8b4ea2fe2b959b34b96e06a9c384f72522ad357557d0a3b1c8b5792c2e94631139eae178fa660a5e8b2\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334678692\u0026X-Goog-Signature=239a77d1c63c4074b2ddc3dd21a5bc77de8ea993fdd911a199b6cca207fef9286307a8321d91d02cffda17060a217f50b2a1d8017bbcb29511339a1187b38332a39795fb1b2c754988948be8849a371fd66a7698f8e35a0f1a3430b6837ede73c37c90da49a062586828f19595d4246e4eabf3e3d629f251b066fb20e0f34ddad80599f5946aa76e0694b859f7d575dc781a850d56e9846d456d285b0023e90862154008154fa13ef1f95096a6161e6a1b314b82ca23bc44fc1b4b7799a9f936dca09ab09830446089d8defe11a2c0e066aa8a65204d9027140f314001c59187593290ae22c4092ee5d0293a6e5da1e1da3d42c32c0aced7db7ad4f774575deb\" 
width=\"300\"\u003e\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e4\u003c/th\u003e\n", - " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=1077450c755044fd6121e0dd67ec7de75a1785c4bacede635b78dd64d6b982ba02bba7a00065035fa13f003ef03c14d7f622b6a5c9ef19cadc956470e5a62e0f1855fb8cae0bbe723c0352224d152c85e173b234e3f5a1bcc2e1ddd4da065c184e82487eb4aa17464d330144b3b7c17ba357af1fb267f97730b3a798ff42a514ea47b83aa9f2560cfd428778638287c96ed17f95f0327e1cade380d046120d79479aca927d627ba65f3bc009c75d1662cbf93c6d33cea4b030a7906d3689922bac13ca3fab45536f115ae6253715cc9c24769e57e83657bf0393ce55e45f90d9666513a19a939044d3b3ad528757e022624d6601dacd326ae8ab3c9ce4ae6d52\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334302924\u0026X-Goog-Signature=3a25e70e59647f427ae55d72168c576fdb59b05f9ed6b72885a9e9c11f655e6bf10ded55744adcef330659731f0effdda7550ddca99c309cefe46ba864b088f155a5243e579d182a55f916d21d9dd0a83534a5e2efb2955a9146db12b22b4321e3f36b69f8d89d663507d7db83ac96bb2419a2baa0787fd0c6e6079d06652b8a2ac364a0a0a5d8d9de6331658b798abddbaaae1ede3026a4f0d955e74782afe240d31e6748bea8ab332ed945f541ca20c587b8c1449643f4748a3b059aae857334b6249bdf86794d307340b6a07d0dc47d3980e234be9c0549f52636d33776b7474fb95ebc014656c3cc217a777d438612a08849ac498baba0ce4716ac4ea432\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003c/tbody\u003e\n", - "\u003c/table\u003e\n", - "\u003cp\u003e5 rows × 2 columns\u003c/p\u003e\n", - "\u003c/div\u003e[5 rows x 2 columns in total]" - ], - "text/plain": [ - " image \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "\n", - " blurred \n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... 
\n", - "\n", - "[5 rows x 2 columns]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" + "ename": "BadRequest", + "evalue": "400 GET https://bigquery.googleapis.com/bigquery/v2/projects/bigframes-dev/queries/9b17582d-da3b-47d9-947f-a1c5efa11dd9?maxResults=0&location=US&prettyPrint=false: Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named 'cv2'. Traceback (most recent call last):\n File \"/srv/grpc/bq_udf_server.py\", line 119, in call_udf\n result = [\n ^\n File \"/srv/grpc/bq_udf_server.py\", line 120, in \n func(*row)\n File \"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\", line 43, in bigframes_handler\n return image_blur(*args)\n ^^^^^^^^^^^^^^^^^\n File \"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\", line 5, in image_blur\n import cv2 as cv\nModuleNotFoundError: No module named 'cv2'\n\n\nLocation: US\nJob ID: 9b17582d-da3b-47d9-947f-a1c5efa11dd9\n [{'@type': 'type.googleapis.com/google.rpc.DebugInfo', 'detail': '[INVALID_INPUT] message=QUERY_ERROR: [Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\'cv2\\'. Traceback (most recent call last):\\n File \"/srv/grpc/bq_udf_server.py\", line 119, in call_udf\\n result = [\\n ^\\n File \"/srv/grpc/bq_udf_server.py\", line 120, in \\n func(*row)\\n File \"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\", line 43, in bigframes_handler\\n return image_blur(*args)\\n ^^^^^^^^^^^^^^^^^\\n File \"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\", line 5, in image_blur\\n import cv2 as cv\\nModuleNotFoundError: No module named \\'cv2\\'\\n] debug=code: \\t BAD_QUERY\\ndescription: \"Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\\\\\'cv2\\\\\\'. 
Traceback (most recent call last):\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 119, in call_udf\\\\n result = [\\\\n ^\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 120, in \\\\n func(*row)\\\\n File \\\\\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\\\\\", line 43, in bigframes_handler\\\\n return image_blur(*args)\\\\n ^^^^^^^^^^^^^^^^^\\\\n File \\\\\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\\\\\", line 5, in image_blur\\\\n import cv2 as cv\\\\nModuleNotFoundError: No module named \\\\\\'cv2\\\\\\'\\\\n\"\\ncause: USER_ERROR\\naddress: \"http://jfdaj38.prod.google.com:4901/task?handle=logs.7122.serving.shard-hals.cloud-dataengine.14304701181199 Partition description: __SHUFFLE1_RAND0/117 ShuffleByTableDef \\\\\\'__SHUFFLE0\\\\\\' shuffle \\\\t { spec { num_sources: 1 num_sinks: 500 } partitioning { rand { num_partitions: 500 } } config { group_id: 3066382614537913598 shuffle_id: 0 precreated_mfs_path: \\\\\"/mfs/jf/home/cloud-dataengine/serving-shuffle/ttl=3d/d24bfe88e472d688_shuffle_cache/00\\\\\" } } sink_id: 116\"\\nstatus_proto {\\n code: 11\\n space: \"generic\"\\n message: \"Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\\\\\'cv2\\\\\\'. 
Traceback (most recent call last):\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 119, in call_udf\\\\n result = [\\\\n ^\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 120, in \\\\n func(*row)\\\\n File \\\\\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\\\\\", line 43, in bigframes_handler\\\\n return image_blur(*args)\\\\n ^^^^^^^^^^^^^^^^^\\\\n File \\\\\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\\\\\", line 5, in image_blur\\\\n import cv2 as cv\\\\nModuleNotFoundError: No module named \\\\\\'cv2\\\\\\'\\\\n\"\\n}\\nerror_details {\\n runtime_error {\\n query_error {\\n }\\n }\\n debug_info {\\n error_message_template: \"Encounter an error when invoking the function: $0\"\\n error_id: 3276645920\\n }\\n}\\n errorProto=code: \"QUERY_ERROR\"\\nargument: \"Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\\\\\'cv2\\\\\\'. Traceback (most recent call last):\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 119, in call_udf\\\\n result = [\\\\n ^\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 120, in \\\\n func(*row)\\\\n File \\\\\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\\\\\", line 43, in bigframes_handler\\\\n return image_blur(*args)\\\\n ^^^^^^^^^^^^^^^^^\\\\n File \\\\\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\\\\\", line 5, in image_blur\\\\n import cv2 as cv\\\\nModuleNotFoundError: No module named \\\\\\'cv2\\\\\\'\\\\n\"\\nlocation_type: OTHER\\nlocation: \"query\"\\n\\n\\tat com.google.cloud.helix.common.Exceptions.fromProto(Exceptions.java:1983)\\n\\tat com.google.cloud.helix.common.dremel.QueryExecutorImpl.mapDremelErrorsTohelixException(QueryExecutorImpl.java:1235)\\n\\tat com.google.cloud.helix.common.dremel.QueryExecutorImpl$ConfiguredQueryMigration$StreamHandler.onMessage(QueryExecutorImpl.java:810)\\n\\tat 
com.google.cloud.helix.common.dremel.QueryExecutorImpl$ConfiguredQueryMigration$StreamHandler.onMessage(QueryExecutorImpl.java:736)\\n\\tat com.google.net.rpc3.stream.RpcMessageCallback$ForwardingRpcMessageCallback.onMessage(RpcMessageCallback.java:128)\\n\\tat com.google.net.rpc3.impl.RpcStreamInternalContext.processMessageUnlocked(RpcStreamInternalContext.java:1861)\\n\\tat com.google.net.rpc3.impl.RpcStreamInternalContext.invokeCallbacksInternalUnlocked(RpcStreamInternalContext.java:2916)\\n\\tat com.google.net.rpc3.impl.RpcStreamInternalContext.invokeCallbacksUnlocked(RpcStreamInternalContext.java:2842)\\n\\tat com.google.net.eventmanager.AbstractFutureTask$Sync.innerRun(AbstractFutureTask.java:259)\\n\\tat com.google.net.eventmanager.AbstractFutureTask.run(AbstractFutureTask.java:120)\\n\\tat com.google.net.eventmanager.EventManagerImpl.runTaskTraced(EventManagerImpl.java:903)\\n\\tat com.google.net.eventmanager.EventManagerImpl.runTask(EventManagerImpl.java:895)\\n\\tat com.google.net.eventmanager.EventManagerImpl.internalRunWorkerLoop(EventManagerImpl.java:1322)\\n\\tat com.google.net.eventmanager.EventManagerImpl.runWorkerLoop(EventManagerImpl.java:1213)\\n\\tat com.google.net.eventmanager.WorkerThreadInfo.runWorkerLoop(WorkerThreadInfo.java:153)\\n\\tat com.google.net.eventmanager.EventManagerImpl$WorkerThread.run(EventManagerImpl.java:2006)\\n'}]", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mBadRequest\u001b[39m Traceback (most recent call last)", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/core/logging/log_adapter.py:183\u001b[39m, in \u001b[36mmethod_logger..outer_wrapper..wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 180\u001b[39m _call_stack.append(full_method_name)\n\u001b[32m 182\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> 
\u001b[39m\u001b[32m183\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 184\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mNotImplementedError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 185\u001b[39m \u001b[38;5;66;03m# Log method parameters that are implemented in pandas but either missing (TypeError)\u001b[39;00m\n\u001b[32m 186\u001b[39m \u001b[38;5;66;03m# or not fully supported (NotImplementedError) in BigFrames.\u001b[39;00m\n\u001b[32m 187\u001b[39m \u001b[38;5;66;03m# Logging is currently supported only when we can access the bqclient through\u001b[39;00m\n\u001b[32m 188\u001b[39m \u001b[38;5;66;03m# _block.session.bqclient.\u001b[39;00m\n\u001b[32m 189\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(_call_stack) == \u001b[32m1\u001b[39m:\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dataframe.py:835\u001b[39m, in \u001b[36mDataFrame._repr_mimebundle_\u001b[39m\u001b[34m(self, include, exclude)\u001b[39m\n\u001b[32m 831\u001b[39m \u001b[38;5;66;03m# TODO(b/467647693): Anywidget integration has been tested in Jupyter, VS Code, and\u001b[39;00m\n\u001b[32m 832\u001b[39m \u001b[38;5;66;03m# BQ Studio, but there is a known compatibility issue with Marimo that needs to be addressed.\u001b[39;00m\n\u001b[32m 833\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mbigframes\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdisplay\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m html\n\u001b[32m--> \u001b[39m\u001b[32m835\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[43mhtml\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrepr_mimebundle\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minclude\u001b[49m\u001b[43m=\u001b[49m\u001b[43minclude\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexclude\u001b[49m\u001b[43m=\u001b[49m\u001b[43mexclude\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/display/html.py:355\u001b[39m, in \u001b[36mrepr_mimebundle\u001b[39m\u001b[34m(obj, include, exclude)\u001b[39m\n\u001b[32m 345\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m:\n\u001b[32m 346\u001b[39m \u001b[38;5;66;03m# Anywidget is an optional dependency, so warn rather than fail.\u001b[39;00m\n\u001b[32m 347\u001b[39m \u001b[38;5;66;03m# TODO(shuowei): When Anywidget becomes the default for all repr modes,\u001b[39;00m\n\u001b[32m 348\u001b[39m \u001b[38;5;66;03m# remove this warning.\u001b[39;00m\n\u001b[32m 349\u001b[39m warnings.warn(\n\u001b[32m 350\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mAnywidget mode is not available. \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 351\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mPlease `pip install anywidget traitlets` or `pip install \u001b[39m\u001b[33m'\u001b[39m\u001b[33mbigframes[anywidget]\u001b[39m\u001b[33m'\u001b[39m\u001b[33m` to use interactive tables. \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 352\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mFalling back to static HTML. 
Error: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtraceback.format_exc()\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m 353\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m355\u001b[39m bundle = \u001b[43mrepr_mimebundle_head\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 356\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m opts.render_mode == \u001b[33m\"\u001b[39m\u001b[33mplaintext\u001b[39m\u001b[33m\"\u001b[39m:\n\u001b[32m 357\u001b[39m bundle.pop(\u001b[33m\"\u001b[39m\u001b[33mtext/html\u001b[39m\u001b[33m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/display/html.py:302\u001b[39m, in \u001b[36mrepr_mimebundle_head\u001b[39m\u001b[34m(obj)\u001b[39m\n\u001b[32m 300\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 301\u001b[39m df, _ = obj._get_display_df_and_blob_cols()\n\u001b[32m--> \u001b[39m\u001b[32m302\u001b[39m pandas_df, row_count, query_job = \u001b[43mdf\u001b[49m\u001b[43m.\u001b[49m\u001b[43m_block\u001b[49m\u001b[43m.\u001b[49m\u001b[43mretrieve_repr_request_results\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 303\u001b[39m \u001b[43m \u001b[49m\u001b[43mopts\u001b[49m\u001b[43m.\u001b[49m\u001b[43mmax_rows\u001b[49m\n\u001b[32m 304\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 306\u001b[39m obj._set_internal_query_job(query_job)\n\u001b[32m 307\u001b[39m column_count = \u001b[38;5;28mlen\u001b[39m(pandas_df.columns)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/core/blocks.py:1615\u001b[39m, in \u001b[36mBlock.retrieve_repr_request_results\u001b[39m\u001b[34m(self, max_results)\u001b[39m\n\u001b[32m 1613\u001b[39m \u001b[38;5;66;03m# head caches full underlying expression, so row_count will be free after\u001b[39;00m\n\u001b[32m 1614\u001b[39m executor = 
\u001b[38;5;28mself\u001b[39m.session._executor\n\u001b[32m-> \u001b[39m\u001b[32m1615\u001b[39m \u001b[43mexecutor\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcached\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1616\u001b[39m \u001b[43m \u001b[49m\u001b[43marray_value\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mexpr\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1617\u001b[39m \u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m=\u001b[49m\u001b[43mexecutors\u001b[49m\u001b[43m.\u001b[49m\u001b[43mCacheConfig\u001b[49m\u001b[43m(\u001b[49m\u001b[43moptimize_for\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mhead\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mif_cached\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mreuse-strict\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1618\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1619\u001b[39m head_result = \u001b[38;5;28mself\u001b[39m.session._executor.execute(\n\u001b[32m 1620\u001b[39m \u001b[38;5;28mself\u001b[39m.expr.slice(start=\u001b[38;5;28;01mNone\u001b[39;00m, stop=max_results, step=\u001b[38;5;28;01mNone\u001b[39;00m),\n\u001b[32m 1621\u001b[39m execution_spec.ExecutionSpec(\n\u001b[32m (...)\u001b[39m\u001b[32m 1624\u001b[39m ),\n\u001b[32m 1625\u001b[39m )\n\u001b[32m 1626\u001b[39m row_count = (\n\u001b[32m 1627\u001b[39m \u001b[38;5;28mself\u001b[39m.session._executor.execute(\n\u001b[32m 1628\u001b[39m \u001b[38;5;28mself\u001b[39m.expr.row_count(),\n\u001b[32m (...)\u001b[39m\u001b[32m 1635\u001b[39m .to_py_scalar()\n\u001b[32m 1636\u001b[39m )\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:348\u001b[39m, in \u001b[36mBigQueryCachingExecutor.cached\u001b[39m\u001b[34m(self, array_value, 
config)\u001b[39m\n\u001b[32m 346\u001b[39m \u001b[38;5;28mself\u001b[39m._cache_with_session_awareness(array_value)\n\u001b[32m 347\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m config.optimize_for == \u001b[33m\"\u001b[39m\u001b[33mhead\u001b[39m\u001b[33m\"\u001b[39m:\n\u001b[32m--> \u001b[39m\u001b[32m348\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_cache_with_offsets\u001b[49m\u001b[43m(\u001b[49m\u001b[43marray_value\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 349\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 350\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(config.optimize_for, executor.HierarchicalKey)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:504\u001b[39m, in \u001b[36mBigQueryCachingExecutor._cache_with_offsets\u001b[39m\u001b[34m(self, array_value)\u001b[39m\n\u001b[32m 500\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"Executes the query and uses the resulting table to rewrite future executions.\"\"\"\u001b[39;00m\n\u001b[32m 501\u001b[39m execution_spec = ex_spec.ExecutionSpec(\n\u001b[32m 502\u001b[39m destination_spec=ex_spec.CacheSpec(cluster_cols=\u001b[38;5;28mtuple\u001b[39m())\n\u001b[32m 503\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m504\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 505\u001b[39m \u001b[43m \u001b[49m\u001b[43marray_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 506\u001b[39m \u001b[43m \u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m=\u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 507\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:169\u001b[39m, in 
\u001b[36mBigQueryCachingExecutor.execute\u001b[39m\u001b[34m(self, array_value, execution_spec)\u001b[39m\n\u001b[32m 162\u001b[39m \u001b[38;5;28mself\u001b[39m._publisher.publish(\n\u001b[32m 163\u001b[39m bigframes.core.events.ExecutionFinished(\n\u001b[32m 164\u001b[39m result=result,\n\u001b[32m 165\u001b[39m )\n\u001b[32m 166\u001b[39m )\n\u001b[32m 167\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m result\n\u001b[32m--> \u001b[39m\u001b[32m169\u001b[39m result = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_execute_plan_gbq\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 170\u001b[39m \u001b[43m \u001b[49m\u001b[43marray_value\u001b[49m\u001b[43m.\u001b[49m\u001b[43mnode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 171\u001b[39m \u001b[43m \u001b[49m\u001b[43mordered\u001b[49m\u001b[43m=\u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m.\u001b[49m\u001b[43mordered\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 172\u001b[39m \u001b[43m \u001b[49m\u001b[43mpeek\u001b[49m\u001b[43m=\u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m.\u001b[49m\u001b[43mpeek\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 173\u001b[39m \u001b[43m \u001b[49m\u001b[43mcache_spec\u001b[49m\u001b[43m=\u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdestination_spec\u001b[49m\n\u001b[32m 174\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43misinstance\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdestination_spec\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mex_spec\u001b[49m\u001b[43m.\u001b[49m\u001b[43mCacheSpec\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 175\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 176\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mmust_create_table\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m.\u001b[49m\u001b[43mpromise_under_10gb\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 177\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 178\u001b[39m \u001b[38;5;66;03m# post steps: export\u001b[39;00m\n\u001b[32m 179\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(execution_spec.destination_spec, ex_spec.GcsOutputSpec):\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:687\u001b[39m, in \u001b[36mBigQueryCachingExecutor._execute_plan_gbq\u001b[39m\u001b[34m(self, plan, ordered, peek, cache_spec, must_create_table)\u001b[39m\n\u001b[32m 679\u001b[39m iterator, query_job = \u001b[38;5;28mself\u001b[39m._run_execute_query(\n\u001b[32m 680\u001b[39m sql=compiled.sql,\n\u001b[32m 681\u001b[39m job_config=job_config,\n\u001b[32m 682\u001b[39m query_with_job=(destination_table \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m),\n\u001b[32m 683\u001b[39m session=plan.session,\n\u001b[32m 684\u001b[39m )\n\u001b[32m 685\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m iterator, query_job, compiled\n\u001b[32m--> \u001b[39m\u001b[32m687\u001b[39m iterator, query_job, compiled = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_compile_with_fallback\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrun_with_compiler\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 689\u001b[39m \u001b[38;5;66;03m# might have more columns than og schema, for hidden ordering columns\u001b[39;00m\n\u001b[32m 690\u001b[39m compiled_schema = compiled.sql_schema\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:458\u001b[39m, in 
\u001b[36mBigQueryCachingExecutor._compile_with_fallback\u001b[39m\u001b[34m(self, run_fn)\u001b[39m\n\u001b[32m 453\u001b[39m msg = bfe.format_message(\n\u001b[32m 454\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mCompiler ID \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcompiler_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m: BadRequest on sqlglot. \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 455\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mFalling back to ibis. Details: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me.message\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m 456\u001b[39m )\n\u001b[32m 457\u001b[39m warnings.warn(msg, category=\u001b[38;5;167;01mUserWarning\u001b[39;00m)\n\u001b[32m--> \u001b[39m\u001b[32m458\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mrun_fn\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mibis\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcompiler_id\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcompiler_id\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:679\u001b[39m, in \u001b[36mBigQueryCachingExecutor._execute_plan_gbq..run_with_compiler\u001b[39m\u001b[34m(compiler_name, compiler_id)\u001b[39m\n\u001b[32m 675\u001b[39m job_config.labels[\u001b[33m\"\u001b[39m\u001b[33mbigframes-dtypes\u001b[39m\u001b[33m\"\u001b[39m] = compiled.encoded_type_refs\n\u001b[32m 676\u001b[39m job_config.labels[\u001b[33m\"\u001b[39m\u001b[33mbigframes-compiler\u001b[39m\u001b[33m\"\u001b[39m] = (\n\u001b[32m 677\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcompiler_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m-\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcompiler_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m 
compiler_id \u001b[38;5;28;01melse\u001b[39;00m compiler_name\n\u001b[32m 678\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m679\u001b[39m iterator, query_job = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_run_execute_query\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 680\u001b[39m \u001b[43m \u001b[49m\u001b[43msql\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcompiled\u001b[49m\u001b[43m.\u001b[49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 681\u001b[39m \u001b[43m \u001b[49m\u001b[43mjob_config\u001b[49m\u001b[43m=\u001b[49m\u001b[43mjob_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 682\u001b[39m \u001b[43m \u001b[49m\u001b[43mquery_with_job\u001b[49m\u001b[43m=\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdestination_table\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 683\u001b[39m \u001b[43m \u001b[49m\u001b[43msession\u001b[49m\u001b[43m=\u001b[49m\u001b[43mplan\u001b[49m\u001b[43m.\u001b[49m\u001b[43msession\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 684\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 685\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m iterator, query_job, compiled\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:378\u001b[39m, in \u001b[36mBigQueryCachingExecutor._run_execute_query\u001b[39m\u001b[34m(self, sql, job_config, query_with_job, session)\u001b[39m\n\u001b[32m 375\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m 376\u001b[39m \u001b[38;5;66;03m# Trick the type checker into thinking we got a literal.\u001b[39;00m\n\u001b[32m 377\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m query_with_job:\n\u001b[32m--> \u001b[39m\u001b[32m378\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[43mbq_io\u001b[49m\u001b[43m.\u001b[49m\u001b[43mstart_query_with_client\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 379\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mbqclient\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 380\u001b[39m \u001b[43m \u001b[49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 381\u001b[39m \u001b[43m \u001b[49m\u001b[43mjob_config\u001b[49m\u001b[43m=\u001b[49m\u001b[43mjob_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 382\u001b[39m \u001b[43m \u001b[49m\u001b[43mmetrics\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mmetrics\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 383\u001b[39m \u001b[43m \u001b[49m\u001b[43mproject\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 384\u001b[39m \u001b[43m \u001b[49m\u001b[43mlocation\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 385\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 386\u001b[39m \u001b[43m \u001b[49m\u001b[43mquery_with_job\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 387\u001b[39m \u001b[43m \u001b[49m\u001b[43mpublisher\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_publisher\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 388\u001b[39m \u001b[43m \u001b[49m\u001b[43msession\u001b[49m\u001b[43m=\u001b[49m\u001b[43msession\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 389\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 390\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 391\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m bq_io.start_query_with_client(\n\u001b[32m 392\u001b[39m \u001b[38;5;28mself\u001b[39m.bqclient,\n\u001b[32m 
393\u001b[39m sql,\n\u001b[32m (...)\u001b[39m\u001b[32m 401\u001b[39m session=session,\n\u001b[32m 402\u001b[39m )\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/_io/bigquery/__init__.py:395\u001b[39m, in \u001b[36mstart_query_with_client\u001b[39m\u001b[34m(bq_client, sql, job_config, location, project, timeout, metrics, query_with_job, job_retry, publisher, session)\u001b[39m\n\u001b[32m 385\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m query_job.configuration.dry_run:\n\u001b[32m 386\u001b[39m publisher.publish(\n\u001b[32m 387\u001b[39m bigframes.core.events.BigQuerySentEvent(\n\u001b[32m 388\u001b[39m sql,\n\u001b[32m (...)\u001b[39m\u001b[32m 393\u001b[39m )\n\u001b[32m 394\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m395\u001b[39m results_iterator = \u001b[43mquery_job\u001b[49m\u001b[43m.\u001b[49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 396\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m query_job.configuration.dry_run:\n\u001b[32m 397\u001b[39m publisher.publish(\n\u001b[32m 398\u001b[39m bigframes.core.events.BigQueryFinishedEvent(\n\u001b[32m 399\u001b[39m billing_project=query_job.project,\n\u001b[32m (...)\u001b[39m\u001b[32m 409\u001b[39m )\n\u001b[32m 410\u001b[39m )\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/job/query.py:1797\u001b[39m, in \u001b[36mQueryJob.result\u001b[39m\u001b[34m(self, page_size, max_results, retry, timeout, start_index, job_retry)\u001b[39m\n\u001b[32m 1792\u001b[39m remaining_timeout = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1794\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m remaining_timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 1795\u001b[39m \u001b[38;5;66;03m# Since 
is_job_done() calls jobs.getQueryResults, which is a\u001b[39;00m\n\u001b[32m 1796\u001b[39m \u001b[38;5;66;03m# long-running API, don't delay the next request at all.\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1797\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[43mis_job_done\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[32m 1798\u001b[39m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[32m 1799\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1800\u001b[39m \u001b[38;5;66;03m# Use a monotonic clock since we don't actually care about\u001b[39;00m\n\u001b[32m 1801\u001b[39m \u001b[38;5;66;03m# daylight savings or similar, just the elapsed time.\u001b[39;00m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:294\u001b[39m, in \u001b[36mRetry.__call__..retry_wrapped_func\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 290\u001b[39m target = functools.partial(func, *args, **kwargs)\n\u001b[32m 291\u001b[39m sleep_generator = exponential_sleep_generator(\n\u001b[32m 292\u001b[39m \u001b[38;5;28mself\u001b[39m._initial, \u001b[38;5;28mself\u001b[39m._maximum, multiplier=\u001b[38;5;28mself\u001b[39m._multiplier\n\u001b[32m 293\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m294\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mretry_target\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 295\u001b[39m \u001b[43m \u001b[49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 296\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_predicate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 297\u001b[39m \u001b[43m \u001b[49m\u001b[43msleep_generator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 298\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 299\u001b[39m \u001b[43m \u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m=\u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 300\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:156\u001b[39m, in \u001b[36mretry_target\u001b[39m\u001b[34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[39m\n\u001b[32m 152\u001b[39m \u001b[38;5;66;03m# pylint: disable=broad-except\u001b[39;00m\n\u001b[32m 153\u001b[39m \u001b[38;5;66;03m# This function explicitly must deal with broad exceptions.\u001b[39;00m\n\u001b[32m 154\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[32m 155\u001b[39m \u001b[38;5;66;03m# defer to shared logic for handling errors\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m156\u001b[39m next_sleep = \u001b[43m_retry_error_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 157\u001b[39m \u001b[43m \u001b[49m\u001b[43mexc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 158\u001b[39m \u001b[43m \u001b[49m\u001b[43mdeadline\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 159\u001b[39m \u001b[43m \u001b[49m\u001b[43msleep_iter\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 160\u001b[39m \u001b[43m \u001b[49m\u001b[43merror_list\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 161\u001b[39m \u001b[43m \u001b[49m\u001b[43mpredicate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 162\u001b[39m \u001b[43m \u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 163\u001b[39m \u001b[43m \u001b[49m\u001b[43mexception_factory\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 164\u001b[39m 
\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 165\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 166\u001b[39m \u001b[38;5;66;03m# if exception not raised, sleep before next attempt\u001b[39;00m\n\u001b[32m 167\u001b[39m time.sleep(next_sleep)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_base.py:216\u001b[39m, in \u001b[36m_retry_error_helper\u001b[39m\u001b[34m(exc, deadline, sleep_iterator, error_list, predicate_fn, on_error_fn, exc_factory_fn, original_timeout)\u001b[39m\n\u001b[32m 210\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m predicate_fn(exc):\n\u001b[32m 211\u001b[39m final_exc, source_exc = exc_factory_fn(\n\u001b[32m 212\u001b[39m error_list,\n\u001b[32m 213\u001b[39m RetryFailureReason.NON_RETRYABLE_ERROR,\n\u001b[32m 214\u001b[39m original_timeout,\n\u001b[32m 215\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m216\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m final_exc \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msource_exc\u001b[39;00m\n\u001b[32m 217\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m on_error_fn \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 218\u001b[39m on_error_fn(exc)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:147\u001b[39m, in \u001b[36mretry_target\u001b[39m\u001b[34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[39m\n\u001b[32m 145\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m 146\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m147\u001b[39m result = 
\u001b[43mtarget\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 148\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m inspect.isawaitable(result):\n\u001b[32m 149\u001b[39m warnings.warn(_ASYNC_RETRY_WARNING)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/job/query.py:1766\u001b[39m, in \u001b[36mQueryJob.result..is_job_done\u001b[39m\u001b[34m()\u001b[39m\n\u001b[32m 1760\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[32m 1762\u001b[39m \u001b[38;5;66;03m# Call jobs.getQueryResults with max results set to 0 just to\u001b[39;00m\n\u001b[32m 1763\u001b[39m \u001b[38;5;66;03m# wait for the query to finish. Unlike most methods,\u001b[39;00m\n\u001b[32m 1764\u001b[39m \u001b[38;5;66;03m# jobs.getQueryResults hangs as long as it can to ensure we\u001b[39;00m\n\u001b[32m 1765\u001b[39m \u001b[38;5;66;03m# know when the query has finished as soon as possible.\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1766\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_reload_query_results\u001b[49m\u001b[43m(\u001b[49m\u001b[43mretry\u001b[49m\u001b[43m=\u001b[49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mreload_query_results_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1768\u001b[39m \u001b[38;5;66;03m# Even if the query is finished now according to\u001b[39;00m\n\u001b[32m 1769\u001b[39m \u001b[38;5;66;03m# jobs.getQueryResults, we'll want to reload the job status if\u001b[39;00m\n\u001b[32m 1770\u001b[39m \u001b[38;5;66;03m# it's not already DONE.\u001b[39;00m\n\u001b[32m 1771\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m\n", + "\u001b[36mFile 
\u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/job/query.py:1558\u001b[39m, in \u001b[36mQueryJob._reload_query_results\u001b[39m\u001b[34m(self, retry, timeout, page_size, start_index)\u001b[39m\n\u001b[32m 1555\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(transport_timeout, (\u001b[38;5;28mfloat\u001b[39m, \u001b[38;5;28mint\u001b[39m)):\n\u001b[32m 1556\u001b[39m transport_timeout = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1558\u001b[39m \u001b[38;5;28mself\u001b[39m._query_results = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_client\u001b[49m\u001b[43m.\u001b[49m\u001b[43m_get_query_results\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1559\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mjob_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1560\u001b[39m \u001b[43m \u001b[49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1561\u001b[39m \u001b[43m \u001b[49m\u001b[43mproject\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mproject\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1562\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout_ms\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout_ms\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1563\u001b[39m \u001b[43m \u001b[49m\u001b[43mlocation\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mlocation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1564\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtransport_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1565\u001b[39m \u001b[43m \u001b[49m\u001b[43mpage_size\u001b[49m\u001b[43m=\u001b[49m\u001b[43mpage_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1566\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mstart_index\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstart_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1567\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/client.py:2113\u001b[39m, in \u001b[36mClient._get_query_results\u001b[39m\u001b[34m(self, job_id, retry, project, timeout_ms, location, timeout, page_size, start_index)\u001b[39m\n\u001b[32m 2109\u001b[39m \u001b[38;5;66;03m# This call is typically made in a polling loop that checks whether the\u001b[39;00m\n\u001b[32m 2110\u001b[39m \u001b[38;5;66;03m# job is complete (from QueryJob.done(), called ultimately from\u001b[39;00m\n\u001b[32m 2111\u001b[39m \u001b[38;5;66;03m# QueryJob.result()). So we don't need to poll here.\u001b[39;00m\n\u001b[32m 2112\u001b[39m span_attributes = {\u001b[33m\"\u001b[39m\u001b[33mpath\u001b[39m\u001b[33m\"\u001b[39m: path}\n\u001b[32m-> \u001b[39m\u001b[32m2113\u001b[39m resource = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_call_api\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 2114\u001b[39m \u001b[43m \u001b[49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2115\u001b[39m \u001b[43m \u001b[49m\u001b[43mspan_name\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mBigQuery.getQueryResults\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 2116\u001b[39m \u001b[43m \u001b[49m\u001b[43mspan_attributes\u001b[49m\u001b[43m=\u001b[49m\u001b[43mspan_attributes\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2117\u001b[39m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mGET\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 2118\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m=\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2119\u001b[39m \u001b[43m \u001b[49m\u001b[43mquery_params\u001b[49m\u001b[43m=\u001b[49m\u001b[43mextra_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2120\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2121\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 2122\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m _QueryResults.from_api_repr(resource)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/client.py:861\u001b[39m, in \u001b[36mClient._call_api\u001b[39m\u001b[34m(self, retry, span_name, span_attributes, job_ref, headers, **kwargs)\u001b[39m\n\u001b[32m 857\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m span_name \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 858\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m create_span(\n\u001b[32m 859\u001b[39m name=span_name, attributes=span_attributes, client=\u001b[38;5;28mself\u001b[39m, job_ref=job_ref\n\u001b[32m 860\u001b[39m ):\n\u001b[32m--> \u001b[39m\u001b[32m861\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcall\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 863\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m call()\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:294\u001b[39m, in \u001b[36mRetry.__call__..retry_wrapped_func\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 290\u001b[39m target = functools.partial(func, *args, **kwargs)\n\u001b[32m 291\u001b[39m sleep_generator = exponential_sleep_generator(\n\u001b[32m 292\u001b[39m 
\u001b[38;5;28mself\u001b[39m._initial, \u001b[38;5;28mself\u001b[39m._maximum, multiplier=\u001b[38;5;28mself\u001b[39m._multiplier\n\u001b[32m 293\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m294\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mretry_target\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 295\u001b[39m \u001b[43m \u001b[49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 296\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_predicate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 297\u001b[39m \u001b[43m \u001b[49m\u001b[43msleep_generator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 298\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 299\u001b[39m \u001b[43m \u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m=\u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 300\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:156\u001b[39m, in \u001b[36mretry_target\u001b[39m\u001b[34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[39m\n\u001b[32m 152\u001b[39m \u001b[38;5;66;03m# pylint: disable=broad-except\u001b[39;00m\n\u001b[32m 153\u001b[39m \u001b[38;5;66;03m# This function explicitly must deal with broad exceptions.\u001b[39;00m\n\u001b[32m 154\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[32m 155\u001b[39m \u001b[38;5;66;03m# defer to shared logic for handling errors\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m156\u001b[39m next_sleep = \u001b[43m_retry_error_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 
157\u001b[39m \u001b[43m \u001b[49m\u001b[43mexc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 158\u001b[39m \u001b[43m \u001b[49m\u001b[43mdeadline\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 159\u001b[39m \u001b[43m \u001b[49m\u001b[43msleep_iter\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 160\u001b[39m \u001b[43m \u001b[49m\u001b[43merror_list\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 161\u001b[39m \u001b[43m \u001b[49m\u001b[43mpredicate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 162\u001b[39m \u001b[43m \u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 163\u001b[39m \u001b[43m \u001b[49m\u001b[43mexception_factory\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 164\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 165\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 166\u001b[39m \u001b[38;5;66;03m# if exception not raised, sleep before next attempt\u001b[39;00m\n\u001b[32m 167\u001b[39m time.sleep(next_sleep)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_base.py:216\u001b[39m, in \u001b[36m_retry_error_helper\u001b[39m\u001b[34m(exc, deadline, sleep_iterator, error_list, predicate_fn, on_error_fn, exc_factory_fn, original_timeout)\u001b[39m\n\u001b[32m 210\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m predicate_fn(exc):\n\u001b[32m 211\u001b[39m final_exc, source_exc = exc_factory_fn(\n\u001b[32m 212\u001b[39m error_list,\n\u001b[32m 213\u001b[39m RetryFailureReason.NON_RETRYABLE_ERROR,\n\u001b[32m 214\u001b[39m original_timeout,\n\u001b[32m 215\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m216\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m final_exc \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msource_exc\u001b[39;00m\n\u001b[32m 217\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m on_error_fn 
\u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 218\u001b[39m on_error_fn(exc)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:147\u001b[39m, in \u001b[36mretry_target\u001b[39m\u001b[34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[39m\n\u001b[32m 145\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m 146\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m147\u001b[39m result = \u001b[43mtarget\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 148\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m inspect.isawaitable(result):\n\u001b[32m 149\u001b[39m warnings.warn(_ASYNC_RETRY_WARNING)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/_http/__init__.py:494\u001b[39m, in \u001b[36mJSONConnection.api_request\u001b[39m\u001b[34m(self, method, path, query_params, data, content_type, headers, api_base_url, api_version, expect_json, _target_object, timeout, extra_api_info)\u001b[39m\n\u001b[32m 482\u001b[39m response = \u001b[38;5;28mself\u001b[39m._make_request(\n\u001b[32m 483\u001b[39m method=method,\n\u001b[32m 484\u001b[39m url=url,\n\u001b[32m (...)\u001b[39m\u001b[32m 490\u001b[39m extra_api_info=extra_api_info,\n\u001b[32m 491\u001b[39m )\n\u001b[32m 493\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[32m200\u001b[39m <= response.status_code < \u001b[32m300\u001b[39m:\n\u001b[32m--> \u001b[39m\u001b[32m494\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m exceptions.from_http_response(response)\n\u001b[32m 496\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m expect_json 
\u001b[38;5;129;01mand\u001b[39;00m response.content:\n\u001b[32m 497\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m response.json()\n", + "\u001b[31mBadRequest\u001b[39m: 400 GET https://bigquery.googleapis.com/bigquery/v2/projects/bigframes-dev/queries/9b17582d-da3b-47d9-947f-a1c5efa11dd9?maxResults=0&location=US&prettyPrint=false: Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named 'cv2'. Traceback (most recent call last):\n File \"/srv/grpc/bq_udf_server.py\", line 119, in call_udf\n result = [\n ^\n File \"/srv/grpc/bq_udf_server.py\", line 120, in \n func(*row)\n File \"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\", line 43, in bigframes_handler\n return image_blur(*args)\n ^^^^^^^^^^^^^^^^^\n File \"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\", line 5, in image_blur\n import cv2 as cv\nModuleNotFoundError: No module named 'cv2'\n\n\nLocation: US\nJob ID: 9b17582d-da3b-47d9-947f-a1c5efa11dd9\n [{'@type': 'type.googleapis.com/google.rpc.DebugInfo', 'detail': '[INVALID_INPUT] message=QUERY_ERROR: [Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\'cv2\\'. Traceback (most recent call last):\\n File \"/srv/grpc/bq_udf_server.py\", line 119, in call_udf\\n result = [\\n ^\\n File \"/srv/grpc/bq_udf_server.py\", line 120, in \\n func(*row)\\n File \"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\", line 43, in bigframes_handler\\n return image_blur(*args)\\n ^^^^^^^^^^^^^^^^^\\n File \"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\", line 5, in image_blur\\n import cv2 as cv\\nModuleNotFoundError: No module named \\'cv2\\'\\n] debug=code: \\t BAD_QUERY\\ndescription: \"Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\\\\\'cv2\\\\\\'. 
Traceback (most recent call last):\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 119, in call_udf\\\\n result = [\\\\n ^\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 120, in \\\\n func(*row)\\\\n File \\\\\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\\\\\", line 43, in bigframes_handler\\\\n return image_blur(*args)\\\\n ^^^^^^^^^^^^^^^^^\\\\n File \\\\\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\\\\\", line 5, in image_blur\\\\n import cv2 as cv\\\\nModuleNotFoundError: No module named \\\\\\'cv2\\\\\\'\\\\n\"\\ncause: USER_ERROR\\naddress: \"http://jfdaj38.prod.google.com:4901/task?handle=logs.7122.serving.shard-hals.cloud-dataengine.14304701181199 Partition description: __SHUFFLE1_RAND0/117 ShuffleByTableDef \\\\\\'__SHUFFLE0\\\\\\' shuffle \\\\t { spec { num_sources: 1 num_sinks: 500 } partitioning { rand { num_partitions: 500 } } config { group_id: 3066382614537913598 shuffle_id: 0 precreated_mfs_path: \\\\\"/mfs/jf/home/cloud-dataengine/serving-shuffle/ttl=3d/d24bfe88e472d688_shuffle_cache/00\\\\\" } } sink_id: 116\"\\nstatus_proto {\\n code: 11\\n space: \"generic\"\\n message: \"Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\\\\\'cv2\\\\\\'. 
Traceback (most recent call last):\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 119, in call_udf\\\\n result = [\\\\n ^\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 120, in \\\\n func(*row)\\\\n File \\\\\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\\\\\", line 43, in bigframes_handler\\\\n return image_blur(*args)\\\\n ^^^^^^^^^^^^^^^^^\\\\n File \\\\\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\\\\\", line 5, in image_blur\\\\n import cv2 as cv\\\\nModuleNotFoundError: No module named \\\\\\'cv2\\\\\\'\\\\n\"\\n}\\nerror_details {\\n runtime_error {\\n query_error {\\n }\\n }\\n debug_info {\\n error_message_template: \"Encounter an error when invoking the function: $0\"\\n error_id: 3276645920\\n }\\n}\\n errorProto=code: \"QUERY_ERROR\"\\nargument: \"Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\\\\\'cv2\\\\\\'. Traceback (most recent call last):\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 119, in call_udf\\\\n result = [\\\\n ^\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 120, in \\\\n func(*row)\\\\n File \\\\\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\\\\\", line 43, in bigframes_handler\\\\n return image_blur(*args)\\\\n ^^^^^^^^^^^^^^^^^\\\\n File \\\\\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\\\\\", line 5, in image_blur\\\\n import cv2 as cv\\\\nModuleNotFoundError: No module named \\\\\\'cv2\\\\\\'\\\\n\"\\nlocation_type: OTHER\\nlocation: \"query\"\\n\\n\\tat com.google.cloud.helix.common.Exceptions.fromProto(Exceptions.java:1983)\\n\\tat com.google.cloud.helix.common.dremel.QueryExecutorImpl.mapDremelErrorsTohelixException(QueryExecutorImpl.java:1235)\\n\\tat com.google.cloud.helix.common.dremel.QueryExecutorImpl$ConfiguredQueryMigration$StreamHandler.onMessage(QueryExecutorImpl.java:810)\\n\\tat 
com.google.cloud.helix.common.dremel.QueryExecutorImpl$ConfiguredQueryMigration$StreamHandler.onMessage(QueryExecutorImpl.java:736)\\n\\tat com.google.net.rpc3.stream.RpcMessageCallback$ForwardingRpcMessageCallback.onMessage(RpcMessageCallback.java:128)\\n\\tat com.google.net.rpc3.impl.RpcStreamInternalContext.processMessageUnlocked(RpcStreamInternalContext.java:1861)\\n\\tat com.google.net.rpc3.impl.RpcStreamInternalContext.invokeCallbacksInternalUnlocked(RpcStreamInternalContext.java:2916)\\n\\tat com.google.net.rpc3.impl.RpcStreamInternalContext.invokeCallbacksUnlocked(RpcStreamInternalContext.java:2842)\\n\\tat com.google.net.eventmanager.AbstractFutureTask$Sync.innerRun(AbstractFutureTask.java:259)\\n\\tat com.google.net.eventmanager.AbstractFutureTask.run(AbstractFutureTask.java:120)\\n\\tat com.google.net.eventmanager.EventManagerImpl.runTaskTraced(EventManagerImpl.java:903)\\n\\tat com.google.net.eventmanager.EventManagerImpl.runTask(EventManagerImpl.java:895)\\n\\tat com.google.net.eventmanager.EventManagerImpl.internalRunWorkerLoop(EventManagerImpl.java:1322)\\n\\tat com.google.net.eventmanager.EventManagerImpl.runWorkerLoop(EventManagerImpl.java:1213)\\n\\tat com.google.net.eventmanager.WorkerThreadInfo.runWorkerLoop(WorkerThreadInfo.java:153)\\n\\tat com.google.net.eventmanager.EventManagerImpl$WorkerThread.run(EventManagerImpl.java:2006)\\n'}]" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mKeyboardInterrupt\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[11]\u001b[39m\u001b[32m, line 72\u001b[39m\n\u001b[32m 68\u001b[39m df_image[\"blurred\"] = apply_transformation(\n\u001b[32m 69\u001b[39m df_image[\u001b[33m\"image\"\u001b[39m], f\"gs://{OUTPUT_BUCKET}/image_blur_transformed/\",\n\u001b[32m 70\u001b[39m image_blur, 
\u001b[32m20\u001b[39m, \u001b[32m20\u001b[39m\n\u001b[32m 71\u001b[39m )\n\u001b[32m---> \u001b[39m\u001b[32m72\u001b[39m df_image[[\u001b[33m\"image\"\u001b[39m, \u001b[33m\"blurred\"\u001b[39m]]\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/decorator.py:235\u001b[39m, in \u001b[36mdecorate..fun\u001b[39m\u001b[34m(*args, **kw)\u001b[39m\n\u001b[32m 233\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m kwsyntax:\n\u001b[32m 234\u001b[39m args, kw = fix(args, kw, sig)\n\u001b[32m--> \u001b[39m\u001b[32m235\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcaller\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m(\u001b[49m\u001b[43mextras\u001b[49m\u001b[43m \u001b[49m\u001b[43m+\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/core/logging/log_adapter.py:183\u001b[39m, in \u001b[36mmethod_logger..outer_wrapper..wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 180\u001b[39m _call_stack.append(full_method_name)\n\u001b[32m 182\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m183\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 184\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mNotImplementedError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m 
e:\n\u001b[32m 185\u001b[39m \u001b[38;5;66;03m# Log method parameters that are implemented in pandas but either missing (TypeError)\u001b[39;00m\n\u001b[32m 186\u001b[39m \u001b[38;5;66;03m# or not fully supported (NotImplementedError) in BigFrames.\u001b[39;00m\n\u001b[32m 187\u001b[39m \u001b[38;5;66;03m# Logging is currently supported only when we can access the bqclient through\u001b[39;00m\n\u001b[32m 188\u001b[39m \u001b[38;5;66;03m# _block.session.bqclient.\u001b[39;00m\n\u001b[32m 189\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(_call_stack) == \u001b[32m1\u001b[39m:\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dataframe.py:807\u001b[39m, in \u001b[36mDataFrame.__repr__\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 802\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m formatter.repr_query_job(\u001b[38;5;28mself\u001b[39m._compute_dry_run())\n\u001b[32m 804\u001b[39m \u001b[38;5;66;03m# TODO(swast): pass max_columns and get the true column count back. Maybe\u001b[39;00m\n\u001b[32m 805\u001b[39m \u001b[38;5;66;03m# get 1 more column than we have requested so that pandas can add the\u001b[39;00m\n\u001b[32m 806\u001b[39m \u001b[38;5;66;03m# ... 
for us?\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m807\u001b[39m pandas_df, row_count, query_job = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_block\u001b[49m\u001b[43m.\u001b[49m\u001b[43mretrieve_repr_request_results\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 808\u001b[39m \u001b[43m \u001b[49m\u001b[43mmax_results\u001b[49m\n\u001b[32m 809\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 811\u001b[39m \u001b[38;5;28mself\u001b[39m._set_internal_query_job(query_job)\n\u001b[32m 812\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mbigframes\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdisplay\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m plaintext\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/core/blocks.py:1615\u001b[39m, in \u001b[36mBlock.retrieve_repr_request_results\u001b[39m\u001b[34m(self, max_results)\u001b[39m\n\u001b[32m 1613\u001b[39m \u001b[38;5;66;03m# head caches full underlying expression, so row_count will be free after\u001b[39;00m\n\u001b[32m 1614\u001b[39m executor = \u001b[38;5;28mself\u001b[39m.session._executor\n\u001b[32m-> \u001b[39m\u001b[32m1615\u001b[39m \u001b[43mexecutor\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcached\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1616\u001b[39m \u001b[43m \u001b[49m\u001b[43marray_value\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mexpr\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1617\u001b[39m \u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m=\u001b[49m\u001b[43mexecutors\u001b[49m\u001b[43m.\u001b[49m\u001b[43mCacheConfig\u001b[49m\u001b[43m(\u001b[49m\u001b[43moptimize_for\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mhead\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mif_cached\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mreuse-strict\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1618\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1619\u001b[39m head_result = \u001b[38;5;28mself\u001b[39m.session._executor.execute(\n\u001b[32m 1620\u001b[39m \u001b[38;5;28mself\u001b[39m.expr.slice(start=\u001b[38;5;28;01mNone\u001b[39;00m, stop=max_results, step=\u001b[38;5;28;01mNone\u001b[39;00m),\n\u001b[32m 1621\u001b[39m execution_spec.ExecutionSpec(\n\u001b[32m (...)\u001b[39m\u001b[32m 1624\u001b[39m ),\n\u001b[32m 1625\u001b[39m )\n\u001b[32m 1626\u001b[39m row_count = (\n\u001b[32m 1627\u001b[39m \u001b[38;5;28mself\u001b[39m.session._executor.execute(\n\u001b[32m 1628\u001b[39m \u001b[38;5;28mself\u001b[39m.expr.row_count(),\n\u001b[32m (...)\u001b[39m\u001b[32m 1635\u001b[39m .to_py_scalar()\n\u001b[32m 1636\u001b[39m )\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:348\u001b[39m, in \u001b[36mBigQueryCachingExecutor.cached\u001b[39m\u001b[34m(self, array_value, config)\u001b[39m\n\u001b[32m 346\u001b[39m \u001b[38;5;28mself\u001b[39m._cache_with_session_awareness(array_value)\n\u001b[32m 347\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m config.optimize_for == \u001b[33m\"\u001b[39m\u001b[33mhead\u001b[39m\u001b[33m\"\u001b[39m:\n\u001b[32m--> \u001b[39m\u001b[32m348\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_cache_with_offsets\u001b[49m\u001b[43m(\u001b[49m\u001b[43marray_value\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 349\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 350\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(config.optimize_for, executor.HierarchicalKey)\n", + "\u001b[36mFile 
\u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:504\u001b[39m, in \u001b[36mBigQueryCachingExecutor._cache_with_offsets\u001b[39m\u001b[34m(self, array_value)\u001b[39m\n\u001b[32m 500\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"Executes the query and uses the resulting table to rewrite future executions.\"\"\"\u001b[39;00m\n\u001b[32m 501\u001b[39m execution_spec = ex_spec.ExecutionSpec(\n\u001b[32m 502\u001b[39m destination_spec=ex_spec.CacheSpec(cluster_cols=\u001b[38;5;28mtuple\u001b[39m())\n\u001b[32m 503\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m504\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 505\u001b[39m \u001b[43m \u001b[49m\u001b[43marray_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 506\u001b[39m \u001b[43m \u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m=\u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 507\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:169\u001b[39m, in \u001b[36mBigQueryCachingExecutor.execute\u001b[39m\u001b[34m(self, array_value, execution_spec)\u001b[39m\n\u001b[32m 162\u001b[39m \u001b[38;5;28mself\u001b[39m._publisher.publish(\n\u001b[32m 163\u001b[39m bigframes.core.events.ExecutionFinished(\n\u001b[32m 164\u001b[39m result=result,\n\u001b[32m 165\u001b[39m )\n\u001b[32m 166\u001b[39m )\n\u001b[32m 167\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m result\n\u001b[32m--> \u001b[39m\u001b[32m169\u001b[39m result = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_execute_plan_gbq\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 170\u001b[39m \u001b[43m 
\u001b[49m\u001b[43marray_value\u001b[49m\u001b[43m.\u001b[49m\u001b[43mnode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 171\u001b[39m \u001b[43m \u001b[49m\u001b[43mordered\u001b[49m\u001b[43m=\u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m.\u001b[49m\u001b[43mordered\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 172\u001b[39m \u001b[43m \u001b[49m\u001b[43mpeek\u001b[49m\u001b[43m=\u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m.\u001b[49m\u001b[43mpeek\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 173\u001b[39m \u001b[43m \u001b[49m\u001b[43mcache_spec\u001b[49m\u001b[43m=\u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdestination_spec\u001b[49m\n\u001b[32m 174\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43misinstance\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdestination_spec\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mex_spec\u001b[49m\u001b[43m.\u001b[49m\u001b[43mCacheSpec\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 175\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 176\u001b[39m \u001b[43m \u001b[49m\u001b[43mmust_create_table\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m.\u001b[49m\u001b[43mpromise_under_10gb\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 177\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 178\u001b[39m \u001b[38;5;66;03m# post steps: export\u001b[39;00m\n\u001b[32m 179\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(execution_spec.destination_spec, ex_spec.GcsOutputSpec):\n", + "\u001b[36mFile 
\u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:687\u001b[39m, in \u001b[36mBigQueryCachingExecutor._execute_plan_gbq\u001b[39m\u001b[34m(self, plan, ordered, peek, cache_spec, must_create_table)\u001b[39m\n\u001b[32m 679\u001b[39m iterator, query_job = \u001b[38;5;28mself\u001b[39m._run_execute_query(\n\u001b[32m 680\u001b[39m sql=compiled.sql,\n\u001b[32m 681\u001b[39m job_config=job_config,\n\u001b[32m 682\u001b[39m query_with_job=(destination_table \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m),\n\u001b[32m 683\u001b[39m session=plan.session,\n\u001b[32m 684\u001b[39m )\n\u001b[32m 685\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m iterator, query_job, compiled\n\u001b[32m--> \u001b[39m\u001b[32m687\u001b[39m iterator, query_job, compiled = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_compile_with_fallback\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrun_with_compiler\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 689\u001b[39m \u001b[38;5;66;03m# might have more columns than og schema, for hidden ordering columns\u001b[39;00m\n\u001b[32m 690\u001b[39m compiled_schema = compiled.sql_schema\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:451\u001b[39m, in \u001b[36mBigQueryCachingExecutor._compile_with_fallback\u001b[39m\u001b[34m(self, run_fn)\u001b[39m\n\u001b[32m 449\u001b[39m compiler_id = \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00muuid.uuid1().hex[:\u001b[32m12\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m 450\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m451\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[43mrun_fn\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43msqlglot\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcompiler_id\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcompiler_id\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 452\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m google.cloud.exceptions.BadRequest \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 453\u001b[39m msg = bfe.format_message(\n\u001b[32m 454\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mCompiler ID \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcompiler_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m: BadRequest on sqlglot. \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 455\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mFalling back to ibis. Details: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me.message\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m 456\u001b[39m )\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:679\u001b[39m, in \u001b[36mBigQueryCachingExecutor._execute_plan_gbq..run_with_compiler\u001b[39m\u001b[34m(compiler_name, compiler_id)\u001b[39m\n\u001b[32m 675\u001b[39m job_config.labels[\u001b[33m\"\u001b[39m\u001b[33mbigframes-dtypes\u001b[39m\u001b[33m\"\u001b[39m] = compiled.encoded_type_refs\n\u001b[32m 676\u001b[39m job_config.labels[\u001b[33m\"\u001b[39m\u001b[33mbigframes-compiler\u001b[39m\u001b[33m\"\u001b[39m] = (\n\u001b[32m 677\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcompiler_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m-\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcompiler_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m compiler_id \u001b[38;5;28;01melse\u001b[39;00m compiler_name\n\u001b[32m 678\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m679\u001b[39m iterator, 
query_job = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_run_execute_query\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 680\u001b[39m \u001b[43m \u001b[49m\u001b[43msql\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcompiled\u001b[49m\u001b[43m.\u001b[49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 681\u001b[39m \u001b[43m \u001b[49m\u001b[43mjob_config\u001b[49m\u001b[43m=\u001b[49m\u001b[43mjob_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 682\u001b[39m \u001b[43m \u001b[49m\u001b[43mquery_with_job\u001b[49m\u001b[43m=\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdestination_table\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 683\u001b[39m \u001b[43m \u001b[49m\u001b[43msession\u001b[49m\u001b[43m=\u001b[49m\u001b[43mplan\u001b[49m\u001b[43m.\u001b[49m\u001b[43msession\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 684\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 685\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m iterator, query_job, compiled\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:378\u001b[39m, in \u001b[36mBigQueryCachingExecutor._run_execute_query\u001b[39m\u001b[34m(self, sql, job_config, query_with_job, session)\u001b[39m\n\u001b[32m 375\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m 376\u001b[39m \u001b[38;5;66;03m# Trick the type checker into thinking we got a literal.\u001b[39;00m\n\u001b[32m 377\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m query_with_job:\n\u001b[32m--> \u001b[39m\u001b[32m378\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mbq_io\u001b[49m\u001b[43m.\u001b[49m\u001b[43mstart_query_with_client\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 379\u001b[39m \u001b[43m 
\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mbqclient\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 380\u001b[39m \u001b[43m \u001b[49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 381\u001b[39m \u001b[43m \u001b[49m\u001b[43mjob_config\u001b[49m\u001b[43m=\u001b[49m\u001b[43mjob_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 382\u001b[39m \u001b[43m \u001b[49m\u001b[43mmetrics\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mmetrics\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 383\u001b[39m \u001b[43m \u001b[49m\u001b[43mproject\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 384\u001b[39m \u001b[43m \u001b[49m\u001b[43mlocation\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 385\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 386\u001b[39m \u001b[43m \u001b[49m\u001b[43mquery_with_job\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 387\u001b[39m \u001b[43m \u001b[49m\u001b[43mpublisher\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_publisher\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 388\u001b[39m \u001b[43m \u001b[49m\u001b[43msession\u001b[49m\u001b[43m=\u001b[49m\u001b[43msession\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 389\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 390\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 391\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m bq_io.start_query_with_client(\n\u001b[32m 392\u001b[39m \u001b[38;5;28mself\u001b[39m.bqclient,\n\u001b[32m 393\u001b[39m sql,\n\u001b[32m (...)\u001b[39m\u001b[32m 401\u001b[39m session=session,\n\u001b[32m 402\u001b[39m )\n", + "\u001b[36mFile 
\u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/_io/bigquery/__init__.py:395\u001b[39m, in \u001b[36mstart_query_with_client\u001b[39m\u001b[34m(bq_client, sql, job_config, location, project, timeout, metrics, query_with_job, job_retry, publisher, session)\u001b[39m\n\u001b[32m 385\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m query_job.configuration.dry_run:\n\u001b[32m 386\u001b[39m publisher.publish(\n\u001b[32m 387\u001b[39m bigframes.core.events.BigQuerySentEvent(\n\u001b[32m 388\u001b[39m sql,\n\u001b[32m (...)\u001b[39m\u001b[32m 393\u001b[39m )\n\u001b[32m 394\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m395\u001b[39m results_iterator = \u001b[43mquery_job\u001b[49m\u001b[43m.\u001b[49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 396\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m query_job.configuration.dry_run:\n\u001b[32m 397\u001b[39m publisher.publish(\n\u001b[32m 398\u001b[39m bigframes.core.events.BigQueryFinishedEvent(\n\u001b[32m 399\u001b[39m billing_project=query_job.project,\n\u001b[32m (...)\u001b[39m\u001b[32m 409\u001b[39m )\n\u001b[32m 410\u001b[39m )\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/job/query.py:1797\u001b[39m, in \u001b[36mQueryJob.result\u001b[39m\u001b[34m(self, page_size, max_results, retry, timeout, start_index, job_retry)\u001b[39m\n\u001b[32m 1792\u001b[39m remaining_timeout = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1794\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m remaining_timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 1795\u001b[39m \u001b[38;5;66;03m# Since is_job_done() calls jobs.getQueryResults, which is a\u001b[39;00m\n\u001b[32m 1796\u001b[39m \u001b[38;5;66;03m# long-running API, don't 
delay the next request at all.\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1797\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[43mis_job_done\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[32m 1798\u001b[39m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[32m 1799\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1800\u001b[39m \u001b[38;5;66;03m# Use a monotonic clock since we don't actually care about\u001b[39;00m\n\u001b[32m 1801\u001b[39m \u001b[38;5;66;03m# daylight savings or similar, just the elapsed time.\u001b[39;00m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:294\u001b[39m, in \u001b[36mRetry.__call__..retry_wrapped_func\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 290\u001b[39m target = functools.partial(func, *args, **kwargs)\n\u001b[32m 291\u001b[39m sleep_generator = exponential_sleep_generator(\n\u001b[32m 292\u001b[39m \u001b[38;5;28mself\u001b[39m._initial, \u001b[38;5;28mself\u001b[39m._maximum, multiplier=\u001b[38;5;28mself\u001b[39m._multiplier\n\u001b[32m 293\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m294\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mretry_target\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 295\u001b[39m \u001b[43m \u001b[49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 296\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_predicate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 297\u001b[39m \u001b[43m \u001b[49m\u001b[43msleep_generator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 298\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 299\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m=\u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 300\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:147\u001b[39m, in \u001b[36mretry_target\u001b[39m\u001b[34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[39m\n\u001b[32m 145\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m 146\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m147\u001b[39m result = \u001b[43mtarget\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 148\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m inspect.isawaitable(result):\n\u001b[32m 149\u001b[39m warnings.warn(_ASYNC_RETRY_WARNING)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/job/query.py:1766\u001b[39m, in \u001b[36mQueryJob.result..is_job_done\u001b[39m\u001b[34m()\u001b[39m\n\u001b[32m 1760\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[32m 1762\u001b[39m \u001b[38;5;66;03m# Call jobs.getQueryResults with max results set to 0 just to\u001b[39;00m\n\u001b[32m 1763\u001b[39m \u001b[38;5;66;03m# wait for the query to finish. 
Unlike most methods,\u001b[39;00m\n\u001b[32m 1764\u001b[39m \u001b[38;5;66;03m# jobs.getQueryResults hangs as long as it can to ensure we\u001b[39;00m\n\u001b[32m 1765\u001b[39m \u001b[38;5;66;03m# know when the query has finished as soon as possible.\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1766\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_reload_query_results\u001b[49m\u001b[43m(\u001b[49m\u001b[43mretry\u001b[49m\u001b[43m=\u001b[49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mreload_query_results_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1768\u001b[39m \u001b[38;5;66;03m# Even if the query is finished now according to\u001b[39;00m\n\u001b[32m 1769\u001b[39m \u001b[38;5;66;03m# jobs.getQueryResults, we'll want to reload the job status if\u001b[39;00m\n\u001b[32m 1770\u001b[39m \u001b[38;5;66;03m# it's not already DONE.\u001b[39;00m\n\u001b[32m 1771\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/job/query.py:1558\u001b[39m, in \u001b[36mQueryJob._reload_query_results\u001b[39m\u001b[34m(self, retry, timeout, page_size, start_index)\u001b[39m\n\u001b[32m 1555\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(transport_timeout, (\u001b[38;5;28mfloat\u001b[39m, \u001b[38;5;28mint\u001b[39m)):\n\u001b[32m 1556\u001b[39m transport_timeout = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1558\u001b[39m \u001b[38;5;28mself\u001b[39m._query_results = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_client\u001b[49m\u001b[43m.\u001b[49m\u001b[43m_get_query_results\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1559\u001b[39m \u001b[43m 
\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mjob_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1560\u001b[39m \u001b[43m \u001b[49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1561\u001b[39m \u001b[43m \u001b[49m\u001b[43mproject\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mproject\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1562\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout_ms\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout_ms\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1563\u001b[39m \u001b[43m \u001b[49m\u001b[43mlocation\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mlocation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1564\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtransport_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1565\u001b[39m \u001b[43m \u001b[49m\u001b[43mpage_size\u001b[49m\u001b[43m=\u001b[49m\u001b[43mpage_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1566\u001b[39m \u001b[43m \u001b[49m\u001b[43mstart_index\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstart_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1567\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/client.py:2113\u001b[39m, in \u001b[36mClient._get_query_results\u001b[39m\u001b[34m(self, job_id, retry, project, timeout_ms, location, timeout, page_size, start_index)\u001b[39m\n\u001b[32m 2109\u001b[39m \u001b[38;5;66;03m# This call is typically made in a polling loop that checks whether the\u001b[39;00m\n\u001b[32m 2110\u001b[39m \u001b[38;5;66;03m# job is complete (from QueryJob.done(), called ultimately from\u001b[39;00m\n\u001b[32m 2111\u001b[39m \u001b[38;5;66;03m# QueryJob.result()). 
So we don't need to poll here.\u001b[39;00m\n\u001b[32m 2112\u001b[39m span_attributes = {\u001b[33m\"\u001b[39m\u001b[33mpath\u001b[39m\u001b[33m\"\u001b[39m: path}\n\u001b[32m-> \u001b[39m\u001b[32m2113\u001b[39m resource = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_call_api\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 2114\u001b[39m \u001b[43m \u001b[49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2115\u001b[39m \u001b[43m \u001b[49m\u001b[43mspan_name\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mBigQuery.getQueryResults\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 2116\u001b[39m \u001b[43m \u001b[49m\u001b[43mspan_attributes\u001b[49m\u001b[43m=\u001b[49m\u001b[43mspan_attributes\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2117\u001b[39m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mGET\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 2118\u001b[39m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m=\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2119\u001b[39m \u001b[43m \u001b[49m\u001b[43mquery_params\u001b[49m\u001b[43m=\u001b[49m\u001b[43mextra_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2120\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2121\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 2122\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m _QueryResults.from_api_repr(resource)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/client.py:861\u001b[39m, in \u001b[36mClient._call_api\u001b[39m\u001b[34m(self, retry, span_name, span_attributes, job_ref, headers, **kwargs)\u001b[39m\n\u001b[32m 857\u001b[39m 
\u001b[38;5;28;01mif\u001b[39;00m span_name \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 858\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m create_span(\n\u001b[32m 859\u001b[39m name=span_name, attributes=span_attributes, client=\u001b[38;5;28mself\u001b[39m, job_ref=job_ref\n\u001b[32m 860\u001b[39m ):\n\u001b[32m--> \u001b[39m\u001b[32m861\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcall\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 863\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m call()\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:294\u001b[39m, in \u001b[36mRetry.__call__..retry_wrapped_func\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 290\u001b[39m target = functools.partial(func, *args, **kwargs)\n\u001b[32m 291\u001b[39m sleep_generator = exponential_sleep_generator(\n\u001b[32m 292\u001b[39m \u001b[38;5;28mself\u001b[39m._initial, \u001b[38;5;28mself\u001b[39m._maximum, multiplier=\u001b[38;5;28mself\u001b[39m._multiplier\n\u001b[32m 293\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m294\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mretry_target\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 295\u001b[39m \u001b[43m \u001b[49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 296\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_predicate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 297\u001b[39m \u001b[43m \u001b[49m\u001b[43msleep_generator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 298\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 299\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m=\u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 300\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:147\u001b[39m, in \u001b[36mretry_target\u001b[39m\u001b[34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[39m\n\u001b[32m 145\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m 146\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m147\u001b[39m result = \u001b[43mtarget\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 148\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m inspect.isawaitable(result):\n\u001b[32m 149\u001b[39m warnings.warn(_ASYNC_RETRY_WARNING)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/_http/__init__.py:482\u001b[39m, in \u001b[36mJSONConnection.api_request\u001b[39m\u001b[34m(self, method, path, query_params, data, content_type, headers, api_base_url, api_version, expect_json, _target_object, timeout, extra_api_info)\u001b[39m\n\u001b[32m 479\u001b[39m data = json.dumps(data)\n\u001b[32m 480\u001b[39m content_type = \u001b[33m\"\u001b[39m\u001b[33mapplication/json\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m--> \u001b[39m\u001b[32m482\u001b[39m response = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_make_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 483\u001b[39m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 484\u001b[39m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m=\u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 485\u001b[39m 
\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 486\u001b[39m \u001b[43m \u001b[49m\u001b[43mcontent_type\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcontent_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 487\u001b[39m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m=\u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 488\u001b[39m \u001b[43m \u001b[49m\u001b[43mtarget_object\u001b[49m\u001b[43m=\u001b[49m\u001b[43m_target_object\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 489\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 490\u001b[39m \u001b[43m \u001b[49m\u001b[43mextra_api_info\u001b[49m\u001b[43m=\u001b[49m\u001b[43mextra_api_info\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 491\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 493\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[32m200\u001b[39m <= response.status_code < \u001b[32m300\u001b[39m:\n\u001b[32m 494\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m exceptions.from_http_response(response)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/_http/__init__.py:341\u001b[39m, in \u001b[36mJSONConnection._make_request\u001b[39m\u001b[34m(self, method, url, data, content_type, headers, target_object, timeout, extra_api_info)\u001b[39m\n\u001b[32m 338\u001b[39m headers[CLIENT_INFO_HEADER] = \u001b[38;5;28mself\u001b[39m.user_agent\n\u001b[32m 339\u001b[39m headers[\u001b[33m\"\u001b[39m\u001b[33mUser-Agent\u001b[39m\u001b[33m\"\u001b[39m] = \u001b[38;5;28mself\u001b[39m.user_agent\n\u001b[32m--> \u001b[39m\u001b[32m341\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_do_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 342\u001b[39m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtarget_object\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\n\u001b[32m 343\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/_http/__init__.py:379\u001b[39m, in \u001b[36mJSONConnection._do_request\u001b[39m\u001b[34m(self, method, url, headers, data, target_object, timeout)\u001b[39m\n\u001b[32m 345\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m_do_request\u001b[39m(\n\u001b[32m 346\u001b[39m \u001b[38;5;28mself\u001b[39m, method, url, headers, data, target_object, timeout=_DEFAULT_TIMEOUT\n\u001b[32m 347\u001b[39m ): \u001b[38;5;66;03m# pylint: disable=unused-argument\u001b[39;00m\n\u001b[32m 348\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33;03m\"\"\"Low-level helper: perform the actual API request over HTTP.\u001b[39;00m\n\u001b[32m 349\u001b[39m \n\u001b[32m 350\u001b[39m \u001b[33;03m Allows batch context managers to override and defer a request.\u001b[39;00m\n\u001b[32m (...)\u001b[39m\u001b[32m 377\u001b[39m \u001b[33;03m :returns: The HTTP response.\u001b[39;00m\n\u001b[32m 378\u001b[39m \u001b[33;03m \"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m379\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mhttp\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 380\u001b[39m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m=\u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m=\u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\n\u001b[32m 381\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/auth/transport/requests.py:543\u001b[39m, in \u001b[36mAuthorizedSession.request\u001b[39m\u001b[34m(self, method, url, data, headers, max_allowed_time, timeout, **kwargs)\u001b[39m\n\u001b[32m 541\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m TimeoutGuard(remaining_time) \u001b[38;5;28;01mas\u001b[39;00m guard:\n\u001b[32m 542\u001b[39m _helpers.request_log(_LOGGER, method, url, data, headers)\n\u001b[32m--> \u001b[39m\u001b[32m543\u001b[39m response = \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mAuthorizedSession\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 544\u001b[39m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 545\u001b[39m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 546\u001b[39m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 
547\u001b[39m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m=\u001b[49m\u001b[43mrequest_headers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 548\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 549\u001b[39m \u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\n\u001b[32m 550\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 551\u001b[39m remaining_time = guard.remaining_timeout\n\u001b[32m 553\u001b[39m \u001b[38;5;66;03m# If the response indicated that the credentials needed to be\u001b[39;00m\n\u001b[32m 554\u001b[39m \u001b[38;5;66;03m# refreshed, then refresh the credentials and re-attempt the\u001b[39;00m\n\u001b[32m 555\u001b[39m \u001b[38;5;66;03m# request.\u001b[39;00m\n\u001b[32m 556\u001b[39m \u001b[38;5;66;03m# A stored token may expire between the time it is retrieved and\u001b[39;00m\n\u001b[32m 557\u001b[39m \u001b[38;5;66;03m# the time the request is made, so we may need to try twice.\u001b[39;00m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/requests/sessions.py:592\u001b[39m, in \u001b[36mSession.request\u001b[39m\u001b[34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[39m\n\u001b[32m 587\u001b[39m send_kwargs = {\n\u001b[32m 588\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mtimeout\u001b[39m\u001b[33m\"\u001b[39m: timeout,\n\u001b[32m 589\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mallow_redirects\u001b[39m\u001b[33m\"\u001b[39m: allow_redirects,\n\u001b[32m 590\u001b[39m }\n\u001b[32m 591\u001b[39m send_kwargs.update(settings)\n\u001b[32m--> \u001b[39m\u001b[32m592\u001b[39m resp = 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprep\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43msend_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 594\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m resp\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/requests/sessions.py:706\u001b[39m, in \u001b[36mSession.send\u001b[39m\u001b[34m(self, request, **kwargs)\u001b[39m\n\u001b[32m 703\u001b[39m start = preferred_clock()\n\u001b[32m 705\u001b[39m \u001b[38;5;66;03m# Send the request\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m706\u001b[39m r = \u001b[43madapter\u001b[49m\u001b[43m.\u001b[49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 708\u001b[39m \u001b[38;5;66;03m# Total elapsed time of the request (approximately)\u001b[39;00m\n\u001b[32m 709\u001b[39m elapsed = preferred_clock() - start\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/requests/adapters.py:645\u001b[39m, in \u001b[36mHTTPAdapter.send\u001b[39m\u001b[34m(self, request, stream, timeout, verify, cert, proxies)\u001b[39m\n\u001b[32m 642\u001b[39m timeout = TimeoutSauce(connect=timeout, read=timeout)\n\u001b[32m 644\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m645\u001b[39m resp = \u001b[43mconn\u001b[49m\u001b[43m.\u001b[49m\u001b[43murlopen\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 646\u001b[39m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m=\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m.\u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 647\u001b[39m 
\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m=\u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 648\u001b[39m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[43m=\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m.\u001b[49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 649\u001b[39m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m=\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m.\u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 650\u001b[39m \u001b[43m \u001b[49m\u001b[43mredirect\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 651\u001b[39m \u001b[43m \u001b[49m\u001b[43massert_same_host\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 652\u001b[39m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 653\u001b[39m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 654\u001b[39m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mmax_retries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 655\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 656\u001b[39m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[43m=\u001b[49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 657\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 659\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m (ProtocolError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[32m 660\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m(err, 
request=request)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/urllib3/connectionpool.py:787\u001b[39m, in \u001b[36mHTTPConnectionPool.urlopen\u001b[39m\u001b[34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[39m\n\u001b[32m 784\u001b[39m response_conn = conn \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m release_conn \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 786\u001b[39m \u001b[38;5;66;03m# Make the request on the HTTPConnection object\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m787\u001b[39m response = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_make_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 788\u001b[39m \u001b[43m \u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 789\u001b[39m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 790\u001b[39m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 791\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 792\u001b[39m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[43m=\u001b[49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 793\u001b[39m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m=\u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 794\u001b[39m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[43m=\u001b[49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 795\u001b[39m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[43m=\u001b[49m\u001b[43mretries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 796\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mresponse_conn\u001b[49m\u001b[43m=\u001b[49m\u001b[43mresponse_conn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 797\u001b[39m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[43m=\u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 798\u001b[39m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 799\u001b[39m \u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mresponse_kw\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 800\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 802\u001b[39m \u001b[38;5;66;03m# Everything went great!\u001b[39;00m\n\u001b[32m 803\u001b[39m clean_exit = \u001b[38;5;28;01mTrue\u001b[39;00m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/urllib3/connectionpool.py:534\u001b[39m, in \u001b[36mHTTPConnectionPool._make_request\u001b[39m\u001b[34m(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)\u001b[39m\n\u001b[32m 532\u001b[39m \u001b[38;5;66;03m# Receive the response from the server\u001b[39;00m\n\u001b[32m 533\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m534\u001b[39m response = \u001b[43mconn\u001b[49m\u001b[43m.\u001b[49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 535\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m (BaseSSLError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 536\u001b[39m \u001b[38;5;28mself\u001b[39m._raise_timeout(err=e, url=url, timeout_value=read_timeout)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/urllib3/connection.py:571\u001b[39m, in 
\u001b[36mHTTPConnection.getresponse\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 568\u001b[39m _shutdown = \u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m.sock, \u001b[33m\"\u001b[39m\u001b[33mshutdown\u001b[39m\u001b[33m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[32m 570\u001b[39m \u001b[38;5;66;03m# Get the response from http.client.HTTPConnection\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m571\u001b[39m httplib_response = \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m.\u001b[49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 573\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m 574\u001b[39m assert_header_parsing(httplib_response.msg)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/3.13.0/lib/python3.13/http/client.py:1428\u001b[39m, in \u001b[36mHTTPConnection.getresponse\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 1426\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m 1427\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1428\u001b[39m \u001b[43mresponse\u001b[49m\u001b[43m.\u001b[49m\u001b[43mbegin\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1429\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m:\n\u001b[32m 1430\u001b[39m \u001b[38;5;28mself\u001b[39m.close()\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/3.13.0/lib/python3.13/http/client.py:331\u001b[39m, in \u001b[36mHTTPResponse.begin\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 329\u001b[39m \u001b[38;5;66;03m# read until we get a non-100 response\u001b[39;00m\n\u001b[32m 330\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m331\u001b[39m version, status, reason = 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_read_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 332\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m status != CONTINUE:\n\u001b[32m 333\u001b[39m \u001b[38;5;28;01mbreak\u001b[39;00m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/3.13.0/lib/python3.13/http/client.py:292\u001b[39m, in \u001b[36mHTTPResponse._read_status\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 291\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m_read_status\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[32m--> \u001b[39m\u001b[32m292\u001b[39m line = \u001b[38;5;28mstr\u001b[39m(\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mfp\u001b[49m\u001b[43m.\u001b[49m\u001b[43mreadline\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_MAXLINE\u001b[49m\u001b[43m \u001b[49m\u001b[43m+\u001b[49m\u001b[43m \u001b[49m\u001b[32;43m1\u001b[39;49m\u001b[43m)\u001b[49m, \u001b[33m\"\u001b[39m\u001b[33miso-8859-1\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 293\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(line) > _MAXLINE:\n\u001b[32m 294\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m LineTooLong(\u001b[33m\"\u001b[39m\u001b[33mstatus line\u001b[39m\u001b[33m\"\u001b[39m)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/3.13.0/lib/python3.13/socket.py:719\u001b[39m, in \u001b[36mSocketIO.readinto\u001b[39m\u001b[34m(self, b)\u001b[39m\n\u001b[32m 717\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mOSError\u001b[39;00m(\u001b[33m\"\u001b[39m\u001b[33mcannot read from timed out object\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 718\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m719\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_sock\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrecv_into\u001b[49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 720\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m timeout:\n\u001b[32m 721\u001b[39m \u001b[38;5;28mself\u001b[39m._timeout_occurred = \u001b[38;5;28;01mTrue\u001b[39;00m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/3.13.0/lib/python3.13/ssl.py:1304\u001b[39m, in \u001b[36mSSLSocket.recv_into\u001b[39m\u001b[34m(self, buffer, nbytes, flags)\u001b[39m\n\u001b[32m 1300\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m flags != \u001b[32m0\u001b[39m:\n\u001b[32m 1301\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[32m 1302\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mnon-zero flags not allowed in calls to recv_into() on \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[33m\"\u001b[39m %\n\u001b[32m 1303\u001b[39m \u001b[38;5;28mself\u001b[39m.\u001b[34m__class__\u001b[39m)\n\u001b[32m-> \u001b[39m\u001b[32m1304\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnbytes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1305\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1306\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m().recv_into(buffer, nbytes, flags)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/3.13.0/lib/python3.13/ssl.py:1138\u001b[39m, in \u001b[36mSSLSocket.read\u001b[39m\u001b[34m(self, len, buffer)\u001b[39m\n\u001b[32m 1136\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m 1137\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m buffer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m-> 
\u001b[39m\u001b[32m1138\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_sslobj\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1139\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1140\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._sslobj.read(\u001b[38;5;28mlen\u001b[39m)\n", + "\u001b[31mKeyboardInterrupt\u001b[39m: " + ] } ] }, @@ -704,10 +703,15 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:183: FutureWarning: Since upgrading the default model can cause unintended breakages, the\n", + "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/core/logging/log_adapter.py:183: FutureWarning: Since upgrading the default model can cause unintended breakages, the\n", "default model will be removed in BigFrames 3.0. Please supply an\n", "explicit model to avoid this message.\n", - " return method(*args, **kwargs)\n" + " return method(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/__init__.py:437: FutureWarning: You are using the BigFrames session default connection: bigframes-\n", + "default-connection, which can be different from the\n", + "BigQuery project default connection. 
This default\n", + "connection may change in the future.\n", + " warnings.warn(msg, category=FutureWarning)\n" ] } ] @@ -734,97 +738,61 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n" + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "ename": "BadRequest", + "evalue": "400 GET https://bigquery.googleapis.com/bigquery/v2/projects/bigframes-dev/queries/5e86e836-354e-439f-8300-f6186ff77b79?maxResults=0&location=US&prettyPrint=false: Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named 'cv2'. Traceback (most recent call last):\n File \"/srv/grpc/bq_udf_server.py\", line 119, in call_udf\n result = [\n ^\n File \"/srv/grpc/bq_udf_server.py\", line 120, in \n func(*row)\n File \"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\", line 43, in bigframes_handler\n return image_blur(*args)\n ^^^^^^^^^^^^^^^^^\n File \"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\", line 5, in image_blur\n import cv2 as cv\nModuleNotFoundError: No module named 'cv2'\n\n\nLocation: US\nJob ID: 5e86e836-354e-439f-8300-f6186ff77b79\n [{'@type': 'type.googleapis.com/google.rpc.DebugInfo', 'detail': '[INVALID_INPUT] message=QUERY_ERROR: [Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\'cv2\\'. Traceback (most recent call last):\\n File \"/srv/grpc/bq_udf_server.py\", line 119, in call_udf\\n result = [\\n ^\\n File \"/srv/grpc/bq_udf_server.py\", line 120, in \\n func(*row)\\n File \"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\", line 43, in bigframes_handler\\n return image_blur(*args)\\n ^^^^^^^^^^^^^^^^^\\n File \"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\", line 5, in image_blur\\n import cv2 as cv\\nModuleNotFoundError: No module named \\'cv2\\'\\n] debug=code: \\t BAD_QUERY\\ndescription: \"Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\\\\\'cv2\\\\\\'. 
Traceback (most recent call last):\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 119, in call_udf\\\\n result = [\\\\n ^\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 120, in \\\\n func(*row)\\\\n File \\\\\"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\\\\\", line 43, in bigframes_handler\\\\n return image_blur(*args)\\\\n ^^^^^^^^^^^^^^^^^\\\\n File \\\\\"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\\\\\", line 5, in image_blur\\\\n import cv2 as cv\\\\nModuleNotFoundError: No module named \\\\\\'cv2\\\\\\'\\\\n\"\\ncause: USER_ERROR\\naddress: \"http://jfbxw15.prod.google.com:4901/task?handle=logs.21974.serving.shard-hals.cloud-dataengine.14302809449245 Partition description: __SHUFFLE1/256 ShuffleByTableDef \\\\\\'__SHUFFLE0\\\\\\' shuffle \\\\t { spec { num_sources: 1 num_sinks: 500 } partitioning { rand { num_partitions: 500 } } config { group_id: 173353402696091398 shuffle_id: 0 precreated_mfs_path: \\\\\"/mfs/jf/home/cloud-dataengine/serving-shuffle/ttl=3d/a7f7e000544e4706_shuffle_cache/02\\\\\" } } sink_id: 256\"\\nstatus_proto {\\n code: 11\\n space: \"generic\"\\n message: \"Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\\\\\'cv2\\\\\\'. 
Traceback (most recent call last):\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 119, in call_udf\\\\n result = [\\\\n ^\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 120, in \\\\n func(*row)\\\\n File \\\\\"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\\\\\", line 43, in bigframes_handler\\\\n return image_blur(*args)\\\\n ^^^^^^^^^^^^^^^^^\\\\n File \\\\\"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\\\\\", line 5, in image_blur\\\\n import cv2 as cv\\\\nModuleNotFoundError: No module named \\\\\\'cv2\\\\\\'\\\\n\"\\n}\\nerror_details {\\n runtime_error {\\n query_error {\\n }\\n }\\n debug_info {\\n error_message_template: \"Encounter an error when invoking the function: $0\"\\n error_id: 3276645920\\n }\\n}\\n errorProto=code: \"QUERY_ERROR\"\\nargument: \"Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\\\\\'cv2\\\\\\'. Traceback (most recent call last):\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 119, in call_udf\\\\n result = [\\\\n ^\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 120, in \\\\n func(*row)\\\\n File \\\\\"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\\\\\", line 43, in bigframes_handler\\\\n return image_blur(*args)\\\\n ^^^^^^^^^^^^^^^^^\\\\n File \\\\\"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\\\\\", line 5, in image_blur\\\\n import cv2 as cv\\\\nModuleNotFoundError: No module named \\\\\\'cv2\\\\\\'\\\\n\"\\nlocation_type: OTHER\\nlocation: \"query\"\\n\\n\\tat com.google.cloud.helix.common.Exceptions.fromProto(Exceptions.java:1983)\\n\\tat com.google.cloud.helix.common.dremel.QueryExecutorImpl.mapDremelErrorsTohelixException(QueryExecutorImpl.java:1235)\\n\\tat com.google.cloud.helix.common.dremel.QueryExecutorImpl$ConfiguredQueryMigration$StreamHandler.onMessage(QueryExecutorImpl.java:810)\\n\\tat 
com.google.cloud.helix.common.dremel.QueryExecutorImpl$ConfiguredQueryMigration$StreamHandler.onMessage(QueryExecutorImpl.java:736)\\n\\tat com.google.net.rpc3.stream.RpcMessageCallback$ForwardingRpcMessageCallback.onMessage(RpcMessageCallback.java:128)\\n\\tat com.google.net.rpc3.impl.RpcStreamInternalContext.processMessageUnlocked(RpcStreamInternalContext.java:1861)\\n\\tat com.google.net.rpc3.impl.RpcStreamInternalContext.invokeCallbacksInternalUnlocked(RpcStreamInternalContext.java:2916)\\n\\tat com.google.net.rpc3.impl.RpcStreamInternalContext.invokeCallbacksUnlocked(RpcStreamInternalContext.java:2842)\\n\\tat com.google.net.eventmanager.AbstractFutureTask$Sync.innerRun(AbstractFutureTask.java:259)\\n\\tat com.google.net.eventmanager.AbstractFutureTask.run(AbstractFutureTask.java:120)\\n\\tat com.google.net.eventmanager.EventManagerImpl.runTaskTraced(EventManagerImpl.java:903)\\n\\tat com.google.net.eventmanager.EventManagerImpl.runTask(EventManagerImpl.java:895)\\n\\tat com.google.net.eventmanager.EventManagerImpl.internalRunWorkerLoop(EventManagerImpl.java:1322)\\n\\tat com.google.net.eventmanager.EventManagerImpl.runWorkerLoop(EventManagerImpl.java:1213)\\n\\tat com.google.net.eventmanager.WorkerThreadInfo.runWorkerLoop(WorkerThreadInfo.java:153)\\n\\tat com.google.net.eventmanager.EventManagerImpl$WorkerThread.run(EventManagerImpl.java:2006)\\n'}]", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mBadRequest\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[10]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;66;03m# Ask the same question on the images\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m answer = gemini.predict(df_image, prompt=[\u001b[33m\"what item is it?\"\u001b[39m, df_image[\u001b[33m\"image\"\u001b[39m]])\n\u001b[32m 3\u001b[39m 
answer[[\u001b[33m\"ml_generate_text_llm_result\"\u001b[39m, \u001b[33m\"image\"\u001b[39m]]\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/core/logging/log_adapter.py:183\u001b[39m, in \u001b[36mmethod_logger..outer_wrapper..wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 180\u001b[39m _call_stack.append(full_method_name)\n\u001b[32m 182\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m183\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 184\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mNotImplementedError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 185\u001b[39m \u001b[38;5;66;03m# Log method parameters that are implemented in pandas but either missing (TypeError)\u001b[39;00m\n\u001b[32m 186\u001b[39m \u001b[38;5;66;03m# or not fully supported (NotImplementedError) in BigFrames.\u001b[39;00m\n\u001b[32m 187\u001b[39m \u001b[38;5;66;03m# Logging is currently supported only when we can access the bqclient through\u001b[39;00m\n\u001b[32m 188\u001b[39m \u001b[38;5;66;03m# _block.session.bqclient.\u001b[39;00m\n\u001b[32m 189\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(_call_stack) == \u001b[32m1\u001b[39m:\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/ml/llm.py:764\u001b[39m, in \u001b[36mGeminiTextGenerator.predict\u001b[39m\u001b[34m(self, X, temperature, max_output_tokens, top_k, top_p, ground_with_google_search, max_retries, prompt, output_schema)\u001b[39m\n\u001b[32m 756\u001b[39m 
options[\u001b[33m\"\u001b[39m\u001b[33moutput_schema\u001b[39m\u001b[33m\"\u001b[39m] = output_schema\n\u001b[32m 757\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._predict_and_retry(\n\u001b[32m 758\u001b[39m core.BqmlModel.generate_table_tvf,\n\u001b[32m 759\u001b[39m X,\n\u001b[32m 760\u001b[39m options=options,\n\u001b[32m 761\u001b[39m max_retries=max_retries,\n\u001b[32m 762\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m764\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_predict_and_retry\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 765\u001b[39m \u001b[43m \u001b[49m\u001b[43mcore\u001b[49m\u001b[43m.\u001b[49m\u001b[43mBqmlModel\u001b[49m\u001b[43m.\u001b[49m\u001b[43mgenerate_text_tvf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 766\u001b[39m \u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 767\u001b[39m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[43m=\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 768\u001b[39m \u001b[43m \u001b[49m\u001b[43mmax_retries\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmax_retries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 769\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/ml/base.py:282\u001b[39m, in \u001b[36mRetriableRemotePredictor._predict_and_retry\u001b[39m\u001b[34m(self, bqml_model_predict_tvf, X, options, max_retries)\u001b[39m\n\u001b[32m 279\u001b[39m warnings.warn(msg, category=\u001b[38;5;167;01mRuntimeWarning\u001b[39;00m)\n\u001b[32m 280\u001b[39m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m282\u001b[39m df = 
\u001b[43mbqml_model_predict_tvf\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtvf\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_bqml_model\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdf_fail\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 284\u001b[39m success = df[bqml_model_predict_tvf.status_col].str.len() == \u001b[32m0\u001b[39m\n\u001b[32m 285\u001b[39m df_succ = df[success]\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/ml/core.py:197\u001b[39m, in \u001b[36mBqmlModel.generate_text\u001b[39m\u001b[34m(self, input_data, options)\u001b[39m\n\u001b[32m 191\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mgenerate_text\u001b[39m(\n\u001b[32m 192\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m 193\u001b[39m input_data: bpd.DataFrame,\n\u001b[32m 194\u001b[39m options: \u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, Union[\u001b[38;5;28mint\u001b[39m, \u001b[38;5;28mfloat\u001b[39m, \u001b[38;5;28mbool\u001b[39m]],\n\u001b[32m 195\u001b[39m ) -> bpd.DataFrame:\n\u001b[32m 196\u001b[39m options[\u001b[33m\"\u001b[39m\u001b[33mflatten_json_output\u001b[39m\u001b[33m\"\u001b[39m] = \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m197\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_apply_ml_tvf\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 198\u001b[39m \u001b[43m \u001b[49m\u001b[43minput_data\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 199\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mlambda\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43msource_sql\u001b[49m\u001b[43m:\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_sql_generator\u001b[49m\u001b[43m.\u001b[49m\u001b[43mml_generate_text\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 200\u001b[39m \u001b[43m \u001b[49m\u001b[43msource_sql\u001b[49m\u001b[43m=\u001b[49m\u001b[43msource_sql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 201\u001b[39m \u001b[43m \u001b[49m\u001b[43mstruct_options\u001b[49m\u001b[43m=\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 202\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 203\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/ml/core.py:103\u001b[39m, in \u001b[36mBqmlModel._apply_ml_tvf\u001b[39m\u001b[34m(self, input_data, apply_sql_tvf)\u001b[39m\n\u001b[32m 98\u001b[39m input_sql, index_col_ids, index_labels = input_data._to_sql_query(\n\u001b[32m 99\u001b[39m include_index=\u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[32m 100\u001b[39m )\n\u001b[32m 102\u001b[39m result_sql = apply_sql_tvf(input_sql)\n\u001b[32m--> \u001b[39m\u001b[32m103\u001b[39m df = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_session\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread_gbq_query\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 104\u001b[39m \u001b[43m \u001b[49m\u001b[43mresult_sql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 105\u001b[39m \u001b[43m \u001b[49m\u001b[43mindex_col\u001b[49m\u001b[43m=\u001b[49m\u001b[43mindex_col_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 106\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Many ML methods use nested JSON, which isn't yet compatible with\u001b[39;49;00m\n\u001b[32m 107\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# joining local results. 
Also, there is a chance that the results\u001b[39;49;00m\n\u001b[32m 108\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# are greater than 10 GB.\u001b[39;49;00m\n\u001b[32m 109\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# TODO(b/395912450): Once the limitations with local data are\u001b[39;49;00m\n\u001b[32m 110\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# resolved, consider setting allow_large_results only when expected\u001b[39;49;00m\n\u001b[32m 111\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# data size is large.\u001b[39;49;00m\n\u001b[32m 112\u001b[39m \u001b[43m \u001b[49m\u001b[43mallow_large_results\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 113\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 114\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m df._has_index:\n\u001b[32m 115\u001b[39m df.index.names = index_labels\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/core/logging/log_adapter.py:183\u001b[39m, in \u001b[36mmethod_logger..outer_wrapper..wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 180\u001b[39m _call_stack.append(full_method_name)\n\u001b[32m 182\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m183\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 184\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mNotImplementedError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 185\u001b[39m \u001b[38;5;66;03m# Log method parameters that are implemented in pandas but either missing (TypeError)\u001b[39;00m\n\u001b[32m 
186\u001b[39m \u001b[38;5;66;03m# or not fully supported (NotImplementedError) in BigFrames.\u001b[39;00m\n\u001b[32m 187\u001b[39m \u001b[38;5;66;03m# Logging is currently supported only when we can access the bqclient through\u001b[39;00m\n\u001b[32m 188\u001b[39m \u001b[38;5;66;03m# _block.session.bqclient.\u001b[39;00m\n\u001b[32m 189\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(_call_stack) == \u001b[32m1\u001b[39m:\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/__init__.py:780\u001b[39m, in \u001b[36mSession.read_gbq_query\u001b[39m\u001b[34m(self, query, index_col, columns, configuration, max_results, use_cache, col_order, filters, dry_run, allow_large_results)\u001b[39m\n\u001b[32m 777\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m allow_large_results \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 778\u001b[39m allow_large_results = bigframes._config.options._allow_large_results\n\u001b[32m--> \u001b[39m\u001b[32m780\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_loader\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread_gbq_query\u001b[49m\u001b[43m(\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# type: ignore # for dry_run overload\u001b[39;49;00m\n\u001b[32m 781\u001b[39m \u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[43m=\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 782\u001b[39m \u001b[43m \u001b[49m\u001b[43mindex_col\u001b[49m\u001b[43m=\u001b[49m\u001b[43mindex_col\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 783\u001b[39m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 784\u001b[39m \u001b[43m \u001b[49m\u001b[43mconfiguration\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconfiguration\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 785\u001b[39m 
\u001b[43m \u001b[49m\u001b[43mmax_results\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmax_results\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 786\u001b[39m \u001b[43m \u001b[49m\u001b[43muse_cache\u001b[49m\u001b[43m=\u001b[49m\u001b[43muse_cache\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 787\u001b[39m \u001b[43m \u001b[49m\u001b[43mfilters\u001b[49m\u001b[43m=\u001b[49m\u001b[43mfilters\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 788\u001b[39m \u001b[43m \u001b[49m\u001b[43mdry_run\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdry_run\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 789\u001b[39m \u001b[43m \u001b[49m\u001b[43mallow_large_results\u001b[49m\u001b[43m=\u001b[49m\u001b[43mallow_large_results\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 790\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/loader.py:1260\u001b[39m, in \u001b[36mGbqDataLoader.read_gbq_query\u001b[39m\u001b[34m(self, query, index_col, columns, configuration, max_results, use_cache, filters, dry_run, force_total_order, allow_large_results)\u001b[39m\n\u001b[32m 1257\u001b[39m \u001b[38;5;66;03m# TODO(b/421161077): If an explicit destination table is set in\u001b[39;00m\n\u001b[32m 1258\u001b[39m \u001b[38;5;66;03m# configuration, should we respect that setting?\u001b[39;00m\n\u001b[32m 1259\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m allow_large_results:\n\u001b[32m-> \u001b[39m\u001b[32m1260\u001b[39m destination, query_job = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_query_to_destination\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1261\u001b[39m \u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1262\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# No cluster candidates as user query might not be clusterable\u001b[39;49;00m\n\u001b[32m 1263\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# (eg because of ORDER BY 
clause)\u001b[39;49;00m\n\u001b[32m 1264\u001b[39m \u001b[43m \u001b[49m\u001b[43mcluster_candidates\u001b[49m\u001b[43m=\u001b[49m\u001b[43m[\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1265\u001b[39m \u001b[43m \u001b[49m\u001b[43mconfiguration\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconfiguration\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1266\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1267\u001b[39m query_job_for_metrics = query_job\n\u001b[32m 1268\u001b[39m rows: Optional[google.cloud.bigquery.table.RowIterator] = \u001b[38;5;28;01mNone\u001b[39;00m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/loader.py:1435\u001b[39m, in \u001b[36mGbqDataLoader._query_to_destination\u001b[39m\u001b[34m(self, query, cluster_candidates, configuration, do_clustering)\u001b[39m\n\u001b[32m 1429\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m query_job.destination, query_job\n\u001b[32m 1430\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m google.api_core.exceptions.BadRequest:\n\u001b[32m 1431\u001b[39m \u001b[38;5;66;03m# Some SELECT statements still aren't compatible with cluster\u001b[39;00m\n\u001b[32m 1432\u001b[39m \u001b[38;5;66;03m# tables as the destination. 
For example, if the query has a\u001b[39;00m\n\u001b[32m 1433\u001b[39m \u001b[38;5;66;03m# top-level ORDER BY, this conflicts with our ability to cluster\u001b[39;00m\n\u001b[32m 1434\u001b[39m \u001b[38;5;66;03m# the table by the index column(s).\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1435\u001b[39m query_job = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_start_query_with_job\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1436\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m query_job.destination, query_job\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/loader.py:1492\u001b[39m, in \u001b[36mGbqDataLoader._start_query_with_job\u001b[39m\u001b[34m(self, sql, job_config, timeout)\u001b[39m\n\u001b[32m 1486\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 1487\u001b[39m \u001b[33;03mStarts BigQuery query job and waits for results.\u001b[39;00m\n\u001b[32m 1488\u001b[39m \n\u001b[32m 1489\u001b[39m \u001b[33;03mDo not execute dataframe through this API, instead use the executor.\u001b[39;00m\n\u001b[32m 1490\u001b[39m \u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 1491\u001b[39m job_config = \u001b[38;5;28mself\u001b[39m._prepare_job_config(job_config)\n\u001b[32m-> \u001b[39m\u001b[32m1492\u001b[39m _, query_job = \u001b[43mbf_io_bigquery\u001b[49m\u001b[43m.\u001b[49m\u001b[43mstart_query_with_client\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1493\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_bqclient\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1494\u001b[39m \u001b[43m \u001b[49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1495\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mjob_config\u001b[49m\u001b[43m=\u001b[49m\u001b[43mjob_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1496\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1497\u001b[39m \u001b[43m \u001b[49m\u001b[43mlocation\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 1498\u001b[39m \u001b[43m \u001b[49m\u001b[43mproject\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 1499\u001b[39m \u001b[43m \u001b[49m\u001b[43mmetrics\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 1500\u001b[39m \u001b[43m \u001b[49m\u001b[43mquery_with_job\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 1501\u001b[39m \u001b[43m \u001b[49m\u001b[43mpublisher\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_publisher\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1502\u001b[39m \u001b[43m \u001b[49m\u001b[43msession\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_session\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1503\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1504\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m query_job\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/_io/bigquery/__init__.py:395\u001b[39m, in \u001b[36mstart_query_with_client\u001b[39m\u001b[34m(bq_client, sql, job_config, location, project, timeout, metrics, query_with_job, job_retry, publisher, session)\u001b[39m\n\u001b[32m 385\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m query_job.configuration.dry_run:\n\u001b[32m 386\u001b[39m publisher.publish(\n\u001b[32m 387\u001b[39m 
bigframes.core.events.BigQuerySentEvent(\n\u001b[32m 388\u001b[39m sql,\n\u001b[32m (...)\u001b[39m\u001b[32m 393\u001b[39m )\n\u001b[32m 394\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m395\u001b[39m results_iterator = \u001b[43mquery_job\u001b[49m\u001b[43m.\u001b[49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 396\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m query_job.configuration.dry_run:\n\u001b[32m 397\u001b[39m publisher.publish(\n\u001b[32m 398\u001b[39m bigframes.core.events.BigQueryFinishedEvent(\n\u001b[32m 399\u001b[39m billing_project=query_job.project,\n\u001b[32m (...)\u001b[39m\u001b[32m 409\u001b[39m )\n\u001b[32m 410\u001b[39m )\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/job/query.py:1797\u001b[39m, in \u001b[36mQueryJob.result\u001b[39m\u001b[34m(self, page_size, max_results, retry, timeout, start_index, job_retry)\u001b[39m\n\u001b[32m 1792\u001b[39m remaining_timeout = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1794\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m remaining_timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 1795\u001b[39m \u001b[38;5;66;03m# Since is_job_done() calls jobs.getQueryResults, which is a\u001b[39;00m\n\u001b[32m 1796\u001b[39m \u001b[38;5;66;03m# long-running API, don't delay the next request at all.\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1797\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[43mis_job_done\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[32m 1798\u001b[39m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[32m 1799\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1800\u001b[39m \u001b[38;5;66;03m# Use a monotonic clock since we don't actually care about\u001b[39;00m\n\u001b[32m 1801\u001b[39m 
\u001b[38;5;66;03m# daylight savings or similar, just the elapsed time.\u001b[39;00m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:294\u001b[39m, in \u001b[36mRetry.__call__..retry_wrapped_func\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 290\u001b[39m target = functools.partial(func, *args, **kwargs)\n\u001b[32m 291\u001b[39m sleep_generator = exponential_sleep_generator(\n\u001b[32m 292\u001b[39m \u001b[38;5;28mself\u001b[39m._initial, \u001b[38;5;28mself\u001b[39m._maximum, multiplier=\u001b[38;5;28mself\u001b[39m._multiplier\n\u001b[32m 293\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m294\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mretry_target\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 295\u001b[39m \u001b[43m \u001b[49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 296\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_predicate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 297\u001b[39m \u001b[43m \u001b[49m\u001b[43msleep_generator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 298\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 299\u001b[39m \u001b[43m \u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m=\u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 300\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:156\u001b[39m, in \u001b[36mretry_target\u001b[39m\u001b[34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[39m\n\u001b[32m 152\u001b[39m \u001b[38;5;66;03m# 
pylint: disable=broad-except\u001b[39;00m\n\u001b[32m 153\u001b[39m \u001b[38;5;66;03m# This function explicitly must deal with broad exceptions.\u001b[39;00m\n\u001b[32m 154\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[32m 155\u001b[39m \u001b[38;5;66;03m# defer to shared logic for handling errors\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m156\u001b[39m next_sleep = \u001b[43m_retry_error_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 157\u001b[39m \u001b[43m \u001b[49m\u001b[43mexc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 158\u001b[39m \u001b[43m \u001b[49m\u001b[43mdeadline\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 159\u001b[39m \u001b[43m \u001b[49m\u001b[43msleep_iter\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 160\u001b[39m \u001b[43m \u001b[49m\u001b[43merror_list\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 161\u001b[39m \u001b[43m \u001b[49m\u001b[43mpredicate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 162\u001b[39m \u001b[43m \u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 163\u001b[39m \u001b[43m \u001b[49m\u001b[43mexception_factory\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 164\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 165\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 166\u001b[39m \u001b[38;5;66;03m# if exception not raised, sleep before next attempt\u001b[39;00m\n\u001b[32m 167\u001b[39m time.sleep(next_sleep)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_base.py:216\u001b[39m, in \u001b[36m_retry_error_helper\u001b[39m\u001b[34m(exc, deadline, sleep_iterator, error_list, predicate_fn, on_error_fn, exc_factory_fn, original_timeout)\u001b[39m\n\u001b[32m 210\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m 
predicate_fn(exc):\n\u001b[32m 211\u001b[39m final_exc, source_exc = exc_factory_fn(\n\u001b[32m 212\u001b[39m error_list,\n\u001b[32m 213\u001b[39m RetryFailureReason.NON_RETRYABLE_ERROR,\n\u001b[32m 214\u001b[39m original_timeout,\n\u001b[32m 215\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m216\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m final_exc \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msource_exc\u001b[39;00m\n\u001b[32m 217\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m on_error_fn \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 218\u001b[39m on_error_fn(exc)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:147\u001b[39m, in \u001b[36mretry_target\u001b[39m\u001b[34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[39m\n\u001b[32m 145\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m 146\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m147\u001b[39m result = \u001b[43mtarget\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 148\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m inspect.isawaitable(result):\n\u001b[32m 149\u001b[39m warnings.warn(_ASYNC_RETRY_WARNING)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/job/query.py:1766\u001b[39m, in \u001b[36mQueryJob.result..is_job_done\u001b[39m\u001b[34m()\u001b[39m\n\u001b[32m 1760\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[32m 1762\u001b[39m \u001b[38;5;66;03m# Call jobs.getQueryResults with max results set to 0 just to\u001b[39;00m\n\u001b[32m 1763\u001b[39m \u001b[38;5;66;03m# wait for 
the query to finish. Unlike most methods,\u001b[39;00m\n\u001b[32m 1764\u001b[39m \u001b[38;5;66;03m# jobs.getQueryResults hangs as long as it can to ensure we\u001b[39;00m\n\u001b[32m 1765\u001b[39m \u001b[38;5;66;03m# know when the query has finished as soon as possible.\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1766\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_reload_query_results\u001b[49m\u001b[43m(\u001b[49m\u001b[43mretry\u001b[49m\u001b[43m=\u001b[49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mreload_query_results_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1768\u001b[39m \u001b[38;5;66;03m# Even if the query is finished now according to\u001b[39;00m\n\u001b[32m 1769\u001b[39m \u001b[38;5;66;03m# jobs.getQueryResults, we'll want to reload the job status if\u001b[39;00m\n\u001b[32m 1770\u001b[39m \u001b[38;5;66;03m# it's not already DONE.\u001b[39;00m\n\u001b[32m 1771\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/job/query.py:1558\u001b[39m, in \u001b[36mQueryJob._reload_query_results\u001b[39m\u001b[34m(self, retry, timeout, page_size, start_index)\u001b[39m\n\u001b[32m 1555\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(transport_timeout, (\u001b[38;5;28mfloat\u001b[39m, \u001b[38;5;28mint\u001b[39m)):\n\u001b[32m 1556\u001b[39m transport_timeout = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1558\u001b[39m \u001b[38;5;28mself\u001b[39m._query_results = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_client\u001b[49m\u001b[43m.\u001b[49m\u001b[43m_get_query_results\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1559\u001b[39m 
\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mjob_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1560\u001b[39m \u001b[43m \u001b[49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1561\u001b[39m \u001b[43m \u001b[49m\u001b[43mproject\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mproject\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1562\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout_ms\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout_ms\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1563\u001b[39m \u001b[43m \u001b[49m\u001b[43mlocation\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mlocation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1564\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtransport_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1565\u001b[39m \u001b[43m \u001b[49m\u001b[43mpage_size\u001b[49m\u001b[43m=\u001b[49m\u001b[43mpage_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1566\u001b[39m \u001b[43m \u001b[49m\u001b[43mstart_index\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstart_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1567\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/client.py:2113\u001b[39m, in \u001b[36mClient._get_query_results\u001b[39m\u001b[34m(self, job_id, retry, project, timeout_ms, location, timeout, page_size, start_index)\u001b[39m\n\u001b[32m 2109\u001b[39m \u001b[38;5;66;03m# This call is typically made in a polling loop that checks whether the\u001b[39;00m\n\u001b[32m 2110\u001b[39m \u001b[38;5;66;03m# job is complete (from QueryJob.done(), called ultimately from\u001b[39;00m\n\u001b[32m 2111\u001b[39m \u001b[38;5;66;03m# QueryJob.result()). 
So we don't need to poll here.\u001b[39;00m\n\u001b[32m 2112\u001b[39m span_attributes = {\u001b[33m\"\u001b[39m\u001b[33mpath\u001b[39m\u001b[33m\"\u001b[39m: path}\n\u001b[32m-> \u001b[39m\u001b[32m2113\u001b[39m resource = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_call_api\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 2114\u001b[39m \u001b[43m \u001b[49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2115\u001b[39m \u001b[43m \u001b[49m\u001b[43mspan_name\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mBigQuery.getQueryResults\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 2116\u001b[39m \u001b[43m \u001b[49m\u001b[43mspan_attributes\u001b[49m\u001b[43m=\u001b[49m\u001b[43mspan_attributes\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2117\u001b[39m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mGET\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 2118\u001b[39m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m=\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2119\u001b[39m \u001b[43m \u001b[49m\u001b[43mquery_params\u001b[49m\u001b[43m=\u001b[49m\u001b[43mextra_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2120\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2121\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 2122\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m _QueryResults.from_api_repr(resource)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/client.py:861\u001b[39m, in \u001b[36mClient._call_api\u001b[39m\u001b[34m(self, retry, span_name, span_attributes, job_ref, headers, **kwargs)\u001b[39m\n\u001b[32m 857\u001b[39m 
\u001b[38;5;28;01mif\u001b[39;00m span_name \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 858\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m create_span(\n\u001b[32m 859\u001b[39m name=span_name, attributes=span_attributes, client=\u001b[38;5;28mself\u001b[39m, job_ref=job_ref\n\u001b[32m 860\u001b[39m ):\n\u001b[32m--> \u001b[39m\u001b[32m861\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcall\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 863\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m call()\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:294\u001b[39m, in \u001b[36mRetry.__call__..retry_wrapped_func\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 290\u001b[39m target = functools.partial(func, *args, **kwargs)\n\u001b[32m 291\u001b[39m sleep_generator = exponential_sleep_generator(\n\u001b[32m 292\u001b[39m \u001b[38;5;28mself\u001b[39m._initial, \u001b[38;5;28mself\u001b[39m._maximum, multiplier=\u001b[38;5;28mself\u001b[39m._multiplier\n\u001b[32m 293\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m294\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mretry_target\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 295\u001b[39m \u001b[43m \u001b[49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 296\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_predicate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 297\u001b[39m \u001b[43m \u001b[49m\u001b[43msleep_generator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 298\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 299\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m=\u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 300\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:156\u001b[39m, in \u001b[36mretry_target\u001b[39m\u001b[34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[39m\n\u001b[32m 152\u001b[39m \u001b[38;5;66;03m# pylint: disable=broad-except\u001b[39;00m\n\u001b[32m 153\u001b[39m \u001b[38;5;66;03m# This function explicitly must deal with broad exceptions.\u001b[39;00m\n\u001b[32m 154\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[32m 155\u001b[39m \u001b[38;5;66;03m# defer to shared logic for handling errors\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m156\u001b[39m next_sleep = \u001b[43m_retry_error_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 157\u001b[39m \u001b[43m \u001b[49m\u001b[43mexc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 158\u001b[39m \u001b[43m \u001b[49m\u001b[43mdeadline\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 159\u001b[39m \u001b[43m \u001b[49m\u001b[43msleep_iter\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 160\u001b[39m \u001b[43m \u001b[49m\u001b[43merror_list\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 161\u001b[39m \u001b[43m \u001b[49m\u001b[43mpredicate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 162\u001b[39m \u001b[43m \u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 163\u001b[39m \u001b[43m \u001b[49m\u001b[43mexception_factory\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 164\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 165\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 166\u001b[39m \u001b[38;5;66;03m# if exception 
not raised, sleep before next attempt\u001b[39;00m\n\u001b[32m 167\u001b[39m time.sleep(next_sleep)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_base.py:216\u001b[39m, in \u001b[36m_retry_error_helper\u001b[39m\u001b[34m(exc, deadline, sleep_iterator, error_list, predicate_fn, on_error_fn, exc_factory_fn, original_timeout)\u001b[39m\n\u001b[32m 210\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m predicate_fn(exc):\n\u001b[32m 211\u001b[39m final_exc, source_exc = exc_factory_fn(\n\u001b[32m 212\u001b[39m error_list,\n\u001b[32m 213\u001b[39m RetryFailureReason.NON_RETRYABLE_ERROR,\n\u001b[32m 214\u001b[39m original_timeout,\n\u001b[32m 215\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m216\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m final_exc \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msource_exc\u001b[39;00m\n\u001b[32m 217\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m on_error_fn \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 218\u001b[39m on_error_fn(exc)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:147\u001b[39m, in \u001b[36mretry_target\u001b[39m\u001b[34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[39m\n\u001b[32m 145\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m 146\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m147\u001b[39m result = \u001b[43mtarget\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 148\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m inspect.isawaitable(result):\n\u001b[32m 149\u001b[39m 
warnings.warn(_ASYNC_RETRY_WARNING)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/_http/__init__.py:494\u001b[39m, in \u001b[36mJSONConnection.api_request\u001b[39m\u001b[34m(self, method, path, query_params, data, content_type, headers, api_base_url, api_version, expect_json, _target_object, timeout, extra_api_info)\u001b[39m\n\u001b[32m 482\u001b[39m response = \u001b[38;5;28mself\u001b[39m._make_request(\n\u001b[32m 483\u001b[39m method=method,\n\u001b[32m 484\u001b[39m url=url,\n\u001b[32m (...)\u001b[39m\u001b[32m 490\u001b[39m extra_api_info=extra_api_info,\n\u001b[32m 491\u001b[39m )\n\u001b[32m 493\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[32m200\u001b[39m <= response.status_code < \u001b[32m300\u001b[39m:\n\u001b[32m--> \u001b[39m\u001b[32m494\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m exceptions.from_http_response(response)\n\u001b[32m 496\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m expect_json \u001b[38;5;129;01mand\u001b[39;00m response.content:\n\u001b[32m 497\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m response.json()\n", + "\u001b[31mBadRequest\u001b[39m: 400 GET https://bigquery.googleapis.com/bigquery/v2/projects/bigframes-dev/queries/5e86e836-354e-439f-8300-f6186ff77b79?maxResults=0&location=US&prettyPrint=false: Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named 'cv2'. 
Traceback (most recent call last):\n File \"/srv/grpc/bq_udf_server.py\", line 119, in call_udf\n result = [\n ^\n File \"/srv/grpc/bq_udf_server.py\", line 120, in \n func(*row)\n File \"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\", line 43, in bigframes_handler\n return image_blur(*args)\n ^^^^^^^^^^^^^^^^^\n File \"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\", line 5, in image_blur\n import cv2 as cv\nModuleNotFoundError: No module named 'cv2'\n\n\nLocation: US\nJob ID: 5e86e836-354e-439f-8300-f6186ff77b79\n [{'@type': 'type.googleapis.com/google.rpc.DebugInfo', 'detail': '[INVALID_INPUT] message=QUERY_ERROR: [Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\'cv2\\'. Traceback (most recent call last):\\n File \"/srv/grpc/bq_udf_server.py\", line 119, in call_udf\\n result = [\\n ^\\n File \"/srv/grpc/bq_udf_server.py\", line 120, in \\n func(*row)\\n File \"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\", line 43, in bigframes_handler\\n return image_blur(*args)\\n ^^^^^^^^^^^^^^^^^\\n File \"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\", line 5, in image_blur\\n import cv2 as cv\\nModuleNotFoundError: No module named \\'cv2\\'\\n] debug=code: \\t BAD_QUERY\\ndescription: \"Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\\\\\'cv2\\\\\\'. 
Traceback (most recent call last):\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 119, in call_udf\\\\n result = [\\\\n ^\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 120, in \\\\n func(*row)\\\\n File \\\\\"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\\\\\", line 43, in bigframes_handler\\\\n return image_blur(*args)\\\\n ^^^^^^^^^^^^^^^^^\\\\n File \\\\\"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\\\\\", line 5, in image_blur\\\\n import cv2 as cv\\\\nModuleNotFoundError: No module named \\\\\\'cv2\\\\\\'\\\\n\"\\ncause: USER_ERROR\\naddress: \"http://jfbxw15.prod.google.com:4901/task?handle=logs.21974.serving.shard-hals.cloud-dataengine.14302809449245 Partition description: __SHUFFLE1/256 ShuffleByTableDef \\\\\\'__SHUFFLE0\\\\\\' shuffle \\\\t { spec { num_sources: 1 num_sinks: 500 } partitioning { rand { num_partitions: 500 } } config { group_id: 173353402696091398 shuffle_id: 0 precreated_mfs_path: \\\\\"/mfs/jf/home/cloud-dataengine/serving-shuffle/ttl=3d/a7f7e000544e4706_shuffle_cache/02\\\\\" } } sink_id: 256\"\\nstatus_proto {\\n code: 11\\n space: \"generic\"\\n message: \"Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\\\\\'cv2\\\\\\'. 
Traceback (most recent call last):\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 119, in call_udf\\\\n result = [\\\\n ^\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 120, in \\\\n func(*row)\\\\n File \\\\\"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\\\\\", line 43, in bigframes_handler\\\\n return image_blur(*args)\\\\n ^^^^^^^^^^^^^^^^^\\\\n File \\\\\"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\\\\\", line 5, in image_blur\\\\n import cv2 as cv\\\\nModuleNotFoundError: No module named \\\\\\'cv2\\\\\\'\\\\n\"\\n}\\nerror_details {\\n runtime_error {\\n query_error {\\n }\\n }\\n debug_info {\\n error_message_template: \"Encounter an error when invoking the function: $0\"\\n error_id: 3276645920\\n }\\n}\\n errorProto=code: \"QUERY_ERROR\"\\nargument: \"Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\\\\\'cv2\\\\\\'. Traceback (most recent call last):\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 119, in call_udf\\\\n result = [\\\\n ^\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 120, in \\\\n func(*row)\\\\n File \\\\\"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\\\\\", line 43, in bigframes_handler\\\\n return image_blur(*args)\\\\n ^^^^^^^^^^^^^^^^^\\\\n File \\\\\"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\\\\\", line 5, in image_blur\\\\n import cv2 as cv\\\\nModuleNotFoundError: No module named \\\\\\'cv2\\\\\\'\\\\n\"\\nlocation_type: OTHER\\nlocation: \"query\"\\n\\n\\tat com.google.cloud.helix.common.Exceptions.fromProto(Exceptions.java:1983)\\n\\tat com.google.cloud.helix.common.dremel.QueryExecutorImpl.mapDremelErrorsTohelixException(QueryExecutorImpl.java:1235)\\n\\tat com.google.cloud.helix.common.dremel.QueryExecutorImpl$ConfiguredQueryMigration$StreamHandler.onMessage(QueryExecutorImpl.java:810)\\n\\tat 
com.google.cloud.helix.common.dremel.QueryExecutorImpl$ConfiguredQueryMigration$StreamHandler.onMessage(QueryExecutorImpl.java:736)\\n\\tat com.google.net.rpc3.stream.RpcMessageCallback$ForwardingRpcMessageCallback.onMessage(RpcMessageCallback.java:128)\\n\\tat com.google.net.rpc3.impl.RpcStreamInternalContext.processMessageUnlocked(RpcStreamInternalContext.java:1861)\\n\\tat com.google.net.rpc3.impl.RpcStreamInternalContext.invokeCallbacksInternalUnlocked(RpcStreamInternalContext.java:2916)\\n\\tat com.google.net.rpc3.impl.RpcStreamInternalContext.invokeCallbacksUnlocked(RpcStreamInternalContext.java:2842)\\n\\tat com.google.net.eventmanager.AbstractFutureTask$Sync.innerRun(AbstractFutureTask.java:259)\\n\\tat com.google.net.eventmanager.AbstractFutureTask.run(AbstractFutureTask.java:120)\\n\\tat com.google.net.eventmanager.EventManagerImpl.runTaskTraced(EventManagerImpl.java:903)\\n\\tat com.google.net.eventmanager.EventManagerImpl.runTask(EventManagerImpl.java:895)\\n\\tat com.google.net.eventmanager.EventManagerImpl.internalRunWorkerLoop(EventManagerImpl.java:1322)\\n\\tat com.google.net.eventmanager.EventManagerImpl.runWorkerLoop(EventManagerImpl.java:1213)\\n\\tat com.google.net.eventmanager.WorkerThreadInfo.runWorkerLoop(WorkerThreadInfo.java:153)\\n\\tat com.google.net.eventmanager.EventManagerImpl$WorkerThread.run(EventManagerImpl.java:2006)\\n'}]" ] }, { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "\u003c/style\u003e\n", - "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", - " \u003cthead\u003e\n", - " \u003ctr style=\"text-align: right;\"\u003e\n", - " \u003cth\u003e\u003c/th\u003e\n", - " \u003cth\u003eml_generate_text_llm_result\u003c/th\u003e\n", - " 
\u003cth\u003eimage\u003c/th\u003e\n", - " \u003c/tr\u003e\n", - " \u003c/thead\u003e\n", - " \u003ctbody\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e0\u003c/th\u003e\n", - " \u003ctd\u003eThe item is a container of K9 Guard Dog Paw Balm.\u003c/td\u003e\n", - " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=1b184636c15ee0a74b099df1903a0a79f2d0832d2cf829c84f9344269dc70408ea48dfaddce2f284cb4862cef857e8a0420627e25ca8c7ecc08d34bc69c695bc030ca8d90417860eecad65826160cbbf1cabe6c90d5e51a09c8b582bc542dfc5b309908be7b945d615eabba73b79912987306b3b110a5b0a9e52ccf900a3d2c490bb4e4572f3108f3acabf9a332e0fa503f74265f44d3b9ff40745afa4c59a3a0f3366ca4e4a800a09a5df0b363909a01705433e31bf2c9b7ccc0cc1e75f3e8ec323907140f29e4033238385eb83815b6d95ec54945cbf829d966510ae2504187f9c167fe70ac9e77231d4b38df380a7f6ec5f973828f21e51f4a95626ea0504\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e1\u003c/th\u003e\n", - " \u003ctd\u003eThe item is K9 Guard Dog Hot Spot Spray.\u003c/td\u003e\n", - " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=7e3adbf71385c63b494609546b7b2a3ef41dc437772a35d579bfed25594b3dd4716f8170dae6e4c9afcabce49c2af584561111983494d6dff504ff5eed611c3c7712cb667e6f1f3451aff5ac6911c4da9aeb2ba5c9227f9459f7c54085268a515eeb28add1b384a4724159f3d1408278e37716465f11a18f823dd4058004e43f9bdd4cab28fc97e97043adddc53c4b5e3059cf1641f1300bedded5a679f6922a45c021055e413953e2f0b000a6fd5047a0aa2fa5fce5f0ca08b6f93411704c9b10c6534894130c11f8ee9a8ee70d26a6455c0cbaf2088c1b5205328858e22026d1c4efc9e558a33082169f7eec5e4fb406de7de13e3ee72a48421e9896cca6f7\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e2\u003c/th\u003e\n", - " \u003ctd\u003eThe image contains three bags of food, likely for small animals like rabbits or guinea pigs. They are labeled \"Timoth Hay Lend Variety Plend\", \"Herbal Greeıs Mix Variety Blend\", and \"Berry \u0026 Blossom Treat Blend\", all under the brand \"Fluffy Buns.\" The bags are yellow, green, and purple, respectively. 
Each bag has a pile of its contents beneath it.\u003c/td\u003e\n", - " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=380d369f6fc8a0e4274420f7bb038aaf748111cd654f7dc20818a97d80da8e6b93125884982c656009cdfdc7e70496c1defa42b2de69e1b73d99e0b925953bd5e0e5ae20ddfac13a619f8c6b1bf6c3fe9cdb195910eb08efd96a193dda55488eacc2d0deac4d0a777fd7b7916a0cc1707f22463e1678c83b8ae51d082b8365c2643f85c19c59a9ec34c89fcc87b9c510cf6dbcdda5d0648d6602bd023b0a09e6b4b02c4cea9f6b10f563a14097e86a5cd9898ea3b3606a6c847a23ce97aed46b3154c1c1791da19edf172d7f57cd4e604bb2774ae3dc02d902c826ca9be17972ff17d612106b9ac61e734837646e5d0b40f8162798bf7695dccb0d320e6a58a9\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e3\u003c/th\u003e\n", - " \u003ctd\u003eThe item is a cat tree.\\n\u003c/td\u003e\n", - " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=712a70543cff388ba937bc867b4d94e1bfb09579bc7ecf998b8fb5194937cd15f491643f76925582ed5b7c853a9845b77e3c6c248126e211d45c3f6ebe751cc06193ae052999bf9bd827acbb204d2a64ad5d6eae1101fc5b2518f16ae29469ee7213cae403a3a6a29d7081561decc6b189593beb4b649bc7169828f4570a929d8b15c8dd0b3f259bfa4e2680b9d5b88653068357c7aefa0b1f26e10dc309b743da4164d9a5abd1761b00cc9a12380ba6fb3786f141b8e536fdc27e869b632c3e1a130f312ad5185362b0f9b30f473387a02905f22956992278d94fc2ef387a87cb855d35cfcabe9ad5d82c1b4dd85c56152e28438f6631322a4c229a9520adb5\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e4\u003c/th\u003e\n", - " \u003ctd\u003eThe item is a bag of bird seed. 
Specifically, it's labeled \"Chirpy Seed\", \"Deluxe Bird Food\".\\n\u003c/td\u003e\n", - " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=70d2709b3b655fb6add8616767e7886e7e304cc96fc891df927085d1e4d90ee9bb13b370762c6c5a8dd43baefa163312267203bc1b371954320bc27c32d0831f7f8937f288da999e506bf6f47d101cd2e49a870f3d5be428d321149f7e1c7d1146569d22f19640d62325665b6d08e7254a89535c021c8b464d65e754312dd47dde08be9ca58856a97d3c3f243030ccfbd8c1bda5ddca2b3618b113f6c1640afa14936b8c16c59d77c44139fe75f3719e2a83924fed36514c61787b02ace0d439f8d3c4fea81c9bf01684f8c06a39f7ec626e93d59262db87f2eea30dd0f849a3436d8dd36d2188f2e52826e8b96bf72614c256cda9867b1905a1d1cd3edebd18\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003c/tbody\u003e\n", - "\u003c/table\u003e\n", - "\u003cp\u003e5 rows × 2 columns\u003c/p\u003e\n", - "\u003c/div\u003e[5 rows x 2 columns in total]" + " Query processed 0 Bytes in a moment of slot time. [Job bigframes-dev:US.job_7iYLeJKq2dEkHXZjF99wCN-xOm57 details]\n", + " " ], "text/plain": [ - " ml_generate_text_llm_result \\\n", - "0 The item is a container of K9 Guard Dog Paw Balm. \n", - "1 The item is K9 Guard Dog Hot Spot Spray. \n", - "2 The image contains three bags of food, likely ... \n", - "3 The item is a cat tree.\\n \n", - "4 The item is a bag of bird seed. Specifically, ... \n", - "\n", - " image \n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... 
\n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", - "\n", - "[5 rows x 2 columns]" + "" ] }, - "execution_count": 10, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ] }, @@ -844,7 +812,8 @@ "metadata": { "id": "IG3J3HsKhyBY" }, - "execution_count": 11 + "execution_count": null, + "outputs": [] }, { "id": "829afc69", @@ -861,105 +830,8 @@ "id": "qKOb765IiVuD", "outputId": "731bafad-ea29-463f-c8c1-cb7acfd70e5d" }, - "execution_count": 12, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n" - ] - }, - { - "data": { - "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "\u003c/style\u003e\n", - "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", - " \u003cthead\u003e\n", - " \u003ctr style=\"text-align: right;\"\u003e\n", - " \u003cth\u003e\u003c/th\u003e\n", - " \u003cth\u003eml_generate_text_llm_result\u003c/th\u003e\n", - " \u003cth\u003eimage\u003c/th\u003e\n", - " \u003c/tr\u003e\n", - " \u003c/thead\u003e\n", - " \u003ctbody\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e0\u003c/th\u003e\n", - " \u003ctd\u003eThe item is a container of Dog Paw Balm.\u003c/td\u003e\n", - " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=988967bbfa1e4c70be10a6faa407fde8edbe0e93a7a1c9f9fbe8e81fa55c11bcc27b9673ae4d91b13edbc56ad5e50051a81d1639cafd38946f693f73e81e86a0bf6e945c3a8edf9b3b2d275203caba770fcb9a9fa272b969023fabda363efc8d90ae4b2bbc9a4f420699f3604d0a13607f96694687529b38bd80b211f5998ef0a11ab0f3a0b936a4e6068a9289ec43a9536416b5782ca2a0645c2b43f94ac6b8e31632c62b3714b56f28dce7e5675a66ded7bcc9d1c1a154f5a83f826b3f4b1363b1316536549d959d664931e6cd462a9d83362257e5226ad5b35b5e6c0e6a155fd62d2890b2fc071b59e6e4fa796a22e346bceebc4fb131eee66793f6748699\" width=\"300\"\u003e\u003c/td\u003e\n", - " 
\u003c/tr\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e1\u003c/th\u003e\n", - " \u003ctd\u003eThe picture contains many colors, including white, black, green, and a bright blue. The product label predominantly features a bright blue hue. The background is a solid gray.\u003c/td\u003e\n", - " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=6098f2a2487364290c54d4f6bcc98f29e5097702ab78da4ff9ef97d13c03a7fa3a17bd22324d61ae8264d3a59a6bdb2bf4be55bb7efffcb00c68f0c9b69f413f8b33b2089697456ef919690d59a68548e95ebd68526de53ab9062e18009ab73452fc1934c43f99ad75a45931fb250ef1e78a7ced3e0bfc9d9468ef300a63d750b8e249e7d20afa00895b7e586b16686141799603bcdf731b48237323de166d0a1a4f310519671a4ce6ae56b5d4ebd1887361cc21130b3e8fa57a6107d50239a671319ecbef623719554a9642489d2ea083c2cc466f5d73c55084023567e9db291b40a335e7f65b20da018a70cbf5ef9654aa86ec500aa4df35c1db81116abc83\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e2\u003c/th\u003e\n", - " \u003ctd\u003eHere are the product names from the image:\\n\\n* **Timoth Hay Lend Variety Plend** is the product in the yellow bag.\\n* **Herbal Greeıs Mix Variety Blend** is the product in the green bag.\\n* **Berry \u0026 Blossom Treat Blend** is the product in the purple bag.\u003c/td\u003e\n", - " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=9672e4d0fa6ea5f4bcf1605e35a7642cab21e7decfcfe55e37616c99a7774445cecf69c848ccd0dd92f5ae1925bfaa4bbd62ba413ecfffc835aa9a4596d18036381d595ec3387d4200a08b8fdb02ddfdb7432ebd14d5d4b77abf4628e61899c4383899ddbe6465ce91b856e8a02f37391c850589a971e6045ed0c389133dd63d4bac98d5fc744f83a51b000bf29c51111589bd34e2d287cc835ccd8c009270cef818eb3ed9ee07f9978bf24ce994311af944fc2b7d7066c3c37613fe08e0542efd277ab77df0fe1f168e05d2f6d6749e5d8af573c22032b29c3c47ac2f3b7978a09139ddfd38ab9045b5ae86d8804f23db758a7602225bd749a5b7552bae4eb8\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e3\u003c/th\u003e\n", - " \u003ctd\u003eYes, it is for pets. 
It appears to be a cat tree or scratching post.\\n\u003c/td\u003e\n", - " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=5c8e23c4c5f8017e09bf02b1b480200c74c1523aa25297141fe4abf226d7e18c546aed2ea5a2b5b5f8c5b90f2f4c569accf41a73ab8776285b458d230c6df43025eeb3066d40fe141089fa547b5a1f6a12d8c2eed0e614c8667be12c237d7ba4b29a530c16a6c8528cdfc8d8c4761aeae91c5f8452069f88de31cb637aaa34cbea60d8a8e9bd66b9d2f6f2c70a60d7791fc6bac13a4dd3736323da29fb52c5c90068c57c088407935255468513b780c07ea8e922f690645ee5ed8644240f894061c662b7bfe7ab660fd8466280e428424724f62a6c4eb6c781e3bf988059bd9f63fd6ffc2e75d296bca2ef068c6cdda0f6bb994f6799e04b33b4352f66a68224\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e4\u003c/th\u003e\n", - " \u003ctd\u003eThe image shows that the weight of the product is 15 oz/ 257g.\u003c/td\u003e\n", - " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=3be42f1f88c163e019f7496107a2f7d69b90a5482dd157a5d05e427fa117dd334a834dd9c72d0c84126e4f2c9e35903521ca8e81645afc945d8dbe47671f6b2f73ccbf494df6e5ad8e19c04ab4c6020859519c663b6fd57b19512fa94562106d01073b9122011ca602279a712fd761dc4ffaed3a8d7b76abcf1f42c2c1bce33228cff0e0454107cb84039a8981a2fb689191c43b7edf54e19354ff2e8c5deeb3a07944285b15db8a4fe474744f52852f8048c377708e2b3b85ee639b952e9292bc8d60d9b1a37c84ab83398ef295b8ba6b3c8e3a500714fb70e91b5fa7f19301fa4ce1bb1041f60a8ce84b3d863d5ba66f14614ed27689cd4daeb293b493172b\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003c/tbody\u003e\n", - "\u003c/table\u003e\n", - "\u003cp\u003e5 rows × 2 columns\u003c/p\u003e\n", - "\u003c/div\u003e[5 rows x 2 columns in total]" - ], - "text/plain": [ - " ml_generate_text_llm_result \\\n", - "0 The item is a container of Dog Paw Balm. \n", - "1 The picture contains many colors, including wh... \n", - "2 Here are the product names from the image:\\n\\n... \n", - "3 Yes, it is for pets. It appears to be a cat tr... \n", - "4 The image shows that the weight of the product... \n", - "\n", - " image \n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... 
\n", - "\n", - "[5 rows x 2 columns]" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ] + "execution_count": null, + "outputs": [] }, { "id": "e75df430", @@ -978,139 +850,8 @@ "id": "KATVv2CO5RT1", "outputId": "6ec01f27-70b6-4f69-c545-e5e3c879480c" }, - "execution_count": 13, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:183: FutureWarning: Since upgrading the default model can cause unintended breakages, the\n", - "default model will be removed in BigFrames 3.0. Please supply an\n", - "explicit model to avoid this message.\n", - " return method(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, - { - "data": { - "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "\u003c/style\u003e\n", - "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", - " \u003cthead\u003e\n", - " \u003ctr style=\"text-align: right;\"\u003e\n", - " \u003cth\u003e\u003c/th\u003e\n", - " \u003cth\u003eml_generate_embedding_result\u003c/th\u003e\n", - " \u003cth\u003eml_generate_embedding_status\u003c/th\u003e\n", - " \u003cth\u003eml_generate_embedding_start_sec\u003c/th\u003e\n", - " \u003cth\u003eml_generate_embedding_end_sec\u003c/th\u003e\n", - " \u003cth\u003econtent\u003c/th\u003e\n", - " \u003c/tr\u003e\n", - " \u003c/thead\u003e\n", - " \u003ctbody\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e0\u003c/th\u003e\n", - " \u003ctd\u003e[ 0.00638822 0.01666385 0.00451817 ... -0.02...\u003c/td\u003e\n", - " \u003ctd\u003e\u003c/td\u003e\n", - " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", - " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", - " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e1\u003c/th\u003e\n", - " \u003ctd\u003e[ 0.00973976 0.02148137 0.0024429 ... 
0.00...\u003c/td\u003e\n", - " \u003ctd\u003e\u003c/td\u003e\n", - " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", - " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", - " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e2\u003c/th\u003e\n", - " \u003ctd\u003e[ 0.01195884 0.02139394 0.05968047 ... -0.01...\u003c/td\u003e\n", - " \u003ctd\u003e\u003c/td\u003e\n", - " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", - " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", - " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e3\u003c/th\u003e\n", - " \u003ctd\u003e[-0.02621161 0.02797648 0.04416926 ... -0.01...\u003c/td\u003e\n", - " \u003ctd\u003e\u003c/td\u003e\n", - " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", - " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", - " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e4\u003c/th\u003e\n", - " \u003ctd\u003e[ 0.05918628 0.0125137 0.01907336 ... 0.01...\u003c/td\u003e\n", - " \u003ctd\u003e\u003c/td\u003e\n", - " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", - " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", - " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003c/tbody\u003e\n", - "\u003c/table\u003e\n", - "\u003cp\u003e5 rows × 5 columns\u003c/p\u003e\n", - "\u003c/div\u003e[5 rows x 5 columns in total]" - ], - "text/plain": [ - " ml_generate_embedding_result \\\n", - "0 [ 0.00638822 0.01666385 0.00451817 ... -0.02... \n", - "1 [ 0.00973976 0.02148137 0.0024429 ... 0.00... \n", - "2 [ 0.01195884 0.02139394 0.05968047 ... -0.01... \n", - "3 [-0.02621161 0.02797648 0.04416926 ... -0.01... 
\n", - "4 [ 0.05918628 0.0125137 0.01907336 ... 0.01... \n", - "\n", - " ml_generate_embedding_status ml_generate_embedding_start_sec \\\n", - "0 \u003cNA\u003e \n", - "1 \u003cNA\u003e \n", - "2 \u003cNA\u003e \n", - "3 \u003cNA\u003e \n", - "4 \u003cNA\u003e \n", - "\n", - " ml_generate_embedding_end_sec \\\n", - "0 \u003cNA\u003e \n", - "1 \u003cNA\u003e \n", - "2 \u003cNA\u003e \n", - "3 \u003cNA\u003e \n", - "4 \u003cNA\u003e \n", - "\n", - " content \n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", - "\n", - "[5 rows x 5 columns]" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ] + "execution_count": null, + "outputs": [] }, { "id": "23892b0e", @@ -1140,7 +881,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"pypdf\", \"requests\", \"cryptography\"],\n", ")\n", - "def pdf_extract(src_obj_ref_rt: str) -\u003e str:\n", + "def pdf_extract(src_obj_ref_rt: str) -> str:\n", " import io\n", " import json\n", " from pypdf import PdfReader\n", @@ -1167,7 +908,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"pypdf\", \"requests\", \"cryptography\"],\n", ")\n", - "def pdf_chunk(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -\u003e list[str]:\n", + "def pdf_chunk(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -> list[str]:\n", " import io\n", " import json\n", " from pypdf import PdfReader\n", @@ -1185,7 +926,7 @@ " page_text = page.extract_text()\n", " if page_text:\n", " curr_chunk += page_text\n", - " while len(curr_chunk) \u003e= chunk_size:\n", + " while len(curr_chunk) >= chunk_size:\n", " split_idx = curr_chunk.rfind(\" \", 0, chunk_size)\n", " if split_idx == -1:\n", " split_idx = 
chunk_size\n", @@ -1198,17 +939,8 @@ " return all_text_chunks" ], "metadata": {}, - "execution_count": 14, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", - " return global_session.with_default_session(\n" - ] - } - ] + "execution_count": null, + "outputs": [] }, { "id": "234a5f86", @@ -1243,59 +975,8 @@ "df_pdf[[\"extracted_text\", \"chunked\"]]" ], "metadata": {}, - "execution_count": 15, - "outputs": [ - { - "data": { - "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "\u003c/style\u003e\n", - "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", - " \u003cthead\u003e\n", - " \u003ctr style=\"text-align: right;\"\u003e\n", - " \u003cth\u003e\u003c/th\u003e\n", - " \u003cth\u003eextracted_text\u003c/th\u003e\n", - " \u003cth\u003echunked\u003c/th\u003e\n", - " \u003c/tr\u003e\n", - " \u003c/thead\u003e\n", - " \u003ctbody\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e0\u003c/th\u003e\n", - " \u003ctd\u003eCritterCuisine Pro 5000 - Automatic Pet Feeder...\u003c/td\u003e\n", - " \u003ctd\u003e[\"CritterCuisine Pro 5000 - Automatic Pet Feed...\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003c/tbody\u003e\n", - "\u003c/table\u003e\n", - "\u003cp\u003e1 rows × 2 columns\u003c/p\u003e\n", - "\u003c/div\u003e[1 rows x 2 columns in total]" - ], - "text/plain": [ - " extracted_text \\\n", - "0 CritterCuisine Pro 5000 - Automatic Pet Feeder... \n", - "\n", - " chunked \n", - "0 [\"CritterCuisine Pro 5000 - Automatic Pet Feed... 
\n", - "\n", - "[1 rows x 2 columns]" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ] + "execution_count": null, + "outputs": [] }, { "id": "d80effbe", @@ -1306,35 +987,8 @@ "chunked" ], "metadata": {}, - "execution_count": 16, - "outputs": [ - { - "data": { - "text/html": [ - "\u003cpre\u003e0 CritterCuisine Pro 5000 - Automatic Pet Feeder...\n", - "0 on a level, stable surface to prevent tipping....\n", - "0 included)\\nto maintain the schedule during pow...\n", - "0 digits for Meal 1 will flash.\\n\u0000. Use the UP/D...\n", - "0 paperclip) for 5\\nseconds. This will reset all...\n", - "0 unit with a damp cloth. Do not immerse the bas...\n", - "0 continues,\\ncontact customer support.\\nE2: Foo...\u003c/pre\u003e" - ], - "text/plain": [ - "0 CritterCuisine Pro 5000 - Automatic Pet Feeder...\n", - "0 on a level, stable surface to prevent tipping....\n", - "0 included)\\nto maintain the schedule during pow...\n", - "0 digits for Meal 1 will flash.\\n\u0000. Use the UP/D...\n", - "0 paperclip) for 5\\nseconds. This will reset all...\n", - "0 unit with a damp cloth. 
Do not immerse the bas...\n", - "0 continues,\\ncontact customer support.\\nE2: Foo...\n", - "Name: chunked, dtype: string" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ] + "execution_count": null, + "outputs": [] }, { "id": "118cf1c7", @@ -1374,7 +1028,8 @@ "df = df[['audio']]" ], "metadata": {}, - "execution_count": 17 + "execution_count": null, + "outputs": [] }, { "id": "c9f9d484", @@ -1407,32 +1062,7 @@ ], "metadata": {}, "execution_count": null, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, - { - "data": { - "text/html": [ - "\u003cpre\u003e0 Now, as all books, not primarily intended as p...\u003c/pre\u003e" - ], - "text/plain": [ - "0 Now, as all books, not primarily intended as p...\n", - "Name: transcribed_content, dtype: string" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ] + "outputs": [] }, { "id": "7209a62a", @@ -1453,23 +1083,8 @@ "transcribed_series_verbose" ], "metadata": {}, - "execution_count": 19, - "outputs": [ - { - "data": { - "text/html": [ - "\u003cpre\u003e0 {'status': '', 'content': 'Now, as all books, ...\u003c/pre\u003e" - ], - "text/plain": [ - "0 {'status': '', 'content': 'Now, as all books, ...\n", - "Name: transcription_results, dtype: struct\u003cstatus: string, content: string\u003e[pyarrow]" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ] + "execution_count": null, + "outputs": [] }, { "id": "c8351cc3", @@ -1507,7 +1122,7 @@ " container_cpu=0.33,\n", " 
container_memory=\"512Mi\"\n", ")\n", - "def extract_exif(src_obj_ref_rt: str) -\u003e str:\n", + "def extract_exif(src_obj_ref_rt: str) -> str:\n", " import io\n", " import json\n", " from PIL import ExifTags, Image\n", @@ -1526,17 +1141,8 @@ " return json.dumps(exif_dict)" ], "metadata": {}, - "execution_count": 20, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", - " return global_session.with_default_session(\n" - ] - } - ] + "execution_count": null, + "outputs": [] }, { "id": "40bb6bc9", @@ -1573,32 +1179,8 @@ "exif_data" ], "metadata": {}, - "execution_count": 21, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/utils.py:228: PreviewWarning: The JSON-related API `parse_json` is in preview. Its behavior may\n", - "change in future versions.\n", - " warnings.warn(bfe.format_message(msg), category=bfe.PreviewWarning)\n" - ] - }, - { - "data": { - "text/html": [ - "\u003cpre\u003e0 {\"ExifOffset\":47,\"Make\":\"MyCamera\"}\u003c/pre\u003e" - ], - "text/plain": [ - "0 {\"ExifOffset\":47,\"Make\":\"MyCamera\"}\n", - "Name: blob_col, dtype: extension\u003cdbjson\u003cJSONArrowType\u003e\u003e[pyarrow]" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ] + "execution_count": null, + "outputs": [] } ], "metadata": { From 1736391d5ae463b0474b48c2883496d53a93de39 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 1 May 2026 05:32:36 +0000 Subject: [PATCH 29/39] fix: pass package requirements to CodeDef during UDF provisioning and update notebook --- packages/bigframes/bigframes/functions/_function_session.py | 2 +- .../bigframes/notebooks/multimodal/multimodal_dataframe.ipynb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/packages/bigframes/bigframes/functions/_function_session.py b/packages/bigframes/bigframes/functions/_function_session.py index b20c02588448..c0900881adf8 100644 --- a/packages/bigframes/bigframes/functions/_function_session.py +++ b/packages/bigframes/bigframes/functions/_function_session.py @@ -835,7 +835,7 @@ def wrapper(func): session=session, # type: ignore ) config = udf_def.ManagedFunctionConfig( - code=udf_def.CodeDef.from_func(func), + code=udf_def.CodeDef.from_func(func, package_requirements=packages), signature=udf_sig, max_batching_rows=max_batching_rows, container_cpu=container_cpu, diff --git a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb index 09a7bfc2e6b5..0e3d7bfa065f 100644 --- a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb @@ -484,7 +484,7 @@ " input_types=[str, str, int, int],\n", " output_type=str,\n", " dataset=DATASET_ID,\n", - " name=\"image_blur\",\n", + " name=\"image_blur_v2\",\n", " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"opencv-python-headless\", \"numpy\", \"requests\"],\n", ")\n", From 2bbfe0ad5e96aedf04c37ae994f0daf03545892b Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 1 May 2026 06:01:34 +0000 Subject: [PATCH 30/39] fix: ensure correct package formatting and force invalidate cached functions --- .../bigframes/functions/_function_client.py | 9 +- .../bigframes/bigframes/functions/_utils.py | 2 +- .../bigframes/functions/function_template.py | 6 + .../multimodal/multimodal_dataframe.ipynb | 383 +----------------- 4 files changed, 29 insertions(+), 371 deletions(-) diff --git a/packages/bigframes/bigframes/functions/_function_client.py b/packages/bigframes/bigframes/functions/_function_client.py index cff35b7484fb..54c9171642e2 100644 --- a/packages/bigframes/bigframes/functions/_function_client.py +++ 
b/packages/bigframes/bigframes/functions/_function_client.py @@ -156,9 +156,16 @@ def _create_bq_function(self, create_function_ddl: str) -> None: logger.info(f"Created bigframes function {query_job.ddl_target_routine}") def _format_function_options(self, function_options: dict) -> str: + def format_val(val): + if isinstance(val, str): + return f"'{val}'" + if isinstance(val, (list, tuple)): + return str(list(val)) + return str(val) + return ", ".join( [ - f"{key}='{val}'" if isinstance(val, str) else f"{key}={val}" + f"{key}={format_val(val)}" for key, val in function_options.items() if val is not None ] diff --git a/packages/bigframes/bigframes/functions/_utils.py b/packages/bigframes/bigframes/functions/_utils.py index 09acaa8d4ceb..8bafc8766c79 100644 --- a/packages/bigframes/bigframes/functions/_utils.py +++ b/packages/bigframes/bigframes/functions/_utils.py @@ -108,7 +108,7 @@ def get_updated_package_requirements( requirements.append(f"numpy=={numpy.__version__}") if not requirements: - return package_requirements + return list(package_requirements) result = list(package_requirements) for package in requirements: diff --git a/packages/bigframes/bigframes/functions/function_template.py b/packages/bigframes/bigframes/functions/function_template.py index 1b7255387a6a..005a6fc91cdf 100644 --- a/packages/bigframes/bigframes/functions/function_template.py +++ b/packages/bigframes/bigframes/functions/function_template.py @@ -364,6 +364,12 @@ def bigframes_handler(*args): ) udf_code_block = [] + if code_def.package_requirements: + # Include package requirements as comments to help force a new + # BigQuery UDF definition when only package requirements change. + packages_comment = "# Packages: " + ", ".join(sorted(code_def.package_requirements)) + udf_code_block.append(packages_comment) + if not capture_references and signature.is_row_processor: # Enable postponed evaluation of type annotations. 
This converts all # type hints to strings at runtime, which is necessary for correctly diff --git a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb index 0e3d7bfa065f..74d9c36f6310 100644 --- a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb @@ -19,7 +19,7 @@ "# limitations under the License." ], "metadata": {}, - "execution_count": 1, + "execution_count": null, "outputs": [] }, { @@ -97,7 +97,7 @@ "# !pip install bigframes --upgrade" ], "metadata": {}, - "execution_count": 2, + "execution_count": null, "outputs": [] }, { @@ -138,7 +138,7 @@ "id": "bGyhLnfEeB0X", "outputId": "83ac8b64-3f44-4d43-d089-28a5026cbb42" }, - "execution_count": 3, + "execution_count": null, "outputs": [] }, { @@ -185,7 +185,7 @@ " return bpd.to_datetime(bbq.json_value(get_metadata(series), \"$.updated\").astype(\"Int64\"), unit=\"us\", utc=True)" ], "metadata": {}, - "execution_count": 4, + "execution_count": null, "outputs": [] }, { @@ -230,7 +230,7 @@ "id": "fx6YcZJbeYru", "outputId": "d707954a-0dd0-4c50-b7bf-36b140cf76cf" }, - "execution_count": 5, + "execution_count": null, "outputs": [] }, { @@ -249,74 +249,8 @@ "id": "HhCb8jRsLe9B", "outputId": "03081cf9-3a22-42c9-b38f-649f592fdada" }, - "execution_count": 6, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
image
0{'uri': 'gs://cloud-samples-data/bigquery/tuto...
1{'uri': 'gs://cloud-samples-data/bigquery/tuto...
2{'uri': 'gs://cloud-samples-data/bigquery/tuto...
3{'uri': 'gs://cloud-samples-data/bigquery/tuto...
4{'uri': 'gs://cloud-samples-data/bigquery/tuto...
\n", - "

5 rows × 1 columns

\n", - "
[5 rows x 1 columns in total]" - ], - "text/plain": [ - " image\n", - "0 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n", - "1 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n", - "2 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n", - "3 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n", - "4 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n", - "\n", - "[5 rows x 1 columns]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ] + "execution_count": null, + "outputs": [] }, { "id": "429b0117", @@ -355,105 +289,8 @@ "metadata": { "id": "YYYVn7NDH0Me" }, - "execution_count": 7, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
imageauthorcontent_typesizeupdated
0{'uri': 'gs://cloud-samples-data/bigquery/tuto...aliceimage/png7157662025-03-20 17:44:38+00:00
1{'uri': 'gs://cloud-samples-data/bigquery/tuto...bobimage/png11674062025-03-20 17:44:38+00:00
2{'uri': 'gs://cloud-samples-data/bigquery/tuto...bobimage/png11508922025-03-20 17:44:39+00:00
3{'uri': 'gs://cloud-samples-data/bigquery/tuto...aliceimage/png17365332025-03-20 17:44:39+00:00
4{'uri': 'gs://cloud-samples-data/bigquery/tuto...bobimage/png4397402025-03-20 17:44:39+00:00
\n", - "

5 rows × 5 columns

\n", - "
[5 rows x 5 columns in total]" - ], - "text/plain": [ - " image author content_type \\\n", - "0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n", - "1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", - "2 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", - "3 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n", - "4 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", - "\n", - " size updated \n", - "0 715766 2025-03-20 17:44:38+00:00 \n", - "1 1167406 2025-03-20 17:44:38+00:00 \n", - "2 1150892 2025-03-20 17:44:39+00:00 \n", - "3 1736533 2025-03-20 17:44:39+00:00 \n", - "4 439740 2025-03-20 17:44:39+00:00 \n", - "\n", - "[5 rows x 5 columns]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ] + "execution_count": null, + "outputs": [] }, { "id": "f90826f6", @@ -559,122 +396,7 @@ "outputId": "03081cf9-3a22-42c9-b38f-649f592fdada" }, "execution_count": null, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/pandas/__init__.py:211: PreviewWarning: udf is in preview.\n", - " return global_session.with_default_session(\n", - "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dataframe.py:4695: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", - " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", - "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:457: UserWarning: Compiler ID eb39002044ce: BadRequest on sqlglot. 
Falling back to ibis.\n", - "Details: GET\n", - "https://bigquery.googleapis.com/bigquery/v2/projects/bigframes-\n", - "dev/queries/db11d8fe-58e7-44ab-\n", - "aa57-662980a78596?maxResults=0&location=US&prettyPrint=false:\n", - "Encounter an error when invoking the function: Flight error: Encounter\n", - "an error when executing the function No module named 'cv2'. Traceback\n", - "(most recent call last): File \"/srv/grpc/bq_udf_server.py\", line\n", - "119, in call_udf result = [ ^ File\n", - "\"/srv/grpc/bq_udf_server.py\", line 120, in func(*row)\n", - "File\n", - "\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\",\n", - "line 43, in bigframes_handler return image_blur(*args)\n", - "^^^^^^^^^^^^^^^^^ File\n", - "\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\",\n", - "line 5, in image_blur import cv2 as cv ModuleNotFoundError: No\n", - "module named 'cv2' Location: US Job ID: db11d8fe-58e7-44ab-\n", - "aa57-662980a78596\n", - " warnings.warn(msg, category=UserWarning)\n" - ] - }, - { - "ename": "BadRequest", - "evalue": "400 GET https://bigquery.googleapis.com/bigquery/v2/projects/bigframes-dev/queries/9b17582d-da3b-47d9-947f-a1c5efa11dd9?maxResults=0&location=US&prettyPrint=false: Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named 'cv2'. 
Traceback (most recent call last):\n File \"/srv/grpc/bq_udf_server.py\", line 119, in call_udf\n result = [\n ^\n File \"/srv/grpc/bq_udf_server.py\", line 120, in \n func(*row)\n File \"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\", line 43, in bigframes_handler\n return image_blur(*args)\n ^^^^^^^^^^^^^^^^^\n File \"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\", line 5, in image_blur\n import cv2 as cv\nModuleNotFoundError: No module named 'cv2'\n\n\nLocation: US\nJob ID: 9b17582d-da3b-47d9-947f-a1c5efa11dd9\n [{'@type': 'type.googleapis.com/google.rpc.DebugInfo', 'detail': '[INVALID_INPUT] message=QUERY_ERROR: [Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\'cv2\\'. Traceback (most recent call last):\\n File \"/srv/grpc/bq_udf_server.py\", line 119, in call_udf\\n result = [\\n ^\\n File \"/srv/grpc/bq_udf_server.py\", line 120, in \\n func(*row)\\n File \"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\", line 43, in bigframes_handler\\n return image_blur(*args)\\n ^^^^^^^^^^^^^^^^^\\n File \"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\", line 5, in image_blur\\n import cv2 as cv\\nModuleNotFoundError: No module named \\'cv2\\'\\n] debug=code: \\t BAD_QUERY\\ndescription: \"Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\\\\\'cv2\\\\\\'. 
Traceback (most recent call last):\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 119, in call_udf\\\\n result = [\\\\n ^\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 120, in \\\\n func(*row)\\\\n File \\\\\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\\\\\", line 43, in bigframes_handler\\\\n return image_blur(*args)\\\\n ^^^^^^^^^^^^^^^^^\\\\n File \\\\\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\\\\\", line 5, in image_blur\\\\n import cv2 as cv\\\\nModuleNotFoundError: No module named \\\\\\'cv2\\\\\\'\\\\n\"\\ncause: USER_ERROR\\naddress: \"http://jfdaj38.prod.google.com:4901/task?handle=logs.7122.serving.shard-hals.cloud-dataengine.14304701181199 Partition description: __SHUFFLE1_RAND0/117 ShuffleByTableDef \\\\\\'__SHUFFLE0\\\\\\' shuffle \\\\t { spec { num_sources: 1 num_sinks: 500 } partitioning { rand { num_partitions: 500 } } config { group_id: 3066382614537913598 shuffle_id: 0 precreated_mfs_path: \\\\\"/mfs/jf/home/cloud-dataengine/serving-shuffle/ttl=3d/d24bfe88e472d688_shuffle_cache/00\\\\\" } } sink_id: 116\"\\nstatus_proto {\\n code: 11\\n space: \"generic\"\\n message: \"Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\\\\\'cv2\\\\\\'. 
Traceback (most recent call last):\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 119, in call_udf\\\\n result = [\\\\n ^\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 120, in \\\\n func(*row)\\\\n File \\\\\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\\\\\", line 43, in bigframes_handler\\\\n return image_blur(*args)\\\\n ^^^^^^^^^^^^^^^^^\\\\n File \\\\\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\\\\\", line 5, in image_blur\\\\n import cv2 as cv\\\\nModuleNotFoundError: No module named \\\\\\'cv2\\\\\\'\\\\n\"\\n}\\nerror_details {\\n runtime_error {\\n query_error {\\n }\\n }\\n debug_info {\\n error_message_template: \"Encounter an error when invoking the function: $0\"\\n error_id: 3276645920\\n }\\n}\\n errorProto=code: \"QUERY_ERROR\"\\nargument: \"Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\\\\\'cv2\\\\\\'. Traceback (most recent call last):\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 119, in call_udf\\\\n result = [\\\\n ^\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 120, in \\\\n func(*row)\\\\n File \\\\\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\\\\\", line 43, in bigframes_handler\\\\n return image_blur(*args)\\\\n ^^^^^^^^^^^^^^^^^\\\\n File \\\\\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\\\\\", line 5, in image_blur\\\\n import cv2 as cv\\\\nModuleNotFoundError: No module named \\\\\\'cv2\\\\\\'\\\\n\"\\nlocation_type: OTHER\\nlocation: \"query\"\\n\\n\\tat com.google.cloud.helix.common.Exceptions.fromProto(Exceptions.java:1983)\\n\\tat com.google.cloud.helix.common.dremel.QueryExecutorImpl.mapDremelErrorsTohelixException(QueryExecutorImpl.java:1235)\\n\\tat com.google.cloud.helix.common.dremel.QueryExecutorImpl$ConfiguredQueryMigration$StreamHandler.onMessage(QueryExecutorImpl.java:810)\\n\\tat 
com.google.cloud.helix.common.dremel.QueryExecutorImpl$ConfiguredQueryMigration$StreamHandler.onMessage(QueryExecutorImpl.java:736)\\n\\tat com.google.net.rpc3.stream.RpcMessageCallback$ForwardingRpcMessageCallback.onMessage(RpcMessageCallback.java:128)\\n\\tat com.google.net.rpc3.impl.RpcStreamInternalContext.processMessageUnlocked(RpcStreamInternalContext.java:1861)\\n\\tat com.google.net.rpc3.impl.RpcStreamInternalContext.invokeCallbacksInternalUnlocked(RpcStreamInternalContext.java:2916)\\n\\tat com.google.net.rpc3.impl.RpcStreamInternalContext.invokeCallbacksUnlocked(RpcStreamInternalContext.java:2842)\\n\\tat com.google.net.eventmanager.AbstractFutureTask$Sync.innerRun(AbstractFutureTask.java:259)\\n\\tat com.google.net.eventmanager.AbstractFutureTask.run(AbstractFutureTask.java:120)\\n\\tat com.google.net.eventmanager.EventManagerImpl.runTaskTraced(EventManagerImpl.java:903)\\n\\tat com.google.net.eventmanager.EventManagerImpl.runTask(EventManagerImpl.java:895)\\n\\tat com.google.net.eventmanager.EventManagerImpl.internalRunWorkerLoop(EventManagerImpl.java:1322)\\n\\tat com.google.net.eventmanager.EventManagerImpl.runWorkerLoop(EventManagerImpl.java:1213)\\n\\tat com.google.net.eventmanager.WorkerThreadInfo.runWorkerLoop(WorkerThreadInfo.java:153)\\n\\tat com.google.net.eventmanager.EventManagerImpl$WorkerThread.run(EventManagerImpl.java:2006)\\n'}]", - "output_type": "error", - "traceback": [ - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mBadRequest\u001b[39m Traceback (most recent call last)", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/core/logging/log_adapter.py:183\u001b[39m, in \u001b[36mmethod_logger..outer_wrapper..wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 180\u001b[39m _call_stack.append(full_method_name)\n\u001b[32m 182\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> 
\u001b[39m\u001b[32m183\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 184\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mNotImplementedError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 185\u001b[39m \u001b[38;5;66;03m# Log method parameters that are implemented in pandas but either missing (TypeError)\u001b[39;00m\n\u001b[32m 186\u001b[39m \u001b[38;5;66;03m# or not fully supported (NotImplementedError) in BigFrames.\u001b[39;00m\n\u001b[32m 187\u001b[39m \u001b[38;5;66;03m# Logging is currently supported only when we can access the bqclient through\u001b[39;00m\n\u001b[32m 188\u001b[39m \u001b[38;5;66;03m# _block.session.bqclient.\u001b[39;00m\n\u001b[32m 189\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(_call_stack) == \u001b[32m1\u001b[39m:\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dataframe.py:835\u001b[39m, in \u001b[36mDataFrame._repr_mimebundle_\u001b[39m\u001b[34m(self, include, exclude)\u001b[39m\n\u001b[32m 831\u001b[39m \u001b[38;5;66;03m# TODO(b/467647693): Anywidget integration has been tested in Jupyter, VS Code, and\u001b[39;00m\n\u001b[32m 832\u001b[39m \u001b[38;5;66;03m# BQ Studio, but there is a known compatibility issue with Marimo that needs to be addressed.\u001b[39;00m\n\u001b[32m 833\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mbigframes\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdisplay\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m html\n\u001b[32m--> \u001b[39m\u001b[32m835\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[43mhtml\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrepr_mimebundle\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minclude\u001b[49m\u001b[43m=\u001b[49m\u001b[43minclude\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexclude\u001b[49m\u001b[43m=\u001b[49m\u001b[43mexclude\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/display/html.py:355\u001b[39m, in \u001b[36mrepr_mimebundle\u001b[39m\u001b[34m(obj, include, exclude)\u001b[39m\n\u001b[32m 345\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m:\n\u001b[32m 346\u001b[39m \u001b[38;5;66;03m# Anywidget is an optional dependency, so warn rather than fail.\u001b[39;00m\n\u001b[32m 347\u001b[39m \u001b[38;5;66;03m# TODO(shuowei): When Anywidget becomes the default for all repr modes,\u001b[39;00m\n\u001b[32m 348\u001b[39m \u001b[38;5;66;03m# remove this warning.\u001b[39;00m\n\u001b[32m 349\u001b[39m warnings.warn(\n\u001b[32m 350\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mAnywidget mode is not available. \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 351\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mPlease `pip install anywidget traitlets` or `pip install \u001b[39m\u001b[33m'\u001b[39m\u001b[33mbigframes[anywidget]\u001b[39m\u001b[33m'\u001b[39m\u001b[33m` to use interactive tables. \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 352\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mFalling back to static HTML. 
Error: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtraceback.format_exc()\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m 353\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m355\u001b[39m bundle = \u001b[43mrepr_mimebundle_head\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 356\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m opts.render_mode == \u001b[33m\"\u001b[39m\u001b[33mplaintext\u001b[39m\u001b[33m\"\u001b[39m:\n\u001b[32m 357\u001b[39m bundle.pop(\u001b[33m\"\u001b[39m\u001b[33mtext/html\u001b[39m\u001b[33m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/display/html.py:302\u001b[39m, in \u001b[36mrepr_mimebundle_head\u001b[39m\u001b[34m(obj)\u001b[39m\n\u001b[32m 300\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 301\u001b[39m df, _ = obj._get_display_df_and_blob_cols()\n\u001b[32m--> \u001b[39m\u001b[32m302\u001b[39m pandas_df, row_count, query_job = \u001b[43mdf\u001b[49m\u001b[43m.\u001b[49m\u001b[43m_block\u001b[49m\u001b[43m.\u001b[49m\u001b[43mretrieve_repr_request_results\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 303\u001b[39m \u001b[43m \u001b[49m\u001b[43mopts\u001b[49m\u001b[43m.\u001b[49m\u001b[43mmax_rows\u001b[49m\n\u001b[32m 304\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 306\u001b[39m obj._set_internal_query_job(query_job)\n\u001b[32m 307\u001b[39m column_count = \u001b[38;5;28mlen\u001b[39m(pandas_df.columns)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/core/blocks.py:1615\u001b[39m, in \u001b[36mBlock.retrieve_repr_request_results\u001b[39m\u001b[34m(self, max_results)\u001b[39m\n\u001b[32m 1613\u001b[39m \u001b[38;5;66;03m# head caches full underlying expression, so row_count will be free after\u001b[39;00m\n\u001b[32m 1614\u001b[39m executor = 
\u001b[38;5;28mself\u001b[39m.session._executor\n\u001b[32m-> \u001b[39m\u001b[32m1615\u001b[39m \u001b[43mexecutor\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcached\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1616\u001b[39m \u001b[43m \u001b[49m\u001b[43marray_value\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mexpr\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1617\u001b[39m \u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m=\u001b[49m\u001b[43mexecutors\u001b[49m\u001b[43m.\u001b[49m\u001b[43mCacheConfig\u001b[49m\u001b[43m(\u001b[49m\u001b[43moptimize_for\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mhead\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mif_cached\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mreuse-strict\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1618\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1619\u001b[39m head_result = \u001b[38;5;28mself\u001b[39m.session._executor.execute(\n\u001b[32m 1620\u001b[39m \u001b[38;5;28mself\u001b[39m.expr.slice(start=\u001b[38;5;28;01mNone\u001b[39;00m, stop=max_results, step=\u001b[38;5;28;01mNone\u001b[39;00m),\n\u001b[32m 1621\u001b[39m execution_spec.ExecutionSpec(\n\u001b[32m (...)\u001b[39m\u001b[32m 1624\u001b[39m ),\n\u001b[32m 1625\u001b[39m )\n\u001b[32m 1626\u001b[39m row_count = (\n\u001b[32m 1627\u001b[39m \u001b[38;5;28mself\u001b[39m.session._executor.execute(\n\u001b[32m 1628\u001b[39m \u001b[38;5;28mself\u001b[39m.expr.row_count(),\n\u001b[32m (...)\u001b[39m\u001b[32m 1635\u001b[39m .to_py_scalar()\n\u001b[32m 1636\u001b[39m )\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:348\u001b[39m, in \u001b[36mBigQueryCachingExecutor.cached\u001b[39m\u001b[34m(self, array_value, 
config)\u001b[39m\n\u001b[32m 346\u001b[39m \u001b[38;5;28mself\u001b[39m._cache_with_session_awareness(array_value)\n\u001b[32m 347\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m config.optimize_for == \u001b[33m\"\u001b[39m\u001b[33mhead\u001b[39m\u001b[33m\"\u001b[39m:\n\u001b[32m--> \u001b[39m\u001b[32m348\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_cache_with_offsets\u001b[49m\u001b[43m(\u001b[49m\u001b[43marray_value\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 349\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 350\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(config.optimize_for, executor.HierarchicalKey)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:504\u001b[39m, in \u001b[36mBigQueryCachingExecutor._cache_with_offsets\u001b[39m\u001b[34m(self, array_value)\u001b[39m\n\u001b[32m 500\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"Executes the query and uses the resulting table to rewrite future executions.\"\"\"\u001b[39;00m\n\u001b[32m 501\u001b[39m execution_spec = ex_spec.ExecutionSpec(\n\u001b[32m 502\u001b[39m destination_spec=ex_spec.CacheSpec(cluster_cols=\u001b[38;5;28mtuple\u001b[39m())\n\u001b[32m 503\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m504\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 505\u001b[39m \u001b[43m \u001b[49m\u001b[43marray_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 506\u001b[39m \u001b[43m \u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m=\u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 507\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:169\u001b[39m, in 
\u001b[36mBigQueryCachingExecutor.execute\u001b[39m\u001b[34m(self, array_value, execution_spec)\u001b[39m\n\u001b[32m 162\u001b[39m \u001b[38;5;28mself\u001b[39m._publisher.publish(\n\u001b[32m 163\u001b[39m bigframes.core.events.ExecutionFinished(\n\u001b[32m 164\u001b[39m result=result,\n\u001b[32m 165\u001b[39m )\n\u001b[32m 166\u001b[39m )\n\u001b[32m 167\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m result\n\u001b[32m--> \u001b[39m\u001b[32m169\u001b[39m result = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_execute_plan_gbq\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 170\u001b[39m \u001b[43m \u001b[49m\u001b[43marray_value\u001b[49m\u001b[43m.\u001b[49m\u001b[43mnode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 171\u001b[39m \u001b[43m \u001b[49m\u001b[43mordered\u001b[49m\u001b[43m=\u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m.\u001b[49m\u001b[43mordered\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 172\u001b[39m \u001b[43m \u001b[49m\u001b[43mpeek\u001b[49m\u001b[43m=\u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m.\u001b[49m\u001b[43mpeek\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 173\u001b[39m \u001b[43m \u001b[49m\u001b[43mcache_spec\u001b[49m\u001b[43m=\u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdestination_spec\u001b[49m\n\u001b[32m 174\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43misinstance\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdestination_spec\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mex_spec\u001b[49m\u001b[43m.\u001b[49m\u001b[43mCacheSpec\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 175\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 176\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mmust_create_table\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m.\u001b[49m\u001b[43mpromise_under_10gb\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 177\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 178\u001b[39m \u001b[38;5;66;03m# post steps: export\u001b[39;00m\n\u001b[32m 179\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(execution_spec.destination_spec, ex_spec.GcsOutputSpec):\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:687\u001b[39m, in \u001b[36mBigQueryCachingExecutor._execute_plan_gbq\u001b[39m\u001b[34m(self, plan, ordered, peek, cache_spec, must_create_table)\u001b[39m\n\u001b[32m 679\u001b[39m iterator, query_job = \u001b[38;5;28mself\u001b[39m._run_execute_query(\n\u001b[32m 680\u001b[39m sql=compiled.sql,\n\u001b[32m 681\u001b[39m job_config=job_config,\n\u001b[32m 682\u001b[39m query_with_job=(destination_table \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m),\n\u001b[32m 683\u001b[39m session=plan.session,\n\u001b[32m 684\u001b[39m )\n\u001b[32m 685\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m iterator, query_job, compiled\n\u001b[32m--> \u001b[39m\u001b[32m687\u001b[39m iterator, query_job, compiled = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_compile_with_fallback\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrun_with_compiler\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 689\u001b[39m \u001b[38;5;66;03m# might have more columns than og schema, for hidden ordering columns\u001b[39;00m\n\u001b[32m 690\u001b[39m compiled_schema = compiled.sql_schema\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:458\u001b[39m, in 
\u001b[36mBigQueryCachingExecutor._compile_with_fallback\u001b[39m\u001b[34m(self, run_fn)\u001b[39m\n\u001b[32m 453\u001b[39m msg = bfe.format_message(\n\u001b[32m 454\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mCompiler ID \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcompiler_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m: BadRequest on sqlglot. \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 455\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mFalling back to ibis. Details: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me.message\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m 456\u001b[39m )\n\u001b[32m 457\u001b[39m warnings.warn(msg, category=\u001b[38;5;167;01mUserWarning\u001b[39;00m)\n\u001b[32m--> \u001b[39m\u001b[32m458\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mrun_fn\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mibis\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcompiler_id\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcompiler_id\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:679\u001b[39m, in \u001b[36mBigQueryCachingExecutor._execute_plan_gbq..run_with_compiler\u001b[39m\u001b[34m(compiler_name, compiler_id)\u001b[39m\n\u001b[32m 675\u001b[39m job_config.labels[\u001b[33m\"\u001b[39m\u001b[33mbigframes-dtypes\u001b[39m\u001b[33m\"\u001b[39m] = compiled.encoded_type_refs\n\u001b[32m 676\u001b[39m job_config.labels[\u001b[33m\"\u001b[39m\u001b[33mbigframes-compiler\u001b[39m\u001b[33m\"\u001b[39m] = (\n\u001b[32m 677\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcompiler_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m-\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcompiler_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m 
compiler_id \u001b[38;5;28;01melse\u001b[39;00m compiler_name\n\u001b[32m 678\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m679\u001b[39m iterator, query_job = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_run_execute_query\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 680\u001b[39m \u001b[43m \u001b[49m\u001b[43msql\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcompiled\u001b[49m\u001b[43m.\u001b[49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 681\u001b[39m \u001b[43m \u001b[49m\u001b[43mjob_config\u001b[49m\u001b[43m=\u001b[49m\u001b[43mjob_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 682\u001b[39m \u001b[43m \u001b[49m\u001b[43mquery_with_job\u001b[49m\u001b[43m=\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdestination_table\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 683\u001b[39m \u001b[43m \u001b[49m\u001b[43msession\u001b[49m\u001b[43m=\u001b[49m\u001b[43mplan\u001b[49m\u001b[43m.\u001b[49m\u001b[43msession\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 684\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 685\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m iterator, query_job, compiled\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:378\u001b[39m, in \u001b[36mBigQueryCachingExecutor._run_execute_query\u001b[39m\u001b[34m(self, sql, job_config, query_with_job, session)\u001b[39m\n\u001b[32m 375\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m 376\u001b[39m \u001b[38;5;66;03m# Trick the type checker into thinking we got a literal.\u001b[39;00m\n\u001b[32m 377\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m query_with_job:\n\u001b[32m--> \u001b[39m\u001b[32m378\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[43mbq_io\u001b[49m\u001b[43m.\u001b[49m\u001b[43mstart_query_with_client\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 379\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mbqclient\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 380\u001b[39m \u001b[43m \u001b[49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 381\u001b[39m \u001b[43m \u001b[49m\u001b[43mjob_config\u001b[49m\u001b[43m=\u001b[49m\u001b[43mjob_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 382\u001b[39m \u001b[43m \u001b[49m\u001b[43mmetrics\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mmetrics\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 383\u001b[39m \u001b[43m \u001b[49m\u001b[43mproject\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 384\u001b[39m \u001b[43m \u001b[49m\u001b[43mlocation\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 385\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 386\u001b[39m \u001b[43m \u001b[49m\u001b[43mquery_with_job\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 387\u001b[39m \u001b[43m \u001b[49m\u001b[43mpublisher\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_publisher\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 388\u001b[39m \u001b[43m \u001b[49m\u001b[43msession\u001b[49m\u001b[43m=\u001b[49m\u001b[43msession\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 389\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 390\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 391\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m bq_io.start_query_with_client(\n\u001b[32m 392\u001b[39m \u001b[38;5;28mself\u001b[39m.bqclient,\n\u001b[32m 
393\u001b[39m sql,\n\u001b[32m (...)\u001b[39m\u001b[32m 401\u001b[39m session=session,\n\u001b[32m 402\u001b[39m )\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/_io/bigquery/__init__.py:395\u001b[39m, in \u001b[36mstart_query_with_client\u001b[39m\u001b[34m(bq_client, sql, job_config, location, project, timeout, metrics, query_with_job, job_retry, publisher, session)\u001b[39m\n\u001b[32m 385\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m query_job.configuration.dry_run:\n\u001b[32m 386\u001b[39m publisher.publish(\n\u001b[32m 387\u001b[39m bigframes.core.events.BigQuerySentEvent(\n\u001b[32m 388\u001b[39m sql,\n\u001b[32m (...)\u001b[39m\u001b[32m 393\u001b[39m )\n\u001b[32m 394\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m395\u001b[39m results_iterator = \u001b[43mquery_job\u001b[49m\u001b[43m.\u001b[49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 396\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m query_job.configuration.dry_run:\n\u001b[32m 397\u001b[39m publisher.publish(\n\u001b[32m 398\u001b[39m bigframes.core.events.BigQueryFinishedEvent(\n\u001b[32m 399\u001b[39m billing_project=query_job.project,\n\u001b[32m (...)\u001b[39m\u001b[32m 409\u001b[39m )\n\u001b[32m 410\u001b[39m )\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/job/query.py:1797\u001b[39m, in \u001b[36mQueryJob.result\u001b[39m\u001b[34m(self, page_size, max_results, retry, timeout, start_index, job_retry)\u001b[39m\n\u001b[32m 1792\u001b[39m remaining_timeout = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1794\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m remaining_timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 1795\u001b[39m \u001b[38;5;66;03m# Since 
is_job_done() calls jobs.getQueryResults, which is a\u001b[39;00m\n\u001b[32m 1796\u001b[39m \u001b[38;5;66;03m# long-running API, don't delay the next request at all.\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1797\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[43mis_job_done\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[32m 1798\u001b[39m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[32m 1799\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1800\u001b[39m \u001b[38;5;66;03m# Use a monotonic clock since we don't actually care about\u001b[39;00m\n\u001b[32m 1801\u001b[39m \u001b[38;5;66;03m# daylight savings or similar, just the elapsed time.\u001b[39;00m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:294\u001b[39m, in \u001b[36mRetry.__call__..retry_wrapped_func\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 290\u001b[39m target = functools.partial(func, *args, **kwargs)\n\u001b[32m 291\u001b[39m sleep_generator = exponential_sleep_generator(\n\u001b[32m 292\u001b[39m \u001b[38;5;28mself\u001b[39m._initial, \u001b[38;5;28mself\u001b[39m._maximum, multiplier=\u001b[38;5;28mself\u001b[39m._multiplier\n\u001b[32m 293\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m294\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mretry_target\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 295\u001b[39m \u001b[43m \u001b[49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 296\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_predicate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 297\u001b[39m \u001b[43m \u001b[49m\u001b[43msleep_generator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 298\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 299\u001b[39m \u001b[43m \u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m=\u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 300\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:156\u001b[39m, in \u001b[36mretry_target\u001b[39m\u001b[34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[39m\n\u001b[32m 152\u001b[39m \u001b[38;5;66;03m# pylint: disable=broad-except\u001b[39;00m\n\u001b[32m 153\u001b[39m \u001b[38;5;66;03m# This function explicitly must deal with broad exceptions.\u001b[39;00m\n\u001b[32m 154\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[32m 155\u001b[39m \u001b[38;5;66;03m# defer to shared logic for handling errors\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m156\u001b[39m next_sleep = \u001b[43m_retry_error_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 157\u001b[39m \u001b[43m \u001b[49m\u001b[43mexc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 158\u001b[39m \u001b[43m \u001b[49m\u001b[43mdeadline\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 159\u001b[39m \u001b[43m \u001b[49m\u001b[43msleep_iter\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 160\u001b[39m \u001b[43m \u001b[49m\u001b[43merror_list\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 161\u001b[39m \u001b[43m \u001b[49m\u001b[43mpredicate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 162\u001b[39m \u001b[43m \u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 163\u001b[39m \u001b[43m \u001b[49m\u001b[43mexception_factory\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 164\u001b[39m 
\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 165\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 166\u001b[39m \u001b[38;5;66;03m# if exception not raised, sleep before next attempt\u001b[39;00m\n\u001b[32m 167\u001b[39m time.sleep(next_sleep)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_base.py:216\u001b[39m, in \u001b[36m_retry_error_helper\u001b[39m\u001b[34m(exc, deadline, sleep_iterator, error_list, predicate_fn, on_error_fn, exc_factory_fn, original_timeout)\u001b[39m\n\u001b[32m 210\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m predicate_fn(exc):\n\u001b[32m 211\u001b[39m final_exc, source_exc = exc_factory_fn(\n\u001b[32m 212\u001b[39m error_list,\n\u001b[32m 213\u001b[39m RetryFailureReason.NON_RETRYABLE_ERROR,\n\u001b[32m 214\u001b[39m original_timeout,\n\u001b[32m 215\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m216\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m final_exc \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msource_exc\u001b[39;00m\n\u001b[32m 217\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m on_error_fn \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 218\u001b[39m on_error_fn(exc)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:147\u001b[39m, in \u001b[36mretry_target\u001b[39m\u001b[34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[39m\n\u001b[32m 145\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m 146\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m147\u001b[39m result = 
\u001b[43mtarget\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 148\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m inspect.isawaitable(result):\n\u001b[32m 149\u001b[39m warnings.warn(_ASYNC_RETRY_WARNING)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/job/query.py:1766\u001b[39m, in \u001b[36mQueryJob.result..is_job_done\u001b[39m\u001b[34m()\u001b[39m\n\u001b[32m 1760\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[32m 1762\u001b[39m \u001b[38;5;66;03m# Call jobs.getQueryResults with max results set to 0 just to\u001b[39;00m\n\u001b[32m 1763\u001b[39m \u001b[38;5;66;03m# wait for the query to finish. Unlike most methods,\u001b[39;00m\n\u001b[32m 1764\u001b[39m \u001b[38;5;66;03m# jobs.getQueryResults hangs as long as it can to ensure we\u001b[39;00m\n\u001b[32m 1765\u001b[39m \u001b[38;5;66;03m# know when the query has finished as soon as possible.\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1766\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_reload_query_results\u001b[49m\u001b[43m(\u001b[49m\u001b[43mretry\u001b[49m\u001b[43m=\u001b[49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mreload_query_results_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1768\u001b[39m \u001b[38;5;66;03m# Even if the query is finished now according to\u001b[39;00m\n\u001b[32m 1769\u001b[39m \u001b[38;5;66;03m# jobs.getQueryResults, we'll want to reload the job status if\u001b[39;00m\n\u001b[32m 1770\u001b[39m \u001b[38;5;66;03m# it's not already DONE.\u001b[39;00m\n\u001b[32m 1771\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m\n", - "\u001b[36mFile 
\u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/job/query.py:1558\u001b[39m, in \u001b[36mQueryJob._reload_query_results\u001b[39m\u001b[34m(self, retry, timeout, page_size, start_index)\u001b[39m\n\u001b[32m 1555\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(transport_timeout, (\u001b[38;5;28mfloat\u001b[39m, \u001b[38;5;28mint\u001b[39m)):\n\u001b[32m 1556\u001b[39m transport_timeout = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1558\u001b[39m \u001b[38;5;28mself\u001b[39m._query_results = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_client\u001b[49m\u001b[43m.\u001b[49m\u001b[43m_get_query_results\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1559\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mjob_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1560\u001b[39m \u001b[43m \u001b[49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1561\u001b[39m \u001b[43m \u001b[49m\u001b[43mproject\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mproject\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1562\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout_ms\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout_ms\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1563\u001b[39m \u001b[43m \u001b[49m\u001b[43mlocation\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mlocation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1564\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtransport_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1565\u001b[39m \u001b[43m \u001b[49m\u001b[43mpage_size\u001b[49m\u001b[43m=\u001b[49m\u001b[43mpage_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1566\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mstart_index\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstart_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1567\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/client.py:2113\u001b[39m, in \u001b[36mClient._get_query_results\u001b[39m\u001b[34m(self, job_id, retry, project, timeout_ms, location, timeout, page_size, start_index)\u001b[39m\n\u001b[32m 2109\u001b[39m \u001b[38;5;66;03m# This call is typically made in a polling loop that checks whether the\u001b[39;00m\n\u001b[32m 2110\u001b[39m \u001b[38;5;66;03m# job is complete (from QueryJob.done(), called ultimately from\u001b[39;00m\n\u001b[32m 2111\u001b[39m \u001b[38;5;66;03m# QueryJob.result()). So we don't need to poll here.\u001b[39;00m\n\u001b[32m 2112\u001b[39m span_attributes = {\u001b[33m\"\u001b[39m\u001b[33mpath\u001b[39m\u001b[33m\"\u001b[39m: path}\n\u001b[32m-> \u001b[39m\u001b[32m2113\u001b[39m resource = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_call_api\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 2114\u001b[39m \u001b[43m \u001b[49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2115\u001b[39m \u001b[43m \u001b[49m\u001b[43mspan_name\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mBigQuery.getQueryResults\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 2116\u001b[39m \u001b[43m \u001b[49m\u001b[43mspan_attributes\u001b[49m\u001b[43m=\u001b[49m\u001b[43mspan_attributes\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2117\u001b[39m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mGET\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 2118\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m=\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2119\u001b[39m \u001b[43m \u001b[49m\u001b[43mquery_params\u001b[49m\u001b[43m=\u001b[49m\u001b[43mextra_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2120\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2121\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 2122\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m _QueryResults.from_api_repr(resource)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/client.py:861\u001b[39m, in \u001b[36mClient._call_api\u001b[39m\u001b[34m(self, retry, span_name, span_attributes, job_ref, headers, **kwargs)\u001b[39m\n\u001b[32m 857\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m span_name \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 858\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m create_span(\n\u001b[32m 859\u001b[39m name=span_name, attributes=span_attributes, client=\u001b[38;5;28mself\u001b[39m, job_ref=job_ref\n\u001b[32m 860\u001b[39m ):\n\u001b[32m--> \u001b[39m\u001b[32m861\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcall\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 863\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m call()\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:294\u001b[39m, in \u001b[36mRetry.__call__..retry_wrapped_func\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 290\u001b[39m target = functools.partial(func, *args, **kwargs)\n\u001b[32m 291\u001b[39m sleep_generator = exponential_sleep_generator(\n\u001b[32m 292\u001b[39m 
\u001b[38;5;28mself\u001b[39m._initial, \u001b[38;5;28mself\u001b[39m._maximum, multiplier=\u001b[38;5;28mself\u001b[39m._multiplier\n\u001b[32m 293\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m294\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mretry_target\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 295\u001b[39m \u001b[43m \u001b[49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 296\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_predicate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 297\u001b[39m \u001b[43m \u001b[49m\u001b[43msleep_generator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 298\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 299\u001b[39m \u001b[43m \u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m=\u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 300\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:156\u001b[39m, in \u001b[36mretry_target\u001b[39m\u001b[34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[39m\n\u001b[32m 152\u001b[39m \u001b[38;5;66;03m# pylint: disable=broad-except\u001b[39;00m\n\u001b[32m 153\u001b[39m \u001b[38;5;66;03m# This function explicitly must deal with broad exceptions.\u001b[39;00m\n\u001b[32m 154\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[32m 155\u001b[39m \u001b[38;5;66;03m# defer to shared logic for handling errors\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m156\u001b[39m next_sleep = \u001b[43m_retry_error_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 
157\u001b[39m \u001b[43m \u001b[49m\u001b[43mexc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 158\u001b[39m \u001b[43m \u001b[49m\u001b[43mdeadline\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 159\u001b[39m \u001b[43m \u001b[49m\u001b[43msleep_iter\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 160\u001b[39m \u001b[43m \u001b[49m\u001b[43merror_list\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 161\u001b[39m \u001b[43m \u001b[49m\u001b[43mpredicate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 162\u001b[39m \u001b[43m \u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 163\u001b[39m \u001b[43m \u001b[49m\u001b[43mexception_factory\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 164\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 165\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 166\u001b[39m \u001b[38;5;66;03m# if exception not raised, sleep before next attempt\u001b[39;00m\n\u001b[32m 167\u001b[39m time.sleep(next_sleep)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_base.py:216\u001b[39m, in \u001b[36m_retry_error_helper\u001b[39m\u001b[34m(exc, deadline, sleep_iterator, error_list, predicate_fn, on_error_fn, exc_factory_fn, original_timeout)\u001b[39m\n\u001b[32m 210\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m predicate_fn(exc):\n\u001b[32m 211\u001b[39m final_exc, source_exc = exc_factory_fn(\n\u001b[32m 212\u001b[39m error_list,\n\u001b[32m 213\u001b[39m RetryFailureReason.NON_RETRYABLE_ERROR,\n\u001b[32m 214\u001b[39m original_timeout,\n\u001b[32m 215\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m216\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m final_exc \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msource_exc\u001b[39;00m\n\u001b[32m 217\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m on_error_fn 
\u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 218\u001b[39m on_error_fn(exc)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:147\u001b[39m, in \u001b[36mretry_target\u001b[39m\u001b[34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[39m\n\u001b[32m 145\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m 146\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m147\u001b[39m result = \u001b[43mtarget\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 148\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m inspect.isawaitable(result):\n\u001b[32m 149\u001b[39m warnings.warn(_ASYNC_RETRY_WARNING)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/_http/__init__.py:494\u001b[39m, in \u001b[36mJSONConnection.api_request\u001b[39m\u001b[34m(self, method, path, query_params, data, content_type, headers, api_base_url, api_version, expect_json, _target_object, timeout, extra_api_info)\u001b[39m\n\u001b[32m 482\u001b[39m response = \u001b[38;5;28mself\u001b[39m._make_request(\n\u001b[32m 483\u001b[39m method=method,\n\u001b[32m 484\u001b[39m url=url,\n\u001b[32m (...)\u001b[39m\u001b[32m 490\u001b[39m extra_api_info=extra_api_info,\n\u001b[32m 491\u001b[39m )\n\u001b[32m 493\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[32m200\u001b[39m <= response.status_code < \u001b[32m300\u001b[39m:\n\u001b[32m--> \u001b[39m\u001b[32m494\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m exceptions.from_http_response(response)\n\u001b[32m 496\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m expect_json 
\u001b[38;5;129;01mand\u001b[39;00m response.content:\n\u001b[32m 497\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m response.json()\n", - "\u001b[31mBadRequest\u001b[39m: 400 GET https://bigquery.googleapis.com/bigquery/v2/projects/bigframes-dev/queries/9b17582d-da3b-47d9-947f-a1c5efa11dd9?maxResults=0&location=US&prettyPrint=false: Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named 'cv2'. Traceback (most recent call last):\n File \"/srv/grpc/bq_udf_server.py\", line 119, in call_udf\n result = [\n ^\n File \"/srv/grpc/bq_udf_server.py\", line 120, in \n func(*row)\n File \"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\", line 43, in bigframes_handler\n return image_blur(*args)\n ^^^^^^^^^^^^^^^^^\n File \"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\", line 5, in image_blur\n import cv2 as cv\nModuleNotFoundError: No module named 'cv2'\n\n\nLocation: US\nJob ID: 9b17582d-da3b-47d9-947f-a1c5efa11dd9\n [{'@type': 'type.googleapis.com/google.rpc.DebugInfo', 'detail': '[INVALID_INPUT] message=QUERY_ERROR: [Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\'cv2\\'. Traceback (most recent call last):\\n File \"/srv/grpc/bq_udf_server.py\", line 119, in call_udf\\n result = [\\n ^\\n File \"/srv/grpc/bq_udf_server.py\", line 120, in \\n func(*row)\\n File \"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\", line 43, in bigframes_handler\\n return image_blur(*args)\\n ^^^^^^^^^^^^^^^^^\\n File \"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\", line 5, in image_blur\\n import cv2 as cv\\nModuleNotFoundError: No module named \\'cv2\\'\\n] debug=code: \\t BAD_QUERY\\ndescription: \"Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\\\\\'cv2\\\\\\'. 
Traceback (most recent call last):\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 119, in call_udf\\\\n result = [\\\\n ^\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 120, in \\\\n func(*row)\\\\n File \\\\\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\\\\\", line 43, in bigframes_handler\\\\n return image_blur(*args)\\\\n ^^^^^^^^^^^^^^^^^\\\\n File \\\\\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\\\\\", line 5, in image_blur\\\\n import cv2 as cv\\\\nModuleNotFoundError: No module named \\\\\\'cv2\\\\\\'\\\\n\"\\ncause: USER_ERROR\\naddress: \"http://jfdaj38.prod.google.com:4901/task?handle=logs.7122.serving.shard-hals.cloud-dataengine.14304701181199 Partition description: __SHUFFLE1_RAND0/117 ShuffleByTableDef \\\\\\'__SHUFFLE0\\\\\\' shuffle \\\\t { spec { num_sources: 1 num_sinks: 500 } partitioning { rand { num_partitions: 500 } } config { group_id: 3066382614537913598 shuffle_id: 0 precreated_mfs_path: \\\\\"/mfs/jf/home/cloud-dataengine/serving-shuffle/ttl=3d/d24bfe88e472d688_shuffle_cache/00\\\\\" } } sink_id: 116\"\\nstatus_proto {\\n code: 11\\n space: \"generic\"\\n message: \"Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\\\\\'cv2\\\\\\'. 
Traceback (most recent call last):\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 119, in call_udf\\\\n result = [\\\\n ^\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 120, in \\\\n func(*row)\\\\n File \\\\\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\\\\\", line 43, in bigframes_handler\\\\n return image_blur(*args)\\\\n ^^^^^^^^^^^^^^^^^\\\\n File \\\\\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\\\\\", line 5, in image_blur\\\\n import cv2 as cv\\\\nModuleNotFoundError: No module named \\\\\\'cv2\\\\\\'\\\\n\"\\n}\\nerror_details {\\n runtime_error {\\n query_error {\\n }\\n }\\n debug_info {\\n error_message_template: \"Encounter an error when invoking the function: $0\"\\n error_id: 3276645920\\n }\\n}\\n errorProto=code: \"QUERY_ERROR\"\\nargument: \"Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\\\\\'cv2\\\\\\'. Traceback (most recent call last):\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 119, in call_udf\\\\n result = [\\\\n ^\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 120, in \\\\n func(*row)\\\\n File \\\\\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\\\\\", line 43, in bigframes_handler\\\\n return image_blur(*args)\\\\n ^^^^^^^^^^^^^^^^^\\\\n File \\\\\"/udf_modules/image_blur_69f10841_0000_29c5_964c_d4f547f7fa14.py\\\\\", line 5, in image_blur\\\\n import cv2 as cv\\\\nModuleNotFoundError: No module named \\\\\\'cv2\\\\\\'\\\\n\"\\nlocation_type: OTHER\\nlocation: \"query\"\\n\\n\\tat com.google.cloud.helix.common.Exceptions.fromProto(Exceptions.java:1983)\\n\\tat com.google.cloud.helix.common.dremel.QueryExecutorImpl.mapDremelErrorsTohelixException(QueryExecutorImpl.java:1235)\\n\\tat com.google.cloud.helix.common.dremel.QueryExecutorImpl$ConfiguredQueryMigration$StreamHandler.onMessage(QueryExecutorImpl.java:810)\\n\\tat 
com.google.cloud.helix.common.dremel.QueryExecutorImpl$ConfiguredQueryMigration$StreamHandler.onMessage(QueryExecutorImpl.java:736)\\n\\tat com.google.net.rpc3.stream.RpcMessageCallback$ForwardingRpcMessageCallback.onMessage(RpcMessageCallback.java:128)\\n\\tat com.google.net.rpc3.impl.RpcStreamInternalContext.processMessageUnlocked(RpcStreamInternalContext.java:1861)\\n\\tat com.google.net.rpc3.impl.RpcStreamInternalContext.invokeCallbacksInternalUnlocked(RpcStreamInternalContext.java:2916)\\n\\tat com.google.net.rpc3.impl.RpcStreamInternalContext.invokeCallbacksUnlocked(RpcStreamInternalContext.java:2842)\\n\\tat com.google.net.eventmanager.AbstractFutureTask$Sync.innerRun(AbstractFutureTask.java:259)\\n\\tat com.google.net.eventmanager.AbstractFutureTask.run(AbstractFutureTask.java:120)\\n\\tat com.google.net.eventmanager.EventManagerImpl.runTaskTraced(EventManagerImpl.java:903)\\n\\tat com.google.net.eventmanager.EventManagerImpl.runTask(EventManagerImpl.java:895)\\n\\tat com.google.net.eventmanager.EventManagerImpl.internalRunWorkerLoop(EventManagerImpl.java:1322)\\n\\tat com.google.net.eventmanager.EventManagerImpl.runWorkerLoop(EventManagerImpl.java:1213)\\n\\tat com.google.net.eventmanager.WorkerThreadInfo.runWorkerLoop(WorkerThreadInfo.java:153)\\n\\tat com.google.net.eventmanager.EventManagerImpl$WorkerThread.run(EventManagerImpl.java:2006)\\n'}]" - ] - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mKeyboardInterrupt\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[11]\u001b[39m\u001b[32m, line 72\u001b[39m\n\u001b[32m 68\u001b[39m df_image[\"blurred\"] = apply_transformation(\n\u001b[32m 69\u001b[39m df_image[\u001b[33m\"image\"\u001b[39m], f\"gs://{OUTPUT_BUCKET}/image_blur_transformed/\",\n\u001b[32m 70\u001b[39m image_blur, 
\u001b[32m20\u001b[39m, \u001b[32m20\u001b[39m\n\u001b[32m 71\u001b[39m )\n\u001b[32m---> \u001b[39m\u001b[32m72\u001b[39m df_image[[\u001b[33m\"image\"\u001b[39m, \u001b[33m\"blurred\"\u001b[39m]]\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/decorator.py:235\u001b[39m, in \u001b[36mdecorate..fun\u001b[39m\u001b[34m(*args, **kw)\u001b[39m\n\u001b[32m 233\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m kwsyntax:\n\u001b[32m 234\u001b[39m args, kw = fix(args, kw, sig)\n\u001b[32m--> \u001b[39m\u001b[32m235\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcaller\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m(\u001b[49m\u001b[43mextras\u001b[49m\u001b[43m \u001b[49m\u001b[43m+\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/core/logging/log_adapter.py:183\u001b[39m, in \u001b[36mmethod_logger..outer_wrapper..wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 180\u001b[39m _call_stack.append(full_method_name)\n\u001b[32m 182\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m183\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 184\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mNotImplementedError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m 
e:\n\u001b[32m 185\u001b[39m \u001b[38;5;66;03m# Log method parameters that are implemented in pandas but either missing (TypeError)\u001b[39;00m\n\u001b[32m 186\u001b[39m \u001b[38;5;66;03m# or not fully supported (NotImplementedError) in BigFrames.\u001b[39;00m\n\u001b[32m 187\u001b[39m \u001b[38;5;66;03m# Logging is currently supported only when we can access the bqclient through\u001b[39;00m\n\u001b[32m 188\u001b[39m \u001b[38;5;66;03m# _block.session.bqclient.\u001b[39;00m\n\u001b[32m 189\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(_call_stack) == \u001b[32m1\u001b[39m:\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dataframe.py:807\u001b[39m, in \u001b[36mDataFrame.__repr__\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 802\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m formatter.repr_query_job(\u001b[38;5;28mself\u001b[39m._compute_dry_run())\n\u001b[32m 804\u001b[39m \u001b[38;5;66;03m# TODO(swast): pass max_columns and get the true column count back. Maybe\u001b[39;00m\n\u001b[32m 805\u001b[39m \u001b[38;5;66;03m# get 1 more column than we have requested so that pandas can add the\u001b[39;00m\n\u001b[32m 806\u001b[39m \u001b[38;5;66;03m# ... 
for us?\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m807\u001b[39m pandas_df, row_count, query_job = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_block\u001b[49m\u001b[43m.\u001b[49m\u001b[43mretrieve_repr_request_results\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 808\u001b[39m \u001b[43m \u001b[49m\u001b[43mmax_results\u001b[49m\n\u001b[32m 809\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 811\u001b[39m \u001b[38;5;28mself\u001b[39m._set_internal_query_job(query_job)\n\u001b[32m 812\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mbigframes\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdisplay\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m plaintext\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/core/blocks.py:1615\u001b[39m, in \u001b[36mBlock.retrieve_repr_request_results\u001b[39m\u001b[34m(self, max_results)\u001b[39m\n\u001b[32m 1613\u001b[39m \u001b[38;5;66;03m# head caches full underlying expression, so row_count will be free after\u001b[39;00m\n\u001b[32m 1614\u001b[39m executor = \u001b[38;5;28mself\u001b[39m.session._executor\n\u001b[32m-> \u001b[39m\u001b[32m1615\u001b[39m \u001b[43mexecutor\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcached\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1616\u001b[39m \u001b[43m \u001b[49m\u001b[43marray_value\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mexpr\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1617\u001b[39m \u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m=\u001b[49m\u001b[43mexecutors\u001b[49m\u001b[43m.\u001b[49m\u001b[43mCacheConfig\u001b[49m\u001b[43m(\u001b[49m\u001b[43moptimize_for\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mhead\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mif_cached\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mreuse-strict\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1618\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1619\u001b[39m head_result = \u001b[38;5;28mself\u001b[39m.session._executor.execute(\n\u001b[32m 1620\u001b[39m \u001b[38;5;28mself\u001b[39m.expr.slice(start=\u001b[38;5;28;01mNone\u001b[39;00m, stop=max_results, step=\u001b[38;5;28;01mNone\u001b[39;00m),\n\u001b[32m 1621\u001b[39m execution_spec.ExecutionSpec(\n\u001b[32m (...)\u001b[39m\u001b[32m 1624\u001b[39m ),\n\u001b[32m 1625\u001b[39m )\n\u001b[32m 1626\u001b[39m row_count = (\n\u001b[32m 1627\u001b[39m \u001b[38;5;28mself\u001b[39m.session._executor.execute(\n\u001b[32m 1628\u001b[39m \u001b[38;5;28mself\u001b[39m.expr.row_count(),\n\u001b[32m (...)\u001b[39m\u001b[32m 1635\u001b[39m .to_py_scalar()\n\u001b[32m 1636\u001b[39m )\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:348\u001b[39m, in \u001b[36mBigQueryCachingExecutor.cached\u001b[39m\u001b[34m(self, array_value, config)\u001b[39m\n\u001b[32m 346\u001b[39m \u001b[38;5;28mself\u001b[39m._cache_with_session_awareness(array_value)\n\u001b[32m 347\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m config.optimize_for == \u001b[33m\"\u001b[39m\u001b[33mhead\u001b[39m\u001b[33m\"\u001b[39m:\n\u001b[32m--> \u001b[39m\u001b[32m348\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_cache_with_offsets\u001b[49m\u001b[43m(\u001b[49m\u001b[43marray_value\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 349\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 350\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(config.optimize_for, executor.HierarchicalKey)\n", - "\u001b[36mFile 
\u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:504\u001b[39m, in \u001b[36mBigQueryCachingExecutor._cache_with_offsets\u001b[39m\u001b[34m(self, array_value)\u001b[39m\n\u001b[32m 500\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"Executes the query and uses the resulting table to rewrite future executions.\"\"\"\u001b[39;00m\n\u001b[32m 501\u001b[39m execution_spec = ex_spec.ExecutionSpec(\n\u001b[32m 502\u001b[39m destination_spec=ex_spec.CacheSpec(cluster_cols=\u001b[38;5;28mtuple\u001b[39m())\n\u001b[32m 503\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m504\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 505\u001b[39m \u001b[43m \u001b[49m\u001b[43marray_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 506\u001b[39m \u001b[43m \u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m=\u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 507\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:169\u001b[39m, in \u001b[36mBigQueryCachingExecutor.execute\u001b[39m\u001b[34m(self, array_value, execution_spec)\u001b[39m\n\u001b[32m 162\u001b[39m \u001b[38;5;28mself\u001b[39m._publisher.publish(\n\u001b[32m 163\u001b[39m bigframes.core.events.ExecutionFinished(\n\u001b[32m 164\u001b[39m result=result,\n\u001b[32m 165\u001b[39m )\n\u001b[32m 166\u001b[39m )\n\u001b[32m 167\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m result\n\u001b[32m--> \u001b[39m\u001b[32m169\u001b[39m result = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_execute_plan_gbq\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 170\u001b[39m \u001b[43m 
\u001b[49m\u001b[43marray_value\u001b[49m\u001b[43m.\u001b[49m\u001b[43mnode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 171\u001b[39m \u001b[43m \u001b[49m\u001b[43mordered\u001b[49m\u001b[43m=\u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m.\u001b[49m\u001b[43mordered\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 172\u001b[39m \u001b[43m \u001b[49m\u001b[43mpeek\u001b[49m\u001b[43m=\u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m.\u001b[49m\u001b[43mpeek\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 173\u001b[39m \u001b[43m \u001b[49m\u001b[43mcache_spec\u001b[49m\u001b[43m=\u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdestination_spec\u001b[49m\n\u001b[32m 174\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43misinstance\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdestination_spec\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mex_spec\u001b[49m\u001b[43m.\u001b[49m\u001b[43mCacheSpec\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 175\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 176\u001b[39m \u001b[43m \u001b[49m\u001b[43mmust_create_table\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mexecution_spec\u001b[49m\u001b[43m.\u001b[49m\u001b[43mpromise_under_10gb\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 177\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 178\u001b[39m \u001b[38;5;66;03m# post steps: export\u001b[39;00m\n\u001b[32m 179\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(execution_spec.destination_spec, ex_spec.GcsOutputSpec):\n", - "\u001b[36mFile 
\u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:687\u001b[39m, in \u001b[36mBigQueryCachingExecutor._execute_plan_gbq\u001b[39m\u001b[34m(self, plan, ordered, peek, cache_spec, must_create_table)\u001b[39m\n\u001b[32m 679\u001b[39m iterator, query_job = \u001b[38;5;28mself\u001b[39m._run_execute_query(\n\u001b[32m 680\u001b[39m sql=compiled.sql,\n\u001b[32m 681\u001b[39m job_config=job_config,\n\u001b[32m 682\u001b[39m query_with_job=(destination_table \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m),\n\u001b[32m 683\u001b[39m session=plan.session,\n\u001b[32m 684\u001b[39m )\n\u001b[32m 685\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m iterator, query_job, compiled\n\u001b[32m--> \u001b[39m\u001b[32m687\u001b[39m iterator, query_job, compiled = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_compile_with_fallback\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrun_with_compiler\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 689\u001b[39m \u001b[38;5;66;03m# might have more columns than og schema, for hidden ordering columns\u001b[39;00m\n\u001b[32m 690\u001b[39m compiled_schema = compiled.sql_schema\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:451\u001b[39m, in \u001b[36mBigQueryCachingExecutor._compile_with_fallback\u001b[39m\u001b[34m(self, run_fn)\u001b[39m\n\u001b[32m 449\u001b[39m compiler_id = \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00muuid.uuid1().hex[:\u001b[32m12\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m 450\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m451\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[43mrun_fn\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43msqlglot\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcompiler_id\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcompiler_id\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 452\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m google.cloud.exceptions.BadRequest \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 453\u001b[39m msg = bfe.format_message(\n\u001b[32m 454\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mCompiler ID \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcompiler_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m: BadRequest on sqlglot. \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 455\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mFalling back to ibis. Details: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me.message\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m 456\u001b[39m )\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:679\u001b[39m, in \u001b[36mBigQueryCachingExecutor._execute_plan_gbq..run_with_compiler\u001b[39m\u001b[34m(compiler_name, compiler_id)\u001b[39m\n\u001b[32m 675\u001b[39m job_config.labels[\u001b[33m\"\u001b[39m\u001b[33mbigframes-dtypes\u001b[39m\u001b[33m\"\u001b[39m] = compiled.encoded_type_refs\n\u001b[32m 676\u001b[39m job_config.labels[\u001b[33m\"\u001b[39m\u001b[33mbigframes-compiler\u001b[39m\u001b[33m\"\u001b[39m] = (\n\u001b[32m 677\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcompiler_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m-\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcompiler_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m compiler_id \u001b[38;5;28;01melse\u001b[39;00m compiler_name\n\u001b[32m 678\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m679\u001b[39m iterator, 
query_job = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_run_execute_query\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 680\u001b[39m \u001b[43m \u001b[49m\u001b[43msql\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcompiled\u001b[49m\u001b[43m.\u001b[49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 681\u001b[39m \u001b[43m \u001b[49m\u001b[43mjob_config\u001b[49m\u001b[43m=\u001b[49m\u001b[43mjob_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 682\u001b[39m \u001b[43m \u001b[49m\u001b[43mquery_with_job\u001b[49m\u001b[43m=\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdestination_table\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 683\u001b[39m \u001b[43m \u001b[49m\u001b[43msession\u001b[49m\u001b[43m=\u001b[49m\u001b[43mplan\u001b[49m\u001b[43m.\u001b[49m\u001b[43msession\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 684\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 685\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m iterator, query_job, compiled\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/bq_caching_executor.py:378\u001b[39m, in \u001b[36mBigQueryCachingExecutor._run_execute_query\u001b[39m\u001b[34m(self, sql, job_config, query_with_job, session)\u001b[39m\n\u001b[32m 375\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m 376\u001b[39m \u001b[38;5;66;03m# Trick the type checker into thinking we got a literal.\u001b[39;00m\n\u001b[32m 377\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m query_with_job:\n\u001b[32m--> \u001b[39m\u001b[32m378\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mbq_io\u001b[49m\u001b[43m.\u001b[49m\u001b[43mstart_query_with_client\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 379\u001b[39m \u001b[43m 
\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mbqclient\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 380\u001b[39m \u001b[43m \u001b[49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 381\u001b[39m \u001b[43m \u001b[49m\u001b[43mjob_config\u001b[49m\u001b[43m=\u001b[49m\u001b[43mjob_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 382\u001b[39m \u001b[43m \u001b[49m\u001b[43mmetrics\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mmetrics\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 383\u001b[39m \u001b[43m \u001b[49m\u001b[43mproject\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 384\u001b[39m \u001b[43m \u001b[49m\u001b[43mlocation\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 385\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 386\u001b[39m \u001b[43m \u001b[49m\u001b[43mquery_with_job\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 387\u001b[39m \u001b[43m \u001b[49m\u001b[43mpublisher\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_publisher\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 388\u001b[39m \u001b[43m \u001b[49m\u001b[43msession\u001b[49m\u001b[43m=\u001b[49m\u001b[43msession\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 389\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 390\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 391\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m bq_io.start_query_with_client(\n\u001b[32m 392\u001b[39m \u001b[38;5;28mself\u001b[39m.bqclient,\n\u001b[32m 393\u001b[39m sql,\n\u001b[32m (...)\u001b[39m\u001b[32m 401\u001b[39m session=session,\n\u001b[32m 402\u001b[39m )\n", - "\u001b[36mFile 
\u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/_io/bigquery/__init__.py:395\u001b[39m, in \u001b[36mstart_query_with_client\u001b[39m\u001b[34m(bq_client, sql, job_config, location, project, timeout, metrics, query_with_job, job_retry, publisher, session)\u001b[39m\n\u001b[32m 385\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m query_job.configuration.dry_run:\n\u001b[32m 386\u001b[39m publisher.publish(\n\u001b[32m 387\u001b[39m bigframes.core.events.BigQuerySentEvent(\n\u001b[32m 388\u001b[39m sql,\n\u001b[32m (...)\u001b[39m\u001b[32m 393\u001b[39m )\n\u001b[32m 394\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m395\u001b[39m results_iterator = \u001b[43mquery_job\u001b[49m\u001b[43m.\u001b[49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 396\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m query_job.configuration.dry_run:\n\u001b[32m 397\u001b[39m publisher.publish(\n\u001b[32m 398\u001b[39m bigframes.core.events.BigQueryFinishedEvent(\n\u001b[32m 399\u001b[39m billing_project=query_job.project,\n\u001b[32m (...)\u001b[39m\u001b[32m 409\u001b[39m )\n\u001b[32m 410\u001b[39m )\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/job/query.py:1797\u001b[39m, in \u001b[36mQueryJob.result\u001b[39m\u001b[34m(self, page_size, max_results, retry, timeout, start_index, job_retry)\u001b[39m\n\u001b[32m 1792\u001b[39m remaining_timeout = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1794\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m remaining_timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 1795\u001b[39m \u001b[38;5;66;03m# Since is_job_done() calls jobs.getQueryResults, which is a\u001b[39;00m\n\u001b[32m 1796\u001b[39m \u001b[38;5;66;03m# long-running API, don't 
delay the next request at all.\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1797\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[43mis_job_done\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[32m 1798\u001b[39m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[32m 1799\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1800\u001b[39m \u001b[38;5;66;03m# Use a monotonic clock since we don't actually care about\u001b[39;00m\n\u001b[32m 1801\u001b[39m \u001b[38;5;66;03m# daylight savings or similar, just the elapsed time.\u001b[39;00m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:294\u001b[39m, in \u001b[36mRetry.__call__..retry_wrapped_func\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 290\u001b[39m target = functools.partial(func, *args, **kwargs)\n\u001b[32m 291\u001b[39m sleep_generator = exponential_sleep_generator(\n\u001b[32m 292\u001b[39m \u001b[38;5;28mself\u001b[39m._initial, \u001b[38;5;28mself\u001b[39m._maximum, multiplier=\u001b[38;5;28mself\u001b[39m._multiplier\n\u001b[32m 293\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m294\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mretry_target\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 295\u001b[39m \u001b[43m \u001b[49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 296\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_predicate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 297\u001b[39m \u001b[43m \u001b[49m\u001b[43msleep_generator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 298\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 299\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m=\u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 300\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:147\u001b[39m, in \u001b[36mretry_target\u001b[39m\u001b[34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[39m\n\u001b[32m 145\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m 146\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m147\u001b[39m result = \u001b[43mtarget\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 148\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m inspect.isawaitable(result):\n\u001b[32m 149\u001b[39m warnings.warn(_ASYNC_RETRY_WARNING)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/job/query.py:1766\u001b[39m, in \u001b[36mQueryJob.result..is_job_done\u001b[39m\u001b[34m()\u001b[39m\n\u001b[32m 1760\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[32m 1762\u001b[39m \u001b[38;5;66;03m# Call jobs.getQueryResults with max results set to 0 just to\u001b[39;00m\n\u001b[32m 1763\u001b[39m \u001b[38;5;66;03m# wait for the query to finish. 
Unlike most methods,\u001b[39;00m\n\u001b[32m 1764\u001b[39m \u001b[38;5;66;03m# jobs.getQueryResults hangs as long as it can to ensure we\u001b[39;00m\n\u001b[32m 1765\u001b[39m \u001b[38;5;66;03m# know when the query has finished as soon as possible.\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1766\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_reload_query_results\u001b[49m\u001b[43m(\u001b[49m\u001b[43mretry\u001b[49m\u001b[43m=\u001b[49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mreload_query_results_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1768\u001b[39m \u001b[38;5;66;03m# Even if the query is finished now according to\u001b[39;00m\n\u001b[32m 1769\u001b[39m \u001b[38;5;66;03m# jobs.getQueryResults, we'll want to reload the job status if\u001b[39;00m\n\u001b[32m 1770\u001b[39m \u001b[38;5;66;03m# it's not already DONE.\u001b[39;00m\n\u001b[32m 1771\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/job/query.py:1558\u001b[39m, in \u001b[36mQueryJob._reload_query_results\u001b[39m\u001b[34m(self, retry, timeout, page_size, start_index)\u001b[39m\n\u001b[32m 1555\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(transport_timeout, (\u001b[38;5;28mfloat\u001b[39m, \u001b[38;5;28mint\u001b[39m)):\n\u001b[32m 1556\u001b[39m transport_timeout = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1558\u001b[39m \u001b[38;5;28mself\u001b[39m._query_results = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_client\u001b[49m\u001b[43m.\u001b[49m\u001b[43m_get_query_results\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1559\u001b[39m \u001b[43m 
\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mjob_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1560\u001b[39m \u001b[43m \u001b[49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1561\u001b[39m \u001b[43m \u001b[49m\u001b[43mproject\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mproject\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1562\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout_ms\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout_ms\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1563\u001b[39m \u001b[43m \u001b[49m\u001b[43mlocation\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mlocation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1564\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtransport_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1565\u001b[39m \u001b[43m \u001b[49m\u001b[43mpage_size\u001b[49m\u001b[43m=\u001b[49m\u001b[43mpage_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1566\u001b[39m \u001b[43m \u001b[49m\u001b[43mstart_index\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstart_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1567\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/client.py:2113\u001b[39m, in \u001b[36mClient._get_query_results\u001b[39m\u001b[34m(self, job_id, retry, project, timeout_ms, location, timeout, page_size, start_index)\u001b[39m\n\u001b[32m 2109\u001b[39m \u001b[38;5;66;03m# This call is typically made in a polling loop that checks whether the\u001b[39;00m\n\u001b[32m 2110\u001b[39m \u001b[38;5;66;03m# job is complete (from QueryJob.done(), called ultimately from\u001b[39;00m\n\u001b[32m 2111\u001b[39m \u001b[38;5;66;03m# QueryJob.result()). 
So we don't need to poll here.\u001b[39;00m\n\u001b[32m 2112\u001b[39m span_attributes = {\u001b[33m\"\u001b[39m\u001b[33mpath\u001b[39m\u001b[33m\"\u001b[39m: path}\n\u001b[32m-> \u001b[39m\u001b[32m2113\u001b[39m resource = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_call_api\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 2114\u001b[39m \u001b[43m \u001b[49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2115\u001b[39m \u001b[43m \u001b[49m\u001b[43mspan_name\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mBigQuery.getQueryResults\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 2116\u001b[39m \u001b[43m \u001b[49m\u001b[43mspan_attributes\u001b[49m\u001b[43m=\u001b[49m\u001b[43mspan_attributes\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2117\u001b[39m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mGET\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 2118\u001b[39m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m=\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2119\u001b[39m \u001b[43m \u001b[49m\u001b[43mquery_params\u001b[49m\u001b[43m=\u001b[49m\u001b[43mextra_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2120\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2121\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 2122\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m _QueryResults.from_api_repr(resource)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/client.py:861\u001b[39m, in \u001b[36mClient._call_api\u001b[39m\u001b[34m(self, retry, span_name, span_attributes, job_ref, headers, **kwargs)\u001b[39m\n\u001b[32m 857\u001b[39m 
\u001b[38;5;28;01mif\u001b[39;00m span_name \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 858\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m create_span(\n\u001b[32m 859\u001b[39m name=span_name, attributes=span_attributes, client=\u001b[38;5;28mself\u001b[39m, job_ref=job_ref\n\u001b[32m 860\u001b[39m ):\n\u001b[32m--> \u001b[39m\u001b[32m861\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcall\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 863\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m call()\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:294\u001b[39m, in \u001b[36mRetry.__call__..retry_wrapped_func\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 290\u001b[39m target = functools.partial(func, *args, **kwargs)\n\u001b[32m 291\u001b[39m sleep_generator = exponential_sleep_generator(\n\u001b[32m 292\u001b[39m \u001b[38;5;28mself\u001b[39m._initial, \u001b[38;5;28mself\u001b[39m._maximum, multiplier=\u001b[38;5;28mself\u001b[39m._multiplier\n\u001b[32m 293\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m294\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mretry_target\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 295\u001b[39m \u001b[43m \u001b[49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 296\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_predicate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 297\u001b[39m \u001b[43m \u001b[49m\u001b[43msleep_generator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 298\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 299\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m=\u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 300\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:147\u001b[39m, in \u001b[36mretry_target\u001b[39m\u001b[34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[39m\n\u001b[32m 145\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m 146\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m147\u001b[39m result = \u001b[43mtarget\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 148\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m inspect.isawaitable(result):\n\u001b[32m 149\u001b[39m warnings.warn(_ASYNC_RETRY_WARNING)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/_http/__init__.py:482\u001b[39m, in \u001b[36mJSONConnection.api_request\u001b[39m\u001b[34m(self, method, path, query_params, data, content_type, headers, api_base_url, api_version, expect_json, _target_object, timeout, extra_api_info)\u001b[39m\n\u001b[32m 479\u001b[39m data = json.dumps(data)\n\u001b[32m 480\u001b[39m content_type = \u001b[33m\"\u001b[39m\u001b[33mapplication/json\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m--> \u001b[39m\u001b[32m482\u001b[39m response = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_make_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 483\u001b[39m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 484\u001b[39m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m=\u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 485\u001b[39m 
\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 486\u001b[39m \u001b[43m \u001b[49m\u001b[43mcontent_type\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcontent_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 487\u001b[39m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m=\u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 488\u001b[39m \u001b[43m \u001b[49m\u001b[43mtarget_object\u001b[49m\u001b[43m=\u001b[49m\u001b[43m_target_object\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 489\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 490\u001b[39m \u001b[43m \u001b[49m\u001b[43mextra_api_info\u001b[49m\u001b[43m=\u001b[49m\u001b[43mextra_api_info\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 491\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 493\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[32m200\u001b[39m <= response.status_code < \u001b[32m300\u001b[39m:\n\u001b[32m 494\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m exceptions.from_http_response(response)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/_http/__init__.py:341\u001b[39m, in \u001b[36mJSONConnection._make_request\u001b[39m\u001b[34m(self, method, url, data, content_type, headers, target_object, timeout, extra_api_info)\u001b[39m\n\u001b[32m 338\u001b[39m headers[CLIENT_INFO_HEADER] = \u001b[38;5;28mself\u001b[39m.user_agent\n\u001b[32m 339\u001b[39m headers[\u001b[33m\"\u001b[39m\u001b[33mUser-Agent\u001b[39m\u001b[33m\"\u001b[39m] = \u001b[38;5;28mself\u001b[39m.user_agent\n\u001b[32m--> \u001b[39m\u001b[32m341\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_do_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 342\u001b[39m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtarget_object\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\n\u001b[32m 343\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/_http/__init__.py:379\u001b[39m, in \u001b[36mJSONConnection._do_request\u001b[39m\u001b[34m(self, method, url, headers, data, target_object, timeout)\u001b[39m\n\u001b[32m 345\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m_do_request\u001b[39m(\n\u001b[32m 346\u001b[39m \u001b[38;5;28mself\u001b[39m, method, url, headers, data, target_object, timeout=_DEFAULT_TIMEOUT\n\u001b[32m 347\u001b[39m ): \u001b[38;5;66;03m# pylint: disable=unused-argument\u001b[39;00m\n\u001b[32m 348\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33;03m\"\"\"Low-level helper: perform the actual API request over HTTP.\u001b[39;00m\n\u001b[32m 349\u001b[39m \n\u001b[32m 350\u001b[39m \u001b[33;03m Allows batch context managers to override and defer a request.\u001b[39;00m\n\u001b[32m (...)\u001b[39m\u001b[32m 377\u001b[39m \u001b[33;03m :returns: The HTTP response.\u001b[39;00m\n\u001b[32m 378\u001b[39m \u001b[33;03m \"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m379\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mhttp\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 380\u001b[39m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m=\u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m=\u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\n\u001b[32m 381\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/auth/transport/requests.py:543\u001b[39m, in \u001b[36mAuthorizedSession.request\u001b[39m\u001b[34m(self, method, url, data, headers, max_allowed_time, timeout, **kwargs)\u001b[39m\n\u001b[32m 541\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m TimeoutGuard(remaining_time) \u001b[38;5;28;01mas\u001b[39;00m guard:\n\u001b[32m 542\u001b[39m _helpers.request_log(_LOGGER, method, url, data, headers)\n\u001b[32m--> \u001b[39m\u001b[32m543\u001b[39m response = \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mAuthorizedSession\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 544\u001b[39m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 545\u001b[39m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 546\u001b[39m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 
547\u001b[39m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m=\u001b[49m\u001b[43mrequest_headers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 548\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 549\u001b[39m \u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\n\u001b[32m 550\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 551\u001b[39m remaining_time = guard.remaining_timeout\n\u001b[32m 553\u001b[39m \u001b[38;5;66;03m# If the response indicated that the credentials needed to be\u001b[39;00m\n\u001b[32m 554\u001b[39m \u001b[38;5;66;03m# refreshed, then refresh the credentials and re-attempt the\u001b[39;00m\n\u001b[32m 555\u001b[39m \u001b[38;5;66;03m# request.\u001b[39;00m\n\u001b[32m 556\u001b[39m \u001b[38;5;66;03m# A stored token may expire between the time it is retrieved and\u001b[39;00m\n\u001b[32m 557\u001b[39m \u001b[38;5;66;03m# the time the request is made, so we may need to try twice.\u001b[39;00m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/requests/sessions.py:592\u001b[39m, in \u001b[36mSession.request\u001b[39m\u001b[34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[39m\n\u001b[32m 587\u001b[39m send_kwargs = {\n\u001b[32m 588\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mtimeout\u001b[39m\u001b[33m\"\u001b[39m: timeout,\n\u001b[32m 589\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mallow_redirects\u001b[39m\u001b[33m\"\u001b[39m: allow_redirects,\n\u001b[32m 590\u001b[39m }\n\u001b[32m 591\u001b[39m send_kwargs.update(settings)\n\u001b[32m--> \u001b[39m\u001b[32m592\u001b[39m resp = 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprep\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43msend_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 594\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m resp\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/requests/sessions.py:706\u001b[39m, in \u001b[36mSession.send\u001b[39m\u001b[34m(self, request, **kwargs)\u001b[39m\n\u001b[32m 703\u001b[39m start = preferred_clock()\n\u001b[32m 705\u001b[39m \u001b[38;5;66;03m# Send the request\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m706\u001b[39m r = \u001b[43madapter\u001b[49m\u001b[43m.\u001b[49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 708\u001b[39m \u001b[38;5;66;03m# Total elapsed time of the request (approximately)\u001b[39;00m\n\u001b[32m 709\u001b[39m elapsed = preferred_clock() - start\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/requests/adapters.py:645\u001b[39m, in \u001b[36mHTTPAdapter.send\u001b[39m\u001b[34m(self, request, stream, timeout, verify, cert, proxies)\u001b[39m\n\u001b[32m 642\u001b[39m timeout = TimeoutSauce(connect=timeout, read=timeout)\n\u001b[32m 644\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m645\u001b[39m resp = \u001b[43mconn\u001b[49m\u001b[43m.\u001b[49m\u001b[43murlopen\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 646\u001b[39m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m=\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m.\u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 647\u001b[39m 
\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m=\u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 648\u001b[39m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[43m=\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m.\u001b[49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 649\u001b[39m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m=\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m.\u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 650\u001b[39m \u001b[43m \u001b[49m\u001b[43mredirect\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 651\u001b[39m \u001b[43m \u001b[49m\u001b[43massert_same_host\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 652\u001b[39m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 653\u001b[39m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 654\u001b[39m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mmax_retries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 655\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 656\u001b[39m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[43m=\u001b[49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 657\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 659\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m (ProtocolError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[32m 660\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m(err, 
request=request)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/urllib3/connectionpool.py:787\u001b[39m, in \u001b[36mHTTPConnectionPool.urlopen\u001b[39m\u001b[34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[39m\n\u001b[32m 784\u001b[39m response_conn = conn \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m release_conn \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 786\u001b[39m \u001b[38;5;66;03m# Make the request on the HTTPConnection object\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m787\u001b[39m response = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_make_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 788\u001b[39m \u001b[43m \u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 789\u001b[39m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 790\u001b[39m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 791\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 792\u001b[39m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[43m=\u001b[49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 793\u001b[39m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m=\u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 794\u001b[39m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[43m=\u001b[49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 795\u001b[39m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[43m=\u001b[49m\u001b[43mretries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 796\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mresponse_conn\u001b[49m\u001b[43m=\u001b[49m\u001b[43mresponse_conn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 797\u001b[39m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[43m=\u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 798\u001b[39m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 799\u001b[39m \u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mresponse_kw\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 800\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 802\u001b[39m \u001b[38;5;66;03m# Everything went great!\u001b[39;00m\n\u001b[32m 803\u001b[39m clean_exit = \u001b[38;5;28;01mTrue\u001b[39;00m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/urllib3/connectionpool.py:534\u001b[39m, in \u001b[36mHTTPConnectionPool._make_request\u001b[39m\u001b[34m(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)\u001b[39m\n\u001b[32m 532\u001b[39m \u001b[38;5;66;03m# Receive the response from the server\u001b[39;00m\n\u001b[32m 533\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m534\u001b[39m response = \u001b[43mconn\u001b[49m\u001b[43m.\u001b[49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 535\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m (BaseSSLError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 536\u001b[39m \u001b[38;5;28mself\u001b[39m._raise_timeout(err=e, url=url, timeout_value=read_timeout)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/urllib3/connection.py:571\u001b[39m, in 
\u001b[36mHTTPConnection.getresponse\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 568\u001b[39m _shutdown = \u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m.sock, \u001b[33m\"\u001b[39m\u001b[33mshutdown\u001b[39m\u001b[33m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[32m 570\u001b[39m \u001b[38;5;66;03m# Get the response from http.client.HTTPConnection\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m571\u001b[39m httplib_response = \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m.\u001b[49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 573\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m 574\u001b[39m assert_header_parsing(httplib_response.msg)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/3.13.0/lib/python3.13/http/client.py:1428\u001b[39m, in \u001b[36mHTTPConnection.getresponse\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 1426\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m 1427\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1428\u001b[39m \u001b[43mresponse\u001b[49m\u001b[43m.\u001b[49m\u001b[43mbegin\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1429\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m:\n\u001b[32m 1430\u001b[39m \u001b[38;5;28mself\u001b[39m.close()\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/3.13.0/lib/python3.13/http/client.py:331\u001b[39m, in \u001b[36mHTTPResponse.begin\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 329\u001b[39m \u001b[38;5;66;03m# read until we get a non-100 response\u001b[39;00m\n\u001b[32m 330\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m331\u001b[39m version, status, reason = 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_read_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 332\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m status != CONTINUE:\n\u001b[32m 333\u001b[39m \u001b[38;5;28;01mbreak\u001b[39;00m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/3.13.0/lib/python3.13/http/client.py:292\u001b[39m, in \u001b[36mHTTPResponse._read_status\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 291\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m_read_status\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[32m--> \u001b[39m\u001b[32m292\u001b[39m line = \u001b[38;5;28mstr\u001b[39m(\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mfp\u001b[49m\u001b[43m.\u001b[49m\u001b[43mreadline\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_MAXLINE\u001b[49m\u001b[43m \u001b[49m\u001b[43m+\u001b[49m\u001b[43m \u001b[49m\u001b[32;43m1\u001b[39;49m\u001b[43m)\u001b[49m, \u001b[33m\"\u001b[39m\u001b[33miso-8859-1\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 293\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(line) > _MAXLINE:\n\u001b[32m 294\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m LineTooLong(\u001b[33m\"\u001b[39m\u001b[33mstatus line\u001b[39m\u001b[33m\"\u001b[39m)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/3.13.0/lib/python3.13/socket.py:719\u001b[39m, in \u001b[36mSocketIO.readinto\u001b[39m\u001b[34m(self, b)\u001b[39m\n\u001b[32m 717\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mOSError\u001b[39;00m(\u001b[33m\"\u001b[39m\u001b[33mcannot read from timed out object\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 718\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m719\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_sock\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrecv_into\u001b[49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 720\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m timeout:\n\u001b[32m 721\u001b[39m \u001b[38;5;28mself\u001b[39m._timeout_occurred = \u001b[38;5;28;01mTrue\u001b[39;00m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/3.13.0/lib/python3.13/ssl.py:1304\u001b[39m, in \u001b[36mSSLSocket.recv_into\u001b[39m\u001b[34m(self, buffer, nbytes, flags)\u001b[39m\n\u001b[32m 1300\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m flags != \u001b[32m0\u001b[39m:\n\u001b[32m 1301\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[32m 1302\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mnon-zero flags not allowed in calls to recv_into() on \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[33m\"\u001b[39m %\n\u001b[32m 1303\u001b[39m \u001b[38;5;28mself\u001b[39m.\u001b[34m__class__\u001b[39m)\n\u001b[32m-> \u001b[39m\u001b[32m1304\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnbytes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1305\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1306\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m().recv_into(buffer, nbytes, flags)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/3.13.0/lib/python3.13/ssl.py:1138\u001b[39m, in \u001b[36mSSLSocket.read\u001b[39m\u001b[34m(self, len, buffer)\u001b[39m\n\u001b[32m 1136\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m 1137\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m buffer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m-> 
\u001b[39m\u001b[32m1138\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_sslobj\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1139\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1140\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._sslobj.read(\u001b[38;5;28mlen\u001b[39m)\n", - "\u001b[31mKeyboardInterrupt\u001b[39m: " - ] - } - ] + "outputs": [] }, { "id": "11fcc6ec", @@ -697,24 +419,8 @@ "metadata": { "id": "mRUGfcaFVW-3" }, - "execution_count": 9, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/core/logging/log_adapter.py:183: FutureWarning: Since upgrading the default model can cause unintended breakages, the\n", - "default model will be removed in BigFrames 3.0. Please supply an\n", - "explicit model to avoid this message.\n", - " return method(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/__init__.py:437: FutureWarning: You are using the BigFrames session default connection: bigframes-\n", - "default-connection, which can be different from the\n", - "BigQuery project default connection. 
This default\n", - "connection may change in the future.\n", - " warnings.warn(msg, category=FutureWarning)\n" - ] - } - ] + "execution_count": null, + "outputs": [] }, { "id": "13d7cb93", @@ -732,69 +438,8 @@ "id": "DNFP7CbjWdR9", "outputId": "3f90a062-0abc-4bce-f53c-db57b06a14b9" }, - "execution_count": 10, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, - { - "ename": "BadRequest", - "evalue": "400 GET https://bigquery.googleapis.com/bigquery/v2/projects/bigframes-dev/queries/5e86e836-354e-439f-8300-f6186ff77b79?maxResults=0&location=US&prettyPrint=false: Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named 'cv2'. Traceback (most recent call last):\n File \"/srv/grpc/bq_udf_server.py\", line 119, in call_udf\n result = [\n ^\n File \"/srv/grpc/bq_udf_server.py\", line 120, in \n func(*row)\n File \"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\", line 43, in bigframes_handler\n return image_blur(*args)\n ^^^^^^^^^^^^^^^^^\n File \"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\", line 5, in image_blur\n import cv2 as cv\nModuleNotFoundError: No module named 'cv2'\n\n\nLocation: US\nJob ID: 5e86e836-354e-439f-8300-f6186ff77b79\n [{'@type': 'type.googleapis.com/google.rpc.DebugInfo', 'detail': '[INVALID_INPUT] message=QUERY_ERROR: [Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\'cv2\\'. 
Traceback (most recent call last):\\n File \"/srv/grpc/bq_udf_server.py\", line 119, in call_udf\\n result = [\\n ^\\n File \"/srv/grpc/bq_udf_server.py\", line 120, in \\n func(*row)\\n File \"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\", line 43, in bigframes_handler\\n return image_blur(*args)\\n ^^^^^^^^^^^^^^^^^\\n File \"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\", line 5, in image_blur\\n import cv2 as cv\\nModuleNotFoundError: No module named \\'cv2\\'\\n] debug=code: \\t BAD_QUERY\\ndescription: \"Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\\\\\'cv2\\\\\\'. Traceback (most recent call last):\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 119, in call_udf\\\\n result = [\\\\n ^\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 120, in \\\\n func(*row)\\\\n File \\\\\"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\\\\\", line 43, in bigframes_handler\\\\n return image_blur(*args)\\\\n ^^^^^^^^^^^^^^^^^\\\\n File \\\\\"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\\\\\", line 5, in image_blur\\\\n import cv2 as cv\\\\nModuleNotFoundError: No module named \\\\\\'cv2\\\\\\'\\\\n\"\\ncause: USER_ERROR\\naddress: \"http://jfbxw15.prod.google.com:4901/task?handle=logs.21974.serving.shard-hals.cloud-dataengine.14302809449245 Partition description: __SHUFFLE1/256 ShuffleByTableDef \\\\\\'__SHUFFLE0\\\\\\' shuffle \\\\t { spec { num_sources: 1 num_sinks: 500 } partitioning { rand { num_partitions: 500 } } config { group_id: 173353402696091398 shuffle_id: 0 precreated_mfs_path: \\\\\"/mfs/jf/home/cloud-dataengine/serving-shuffle/ttl=3d/a7f7e000544e4706_shuffle_cache/02\\\\\" } } sink_id: 256\"\\nstatus_proto {\\n code: 11\\n space: \"generic\"\\n message: \"Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\\\\\'cv2\\\\\\'. 
Traceback (most recent call last):\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 119, in call_udf\\\\n result = [\\\\n ^\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 120, in \\\\n func(*row)\\\\n File \\\\\"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\\\\\", line 43, in bigframes_handler\\\\n return image_blur(*args)\\\\n ^^^^^^^^^^^^^^^^^\\\\n File \\\\\"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\\\\\", line 5, in image_blur\\\\n import cv2 as cv\\\\nModuleNotFoundError: No module named \\\\\\'cv2\\\\\\'\\\\n\"\\n}\\nerror_details {\\n runtime_error {\\n query_error {\\n }\\n }\\n debug_info {\\n error_message_template: \"Encounter an error when invoking the function: $0\"\\n error_id: 3276645920\\n }\\n}\\n errorProto=code: \"QUERY_ERROR\"\\nargument: \"Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\\\\\'cv2\\\\\\'. Traceback (most recent call last):\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 119, in call_udf\\\\n result = [\\\\n ^\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 120, in \\\\n func(*row)\\\\n File \\\\\"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\\\\\", line 43, in bigframes_handler\\\\n return image_blur(*args)\\\\n ^^^^^^^^^^^^^^^^^\\\\n File \\\\\"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\\\\\", line 5, in image_blur\\\\n import cv2 as cv\\\\nModuleNotFoundError: No module named \\\\\\'cv2\\\\\\'\\\\n\"\\nlocation_type: OTHER\\nlocation: \"query\"\\n\\n\\tat com.google.cloud.helix.common.Exceptions.fromProto(Exceptions.java:1983)\\n\\tat com.google.cloud.helix.common.dremel.QueryExecutorImpl.mapDremelErrorsTohelixException(QueryExecutorImpl.java:1235)\\n\\tat com.google.cloud.helix.common.dremel.QueryExecutorImpl$ConfiguredQueryMigration$StreamHandler.onMessage(QueryExecutorImpl.java:810)\\n\\tat 
com.google.cloud.helix.common.dremel.QueryExecutorImpl$ConfiguredQueryMigration$StreamHandler.onMessage(QueryExecutorImpl.java:736)\\n\\tat com.google.net.rpc3.stream.RpcMessageCallback$ForwardingRpcMessageCallback.onMessage(RpcMessageCallback.java:128)\\n\\tat com.google.net.rpc3.impl.RpcStreamInternalContext.processMessageUnlocked(RpcStreamInternalContext.java:1861)\\n\\tat com.google.net.rpc3.impl.RpcStreamInternalContext.invokeCallbacksInternalUnlocked(RpcStreamInternalContext.java:2916)\\n\\tat com.google.net.rpc3.impl.RpcStreamInternalContext.invokeCallbacksUnlocked(RpcStreamInternalContext.java:2842)\\n\\tat com.google.net.eventmanager.AbstractFutureTask$Sync.innerRun(AbstractFutureTask.java:259)\\n\\tat com.google.net.eventmanager.AbstractFutureTask.run(AbstractFutureTask.java:120)\\n\\tat com.google.net.eventmanager.EventManagerImpl.runTaskTraced(EventManagerImpl.java:903)\\n\\tat com.google.net.eventmanager.EventManagerImpl.runTask(EventManagerImpl.java:895)\\n\\tat com.google.net.eventmanager.EventManagerImpl.internalRunWorkerLoop(EventManagerImpl.java:1322)\\n\\tat com.google.net.eventmanager.EventManagerImpl.runWorkerLoop(EventManagerImpl.java:1213)\\n\\tat com.google.net.eventmanager.WorkerThreadInfo.runWorkerLoop(WorkerThreadInfo.java:153)\\n\\tat com.google.net.eventmanager.EventManagerImpl$WorkerThread.run(EventManagerImpl.java:2006)\\n'}]", - "output_type": "error", - "traceback": [ - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mBadRequest\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[10]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;66;03m# Ask the same question on the images\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m answer = gemini.predict(df_image, prompt=[\u001b[33m\"what item is it?\"\u001b[39m, df_image[\u001b[33m\"image\"\u001b[39m]])\n\u001b[32m 3\u001b[39m 
answer[[\u001b[33m\"ml_generate_text_llm_result\"\u001b[39m, \u001b[33m\"image\"\u001b[39m]]\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/core/logging/log_adapter.py:183\u001b[39m, in \u001b[36mmethod_logger..outer_wrapper..wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 180\u001b[39m _call_stack.append(full_method_name)\n\u001b[32m 182\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m183\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 184\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mNotImplementedError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 185\u001b[39m \u001b[38;5;66;03m# Log method parameters that are implemented in pandas but either missing (TypeError)\u001b[39;00m\n\u001b[32m 186\u001b[39m \u001b[38;5;66;03m# or not fully supported (NotImplementedError) in BigFrames.\u001b[39;00m\n\u001b[32m 187\u001b[39m \u001b[38;5;66;03m# Logging is currently supported only when we can access the bqclient through\u001b[39;00m\n\u001b[32m 188\u001b[39m \u001b[38;5;66;03m# _block.session.bqclient.\u001b[39;00m\n\u001b[32m 189\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(_call_stack) == \u001b[32m1\u001b[39m:\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/ml/llm.py:764\u001b[39m, in \u001b[36mGeminiTextGenerator.predict\u001b[39m\u001b[34m(self, X, temperature, max_output_tokens, top_k, top_p, ground_with_google_search, max_retries, prompt, output_schema)\u001b[39m\n\u001b[32m 756\u001b[39m 
options[\u001b[33m\"\u001b[39m\u001b[33moutput_schema\u001b[39m\u001b[33m\"\u001b[39m] = output_schema\n\u001b[32m 757\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._predict_and_retry(\n\u001b[32m 758\u001b[39m core.BqmlModel.generate_table_tvf,\n\u001b[32m 759\u001b[39m X,\n\u001b[32m 760\u001b[39m options=options,\n\u001b[32m 761\u001b[39m max_retries=max_retries,\n\u001b[32m 762\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m764\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_predict_and_retry\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 765\u001b[39m \u001b[43m \u001b[49m\u001b[43mcore\u001b[49m\u001b[43m.\u001b[49m\u001b[43mBqmlModel\u001b[49m\u001b[43m.\u001b[49m\u001b[43mgenerate_text_tvf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 766\u001b[39m \u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 767\u001b[39m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[43m=\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 768\u001b[39m \u001b[43m \u001b[49m\u001b[43mmax_retries\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmax_retries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 769\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/ml/base.py:282\u001b[39m, in \u001b[36mRetriableRemotePredictor._predict_and_retry\u001b[39m\u001b[34m(self, bqml_model_predict_tvf, X, options, max_retries)\u001b[39m\n\u001b[32m 279\u001b[39m warnings.warn(msg, category=\u001b[38;5;167;01mRuntimeWarning\u001b[39;00m)\n\u001b[32m 280\u001b[39m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m282\u001b[39m df = 
\u001b[43mbqml_model_predict_tvf\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtvf\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_bqml_model\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdf_fail\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 284\u001b[39m success = df[bqml_model_predict_tvf.status_col].str.len() == \u001b[32m0\u001b[39m\n\u001b[32m 285\u001b[39m df_succ = df[success]\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/ml/core.py:197\u001b[39m, in \u001b[36mBqmlModel.generate_text\u001b[39m\u001b[34m(self, input_data, options)\u001b[39m\n\u001b[32m 191\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mgenerate_text\u001b[39m(\n\u001b[32m 192\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m 193\u001b[39m input_data: bpd.DataFrame,\n\u001b[32m 194\u001b[39m options: \u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, Union[\u001b[38;5;28mint\u001b[39m, \u001b[38;5;28mfloat\u001b[39m, \u001b[38;5;28mbool\u001b[39m]],\n\u001b[32m 195\u001b[39m ) -> bpd.DataFrame:\n\u001b[32m 196\u001b[39m options[\u001b[33m\"\u001b[39m\u001b[33mflatten_json_output\u001b[39m\u001b[33m\"\u001b[39m] = \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m197\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_apply_ml_tvf\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 198\u001b[39m \u001b[43m \u001b[49m\u001b[43minput_data\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 199\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mlambda\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43msource_sql\u001b[49m\u001b[43m:\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_sql_generator\u001b[49m\u001b[43m.\u001b[49m\u001b[43mml_generate_text\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 200\u001b[39m \u001b[43m \u001b[49m\u001b[43msource_sql\u001b[49m\u001b[43m=\u001b[49m\u001b[43msource_sql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 201\u001b[39m \u001b[43m \u001b[49m\u001b[43mstruct_options\u001b[49m\u001b[43m=\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 202\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 203\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/ml/core.py:103\u001b[39m, in \u001b[36mBqmlModel._apply_ml_tvf\u001b[39m\u001b[34m(self, input_data, apply_sql_tvf)\u001b[39m\n\u001b[32m 98\u001b[39m input_sql, index_col_ids, index_labels = input_data._to_sql_query(\n\u001b[32m 99\u001b[39m include_index=\u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[32m 100\u001b[39m )\n\u001b[32m 102\u001b[39m result_sql = apply_sql_tvf(input_sql)\n\u001b[32m--> \u001b[39m\u001b[32m103\u001b[39m df = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_session\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread_gbq_query\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 104\u001b[39m \u001b[43m \u001b[49m\u001b[43mresult_sql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 105\u001b[39m \u001b[43m \u001b[49m\u001b[43mindex_col\u001b[49m\u001b[43m=\u001b[49m\u001b[43mindex_col_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 106\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Many ML methods use nested JSON, which isn't yet compatible with\u001b[39;49;00m\n\u001b[32m 107\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# joining local results. 
Also, there is a chance that the results\u001b[39;49;00m\n\u001b[32m 108\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# are greater than 10 GB.\u001b[39;49;00m\n\u001b[32m 109\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# TODO(b/395912450): Once the limitations with local data are\u001b[39;49;00m\n\u001b[32m 110\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# resolved, consider setting allow_large_results only when expected\u001b[39;49;00m\n\u001b[32m 111\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# data size is large.\u001b[39;49;00m\n\u001b[32m 112\u001b[39m \u001b[43m \u001b[49m\u001b[43mallow_large_results\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 113\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 114\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m df._has_index:\n\u001b[32m 115\u001b[39m df.index.names = index_labels\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/core/logging/log_adapter.py:183\u001b[39m, in \u001b[36mmethod_logger..outer_wrapper..wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 180\u001b[39m _call_stack.append(full_method_name)\n\u001b[32m 182\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m183\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 184\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mNotImplementedError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 185\u001b[39m \u001b[38;5;66;03m# Log method parameters that are implemented in pandas but either missing (TypeError)\u001b[39;00m\n\u001b[32m 
186\u001b[39m \u001b[38;5;66;03m# or not fully supported (NotImplementedError) in BigFrames.\u001b[39;00m\n\u001b[32m 187\u001b[39m \u001b[38;5;66;03m# Logging is currently supported only when we can access the bqclient through\u001b[39;00m\n\u001b[32m 188\u001b[39m \u001b[38;5;66;03m# _block.session.bqclient.\u001b[39;00m\n\u001b[32m 189\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(_call_stack) == \u001b[32m1\u001b[39m:\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/__init__.py:780\u001b[39m, in \u001b[36mSession.read_gbq_query\u001b[39m\u001b[34m(self, query, index_col, columns, configuration, max_results, use_cache, col_order, filters, dry_run, allow_large_results)\u001b[39m\n\u001b[32m 777\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m allow_large_results \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 778\u001b[39m allow_large_results = bigframes._config.options._allow_large_results\n\u001b[32m--> \u001b[39m\u001b[32m780\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_loader\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread_gbq_query\u001b[49m\u001b[43m(\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# type: ignore # for dry_run overload\u001b[39;49;00m\n\u001b[32m 781\u001b[39m \u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[43m=\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 782\u001b[39m \u001b[43m \u001b[49m\u001b[43mindex_col\u001b[49m\u001b[43m=\u001b[49m\u001b[43mindex_col\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 783\u001b[39m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 784\u001b[39m \u001b[43m \u001b[49m\u001b[43mconfiguration\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconfiguration\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 785\u001b[39m 
\u001b[43m \u001b[49m\u001b[43mmax_results\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmax_results\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 786\u001b[39m \u001b[43m \u001b[49m\u001b[43muse_cache\u001b[49m\u001b[43m=\u001b[49m\u001b[43muse_cache\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 787\u001b[39m \u001b[43m \u001b[49m\u001b[43mfilters\u001b[49m\u001b[43m=\u001b[49m\u001b[43mfilters\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 788\u001b[39m \u001b[43m \u001b[49m\u001b[43mdry_run\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdry_run\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 789\u001b[39m \u001b[43m \u001b[49m\u001b[43mallow_large_results\u001b[49m\u001b[43m=\u001b[49m\u001b[43mallow_large_results\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 790\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/loader.py:1260\u001b[39m, in \u001b[36mGbqDataLoader.read_gbq_query\u001b[39m\u001b[34m(self, query, index_col, columns, configuration, max_results, use_cache, filters, dry_run, force_total_order, allow_large_results)\u001b[39m\n\u001b[32m 1257\u001b[39m \u001b[38;5;66;03m# TODO(b/421161077): If an explicit destination table is set in\u001b[39;00m\n\u001b[32m 1258\u001b[39m \u001b[38;5;66;03m# configuration, should we respect that setting?\u001b[39;00m\n\u001b[32m 1259\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m allow_large_results:\n\u001b[32m-> \u001b[39m\u001b[32m1260\u001b[39m destination, query_job = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_query_to_destination\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1261\u001b[39m \u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1262\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# No cluster candidates as user query might not be clusterable\u001b[39;49;00m\n\u001b[32m 1263\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# (eg because of ORDER BY 
clause)\u001b[39;49;00m\n\u001b[32m 1264\u001b[39m \u001b[43m \u001b[49m\u001b[43mcluster_candidates\u001b[49m\u001b[43m=\u001b[49m\u001b[43m[\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1265\u001b[39m \u001b[43m \u001b[49m\u001b[43mconfiguration\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconfiguration\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1266\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1267\u001b[39m query_job_for_metrics = query_job\n\u001b[32m 1268\u001b[39m rows: Optional[google.cloud.bigquery.table.RowIterator] = \u001b[38;5;28;01mNone\u001b[39;00m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/loader.py:1435\u001b[39m, in \u001b[36mGbqDataLoader._query_to_destination\u001b[39m\u001b[34m(self, query, cluster_candidates, configuration, do_clustering)\u001b[39m\n\u001b[32m 1429\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m query_job.destination, query_job\n\u001b[32m 1430\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m google.api_core.exceptions.BadRequest:\n\u001b[32m 1431\u001b[39m \u001b[38;5;66;03m# Some SELECT statements still aren't compatible with cluster\u001b[39;00m\n\u001b[32m 1432\u001b[39m \u001b[38;5;66;03m# tables as the destination. 
For example, if the query has a\u001b[39;00m\n\u001b[32m 1433\u001b[39m \u001b[38;5;66;03m# top-level ORDER BY, this conflicts with our ability to cluster\u001b[39;00m\n\u001b[32m 1434\u001b[39m \u001b[38;5;66;03m# the table by the index column(s).\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1435\u001b[39m query_job = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_start_query_with_job\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1436\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m query_job.destination, query_job\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/loader.py:1492\u001b[39m, in \u001b[36mGbqDataLoader._start_query_with_job\u001b[39m\u001b[34m(self, sql, job_config, timeout)\u001b[39m\n\u001b[32m 1486\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 1487\u001b[39m \u001b[33;03mStarts BigQuery query job and waits for results.\u001b[39;00m\n\u001b[32m 1488\u001b[39m \n\u001b[32m 1489\u001b[39m \u001b[33;03mDo not execute dataframe through this API, instead use the executor.\u001b[39;00m\n\u001b[32m 1490\u001b[39m \u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 1491\u001b[39m job_config = \u001b[38;5;28mself\u001b[39m._prepare_job_config(job_config)\n\u001b[32m-> \u001b[39m\u001b[32m1492\u001b[39m _, query_job = \u001b[43mbf_io_bigquery\u001b[49m\u001b[43m.\u001b[49m\u001b[43mstart_query_with_client\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1493\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_bqclient\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1494\u001b[39m \u001b[43m \u001b[49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1495\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mjob_config\u001b[49m\u001b[43m=\u001b[49m\u001b[43mjob_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1496\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1497\u001b[39m \u001b[43m \u001b[49m\u001b[43mlocation\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 1498\u001b[39m \u001b[43m \u001b[49m\u001b[43mproject\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 1499\u001b[39m \u001b[43m \u001b[49m\u001b[43mmetrics\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 1500\u001b[39m \u001b[43m \u001b[49m\u001b[43mquery_with_job\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 1501\u001b[39m \u001b[43m \u001b[49m\u001b[43mpublisher\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_publisher\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1502\u001b[39m \u001b[43m \u001b[49m\u001b[43msession\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_session\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1503\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1504\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m query_job\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/_io/bigquery/__init__.py:395\u001b[39m, in \u001b[36mstart_query_with_client\u001b[39m\u001b[34m(bq_client, sql, job_config, location, project, timeout, metrics, query_with_job, job_retry, publisher, session)\u001b[39m\n\u001b[32m 385\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m query_job.configuration.dry_run:\n\u001b[32m 386\u001b[39m publisher.publish(\n\u001b[32m 387\u001b[39m 
bigframes.core.events.BigQuerySentEvent(\n\u001b[32m 388\u001b[39m sql,\n\u001b[32m (...)\u001b[39m\u001b[32m 393\u001b[39m )\n\u001b[32m 394\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m395\u001b[39m results_iterator = \u001b[43mquery_job\u001b[49m\u001b[43m.\u001b[49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 396\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m query_job.configuration.dry_run:\n\u001b[32m 397\u001b[39m publisher.publish(\n\u001b[32m 398\u001b[39m bigframes.core.events.BigQueryFinishedEvent(\n\u001b[32m 399\u001b[39m billing_project=query_job.project,\n\u001b[32m (...)\u001b[39m\u001b[32m 409\u001b[39m )\n\u001b[32m 410\u001b[39m )\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/job/query.py:1797\u001b[39m, in \u001b[36mQueryJob.result\u001b[39m\u001b[34m(self, page_size, max_results, retry, timeout, start_index, job_retry)\u001b[39m\n\u001b[32m 1792\u001b[39m remaining_timeout = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1794\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m remaining_timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 1795\u001b[39m \u001b[38;5;66;03m# Since is_job_done() calls jobs.getQueryResults, which is a\u001b[39;00m\n\u001b[32m 1796\u001b[39m \u001b[38;5;66;03m# long-running API, don't delay the next request at all.\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1797\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[43mis_job_done\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[32m 1798\u001b[39m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[32m 1799\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1800\u001b[39m \u001b[38;5;66;03m# Use a monotonic clock since we don't actually care about\u001b[39;00m\n\u001b[32m 1801\u001b[39m 
\u001b[38;5;66;03m# daylight savings or similar, just the elapsed time.\u001b[39;00m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:294\u001b[39m, in \u001b[36mRetry.__call__..retry_wrapped_func\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 290\u001b[39m target = functools.partial(func, *args, **kwargs)\n\u001b[32m 291\u001b[39m sleep_generator = exponential_sleep_generator(\n\u001b[32m 292\u001b[39m \u001b[38;5;28mself\u001b[39m._initial, \u001b[38;5;28mself\u001b[39m._maximum, multiplier=\u001b[38;5;28mself\u001b[39m._multiplier\n\u001b[32m 293\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m294\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mretry_target\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 295\u001b[39m \u001b[43m \u001b[49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 296\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_predicate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 297\u001b[39m \u001b[43m \u001b[49m\u001b[43msleep_generator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 298\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 299\u001b[39m \u001b[43m \u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m=\u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 300\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:156\u001b[39m, in \u001b[36mretry_target\u001b[39m\u001b[34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[39m\n\u001b[32m 152\u001b[39m \u001b[38;5;66;03m# 
pylint: disable=broad-except\u001b[39;00m\n\u001b[32m 153\u001b[39m \u001b[38;5;66;03m# This function explicitly must deal with broad exceptions.\u001b[39;00m\n\u001b[32m 154\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[32m 155\u001b[39m \u001b[38;5;66;03m# defer to shared logic for handling errors\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m156\u001b[39m next_sleep = \u001b[43m_retry_error_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 157\u001b[39m \u001b[43m \u001b[49m\u001b[43mexc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 158\u001b[39m \u001b[43m \u001b[49m\u001b[43mdeadline\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 159\u001b[39m \u001b[43m \u001b[49m\u001b[43msleep_iter\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 160\u001b[39m \u001b[43m \u001b[49m\u001b[43merror_list\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 161\u001b[39m \u001b[43m \u001b[49m\u001b[43mpredicate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 162\u001b[39m \u001b[43m \u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 163\u001b[39m \u001b[43m \u001b[49m\u001b[43mexception_factory\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 164\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 165\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 166\u001b[39m \u001b[38;5;66;03m# if exception not raised, sleep before next attempt\u001b[39;00m\n\u001b[32m 167\u001b[39m time.sleep(next_sleep)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_base.py:216\u001b[39m, in \u001b[36m_retry_error_helper\u001b[39m\u001b[34m(exc, deadline, sleep_iterator, error_list, predicate_fn, on_error_fn, exc_factory_fn, original_timeout)\u001b[39m\n\u001b[32m 210\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m 
predicate_fn(exc):\n\u001b[32m 211\u001b[39m final_exc, source_exc = exc_factory_fn(\n\u001b[32m 212\u001b[39m error_list,\n\u001b[32m 213\u001b[39m RetryFailureReason.NON_RETRYABLE_ERROR,\n\u001b[32m 214\u001b[39m original_timeout,\n\u001b[32m 215\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m216\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m final_exc \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msource_exc\u001b[39;00m\n\u001b[32m 217\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m on_error_fn \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 218\u001b[39m on_error_fn(exc)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:147\u001b[39m, in \u001b[36mretry_target\u001b[39m\u001b[34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[39m\n\u001b[32m 145\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m 146\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m147\u001b[39m result = \u001b[43mtarget\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 148\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m inspect.isawaitable(result):\n\u001b[32m 149\u001b[39m warnings.warn(_ASYNC_RETRY_WARNING)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/job/query.py:1766\u001b[39m, in \u001b[36mQueryJob.result..is_job_done\u001b[39m\u001b[34m()\u001b[39m\n\u001b[32m 1760\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[32m 1762\u001b[39m \u001b[38;5;66;03m# Call jobs.getQueryResults with max results set to 0 just to\u001b[39;00m\n\u001b[32m 1763\u001b[39m \u001b[38;5;66;03m# wait for 
the query to finish. Unlike most methods,\u001b[39;00m\n\u001b[32m 1764\u001b[39m \u001b[38;5;66;03m# jobs.getQueryResults hangs as long as it can to ensure we\u001b[39;00m\n\u001b[32m 1765\u001b[39m \u001b[38;5;66;03m# know when the query has finished as soon as possible.\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1766\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_reload_query_results\u001b[49m\u001b[43m(\u001b[49m\u001b[43mretry\u001b[49m\u001b[43m=\u001b[49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mreload_query_results_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1768\u001b[39m \u001b[38;5;66;03m# Even if the query is finished now according to\u001b[39;00m\n\u001b[32m 1769\u001b[39m \u001b[38;5;66;03m# jobs.getQueryResults, we'll want to reload the job status if\u001b[39;00m\n\u001b[32m 1770\u001b[39m \u001b[38;5;66;03m# it's not already DONE.\u001b[39;00m\n\u001b[32m 1771\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/job/query.py:1558\u001b[39m, in \u001b[36mQueryJob._reload_query_results\u001b[39m\u001b[34m(self, retry, timeout, page_size, start_index)\u001b[39m\n\u001b[32m 1555\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(transport_timeout, (\u001b[38;5;28mfloat\u001b[39m, \u001b[38;5;28mint\u001b[39m)):\n\u001b[32m 1556\u001b[39m transport_timeout = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1558\u001b[39m \u001b[38;5;28mself\u001b[39m._query_results = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_client\u001b[49m\u001b[43m.\u001b[49m\u001b[43m_get_query_results\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1559\u001b[39m 
\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mjob_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1560\u001b[39m \u001b[43m \u001b[49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1561\u001b[39m \u001b[43m \u001b[49m\u001b[43mproject\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mproject\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1562\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout_ms\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout_ms\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1563\u001b[39m \u001b[43m \u001b[49m\u001b[43mlocation\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mlocation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1564\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtransport_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1565\u001b[39m \u001b[43m \u001b[49m\u001b[43mpage_size\u001b[49m\u001b[43m=\u001b[49m\u001b[43mpage_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1566\u001b[39m \u001b[43m \u001b[49m\u001b[43mstart_index\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstart_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1567\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/client.py:2113\u001b[39m, in \u001b[36mClient._get_query_results\u001b[39m\u001b[34m(self, job_id, retry, project, timeout_ms, location, timeout, page_size, start_index)\u001b[39m\n\u001b[32m 2109\u001b[39m \u001b[38;5;66;03m# This call is typically made in a polling loop that checks whether the\u001b[39;00m\n\u001b[32m 2110\u001b[39m \u001b[38;5;66;03m# job is complete (from QueryJob.done(), called ultimately from\u001b[39;00m\n\u001b[32m 2111\u001b[39m \u001b[38;5;66;03m# QueryJob.result()). 
So we don't need to poll here.\u001b[39;00m\n\u001b[32m 2112\u001b[39m span_attributes = {\u001b[33m\"\u001b[39m\u001b[33mpath\u001b[39m\u001b[33m\"\u001b[39m: path}\n\u001b[32m-> \u001b[39m\u001b[32m2113\u001b[39m resource = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_call_api\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 2114\u001b[39m \u001b[43m \u001b[49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2115\u001b[39m \u001b[43m \u001b[49m\u001b[43mspan_name\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mBigQuery.getQueryResults\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 2116\u001b[39m \u001b[43m \u001b[49m\u001b[43mspan_attributes\u001b[49m\u001b[43m=\u001b[49m\u001b[43mspan_attributes\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2117\u001b[39m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mGET\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 2118\u001b[39m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m=\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2119\u001b[39m \u001b[43m \u001b[49m\u001b[43mquery_params\u001b[49m\u001b[43m=\u001b[49m\u001b[43mextra_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2120\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2121\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 2122\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m _QueryResults.from_api_repr(resource)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/bigquery/client.py:861\u001b[39m, in \u001b[36mClient._call_api\u001b[39m\u001b[34m(self, retry, span_name, span_attributes, job_ref, headers, **kwargs)\u001b[39m\n\u001b[32m 857\u001b[39m 
\u001b[38;5;28;01mif\u001b[39;00m span_name \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 858\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m create_span(\n\u001b[32m 859\u001b[39m name=span_name, attributes=span_attributes, client=\u001b[38;5;28mself\u001b[39m, job_ref=job_ref\n\u001b[32m 860\u001b[39m ):\n\u001b[32m--> \u001b[39m\u001b[32m861\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcall\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 863\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m call()\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:294\u001b[39m, in \u001b[36mRetry.__call__..retry_wrapped_func\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 290\u001b[39m target = functools.partial(func, *args, **kwargs)\n\u001b[32m 291\u001b[39m sleep_generator = exponential_sleep_generator(\n\u001b[32m 292\u001b[39m \u001b[38;5;28mself\u001b[39m._initial, \u001b[38;5;28mself\u001b[39m._maximum, multiplier=\u001b[38;5;28mself\u001b[39m._multiplier\n\u001b[32m 293\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m294\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mretry_target\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 295\u001b[39m \u001b[43m \u001b[49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 296\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_predicate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 297\u001b[39m \u001b[43m \u001b[49m\u001b[43msleep_generator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 298\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 299\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m=\u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 300\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:156\u001b[39m, in \u001b[36mretry_target\u001b[39m\u001b[34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[39m\n\u001b[32m 152\u001b[39m \u001b[38;5;66;03m# pylint: disable=broad-except\u001b[39;00m\n\u001b[32m 153\u001b[39m \u001b[38;5;66;03m# This function explicitly must deal with broad exceptions.\u001b[39;00m\n\u001b[32m 154\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[32m 155\u001b[39m \u001b[38;5;66;03m# defer to shared logic for handling errors\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m156\u001b[39m next_sleep = \u001b[43m_retry_error_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 157\u001b[39m \u001b[43m \u001b[49m\u001b[43mexc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 158\u001b[39m \u001b[43m \u001b[49m\u001b[43mdeadline\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 159\u001b[39m \u001b[43m \u001b[49m\u001b[43msleep_iter\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 160\u001b[39m \u001b[43m \u001b[49m\u001b[43merror_list\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 161\u001b[39m \u001b[43m \u001b[49m\u001b[43mpredicate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 162\u001b[39m \u001b[43m \u001b[49m\u001b[43mon_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 163\u001b[39m \u001b[43m \u001b[49m\u001b[43mexception_factory\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 164\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 165\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 166\u001b[39m \u001b[38;5;66;03m# if exception 
not raised, sleep before next attempt\u001b[39;00m\n\u001b[32m 167\u001b[39m time.sleep(next_sleep)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_base.py:216\u001b[39m, in \u001b[36m_retry_error_helper\u001b[39m\u001b[34m(exc, deadline, sleep_iterator, error_list, predicate_fn, on_error_fn, exc_factory_fn, original_timeout)\u001b[39m\n\u001b[32m 210\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m predicate_fn(exc):\n\u001b[32m 211\u001b[39m final_exc, source_exc = exc_factory_fn(\n\u001b[32m 212\u001b[39m error_list,\n\u001b[32m 213\u001b[39m RetryFailureReason.NON_RETRYABLE_ERROR,\n\u001b[32m 214\u001b[39m original_timeout,\n\u001b[32m 215\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m216\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m final_exc \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msource_exc\u001b[39;00m\n\u001b[32m 217\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m on_error_fn \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 218\u001b[39m on_error_fn(exc)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:147\u001b[39m, in \u001b[36mretry_target\u001b[39m\u001b[34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[39m\n\u001b[32m 145\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m 146\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m147\u001b[39m result = \u001b[43mtarget\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 148\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m inspect.isawaitable(result):\n\u001b[32m 149\u001b[39m 
warnings.warn(_ASYNC_RETRY_WARNING)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages/google/cloud/_http/__init__.py:494\u001b[39m, in \u001b[36mJSONConnection.api_request\u001b[39m\u001b[34m(self, method, path, query_params, data, content_type, headers, api_base_url, api_version, expect_json, _target_object, timeout, extra_api_info)\u001b[39m\n\u001b[32m 482\u001b[39m response = \u001b[38;5;28mself\u001b[39m._make_request(\n\u001b[32m 483\u001b[39m method=method,\n\u001b[32m 484\u001b[39m url=url,\n\u001b[32m (...)\u001b[39m\u001b[32m 490\u001b[39m extra_api_info=extra_api_info,\n\u001b[32m 491\u001b[39m )\n\u001b[32m 493\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[32m200\u001b[39m <= response.status_code < \u001b[32m300\u001b[39m:\n\u001b[32m--> \u001b[39m\u001b[32m494\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m exceptions.from_http_response(response)\n\u001b[32m 496\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m expect_json \u001b[38;5;129;01mand\u001b[39;00m response.content:\n\u001b[32m 497\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m response.json()\n", - "\u001b[31mBadRequest\u001b[39m: 400 GET https://bigquery.googleapis.com/bigquery/v2/projects/bigframes-dev/queries/5e86e836-354e-439f-8300-f6186ff77b79?maxResults=0&location=US&prettyPrint=false: Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named 'cv2'. 
Traceback (most recent call last):\n File \"/srv/grpc/bq_udf_server.py\", line 119, in call_udf\n result = [\n ^\n File \"/srv/grpc/bq_udf_server.py\", line 120, in \n func(*row)\n File \"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\", line 43, in bigframes_handler\n return image_blur(*args)\n ^^^^^^^^^^^^^^^^^\n File \"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\", line 5, in image_blur\n import cv2 as cv\nModuleNotFoundError: No module named 'cv2'\n\n\nLocation: US\nJob ID: 5e86e836-354e-439f-8300-f6186ff77b79\n [{'@type': 'type.googleapis.com/google.rpc.DebugInfo', 'detail': '[INVALID_INPUT] message=QUERY_ERROR: [Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\'cv2\\'. Traceback (most recent call last):\\n File \"/srv/grpc/bq_udf_server.py\", line 119, in call_udf\\n result = [\\n ^\\n File \"/srv/grpc/bq_udf_server.py\", line 120, in \\n func(*row)\\n File \"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\", line 43, in bigframes_handler\\n return image_blur(*args)\\n ^^^^^^^^^^^^^^^^^\\n File \"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\", line 5, in image_blur\\n import cv2 as cv\\nModuleNotFoundError: No module named \\'cv2\\'\\n] debug=code: \\t BAD_QUERY\\ndescription: \"Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\\\\\'cv2\\\\\\'. 
Traceback (most recent call last):\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 119, in call_udf\\\\n result = [\\\\n ^\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 120, in \\\\n func(*row)\\\\n File \\\\\"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\\\\\", line 43, in bigframes_handler\\\\n return image_blur(*args)\\\\n ^^^^^^^^^^^^^^^^^\\\\n File \\\\\"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\\\\\", line 5, in image_blur\\\\n import cv2 as cv\\\\nModuleNotFoundError: No module named \\\\\\'cv2\\\\\\'\\\\n\"\\ncause: USER_ERROR\\naddress: \"http://jfbxw15.prod.google.com:4901/task?handle=logs.21974.serving.shard-hals.cloud-dataengine.14302809449245 Partition description: __SHUFFLE1/256 ShuffleByTableDef \\\\\\'__SHUFFLE0\\\\\\' shuffle \\\\t { spec { num_sources: 1 num_sinks: 500 } partitioning { rand { num_partitions: 500 } } config { group_id: 173353402696091398 shuffle_id: 0 precreated_mfs_path: \\\\\"/mfs/jf/home/cloud-dataengine/serving-shuffle/ttl=3d/a7f7e000544e4706_shuffle_cache/02\\\\\" } } sink_id: 256\"\\nstatus_proto {\\n code: 11\\n space: \"generic\"\\n message: \"Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\\\\\'cv2\\\\\\'. 
Traceback (most recent call last):\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 119, in call_udf\\\\n result = [\\\\n ^\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 120, in \\\\n func(*row)\\\\n File \\\\\"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\\\\\", line 43, in bigframes_handler\\\\n return image_blur(*args)\\\\n ^^^^^^^^^^^^^^^^^\\\\n File \\\\\"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\\\\\", line 5, in image_blur\\\\n import cv2 as cv\\\\nModuleNotFoundError: No module named \\\\\\'cv2\\\\\\'\\\\n\"\\n}\\nerror_details {\\n runtime_error {\\n query_error {\\n }\\n }\\n debug_info {\\n error_message_template: \"Encounter an error when invoking the function: $0\"\\n error_id: 3276645920\\n }\\n}\\n errorProto=code: \"QUERY_ERROR\"\\nargument: \"Encounter an error when invoking the function: Flight error: Encounter an error when executing the function No module named \\\\\\'cv2\\\\\\'. Traceback (most recent call last):\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 119, in call_udf\\\\n result = [\\\\n ^\\\\n File \\\\\"/srv/grpc/bq_udf_server.py\\\\\", line 120, in \\\\n func(*row)\\\\n File \\\\\"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\\\\\", line 43, in bigframes_handler\\\\n return image_blur(*args)\\\\n ^^^^^^^^^^^^^^^^^\\\\n File \\\\\"/udf_modules/image_blur_69f29874_0000_2179_be2f_14223bc42286.py\\\\\", line 5, in image_blur\\\\n import cv2 as cv\\\\nModuleNotFoundError: No module named \\\\\\'cv2\\\\\\'\\\\n\"\\nlocation_type: OTHER\\nlocation: \"query\"\\n\\n\\tat com.google.cloud.helix.common.Exceptions.fromProto(Exceptions.java:1983)\\n\\tat com.google.cloud.helix.common.dremel.QueryExecutorImpl.mapDremelErrorsTohelixException(QueryExecutorImpl.java:1235)\\n\\tat com.google.cloud.helix.common.dremel.QueryExecutorImpl$ConfiguredQueryMigration$StreamHandler.onMessage(QueryExecutorImpl.java:810)\\n\\tat 
com.google.cloud.helix.common.dremel.QueryExecutorImpl$ConfiguredQueryMigration$StreamHandler.onMessage(QueryExecutorImpl.java:736)\\n\\tat com.google.net.rpc3.stream.RpcMessageCallback$ForwardingRpcMessageCallback.onMessage(RpcMessageCallback.java:128)\\n\\tat com.google.net.rpc3.impl.RpcStreamInternalContext.processMessageUnlocked(RpcStreamInternalContext.java:1861)\\n\\tat com.google.net.rpc3.impl.RpcStreamInternalContext.invokeCallbacksInternalUnlocked(RpcStreamInternalContext.java:2916)\\n\\tat com.google.net.rpc3.impl.RpcStreamInternalContext.invokeCallbacksUnlocked(RpcStreamInternalContext.java:2842)\\n\\tat com.google.net.eventmanager.AbstractFutureTask$Sync.innerRun(AbstractFutureTask.java:259)\\n\\tat com.google.net.eventmanager.AbstractFutureTask.run(AbstractFutureTask.java:120)\\n\\tat com.google.net.eventmanager.EventManagerImpl.runTaskTraced(EventManagerImpl.java:903)\\n\\tat com.google.net.eventmanager.EventManagerImpl.runTask(EventManagerImpl.java:895)\\n\\tat com.google.net.eventmanager.EventManagerImpl.internalRunWorkerLoop(EventManagerImpl.java:1322)\\n\\tat com.google.net.eventmanager.EventManagerImpl.runWorkerLoop(EventManagerImpl.java:1213)\\n\\tat com.google.net.eventmanager.WorkerThreadInfo.runWorkerLoop(WorkerThreadInfo.java:153)\\n\\tat com.google.net.eventmanager.EventManagerImpl$WorkerThread.run(EventManagerImpl.java:2006)\\n'}]" - ] - }, - { - "data": { - "text/html": [ - "\n", - " Query processed 0 Bytes in a moment of slot time. 
[Job bigframes-dev:US.job_7iYLeJKq2dEkHXZjF99wCN-xOm57 details]\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ] + "execution_count": null, + "outputs": [] }, { "id": "68857305", From 1c2c4002f06ee17e44987089193ee7a634a4d7b8 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 1 May 2026 20:52:37 +0000 Subject: [PATCH 31/39] fix: bypass ObjectRef validation and enable notebook photo rendering --- .../multimodal/multimodal_dataframe.ipynb | 205 +++++++++++------- 1 file changed, 132 insertions(+), 73 deletions(-) diff --git a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb index 74d9c36f6310..d63ffe9e8e97 100644 --- a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb @@ -19,8 +19,7 @@ "# limitations under the License." ], "metadata": {}, - "execution_count": null, - "outputs": [] + "execution_count": null }, { "id": "816ab253", @@ -28,26 +27,26 @@ "source": [ "# BigFrames Multimodal DataFrame\n", "\n", - "\n", + "\u003ctable align=\"left\"\u003e\n", "\n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \"Colab Run in Colab\n", - " \n", - " \n", - " \n", - " \"GitHub\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://colab.research.google.com/github/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", + " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/colab-logo.png\" alt=\"Colab logo\"\u003e Run in Colab\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", + " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/github-logo.png\" width=\"32\" alt=\"GitHub logo\"\u003e\n", " View on GitHub\n", - " \n", - " \n", - " \n", - " \"BQ\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", + " \u003cimg src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw\u0026s\" alt=\"BQ logo\" width=\"35\"\u003e\n", " Open in BQ Studio\n", - " \n", - "
\n" + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + "\u003c/table\u003e\n" ], "metadata": { "id": "YOrUAvz6DMw-" @@ -85,7 +84,7 @@ "id": "750954c4", "cell_type": "markdown", "source": [ - "Install the latest bigframes package if bigframes version < 2.4.0" + "Install the latest bigframes package if bigframes version \u003c 2.4.0" ], "metadata": {}, "execution_count": null @@ -97,8 +96,7 @@ "# !pip install bigframes --upgrade" ], "metadata": {}, - "execution_count": null, - "outputs": [] + "execution_count": null }, { "id": "df561d04", @@ -138,8 +136,7 @@ "id": "bGyhLnfEeB0X", "outputId": "83ac8b64-3f44-4d43-d089-28a5026cbb42" }, - "execution_count": null, - "outputs": [] + "execution_count": null }, { "id": "35bd6e6e", @@ -182,11 +179,80 @@ " return bbq.json_value(get_metadata(series), \"$.size\").astype(\"Int64\")\n", "\n", "def get_updated(series):\n", - " return bpd.to_datetime(bbq.json_value(get_metadata(series), \"$.updated\").astype(\"Int64\"), unit=\"us\", utc=True)" + " return bpd.to_datetime(bbq.json_value(get_metadata(series), \"$.updated\").astype(\"Int64\"), unit=\"us\", utc=True)\n", + "\n", + "from IPython.display import HTML, display\n", + "\n", + "def render_images(df):\n", + " \"\"\"Helper to display BigFrames DataFrame with rendered image previews.\"\"\"\n", + " import bigframes.pandas as bpd\n", + " import bigframes.bigquery as bbq\n", + " import bigframes\n", + " from bigframes import dtypes\n", + " import json\n", + " \n", + " if isinstance(df, bpd.Series):\n", + " df = df.to_frame()\n", + " \n", + " # 1. Auto-detect columns holding ObjectRefs\n", + " object_cols = [\n", + " col for col, dtype in zip(df.columns, df.dtypes)\n", + " if dtype == dtypes.OBJ_REF_DTYPE\n", + " ]\n", + " \n", + " if not object_cols:\n", + " display(df)\n", + " return\n", + "\n", + " # Safe restriction mirroring notebook max_rows\n", + " limit = bigframes.options.display.max_rows or 10\n", + " view_df = df.head(limit)\n", + " \n", + " # 2. 
Bulk-fetch access runtime URLs + full metadata for inspection in a single batch query\n", + " runtime_cols = {\n", + " col: get_runtime_json_str(view_df[col], mode=\"R\", with_metadata=True) \n", + " for col in object_cols\n", + " }\n", + " \n", + " # Materialize small string payloads needed for local rendering\n", + " pandas_json_df = bpd.DataFrame(runtime_cols).to_pandas()\n", + " final_pd = view_df.to_pandas()\n", + " \n", + " width = bigframes.options.display.blob_display_width or 300\n", + " \n", + " def format_cell_html(raw_json):\n", + " if not raw_json:\n", + " return \"\"\n", + " try:\n", + " obj_rt = json.loads(raw_json)\n", + " \n", + " if \"access_urls\" not in obj_rt:\n", + " err = obj_rt.get(\"errors\", [{\"message\": \"Invalid Reference\"}])[0].get(\"message\")\n", + " return f'\u003cspan style=\"color:red;\"\u003eError: {err}\u003c/span\u003e'\n", + " \n", + " details = obj_rt.get(\"objectref\", {}).get(\"details\", {})\n", + " if isinstance(details, str): \n", + " details = json.loads(details) # Handles Heterogeneous stringification edge case\n", + " \n", + " c_type = details.get(\"gcs_metadata\", {}).get(\"content_type\", \"\")\n", + " url = obj_rt[\"access_urls\"][\"read_url\"]\n", + " \n", + " if c_type and str(c_type).startswith(\"image\"):\n", + " return f'\u003cimg src=\"{url}\" width=\"{width}\"\u003e'\n", + " \n", + " return f'\u003ca href=\"{url}\" target=\"_blank\"\u003e{obj_rt.get(\"objectref\", {}).get(\"uri\", \"link\")}\u003c/a\u003e'\n", + " except:\n", + " return \"Format Error\"\n", + "\n", + " # 3. Apply formatter locally in pandas memory\n", + " for col in object_cols:\n", + " final_pd[col] = pandas_json_df[col].map(format_cell_html)\n", + " \n", + " # 4. 
Display via unescaped HTML pipeline\n", + " display(HTML(final_pd.to_html(escape=False)))" ], "metadata": {}, - "execution_count": null, - "outputs": [] + "execution_count": null }, { "id": "be9ce892", @@ -230,8 +296,7 @@ "id": "fx6YcZJbeYru", "outputId": "d707954a-0dd0-4c50-b7bf-36b140cf76cf" }, - "execution_count": null, - "outputs": [] + "execution_count": null }, { "id": "2e0436b0", @@ -239,7 +304,7 @@ "source": [ "# Take only the 5 images to deal with. Preview the content of the Mutimodal DataFrame\n", "df_image = df_image.head(5)\n", - "df_image" + "render_images(df_image)" ], "metadata": { "colab": { @@ -249,8 +314,7 @@ "id": "HhCb8jRsLe9B", "outputId": "03081cf9-3a22-42c9-b38f-649f592fdada" }, - "execution_count": null, - "outputs": [] + "execution_count": null }, { "id": "429b0117", @@ -284,13 +348,12 @@ "df_image[\"content_type\"] = get_content_type(df_image[\"image\"])\n", "df_image[\"size\"] = get_size(df_image[\"image\"])\n", "df_image[\"updated\"] = get_updated(df_image[\"image\"])\n", - "df_image" + "render_images(df_image)" ], "metadata": { "id": "YYYVn7NDH0Me" }, - "execution_count": null, - "outputs": [] + "execution_count": null }, { "id": "f90826f6", @@ -325,7 +388,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"opencv-python-headless\", \"numpy\", \"requests\"],\n", ")\n", - "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -> str:\n", + "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -\u003e str:\n", " import json\n", " import cv2 as cv\n", " import numpy as np\n", @@ -333,6 +396,8 @@ " import base64\n", "\n", " src_obj = json.loads(src_rt)\n", + " if \"access_urls\" not in src_obj:\n", + " raise ValueError(f\"Missing 'access_urls' in source object. 
Response: {src_obj}\")\n", " src_url = src_obj[\"access_urls\"][\"read_url\"]\n", " \n", " response = requests.get(src_url, timeout=30)\n", @@ -352,6 +417,8 @@ " # Handle two output modes\n", " if dst_rt: # GCS/Series output mode\n", " dst_obj = json.loads(dst_rt)\n", + " if \"access_urls\" not in dst_obj:\n", + " raise ValueError(f\"Missing 'access_urls' in destination object. Verify authorizer permissions. Response: {dst_obj}\")\n", " dst_url = dst_obj[\"access_urls\"][\"write_url\"]\n", " \n", " requests.put(dst_url, data=encoded.tobytes(), headers={\"Content-Type\": \"image/jpeg\"}, timeout=30).raise_for_status()\n", @@ -370,7 +437,13 @@ " metadata = bbq.obj.fetch_metadata(series)\n", " current_uri = metadata.struct.field(\"uri\")\n", " dst_uri = current_uri.str.replace(r\"^.*\\/(.*)$\", rf\"{dst_folder}\\1\", regex=True)\n", - " dst_blob = bbq.obj.make_ref(dst_uri, authorizer=FULL_CONNECTION_ID)\n", + " \n", + " # To avoid synchronous 404 validation checks on files that don't exist yet, \n", + " # bypass the validator by explicitly constructing an objectref JSON.\n", + " dst_blob_df = bpd.DataFrame({\"uri\": dst_uri})\n", + " dst_blob_df[\"authorizer\"] = FULL_CONNECTION_ID\n", + " dst_blob = bbq.obj.make_ref(bbq.to_json(bbq.struct(dst_blob_df)))\n", + "\n", " df_transform = bpd.DataFrame({\n", " \"src_rt\": get_runtime_json_str(series, mode=\"R\"),\n", " \"dst_rt\": get_runtime_json_str(dst_blob, mode=\"RW\"),\n", @@ -385,7 +458,7 @@ " df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_blur_transformed/\",\n", " image_blur, 20, 20\n", ")\n", - "df_image[[\"image\", \"blurred\"]]" + "render_images(df_image[[\"image\", \"blurred\"]])" ], "metadata": { "colab": { @@ -395,8 +468,7 @@ "id": "HhCb8jRsLe9B", "outputId": "03081cf9-3a22-42c9-b38f-649f592fdada" }, - "execution_count": null, - "outputs": [] + "execution_count": null }, { "id": "11fcc6ec", @@ -419,16 +491,15 @@ "metadata": { "id": "mRUGfcaFVW-3" }, - "execution_count": null, - "outputs": [] + 
"execution_count": null }, { "id": "13d7cb93", "cell_type": "code", "source": [ "# Ask the same question on the images\n", - "answer = gemini.predict(df_image, prompt=[\"what item is it?\", df_image[\"image\"]])\n", - "answer[[\"ml_generate_text_llm_result\", \"image\"]]" + "answer = gemini.predict(df_image, prompt=[\"what item is it?\", \"what color is the picture?\"])\n", + "render_images(answer[[\"ml_generate_text_llm_result\", \"image\"]])" ], "metadata": { "colab": { @@ -438,8 +509,7 @@ "id": "DNFP7CbjWdR9", "outputId": "3f90a062-0abc-4bce-f53c-db57b06a14b9" }, - "execution_count": null, - "outputs": [] + "execution_count": null }, { "id": "68857305", @@ -457,15 +527,14 @@ "metadata": { "id": "IG3J3HsKhyBY" }, - "execution_count": null, - "outputs": [] + "execution_count": null }, { "id": "829afc69", "cell_type": "code", "source": [ "answer_alt = gemini.predict(df_image, prompt=[df_image[\"question\"], df_image[\"image\"]])\n", - "answer_alt[[\"ml_generate_text_llm_result\", \"image\"]]" + "render_images(answer_alt[[\"ml_generate_text_llm_result\", \"image\"]])" ], "metadata": { "colab": { @@ -475,8 +544,7 @@ "id": "qKOb765IiVuD", "outputId": "731bafad-ea29-463f-c8c1-cb7acfd70e5d" }, - "execution_count": null, - "outputs": [] + "execution_count": null }, { "id": "e75df430", @@ -495,8 +563,7 @@ "id": "KATVv2CO5RT1", "outputId": "6ec01f27-70b6-4f69-c545-e5e3c879480c" }, - "execution_count": null, - "outputs": [] + "execution_count": null }, { "id": "23892b0e", @@ -526,7 +593,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"pypdf\", \"requests\", \"cryptography\"],\n", ")\n", - "def pdf_extract(src_obj_ref_rt: str) -> str:\n", + "def pdf_extract(src_obj_ref_rt: str) -\u003e str:\n", " import io\n", " import json\n", " from pypdf import PdfReader\n", @@ -553,7 +620,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"pypdf\", \"requests\", \"cryptography\"],\n", ")\n", - "def pdf_chunk(src_obj_ref_rt: str, chunk_size: int, overlap_size: 
int) -> list[str]:\n", + "def pdf_chunk(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -\u003e list[str]:\n", " import io\n", " import json\n", " from pypdf import PdfReader\n", @@ -571,7 +638,7 @@ " page_text = page.extract_text()\n", " if page_text:\n", " curr_chunk += page_text\n", - " while len(curr_chunk) >= chunk_size:\n", + " while len(curr_chunk) \u003e= chunk_size:\n", " split_idx = curr_chunk.rfind(\" \", 0, chunk_size)\n", " if split_idx == -1:\n", " split_idx = chunk_size\n", @@ -584,8 +651,7 @@ " return all_text_chunks" ], "metadata": {}, - "execution_count": null, - "outputs": [] + "execution_count": null }, { "id": "234a5f86", @@ -620,8 +686,7 @@ "df_pdf[[\"extracted_text\", \"chunked\"]]" ], "metadata": {}, - "execution_count": null, - "outputs": [] + "execution_count": null }, { "id": "d80effbe", @@ -632,8 +697,7 @@ "chunked" ], "metadata": {}, - "execution_count": null, - "outputs": [] + "execution_count": null }, { "id": "118cf1c7", @@ -673,8 +737,7 @@ "df = df[['audio']]" ], "metadata": {}, - "execution_count": null, - "outputs": [] + "execution_count": null }, { "id": "c9f9d484", @@ -706,8 +769,7 @@ "transcribed_series" ], "metadata": {}, - "execution_count": null, - "outputs": [] + "execution_count": null }, { "id": "7209a62a", @@ -728,8 +790,7 @@ "transcribed_series_verbose" ], "metadata": {}, - "execution_count": null, - "outputs": [] + "execution_count": null }, { "id": "c8351cc3", @@ -767,7 +828,7 @@ " container_cpu=0.33,\n", " container_memory=\"512Mi\"\n", ")\n", - "def extract_exif(src_obj_ref_rt: str) -> str:\n", + "def extract_exif(src_obj_ref_rt: str) -\u003e str:\n", " import io\n", " import json\n", " from PIL import ExifTags, Image\n", @@ -786,8 +847,7 @@ " return json.dumps(exif_dict)" ], "metadata": {}, - "execution_count": null, - "outputs": [] + "execution_count": null }, { "id": "40bb6bc9", @@ -824,8 +884,7 @@ "exif_data" ], "metadata": {}, - "execution_count": null, - "outputs": [] + "execution_count": null } ], 
"metadata": { From 4497308da2bf47fb157801c43307841c083fd68d Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 1 May 2026 20:56:35 +0000 Subject: [PATCH 32/39] fix: resolve blurred image rendering race condition --- .../multimodal/multimodal_dataframe.ipynb | 33 ++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb index d63ffe9e8e97..7ccdc6c790e3 100644 --- a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb @@ -204,21 +204,21 @@ " display(df)\n", " return\n", "\n", - " # Safe restriction mirroring notebook max_rows\n", " limit = bigframes.options.display.max_rows or 10\n", " view_df = df.head(limit)\n", " \n", - " # 2. Bulk-fetch access runtime URLs + full metadata for inspection in a single batch query\n", + " # 2. Bulk-fetch access runtime URLs ONLY (disable with_metadata to bypass potential \n", + " # race conditions on new files where BigQuery may error before async writes finalize)\n", " runtime_cols = {\n", - " col: get_runtime_json_str(view_df[col], mode=\"R\", with_metadata=True) \n", + " col: get_runtime_json_str(view_df[col], mode=\"R\", with_metadata=False) \n", " for col in object_cols\n", " }\n", " \n", - " # Materialize small string payloads needed for local rendering\n", " pandas_json_df = bpd.DataFrame(runtime_cols).to_pandas()\n", " final_pd = view_df.to_pandas()\n", " \n", " width = bigframes.options.display.blob_display_width or 300\n", + " IMAGE_EXTENSIONS = (\".png\", \".jpg\", \".jpeg\", \".gif\", \".webp\")\n", " \n", " def format_cell_html(raw_json):\n", " if not raw_json:\n", @@ -227,28 +227,23 @@ " obj_rt = json.loads(raw_json)\n", " \n", " if \"access_urls\" not in obj_rt:\n", - " err = obj_rt.get(\"errors\", [{\"message\": \"Invalid Reference\"}])[0].get(\"message\")\n", + " err = 
obj_rt.get(\"errors\", [{\"message\": \"URL Generation Failed\"}])[0].get(\"message\")\n", " return f'\u003cspan style=\"color:red;\"\u003eError: {err}\u003c/span\u003e'\n", " \n", - " details = obj_rt.get(\"objectref\", {}).get(\"details\", {})\n", - " if isinstance(details, str): \n", - " details = json.loads(details) # Handles Heterogeneous stringification edge case\n", - " \n", - " c_type = details.get(\"gcs_metadata\", {}).get(\"content_type\", \"\")\n", + " uri = obj_rt.get(\"objectref\", {}).get(\"uri\", \"\")\n", " url = obj_rt[\"access_urls\"][\"read_url\"]\n", " \n", - " if c_type and str(c_type).startswith(\"image\"):\n", + " # Safely infer type from extension to guarantee immediate display availability\n", + " if uri and str(uri).lower().endswith(IMAGE_EXTENSIONS):\n", " return f'\u003cimg src=\"{url}\" width=\"{width}\"\u003e'\n", " \n", - " return f'\u003ca href=\"{url}\" target=\"_blank\"\u003e{obj_rt.get(\"objectref\", {}).get(\"uri\", \"link\")}\u003c/a\u003e'\n", + " return f'\u003ca href=\"{url}\" target=\"_blank\"\u003e{uri if uri else \"view\"}\u003c/a\u003e'\n", " except:\n", " return \"Format Error\"\n", "\n", - " # 3. Apply formatter locally in pandas memory\n", " for col in object_cols:\n", " final_pd[col] = pandas_json_df[col].map(format_cell_html)\n", " \n", - " # 4. 
Display via unescaped HTML pipeline\n", " display(HTML(final_pd.to_html(escape=False)))" ], "metadata": {}, @@ -451,7 +446,15 @@ " res = df_transform[[\"src_rt\", \"dst_rt\"]].apply(\n", " udf, axis=1, args=args\n", " )\n", - " return res if verbose else bbq.obj.make_ref(res, authorizer=FULL_CONNECTION_ID)\n", + " \n", + " if verbose:\n", + " return res\n", + " \n", + " # Final return MUST also use JSON bypass to eliminate temporary 404 validation \n", + " # errors from embedded ObjectRefs during fused query execution pipelines.\n", + " res_df = bpd.DataFrame({\"uri\": res})\n", + " res_df[\"authorizer\"] = FULL_CONNECTION_ID\n", + " return bbq.obj.make_ref(bbq.to_json(bbq.struct(res_df)))\n", "\n", "# Apply transformations\n", "df_image[\"blurred\"] = apply_transformation(\n", From f108a4cf7a3cb9a36ccce47b95649e629f2e0ffb Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 1 May 2026 21:17:56 +0000 Subject: [PATCH 33/39] style: clean up notebook metadata and formatting --- .../multimodal/multimodal_dataframe.ipynb | 627 ++++++++++++------ 1 file changed, 412 insertions(+), 215 deletions(-) diff --git a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb index 7ccdc6c790e3..cd363db6f362 100644 --- a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb @@ -1,8 +1,11 @@ { "cells": [ { - "id": "9edad7a6", "cell_type": "code", + "execution_count": null, + "id": "9edad7a6", + "metadata": {}, + "outputs": [], "source": [ "# Copyright 2025 Google LLC\n", "#\n", @@ -17,45 +20,43 @@ "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License." 
- ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "816ab253", "cell_type": "markdown", + "id": "816ab253", + "metadata": { + "id": "YOrUAvz6DMw-" + }, "source": [ "# BigFrames Multimodal DataFrame\n", "\n", - "\u003ctable align=\"left\"\u003e\n", + "\n", "\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://colab.research.google.com/github/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", - " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/colab-logo.png\" alt=\"Colab logo\"\u003e Run in Colab\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", - " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/github-logo.png\" width=\"32\" alt=\"GitHub logo\"\u003e\n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \"Colab Run in Colab\n", + " \n", + " \n", + " \n", + " \"GitHub\n", " View on GitHub\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", - " \u003cimg src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw\u0026s\" alt=\"BQ logo\" width=\"35\"\u003e\n", + " \n", + " \n", + " \n", + " \"BQ\n", " Open in BQ Studio\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e\n" - ], - "metadata": { - "id": "YOrUAvz6DMw-" - }, - "execution_count": null + " \n", + "
\n" + ] }, { - "id": "77d821d4", "cell_type": "markdown", + "id": "77d821d4", + "metadata": {}, "source": [ "This notebook is introducing BigFrames Multimodal features:\n", "1. Create Multimodal DataFrame\n", @@ -65,42 +66,48 @@ "5. PDF chunking function\n", "6. Transcribe audio\n", "7. Extract EXIF metadata from images" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "75ab1c13", "cell_type": "markdown", - "source": [ - "## Setup" - ], + "id": "75ab1c13", "metadata": { "id": "PEAJQQ6AFg-n" }, - "execution_count": null + "source": [ + "## Setup" + ] }, { - "id": "750954c4", "cell_type": "markdown", - "source": [ - "Install the latest bigframes package if bigframes version \u003c 2.4.0" - ], + "id": "750954c4", "metadata": {}, - "execution_count": null + "source": [ + "Install the latest bigframes package if bigframes version < 2.4.0" + ] }, { - "id": "2a6fafb1", "cell_type": "code", + "execution_count": null, + "id": "2a6fafb1", + "metadata": {}, + "outputs": [], "source": [ "# !pip install bigframes --upgrade" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "df561d04", "cell_type": "code", + "execution_count": null, + "id": "df561d04", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bGyhLnfEeB0X", + "outputId": "83ac8b64-3f44-4d43-d089-28a5026cbb42" + }, + "outputs": [], "source": [ "PROJECT = \"bigframes-dev\" # replace with your project. 
\n", "# Refer to https://cloud.google.com/bigquery/docs/multimodal-data-dataframes-tutorial#required_roles for your required permissions\n", @@ -128,19 +135,14 @@ "\n", "import bigframes.pandas as bpd\n", "import bigframes.bigquery as bbq" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "bGyhLnfEeB0X", - "outputId": "83ac8b64-3f44-4d43-d089-28a5026cbb42" - }, - "execution_count": null + ] }, { - "id": "35bd6e6e", "cell_type": "code", + "execution_count": 35, + "id": "35bd6e6e", + "metadata": {}, + "outputs": [], "source": [ "import bigframes.bigquery as bbq\n", "\n", @@ -228,16 +230,16 @@ " \n", " if \"access_urls\" not in obj_rt:\n", " err = obj_rt.get(\"errors\", [{\"message\": \"URL Generation Failed\"}])[0].get(\"message\")\n", - " return f'\u003cspan style=\"color:red;\"\u003eError: {err}\u003c/span\u003e'\n", + " return f'Error: {err}'\n", " \n", " uri = obj_rt.get(\"objectref\", {}).get(\"uri\", \"\")\n", " url = obj_rt[\"access_urls\"][\"read_url\"]\n", " \n", " # Safely infer type from extension to guarantee immediate display availability\n", " if uri and str(uri).lower().endswith(IMAGE_EXTENSIONS):\n", - " return f'\u003cimg src=\"{url}\" width=\"{width}\"\u003e'\n", + " return f''\n", " \n", - " return f'\u003ca href=\"{url}\" target=\"_blank\"\u003e{uri if uri else \"view\"}\u003c/a\u003e'\n", + " return f'{uri if uri else \"view\"}'\n", " except:\n", " return \"Format Error\"\n", "\n", @@ -245,26 +247,32 @@ " final_pd[col] = pandas_json_df[col].map(format_cell_html)\n", " \n", " display(HTML(final_pd.to_html(escape=False)))" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "be9ce892", "cell_type": "markdown", + "id": "be9ce892", + "metadata": { + "id": "ifKOq7VZGtZy" + }, "source": [ "To create a Multimodal DataFrame, you can use `bigframes.bigquery.obj.make_ref` on a series of URIs. 
You can get the URIs from a BigQuery table or by listing them from Cloud Storage.\n", "\n", "In this example, we use `gcsfs` to list the files from Cloud Storage, and then use `read_gbq` to load them into a BigQuery DataFrame before creating the object reference." - ], - "metadata": { - "id": "ifKOq7VZGtZy" - }, - "execution_count": null + ] }, { - "id": "871d02f4", "cell_type": "code", + "execution_count": 36, + "id": "871d02f4", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fx6YcZJbeYru", + "outputId": "d707954a-0dd0-4c50-b7bf-36b140cf76cf" + }, + "outputs": [], "source": [ "import gcsfs\n", "import bigframes.bigquery as bbq\n", @@ -283,24 +291,12 @@ "# Create the object reference column\n", "df_image['image'] = bbq.obj.make_ref(df_image['uri'], authorizer=FULL_CONNECTION_ID)\n", "df_image = df_image[['image']]" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "fx6YcZJbeYru", - "outputId": "d707954a-0dd0-4c50-b7bf-36b140cf76cf" - }, - "execution_count": null + ] }, { - "id": "2e0436b0", "cell_type": "code", - "source": [ - "# Take only the 5 images to deal with. 
Preview the content of the Mutimodal DataFrame\n", - "df_image = df_image.head(5)\n", - "render_images(df_image)" - ], + "execution_count": 37, + "id": "2e0436b0", "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -309,33 +305,169 @@ "id": "HhCb8jRsLe9B", "outputId": "03081cf9-3a22-42c9-b38f-649f592fdada" }, - "execution_count": null + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
image
0
1
2
3
4
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Take only the 5 images to deal with. Preview the content of the Mutimodal DataFrame\n", + "df_image = df_image.head(5)\n", + "render_images(df_image)" + ] }, { - "id": "429b0117", "cell_type": "markdown", - "source": [ - "### 2. Combine unstructured data with structured data" - ], + "id": "429b0117", "metadata": { "id": "b6RRZb3qPi_T" }, - "execution_count": null + "source": [ + "### 2. Combine unstructured data with structured data" + ] }, { - "id": "991fa065", "cell_type": "markdown", - "source": [ - "Now you can put more information into the table to describe the files. Such as author info from inputs, or other metadata from the gcs object itself." - ], + "id": "991fa065", "metadata": { "id": "4YJCdmLtR-qu" }, - "execution_count": null + "source": [ + "Now you can put more information into the table to describe the files. Such as author info from inputs, or other metadata from the gcs object itself." 
+ ] }, { - "id": "08722ec5", "cell_type": "code", + "execution_count": 38, + "id": "08722ec5", + "metadata": { + "id": "YYYVn7NDH0Me" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
imageauthorcontent_typesizeupdated
0aliceimage/png7157662025-03-20 17:44:38+00:00
1bobimage/png11674062025-03-20 17:44:38+00:00
2bobimage/png11508922025-03-20 17:44:39+00:00
3aliceimage/png17365332025-03-20 17:44:39+00:00
4bobimage/png4397402025-03-20 17:44:39+00:00
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# Combine unstructured data with structured data\n", "df_image = df_image.head(5)\n", @@ -344,33 +476,99 @@ "df_image[\"size\"] = get_size(df_image[\"image\"])\n", "df_image[\"updated\"] = get_updated(df_image[\"image\"])\n", "render_images(df_image)" - ], - "metadata": { - "id": "YYYVn7NDH0Me" - }, - "execution_count": null + ] }, { - "id": "f90826f6", "cell_type": "markdown", + "id": "f90826f6", + "metadata": {}, "source": [ "### 3. Conduct image transformations" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "e24c9f8c", "cell_type": "markdown", + "id": "e24c9f8c", + "metadata": {}, "source": [ "This section demonstrates how to perform image transformations like blur, resize, and normalize using custom BigQuery Python UDFs and the `opencv-python` library." - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "db665049", "cell_type": "code", + "execution_count": 39, + "id": "db665049", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 487 + }, + "id": "HhCb8jRsLe9B", + "outputId": "03081cf9-3a22-42c9-b38f-649f592fdada" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/pandas/__init__.py:211: PreviewWarning: udf is in preview.\n", + " return global_session.with_default_session(\n", + "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dataframe.py:4695: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", + " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", + "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as 
pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
imageblurred
0
1
2
3
4
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# Construct the canonical connection ID\n", "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", @@ -383,7 +581,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"opencv-python-headless\", \"numpy\", \"requests\"],\n", ")\n", - "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -\u003e str:\n", + "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -> str:\n", " import json\n", " import cv2 as cv\n", " import numpy as np\n", @@ -462,48 +660,35 @@ " image_blur, 20, 20\n", ")\n", "render_images(df_image[[\"image\", \"blurred\"]])" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 487 - }, - "id": "HhCb8jRsLe9B", - "outputId": "03081cf9-3a22-42c9-b38f-649f592fdada" - }, - "execution_count": null + ] }, { - "id": "11fcc6ec", "cell_type": "markdown", - "source": [ - "### 4. Use LLM models to ask questions and generate embeddings on images" - ], + "id": "11fcc6ec", "metadata": { "id": "Euk5saeVVdTP" }, - "execution_count": null + "source": [ + "### 4. 
Use LLM models to ask questions and generate embeddings on images" + ] }, { - "id": "793b2f45", "cell_type": "code", - "source": [ - "from bigframes.ml import llm\n", - "gemini = llm.GeminiTextGenerator()" - ], + "execution_count": null, + "id": "793b2f45", "metadata": { "id": "mRUGfcaFVW-3" }, - "execution_count": null + "outputs": [], + "source": [ + "from bigframes.ml import llm\n", + "gemini = llm.GeminiTextGenerator()" + ] }, { - "id": "13d7cb93", "cell_type": "code", - "source": [ - "# Ask the same question on the images\n", - "answer = gemini.predict(df_image, prompt=[\"what item is it?\", \"what color is the picture?\"])\n", - "render_images(answer[[\"ml_generate_text_llm_result\", \"image\"]])" - ], + "execution_count": null, + "id": "13d7cb93", "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -512,11 +697,21 @@ "id": "DNFP7CbjWdR9", "outputId": "3f90a062-0abc-4bce-f53c-db57b06a14b9" }, - "execution_count": null + "outputs": [], + "source": [ + "# Ask the same question on the images\n", + "answer = gemini.predict(df_image, prompt=[\"what item is it?\", \"what color is the picture?\"])\n", + "render_images(answer[[\"ml_generate_text_llm_result\", \"image\"]])" + ] }, { - "id": "68857305", "cell_type": "code", + "execution_count": null, + "id": "68857305", + "metadata": { + "id": "IG3J3HsKhyBY" + }, + "outputs": [], "source": [ "# Ask different questions\n", "df_image[\"question\"] = [\n", @@ -526,19 +721,12 @@ " \"is it for pets?\",\n", " \"what is the weight of the product?\",\n", "]" - ], - "metadata": { - "id": "IG3J3HsKhyBY" - }, - "execution_count": null + ] }, { - "id": "829afc69", "cell_type": "code", - "source": [ - "answer_alt = gemini.predict(df_image, prompt=[df_image[\"question\"], df_image[\"image\"]])\n", - "render_images(answer_alt[[\"ml_generate_text_llm_result\", \"image\"]])" - ], + "execution_count": null, + "id": "829afc69", "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -547,17 +735,16 @@ "id": 
"qKOb765IiVuD", "outputId": "731bafad-ea29-463f-c8c1-cb7acfd70e5d" }, - "execution_count": null + "outputs": [], + "source": [ + "answer_alt = gemini.predict(df_image, prompt=[df_image[\"question\"], df_image[\"image\"]])\n", + "render_images(answer_alt[[\"ml_generate_text_llm_result\", \"image\"]])" + ] }, { - "id": "e75df430", "cell_type": "code", - "source": [ - "# Generate embeddings.\n", - "embed_model = llm.MultimodalEmbeddingGenerator()\n", - "embeddings = embed_model.predict(df_image[\"image\"])\n", - "embeddings" - ], + "execution_count": null, + "id": "e75df430", "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -566,24 +753,32 @@ "id": "KATVv2CO5RT1", "outputId": "6ec01f27-70b6-4f69-c545-e5e3c879480c" }, - "execution_count": null + "outputs": [], + "source": [ + "# Generate embeddings.\n", + "embed_model = llm.MultimodalEmbeddingGenerator()\n", + "embeddings = embed_model.predict(df_image[\"image\"])\n", + "embeddings" + ] }, { - "id": "23892b0e", "cell_type": "markdown", + "id": "23892b0e", + "metadata": { + "id": "iRUi8AjG7cIf" + }, "source": [ "### 5. PDF extraction and chunking function\n", "\n", "This section demonstrates how to extract text and chunk text from PDF files using custom BigQuery Python UDFs and the `pypdf` library." 
- ], - "metadata": { - "id": "iRUi8AjG7cIf" - }, - "execution_count": null + ] }, { - "id": "136a18b8", "cell_type": "code", + "execution_count": null, + "id": "136a18b8", + "metadata": {}, + "outputs": [], "source": [ "# Construct the canonical connection ID\n", "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", @@ -596,7 +791,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"pypdf\", \"requests\", \"cryptography\"],\n", ")\n", - "def pdf_extract(src_obj_ref_rt: str) -\u003e str:\n", + "def pdf_extract(src_obj_ref_rt: str) -> str:\n", " import io\n", " import json\n", " from pypdf import PdfReader\n", @@ -623,7 +818,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"pypdf\", \"requests\", \"cryptography\"],\n", ")\n", - "def pdf_chunk(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -\u003e list[str]:\n", + "def pdf_chunk(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -> list[str]:\n", " import io\n", " import json\n", " from pypdf import PdfReader\n", @@ -641,7 +836,7 @@ " page_text = page.extract_text()\n", " if page_text:\n", " curr_chunk += page_text\n", - " while len(curr_chunk) \u003e= chunk_size:\n", + " while len(curr_chunk) >= chunk_size:\n", " split_idx = curr_chunk.rfind(\" \", 0, chunk_size)\n", " if split_idx == -1:\n", " split_idx = chunk_size\n", @@ -652,13 +847,14 @@ " if curr_chunk:\n", " all_text_chunks.append(curr_chunk)\n", " return all_text_chunks" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "234a5f86", "cell_type": "code", + "execution_count": null, + "id": "234a5f86", + "metadata": {}, + "outputs": [], "source": [ "import gcsfs\n", "import bigframes.bigquery as bbq\n", @@ -687,33 +883,34 @@ "df_pdf[\"chunked\"] = access_urls.apply(pdf_chunk, args=(2000, 200))\n", "\n", "df_pdf[[\"extracted_text\", \"chunked\"]]" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "d80effbe", "cell_type": "code", + "execution_count": null, + "id": 
"d80effbe", + "metadata": {}, + "outputs": [], "source": [ "# Explode the chunks to see each chunk as a separate row\n", "chunked = df_pdf[\"chunked\"].explode()\n", "chunked" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "118cf1c7", "cell_type": "markdown", + "id": "118cf1c7", + "metadata": {}, "source": [ "### 6. Audio transcribe" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "1794c54f", "cell_type": "code", + "execution_count": null, + "id": "1794c54f", + "metadata": {}, + "outputs": [], "source": [ "import gcsfs\n", "import bigframes.bigquery as bbq\n", @@ -738,13 +935,14 @@ "# Create the object reference column\n", "df['audio'] = bbq.obj.make_ref(df['uri'], authorizer=FULL_CONNECTION_ID)\n", "df = df[['audio']]" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "c9f9d484", "cell_type": "code", + "execution_count": null, + "id": "c9f9d484", + "metadata": {}, + "outputs": [], "source": [ "# The audio_transcribe function is a convenience wrapper around bigframes.bigquery.ai.generate.\n", "# Here's how to perform the same operation directly:\n", @@ -770,13 +968,14 @@ "\n", "transcribed_series = transcribed_results.struct.field(\"result\").rename(\"transcribed_content\")\n", "transcribed_series" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "7209a62a", "cell_type": "code", + "execution_count": null, + "id": "7209a62a", + "metadata": {}, + "outputs": [], "source": [ "# To get verbose results (including status), we can extract both fields from the result struct.\n", "transcribed_content_series = transcribed_results.struct.field(\"result\")\n", @@ -791,31 +990,30 @@ "# Package as a struct for consistent display\n", "transcribed_series_verbose = bbq.struct(transcribed_series_verbose).rename(\"transcription_results\")\n", "transcribed_series_verbose" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "c8351cc3", "cell_type": "markdown", + "id": "c8351cc3", + "metadata": {}, 
"source": [ "### 7. Extract EXIF metadata from images" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "e59670b9", "cell_type": "markdown", + "id": "e59670b9", + "metadata": {}, "source": [ "This section demonstrates how to extract EXIF metadata from images using a custom BigQuery Python UDF and the `Pillow` library." - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "fda362f4", "cell_type": "code", + "execution_count": null, + "id": "fda362f4", + "metadata": {}, + "outputs": [], "source": [ "# Construct the canonical connection ID\n", "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", @@ -831,7 +1029,7 @@ " container_cpu=0.33,\n", " container_memory=\"512Mi\"\n", ")\n", - "def extract_exif(src_obj_ref_rt: str) -\u003e str:\n", + "def extract_exif(src_obj_ref_rt: str) -> str:\n", " import io\n", " import json\n", " from PIL import ExifTags, Image\n", @@ -848,13 +1046,14 @@ " tag_name = ExifTags.TAGS.get(tag, tag)\n", " exif_dict[tag_name] = value\n", " return json.dumps(exif_dict)" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "40bb6bc9", "cell_type": "code", + "execution_count": null, + "id": "40bb6bc9", + "metadata": {}, + "outputs": [], "source": [ "import gcsfs\n", "import bigframes.bigquery as bbq\n", @@ -885,9 +1084,7 @@ "exif_data = bbq.parse_json(exif_json)\n", "\n", "exif_data" - ], - "metadata": {}, - "execution_count": null + ] } ], "metadata": { @@ -912,6 +1109,6 @@ "version": "3.13.0" } }, - "nbformat_minor": 0, - "nbformat": 4 + "nbformat": 4, + "nbformat_minor": 0 } From 233ab160bc778d40a52c9919b7dc3a1f5d7e2dde Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 1 May 2026 21:26:33 +0000 Subject: [PATCH 34/39] fix: make Kaggle secrets import optional --- ...uct-images-with-bigframes-multimodal.ipynb | 680 +++++++++++++++++- 1 file changed, 679 insertions(+), 1 deletion(-) diff --git 
a/packages/bigframes/notebooks/kaggle/describe-product-images-with-bigframes-multimodal.ipynb b/packages/bigframes/notebooks/kaggle/describe-product-images-with-bigframes-multimodal.ipynb index 1c2e2b53a830..5a8178af2a9d 100644 --- a/packages/bigframes/notebooks/kaggle/describe-product-images-with-bigframes-multimodal.ipynb +++ b/packages/bigframes/notebooks/kaggle/describe-product-images-with-bigframes-multimodal.ipynb @@ -1 +1,679 @@ -{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.11.13","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"none","dataSources":[{"sourceId":110281,"databundleVersionId":13391012,"sourceType":"competition"}],"dockerImageVersionId":31089,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":false}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"# Describe product images with BigFrames multimodal DataFrames\n\nBased on notebook at https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\n\nThis notebook is introducing BigFrames Multimodal features:\n\n1. Create Multimodal DataFrame\n2. Combine unstructured data with structured data\n3. Conduct image transformations\n4. Use LLM models to ask questions and generate embeddings on images\n5. 
PDF chunking function\n\nInstall the bigframes package and upgrade other packages that are already included in Kaggle but have versions incompatible with bigframes.","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19"}},{"cell_type":"code","source":"%pip install --upgrade bigframes google-cloud-automl google-cloud-translate google-ai-generativelanguage tensorflow ","metadata":{"trusted":true},"outputs":[],"execution_count":null},{"cell_type":"markdown","source":"**Important:** restart the kernel by going to \"Run -> Restart & clear cell outputs\" before continuing.\n\nConfigure bigframes to use your GCP project. First, go to \"Add-ons -> Google Cloud SDK\" and click the \"Attach\" button. Then,","metadata":{}},{"cell_type":"code","source":"from kaggle_secrets import UserSecretsClient\nuser_secrets = UserSecretsClient()\nuser_credential = user_secrets.get_gcloud_credential()\nuser_secrets.set_tensorflow_credential(user_credential)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-08-18T20:17:14.872905Z","iopub.execute_input":"2025-08-18T20:17:14.873201Z","iopub.status.idle":"2025-08-18T20:17:14.946971Z","shell.execute_reply.started":"2025-08-18T20:17:14.873171Z","shell.execute_reply":"2025-08-18T20:17:14.945996Z"}},"outputs":[],"execution_count":2},{"cell_type":"code","source":"PROJECT = \"bigframes-dev\" # replace with your project. \n# Refer to https://cloud.google.com/bigquery/docs/multimodal-data-dataframes-tutorial#required_roles for your required permissions\n\nOUTPUT_BUCKET = \"bigframes_blob_test\" # replace with your GCS bucket. \n# The connection (or bigframes-default-connection of the project) must have read/write permission to the bucket. \n# Refer to https://cloud.google.com/bigquery/docs/multimodal-data-dataframes-tutorial#grant-permissions for setting up connection service account permissions.\n# In this Notebook it uses bigframes-default-connection by default. 
You can also bring in your own connections in each method.\n\nimport bigframes\n# Setup project\nbigframes.options.bigquery.project = PROJECT\n\n# Display options\nbigframes.options.display.blob_display_width = 300\nbigframes.options.display.progress_bar = None\n\nimport bigframes.pandas as bpd","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-08-18T20:17:25.573874Z","iopub.execute_input":"2025-08-18T20:17:25.574192Z","iopub.status.idle":"2025-08-18T20:17:45.102002Z","shell.execute_reply.started":"2025-08-18T20:17:25.574168Z","shell.execute_reply":"2025-08-18T20:17:45.101140Z"}},"outputs":[],"execution_count":3},{"cell_type":"code","source":"# Create blob columns from wildcard path.\ndf_image = bpd.from_glob_path(\n \"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\", name=\"image\"\n)\n# Other ways are: from string uri column\n# df = bpd.DataFrame({\"uri\": [\"gs:///\", \"gs:///\"]})\n# df[\"blob_col\"] = df[\"uri\"].str.to_blob()\n\n# From an existing object table\n# df = bpd.read_gbq_object_table(\"\", name=\"blob_col\")","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-08-18T20:17:45.103249Z","iopub.execute_input":"2025-08-18T20:17:45.103530Z","iopub.status.idle":"2025-08-18T20:17:47.424586Z","shell.execute_reply.started":"2025-08-18T20:17:45.103499Z","shell.execute_reply":"2025-08-18T20:17:47.423762Z"}},"outputs":[{"name":"stderr","text":"/usr/local/lib/python3.11/dist-packages/bigframes/core/global_session.py:103: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n _global_session = bigframes.session.connect(\n","output_type":"stream"},{"name":"stdout","text":"Please ensure you have selected a BigQuery account in the Notebook Add-ons menu.\n","output_type":"stream"}],"execution_count":4},{"cell_type":"code","source":"# Take only the 5 images to deal with. 
Preview the content of the Mutimodal DataFrame\ndf_image = df_image.head(5)\ndf_image","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-08-18T20:17:47.425578Z","iopub.execute_input":"2025-08-18T20:17:47.425873Z","iopub.status.idle":"2025-08-18T20:18:07.919961Z","shell.execute_reply.started":"2025-08-18T20:17:47.425844Z","shell.execute_reply":"2025-08-18T20:18:07.918942Z"}},"outputs":[{"execution_count":5,"output_type":"execute_result","data":{"text/plain":" image\n0 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n1 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n2 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n3 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n4 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n\n[5 rows x 1 columns]","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
image
0
1
2
3
4
\n

5 rows × 1 columns

\n
[5 rows x 1 columns in total]"},"metadata":{}}],"execution_count":5},{"cell_type":"markdown","source":"# 2. Combine unstructured data with structured data\n\nNow you can put more information into the table to describe the files. Such as author info from inputs, or other metadata from the gcs object itself.","metadata":{}},{"cell_type":"code","source":"# Combine unstructured data with structured data\ndf_image[\"author\"] = [\"alice\", \"bob\", \"bob\", \"alice\", \"bob\"] # type: ignore\ndf_image[\"content_type\"] = df_image[\"image\"].blob.content_type()\ndf_image[\"size\"] = df_image[\"image\"].blob.size()\ndf_image[\"updated\"] = df_image[\"image\"].blob.updated()\ndf_image","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-08-18T20:18:07.921884Z","iopub.execute_input":"2025-08-18T20:18:07.922593Z","iopub.status.idle":"2025-08-18T20:18:35.549725Z","shell.execute_reply.started":"2025-08-18T20:18:07.922551Z","shell.execute_reply":"2025-08-18T20:18:35.548942Z"}},"outputs":[{"name":"stderr","text":"/usr/local/lib/python3.11/dist-packages/bigframes/bigquery/_operations/json.py:124: UserWarning: The `json_extract` is deprecated and will be removed in a future\nversion. Use `json_query` instead.\n warnings.warn(bfe.format_message(msg), category=UserWarning)\n/usr/local/lib/python3.11/dist-packages/bigframes/bigquery/_operations/json.py:124: UserWarning: The `json_extract` is deprecated and will be removed in a future\nversion. Use `json_query` instead.\n warnings.warn(bfe.format_message(msg), category=UserWarning)\n/usr/local/lib/python3.11/dist-packages/bigframes/bigquery/_operations/json.py:124: UserWarning: The `json_extract` is deprecated and will be removed in a future\nversion. Use `json_query` instead.\n warnings.warn(bfe.format_message(msg), category=UserWarning)\n","output_type":"stream"},{"execution_count":6,"output_type":"execute_result","data":{"text/plain":" image author content_type \\\n0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... 
alice image/png \n1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n2 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n3 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n4 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n\n size updated \n0 1591240 2025-03-20 17:45:04+00:00 \n1 1182951 2025-03-20 17:45:02+00:00 \n2 1520884 2025-03-20 17:44:55+00:00 \n3 1235401 2025-03-20 17:45:19+00:00 \n4 1591923 2025-03-20 17:44:47+00:00 \n\n[5 rows x 5 columns]","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
imageauthorcontent_typesizeupdated
0aliceimage/png15912402025-03-20 17:45:04+00:00
1bobimage/png11829512025-03-20 17:45:02+00:00
2bobimage/png15208842025-03-20 17:44:55+00:00
3aliceimage/png12354012025-03-20 17:45:19+00:00
4bobimage/png15919232025-03-20 17:44:47+00:00
\n

5 rows × 5 columns

\n
[5 rows x 5 columns in total]"},"metadata":{}}],"execution_count":6},{"cell_type":"markdown","source":"Then you can filter the rows based on the structured data. And for different content types, you can display them respectively or together.","metadata":{}},{"cell_type":"code","source":"# filter images and display, you can also display audio and video types\ndf_image[df_image[\"author\"] == \"alice\"][\"image\"].blob.display()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-08-18T20:18:55.299993Z","iopub.execute_input":"2025-08-18T20:18:55.300314Z","iopub.status.idle":"2025-08-18T20:19:09.154492Z","shell.execute_reply.started":"2025-08-18T20:18:55.300289Z","shell.execute_reply":"2025-08-18T20:19:09.153315Z"}},"outputs":[{"name":"stderr","text":"/usr/local/lib/python3.11/dist-packages/bigframes/bigquery/_operations/json.py:124: UserWarning: The `json_extract` is deprecated and will be removed in a future\nversion. Use `json_query` instead.\n warnings.warn(bfe.format_message(msg), category=UserWarning)\n","output_type":"stream"},{"output_type":"display_data","data":{"text/html":"","text/plain":""},"metadata":{}},{"output_type":"display_data","data":{"text/html":"","text/plain":""},"metadata":{}}],"execution_count":7},{"cell_type":"markdown","source":"# 3. Conduct image transformations\n\nBigFrames Multimodal DataFrame provides image(and other) transformation functions. Such as image_blur, image_resize and image_normalize. 
The output can be saved to GCS folders or to BQ as bytes.","metadata":{}},{"cell_type":"code","source":"df_image[\"blurred\"] = df_image[\"image\"].blob.image_blur(\n (20, 20), dst=f\"gs://{OUTPUT_BUCKET}/image_blur_transformed/\", engine=\"opencv\"\n)\ndf_image[\"resized\"] = df_image[\"image\"].blob.image_resize(\n (300, 200), dst=f\"gs://{OUTPUT_BUCKET}/image_resize_transformed/\", engine=\"opencv\"\n)\ndf_image[\"normalized\"] = df_image[\"image\"].blob.image_normalize(\n alpha=50.0,\n beta=150.0,\n norm_type=\"minmax\",\n dst=f\"gs://{OUTPUT_BUCKET}/image_normalize_transformed/\",\n engine=\"opencv\",\n)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-08-18T20:19:22.950277Z","iopub.execute_input":"2025-08-18T20:19:22.950652Z","iopub.status.idle":"2025-08-18T20:31:51.799997Z","shell.execute_reply.started":"2025-08-18T20:19:22.950625Z","shell.execute_reply":"2025-08-18T20:31:51.798840Z"}},"outputs":[{"name":"stderr","text":"/usr/local/lib/python3.11/dist-packages/bigframes/core/log_adapter.py:175: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n return method(*args, **kwargs)\n/usr/local/lib/python3.11/dist-packages/bigframes/core/log_adapter.py:175: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n return method(*args, **kwargs)\n/usr/local/lib/python3.11/dist-packages/bigframes/core/log_adapter.py:175: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n return method(*args, **kwargs)\n","output_type":"stream"}],"execution_count":8},{"cell_type":"code","source":"# You can also chain functions together\ndf_image[\"blur_resized\"] = df_image[\"blurred\"].blob.image_resize((300, 200), dst=f\"gs://{OUTPUT_BUCKET}/image_blur_resize_transformed/\", 
engine=\"opencv\")\ndf_image","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-08-18T20:31:51.802219Z","iopub.execute_input":"2025-08-18T20:31:51.802745Z","iopub.status.idle":"2025-08-18T20:36:13.953258Z","shell.execute_reply.started":"2025-08-18T20:31:51.802700Z","shell.execute_reply":"2025-08-18T20:36:13.951930Z"}},"outputs":[{"name":"stderr","text":"/usr/local/lib/python3.11/dist-packages/bigframes/core/log_adapter.py:175: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n return method(*args, **kwargs)\n","output_type":"stream"},{"execution_count":9,"output_type":"execute_result","data":{"text/plain":" image author content_type \\\n0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n2 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n3 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n4 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n\n size updated \\\n0 1591240 2025-03-20 17:45:04+00:00 \n1 1182951 2025-03-20 17:45:02+00:00 \n2 1520884 2025-03-20 17:44:55+00:00 \n3 1235401 2025-03-20 17:45:19+00:00 \n4 1591923 2025-03-20 17:44:47+00:00 \n\n blurred \\\n0 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n1 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n2 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n3 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n4 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n\n resized \\\n0 {'uri': 'gs://bigframes_blob_test/image_resize... \n1 {'uri': 'gs://bigframes_blob_test/image_resize... \n2 {'uri': 'gs://bigframes_blob_test/image_resize... \n3 {'uri': 'gs://bigframes_blob_test/image_resize... \n4 {'uri': 'gs://bigframes_blob_test/image_resize... \n\n normalized \\\n0 {'uri': 'gs://bigframes_blob_test/image_normal... \n1 {'uri': 'gs://bigframes_blob_test/image_normal... 
\n2 {'uri': 'gs://bigframes_blob_test/image_normal... \n3 {'uri': 'gs://bigframes_blob_test/image_normal... \n4 {'uri': 'gs://bigframes_blob_test/image_normal... \n\n blur_resized \n0 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n1 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n2 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n3 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n4 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n\n[5 rows x 9 columns]","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
imageauthorcontent_typesizeupdatedblurredresizednormalizedblur_resized
0aliceimage/png15912402025-03-20 17:45:04+00:00
1bobimage/png11829512025-03-20 17:45:02+00:00
2bobimage/png15208842025-03-20 17:44:55+00:00
3aliceimage/png12354012025-03-20 17:45:19+00:00
4bobimage/png15919232025-03-20 17:44:47+00:00
\n

5 rows × 9 columns

\n
[5 rows x 9 columns in total]"},"metadata":{}}],"execution_count":9},{"cell_type":"markdown","source":"# 4. Use LLM models to ask questions and generate embeddings on images","metadata":{}},{"cell_type":"code","source":"from bigframes.ml import llm\ngemini = llm.GeminiTextGenerator()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-08-18T20:36:13.954340Z","iopub.execute_input":"2025-08-18T20:36:13.954686Z","iopub.status.idle":"2025-08-18T20:36:43.225449Z","shell.execute_reply.started":"2025-08-18T20:36:13.954661Z","shell.execute_reply":"2025-08-18T20:36:43.224579Z"}},"outputs":[{"name":"stderr","text":"/usr/local/lib/python3.11/dist-packages/bigframes/core/log_adapter.py:175: FutureWarning: Since upgrading the default model can cause unintended breakages, the\ndefault model will be removed in BigFrames 3.0. Please supply an\nexplicit model to avoid this message.\n return method(*args, **kwargs)\n","output_type":"stream"}],"execution_count":10},{"cell_type":"code","source":"# Ask the same question on the images\ndf_image = df_image.head(2)\nanswer = gemini.predict(df_image, prompt=[\"what item is it?\", df_image[\"image\"]])\nanswer[[\"ml_generate_text_llm_result\", \"image\"]]","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-08-18T20:36:43.227457Z","iopub.execute_input":"2025-08-18T20:36:43.227798Z","iopub.status.idle":"2025-08-18T20:37:25.238649Z","shell.execute_reply.started":"2025-08-18T20:36:43.227764Z","shell.execute_reply":"2025-08-18T20:37:25.237623Z"}},"outputs":[{"name":"stderr","text":"/usr/local/lib/python3.11/dist-packages/bigframes/core/array_value.py:108: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n`db_dtypes` is a preview feature and subject to change.\n warnings.warn(msg, bfe.PreviewWarning)\n","output_type":"stream"},{"execution_count":11,"output_type":"execute_result","data":{"text/plain":" ml_generate_text_llm_result \\\n0 The item is a tin of K9 Guard Dog Paw Balm. 
\n1 The item is a bottle of K9 Guard Dog Hot Spot ... \n\n image \n0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n\n[2 rows x 2 columns]","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ml_generate_text_llm_resultimage
0The item is a tin of K9 Guard Dog Paw Balm.
1The item is a bottle of K9 Guard Dog Hot Spot Spray.
\n

2 rows × 2 columns

\n
[2 rows x 2 columns in total]"},"metadata":{}}],"execution_count":11},{"cell_type":"code","source":"# Ask different questions\ndf_image[\"question\"] = [\"what item is it?\", \"what color is the picture?\"]","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-08-18T20:37:25.239607Z","iopub.execute_input":"2025-08-18T20:37:25.239875Z","iopub.status.idle":"2025-08-18T20:37:25.263034Z","shell.execute_reply.started":"2025-08-18T20:37:25.239847Z","shell.execute_reply":"2025-08-18T20:37:25.262002Z"}},"outputs":[],"execution_count":12},{"cell_type":"code","source":"answer_alt = gemini.predict(df_image, prompt=[df_image[\"question\"], df_image[\"image\"]])\nanswer_alt[[\"ml_generate_text_llm_result\", \"image\"]]","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-08-18T20:37:25.264072Z","iopub.execute_input":"2025-08-18T20:37:25.264585Z","iopub.status.idle":"2025-08-18T20:38:10.129667Z","shell.execute_reply.started":"2025-08-18T20:37:25.264518Z","shell.execute_reply":"2025-08-18T20:38:10.128677Z"}},"outputs":[{"name":"stderr","text":"/usr/local/lib/python3.11/dist-packages/bigframes/core/array_value.py:108: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n`db_dtypes` is a preview feature and subject to change.\n warnings.warn(msg, bfe.PreviewWarning)\n","output_type":"stream"},{"execution_count":13,"output_type":"execute_result","data":{"text/plain":" ml_generate_text_llm_result \\\n0 The item is a tin of K9 Guard Dog Paw Balm. \n1 The picture has colors such as white, gray, an... \n\n image \n0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n\n[2 rows x 2 columns]","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ml_generate_text_llm_resultimage
0The item is a tin of K9 Guard Dog Paw Balm.
1The picture has colors such as white, gray, and a light blue (cyan).
\n

2 rows × 2 columns

\n
[2 rows x 2 columns in total]"},"metadata":{}}],"execution_count":13},{"cell_type":"code","source":"# Generate embeddings.\nembed_model = llm.MultimodalEmbeddingGenerator()\nembeddings = embed_model.predict(df_image[\"image\"])\nembeddings","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-08-18T20:38:10.130617Z","iopub.execute_input":"2025-08-18T20:38:10.130851Z","iopub.status.idle":"2025-08-18T20:39:04.790416Z","shell.execute_reply.started":"2025-08-18T20:38:10.130833Z","shell.execute_reply":"2025-08-18T20:39:04.789398Z"}},"outputs":[{"name":"stderr","text":"/usr/local/lib/python3.11/dist-packages/bigframes/core/log_adapter.py:175: FutureWarning: Since upgrading the default model can cause unintended breakages, the\ndefault model will be removed in BigFrames 3.0. Please supply an\nexplicit model to avoid this message.\n return method(*args, **kwargs)\n/usr/local/lib/python3.11/dist-packages/bigframes/core/array_value.py:108: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n`db_dtypes` is a preview feature and subject to change.\n warnings.warn(msg, bfe.PreviewWarning)\n","output_type":"stream"},{"execution_count":14,"output_type":"execute_result","data":{"text/plain":" ml_generate_embedding_result \\\n0 [ 0.00638822 0.01666385 0.00451817 ... -0.02... \n1 [ 0.00973672 0.02148364 0.00244308 ... 0.00... \n\n ml_generate_embedding_status ml_generate_embedding_start_sec \\\n0 \n1 \n\n ml_generate_embedding_end_sec \\\n0 \n1 \n\n content \n0 {\"access_urls\":{\"expiry_time\":\"2025-08-19T02:3... \n1 {\"access_urls\":{\"expiry_time\":\"2025-08-19T02:3... \n\n[2 rows x 5 columns]","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ml_generate_embedding_resultml_generate_embedding_statusml_generate_embedding_start_secml_generate_embedding_end_seccontent
0[ 0.00638822 0.01666385 0.00451817 ... -0.02...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2025-08-19T02:3...
1[ 0.00973672 0.02148364 0.00244308 ... 0.00...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2025-08-19T02:3...
\n

2 rows × 5 columns

\n
[2 rows x 5 columns in total]"},"metadata":{}}],"execution_count":14},{"cell_type":"code","source":"","metadata":{"trusted":true},"outputs":[],"execution_count":null}]} +{ + "cells": [ + { + "id": "876eb80c", + "cell_type": "markdown", + "source": "# Describe product images with BigFrames multimodal DataFrames\n\nBased on notebook at https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\n\nThis notebook is introducing BigFrames Multimodal features:\n\n1. Create Multimodal DataFrame\n2. Combine unstructured data with structured data\n3. Conduct image transformations\n4. Use LLM models to ask questions and generate embeddings on images\n5. PDF chunking function\n\nInstall the bigframes package and upgrade other packages that are already included in Kaggle but have versions incompatible with bigframes.", + "metadata": { + "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", + "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19" + }, + "execution_count": null + }, + { + "id": "0506e15e", + "cell_type": "code", + "source": "%pip install --upgrade bigframes google-cloud-automl google-cloud-translate google-ai-generativelanguage tensorflow ", + "metadata": { + "trusted": true + }, + "execution_count": null + }, + { + "id": "c749e07c", + "cell_type": "markdown", + "source": "**Important:** restart the kernel by going to \"Run -\u003e Restart \u0026 clear cell outputs\" before continuing.\n\nConfigure bigframes to use your GCP project. First, go to \"Add-ons -\u003e Google Cloud SDK\" and click the \"Attach\" button. 
Then,", + "metadata": {}, + "execution_count": null + }, + { + "id": "5e00777d", + "cell_type": "code", + "source": [ + "try:\n", + " from kaggle_secrets import UserSecretsClient\n", + " user_secrets = UserSecretsClient()\n", + " user_credential = user_secrets.get_gcloud_credential()\n", + " user_secrets.set_tensorflow_credential(user_credential)\n", + " print(\"Successfully authenticated using Kaggle secrets.\")\n", + "except ImportError:\n", + " print(\"Not running on Kaggle, skipping Kaggle secrets initialization.\")\n", + "except Exception as e:\n", + " print(f\"Could not initialize Kaggle secrets: {e}\")" + ], + "metadata": { + "trusted": true, + "execution": { + "iopub.status.busy": "2025-08-18T20:17:14.872905Z", + "iopub.execute_input": "2025-08-18T20:17:14.873201Z", + "iopub.status.idle": "2025-08-18T20:17:14.946971Z", + "shell.execute_reply.started": "2025-08-18T20:17:14.873171Z", + "shell.execute_reply": "2025-08-18T20:17:14.945996Z" + } + }, + "execution_count": 2 + }, + { + "id": "b2e171de", + "cell_type": "code", + "source": [ + "PROJECT = \"bigframes-dev\" # replace with your project. \n", + "# Refer to https://cloud.google.com/bigquery/docs/multimodal-data-dataframes-tutorial#required_roles for your required permissions\n", + "\n", + "LOCATION = \"us\" # replace with your location.\n", + "DATASET_ID = \"bigframes_samples\" # replace with your dataset ID.\n", + "OUTPUT_BUCKET = \"bigframes_blob_test\" # replace with your GCS bucket. 
\n", + "\n", + "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", + "\n", + "import bigframes\n", + "# Setup project\n", + "bigframes.options.bigquery.project = PROJECT\n", + "bigframes.options.bigquery.location = LOCATION\n", + "\n", + "# Display options\n", + "bigframes.options.display.blob_display_width = 300\n", + "bigframes.options.display.progress_bar = None\n", + "\n", + "import bigframes.pandas as bpd\n", + "import bigframes.bigquery as bbq\n", + "\n", + "def get_runtime_json_str(series, mode=\"R\", with_metadata=False):\n", + " \"\"\"Get runtime JSON from objectref.\"\"\"\n", + " s = bbq.obj.fetch_metadata(series) if with_metadata else series\n", + " runtime = bbq.obj.get_access_url(s, mode=mode)\n", + " return bbq.to_json_string(runtime)\n", + "\n", + "def get_metadata(series):\n", + " metadata_obj = bbq.obj.fetch_metadata(series)\n", + " return bbq.json_query(metadata_obj.struct.field(\"details\"), \"$.gcs_metadata\")\n", + "\n", + "def get_content_type(series):\n", + " return bbq.json_value(get_metadata(series), \"$.content_type\")\n", + "\n", + "def get_size(series):\n", + " return bbq.json_value(get_metadata(series), \"$.size\").astype(\"Int64\")\n", + "\n", + "def get_updated(series):\n", + " return bpd.to_datetime(bbq.json_value(get_metadata(series), \"$.updated\").astype(\"Int64\"), unit=\"us\", utc=True)\n", + "\n", + "from IPython.display import HTML, display\n", + "\n", + "def render_images(df):\n", + " \"\"\"Helper to display BigFrames DataFrame with rendered image previews.\"\"\"\n", + " import bigframes.pandas as bpd\n", + " import bigframes.bigquery as bbq\n", + " import bigframes\n", + " from bigframes import dtypes\n", + " import json\n", + " \n", + " if isinstance(df, bpd.Series):\n", + " df = df.to_frame()\n", + " \n", + " object_cols = [\n", + " col for col, dtype in zip(df.columns, df.dtypes)\n", + " if dtype == dtypes.OBJ_REF_DTYPE\n", + " ]\n", + " \n", + " if not object_cols:\n", + " display(df)\n", + " 
return\n", + "\n", + " limit = bigframes.options.display.max_rows or 10\n", + " view_df = df.head(limit)\n", + " \n", + " runtime_cols = {\n", + " col: get_runtime_json_str(view_df[col], mode=\"R\", with_metadata=False) \n", + " for col in object_cols\n", + " }\n", + " \n", + " pandas_json_df = bpd.DataFrame(runtime_cols).to_pandas()\n", + " final_pd = view_df.to_pandas()\n", + " \n", + " width = bigframes.options.display.blob_display_width or 300\n", + " IMAGE_EXTENSIONS = (\".png\", \".jpg\", \".jpeg\", \".gif\", \".webp\")\n", + " \n", + " def format_cell_html(raw_json):\n", + " if not raw_json:\n", + " return \"\"\n", + " try:\n", + " obj_rt = json.loads(raw_json)\n", + " if \"access_urls\" not in obj_rt:\n", + " err = obj_rt.get(\"errors\", [{\"message\": \"URL Generation Failed\"}])[0].get(\"message\")\n", + " return f'\u003cspan style=\"color:red;\"\u003eError: {err}\u003c/span\u003e'\n", + " \n", + " uri = obj_rt.get(\"objectref\", {}).get(\"uri\", \"\")\n", + " url = obj_rt[\"access_urls\"][\"read_url\"]\n", + " \n", + " if uri and str(uri).lower().endswith(IMAGE_EXTENSIONS):\n", + " return f'\u003cimg src=\"{url}\" width=\"{width}\"\u003e'\n", + " \n", + " return f'\u003ca href=\"{url}\" target=\"_blank\"\u003e{uri if uri else \"view\"}\u003c/a\u003e'\n", + " except:\n", + " return \"Format Error\"\n", + "\n", + " for col in object_cols:\n", + " final_pd[col] = pandas_json_df[col].map(format_cell_html)\n", + " \n", + " display(HTML(final_pd.to_html(escape=False)))" + ], + "metadata": { + "trusted": true, + "execution": { + "iopub.status.busy": "2025-08-18T20:17:25.573874Z", + "iopub.execute_input": "2025-08-18T20:17:25.574192Z", + "iopub.status.idle": "2025-08-18T20:17:45.102002Z", + "shell.execute_reply.started": "2025-08-18T20:17:25.574168Z", + "shell.execute_reply": "2025-08-18T20:17:45.101140Z" + } + }, + "execution_count": 3 + }, + { + "id": "d17afaf1", + "cell_type": "code", + "source": [ + "import gcsfs\n", + "import bigframes.bigquery as bbq\n", + 
"\n", + "# List files using gcsfs (public bucket)\n", + "fs = gcsfs.GCSFileSystem(anon=True)\n", + "uris = fs.glob(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\")\n", + "\n", + "# Ensure URIs have gs:// prefix\n", + "uris = [u if u.startswith(\"gs://\") else f\"gs://{u}\" for u in uris]\n", + "\n", + "# Read the URIs into a BigQuery DataFrame using UNNEST\n", + "# We take the first 5 for this example\n", + "df_image = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", + "\n", + "# Create the object reference column\n", + "df_image['image'] = bbq.obj.make_ref(df_image['uri'], authorizer=FULL_CONNECTION_ID)\n", + "df_image = df_image[['image']]" + ], + "metadata": { + "trusted": true, + "execution": { + "iopub.status.busy": "2025-08-18T20:17:45.103249Z", + "iopub.execute_input": "2025-08-18T20:17:45.103530Z", + "iopub.status.idle": "2025-08-18T20:17:47.424586Z", + "shell.execute_reply.started": "2025-08-18T20:17:45.103499Z", + "shell.execute_reply": "2025-08-18T20:17:47.423762Z" + } + }, + "execution_count": 4, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": "/usr/local/lib/python3.11/dist-packages/bigframes/core/global_session.py:103: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n _global_session = bigframes.session.connect(\n" + }, + { + "name": "stdout", + "output_type": "stream", + "text": "Please ensure you have selected a BigQuery account in the Notebook Add-ons menu.\n" + } + ] + }, + { + "id": "3e84b922", + "cell_type": "code", + "source": [ + "# Take only the 5 images to deal with. 
Preview the content of the Multimodal DataFrame\n", + "df_image = df_image.head(5)\n", + "render_images(df_image)" + ], + "metadata": { + "trusted": true, + "execution": { + "iopub.status.busy": "2025-08-18T20:17:47.425578Z", + "iopub.execute_input": "2025-08-18T20:17:47.425873Z", + "iopub.status.idle": "2025-08-18T20:18:07.919961Z", + "shell.execute_reply.started": "2025-08-18T20:17:47.425844Z", + "shell.execute_reply": "2025-08-18T20:18:07.918942Z" + } + }, + "execution_count": 5, + "outputs": [ + { + "data": { + "text/html": "\u003cdiv\u003e\n\u003cstyle scoped\u003e\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n\u003c/style\u003e\n\u003ctable border=\"1\" class=\"dataframe\"\u003e\n \u003cthead\u003e\n \u003ctr style=\"text-align: right;\"\u003e\n \u003cth\u003e\u003c/th\u003e\n \u003cth\u003eimage\u003c/th\u003e\n \u003c/tr\u003e\n \u003c/thead\u003e\n \u003ctbody\u003e\n \u003ctr\u003e\n \u003cth\u003e0\u003c/th\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T201755Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=11a1a82bcec0368dffef46552650865eed90b5c897132c9f144a5d51845856739dfa930cec231f1d9686b774c76bab4e0f5b88128d58e26e35e1445a99b50b6f476084a2deab5d1343c54f810a2ff284a9c182a82daf150c2e0ba58bd5cec6bb0a8cb07c6109ffb0017c79eac94ca7b48fce86dc55156ed730b55d2a4c5681162d52ecbdf4a7d19488275eef65dfab5ea07a7929ce27c9144238eede571d9ff361eae4a1f15a9cbcd9ec09277a6386b43b39766808eb34b1a0b9c055b8d66269df378b6777c375b98562494049c26bdf9614269441e9c224028de0ab9803165f258cf366a5c4bee40cca961a9500b64c88389af46d85d41bda4a2cf77904b58f\" width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e1\u003c/th\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T201755Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=a71d2ef68382b109400d88653e34c5fa42544343eea5c51298645318966c5253ec5361d2d21e1521030088a9d6a740acb497db1fdd8d58ef91f6273813d328052d5c56e01e8686c811208c7baf233b5f43857fcaac46b71bbf316e773151ff4ba1ea956ff186807de17c0da903187cf6f49f2f98d73d2979c10702bb88918bca6a6bcf8b2fe0d38ed933e31377b8ebe9abadd2fa7ad917a07bd1a1bf68fbf05ea97f51004a47bb1a566a1a36fc3b92b5245605a2c2b24f3f0c701719609f7432b1c541b061e9ce9b474f964c9910d64b8cb8e367886edd8e808350142c3032803c04e3acfad870f1e75bdbc3caa5484341de5a3a3bf848f122410ec777c8edc7\" width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e2\u003c/th\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T201755Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=36b90b84047051e752f8ba82662b774ae77bbe811e46c43b2322945b5e5bb5282f77c0abac4c5bc6296d0f5199b7842f684a89a5ec899a052d9208af3e4729392316f0670b5d8c1a0a584492ac941e28cb1ce23db5585ff1bc2bdd9acd1110fbe6f5ebbe4c5f802d8ec87327bd89d21167dded8c7fd8251a79a29b52360d7ca5dc84949990fb4ea1ca16bb317bace8ac2c2493b945c4591ca123d02ba29052cbe0c5461853f895b79878d82c89769c5e003aa61b5e9e549a1c9bdadf16dffff241bf9d790328349e917a7b22574426ea8d32e79e6d84ddf4ddb9004f2fbba1ecd347acaaeae0290a5da9c60011c27ce846d59a3dd2c2ef309a24e2459b18130d\" width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e3\u003c/th\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T201755Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=05b04e32d1958afa905bc986c6203b2b69a1efb860ca96e3e385f0a42566d0500a9b5bc8242ef0ae978e8a9e1c098394959e894e53316b57570c2ade38dd0b97003d66e9f339e098893410c9083f441ecd794a12ed41a4f5354d30c3a656b932b30a6dbfcdd1b6d48fc21dc48093b3073370cc14f0f885d5f30a6b5d8a37d218e6f1c6e7c864cbdeceee17814be7c66669e3039dc9759d2a140967b1b44e3f6b4a33352d9f7fccc5a4056c2783731809e1762841901a371c06aa33507198c12c8e38c49cc36565a745ff3ce21dcc53ff0b064fa8f73004bfbc14d3526032e913fc0448ef5c8e1ea866a0744d3a48ea55823250153fd0b7772fce02672442175f\" width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e4\u003c/th\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T201755Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=1fa3291ae0489edf9df6fbc4a720bc1ef2d47f7a8203b42765da04cdfe8cf16f957e258becf7581fab0d66097bc047081770d2fc90a3b5c78923d99650954e2f837b1b588a03ca4a4ce9c0f780e12f5995139eba0fc85c09b2bde0b467cf2c1d90c639df9f157f78c6e2d5ec0232150ab355307c1af952790d730e53dd5792841c36b86379fd9c250f8400dd37649c0befc9c080f3b437e8847744202c19404c9b99613b04c039570584769553c84b45e200a4632d07148dbe2732c4de7b10f770ad942d7288fbd8174ff87456409427fac33e7ee92bcc1c85e7c874c4f01a5f6187a6c0f5ce3ecc57676ffd455fe51914868f6c019dffd9b30354f41ab703e6\" width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003c/tbody\u003e\n\u003c/table\u003e\n\u003cp\u003e5 rows × 1 columns\u003c/p\u003e\n\u003c/div\u003e[5 rows x 1 columns in total]", + "text/plain": " image\n0 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n1 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n2 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n3 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n4 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n\n[5 rows x 1 columns]" + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "id": "b0eaa73c", + "cell_type": "markdown", + "source": "# 2. Combine unstructured data with structured data\n\nNow you can put more information into the table to describe the files. 
Examples include author info from the inputs and other metadata from the GCS object itself.", + "metadata": {}, + "execution_count": null + }, + { + "id": "7d64fb54", + "cell_type": "code", + "source": [ + "# Combine unstructured data with structured data\n", + "df_image[\"author\"] = [\"alice\", \"bob\", \"bob\", \"alice\", \"bob\"] # type: ignore\n", + "df_image[\"content_type\"] = get_content_type(df_image[\"image\"])\n", + "df_image[\"size\"] = get_size(df_image[\"image\"])\n", + "df_image[\"updated\"] = get_updated(df_image[\"image\"])\n", + "render_images(df_image)" + ], + "metadata": { + "trusted": true, + "execution": { + "iopub.status.busy": "2025-08-18T20:18:07.921884Z", + "iopub.execute_input": "2025-08-18T20:18:07.922593Z", + "iopub.status.idle": "2025-08-18T20:18:35.549725Z", + "shell.execute_reply.started": "2025-08-18T20:18:07.922551Z", + "shell.execute_reply": "2025-08-18T20:18:35.548942Z" + } + }, + "execution_count": 6, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": "/usr/local/lib/python3.11/dist-packages/bigframes/bigquery/_operations/json.py:124: UserWarning: The `json_extract` is deprecated and will be removed in a future\nversion. Use `json_query` instead.\n warnings.warn(bfe.format_message(msg), category=UserWarning)\n/usr/local/lib/python3.11/dist-packages/bigframes/bigquery/_operations/json.py:124: UserWarning: The `json_extract` is deprecated and will be removed in a future\nversion. Use `json_query` instead.\n warnings.warn(bfe.format_message(msg), category=UserWarning)\n/usr/local/lib/python3.11/dist-packages/bigframes/bigquery/_operations/json.py:124: UserWarning: The `json_extract` is deprecated and will be removed in a future\nversion. 
Use `json_query` instead.\n warnings.warn(bfe.format_message(msg), category=UserWarning)\n" + }, + { + "data": { + "text/html": "\u003cdiv\u003e\n\u003cstyle scoped\u003e\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n\u003c/style\u003e\n\u003ctable border=\"1\" class=\"dataframe\"\u003e\n \u003cthead\u003e\n \u003ctr style=\"text-align: right;\"\u003e\n \u003cth\u003e\u003c/th\u003e\n \u003cth\u003eimage\u003c/th\u003e\n \u003cth\u003eauthor\u003c/th\u003e\n \u003cth\u003econtent_type\u003c/th\u003e\n \u003cth\u003esize\u003c/th\u003e\n \u003cth\u003eupdated\u003c/th\u003e\n \u003c/tr\u003e\n \u003c/thead\u003e\n \u003ctbody\u003e\n \u003ctr\u003e\n \u003cth\u003e0\u003c/th\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T201823Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=8f2857f42b8ba3abed9e60449cc32b150cbd6276a923a6d85e5c06ddde834849f9dc25a05a7cd56627308c78d68f471dc14d220573d9a5564fe3b03acbeb1776414a54ab717e65a0039b2beb31092dcb7d58a2aefd8d0b0063904e330a7aa424e06c99205b5b83c9efe3111db07e9571224cc2aff52f71dad9f6b68a7afa9099803c2476a058002ac1b01f1cb6614230cccb1c730ca16c69dfa6142b255032cc799a33c1dd2cfacfd2d4db3fcfa8b8fd0ed68711bfba781d1119192fe24fe667aa129f8d1908702b295eb2d57d7782006d8c77e4a27ba6b1d3adad801ac9c859e0162db46834407d19942af2bd94c9d698b1c5352c7c9d8af58c956f607c3f10\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003ealice\u003c/td\u003e\n \u003ctd\u003eimage/png\u003c/td\u003e\n \u003ctd\u003e1591240\u003c/td\u003e\n \u003ctd\u003e2025-03-20 
17:45:04+00:00\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e1\u003c/th\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T201823Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=9fbf42a56019dfd8b35e8d29981674906d025145bac8a5d9cebadfda4d04420ed34e2d554973a4084252f5ecc2803e82fd6817a2338ce9159fe5140d4a3a77cdc7e5feed733ad2b4351ce97fe82f76de1e5523022fdf12543cbf48827b07641bdd3083d0f8c11d95a46d1af17b70968e1f30ad99bf305ab8b5a827f24d5980a28df3726111f7f79c2444f78bfe5268d6ed2bbb172d368930b27690b635bdbd5cbfca7264b4e0bb0b170ac3dd15a9c36c3e77061906958464c8c610bb11b98602f99a184dca788ff5b154259c944e536fdeb47988c3ed9430e5fafd86e5c9b05088dd72c5f9f25516d0dead404dff0c0b2e4a91993a43100c66588ed373314e74\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003ebob\u003c/td\u003e\n \u003ctd\u003eimage/png\u003c/td\u003e\n \u003ctd\u003e1182951\u003c/td\u003e\n \u003ctd\u003e2025-03-20 17:45:02+00:00\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e2\u003c/th\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T201823Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=7ee0d2c8dbcc174ef6edb76725adde283ec45db4f733d02241b350bb3af4094e97dbcedcbc98091c28273d51cd059ca475d501dad445ef4dd335c1add9930196a172adc8d45beda0003f934610167c022be84e43774af7b7805ba526d39509c68f065c6bc442196df71330bfeb9910ba4b85a1bf1defd6ebb34b6d0ed092c795c006d4bec18931cc00b9678f1a8cd43b2ee46efc9c03418e182dba57f61e011d8de299482a5c94763eaecda94b914254c34e635644944c9c14cf90d2a57d05767dc9330890dcd526c91d771b1182b47e23163db8d29e6146356355724e9bb854c9e246f1c376be69b1b32d5039ff42ca0b86a3b285fc7dd6b040093ccc346344\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003ebob\u003c/td\u003e\n \u003ctd\u003eimage/png\u003c/td\u003e\n \u003ctd\u003e1520884\u003c/td\u003e\n \u003ctd\u003e2025-03-20 17:44:55+00:00\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e3\u003c/th\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T201823Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=5dbbec9d52be0f6fb2288804f812adcdbee47e8a6a5e1addeea648e3e2fa7129bf299b4528896dd039eec54da32633622c36ed90f7b93e4b52d400b50f45900a722bd3b159d0dc79ea567fb4ca2e6541028d68eec8a481ba15097fc63df50496bb51439806fc952370c3cbf3577c21f3f61b2f253c1e8c8821f5a31eceeba5fe110b2064706a36cc3a5041639f8673f7572b93195b357609944c892fffa65e0624ed73f27160d2401df695881e58074949c463c150880a9a75a0e7e28d3d144c6ef18e85d763a8b72d0ac45065d68d2a45581c11be30ab60acf4de29f7ca5592664c8a917358b5593520fee2c89cecb062a6071565acaa8f6a2bc3ba7d9f05aa\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003ealice\u003c/td\u003e\n \u003ctd\u003eimage/png\u003c/td\u003e\n \u003ctd\u003e1235401\u003c/td\u003e\n \u003ctd\u003e2025-03-20 17:45:19+00:00\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e4\u003c/th\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T201823Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=af14fcfc1b6e3a81e25df175e3b3d1b8b8be5588ef74f775dcf2cfd266b342e9c1e0184a4bc1b88c37209f27a23f0552dada58ca8edc95f47cc575a7de5678fd419f1f10bead607e37464f74093c24775990072726ad1fae5e923fbf2354967fe10d90149384d3c6e0955b6623eff94bc999b01467456bb94d1189ed76b3e4898858be4d44b5bc2a7f9b97fd4a94e2e823adfdbe2c51c385e13fb0534e8202af59364b0d9c54512fdc571a7c012d2c1f497b27f5f4e77924feddb2d4aab78ca4c2cba5282accf6707117efcf2aa4a3bb9e01815be41c380b9bd327c492b05a7e1d0fdbfbbfd8f3b0cb353165cde6cc42d1a16ac51a33c7b1318d3405cf104399\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003ebob\u003c/td\u003e\n \u003ctd\u003eimage/png\u003c/td\u003e\n \u003ctd\u003e1591923\u003c/td\u003e\n \u003ctd\u003e2025-03-20 17:44:47+00:00\u003c/td\u003e\n \u003c/tr\u003e\n \u003c/tbody\u003e\n\u003c/table\u003e\n\u003cp\u003e5 rows × 5 columns\u003c/p\u003e\n\u003c/div\u003e[5 rows x 5 columns in total]", + "text/plain": " image author content_type \\\n0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n2 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n3 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n4 {'uri': 'gs://cloud-samples-data/bigquery/tuto... 
bob image/png \n\n size updated \n0 1591240 2025-03-20 17:45:04+00:00 \n1 1182951 2025-03-20 17:45:02+00:00 \n2 1520884 2025-03-20 17:44:55+00:00 \n3 1235401 2025-03-20 17:45:19+00:00 \n4 1591923 2025-03-20 17:44:47+00:00 \n\n[5 rows x 5 columns]" + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "id": "a23ef0e4", + "cell_type": "markdown", + "source": "Then you can filter the rows based on the structured data. And for different content types, you can display them respectively or together.", + "metadata": {}, + "execution_count": null + }, + { + "id": "ce102df0", + "cell_type": "code", + "source": [ + "# filter images and display, you can also display audio and video types\n", + "filtered_df = df_image[df_image[\"author\"] == \"alice\"]\n", + "render_images(filtered_df)" + ], + "metadata": { + "trusted": true, + "execution": { + "iopub.status.busy": "2025-08-18T20:18:55.299993Z", + "iopub.execute_input": "2025-08-18T20:18:55.300314Z", + "iopub.status.idle": "2025-08-18T20:19:09.154492Z", + "shell.execute_reply.started": "2025-08-18T20:18:55.300289Z", + "shell.execute_reply": "2025-08-18T20:19:09.153315Z" + } + }, + "execution_count": 7, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": "/usr/local/lib/python3.11/dist-packages/bigframes/bigquery/_operations/json.py:124: UserWarning: The `json_extract` is deprecated and will be removed in a future\nversion. 
Use `json_query` instead.\n warnings.warn(bfe.format_message(msg), category=UserWarning)\n" + }, + { + "data": { + "text/html": "\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T201856Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026X-Goog-Signature=4dd04f25b6ceebf868ebbbe2165da17cf6b9b2ad9a8bb1dd3e1970d32290aa520559224dea75e3af9539fd2fb2bedb462c49001c64584d154a6e84cb3bf79bd82d3d17e69c7942b4e31acbde3cd9dea47b002b12087b6e9e0878ca2dae5c4c552906c9750227657e622ff20780f075755bb255aa00d14802222d3a3cb77c94188f6f22a37edf4802ec3d595d475ebdc75f4e0194062d899d506e9e2db20e84d309cbdcbcf4229aed64dedb4bf2c66198d118b7bf63afce0eb9794724956839a9d0220861ac2fd618d8abeb74a21d150139e6afbbdbf01be8537fc9fee6d3b7495945b801ca146115b174b575b4a8f95443e6afc444e3336e95ba03f9adbb21a5\" width=\"300\"/\u003e", + "text/plain": "\u003cIPython.core.display.Image object\u003e" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": "\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T201856Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026X-Goog-Signature=8d53ca04967c2219f664a0f0aa5b3f28aab4836bb22d66a3e1a5e9e6cd7735865ef1fe5e5b34f422dbb21a33cef41f11a617ec858fd26e1a75c124222ab0e33675e349ab577bc1c4939aa9de3ce0918b94e85a4f68a27cc7b8c359dbc99e2dd211d4307c54e57d455c74ba360ce038f578ba05b2122f9eb96ae7d091345a1d53b3347b042c8c963414422a870c9d168aec09dd6fddd07ceb622d1d46e0857a78e327f82b665838d1bb4864b953a7ca7ac6ad0a77d8d2ffbd842348ea213b6a9ca2ef0768380e8cc96fc4afad91aef98ca31546dafdd989aa7ed3cf7bf692c2fa2b0c199ad21040596b5a1c4cf48c791fb252e0552c1a3444ae7461c715cac915\" width=\"300\"/\u003e", + "text/plain": "\u003cIPython.core.display.Image object\u003e" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + }, + { + "id": "db2b3b12", + "cell_type": "markdown", + "source": "# 3. Conduct image transformations\n\nBigFrames Multimodal DataFrame provides image(and other) transformation functions. Such as image_blur, image_resize and image_normalize. 
The output can be saved to GCS folders or to BQ as bytes.", + "metadata": {}, + "execution_count": null + }, + { + "id": "283036f5", + "cell_type": "code", + "source": [ + "@bpd.udf(\n", + " input_types=[str, str, int, int],\n", + " output_type=str,\n", + " dataset=DATASET_ID,\n", + " name=\"image_blur_kaggle\",\n", + " bigquery_connection=FULL_CONNECTION_ID,\n", + " packages=[\"opencv-python-headless\", \"numpy\", \"requests\"],\n", + ")\n", + "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -\u003e str:\n", + " import json\n", + " import cv2 as cv\n", + " import numpy as np\n", + " import requests\n", + " \n", + " src_obj = json.loads(src_rt)\n", + " if \"access_urls\" not in src_obj:\n", + " raise ValueError(f\"Missing 'access_urls' in source object. Response: {src_obj}\")\n", + " src_url = src_obj[\"access_urls\"][\"read_url\"]\n", + " \n", + " response = requests.get(src_url, timeout=30)\n", + " response.raise_for_status()\n", + " \n", + " img = cv.imdecode(np.frombuffer(response.content, np.uint8), cv.IMREAD_UNCHANGED)\n", + " if img is None:\n", + " raise ValueError(\"cv.imdecode failed\")\n", + " \n", + " img_blurred = cv.blur(img, ksize=(int(kx), int(ky)))\n", + " success, encoded = cv.imencode(\".jpeg\", img_blurred)\n", + " \n", + " if not success:\n", + " raise ValueError(\"cv.imencode failed\")\n", + " \n", + " if dst_rt: # GCS Output Mode\n", + " dst_obj = json.loads(dst_rt)\n", + " if \"access_urls\" not in dst_obj:\n", + " raise ValueError(f\"Missing 'access_urls' in destination object. 
Response: {dst_obj}\")\n", + " dst_url = dst_obj[\"access_urls\"][\"write_url\"]\n", + " \n", + " requests.put(dst_url, data=encoded.tobytes(), headers={\"Content-Type\": \"image/jpeg\"}, timeout=30).raise_for_status()\n", + " return dst_obj[\"objectref\"][\"uri\"]\n", + " return \"\"\n", + "\n", + "def apply_transformation(series, dst_folder, udf, *args, verbose=False):\n", + " import os\n", + " dst_folder = os.path.join(dst_folder, \"\")\n", + " metadata = bbq.obj.fetch_metadata(series)\n", + " current_uri = metadata.struct.field(\"uri\")\n", + " dst_uri = current_uri.str.replace(r\"^.*\\/(.*)$\", rf\"{dst_folder}\\1\", regex=True)\n", + " \n", + " # Bypass synchronous validation via JSON initialization\n", + " dst_blob_df = bpd.DataFrame({\"uri\": dst_uri})\n", + " dst_blob_df[\"authorizer\"] = FULL_CONNECTION_ID\n", + " dst_blob = bbq.obj.make_ref(bbq.to_json(bbq.struct(dst_blob_df)))\n", + "\n", + " df_transform = bpd.DataFrame({\n", + " \"src_rt\": get_runtime_json_str(series, mode=\"R\"),\n", + " \"dst_rt\": get_runtime_json_str(dst_blob, mode=\"RW\"),\n", + " })\n", + " res = df_transform[[\"src_rt\", \"dst_rt\"]].apply(udf, axis=1, args=args)\n", + " \n", + " if verbose:\n", + " return res\n", + " \n", + " res_df = bpd.DataFrame({\"uri\": res})\n", + " res_df[\"authorizer\"] = FULL_CONNECTION_ID\n", + " return bbq.obj.make_ref(bbq.to_json(bbq.struct(res_df)))\n", + "\n", + "# Apply Blur Transformation\n", + "df_image[\"blurred\"] = apply_transformation(\n", + " df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_blur_transformed/\",\n", + " image_blur, 20, 20\n", + ")\n", + "render_images(df_image[[\"image\", \"blurred\"]])" + ], + "metadata": { + "trusted": true, + "execution": { + "iopub.status.busy": "2025-08-18T20:19:22.950277Z", + "iopub.execute_input": "2025-08-18T20:19:22.950652Z", + "iopub.status.idle": "2025-08-18T20:31:51.799997Z", + "shell.execute_reply.started": "2025-08-18T20:19:22.950625Z", + "shell.execute_reply": 
"2025-08-18T20:31:51.798840Z" + } + }, + "execution_count": 8, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": "/usr/local/lib/python3.11/dist-packages/bigframes/core/log_adapter.py:175: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n return method(*args, **kwargs)\n/usr/local/lib/python3.11/dist-packages/bigframes/core/log_adapter.py:175: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n return method(*args, **kwargs)\n/usr/local/lib/python3.11/dist-packages/bigframes/core/log_adapter.py:175: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n return method(*args, **kwargs)\n" + } + ] + }, + { + "id": "b9716e5e", + "cell_type": "code", + "source": [ + "# Image resizing and normalizing can be performed similarly by implementing additional cv2 UDFs." 
+ ], + "metadata": { + "trusted": true, + "execution": { + "iopub.status.busy": "2025-08-18T20:31:51.802219Z", + "iopub.execute_input": "2025-08-18T20:31:51.802745Z", + "iopub.status.idle": "2025-08-18T20:36:13.953258Z", + "shell.execute_reply.started": "2025-08-18T20:31:51.802700Z", + "shell.execute_reply": "2025-08-18T20:36:13.951930Z" + } + }, + "execution_count": 9, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": "/usr/local/lib/python3.11/dist-packages/bigframes/core/log_adapter.py:175: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n return method(*args, **kwargs)\n" + }, + { + "data": { + "text/html": "\u003cdiv\u003e\n\u003cstyle scoped\u003e\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n\u003c/style\u003e\n\u003ctable border=\"1\" class=\"dataframe\"\u003e\n \u003cthead\u003e\n \u003ctr style=\"text-align: right;\"\u003e\n \u003cth\u003e\u003c/th\u003e\n \u003cth\u003eimage\u003c/th\u003e\n \u003cth\u003eauthor\u003c/th\u003e\n \u003cth\u003econtent_type\u003c/th\u003e\n \u003cth\u003esize\u003c/th\u003e\n \u003cth\u003eupdated\u003c/th\u003e\n \u003cth\u003eblurred\u003c/th\u003e\n \u003cth\u003eresized\u003c/th\u003e\n \u003cth\u003enormalized\u003c/th\u003e\n \u003cth\u003eblur_resized\u003c/th\u003e\n \u003c/tr\u003e\n \u003c/thead\u003e\n \u003ctbody\u003e\n \u003ctr\u003e\n \u003cth\u003e0\u003c/th\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=5f8c50659e2117ce9dc838470214b2a4622a50aa22df7ad766b0110fb4a3369bbed34857568626523489133580b8ab52bee7c7e95b0d1fa8ecfc1f6d748d6dee2893d8b5456aa1d366520ab2897cf4dbd011a7fc1854ef281f54c28a35cad59a0010cc1c9f5eeed6da7c85cde366d98b418ea98a3f8a0ec83ed3bf4158a5348ac3250b0f445b5ab5ab97eecbe25f799ec0a1c466719880ec8ad50e16d01ec2d33980debb992a88b42ffa99f886622d45dd921fc97489b080b625e516106000ed55ab2dbaa04ac28ef1c97d447497f1f30dd2efea4c92d2d22d250203239de2d1da1d5ad0988224b4d76447cb5bf1ce0e104dc9369ffc2ed8abfbd605e79ebaef\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003ealice\u003c/td\u003e\n \u003ctd\u003eimage/png\u003c/td\u003e\n \u003ctd\u003e1591240\u003c/td\u003e\n \u003ctd\u003e2025-03-20 17:45:04+00:00\u003c/td\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549023340907\u0026X-Goog-Signature=5629b7eee76f37486002cef7f1db57780852922400d8d47cb838864875ba46568d97cdac09dd6b8f9ab4b0a3102446b1c749bc1a82f6b7115041225117e860061d4980a353f778bf9172d79aed23cf1471deaa2c61c1f97e104877a2d2b8d0e4b131a0f06d2c0c79929de33e0585cbcdf553fbd4ecabf7e5fecda32f97e11f809bd05df1118b6b82b7c441745a5fc4e1bcb4f0d52ed1186c94d66809ee68a4c481342aba17654f1397589227b17246952ce105c751b0d099bf674fa772fcd22456ce1534a2ff9c763009135c1f4a3a10eed92f1a6e66d6c46c5fa05a679cd9bca00558c2ceac4a63ff8d15e51ca2603682216664508abedb5f92e3f823cb4fb4\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_resize_transformed%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549186906659\u0026X-Goog-Signature=a83503cf1de4b802678a0ac122089835a1b27727e2d8474838a4836cc42dc39c8c6aee08b7d145edac3cb762116641cba9c7f539a93e87075763dc526f431950995e6483d5c911d97f0d386d598a097f183d797206c6ef4deb607125d7578a3f98657ee6003329bda5567bad49a7221417c1b628b1007a3e5d01727d9b14b29b1f27cca3f281aedaf40f34d12ca93d9f1d94f6633c13a30dcc3957f3f1ee196801b56b18dc86bdf7f5128a9dfd2af09bf94a20254af4606a55fb7e3b7eb45e9d0be34664195e7cd818b2d5990687fec8e1d9c36f6333e9a49a5399501f46b110fa6e686e1d42bf112f68d2b4e13a0bfcbcd9104510ed513f771ec2053d9bb639\" width=\"300\"\u003e\u003c/td\u003e\n 
\u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_normalize_transformed%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549353120386\u0026X-Goog-Signature=b9d8b27e4b05cf5e27afe8c0f8abc9c42fec3128c587324a84ccb94ed8675566f1482224cfdf982f0b236c59fa34a5aa1ed54b3fc4363cde2279244dbe4fb578967366ba0af304052d57a0d88d035ada68b34b723f566eea96874522830435e103acf29ce848a809e447e25f7d7d5ab3893f84931a55945d183aa058fcfaa84991e33c44e8d6e6074a783ee9578fd88325064bf8998830bf5a9f04015e910312c8cbaaf1b539113e6bc595bbd336cf84cc5c8396d8a93f7ffb0eb8b1a7a56ada4f0ef81c5dfc05ca36ec7861e3deef98b5b12a67ba321dd7a0f213d4fdc31f0675eb7031a7980ffbe1dc97ae3699651ef18c59ef2cc25fc071be017fa7441c47\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_resize_transformed%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549346801405\u0026X-Goog-Signature=2044260548b5d07aa084e834c723e9196da0eed8e63c26a248381a3d7874505e3926d78973e99fff4a89dcf34042a4c39e3da508b02b24586141e89243bbe9641982cce474f2fece7e5b2780f7ef94dd1a89d498c162eba2017dd065ee202068a0a004054abbd71524a6e447594a2003f03427db3fd4e66df3dfce42258fea5a8823b7b46d8b15b33938f1b23b4206e79257282d4511d4c72d04f66a1e3acbdad6509871aef992893fa615b2bbab638f80cb1b2ba5610b5811eb8801e06ee904dc02fe4a9740ee5a7d931f0aa96b26e2c1eab25c0d9f3361b89ee4ad1f290611490bd66d9bde6dc90c5adfd0350b8a09c2cb8399c9e3d66ca3abcceb1ef4c3fb\" 
width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e1\u003c/th\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=987caa66e11b8446c02a240c423145f774d295c19afbe71eba9c3bcd0504525b19723b8d08bb4644f231b3e0b2bcfc13350112e1956d53d957a9986dd742b5f71f5406420da2eda739319eea452088c5138e901a101ad20f682d66245e966da1c6024e68de1e775ea5f395fd744f1717faca8875f89d9cb5eb969a79fb056dab43ada9abac0658bccbc7f72ad2ea293e9d70bf95ac5d81a0dc7bf62220c76fed0274be232c24180ad03f60f89d51eaf7cbec2d6748a94647ec253a9b6d1c95cea8b054872db400e78c930e69fd4ad835f7208ca33a4917ff1b24fcb510fe554792f4cbcaa8c7d4d3f17d1cac843b3f8c181c3e443b6720124055a74479228b14\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003ebob\u003c/td\u003e\n \u003ctd\u003eimage/png\u003c/td\u003e\n \u003ctd\u003e1182951\u003c/td\u003e\n \u003ctd\u003e2025-03-20 17:45:02+00:00\u003c/td\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549020470951\u0026X-Goog-Signature=6040bd68fa7a59e5100ecb3bab3d005be6353e45ad856cb7003fbce5c379a4926edb4226cbe7f6347a2172bd7cabe96a27f573e78192712f5472ba7b6fcf3a8abb48bc8376cc1f916cbfa6870f878cf48b5f7ff0b896844c3ca63eab4ff9d926e895f141a25d1718cdfff8ea7dc98a096a245dfbac33e1092af4541e1cc4fc3de5c7d9d62ab4b32ab725476f74f49b40625cca3063eeefb9c03ee2ffddfd2080176435672c2a3206db6c3d1cf826c3011366c4f1dd4db0ad87d6ac392b4c76c4196db4e55d2535f2ebb66b1a23cb90547277027cd482102a1b6036e7a62466733b6ed30215594f4724a838f135e2e413e0c185935761ec0e6033d361aedbc36f\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_resize_transformed%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549187521760\u0026X-Goog-Signature=8bdfcaac843956ea960296c0d03c7e3020aa2810f887d80da7569f0b1828a76097eae9f3a18e0635a6701e651f69bfb8bc56533009d8e384036acd6c3a834b5e5b16b3f9a725a5759df0e504b328e304e53b8eb686252f7946cc6cf3d14a52d9e14e3d37344aa962f52e5be9dd28ea7b6ba20bb341b699bd79ced2d9f829a90cd829dc3288d87cc6310aa4361dc20c28065d50a8f70f293719d97ed8a8e8c1799c1ba6f33a5b2aadb8e2a47b6fb3acf41c204034e7e595491fd9ede24774f0484a0ae50e8e2d404ec37b29e9c6cca2feab0c4c9e67e4923848c8d3a61c59768c0937e264ec635e36a8e5a63d2c7319af46f4ab1bfd550fdf6b1ad4c07cc8e965\" width=\"300\"\u003e\u003c/td\u003e\n 
\u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_normalize_transformed%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549359215948\u0026X-Goog-Signature=9365da5cd82f3f428e3fd58e0f3dc49147aa398bba646fefd7e2d24ae130a5a1edb8a333eb22c9d89ecbdc87bede7b244aef1d2d8fc84bbd8240de4656002b23c8c2c149b692e70e3ef5c6772c6a2ecf67436c5a7cad77986f0bf03fec18b1529fb8c93bfae5a3f0f361ea3c66f13330619381d28726b6e717c84aba9296c915cf3ed630ea5fc7ecdf6b6cd7af3f93e222a5b2713acf00a2b04f709ec2c58e6e94b96a64a11891978b2fbaa23e115691a73a931dd6b439d7835dd8840ad7a879c2f8606c2ede1740e676b734f4e08fdddbb6a2e0bf7372f21f6324f3ddb8c22998bdc28309aba74e3810b528f4d3c365c3ec3468a7ac585efdd108c46c6a60b9\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_resize_transformed%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549348094576\u0026X-Goog-Signature=4b5db258dfdd417beb18af774f6ad4e3c2645aca66abbd248fe76dfa3f2bf83900090c0064f864eba26027379895ad99e0e75aa83b957fbb4fc667b0d9e7b6baeb811678118c7016fb828a9924c9ba8c27c045122f683b3fc525e0c27744b8ea3907b9dd8fbcae4e79234548a36c60617f648fc0b65b3949786d57d871e4c01de07a5824d75a290892ccba17e528600c1250eedf89b2958d81096eef7407a63148f29f5b6a3690ad86995e1c5974585095ecadceb218471c68c2527353447b639544c2378e85fc44ff55eb6903950de2530565c7d647c3bc09c6a6aa3c3c65329aa53085ed443dfad094758dd06d7f39d209e83cfc265b03113c9fc431b13017\" 
width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e2\u003c/th\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=ab5e9d751859bee56cec2b9c7279c9f1b3bef256ae2850930006ec8083a623cd96e97e207e84c2487d8e4407544a5c2fdd755e8f187eba93753cb6ef53a3a1de33691829265905e049214f7c828e640d8ed96ff2f20f60c19a044f3cfafd99797f65297703c5bc0f5450f1d9fa9526a017a99c8b174e7af970c5a8d0cab5c1ec15e34ec8efdfe06ce85aa444482e8e289f3d8dc8a5f6e2cbe5c9309e2d2ca80884880f32f27cc7745d909ba40d4c37b6631f71b9d5172a43480e5763a7d844565a084b86dcd772dc5128e238b4ea476070c4ca6d0b748417f3d23a52264c315509d94636dbeb08d152552410de678e3668ea4ea4183b7100f4c8d3d4b1aa9a4a\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003ebob\u003c/td\u003e\n \u003ctd\u003eimage/png\u003c/td\u003e\n \u003ctd\u003e1520884\u003c/td\u003e\n \u003ctd\u003e2025-03-20 17:44:55+00:00\u003c/td\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549019078907\u0026X-Goog-Signature=93ddd73d20863af4bcd6d55d8b08efd24322967276c4c50acc96abceda81aec68addf10d783fdbdac7fe7cfbd7170663bf377ca4a1e2686c52ae46e2f4fc0f4a94e7fe51b9bc3b512fd23d26fdc6a62bcce36c348457b82f65fc8c956d8ef0bf1a14d30d1b61e40f80014d76155d13e051ccbc204360b5d43b19644094f3b9fa0fa2b6d723de301ea490f18929d568d785f53c9f74be51f389ba179bcbe2f6577cc2665e8a90fe5619f33468d3df5b3b8bc1e4c7df97c430d6b2323f7961de687314d62daa4e511d8e0ccf2b0d4d5321b00024163f08ec5999feea0917aee1a33d5b103775fe64350c1fcb4e8aec72fe7c181b9c3c9deae38b19fc8190b25ead\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_resize_transformed%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549188241967\u0026X-Goog-Signature=8ccefc10cb6fd5194bc2d872fba31ec997e22d8826f309bea7f5266fac43223991b58d95ac65e0b96d5d9de7f1daa817b35abdbbc2785a3d931d1d603ad916bf5d1b1d9d0f809c3dd4714a1f1027407ef0659b3d863ce571ada817019f006b8c56f5a26da1749b3202de18becf7a337e72a1efe16f44c6a946c7196bfcb38b3b6944e31e075a5da9a576c27f840e507eddf9b29264dae15cd5f10d34874a861b4113eecc6af6d3d16f131835f4ddc5592e3adc871c96e0bafa17d9b4c133f697040398d7a6dd438c6c9c8679fed8f78a6e81c5b6001846d62224683c8ca8bf3aa26cd97d0b333a5ba333f0707a528f1e986207852339f48e6f370d492938e92b\" 
width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_normalize_transformed%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549355947800\u0026X-Goog-Signature=42e69641c9371bd2eff79bb51f0d9e4d27960d7e2a3852e0c44740c5a57afb0f026cd43fd4ee1594d09eed2b93b84ddcfd31bf341182b5e862760b8138bfef101d9f747351b6701e699a5bc406bf10125c5d29f2d6564f9acc7eb4e5197f8fdac6abb45480966b168077d0a69d9941b96ca2517a444e7e591422ec8c2a0ed8b7dc0c839af585593e73eb4f4a372f8f41d2e5bf6bed8768c46325b27bab10c4a56813265ef15b18b61f37846448c2e7890847f6b65d03e745f89094d06d6daf5820c49f60839468a5410d5d5f93015acca4fe2912a9f7c372c0b7374ca65de28bea669d8032eff08e3f575590ead5dfb6f84ac49f851ecde0298882f7e3912eb2\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_resize_transformed%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549351340006\u0026X-Goog-Signature=b2af2667d8d9026e68bb9fe6b4e9b56d0ec35b20f34490db21c39b24d139321fd31936265270ab8dceed0857c53f5d8639f7438be45cb5788d8cd2588d42e68cc86df3f81de4ad1f356263ff6fd0921d3f237b0135b37e6382c72921a5914043a72d3b6550b6ec6231cc05c9d3d522bea46c65423ce0b64b03eeaa295996c8a8e26f480e55c95ac2aec47567ff0f07b84d993b5b6f56e9caca5109932c184ed234cb5a57b1ab740f2f4164d304179242d3190b10a97a436097d8c6fbf7325fa5e04b2d65ed3fe3b00471cfb27a82ac7f3a92d0c00afe201c2a7cbec877a762c7d057bf3493e295cadefac95edc5345073ed41c518499d2bc257f92f0215ace56\" width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e3\u003c/th\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=a152ee4011f72e7f0a09a2f883dc6abeb1179f32020930e1def252e5aad559ecf3cb96abd8e134f14a7d39c71ab9fec9411d9b1c29400d06313dc871c2500ed22033c561def13df3525eda40c10b9956ec22dd06ed9148e15e4af699b8e715c2308b92303c0b57a66b9b02473abd83565731dced27af52db48b2ecc8bdecbae16b512c50c30d30fc17ab8f21e471b904b93d97a2e1652e6e70626d75aeecdb13da6ef82993a6df033dfc341127bd1d387275ef345d89b60caafdecb548126aa7f90ef6c74beb0ac0f18902de474a8e0b289e306d0f824be87d1131ed4aa093cd52e486e1dae05c6c9b3e364aec6b65d9b93294e588ff6ecdc25b75ca0a488ba2\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003ealice\u003c/td\u003e\n \u003ctd\u003eimage/png\u003c/td\u003e\n \u003ctd\u003e1235401\u003c/td\u003e\n \u003ctd\u003e2025-03-20 17:45:19+00:00\u003c/td\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549018389386\u0026X-Goog-Signature=1f1c5af6da1c990838c0a95aedea3aa65ee34b2ca70d8221511880eac073548a5f15eb993dbaf4a92469391933875d3b198b3a146181c6acfd96798f65673699724e103c131c051094f3b5721af5a79e997244c19576ce115b826565baedeb2cc08b6cc93d762ddfdc2750596d567df1c65ef32fd56ea4bec4463724107e07e2b47a3dcf50fafc1ab5d30fb8aa24024011895224c437e6f78c2cf2d4e23e09eae5c2d9ce49de627f2c5b2f90509b9532070bb426fcdd64e2892440ca43d370415ac30cea79fed08716666d40423d0631eb3cacb545a95cfe23bd47690d63610d1409cc1d508ce29a465146408dde3ce7f6b986495542f68439d06796dc7ba792\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_resize_transformed%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549186140674\u0026X-Goog-Signature=3888ecd32b4b27e8a78f8c4b674cc6ce1990d66730cc1b37fa0814a948ab09b6b9dd49ee960e9a4dc4bb77cb1b3fb5c87e0b7e23e376eef8b4dc8673c6edae6720eb7e07b52ad172e4c5518f340ce58a3adead7178051b553b9d21df0f7c2c147c922d43ec73ba36bcad88805d84f66ddbe1f2cca3338f707ce38d22bf0bb78e86fb97eaa9e8df8a923320919fa5f8b99576aaf2eab09cfdd66da2c0c82986c7df3271d0f2877ea95429018aaafd4e10ac670d25a7568d175aeab74def3371c7064cc019ee8e0fc4ca1e4c6e718c7925051b9fb35b3095dcde072043b2a29e26e85b603fcd7ee1569eaf84054e9a7a1572100f61aa38ebe4ad306bb128a9bd19\" width=\"300\"\u003e\u003c/td\u003e\n 
\u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_normalize_transformed%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549353981426\u0026X-Goog-Signature=a25d1c75eef237aa75032402319dd081e0c9f15cdd95b79a31b8245d63b17752756cb356df7c69390ac05b496aac3fce22c501614660a06a18a148ac332fc28f245b3029405edbd40f2887dc76be5cbcf107c3bdd4725b93359a91adf775d6b827ad0413ccdf30dd1541c23bef40136622bea484ff16cb8adca54467515d6df88ffff70f5295f4801c2eba2cfd3e2b3606c53bc4257e7dcc51983c1de7edc8a2313380b7bfb8ac42f2460fcce281b6ad11ccb7e4edba23227f0da9181cbeeb0380302134659799ca8704c1b8dba92557d84ef3222fcfacc7584d65084ba847952d4676894d6c9f90ae692128721b48697c93baeb988d639338450a2bdad89a44\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_resize_transformed%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549347689362\u0026X-Goog-Signature=5bd305a1286eae578a5c7aebe3fe718ba4a221d98b2aa38aae42d25572af2990f3cc845808576f97f350a2f799a740ceb9b661d6a02d1e22029f35a4ced0c1f82d6343b9fc3132b46a7754ba6571af9186be033831f98974b75967156e97fe3e3da348521624ef9714086bb9ea67ff1cbdf721b0fb922420ea14b1a991dbac5246ec53e5156a11bccb6298aea948bb3fcedd320e468f932f3f1cbd244d87fb9ea1b4b92c9517820e4cc3a2f013f0994ce33e8ca60fe42f2de3f59c15a7c4fb98123c7f889fb1966fd20a79237d8bb95c37c56c3f2e33236d9b1c6d6aa0b08ea6e583329cefe8e76a89d062877a5046fe8f222ed1d8ea263ecca5daea54300afc\" 
width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e4\u003c/th\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=b049178a1831bbce328a7282626401c0b3d66e067bc3c93853bf5c9fe6ac1ddc234e88ecebbe6486028bd62279c28e54f80d97abb36a1a2fb2f9c663d5eec265089ec5f08cc58bf809eaddd2e6af9c500b85b37b8adef00dc18d0dca26c2633d54aa3a3fd2a4ec63af6ac9c3c8d242a146b44a51bbee529f50c54e3021e0a4deea0b47b5dd948e5c21781269d94cbe77933d2ceae39b987dbda8153a46e706d39c0922a3c9a03ede7ced494b06427a57075c2d4b5271fd230eaa2bc5bcf0d174775bed1715ac93b197aed5b5c676744b913d81dbe8a898ce7b9e2b3ba3d76b66b88b6ada370a5d83ead38beaf767913705a3dabb881b5b99212fa5ebb76fd09e\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003ebob\u003c/td\u003e\n \u003ctd\u003eimage/png\u003c/td\u003e\n \u003ctd\u003e1591923\u003c/td\u003e\n \u003ctd\u003e2025-03-20 17:44:47+00:00\u003c/td\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549019796499\u0026X-Goog-Signature=a9e096f80e63f3faeb78319c3afc7d688da12dfad094c478b150478e52bfee60a9e5789c92d52370ec6e2435d645b8816bde1415e7b5427dd71789dbf4e2fc039e6798e530b517a24ef71e2cd7b73d670781f46b0aabeb4ea82a950d4a88e9b9c75611367a7a599943b6a2ef472470993f02210f0f974269b5f32770be346293f3fafd471d4f9fb25c7328197160571fecc0e0e01ae499003ad31f2d04543690a8f62d73311ea19570ea4c11e3eb87368daa4a26cbb59eb508146c8d339bf1969775c7b2c6da9eb25b583294d34563f1e1cd5d52439860a8d7e542f99d89167c46812c267340f62a71b1536bbdd1025a4468e38587d765d6ccf535104bdb1fc2\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_resize_transformed%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549190596101\u0026X-Goog-Signature=b67386fbd201acb470f54777f5ab2a294538f76e38e5acff818225e2b01c67d9d8f1f64d416882e9a83e959b429437a310533dbf5cb1421c850c8bda8050d769b5e76499f7b975839eceb3d09840780762ef4c78ffe531c9c3af389aa894d7e0c30fc2aa7b0568a8b487c13bba99bcdf8bd49bfa8a76c2cbf74bdbde02e074f807715ae5c8868b6077fe3184c8f7307cb99c6ef9e105f1950ac4481e33a35afe9df23c6f4918c1b6d593a0b847725f415845b490409ed28ce67c5dcb0f706d66f2cc134a882401b9115b789f5545882c62e544afbfe86a9d618c6686b1233947db6e6f2bfb20c0da6ab14115660789abe6714d8e0f36552284f5b3422671bb49\" width=\"300\"\u003e\u003c/td\u003e\n 
\u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_normalize_transformed%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549352085742\u0026X-Goog-Signature=13c5b59c12fe64f09b8bb0fe82174e94e17cdddd62ee636bf3e7a916db31f4964f9165310aeae3562001b44201b869410736744dab31745e3c792628c62463d76fc567a7325f5d3ca58ac364c8cca7c21a4ec10dcc3022e283f335c19c56f4ec069301b12d04d7df08a7fa860bb07872495a39a1ad375e4a80e82f3f1eedc60959302d0404ec3c05240cbdc589a061f2804a1553891a0d6c23257fe862f12d59e5e75f62dc90c246e823e6aa097361840440f27127e657817033ee81e10e08e18934df64c9157ae6eb34e651f357a321c3924a989ff77798cbfd7b12a84678c15e89a78b5686321795b31bd20f5ee8fbddb49d56ce8609ec2fd415b4fb6e3983\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_resize_transformed%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549347302955\u0026X-Goog-Signature=78da0aa9889346f7b8215b8967fecc8cb31e536236289a4636e1337f9b1b080ca03c62364424dda708b698d2eeaeee81062ff60d4e002f5af66e51a38acc335aab9ba31b44e0f42be0be76d9fb5559322c1de12975a9073cee8756f653687494d605ab56fd0d177ef463fcc33532315e48601acdcb6b7cde0aea8baca1adc0454356ae1082ccc5ae61cd7494c0933c0cc68a3be7caf219050e22cb9c075efcdb00eb7231fd8964cc00af16e6f05d2e2461962a677b5b59b568296353778e2d98b343046e405a29b6dca77ac66d4f83988d3c3c64ec038e906212d5539b5143c52747b488ebcda797492b04a7651d05f51d80485bf68067b577d40d2a929497ed\" 
width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003c/tbody\u003e\n\u003c/table\u003e\n\u003cp\u003e5 rows × 9 columns\u003c/p\u003e\n\u003c/div\u003e[5 rows x 9 columns in total]", + "text/plain": " image author content_type \\\n0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n2 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n3 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n4 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n\n size updated \\\n0 1591240 2025-03-20 17:45:04+00:00 \n1 1182951 2025-03-20 17:45:02+00:00 \n2 1520884 2025-03-20 17:44:55+00:00 \n3 1235401 2025-03-20 17:45:19+00:00 \n4 1591923 2025-03-20 17:44:47+00:00 \n\n blurred \\\n0 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n1 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n2 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n3 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n4 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n\n resized \\\n0 {'uri': 'gs://bigframes_blob_test/image_resize... \n1 {'uri': 'gs://bigframes_blob_test/image_resize... \n2 {'uri': 'gs://bigframes_blob_test/image_resize... \n3 {'uri': 'gs://bigframes_blob_test/image_resize... \n4 {'uri': 'gs://bigframes_blob_test/image_resize... \n\n normalized \\\n0 {'uri': 'gs://bigframes_blob_test/image_normal... \n1 {'uri': 'gs://bigframes_blob_test/image_normal... \n2 {'uri': 'gs://bigframes_blob_test/image_normal... \n3 {'uri': 'gs://bigframes_blob_test/image_normal... \n4 {'uri': 'gs://bigframes_blob_test/image_normal... \n\n blur_resized \n0 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n1 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n2 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n3 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n4 {'uri': 'gs://bigframes_blob_test/image_blur_r... 
\n\n[5 rows x 9 columns]" + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "id": "2d68a468", + "cell_type": "markdown", + "source": "# 4. Use LLM models to ask questions and generate embeddings on images", + "metadata": {}, + "execution_count": null + }, + { + "id": "662054a0", + "cell_type": "code", + "source": "from bigframes.ml import llm\ngemini = llm.GeminiTextGenerator()", + "metadata": { + "trusted": true, + "execution": { + "iopub.status.busy": "2025-08-18T20:36:13.954340Z", + "iopub.execute_input": "2025-08-18T20:36:13.954686Z", + "iopub.status.idle": "2025-08-18T20:36:43.225449Z", + "shell.execute_reply.started": "2025-08-18T20:36:13.954661Z", + "shell.execute_reply": "2025-08-18T20:36:43.224579Z" + } + }, + "execution_count": 10, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": "/usr/local/lib/python3.11/dist-packages/bigframes/core/log_adapter.py:175: FutureWarning: Since upgrading the default model can cause unintended breakages, the\ndefault model will be removed in BigFrames 3.0. 
Please supply an\nexplicit model to avoid this message.\n return method(*args, **kwargs)\n" + } + ] + }, + { + "id": "a31730ff", + "cell_type": "code", + "source": [ + "# Ask the same question on the images\n", + "df_image = df_image.head(2)\n", + "answer = gemini.predict(df_image, prompt=[\"what item is it?\", \"what color is the picture?\"])\n", + "render_images(answer[[\"ml_generate_text_llm_result\", \"image\"]])" + ], + "metadata": { + "trusted": true, + "execution": { + "iopub.status.busy": "2025-08-18T20:36:43.227457Z", + "iopub.execute_input": "2025-08-18T20:36:43.227798Z", + "iopub.status.idle": "2025-08-18T20:37:25.238649Z", + "shell.execute_reply.started": "2025-08-18T20:36:43.227764Z", + "shell.execute_reply": "2025-08-18T20:37:25.237623Z" + } + }, + "execution_count": 11, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": "/usr/local/lib/python3.11/dist-packages/bigframes/core/array_value.py:108: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n`db_dtypes` is a preview feature and subject to change.\n warnings.warn(msg, bfe.PreviewWarning)\n" + }, + { + "data": { + "text/html": "\u003cdiv\u003e\n\u003cstyle scoped\u003e\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n\u003c/style\u003e\n\u003ctable border=\"1\" class=\"dataframe\"\u003e\n \u003cthead\u003e\n \u003ctr style=\"text-align: right;\"\u003e\n \u003cth\u003e\u003c/th\u003e\n \u003cth\u003eml_generate_text_llm_result\u003c/th\u003e\n \u003cth\u003eimage\u003c/th\u003e\n \u003c/tr\u003e\n \u003c/thead\u003e\n \u003ctbody\u003e\n \u003ctr\u003e\n \u003cth\u003e0\u003c/th\u003e\n \u003ctd\u003eThe item is a tin of K9 Guard Dog Paw Balm.\u003c/td\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203711Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=06718c570f3a86457a786ed768e5deede983ae58e79ea4c1ff7793a6d4f7693c111340cf05a4f7dd8a968340f4c2f2a992240dec3af83725b3983a8c47f663e5d800d2af7144f8d5b2b3097ac780fa66398dd95696691f6eddadfba15b77da3c16ee5d1b700378e8fc1e2d574eea0e7adf42fa23c9e28fda9aa9c97a3ec2e461afbbbd45a9c9a0c2a50f0cd561159cae73c1d7dcf6df215a7c39fb9e4c3966364ee7c5f95f16f0e733d02ddc6e1badfbce0edc89f426a79af2af8c66e743df3811d6e7990e3a692d78461939d5ab6b393e88bc32e9bcc56764f114898e6bab921346783ff42d17cbf2f04366a21834a568a52cf32a5df81654ddd000f2ac1c34\" width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e1\u003c/th\u003e\n \u003ctd\u003eThe item is a bottle of K9 Guard Dog Hot Spot Spray.\u003c/td\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203711Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=141c42af27dc86e89a3075f3af82c75ee5afb817e0850586a96cdb3ee180c22a8ed09f0a8e1a3c5d632846cbdfd10d71600c8f33a699b5a6c0dd6f34784b4d1d8d39a9ba9e85bff3c984c29b5cd628551149428d3d2f96253d994d0e4005527207c5fe11d681e840a98601ca7858ef9f9b4680a477621684c1f2295bb49a95b40d04fb6fae3ab4383210145da8dcd9723d442314c9d60225470f5d8b017073a5626ccf77a2fb644432ece39b38f3a2ada019f20e6a61fb026962521d4f73ac92229f575eb21d18c52d2200cb4f14b2f2dfb650c52023277d7656159e99212ce90729d946caf847704c5cdaf467202a3bef13d2f1bcd558f2647da4a1d049f36b\" width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003c/tbody\u003e\n\u003c/table\u003e\n\u003cp\u003e2 rows × 2 columns\u003c/p\u003e\n\u003c/div\u003e[2 rows x 2 columns in total]", + "text/plain": " ml_generate_text_llm_result \\\n0 The item is a tin of K9 Guard Dog Paw Balm. \n1 The item is a bottle of K9 Guard Dog Hot Spot ... \n\n image \n0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... 
\n\n[2 rows x 2 columns]" + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "id": "f5d2a1ed", + "cell_type": "code", + "source": "# Ask different questions\ndf_image[\"question\"] = [\"what item is it?\", \"what color is the picture?\"]", + "metadata": { + "trusted": true, + "execution": { + "iopub.status.busy": "2025-08-18T20:37:25.239607Z", + "iopub.execute_input": "2025-08-18T20:37:25.239875Z", + "iopub.status.idle": "2025-08-18T20:37:25.263034Z", + "shell.execute_reply.started": "2025-08-18T20:37:25.239847Z", + "shell.execute_reply": "2025-08-18T20:37:25.262002Z" + } + }, + "execution_count": 12 + }, + { + "id": "fb67bf8e", + "cell_type": "code", + "source": [ + "answer_alt = gemini.predict(df_image, prompt=[df_image[\"question\"], df_image[\"image\"]])\n", + "render_images(answer_alt[[\"ml_generate_text_llm_result\", \"image\"]])" + ], + "metadata": { + "trusted": true, + "execution": { + "iopub.status.busy": "2025-08-18T20:37:25.264072Z", + "iopub.execute_input": "2025-08-18T20:37:25.264585Z", + "iopub.status.idle": "2025-08-18T20:38:10.129667Z", + "shell.execute_reply.started": "2025-08-18T20:37:25.264518Z", + "shell.execute_reply": "2025-08-18T20:38:10.128677Z" + } + }, + "execution_count": 13, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": "/usr/local/lib/python3.11/dist-packages/bigframes/core/array_value.py:108: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n`db_dtypes` is a preview feature and subject to change.\n warnings.warn(msg, bfe.PreviewWarning)\n" + }, + { + "data": { + "text/html": "\u003cdiv\u003e\n\u003cstyle scoped\u003e\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n\u003c/style\u003e\n\u003ctable border=\"1\" class=\"dataframe\"\u003e\n \u003cthead\u003e\n \u003ctr style=\"text-align: right;\"\u003e\n 
\u003cth\u003e\u003c/th\u003e\n \u003cth\u003eml_generate_text_llm_result\u003c/th\u003e\n \u003cth\u003eimage\u003c/th\u003e\n \u003c/tr\u003e\n \u003c/thead\u003e\n \u003ctbody\u003e\n \u003ctr\u003e\n \u003cth\u003e0\u003c/th\u003e\n \u003ctd\u003eThe item is a tin of K9 Guard Dog Paw Balm.\u003c/td\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203757Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=12cfd68cb145aef7edd54a3b0f03944424776133f09fc277c6bce2a8a267116ac6911b59615d0fce3ebf2e901d5ae847dbb414f9d524ab42b924a496a81bf0eff4b22c60b89adef7d863f85f9680c05d31afab795635e30ab38a297a221b6374df190bdbbdaed90c5bcd6b79f63a8d9c5f7c4db1c981b2a03fb5808feaa8e4d2e036a11f9d19c9b3fb94471562cbd2329e1a1ebc6e841111c5f90a847da63e362d6ff8d878ddc106aa13932143c2b8eb0e3c222a2bcb4ec1b46bc032678e872be7a3325e5e3db76b6dc81e0fc189bf0a4fc30e8c508fc1c12c5d61d2c9601339657a318a4b0abde83d5ca7f73387342de33399e2d8b62c67d549e09507f09c4e\" width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e1\u003c/th\u003e\n \u003ctd\u003eThe picture has colors such as white, gray, and a light blue (cyan).\u003c/td\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203757Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=b597801312fce93103bf273c7669e665573880dca47521adb16cbb6864ab2f4a057cf060519acecc9be14695ea0c5d4848ea631f53dfe6df89a390789faa4d946a2828f7112f508be8f6b40ede1fa4cd565fc9738d73ddabcaf08e7467bd58b54a0482061fc0c522f9b74a7c4402aba788d9c45b6c0416f639a275a4d70cfa15ae258343eb3c17170ff9227b1ea6b020b5ed5e95f0db4df10292537bb19afdf03007c0b05c026343e3c7ea48c743b65c1cdf13422f4c02e258a623fafd8f083ac52afb9d5787b822913de2c58a03e93ad3323b44ff16327b689e1cc6ab7590a358a82b8f70f967898f4bfd41bdafd446e8dfd83b7704094d0146bc07a0934427\" width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003c/tbody\u003e\n\u003c/table\u003e\n\u003cp\u003e2 rows × 2 columns\u003c/p\u003e\n\u003c/div\u003e[2 rows x 2 columns in total]", + "text/plain": " ml_generate_text_llm_result \\\n0 The item is a tin of K9 Guard Dog Paw Balm. \n1 The picture has colors such as white, gray, an... \n\n image \n0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... 
\n\n[2 rows x 2 columns]" + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "id": "0cf33170", + "cell_type": "code", + "source": "# Generate embeddings.\nembed_model = llm.MultimodalEmbeddingGenerator()\nembeddings = embed_model.predict(df_image[\"image\"])\nembeddings", + "metadata": { + "trusted": true, + "execution": { + "iopub.status.busy": "2025-08-18T20:38:10.130617Z", + "iopub.execute_input": "2025-08-18T20:38:10.130851Z", + "iopub.status.idle": "2025-08-18T20:39:04.790416Z", + "shell.execute_reply.started": "2025-08-18T20:38:10.130833Z", + "shell.execute_reply": "2025-08-18T20:39:04.789398Z" + } + }, + "execution_count": 14, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": "/usr/local/lib/python3.11/dist-packages/bigframes/core/log_adapter.py:175: FutureWarning: Since upgrading the default model can cause unintended breakages, the\ndefault model will be removed in BigFrames 3.0. Please supply an\nexplicit model to avoid this message.\n return method(*args, **kwargs)\n/usr/local/lib/python3.11/dist-packages/bigframes/core/array_value.py:108: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n`db_dtypes` is a preview feature and subject to change.\n warnings.warn(msg, bfe.PreviewWarning)\n" + }, + { + "data": { + "text/html": "\u003cdiv\u003e\n\u003cstyle scoped\u003e\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n\u003c/style\u003e\n\u003ctable border=\"1\" class=\"dataframe\"\u003e\n \u003cthead\u003e\n \u003ctr style=\"text-align: right;\"\u003e\n \u003cth\u003e\u003c/th\u003e\n \u003cth\u003eml_generate_embedding_result\u003c/th\u003e\n \u003cth\u003eml_generate_embedding_status\u003c/th\u003e\n \u003cth\u003eml_generate_embedding_start_sec\u003c/th\u003e\n \u003cth\u003eml_generate_embedding_end_sec\u003c/th\u003e\n 
\u003cth\u003econtent\u003c/th\u003e\n \u003c/tr\u003e\n \u003c/thead\u003e\n \u003ctbody\u003e\n \u003ctr\u003e\n \u003cth\u003e0\u003c/th\u003e\n \u003ctd\u003e[ 0.00638822 0.01666385 0.00451817 ... -0.02...\u003c/td\u003e\n \u003ctd\u003e\u003c/td\u003e\n \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2025-08-19T02:3...\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e1\u003c/th\u003e\n \u003ctd\u003e[ 0.00973672 0.02148364 0.00244308 ... 0.00...\u003c/td\u003e\n \u003ctd\u003e\u003c/td\u003e\n \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2025-08-19T02:3...\u003c/td\u003e\n \u003c/tr\u003e\n \u003c/tbody\u003e\n\u003c/table\u003e\n\u003cp\u003e2 rows × 5 columns\u003c/p\u003e\n\u003c/div\u003e[2 rows x 5 columns in total]", + "text/plain": " ml_generate_embedding_result \\\n0 [ 0.00638822 0.01666385 0.00451817 ... -0.02... \n1 [ 0.00973672 0.02148364 0.00244308 ... 0.00... \n\n ml_generate_embedding_status ml_generate_embedding_start_sec \\\n0 \u003cNA\u003e \n1 \u003cNA\u003e \n\n ml_generate_embedding_end_sec \\\n0 \u003cNA\u003e \n1 \u003cNA\u003e \n\n content \n0 {\"access_urls\":{\"expiry_time\":\"2025-08-19T02:3... \n1 {\"access_urls\":{\"expiry_time\":\"2025-08-19T02:3... 
\n\n[2 rows x 5 columns]" + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "id": "9fde6707", + "cell_type": "code", + "source": "", + "metadata": { + "trusted": true + }, + "execution_count": null + } + ], + "metadata": { + "kernelspec": { + "language": "python", + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.13", + "mimetype": "text/x-python", + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "pygments_lexer": "ipython3", + "nbconvert_exporter": "python", + "file_extension": ".py" + }, + "kaggle": { + "accelerator": "none", + "dataSources": [ + { + "sourceId": 110281, + "databundleVersionId": 13391012, + "sourceType": "competition" + } + ], + "dockerImageVersionId": 31089, + "isInternetEnabled": true, + "language": "python", + "sourceType": "notebook", + "isGpuEnabled": false + } + }, + "nbformat_minor": 4, + "nbformat": 4 +} From 3340a72607f6991e21a5cac3a5ad3f647ec0de67 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 1 May 2026 21:29:37 +0000 Subject: [PATCH 35/39] fix: use fully qualified connection ID for ObjectRefs --- .../generative_ai/ai_movie_poster.ipynb | 363 ++++++++++-------- 1 file changed, 206 insertions(+), 157 deletions(-) diff --git a/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb b/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb index 672d4f237780..0be1f8a248f9 100644 --- a/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb +++ b/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb @@ -21,8 +21,7 @@ "metadata": { "id": "XZpKUoHjXw3_" }, - "execution_count": 1, - "outputs": [] + "execution_count": 1 }, { "id": "ee509844", @@ -39,26 +38,26 @@ "id": "81b8de8d", "cell_type": "markdown", "source": [ - "\n", + "\u003ctable align=\"left\"\u003e\n", "\n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \"Colab Run in Colab\n", - " \n", - " \n", - " \n", - " \"GitHub\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://colab.research.google.com/github/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/ai_movie_poster.ipynb\"\u003e\n", + " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/colab-logo.png\" alt=\"Colab logo\"\u003e Run in Colab\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/ai_movie_poster.ipynb\"\u003e\n", + " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/github-logo.png\" width=\"32\" alt=\"GitHub logo\"\u003e\n", " View on GitHub\n", - " \n", - " \n", - " \n", - " \"BQ\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/ai_movie_poster.ipynb\"\u003e\n", + " \u003cimg src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw\u0026s\" alt=\"BQ logo\" width=\"35\"\u003e\n", " Open in BQ Studio\n", - " \n", - "
" + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + "\u003c/table\u003e" ], "metadata": {}, "execution_count": null @@ -110,14 +109,15 @@ "import bigframes.pandas as bpd\n", "\n", "MY_RPOJECT_ID = \"bigframes-dev\" # @param {type:\"string\"}\n", + "LOCATION = \"us\" # @param {type:\"string\"}\n", "\n", - "bpd.options.bigquery.project = MY_RPOJECT_ID" + "bpd.options.bigquery.project = MY_RPOJECT_ID\n", + "bpd.options.bigquery.location = LOCATION" ], "metadata": { "id": "6nqoRHYbPAx3" }, - "execution_count": null, - "outputs": [] + "execution_count": null }, { "id": "015a63c1", @@ -147,13 +147,63 @@ "source": [ "# Replace with your own connection name.\n", "MY_CONNECTION = 'bigframes-default-connection' # @param {type:\"string\"}\n", + "FULL_CONNECTION_ID = f\"{MY_RPOJECT_ID}.{LOCATION}.{MY_CONNECTION}\"\n", "\n", "import gcsfs\n", + "import bigframes\n", "import bigframes.pandas as bpd\n", "import bigframes.bigquery as bbq\n", + "import json\n", + "from IPython.display import HTML, display\n", "\n", "session = bpd.get_global_session()\n", "\n", + "# Configure global display parameters \n", + "bigframes.options.display.blob_display_width = 200\n", + "\n", + "def get_runtime_json_str(series, mode=\"R\", with_metadata=False):\n", + " s = bbq.obj.fetch_metadata(series) if with_metadata else series\n", + " runtime = bbq.obj.get_access_url(s, mode=mode)\n", + " return bbq.to_json_string(runtime)\n", + "\n", + "def render_images(df):\n", + " \"\"\"Helper to display BigFrames DataFrame with rendered image previews.\"\"\"\n", + " from bigframes import dtypes\n", + " if isinstance(df, bpd.Series):\n", + " df = df.to_frame()\n", + " \n", + " object_cols = [col for col, dtype in zip(df.columns, df.dtypes) if dtype == dtypes.OBJ_REF_DTYPE]\n", + " if not object_cols:\n", + " display(df)\n", + " return\n", + "\n", + " limit = bigframes.options.display.max_rows or 10\n", + " view_df = df.head(limit)\n", + " runtime_cols = {\n", + " col: get_runtime_json_str(view_df[col], 
mode=\"R\", with_metadata=False) \n", + " for col in object_cols\n", + " }\n", + " \n", + " pandas_json_df = bpd.DataFrame(runtime_cols).to_pandas()\n", + " final_pd = view_df.to_pandas()\n", + " width = bigframes.options.display.blob_display_width or 200\n", + " \n", + " def format_cell_html(raw_json):\n", + " if not raw_json: return \"\"\n", + " try:\n", + " obj_rt = json.loads(raw_json)\n", + " if \"access_urls\" not in obj_rt: return \"Error fetching URL\"\n", + " uri = obj_rt.get(\"objectref\", {}).get(\"uri\", \"\")\n", + " url = obj_rt[\"access_urls\"][\"read_url\"]\n", + " if str(uri).lower().endswith((\".png\", \".jpg\", \".jpeg\", \".webp\")):\n", + " return f'\u003cimg src=\"{url}\" width=\"{width}\"\u003e'\n", + " return f'\u003ca href=\"{url}\" target=\"_blank\"\u003e{uri}\u003c/a\u003e'\n", + " except: return \"Format Error\"\n", + "\n", + " for col in object_cols:\n", + " final_pd[col] = pandas_json_df[col].map(format_cell_html)\n", + " display(HTML(final_pd.to_html(escape=False)))\n", + "\n", "# List files using gcsfs\n", "fs = gcsfs.GCSFileSystem(anon=True)\n", "uris = fs.glob(\"gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/*\")\n", @@ -164,10 +214,10 @@ "# Read the URIs into a BigQuery DataFrame\n", "movies = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", "\n", - "# Create the object reference column\n", - "movies['poster'] = bbq.obj.make_ref(movies['uri'], authorizer=MY_CONNECTION)\n", + "# Create the object reference column using the fully qualified connection ID\n", + "movies['poster'] = bbq.obj.make_ref(movies['uri'], authorizer=FULL_CONNECTION_ID)\n", "movies = movies[['poster']]\n", - "movies.head(1)" + "render_images(movies.head(1))" ], "metadata": { "colab": { @@ -191,11 +241,11 @@ "data": { "text/html": [ "\n", - " Query processed 0 Bytes in a moment of slot time. 
[Job bigframes-dev:US.48a27954-7a4a-4b9e-8176-ea227fd188ad details]\n", + " Query processed 0 Bytes in a moment of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:48a27954-7a4a-4b9e-8176-ea227fd188ad\u0026page=queryresults\"\u003eJob bigframes-dev:US.48a27954-7a4a-4b9e-8176-ea227fd188ad details\u003c/a\u003e]\n", " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -217,11 +267,11 @@ "data": { "text/html": [ "\n", - " Query processed 1.3 kB in a minute of slot time. [Job bigframes-dev:US.09c48ecb-e041-4c18-a390-ca5a36fd07c3 details]\n", + " Query processed 1.3 kB in a minute of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:09c48ecb-e041-4c18-a390-ca5a36fd07c3\u0026page=queryresults\"\u003eJob bigframes-dev:US.09c48ecb-e041-4c18-a390-ca5a36fd07c3 details\u003c/a\u003e]\n", " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -235,7 +285,7 @@ " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -244,8 +294,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
poster
0
\n", - "

1 rows × 1 columns

\n", - "
[1 rows x 1 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eposter\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/vertex-ai%2Fdataset-management%2Fdatasets%2Fclassic-movie-posters%2Fder_student_von_prag.jpg?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260326%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260326T200041Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1683653080624441\u0026X-Goog-Signature=9f955e89088240b34a5cbfba751fffacc5dfd7a2df468dcccfae06c939358c702ffbeb940403a69ad36e3fdf321abee60cf2b9795c9c1744bc0b164d6c2eca99666a0853e7afcf7670a07ff115bfe534791c9ab4267cb383e3a46ede9301aeeb8534a42a1d4c8f790f3a60eab06aa72a8fe76ee6cbb88de8e42a0809d8322a0ad8aecd1c64a55b1cc8716acf4f0dc2550a2059e63d98d49707fe27180ada0a277ea9b1827fc261657bcee9ec5cc7117df704f135d983325abb97dc77ee7a270c466e689921fce8ecd23824b515f2811c3c13ee382c5bc3bd34b7dd95a845705a8f654315b2128799efd0509dee5f6db1eb1b773438d3bfc8112d76cbe892e376\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e1 rows × 1 columns\u003c/p\u003e\n", + "\u003c/div\u003e[1 rows x 1 columns in total]" ], "text/plain": [ " poster\n", @@ -306,10 +356,9 @@ "import bigframes.bigquery as bbq\n", "\n", "movies['title'] = bbq.ai.generate(\n", - " (\"What is the movie title for this poster? 
Name only\", movies['poster']),\n", - " endpoint='gemini-2.5-pro'\n", - ").struct.field(\"result\")\n", - "movies.head(1)" + " (\"What is the movie title for this poster image?\", movies['poster'])\n", + ")\n", + "render_images(movies.head(1))" ], "metadata": { "colab": { @@ -343,11 +392,11 @@ "data": { "text/html": [ "\n", - " Query processed 1.3 kB in 2 minutes of slot time. [Job bigframes-dev:US.4a08a15f-5a2f-463b-bba8-734858ec992b details]\n", + " Query processed 1.3 kB in 2 minutes of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:4a08a15f-5a2f-463b-bba8-734858ec992b\u0026page=queryresults\"\u003eJob bigframes-dev:US.4a08a15f-5a2f-463b-bba8-734858ec992b details\u003c/a\u003e]\n", " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -361,7 +410,7 @@ " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -370,8 +419,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
postertitle
0Der Student von Prag
\n", - "

1 rows × 2 columns

\n", - "
[1 rows x 2 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eposter\u003c/th\u003e\n", + " \u003cth\u003etitle\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/vertex-ai%2Fdataset-management%2Fdatasets%2Fclassic-movie-posters%2Fder_student_von_prag.jpg?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260326%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260326T200057Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1683653080624441\u0026X-Goog-Signature=29c8cf20d3f56ab1939ec00dbc1afd26e888b6475808258e34bc60a65e207b877c39853678b0cd1c9918d35e312e151725dbefc4ed6c519e4ec1f2c23c2e307f87442d09c5c8f0bbd49af92eb05e18ff35cd44f2f2954b79a33cf706c7ae1662e23e3220224d6f58b775cb1875213b5050f910cb41a4a8fb312f308b0566448ddf7ef15e22ec2a5261af2570f89e0f6067ac4cbf5874eaf522a6e4d8cf6e0313be3079b172bdc19c2d6901f53bbacf5bee3f2913c7f9f657cd1aed25d786f66a84f96e4dbe36e7f01d8b67887c9ac93edf866495fdf13c6b95152cdfa6b699fd14aeb477ec4a14fcd9f37eaf88ad02eb40a952635f97e7639be764b0007e011e\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003eDer Student von Prag\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e1 rows × 2 columns\u003c/p\u003e\n", + "\u003c/div\u003e[1 rows x 2 columns in total]" ], "text/plain": [ " poster title\n", @@ -477,11 +526,11 @@ "data": { "text/html": [ "\n", - " Query processed 1.3 kB in 4 minutes of slot time. 
[Job bigframes-dev:US.b60a151a-6cbc-405e-9c40-8a7461981a00 details]\n", + " Query processed 1.3 kB in 4 minutes of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:b60a151a-6cbc-405e-9c40-8a7461981a00\u0026page=queryresults\"\u003eJob bigframes-dev:US.b60a151a-6cbc-405e-9c40-8a7461981a00 details\u003c/a\u003e]\n", " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -495,7 +544,7 @@ " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -504,8 +553,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
postertitleyear
0Der Student von Prag1913
\n", - "

1 rows × 3 columns

\n", - "
[1 rows x 3 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eposter\u003c/th\u003e\n", + " \u003cth\u003etitle\u003c/th\u003e\n", + " \u003cth\u003eyear\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/vertex-ai%2Fdataset-management%2Fdatasets%2Fclassic-movie-posters%2Fder_student_von_prag.jpg?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260326%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260326T200120Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1683653080624441\u0026X-Goog-Signature=96035b9c90093c9636f0b406e5ca9daf52bb1019bde4d52e779f3ce7371e6df0430b3f2e991869065e113327a7698e7ce5ad7b4db8781aa65adea890b80976c97b93b3f9deac5002a1e27b4bd2c1df9250ff4167f150c88be2067f70d45b7c94fd6d69f36a90b5a3ad1a3d500e3cc89a4fe4a67157cbea164d5ce34506dd1d2353eedb1c663eb1a4578c8ff1f9af2ab21a7065de4ec3ff1af44e764a3215874e564e6beeb502739468a80a02c79dcc71f7518435686270d855007e01653659804b5f50ab9c43c4627f28625e07572a4b0f30de49397f9f0445571cdacb695747bdb17614addcf33a90036aa48d025baa8a4d6bd5000d0106a788c2c23f1292c8\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003eDer Student von Prag\u003c/td\u003e\n", + " \u003ctd\u003e1913\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e1 rows × 3 columns\u003c/p\u003e\n", + "\u003c/div\u003e[1 rows x 3 columns in total]" ], "text/plain": [ " poster title \\\n", @@ -584,8 +633,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
0
posterstruct<uri: string, version: string, authorize...
titlestring[pyarrow]
yearInt64
\n", - "

" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003eposter\u003c/th\u003e\n", + " \u003ctd\u003estruct\u0026lt;uri: string, version: string, authorize...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003etitle\u003c/th\u003e\n", + " \u003ctd\u003estring[pyarrow]\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003eyear\u003c/th\u003e\n", + " \u003ctd\u003eInt64\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003c/div\u003e\u003cbr\u003e\u003clabel\u003e\u003cb\u003edtype:\u003c/b\u003e object\u003c/label\u003e" ], "text/plain": [ - "poster structJob bigframes-dev:US.c9bb23f0-5ceb-4d6c-8241-960c496274ae details]\n", + " Query processed 1.3 kB in 6 minutes of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:c9bb23f0-5ceb-4d6c-8241-960c496274ae\u0026page=queryresults\"\u003eJob bigframes-dev:US.c9bb23f0-5ceb-4d6c-8241-960c496274ae details\u003c/a\u003e]\n", " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -701,7 +750,7 @@ " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -710,8 +759,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
postertitleyear
8Shoulder Arms1918
\n", - "

1 rows × 3 columns

\n", - "
[1 rows x 3 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eposter\u003c/th\u003e\n", + " \u003cth\u003etitle\u003c/th\u003e\n", + " \u003cth\u003eyear\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e8\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/vertex-ai%2Fdataset-management%2Fdatasets%2Fclassic-movie-posters%2Fshoulder_arms.jpeg?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260326%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260326T200210Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1683653082560296\u0026X-Goog-Signature=64c1fb48cc9830dd4153bca15d05d8703c770e12a4df99abf4cab9dec02d13c66adf4d1223ffda9a30763ad2b286086dfc8cc9b8d20875b29d0c1639983c3ba08a02364bf49361b4a24c3a6830def8d6d3561eeb04d01604b5bae86e48457dc368fee538d0beea2228fdf5e94b5862e1097f58545d7449fa5df0e93fb9c3c0a32943ca9970911f183adf71a7e13e9275efd41c1f69b8f8453b853a30cbb5e8859d72b95ca653204b5ae8f96a12d88d59e988349f74e3f6db6ef277c066d92a28c50335d494beead9a3c0c796c97ca48c497328ae7ad278161c28743193233b28ac0fcafab2431179f7f6321345d8a67e6af39d7339697a5892f0441a266262ab\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003eShoulder Arms\u003c/td\u003e\n", + " \u003ctd\u003e1918\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e1 rows × 3 columns\u003c/p\u003e\n", + "\u003c/div\u003e[1 rows x 3 columns in total]" ], "text/plain": [ " poster title year\n", From c9cf4b7565df76cda0264348ab687dbeecf64832 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 1 May 2026 23:06:14 +0000 Subject: [PATCH 36/39] fix: 
update AI tests and library logic --- .../bigframes/bigquery/_operations/ai.py | 8 ++------ .../tests/system/small/bigquery/test_ai.py | 18 ++++++++++++------ 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/packages/bigframes/bigframes/bigquery/_operations/ai.py b/packages/bigframes/bigframes/bigquery/_operations/ai.py index 65d80e39319e..469915059ec4 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/ai.py +++ b/packages/bigframes/bigframes/bigquery/_operations/ai.py @@ -1172,6 +1172,8 @@ def forecast( return ml_core.BaseBqml(df._session).ai_forecast(input_data=df, options=options) + + def _separate_context_and_series( prompt: PROMPT_TYPE, ) -> Tuple[List[str | None], List[series.Series]]: @@ -1189,9 +1191,6 @@ def _separate_context_and_series( return [None], [series.Series([prompt])] if isinstance(prompt, series.Series): - if prompt.dtype == dtypes.OBJ_REF_DTYPE: - # Multi-model support - return [None], [prompt._blob._read_url()] return [None], [prompt] prompt_context: List[str | None] = [] @@ -1226,9 +1225,6 @@ def _convert_series( ) -> series.Series: result = convert.to_bf_series(s, default_index=None, session=session) - if result.dtype == dtypes.OBJ_REF_DTYPE: - # Support multimodel - return result._blob._read_url() return result diff --git a/packages/bigframes/tests/system/small/bigquery/test_ai.py b/packages/bigframes/tests/system/small/bigquery/test_ai.py index c6d767124597..b8ee1256e853 100644 --- a/packages/bigframes/tests/system/small/bigquery/test_ai.py +++ b/packages/bigframes/tests/system/small/bigquery/test_ai.py @@ -188,7 +188,8 @@ def test_ai_generate_bool_multi_model(session): session, ["gs://cloud-samples-data/vision/ocr/sign.jpg"], name="image" ) - result = bbq.ai.generate_bool((df["image"], " contains an animal")) + image_runtime = bbq.obj.get_access_url(df["image"], mode="R") + result = bbq.ai.generate_bool((image_runtime, " contains an animal")) assert _contains_no_nulls(result) assert result.dtype == 
pd.ArrowDtype( @@ -225,8 +226,9 @@ def test_ai_generate_int_multi_model(session): session, ["gs://cloud-samples-data/vision/ocr/sign.jpg"], name="image" ) + image_runtime = bbq.obj.get_access_url(df["image"], mode="R") result = bbq.ai.generate_int( - ("How many animals are there in the picture ", df["image"]) + ("How many animals are there in the picture ", image_runtime) ) assert _contains_no_nulls(result) @@ -264,8 +266,9 @@ def test_ai_generate_double_multi_model(session): session, ["gs://cloud-samples-data/vision/ocr/sign.jpg"], name="image" ) + image_runtime = bbq.obj.get_access_url(df["image"], mode="R") result = bbq.ai.generate_double( - ("How many animals are there in the picture ", df["image"]) + ("How many animals are there in the picture ", image_runtime) ) assert _contains_no_nulls(result) @@ -359,7 +362,8 @@ def test_ai_if_multi_model(session, bq_connection): session, ["gs://cloud-samples-data/vision/ocr/sign.jpg"], name="image" ) - result = bbq.ai.if_((df["image"], " contains an animal")) + image_runtime = bbq.obj.get_access_url(df["image"], mode="R") + result = bbq.ai.if_((image_runtime, " contains an animal")) assert _contains_no_nulls(result) assert result.dtype == dtypes.BOOL_DTYPE @@ -379,7 +383,8 @@ def test_ai_classify_multi_model(session, bq_connection): session, ["gs://cloud-samples-data/vision/ocr/sign.jpg"], name="image" ) - result = bbq.ai.classify(df["image"], ["photo", "cartoon"]) + image_runtime = bbq.obj.get_access_url(df["image"], mode="R") + result = bbq.ai.classify(image_runtime, ["photo", "cartoon"]) assert _contains_no_nulls(result) assert result.dtype == dtypes.STRING_DTYPE @@ -399,7 +404,8 @@ def test_ai_score_multi_model(session): df = _create_mock_obj_ref_df( session, ["gs://cloud-samples-data/vision/ocr/sign.jpg"], name="image" ) - prompt = ("Rank the liveliness of ", df["image"], "on the scale from 1 to 3") + image_runtime = bbq.obj.get_access_url(df["image"], mode="R") + prompt = ("Rank the liveliness of ", image_runtime, 
"on the scale from 1 to 3") result = bbq.ai.score(prompt) From 53a11398226aca621b57b7b4182e63a736b2465f Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 1 May 2026 23:06:17 +0000 Subject: [PATCH 37/39] fix: stabilize multimodal notebooks with explicit URL fetching --- .../generative_ai/ai_movie_poster.ipynb | 801 ++++++------- ...uct-images-with-bigframes-multimodal.ipynb | 1016 ++++++++++++----- 2 files changed, 1108 insertions(+), 709 deletions(-) diff --git a/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb b/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb index 0be1f8a248f9..ce0be03197a8 100644 --- a/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb +++ b/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb @@ -1,8 +1,13 @@ { "cells": [ { - "id": "7add2e44", "cell_type": "code", + "execution_count": 1, + "id": "7add2e44", + "metadata": { + "id": "XZpKUoHjXw3_" + }, + "outputs": [], "source": [ "# Copyright 2026 Google LLC\n", "#\n", @@ -17,77 +22,72 @@ "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License." 
- ], - "metadata": { - "id": "XZpKUoHjXw3_" - }, - "execution_count": 1 + ] }, { - "id": "ee509844", "cell_type": "markdown", - "source": [ - "# Analyzing movie posters with BigQuery Dataframe AI functions" - ], + "id": "ee509844", "metadata": { "id": "SEKzWP6jW9Oj" }, - "execution_count": null + "source": [ + "# Analyzing movie posters with BigQuery Dataframe AI functions" + ] }, { - "id": "81b8de8d", "cell_type": "markdown", + "id": "81b8de8d", + "metadata": {}, "source": [ - "\u003ctable align=\"left\"\u003e\n", + "\n", "\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://colab.research.google.com/github/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/ai_movie_poster.ipynb\"\u003e\n", - " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/colab-logo.png\" alt=\"Colab logo\"\u003e Run in Colab\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/ai_movie_poster.ipynb\"\u003e\n", - " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/github-logo.png\" width=\"32\" alt=\"GitHub logo\"\u003e\n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \"Colab Run in Colab\n", + " \n", + " \n", + " \n", + " \"GitHub\n", " View on GitHub\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/ai_movie_poster.ipynb\"\u003e\n", - " \u003cimg src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw\u0026s\" alt=\"BQ logo\" width=\"35\"\u003e\n", + " \n", + " \n", + " \n", + " \"BQ\n", " Open in BQ Studio\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e" - ], - "metadata": {}, - "execution_count": null + " \n", + "
" + ] }, { - "id": "256b6c02", "cell_type": "markdown", - "source": [ - "BigQuery Dataframe provides a Pythonic way to use AI functions directly with your dataframes. In this notebook, you will use these functions to analyze old\n", - "movie posters. These posters are images stored in a public Google Cloud Storage bucket: `gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters`" - ], + "id": "256b6c02", "metadata": { "id": "c9CCKXG5XTb-" }, - "execution_count": null + "source": [ + "BigQuery Dataframe provides a Pythonic way to use AI functions directly with your dataframes. In this notebook, you will use these functions to analyze old\n", + "movie posters. These posters are images stored in a public Google Cloud Storage bucket: `gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters`" + ] }, { - "id": "3f71d3cb", "cell_type": "markdown", - "source": [ - "## Set up" - ], + "id": "3f71d3cb", "metadata": { "id": "CUJDa_7MPbL9" }, - "execution_count": null + "source": [ + "## Set up" + ] }, { - "id": "547145f5", "cell_type": "markdown", + "id": "547145f5", + "metadata": { + "id": "D3iYtBSkYpCK" + }, "source": [ "Before you begin, you need to\n", "\n", @@ -96,15 +96,16 @@ "\n", "Once you have the permissions set up, import the `bigframes.pandas` package, and\n", "set your cloud project ID." 
- ], - "metadata": { - "id": "D3iYtBSkYpCK" - }, - "execution_count": null + ] }, { - "id": "d9cd6da8", "cell_type": "code", + "execution_count": 2, + "id": "d9cd6da8", + "metadata": { + "id": "6nqoRHYbPAx3" + }, + "outputs": [], "source": [ "import bigframes.pandas as bpd\n", "\n", @@ -113,37 +114,119 @@ "\n", "bpd.options.bigquery.project = MY_RPOJECT_ID\n", "bpd.options.bigquery.location = LOCATION" - ], - "metadata": { - "id": "6nqoRHYbPAx3" - }, - "execution_count": null + ] }, { - "id": "015a63c1", "cell_type": "markdown", - "source": [ - "## Load data" - ], + "id": "015a63c1", "metadata": { "id": "2XHcNHtvPhNW" }, - "execution_count": null + "source": [ + "## Load data" + ] }, { - "id": "254561e0", "cell_type": "markdown", - "source": [ - "First, you load the data from the GCS bucket to a BigQuery Dataframe with the `from_glob_path` method:" - ], + "id": "254561e0", "metadata": { "id": "eS-9A7DijfoQ" }, - "execution_count": null + "source": [ + "First, you load the data from the GCS bucket to a BigQuery Dataframe with the `from_glob_path` method:" + ] }, { - "id": "47acbbfe", "cell_type": "code", + "execution_count": 3, + "id": "47acbbfe", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "ZNPzFjCyPap0", + "outputId": "346d20b2-d615-4094-d24e-2d40e5c90ee2" + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " Query processed 0 Bytes in a moment of slot time.\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, 
bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " Query processed 0 Bytes in 16 seconds of slot time.\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " Query processed 0 Bytes in 9 seconds of slot time.\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
poster
0
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# Replace with your own connection name.\n", "MY_CONNECTION = 'bigframes-default-connection' # @param {type:\"string\"}\n", @@ -166,6 +249,10 @@ " runtime = bbq.obj.get_access_url(s, mode=mode)\n", " return bbq.to_json_string(runtime)\n", "\n", + "def get_read_url(series):\n", + " runtime = bbq.obj.get_access_url(series, mode=\"R\")\n", + " return bbq.json_value(runtime, \"$.access_urls.read_url\")\n", + "\n", "def render_images(df):\n", " \"\"\"Helper to display BigFrames DataFrame with rendered image previews.\"\"\"\n", " from bigframes import dtypes\n", @@ -196,8 +283,8 @@ " uri = obj_rt.get(\"objectref\", {}).get(\"uri\", \"\")\n", " url = obj_rt[\"access_urls\"][\"read_url\"]\n", " if str(uri).lower().endswith((\".png\", \".jpg\", \".jpeg\", \".webp\")):\n", - " return f'\u003cimg src=\"{url}\" width=\"{width}\"\u003e'\n", - " return f'\u003ca href=\"{url}\" target=\"_blank\"\u003e{uri}\u003c/a\u003e'\n", + " return f''\n", + " return f'{uri}'\n", " except: return \"Format Error\"\n", "\n", " for col in object_cols:\n", @@ -218,148 +305,22 @@ "movies['poster'] = bbq.obj.make_ref(movies['uri'], authorizer=FULL_CONNECTION_ID)\n", "movies = movies[['poster']]\n", "render_images(movies.head(1))" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "ZNPzFjCyPap0", - "outputId": "346d20b2-d615-4094-d24e-2d40e5c90ee2" - }, - "execution_count": null, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.12/dist-packages/bigframes/core/global_session.py:113: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n", - " _global_session = bigframes.session.connect(\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " Query processed 0 Bytes in a moment of slot time. 
[\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:48a27954-7a4a-4b9e-8176-ea227fd188ad\u0026page=queryresults\"\u003eJob bigframes-dev:US.48a27954-7a4a-4b9e-8176-ea227fd188ad details\u003c/a\u003e]\n", - " " - ], - "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/lib/python3.12/dist-packages/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " Query processed 1.3 kB in a minute of slot time. 
[\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:09c48ecb-e041-4c18-a390-ca5a36fd07c3\u0026page=queryresults\"\u003eJob bigframes-dev:US.09c48ecb-e041-4c18-a390-ca5a36fd07c3 details\u003c/a\u003e]\n", - " " - ], - "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " Query processed 1.2 kB in a moment of slot time.\n", - " " - ], - "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "\u003c/style\u003e\n", - "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", - " \u003cthead\u003e\n", - " \u003ctr style=\"text-align: right;\"\u003e\n", - " \u003cth\u003e\u003c/th\u003e\n", - " \u003cth\u003eposter\u003c/th\u003e\n", - " \u003c/tr\u003e\n", - " \u003c/thead\u003e\n", - " \u003ctbody\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e0\u003c/th\u003e\n", - " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/vertex-ai%2Fdataset-management%2Fdatasets%2Fclassic-movie-posters%2Fder_student_von_prag.jpg?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260326%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260326T200041Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1683653080624441\u0026X-Goog-Signature=9f955e89088240b34a5cbfba751fffacc5dfd7a2df468dcccfae06c939358c702ffbeb940403a69ad36e3fdf321abee60cf2b9795c9c1744bc0b164d6c2eca99666a0853e7afcf7670a07ff115bfe534791c9ab4267cb383e3a46ede9301aeeb8534a42a1d4c8f790f3a60eab06aa72a8fe76ee6cbb88de8e42a0809d8322a0ad8aecd1c64a55b1cc8716acf4f0dc2550a2059e63d98d49707fe27180ada0a277ea9b1827fc261657bcee9ec5cc7117df704f135d983325abb97dc77ee7a270c466e689921fce8ecd23824b515f2811c3c13ee382c5bc3bd34b7dd95a845705a8f654315b2128799efd0509dee5f6db1eb1b773438d3bfc8112d76cbe892e376\"\u003e\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003c/tbody\u003e\n", - "\u003c/table\u003e\n", - "\u003cp\u003e1 rows × 1 columns\u003c/p\u003e\n", - "\u003c/div\u003e[1 rows x 1 columns in total]" - ], - "text/plain": [ - " poster\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-03-27T02:0...\n", - "\n", - "[1 rows x 1 columns]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } ] }, { - "id": "f1096d2f", "cell_type": "markdown", - "source": [ - "## Extract titles from posters" - ], + "id": "f1096d2f", "metadata": { "id": "EfkdDH08QnYw" }, - "execution_count": null + "source": [ + "## Extract titles from posters" + ] }, { - "id": "bb30d47c", "cell_type": "code", - "source": [ - "import bigframes.bigquery as bbq\n", - "\n", - "movies['title'] = bbq.ai.generate(\n", - " (\"What is the movie title for this poster image?\", movies['poster'])\n", - ")\n", - "render_images(movies.head(1))" - ], + "execution_count": 4, + "id": "bb30d47c", "metadata": { "colab": 
{ "base_uri": "https://localhost:8080/", @@ -368,35 +329,59 @@ "id": "6CoZZ5tSQm1r", "outputId": "1b3915ce-eb83-4be9-b1c1-d9a326dc9408" }, - "execution_count": 4, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/lib/python3.12/dist-packages/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n", - "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/lib/python3.12/dist-packages/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n" + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" ] }, { "data": { "text/html": [ "\n", - " Query processed 1.3 kB in 2 minutes of slot time. 
[\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:4a08a15f-5a2f-463b-bba8-734858ec992b\u0026page=queryresults\"\u003eJob bigframes-dev:US.4a08a15f-5a2f-463b-bba8-734858ec992b details\u003c/a\u003e]\n", + " Query started with request ID bigframes-dev:US.dc0385a0-1910-4dc4-b090-19d92db9bbcb.
SQL
WITH `bfcte_0` AS (\n",
+       "  SELECT\n",
+       "    *\n",
+       "  FROM UNNEST(ARRAY<STRUCT<`bfcol_0` STRING, `bfcol_1` INT64, `bfcol_2` INT64>>[STRUCT(\n",
+       "    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/au_secours.jpeg',\n",
+       "    0,\n",
+       "    0\n",
+       "  ), STRUCT(\n",
+       "    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/barque_sortant_du_port.jpeg',\n",
+       "    1,\n",
+       "    1\n",
+       "  ), STRUCT(\n",
+       "    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/battling_butler.jpg',\n",
+       "    2,\n",
+       "    2\n",
+       "  ), STRUCT(\n",
+       "    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/brown_of_harvard.jpeg',\n",
+       "    3,\n",
+       "    3\n",
+       "  ), STRUCT(\n",
+       "    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/der_student_von_prag.jpg',\n",
+       "    4,\n",
+       "    4\n",
+       "  )])\n",
+       ")\n",
+       "SELECT\n",
+       "  `bfcol_1` AS `bfuid_col_20`,\n",
+       "  TO_JSON_STRING(\n",
+       "    OBJ.GET_ACCESS_URL(OBJ.MAKE_REF(`bfcol_0`, 'bigframes-dev.us.bigframes-default-connection'), 'R')\n",
+       "  ) AS `bfuid_col_24`\n",
+       "FROM `bfcte_0`\n",
+       "ORDER BY\n",
+       "  `bfcol_2` ASC NULLS LAST\n",
+       "LIMIT 1
\n", " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -406,11 +391,11 @@ "data": { "text/html": [ "\n", - " Query processed 1.2 kB in a moment of slot time.\n", + " Query processed 0 Bytes in 44 seconds of slot time. [Job bigframes-dev:US.job_3KY0bZD8ZOVtXa1mDZrw6FBieAZk details]\n", " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -419,87 +404,66 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "\u003c/style\u003e\n", - "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", - " \u003cthead\u003e\n", - " \u003ctr style=\"text-align: right;\"\u003e\n", - " \u003cth\u003e\u003c/th\u003e\n", - " \u003cth\u003eposter\u003c/th\u003e\n", - " \u003cth\u003etitle\u003c/th\u003e\n", - " \u003c/tr\u003e\n", - " \u003c/thead\u003e\n", - " \u003ctbody\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e0\u003c/th\u003e\n", - " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/vertex-ai%2Fdataset-management%2Fdatasets%2Fclassic-movie-posters%2Fder_student_von_prag.jpg?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260326%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260326T200057Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1683653080624441\u0026X-Goog-Signature=29c8cf20d3f56ab1939ec00dbc1afd26e888b6475808258e34bc60a65e207b877c39853678b0cd1c9918d35e312e151725dbefc4ed6c519e4ec1f2c23c2e307f87442d09c5c8f0bbd49af92eb05e18ff35cd44f2f2954b79a33cf706c7ae1662e23e3220224d6f58b775cb1875213b5050f910cb41a4a8fb312f308b0566448ddf7ef15e22ec2a5261af2570f89e0f6067ac4cbf5874eaf522a6e4d8cf6e0313be3079b172bdc19c2d6901f53bbacf5bee3f2913c7f9f657cd1aed25d786f66a84f96e4dbe36e7f01d8b67887c9ac93edf866495fdf13c6b95152cdfa6b699fd14aeb477ec4a14fcd9f37eaf88ad02eb40a952635f97e7639be764b0007e011e\"\u003e\u003c/td\u003e\n", - " \u003ctd\u003eDer Student von Prag\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003c/tbody\u003e\n", - "\u003c/table\u003e\n", - "\u003cp\u003e1 rows × 2 columns\u003c/p\u003e\n", - "\u003c/div\u003e[1 rows x 2 columns in total]" + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
postertitle
0The movie title for this poster image is **Au secours!**
" ], "text/plain": [ - " poster title\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-03-27T02:0... Der Student von Prag\n", - "\n", - "[1 rows x 2 columns]" + "" ] }, - "execution_count": 4, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } + ], + "source": [ + "import bigframes.bigquery as bbq\n", + "\n", + "movies['title'] = bbq.ai.generate(\n", + " (\"What is the movie title for this poster image?\", get_read_url(movies['poster']))\n", + ").struct.field(\"result\")\n", + "render_images(movies.head(1))" ] }, { - "id": "eb9eb261", "cell_type": "markdown", - "source": [ - "Notice that `ai.generate()` has a `struct` return type, which holds not only the LLM response, but also the status. If you do not provide a field name for your answer, `\"result\"` will be the default name. You can access LLM response content with the struct accessor (e.g. `my_response.struct.filed(\"result\")`);." - ], + "id": "eb9eb261", "metadata": { "id": "cFQHQ9S2lr6t" }, - "execution_count": null + "source": [ + "Notice that `ai.generate()` has a `struct` return type, which holds not only the LLM response, but also the status. If you do not provide a field name for your answer, `\"result\"` will be the default name. You can access LLM response content with the struct accessor (e.g. `my_response.struct.filed(\"result\")`);." 
+ ] }, { - "id": "ea29eb21", "cell_type": "markdown", + "id": "ea29eb21", + "metadata": { + "id": "R8kkUhgoS5Xz" + }, "source": [ "## Get movie release year\n", "\n", "In the example below, you will use `ai.generate_int()` to find the release year for each movie poster:" - ], - "metadata": { - "id": "R8kkUhgoS5Xz" - }, - "execution_count": null + ] }, { - "id": "bf426247", "cell_type": "code", - "source": [ - "movies['year'] = bbq.ai.generate_int(\n", - " (\"What is the release year for this movie?\", movies['title']),\n", - " endpoint='gemini-2.5-pro'\n", - ").struct.field(\"result\")\n", - "\n", - "movies.head(1)" - ], + "execution_count": 5, + "id": "bf426247", "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -508,29 +472,16 @@ "id": "cKZdHq0XS1iW", "outputId": "72cbad57-4518-4e1e-97bb-333d424dba73" }, - "execution_count": 5, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/lib/python3.12/dist-packages/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n" - ] - }, { "data": { "text/html": [ "\n", - " Query processed 1.3 kB in 4 minutes of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:b60a151a-6cbc-405e-9c40-8a7461981a00\u0026page=queryresults\"\u003eJob bigframes-dev:US.b60a151a-6cbc-405e-9c40-8a7461981a00 details\u003c/a\u003e]\n", + " Query processed 0 Bytes in a minute of slot time. 
[Job bigframes-dev:US.cdbe8ee8-3e39-4cb3-aaf8-060419f5b58a details]\n", " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -540,11 +491,11 @@ "data": { "text/html": [ "\n", - " Query processed 1.3 kB in a moment of slot time.\n", + " Query processed 347 Bytes in a moment of slot time.\n", " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -553,8 +504,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
postertitleyear
0{'uri': 'gs://cloud-samples-data/vertex-ai/dat...The movie title for the poster image is **Au S...1924
\n", + "

1 rows × 3 columns

\n", + "
[1 rows x 3 columns in total]" ], "text/plain": [ - " poster title \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-03-27T02:0... Der Student von Prag \n", + " poster \\\n", + "0 {'uri': 'gs://cloud-samples-data/vertex-ai/dat... \n", "\n", - " year \n", - "0 1913 \n", + " title year \n", + "0 The movie title for the poster image is **Au S... 1924 \n", "\n", "[1 rows x 3 columns]" ] @@ -602,14 +553,20 @@ "metadata": {}, "output_type": "execute_result" } + ], + "source": [ + "movies['year'] = bbq.ai.generate_int(\n", + " (\"What is the release year for this movie?\", movies['title']),\n", + " endpoint='gemini-2.5-pro'\n", + ").struct.field(\"result\")\n", + "\n", + "movies.head(1)" ] }, { - "id": "8bf12352", "cell_type": "code", - "source": [ - "movies.dtypes" - ], + "execution_count": 6, + "id": "8bf12352", "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -618,13 +575,12 @@ "id": "yqRiNRY8_8fs", "outputId": "efa60107-6883-4f5c-8e40-43c7287ea7fb" }, - "execution_count": 6, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" @@ -632,47 +588,8 @@ }, { "data": { - "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: 
right;\n", - " }\n", - "\u003c/style\u003e\n", - "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", - " \u003cthead\u003e\n", - " \u003ctr style=\"text-align: right;\"\u003e\n", - " \u003cth\u003e\u003c/th\u003e\n", - " \u003cth\u003e0\u003c/th\u003e\n", - " \u003c/tr\u003e\n", - " \u003c/thead\u003e\n", - " \u003ctbody\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003eposter\u003c/th\u003e\n", - " \u003ctd\u003estruct\u0026lt;uri: string, version: string, authorize...\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003etitle\u003c/th\u003e\n", - " \u003ctd\u003estring[pyarrow]\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003eyear\u003c/th\u003e\n", - " \u003ctd\u003eInt64\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003c/tbody\u003e\n", - "\u003c/table\u003e\n", - "\u003c/div\u003e\u003cbr\u003e\u003clabel\u003e\u003cb\u003edtype:\u003c/b\u003e object\u003c/label\u003e" - ], "text/plain": [ - "poster struct\u003curi: string, version: string, authorize...\n", + "poster structSQL
WITH `bfcte_0` AS (\n",
+       "  SELECT\n",
+       "    *\n",
+       "  FROM UNNEST(ARRAY<STRUCT<`bfcol_0` STRING, `bfcol_1` INT64, `bfcol_2` INT64>>[STRUCT(\n",
+       "    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/au_secours.jpeg',\n",
+       "    0,\n",
+       "    0\n",
+       "  ), STRUCT(\n",
+       "    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/barque_sortant_du_port.jpeg',\n",
+       "    1,\n",
+       "    1\n",
+       "  ), STRUCT(\n",
+       "    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/battling_butler.jpg',\n",
+       "    2,\n",
+       "    2\n",
+       "  ), STRUCT(\n",
+       "    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/brown_of_harvard.jpeg',\n",
+       "    3,\n",
+       "    3\n",
+       "  ), STRUCT(\n",
+       "    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/der_student_von_prag.jpg',\n",
+       "    4,\n",
+       "    4\n",
+       "  )])\n",
+       ")\n",
+       "SELECT\n",
+       "  `bfcol_1` AS `bfuid_col_52`,\n",
+       "  TO_JSON_STRING(\n",
+       "    OBJ.GET_ACCESS_URL(OBJ.MAKE_REF(`bfcol_0`, 'bigframes-dev.us.bigframes-default-connection'), 'R')\n",
+       "  ) AS `bfuid_col_58`\n",
+       "FROM `bfcte_0`\n",
+       "WHERE\n",
+       "  AI.IF(\n",
+       "    prompt => (\n",
+       "      'The movie ',\n",
+       "      AI.GENERATE(\n",
+       "        prompt => (\n",
+       "          'What is the movie title for this poster image?',\n",
+       "          JSON_VALUE(\n",
+       "            OBJ.GET_ACCESS_URL(OBJ.MAKE_REF(`bfcol_0`, 'bigframes-dev.us.bigframes-default-connection'), 'R'),\n",
+       "            '$.access_urls.read_url'\n",
+       "          )\n",
+       "        ),\n",
+       "        request_type => 'UNSPECIFIED'\n",
+       "      ).`result`,\n",
+       "      ' was made in US'\n",
+       "    )\n",
+       "  )\n",
+       "ORDER BY\n",
+       "  `bfcol_2` ASC NULLS LAST\n",
+       "LIMIT 1
\n", " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -746,11 +707,11 @@ "data": { "text/html": [ "\n", - " Query processed 1.2 kB in a moment of slot time.\n", + " Query processed 0 Bytes in 3 minutes of slot time. [Job bigframes-dev:US.job_pEC4qGIM1vr98oTcLjp-HYQ6R9h_ details]\n", " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -759,52 +720,38 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "\u003c/style\u003e\n", - "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", - " \u003cthead\u003e\n", - " \u003ctr style=\"text-align: right;\"\u003e\n", - " \u003cth\u003e\u003c/th\u003e\n", - " \u003cth\u003eposter\u003c/th\u003e\n", - " \u003cth\u003etitle\u003c/th\u003e\n", - " \u003cth\u003eyear\u003c/th\u003e\n", - " \u003c/tr\u003e\n", - " \u003c/thead\u003e\n", - " \u003ctbody\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e8\u003c/th\u003e\n", - " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/vertex-ai%2Fdataset-management%2Fdatasets%2Fclassic-movie-posters%2Fshoulder_arms.jpeg?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260326%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260326T200210Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1683653082560296\u0026X-Goog-Signature=64c1fb48cc9830dd4153bca15d05d8703c770e12a4df99abf4cab9dec02d13c66adf4d1223ffda9a30763ad2b286086dfc8cc9b8d20875b29d0c1639983c3ba08a02364bf49361b4a24c3a6830def8d6d3561eeb04d01604b5bae86e48457dc368fee538d0beea2228fdf5e94b5862e1097f58545d7449fa5df0e93fb9c3c0a32943ca9970911f183adf71a7e13e9275efd41c1f69b8f8453b853a30cbb5e8859d72b95ca653204b5ae8f96a12d88d59e988349f74e3f6db6ef277c066d92a28c50335d494beead9a3c0c796c97ca48c497328ae7ad278161c28743193233b28ac0fcafab2431179f7f6321345d8a67e6af39d7339697a5892f0441a266262ab\"\u003e\u003c/td\u003e\n", - " \u003ctd\u003eShoulder Arms\u003c/td\u003e\n", - " \u003ctd\u003e1918\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003c/tbody\u003e\n", - "\u003c/table\u003e\n", - "\u003cp\u003e1 rows × 3 columns\u003c/p\u003e\n", - "\u003c/div\u003e[1 rows x 3 columns in total]" + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
postertitleyear
3NaNThe movie title is **Brown of Harvard**.1926
" ], "text/plain": [ - " poster title year\n", - "8 {\"access_urls\":{\"expiry_time\":\"2026-03-27T02:0... Shoulder Arms 1918\n", - "\n", - "[1 rows x 3 columns]" + "" ] }, - "execution_count": 7, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } + ], + "source": [ + "us_movies = movies[bbq.ai.if_(\n", + " (\"The movie \", movies['title'], \" was made in US\")\n", + ")]\n", + "render_images(us_movies.head(1))" ] } ], @@ -820,6 +767,6 @@ "name": "python" } }, - "nbformat_minor": 0, - "nbformat": 4 + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/packages/bigframes/notebooks/kaggle/describe-product-images-with-bigframes-multimodal.ipynb b/packages/bigframes/notebooks/kaggle/describe-product-images-with-bigframes-multimodal.ipynb index 5a8178af2a9d..1a7de9b837f9 100644 --- a/packages/bigframes/notebooks/kaggle/describe-product-images-with-bigframes-multimodal.ipynb +++ b/packages/bigframes/notebooks/kaggle/describe-product-images-with-bigframes-multimodal.ipynb @@ -1,34 +1,192 @@ { "cells": [ { - "id": "876eb80c", "cell_type": "markdown", - "source": "# Describe product images with BigFrames multimodal DataFrames\n\nBased on notebook at https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\n\nThis notebook is introducing BigFrames Multimodal features:\n\n1. Create Multimodal DataFrame\n2. Combine unstructured data with structured data\n3. Conduct image transformations\n4. Use LLM models to ask questions and generate embeddings on images\n5. 
PDF chunking function\n\nInstall the bigframes package and upgrade other packages that are already included in Kaggle but have versions incompatible with bigframes.", + "id": "876eb80c", "metadata": { - "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", - "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19" + "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", + "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5" }, - "execution_count": null + "source": [ + "# Describe product images with BigFrames multimodal DataFrames\n", + "\n", + "Based on notebook at https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\n", + "\n", + "This notebook is introducing BigFrames Multimodal features:\n", + "\n", + "1. Create Multimodal DataFrame\n", + "2. Combine unstructured data with structured data\n", + "3. Conduct image transformations\n", + "4. Use LLM models to ask questions and generate embeddings on images\n", + "5. PDF chunking function\n", + "\n", + "Install the bigframes package and upgrade other packages that are already included in Kaggle but have versions incompatible with bigframes." 
+ ] }, { - "id": "0506e15e", "cell_type": "code", - "source": "%pip install --upgrade bigframes google-cloud-automl google-cloud-translate google-ai-generativelanguage tensorflow ", + "execution_count": 1, + "id": "0506e15e", "metadata": { "trusted": true }, - "execution_count": null + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: bigframes in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (2.39.0)\n", + "Requirement already satisfied: google-cloud-automl in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (2.19.0)\n", + "Requirement already satisfied: google-cloud-translate in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (3.26.0)\n", + "Requirement already satisfied: google-ai-generativelanguage in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (0.11.0)\n", + "Requirement already satisfied: tensorflow in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (2.21.0)\n", + "Requirement already satisfied: cloudpickle>=2.0.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (3.1.2)\n", + "Requirement already satisfied: fsspec>=2023.3.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (2026.1.0)\n", + "Requirement already satisfied: gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages 
(from bigframes) (2026.1.0)\n", + "Requirement already satisfied: geopandas>=0.12.2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (1.1.3)\n", + "Requirement already satisfied: google-auth<3.0,>=2.15.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (2.49.1)\n", + "Requirement already satisfied: google-cloud-bigquery>=3.36.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from google-cloud-bigquery[bqstorage,pandas]>=3.36.0->bigframes) (3.41.0)\n", + "Requirement already satisfied: google-cloud-bigquery-storage<3.0.0,>=2.30.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (2.37.0)\n", + "Requirement already satisfied: google-cloud-functions>=1.12.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (1.23.0)\n", + "Requirement already satisfied: google-cloud-bigquery-connection>=1.12.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (1.21.0)\n", + "Requirement already satisfied: google-cloud-resource-manager>=1.10.3 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (1.17.0)\n", + "Requirement already satisfied: google-cloud-storage>=2.0.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (3.10.1)\n", + "Requirement already satisfied: google-crc32c<2.0.0,>=1.0.0 in 
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (1.8.0)\n", + "Requirement already satisfied: grpc-google-iam-v1>=0.14.2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (0.14.4)\n", + "Requirement already satisfied: numpy>=1.24.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (2.4.4)\n", + "Requirement already satisfied: pandas>=1.5.3 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (2.3.3)\n", + "Requirement already satisfied: pandas-gbq>=0.26.1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (0.34.1)\n", + "Requirement already satisfied: pyarrow>=15.0.2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (21.0.0)\n", + "Requirement already satisfied: pydata-google-auth>=1.8.2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (1.9.1)\n", + "Requirement already satisfied: requests>=2.27.1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (2.33.1)\n", + "Requirement already satisfied: shapely>=1.8.5 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (2.1.2)\n", + "Requirement already satisfied: tabulate>=0.9 in 
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (0.10.0)\n", + "Requirement already satisfied: humanize>=4.6.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (4.15.0)\n", + "Requirement already satisfied: matplotlib>=3.7.1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (3.10.8)\n", + "Requirement already satisfied: db-dtypes>=1.4.2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (1.5.1)\n", + "Requirement already satisfied: pyiceberg>=0.7.1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (0.11.1)\n", + "Requirement already satisfied: atpublic<6,>=2.3 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (5.1)\n", + "Requirement already satisfied: python-dateutil<3,>=2.8.2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2022.7 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (2026.1.post1)\n", + "Requirement already satisfied: toolz<2,>=0.11 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (1.1.0)\n", + "Requirement already satisfied: typing-extensions<5,>=4.5.0 in 
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (4.15.0)\n", + "Requirement already satisfied: rich<14,>=12.4.4 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (13.9.4)\n", + "Requirement already satisfied: google-api-core<3.0.0,>=2.11.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from google-api-core[grpc]<3.0.0,>=2.11.0->google-cloud-automl) (2.30.2)\n", + "Requirement already satisfied: grpcio<2.0.0,>=1.33.2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from google-cloud-automl) (1.80.0)\n", + "Requirement already satisfied: proto-plus<2.0.0,>=1.22.3 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from google-cloud-automl) (1.27.2)\n", + "Requirement already satisfied: protobuf<8.0.0,>=4.25.8 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from google-cloud-automl) (6.33.6)\n", + "Requirement already satisfied: google-cloud-core<3.0.0,>=2.0.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from google-cloud-translate) (2.5.1)\n", + "Requirement already satisfied: absl-py>=1.0.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (2.4.0)\n", + "Requirement already satisfied: astunparse>=1.6.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) 
(1.6.3)\n", + "Requirement already satisfied: flatbuffers>=25.9.23 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (25.12.19)\n", + "Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (0.7.0)\n", + "Requirement already satisfied: google_pasta>=0.1.1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (0.2.0)\n", + "Requirement already satisfied: libclang>=13.0.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (18.1.1)\n", + "Requirement already satisfied: opt_einsum>=2.3.2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (3.4.0)\n", + "Requirement already satisfied: packaging in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (26.0)\n", + "Requirement already satisfied: setuptools in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (82.0.1)\n", + "Requirement already satisfied: six>=1.12.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (1.17.0)\n", + "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (3.3.0)\n", + "Requirement already satisfied: wrapt>=1.11.0 in 
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (2.1.2)\n", + "Requirement already satisfied: keras>=3.12.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (3.14.0)\n", + "Requirement already satisfied: h5py<3.15.0,>=3.11.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (3.14.0)\n", + "Requirement already satisfied: ml_dtypes<1.0.0,>=0.5.1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (0.5.4)\n", + "Requirement already satisfied: wheel<1.0,>=0.23.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from astunparse>=1.6.0->tensorflow) (0.47.0)\n", + "Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0->bigframes) (3.13.5)\n", + "Requirement already satisfied: decorator>4.1.2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0->bigframes) (5.2.1)\n", + "Requirement already satisfied: google-auth-oauthlib in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0->bigframes) (1.3.1)\n", + "Requirement already satisfied: google-cloud-storage-control in 
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0->bigframes) (1.11.0)\n", + "Requirement already satisfied: pyogrio>=0.7.2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from geopandas>=0.12.2->bigframes) (0.12.1)\n", + "Requirement already satisfied: pyproj>=3.5.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from geopandas>=0.12.2->bigframes) (3.7.2)\n", + "Requirement already satisfied: googleapis-common-protos<2.0.0,>=1.63.2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from google-api-core<3.0.0,>=2.11.0->google-api-core[grpc]<3.0.0,>=2.11.0->google-cloud-automl) (1.74.0)\n", + "Requirement already satisfied: grpcio-status<2.0.0,>=1.33.2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from google-api-core[grpc]<3.0.0,>=2.11.0->google-cloud-automl) (1.80.0)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from google-auth<3.0,>=2.15.0->bigframes) (0.4.2)\n", + "Requirement already satisfied: cryptography>=38.0.3 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from google-auth<3.0,>=2.15.0->bigframes) (46.0.7)\n", + "Requirement already satisfied: google-resumable-media<3.0.0,>=2.0.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from 
google-cloud-bigquery>=3.36.0->google-cloud-bigquery[bqstorage,pandas]>=3.36.0->bigframes) (2.8.2)\n", + "Requirement already satisfied: namex in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from keras>=3.12.0->tensorflow) (0.1.0)\n", + "Requirement already satisfied: optree in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from keras>=3.12.0->tensorflow) (0.19.0)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from matplotlib>=3.7.1->bigframes) (1.3.3)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from matplotlib>=3.7.1->bigframes) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from matplotlib>=3.7.1->bigframes) (4.62.1)\n", + "Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from matplotlib>=3.7.1->bigframes) (1.5.0)\n", + "Requirement already satisfied: pillow>=8 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from matplotlib>=3.7.1->bigframes) (12.2.0)\n", + "Requirement already satisfied: pyparsing>=3 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from matplotlib>=3.7.1->bigframes) (3.3.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in 
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pandas>=1.5.3->bigframes) (2026.1)\n", + "Requirement already satisfied: psutil>=5.9.8 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pandas-gbq>=0.26.1->bigframes) (7.2.2)\n", + "Requirement already satisfied: mmh3<6.0.0,>=4.0.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pyiceberg>=0.7.1->bigframes) (5.2.1)\n", + "Requirement already satisfied: click<9.0.0,>=7.1.1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pyiceberg>=0.7.1->bigframes) (8.3.2)\n", + "Requirement already satisfied: strictyaml<2.0.0,>=1.7.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pyiceberg>=0.7.1->bigframes) (1.7.3)\n", + "Requirement already satisfied: pydantic!=2.12.0,!=2.12.1,!=2.4.0,!=2.4.1,<3.0,>=2.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pyiceberg>=0.7.1->bigframes) (2.12.5)\n", + "Requirement already satisfied: tenacity<10.0.0,>=8.2.3 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pyiceberg>=0.7.1->bigframes) (9.1.4)\n", + "Requirement already satisfied: pyroaring<2.0.0,>=1.0.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pyiceberg>=0.7.1->bigframes) (1.0.4)\n", + "Requirement already satisfied: cachetools<7.0,>=5.5 in 
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pyiceberg>=0.7.1->bigframes) (6.2.6)\n", + "Requirement already satisfied: zstandard<1.0.0,>=0.13.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pyiceberg>=0.7.1->bigframes) (0.25.0)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from requests>=2.27.1->bigframes) (3.4.7)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from requests>=2.27.1->bigframes) (3.11)\n", + "Requirement already satisfied: urllib3<3,>=1.26 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from requests>=2.27.1->bigframes) (2.6.3)\n", + "Requirement already satisfied: certifi>=2023.5.7 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from requests>=2.27.1->bigframes) (2026.2.25)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from rich<14,>=12.4.4->bigframes) (4.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from rich<14,>=12.4.4->bigframes) (2.20.0)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in 
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0->bigframes) (2.6.1)\n", + "Requirement already satisfied: aiosignal>=1.4.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0->bigframes) (1.4.0)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0->bigframes) (26.1.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0->bigframes) (1.8.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0->bigframes) (6.7.1)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0->bigframes) (0.4.1)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0->bigframes) (1.23.0)\n", + "Requirement 
already satisfied: cffi>=2.0.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from cryptography>=38.0.3->google-auth<3.0,>=2.15.0->bigframes) (2.0.0)\n", + "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from google-auth-oauthlib->gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0->bigframes) (2.0.0)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from markdown-it-py>=2.2.0->rich<14,>=12.4.4->bigframes) (0.1.2)\n", + "Requirement already satisfied: pyasn1<0.7.0,>=0.6.1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pyasn1-modules>=0.2.1->google-auth<3.0,>=2.15.0->bigframes) (0.6.3)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pydantic!=2.12.0,!=2.12.1,!=2.4.0,!=2.4.1,<3.0,>=2.0->pyiceberg>=0.7.1->bigframes) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.41.5 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pydantic!=2.12.0,!=2.12.1,!=2.4.0,!=2.4.1,<3.0,>=2.0->pyiceberg>=0.7.1->bigframes) (2.41.5)\n", + "Requirement already satisfied: typing-inspection>=0.4.2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pydantic!=2.12.0,!=2.12.1,!=2.4.0,!=2.4.1,<3.0,>=2.0->pyiceberg>=0.7.1->bigframes) (0.4.2)\n", + "Requirement already satisfied: pycparser in 
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from cffi>=2.0.0->cryptography>=38.0.3->google-auth<3.0,>=2.15.0->bigframes) (3.0)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib->gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0->bigframes) (3.3.1)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m26.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install --upgrade bigframes google-cloud-automl google-cloud-translate google-ai-generativelanguage tensorflow " + ] }, { - "id": "c749e07c", "cell_type": "markdown", - "source": "**Important:** restart the kernel by going to \"Run -\u003e Restart \u0026 clear cell outputs\" before continuing.\n\nConfigure bigframes to use your GCP project. First, go to \"Add-ons -\u003e Google Cloud SDK\" and click the \"Attach\" button. Then,", + "id": "c749e07c", "metadata": {}, - "execution_count": null + "source": [ + "**Important:** restart the kernel by going to \"Run -> Restart & clear cell outputs\" before continuing.\n", + "\n", + "Configure bigframes to use your GCP project. First, go to \"Add-ons -> Google Cloud SDK\" and click the \"Attach\" button. 
Then," + ] }, { - "id": "5e00777d", "cell_type": "code", + "execution_count": 2, + "id": "5e00777d", + "metadata": { + "execution": { + "iopub.execute_input": "2025-08-18T20:17:14.873201Z", + "iopub.status.busy": "2025-08-18T20:17:14.872905Z", + "iopub.status.idle": "2025-08-18T20:17:14.946971Z", + "shell.execute_reply": "2025-08-18T20:17:14.945996Z", + "shell.execute_reply.started": "2025-08-18T20:17:14.873171Z" + }, + "trusted": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Not running on Kaggle, skipping Kaggle secrets initialization.\n" + ] + } + ], "source": [ "try:\n", " from kaggle_secrets import UserSecretsClient\n", @@ -40,22 +198,23 @@ " print(\"Not running on Kaggle, skipping Kaggle secrets initialization.\")\n", "except Exception as e:\n", " print(f\"Could not initialize Kaggle secrets: {e}\")" - ], - "metadata": { - "trusted": true, - "execution": { - "iopub.status.busy": "2025-08-18T20:17:14.872905Z", - "iopub.execute_input": "2025-08-18T20:17:14.873201Z", - "iopub.status.idle": "2025-08-18T20:17:14.946971Z", - "shell.execute_reply.started": "2025-08-18T20:17:14.873171Z", - "shell.execute_reply": "2025-08-18T20:17:14.945996Z" - } - }, - "execution_count": 2 + ] }, { - "id": "b2e171de", "cell_type": "code", + "execution_count": 3, + "id": "b2e171de", + "metadata": { + "execution": { + "iopub.execute_input": "2025-08-18T20:17:25.574192Z", + "iopub.status.busy": "2025-08-18T20:17:25.573874Z", + "iopub.status.idle": "2025-08-18T20:17:45.102002Z", + "shell.execute_reply": "2025-08-18T20:17:45.101140Z", + "shell.execute_reply.started": "2025-08-18T20:17:25.574168Z" + }, + "trusted": true + }, + "outputs": [], "source": [ "PROJECT = \"bigframes-dev\" # replace with your project. 
\n", "# Refer to https://cloud.google.com/bigquery/docs/multimodal-data-dataframes-tutorial#required_roles for your required permissions\n", @@ -140,15 +299,15 @@ " obj_rt = json.loads(raw_json)\n", " if \"access_urls\" not in obj_rt:\n", " err = obj_rt.get(\"errors\", [{\"message\": \"URL Generation Failed\"}])[0].get(\"message\")\n", - " return f'\u003cspan style=\"color:red;\"\u003eError: {err}\u003c/span\u003e'\n", + " return f'Error: {err}'\n", " \n", " uri = obj_rt.get(\"objectref\", {}).get(\"uri\", \"\")\n", " url = obj_rt[\"access_urls\"][\"read_url\"]\n", " \n", " if uri and str(uri).lower().endswith(IMAGE_EXTENSIONS):\n", - " return f'\u003cimg src=\"{url}\" width=\"{width}\"\u003e'\n", + " return f''\n", " \n", - " return f'\u003ca href=\"{url}\" target=\"_blank\"\u003e{uri if uri else \"view\"}\u003c/a\u003e'\n", + " return f'{uri if uri else \"view\"}'\n", " except:\n", " return \"Format Error\"\n", "\n", @@ -156,22 +315,23 @@ " final_pd[col] = pandas_json_df[col].map(format_cell_html)\n", " \n", " display(HTML(final_pd.to_html(escape=False)))" - ], - "metadata": { - "trusted": true, - "execution": { - "iopub.status.busy": "2025-08-18T20:17:25.573874Z", - "iopub.execute_input": "2025-08-18T20:17:25.574192Z", - "iopub.status.idle": "2025-08-18T20:17:45.102002Z", - "shell.execute_reply.started": "2025-08-18T20:17:25.574168Z", - "shell.execute_reply": "2025-08-18T20:17:45.101140Z" - } - }, - "execution_count": 3 + ] }, { - "id": "d17afaf1", "cell_type": "code", + "execution_count": 4, + "id": "d17afaf1", + "metadata": { + "execution": { + "iopub.execute_input": "2025-08-18T20:17:45.103530Z", + "iopub.status.busy": "2025-08-18T20:17:45.103249Z", + "iopub.status.idle": "2025-08-18T20:17:47.424586Z", + "shell.execute_reply": "2025-08-18T20:17:47.423762Z", + "shell.execute_reply.started": "2025-08-18T20:17:45.103499Z" + }, + "trusted": true + }, + "outputs": [], "source": [ "import gcsfs\n", "import bigframes.bigquery as bbq\n", @@ -190,168 +350,359 @@ "# 
Create the object reference column\n", "df_image['image'] = bbq.obj.make_ref(df_image['uri'], authorizer=FULL_CONNECTION_ID)\n", "df_image = df_image[['image']]" - ], - "metadata": { - "trusted": true, - "execution": { - "iopub.status.busy": "2025-08-18T20:17:45.103249Z", - "iopub.execute_input": "2025-08-18T20:17:45.103530Z", - "iopub.status.idle": "2025-08-18T20:17:47.424586Z", - "shell.execute_reply.started": "2025-08-18T20:17:45.103499Z", - "shell.execute_reply": "2025-08-18T20:17:47.423762Z" - } - }, - "execution_count": 4, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": "/usr/local/lib/python3.11/dist-packages/bigframes/core/global_session.py:103: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n _global_session = bigframes.session.connect(\n" - }, - { - "name": "stdout", - "output_type": "stream", - "text": "Please ensure you have selected a BigQuery account in the Notebook Add-ons menu.\n" - } ] }, { - "id": "3e84b922", "cell_type": "code", - "source": [ - "# Take only the 5 images to deal with. 
Preview the content of the Mutimodal DataFrame\n", - "df_image = df_image.head(5)\n", - "render_images(df_image)" - ], + "execution_count": 5, + "id": "3e84b922", "metadata": { - "trusted": true, "execution": { - "iopub.status.busy": "2025-08-18T20:17:47.425578Z", "iopub.execute_input": "2025-08-18T20:17:47.425873Z", + "iopub.status.busy": "2025-08-18T20:17:47.425578Z", "iopub.status.idle": "2025-08-18T20:18:07.919961Z", - "shell.execute_reply.started": "2025-08-18T20:17:47.425844Z", - "shell.execute_reply": "2025-08-18T20:18:07.918942Z" - } + "shell.execute_reply": "2025-08-18T20:18:07.918942Z", + "shell.execute_reply.started": "2025-08-18T20:17:47.425844Z" + }, + "trusted": true }, - "execution_count": 5, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, { "data": { - "text/html": "\u003cdiv\u003e\n\u003cstyle scoped\u003e\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n\u003c/style\u003e\n\u003ctable border=\"1\" class=\"dataframe\"\u003e\n \u003cthead\u003e\n \u003ctr style=\"text-align: right;\"\u003e\n \u003cth\u003e\u003c/th\u003e\n \u003cth\u003eimage\u003c/th\u003e\n \u003c/tr\u003e\n \u003c/thead\u003e\n \u003ctbody\u003e\n \u003ctr\u003e\n \u003cth\u003e0\u003c/th\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T201755Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=11a1a82bcec0368dffef46552650865eed90b5c897132c9f144a5d51845856739dfa930cec231f1d9686b774c76bab4e0f5b88128d58e26e35e1445a99b50b6f476084a2deab5d1343c54f810a2ff284a9c182a82daf150c2e0ba58bd5cec6bb0a8cb07c6109ffb0017c79eac94ca7b48fce86dc55156ed730b55d2a4c5681162d52ecbdf4a7d19488275eef65dfab5ea07a7929ce27c9144238eede571d9ff361eae4a1f15a9cbcd9ec09277a6386b43b39766808eb34b1a0b9c055b8d66269df378b6777c375b98562494049c26bdf9614269441e9c224028de0ab9803165f258cf366a5c4bee40cca961a9500b64c88389af46d85d41bda4a2cf77904b58f\" width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e1\u003c/th\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T201755Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=a71d2ef68382b109400d88653e34c5fa42544343eea5c51298645318966c5253ec5361d2d21e1521030088a9d6a740acb497db1fdd8d58ef91f6273813d328052d5c56e01e8686c811208c7baf233b5f43857fcaac46b71bbf316e773151ff4ba1ea956ff186807de17c0da903187cf6f49f2f98d73d2979c10702bb88918bca6a6bcf8b2fe0d38ed933e31377b8ebe9abadd2fa7ad917a07bd1a1bf68fbf05ea97f51004a47bb1a566a1a36fc3b92b5245605a2c2b24f3f0c701719609f7432b1c541b061e9ce9b474f964c9910d64b8cb8e367886edd8e808350142c3032803c04e3acfad870f1e75bdbc3caa5484341de5a3a3bf848f122410ec777c8edc7\" width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e2\u003c/th\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T201755Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=36b90b84047051e752f8ba82662b774ae77bbe811e46c43b2322945b5e5bb5282f77c0abac4c5bc6296d0f5199b7842f684a89a5ec899a052d9208af3e4729392316f0670b5d8c1a0a584492ac941e28cb1ce23db5585ff1bc2bdd9acd1110fbe6f5ebbe4c5f802d8ec87327bd89d21167dded8c7fd8251a79a29b52360d7ca5dc84949990fb4ea1ca16bb317bace8ac2c2493b945c4591ca123d02ba29052cbe0c5461853f895b79878d82c89769c5e003aa61b5e9e549a1c9bdadf16dffff241bf9d790328349e917a7b22574426ea8d32e79e6d84ddf4ddb9004f2fbba1ecd347acaaeae0290a5da9c60011c27ce846d59a3dd2c2ef309a24e2459b18130d\" width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e3\u003c/th\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T201755Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=05b04e32d1958afa905bc986c6203b2b69a1efb860ca96e3e385f0a42566d0500a9b5bc8242ef0ae978e8a9e1c098394959e894e53316b57570c2ade38dd0b97003d66e9f339e098893410c9083f441ecd794a12ed41a4f5354d30c3a656b932b30a6dbfcdd1b6d48fc21dc48093b3073370cc14f0f885d5f30a6b5d8a37d218e6f1c6e7c864cbdeceee17814be7c66669e3039dc9759d2a140967b1b44e3f6b4a33352d9f7fccc5a4056c2783731809e1762841901a371c06aa33507198c12c8e38c49cc36565a745ff3ce21dcc53ff0b064fa8f73004bfbc14d3526032e913fc0448ef5c8e1ea866a0744d3a48ea55823250153fd0b7772fce02672442175f\" width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e4\u003c/th\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T201755Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=1fa3291ae0489edf9df6fbc4a720bc1ef2d47f7a8203b42765da04cdfe8cf16f957e258becf7581fab0d66097bc047081770d2fc90a3b5c78923d99650954e2f837b1b588a03ca4a4ce9c0f780e12f5995139eba0fc85c09b2bde0b467cf2c1d90c639df9f157f78c6e2d5ec0232150ab355307c1af952790d730e53dd5792841c36b86379fd9c250f8400dd37649c0befc9c080f3b437e8847744202c19404c9b99613b04c039570584769553c84b45e200a4632d07148dbe2732c4de7b10f770ad942d7288fbd8174ff87456409427fac33e7ee92bcc1c85e7c874c4f01a5f6187a6c0f5ce3ecc57676ffd455fe51914868f6c019dffd9b30354f41ab703e6\" width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003c/tbody\u003e\n\u003c/table\u003e\n\u003cp\u003e5 rows × 1 columns\u003c/p\u003e\n\u003c/div\u003e[5 rows x 1 columns in total]", - "text/plain": " image\n0 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n1 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n2 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n3 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n4 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n\n[5 rows x 1 columns]" + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
image
0
1
2
3
4
" + ], + "text/plain": [ + "" + ] }, - "execution_count": 5, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } + ], + "source": [ + "# Take only the 5 images to deal with. Preview the content of the Mutimodal DataFrame\n", + "df_image = df_image.head(5)\n", + "render_images(df_image)" ] }, { - "id": "b0eaa73c", "cell_type": "markdown", - "source": "# 2. Combine unstructured data with structured data\n\nNow you can put more information into the table to describe the files. Such as author info from inputs, or other metadata from the gcs object itself.", + "id": "b0eaa73c", "metadata": {}, - "execution_count": null + "source": [ + "# 2. Combine unstructured data with structured data\n", + "\n", + "Now you can put more information into the table to describe the files. Such as author info from inputs, or other metadata from the gcs object itself." + ] }, { - "id": "7d64fb54", "cell_type": "code", - "source": [ - "# Combine unstructured data with structured data\n", - "df_image[\"author\"] = [\"alice\", \"bob\", \"bob\", \"alice\", \"bob\"] # type: ignore\n", - "df_image[\"content_type\"] = get_content_type(df_image[\"image\"])\n", - "df_image[\"size\"] = get_size(df_image[\"image\"])\n", - "df_image[\"updated\"] = get_updated(df_image[\"image\"])\n", - "render_images(df_image)" - ], + "execution_count": 6, + "id": "7d64fb54", "metadata": { - "trusted": true, "execution": { - "iopub.status.busy": "2025-08-18T20:18:07.921884Z", "iopub.execute_input": "2025-08-18T20:18:07.922593Z", + "iopub.status.busy": "2025-08-18T20:18:07.921884Z", "iopub.status.idle": "2025-08-18T20:18:35.549725Z", - "shell.execute_reply.started": "2025-08-18T20:18:07.922551Z", - "shell.execute_reply": "2025-08-18T20:18:35.548942Z" - } + "shell.execute_reply": "2025-08-18T20:18:35.548942Z", + "shell.execute_reply.started": "2025-08-18T20:18:07.922551Z" + }, + "trusted": true }, - "execution_count": 6, "outputs": [ { "name": "stderr", "output_type": "stream", - "text": 
"/usr/local/lib/python3.11/dist-packages/bigframes/bigquery/_operations/json.py:124: UserWarning: The `json_extract` is deprecated and will be removed in a future\nversion. Use `json_query` instead.\n warnings.warn(bfe.format_message(msg), category=UserWarning)\n/usr/local/lib/python3.11/dist-packages/bigframes/bigquery/_operations/json.py:124: UserWarning: The `json_extract` is deprecated and will be removed in a future\nversion. Use `json_query` instead.\n warnings.warn(bfe.format_message(msg), category=UserWarning)\n/usr/local/lib/python3.11/dist-packages/bigframes/bigquery/_operations/json.py:124: UserWarning: The `json_extract` is deprecated and will be removed in a future\nversion. Use `json_query` instead.\n warnings.warn(bfe.format_message(msg), category=UserWarning)\n" + "text": [ + "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] }, { "data": { - "text/html": "\u003cdiv\u003e\n\u003cstyle scoped\u003e\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n\u003c/style\u003e\n\u003ctable border=\"1\" class=\"dataframe\"\u003e\n \u003cthead\u003e\n \u003ctr style=\"text-align: right;\"\u003e\n \u003cth\u003e\u003c/th\u003e\n \u003cth\u003eimage\u003c/th\u003e\n \u003cth\u003eauthor\u003c/th\u003e\n \u003cth\u003econtent_type\u003c/th\u003e\n \u003cth\u003esize\u003c/th\u003e\n \u003cth\u003eupdated\u003c/th\u003e\n \u003c/tr\u003e\n \u003c/thead\u003e\n \u003ctbody\u003e\n \u003ctr\u003e\n \u003cth\u003e0\u003c/th\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T201823Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=8f2857f42b8ba3abed9e60449cc32b150cbd6276a923a6d85e5c06ddde834849f9dc25a05a7cd56627308c78d68f471dc14d220573d9a5564fe3b03acbeb1776414a54ab717e65a0039b2beb31092dcb7d58a2aefd8d0b0063904e330a7aa424e06c99205b5b83c9efe3111db07e9571224cc2aff52f71dad9f6b68a7afa9099803c2476a058002ac1b01f1cb6614230cccb1c730ca16c69dfa6142b255032cc799a33c1dd2cfacfd2d4db3fcfa8b8fd0ed68711bfba781d1119192fe24fe667aa129f8d1908702b295eb2d57d7782006d8c77e4a27ba6b1d3adad801ac9c859e0162db46834407d19942af2bd94c9d698b1c5352c7c9d8af58c956f607c3f10\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003ealice\u003c/td\u003e\n \u003ctd\u003eimage/png\u003c/td\u003e\n \u003ctd\u003e1591240\u003c/td\u003e\n \u003ctd\u003e2025-03-20 17:45:04+00:00\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e1\u003c/th\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T201823Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=9fbf42a56019dfd8b35e8d29981674906d025145bac8a5d9cebadfda4d04420ed34e2d554973a4084252f5ecc2803e82fd6817a2338ce9159fe5140d4a3a77cdc7e5feed733ad2b4351ce97fe82f76de1e5523022fdf12543cbf48827b07641bdd3083d0f8c11d95a46d1af17b70968e1f30ad99bf305ab8b5a827f24d5980a28df3726111f7f79c2444f78bfe5268d6ed2bbb172d368930b27690b635bdbd5cbfca7264b4e0bb0b170ac3dd15a9c36c3e77061906958464c8c610bb11b98602f99a184dca788ff5b154259c944e536fdeb47988c3ed9430e5fafd86e5c9b05088dd72c5f9f25516d0dead404dff0c0b2e4a91993a43100c66588ed373314e74\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003ebob\u003c/td\u003e\n \u003ctd\u003eimage/png\u003c/td\u003e\n \u003ctd\u003e1182951\u003c/td\u003e\n \u003ctd\u003e2025-03-20 17:45:02+00:00\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e2\u003c/th\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T201823Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=7ee0d2c8dbcc174ef6edb76725adde283ec45db4f733d02241b350bb3af4094e97dbcedcbc98091c28273d51cd059ca475d501dad445ef4dd335c1add9930196a172adc8d45beda0003f934610167c022be84e43774af7b7805ba526d39509c68f065c6bc442196df71330bfeb9910ba4b85a1bf1defd6ebb34b6d0ed092c795c006d4bec18931cc00b9678f1a8cd43b2ee46efc9c03418e182dba57f61e011d8de299482a5c94763eaecda94b914254c34e635644944c9c14cf90d2a57d05767dc9330890dcd526c91d771b1182b47e23163db8d29e6146356355724e9bb854c9e246f1c376be69b1b32d5039ff42ca0b86a3b285fc7dd6b040093ccc346344\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003ebob\u003c/td\u003e\n \u003ctd\u003eimage/png\u003c/td\u003e\n \u003ctd\u003e1520884\u003c/td\u003e\n \u003ctd\u003e2025-03-20 17:44:55+00:00\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e3\u003c/th\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T201823Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=5dbbec9d52be0f6fb2288804f812adcdbee47e8a6a5e1addeea648e3e2fa7129bf299b4528896dd039eec54da32633622c36ed90f7b93e4b52d400b50f45900a722bd3b159d0dc79ea567fb4ca2e6541028d68eec8a481ba15097fc63df50496bb51439806fc952370c3cbf3577c21f3f61b2f253c1e8c8821f5a31eceeba5fe110b2064706a36cc3a5041639f8673f7572b93195b357609944c892fffa65e0624ed73f27160d2401df695881e58074949c463c150880a9a75a0e7e28d3d144c6ef18e85d763a8b72d0ac45065d68d2a45581c11be30ab60acf4de29f7ca5592664c8a917358b5593520fee2c89cecb062a6071565acaa8f6a2bc3ba7d9f05aa\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003ealice\u003c/td\u003e\n \u003ctd\u003eimage/png\u003c/td\u003e\n \u003ctd\u003e1235401\u003c/td\u003e\n \u003ctd\u003e2025-03-20 17:45:19+00:00\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e4\u003c/th\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T201823Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=af14fcfc1b6e3a81e25df175e3b3d1b8b8be5588ef74f775dcf2cfd266b342e9c1e0184a4bc1b88c37209f27a23f0552dada58ca8edc95f47cc575a7de5678fd419f1f10bead607e37464f74093c24775990072726ad1fae5e923fbf2354967fe10d90149384d3c6e0955b6623eff94bc999b01467456bb94d1189ed76b3e4898858be4d44b5bc2a7f9b97fd4a94e2e823adfdbe2c51c385e13fb0534e8202af59364b0d9c54512fdc571a7c012d2c1f497b27f5f4e77924feddb2d4aab78ca4c2cba5282accf6707117efcf2aa4a3bb9e01815be41c380b9bd327c492b05a7e1d0fdbfbbfd8f3b0cb353165cde6cc42d1a16ac51a33c7b1318d3405cf104399\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003ebob\u003c/td\u003e\n \u003ctd\u003eimage/png\u003c/td\u003e\n \u003ctd\u003e1591923\u003c/td\u003e\n \u003ctd\u003e2025-03-20 17:44:47+00:00\u003c/td\u003e\n \u003c/tr\u003e\n \u003c/tbody\u003e\n\u003c/table\u003e\n\u003cp\u003e5 rows × 5 columns\u003c/p\u003e\n\u003c/div\u003e[5 rows x 5 columns in total]", - "text/plain": " image author content_type \\\n0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n2 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n3 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n4 {'uri': 'gs://cloud-samples-data/bigquery/tuto... 
bob image/png \n\n size updated \n0 1591240 2025-03-20 17:45:04+00:00 \n1 1182951 2025-03-20 17:45:02+00:00 \n2 1520884 2025-03-20 17:44:55+00:00 \n3 1235401 2025-03-20 17:45:19+00:00 \n4 1591923 2025-03-20 17:44:47+00:00 \n\n[5 rows x 5 columns]" + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
imageauthorcontent_typesizeupdated
0aliceimage/png7157662025-03-20 17:44:38+00:00
1bobimage/png11674062025-03-20 17:44:38+00:00
2bobimage/png11508922025-03-20 17:44:39+00:00
3aliceimage/png17365332025-03-20 17:44:39+00:00
4bobimage/png4397402025-03-20 17:44:39+00:00
" + ], + "text/plain": [ + "" + ] }, - "execution_count": 6, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } + ], + "source": [ + "# Combine unstructured data with structured data\n", + "df_image[\"author\"] = [\"alice\", \"bob\", \"bob\", \"alice\", \"bob\"] # type: ignore\n", + "df_image[\"content_type\"] = get_content_type(df_image[\"image\"])\n", + "df_image[\"size\"] = get_size(df_image[\"image\"])\n", + "df_image[\"updated\"] = get_updated(df_image[\"image\"])\n", + "render_images(df_image)" ] }, { - "id": "a23ef0e4", "cell_type": "markdown", - "source": "Then you can filter the rows based on the structured data. And for different content types, you can display them respectively or together.", + "id": "a23ef0e4", "metadata": {}, - "execution_count": null + "source": [ + "Then you can filter the rows based on the structured data. And for different content types, you can display them respectively or together." + ] }, { - "id": "ce102df0", "cell_type": "code", - "source": [ - "# filter images and display, you can also display audio and video types\n", - "filtered_df = df_image[df_image[\"author\"] == \"alice\"]\n", - "render_images(filtered_df)" - ], + "execution_count": 7, + "id": "ce102df0", "metadata": { - "trusted": true, "execution": { - "iopub.status.busy": "2025-08-18T20:18:55.299993Z", "iopub.execute_input": "2025-08-18T20:18:55.300314Z", + "iopub.status.busy": "2025-08-18T20:18:55.299993Z", "iopub.status.idle": "2025-08-18T20:19:09.154492Z", - "shell.execute_reply.started": "2025-08-18T20:18:55.300289Z", - "shell.execute_reply": "2025-08-18T20:19:09.153315Z" - } + "shell.execute_reply": "2025-08-18T20:19:09.153315Z", + "shell.execute_reply.started": "2025-08-18T20:18:55.300289Z" + }, + "trusted": true }, - "execution_count": 7, "outputs": [ { "name": "stderr", "output_type": "stream", - "text": "/usr/local/lib/python3.11/dist-packages/bigframes/bigquery/_operations/json.py:124: UserWarning: The `json_extract` is 
deprecated and will be removed in a future\nversion. Use `json_query` instead.\n warnings.warn(bfe.format_message(msg), category=UserWarning)\n" + "text": [ + "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] }, { "data": { - "text/html": "\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T201856Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026X-Goog-Signature=4dd04f25b6ceebf868ebbbe2165da17cf6b9b2ad9a8bb1dd3e1970d32290aa520559224dea75e3af9539fd2fb2bedb462c49001c64584d154a6e84cb3bf79bd82d3d17e69c7942b4e31acbde3cd9dea47b002b12087b6e9e0878ca2dae5c4c552906c9750227657e622ff20780f075755bb255aa00d14802222d3a3cb77c94188f6f22a37edf4802ec3d595d475ebdc75f4e0194062d899d506e9e2db20e84d309cbdcbcf4229aed64dedb4bf2c66198d118b7bf63afce0eb9794724956839a9d0220861ac2fd618d8abeb74a21d150139e6afbbdbf01be8537fc9fee6d3b7495945b801ca146115b174b575b4a8f95443e6afc444e3336e95ba03f9adbb21a5\" width=\"300\"/\u003e", - "text/plain": "\u003cIPython.core.display.Image object\u003e" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": "\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T201856Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026X-Goog-Signature=8d53ca04967c2219f664a0f0aa5b3f28aab4836bb22d66a3e1a5e9e6cd7735865ef1fe5e5b34f422dbb21a33cef41f11a617ec858fd26e1a75c124222ab0e33675e349ab577bc1c4939aa9de3ce0918b94e85a4f68a27cc7b8c359dbc99e2dd211d4307c54e57d455c74ba360ce038f578ba05b2122f9eb96ae7d091345a1d53b3347b042c8c963414422a870c9d168aec09dd6fddd07ceb622d1d46e0857a78e327f82b665838d1bb4864b953a7ca7ac6ad0a77d8d2ffbd842348ea213b6a9ca2ef0768380e8cc96fc4afad91aef98ca31546dafdd989aa7ed3cf7bf692c2fa2b0c199ad21040596b5a1c4cf48c791fb252e0552c1a3444ae7461c715cac915\" width=\"300\"/\u003e", - "text/plain": "\u003cIPython.core.display.Image object\u003e" + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
imageauthorcontent_typesizeupdated
0aliceimage/png7157662025-03-20 17:44:38+00:00
3aliceimage/png17365332025-03-20 17:44:39+00:00
" + ], + "text/plain": [ + "" + ] }, "metadata": {}, "output_type": "display_data" } + ], + "source": [ + "# filter images and display, you can also display audio and video types\n", + "filtered_df = df_image[df_image[\"author\"] == \"alice\"]\n", + "render_images(filtered_df)" ] }, { - "id": "db2b3b12", "cell_type": "markdown", - "source": "# 3. Conduct image transformations\n\nBigFrames Multimodal DataFrame provides image(and other) transformation functions. Such as image_blur, image_resize and image_normalize. The output can be saved to GCS folders or to BQ as bytes.", + "id": "db2b3b12", "metadata": {}, - "execution_count": null + "source": [ + "# 3. Conduct image transformations\n", + "\n", + "BigFrames Multimodal DataFrame provides image(and other) transformation functions. Such as image_blur, image_resize and image_normalize. The output can be saved to GCS folders or to BQ as bytes." + ] }, { - "id": "283036f5", "cell_type": "code", + "execution_count": 8, + "id": "283036f5", + "metadata": { + "execution": { + "iopub.execute_input": "2025-08-18T20:19:22.950652Z", + "iopub.status.busy": "2025-08-18T20:19:22.950277Z", + "iopub.status.idle": "2025-08-18T20:31:51.799997Z", + "shell.execute_reply": "2025-08-18T20:31:51.798840Z", + "shell.execute_reply.started": "2025-08-18T20:19:22.950625Z" + }, + "trusted": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/pandas/__init__.py:211: PreviewWarning: udf is in preview.\n", + " return global_session.with_default_session(\n", + "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dataframe.py:4695: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", + " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", + 
"/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
imageblurred
0
1
2
3
4
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "@bpd.udf(\n", " input_types=[str, str, int, int],\n", @@ -361,7 +712,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"opencv-python-headless\", \"numpy\", \"requests\"],\n", ")\n", - "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -\u003e str:\n", + "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -> str:\n", " import json\n", " import cv2 as cv\n", " import numpy as np\n", @@ -426,254 +777,355 @@ " image_blur, 20, 20\n", ")\n", "render_images(df_image[[\"image\", \"blurred\"]])" - ], - "metadata": { - "trusted": true, - "execution": { - "iopub.status.busy": "2025-08-18T20:19:22.950277Z", - "iopub.execute_input": "2025-08-18T20:19:22.950652Z", - "iopub.status.idle": "2025-08-18T20:31:51.799997Z", - "shell.execute_reply.started": "2025-08-18T20:19:22.950625Z", - "shell.execute_reply": "2025-08-18T20:31:51.798840Z" - } - }, - "execution_count": 8, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": "/usr/local/lib/python3.11/dist-packages/bigframes/core/log_adapter.py:175: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n return method(*args, **kwargs)\n/usr/local/lib/python3.11/dist-packages/bigframes/core/log_adapter.py:175: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n return method(*args, **kwargs)\n/usr/local/lib/python3.11/dist-packages/bigframes/core/log_adapter.py:175: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n return method(*args, **kwargs)\n" - } - ] - }, - { - "id": "b9716e5e", - "cell_type": "code", - "source": [ - "# Image resizing and normalizing can be performed similarly by implementing additional cv2 UDFs." 
- ], - "metadata": { - "trusted": true, - "execution": { - "iopub.status.busy": "2025-08-18T20:31:51.802219Z", - "iopub.execute_input": "2025-08-18T20:31:51.802745Z", - "iopub.status.idle": "2025-08-18T20:36:13.953258Z", - "shell.execute_reply.started": "2025-08-18T20:31:51.802700Z", - "shell.execute_reply": "2025-08-18T20:36:13.951930Z" - } - }, - "execution_count": 9, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": "/usr/local/lib/python3.11/dist-packages/bigframes/core/log_adapter.py:175: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n return method(*args, **kwargs)\n" - }, - { - "data": { - "text/html": "\u003cdiv\u003e\n\u003cstyle scoped\u003e\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n\u003c/style\u003e\n\u003ctable border=\"1\" class=\"dataframe\"\u003e\n \u003cthead\u003e\n \u003ctr style=\"text-align: right;\"\u003e\n \u003cth\u003e\u003c/th\u003e\n \u003cth\u003eimage\u003c/th\u003e\n \u003cth\u003eauthor\u003c/th\u003e\n \u003cth\u003econtent_type\u003c/th\u003e\n \u003cth\u003esize\u003c/th\u003e\n \u003cth\u003eupdated\u003c/th\u003e\n \u003cth\u003eblurred\u003c/th\u003e\n \u003cth\u003eresized\u003c/th\u003e\n \u003cth\u003enormalized\u003c/th\u003e\n \u003cth\u003eblur_resized\u003c/th\u003e\n \u003c/tr\u003e\n \u003c/thead\u003e\n \u003ctbody\u003e\n \u003ctr\u003e\n \u003cth\u003e0\u003c/th\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=5f8c50659e2117ce9dc838470214b2a4622a50aa22df7ad766b0110fb4a3369bbed34857568626523489133580b8ab52bee7c7e95b0d1fa8ecfc1f6d748d6dee2893d8b5456aa1d366520ab2897cf4dbd011a7fc1854ef281f54c28a35cad59a0010cc1c9f5eeed6da7c85cde366d98b418ea98a3f8a0ec83ed3bf4158a5348ac3250b0f445b5ab5ab97eecbe25f799ec0a1c466719880ec8ad50e16d01ec2d33980debb992a88b42ffa99f886622d45dd921fc97489b080b625e516106000ed55ab2dbaa04ac28ef1c97d447497f1f30dd2efea4c92d2d22d250203239de2d1da1d5ad0988224b4d76447cb5bf1ce0e104dc9369ffc2ed8abfbd605e79ebaef\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003ealice\u003c/td\u003e\n \u003ctd\u003eimage/png\u003c/td\u003e\n \u003ctd\u003e1591240\u003c/td\u003e\n \u003ctd\u003e2025-03-20 17:45:04+00:00\u003c/td\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549023340907\u0026X-Goog-Signature=5629b7eee76f37486002cef7f1db57780852922400d8d47cb838864875ba46568d97cdac09dd6b8f9ab4b0a3102446b1c749bc1a82f6b7115041225117e860061d4980a353f778bf9172d79aed23cf1471deaa2c61c1f97e104877a2d2b8d0e4b131a0f06d2c0c79929de33e0585cbcdf553fbd4ecabf7e5fecda32f97e11f809bd05df1118b6b82b7c441745a5fc4e1bcb4f0d52ed1186c94d66809ee68a4c481342aba17654f1397589227b17246952ce105c751b0d099bf674fa772fcd22456ce1534a2ff9c763009135c1f4a3a10eed92f1a6e66d6c46c5fa05a679cd9bca00558c2ceac4a63ff8d15e51ca2603682216664508abedb5f92e3f823cb4fb4\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_resize_transformed%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549186906659\u0026X-Goog-Signature=a83503cf1de4b802678a0ac122089835a1b27727e2d8474838a4836cc42dc39c8c6aee08b7d145edac3cb762116641cba9c7f539a93e87075763dc526f431950995e6483d5c911d97f0d386d598a097f183d797206c6ef4deb607125d7578a3f98657ee6003329bda5567bad49a7221417c1b628b1007a3e5d01727d9b14b29b1f27cca3f281aedaf40f34d12ca93d9f1d94f6633c13a30dcc3957f3f1ee196801b56b18dc86bdf7f5128a9dfd2af09bf94a20254af4606a55fb7e3b7eb45e9d0be34664195e7cd818b2d5990687fec8e1d9c36f6333e9a49a5399501f46b110fa6e686e1d42bf112f68d2b4e13a0bfcbcd9104510ed513f771ec2053d9bb639\" width=\"300\"\u003e\u003c/td\u003e\n 
\u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_normalize_transformed%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549353120386\u0026X-Goog-Signature=b9d8b27e4b05cf5e27afe8c0f8abc9c42fec3128c587324a84ccb94ed8675566f1482224cfdf982f0b236c59fa34a5aa1ed54b3fc4363cde2279244dbe4fb578967366ba0af304052d57a0d88d035ada68b34b723f566eea96874522830435e103acf29ce848a809e447e25f7d7d5ab3893f84931a55945d183aa058fcfaa84991e33c44e8d6e6074a783ee9578fd88325064bf8998830bf5a9f04015e910312c8cbaaf1b539113e6bc595bbd336cf84cc5c8396d8a93f7ffb0eb8b1a7a56ada4f0ef81c5dfc05ca36ec7861e3deef98b5b12a67ba321dd7a0f213d4fdc31f0675eb7031a7980ffbe1dc97ae3699651ef18c59ef2cc25fc071be017fa7441c47\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_resize_transformed%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549346801405\u0026X-Goog-Signature=2044260548b5d07aa084e834c723e9196da0eed8e63c26a248381a3d7874505e3926d78973e99fff4a89dcf34042a4c39e3da508b02b24586141e89243bbe9641982cce474f2fece7e5b2780f7ef94dd1a89d498c162eba2017dd065ee202068a0a004054abbd71524a6e447594a2003f03427db3fd4e66df3dfce42258fea5a8823b7b46d8b15b33938f1b23b4206e79257282d4511d4c72d04f66a1e3acbdad6509871aef992893fa615b2bbab638f80cb1b2ba5610b5811eb8801e06ee904dc02fe4a9740ee5a7d931f0aa96b26e2c1eab25c0d9f3361b89ee4ad1f290611490bd66d9bde6dc90c5adfd0350b8a09c2cb8399c9e3d66ca3abcceb1ef4c3fb\" 
width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e1\u003c/th\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=987caa66e11b8446c02a240c423145f774d295c19afbe71eba9c3bcd0504525b19723b8d08bb4644f231b3e0b2bcfc13350112e1956d53d957a9986dd742b5f71f5406420da2eda739319eea452088c5138e901a101ad20f682d66245e966da1c6024e68de1e775ea5f395fd744f1717faca8875f89d9cb5eb969a79fb056dab43ada9abac0658bccbc7f72ad2ea293e9d70bf95ac5d81a0dc7bf62220c76fed0274be232c24180ad03f60f89d51eaf7cbec2d6748a94647ec253a9b6d1c95cea8b054872db400e78c930e69fd4ad835f7208ca33a4917ff1b24fcb510fe554792f4cbcaa8c7d4d3f17d1cac843b3f8c181c3e443b6720124055a74479228b14\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003ebob\u003c/td\u003e\n \u003ctd\u003eimage/png\u003c/td\u003e\n \u003ctd\u003e1182951\u003c/td\u003e\n \u003ctd\u003e2025-03-20 17:45:02+00:00\u003c/td\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549020470951\u0026X-Goog-Signature=6040bd68fa7a59e5100ecb3bab3d005be6353e45ad856cb7003fbce5c379a4926edb4226cbe7f6347a2172bd7cabe96a27f573e78192712f5472ba7b6fcf3a8abb48bc8376cc1f916cbfa6870f878cf48b5f7ff0b896844c3ca63eab4ff9d926e895f141a25d1718cdfff8ea7dc98a096a245dfbac33e1092af4541e1cc4fc3de5c7d9d62ab4b32ab725476f74f49b40625cca3063eeefb9c03ee2ffddfd2080176435672c2a3206db6c3d1cf826c3011366c4f1dd4db0ad87d6ac392b4c76c4196db4e55d2535f2ebb66b1a23cb90547277027cd482102a1b6036e7a62466733b6ed30215594f4724a838f135e2e413e0c185935761ec0e6033d361aedbc36f\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_resize_transformed%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549187521760\u0026X-Goog-Signature=8bdfcaac843956ea960296c0d03c7e3020aa2810f887d80da7569f0b1828a76097eae9f3a18e0635a6701e651f69bfb8bc56533009d8e384036acd6c3a834b5e5b16b3f9a725a5759df0e504b328e304e53b8eb686252f7946cc6cf3d14a52d9e14e3d37344aa962f52e5be9dd28ea7b6ba20bb341b699bd79ced2d9f829a90cd829dc3288d87cc6310aa4361dc20c28065d50a8f70f293719d97ed8a8e8c1799c1ba6f33a5b2aadb8e2a47b6fb3acf41c204034e7e595491fd9ede24774f0484a0ae50e8e2d404ec37b29e9c6cca2feab0c4c9e67e4923848c8d3a61c59768c0937e264ec635e36a8e5a63d2c7319af46f4ab1bfd550fdf6b1ad4c07cc8e965\" width=\"300\"\u003e\u003c/td\u003e\n 
\u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_normalize_transformed%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549359215948\u0026X-Goog-Signature=9365da5cd82f3f428e3fd58e0f3dc49147aa398bba646fefd7e2d24ae130a5a1edb8a333eb22c9d89ecbdc87bede7b244aef1d2d8fc84bbd8240de4656002b23c8c2c149b692e70e3ef5c6772c6a2ecf67436c5a7cad77986f0bf03fec18b1529fb8c93bfae5a3f0f361ea3c66f13330619381d28726b6e717c84aba9296c915cf3ed630ea5fc7ecdf6b6cd7af3f93e222a5b2713acf00a2b04f709ec2c58e6e94b96a64a11891978b2fbaa23e115691a73a931dd6b439d7835dd8840ad7a879c2f8606c2ede1740e676b734f4e08fdddbb6a2e0bf7372f21f6324f3ddb8c22998bdc28309aba74e3810b528f4d3c365c3ec3468a7ac585efdd108c46c6a60b9\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_resize_transformed%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549348094576\u0026X-Goog-Signature=4b5db258dfdd417beb18af774f6ad4e3c2645aca66abbd248fe76dfa3f2bf83900090c0064f864eba26027379895ad99e0e75aa83b957fbb4fc667b0d9e7b6baeb811678118c7016fb828a9924c9ba8c27c045122f683b3fc525e0c27744b8ea3907b9dd8fbcae4e79234548a36c60617f648fc0b65b3949786d57d871e4c01de07a5824d75a290892ccba17e528600c1250eedf89b2958d81096eef7407a63148f29f5b6a3690ad86995e1c5974585095ecadceb218471c68c2527353447b639544c2378e85fc44ff55eb6903950de2530565c7d647c3bc09c6a6aa3c3c65329aa53085ed443dfad094758dd06d7f39d209e83cfc265b03113c9fc431b13017\" 
width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e2\u003c/th\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=ab5e9d751859bee56cec2b9c7279c9f1b3bef256ae2850930006ec8083a623cd96e97e207e84c2487d8e4407544a5c2fdd755e8f187eba93753cb6ef53a3a1de33691829265905e049214f7c828e640d8ed96ff2f20f60c19a044f3cfafd99797f65297703c5bc0f5450f1d9fa9526a017a99c8b174e7af970c5a8d0cab5c1ec15e34ec8efdfe06ce85aa444482e8e289f3d8dc8a5f6e2cbe5c9309e2d2ca80884880f32f27cc7745d909ba40d4c37b6631f71b9d5172a43480e5763a7d844565a084b86dcd772dc5128e238b4ea476070c4ca6d0b748417f3d23a52264c315509d94636dbeb08d152552410de678e3668ea4ea4183b7100f4c8d3d4b1aa9a4a\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003ebob\u003c/td\u003e\n \u003ctd\u003eimage/png\u003c/td\u003e\n \u003ctd\u003e1520884\u003c/td\u003e\n \u003ctd\u003e2025-03-20 17:44:55+00:00\u003c/td\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549019078907\u0026X-Goog-Signature=93ddd73d20863af4bcd6d55d8b08efd24322967276c4c50acc96abceda81aec68addf10d783fdbdac7fe7cfbd7170663bf377ca4a1e2686c52ae46e2f4fc0f4a94e7fe51b9bc3b512fd23d26fdc6a62bcce36c348457b82f65fc8c956d8ef0bf1a14d30d1b61e40f80014d76155d13e051ccbc204360b5d43b19644094f3b9fa0fa2b6d723de301ea490f18929d568d785f53c9f74be51f389ba179bcbe2f6577cc2665e8a90fe5619f33468d3df5b3b8bc1e4c7df97c430d6b2323f7961de687314d62daa4e511d8e0ccf2b0d4d5321b00024163f08ec5999feea0917aee1a33d5b103775fe64350c1fcb4e8aec72fe7c181b9c3c9deae38b19fc8190b25ead\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_resize_transformed%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549188241967\u0026X-Goog-Signature=8ccefc10cb6fd5194bc2d872fba31ec997e22d8826f309bea7f5266fac43223991b58d95ac65e0b96d5d9de7f1daa817b35abdbbc2785a3d931d1d603ad916bf5d1b1d9d0f809c3dd4714a1f1027407ef0659b3d863ce571ada817019f006b8c56f5a26da1749b3202de18becf7a337e72a1efe16f44c6a946c7196bfcb38b3b6944e31e075a5da9a576c27f840e507eddf9b29264dae15cd5f10d34874a861b4113eecc6af6d3d16f131835f4ddc5592e3adc871c96e0bafa17d9b4c133f697040398d7a6dd438c6c9c8679fed8f78a6e81c5b6001846d62224683c8ca8bf3aa26cd97d0b333a5ba333f0707a528f1e986207852339f48e6f370d492938e92b\" 
width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_normalize_transformed%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549355947800\u0026X-Goog-Signature=42e69641c9371bd2eff79bb51f0d9e4d27960d7e2a3852e0c44740c5a57afb0f026cd43fd4ee1594d09eed2b93b84ddcfd31bf341182b5e862760b8138bfef101d9f747351b6701e699a5bc406bf10125c5d29f2d6564f9acc7eb4e5197f8fdac6abb45480966b168077d0a69d9941b96ca2517a444e7e591422ec8c2a0ed8b7dc0c839af585593e73eb4f4a372f8f41d2e5bf6bed8768c46325b27bab10c4a56813265ef15b18b61f37846448c2e7890847f6b65d03e745f89094d06d6daf5820c49f60839468a5410d5d5f93015acca4fe2912a9f7c372c0b7374ca65de28bea669d8032eff08e3f575590ead5dfb6f84ac49f851ecde0298882f7e3912eb2\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_resize_transformed%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549351340006\u0026X-Goog-Signature=b2af2667d8d9026e68bb9fe6b4e9b56d0ec35b20f34490db21c39b24d139321fd31936265270ab8dceed0857c53f5d8639f7438be45cb5788d8cd2588d42e68cc86df3f81de4ad1f356263ff6fd0921d3f237b0135b37e6382c72921a5914043a72d3b6550b6ec6231cc05c9d3d522bea46c65423ce0b64b03eeaa295996c8a8e26f480e55c95ac2aec47567ff0f07b84d993b5b6f56e9caca5109932c184ed234cb5a57b1ab740f2f4164d304179242d3190b10a97a436097d8c6fbf7325fa5e04b2d65ed3fe3b00471cfb27a82ac7f3a92d0c00afe201c2a7cbec877a762c7d057bf3493e295cadefac95edc5345073ed41c518499d2bc257f92f0215ace56\" width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e3\u003c/th\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=a152ee4011f72e7f0a09a2f883dc6abeb1179f32020930e1def252e5aad559ecf3cb96abd8e134f14a7d39c71ab9fec9411d9b1c29400d06313dc871c2500ed22033c561def13df3525eda40c10b9956ec22dd06ed9148e15e4af699b8e715c2308b92303c0b57a66b9b02473abd83565731dced27af52db48b2ecc8bdecbae16b512c50c30d30fc17ab8f21e471b904b93d97a2e1652e6e70626d75aeecdb13da6ef82993a6df033dfc341127bd1d387275ef345d89b60caafdecb548126aa7f90ef6c74beb0ac0f18902de474a8e0b289e306d0f824be87d1131ed4aa093cd52e486e1dae05c6c9b3e364aec6b65d9b93294e588ff6ecdc25b75ca0a488ba2\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003ealice\u003c/td\u003e\n \u003ctd\u003eimage/png\u003c/td\u003e\n \u003ctd\u003e1235401\u003c/td\u003e\n \u003ctd\u003e2025-03-20 17:45:19+00:00\u003c/td\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549018389386\u0026X-Goog-Signature=1f1c5af6da1c990838c0a95aedea3aa65ee34b2ca70d8221511880eac073548a5f15eb993dbaf4a92469391933875d3b198b3a146181c6acfd96798f65673699724e103c131c051094f3b5721af5a79e997244c19576ce115b826565baedeb2cc08b6cc93d762ddfdc2750596d567df1c65ef32fd56ea4bec4463724107e07e2b47a3dcf50fafc1ab5d30fb8aa24024011895224c437e6f78c2cf2d4e23e09eae5c2d9ce49de627f2c5b2f90509b9532070bb426fcdd64e2892440ca43d370415ac30cea79fed08716666d40423d0631eb3cacb545a95cfe23bd47690d63610d1409cc1d508ce29a465146408dde3ce7f6b986495542f68439d06796dc7ba792\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_resize_transformed%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549186140674\u0026X-Goog-Signature=3888ecd32b4b27e8a78f8c4b674cc6ce1990d66730cc1b37fa0814a948ab09b6b9dd49ee960e9a4dc4bb77cb1b3fb5c87e0b7e23e376eef8b4dc8673c6edae6720eb7e07b52ad172e4c5518f340ce58a3adead7178051b553b9d21df0f7c2c147c922d43ec73ba36bcad88805d84f66ddbe1f2cca3338f707ce38d22bf0bb78e86fb97eaa9e8df8a923320919fa5f8b99576aaf2eab09cfdd66da2c0c82986c7df3271d0f2877ea95429018aaafd4e10ac670d25a7568d175aeab74def3371c7064cc019ee8e0fc4ca1e4c6e718c7925051b9fb35b3095dcde072043b2a29e26e85b603fcd7ee1569eaf84054e9a7a1572100f61aa38ebe4ad306bb128a9bd19\" width=\"300\"\u003e\u003c/td\u003e\n 
\u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_normalize_transformed%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549353981426\u0026X-Goog-Signature=a25d1c75eef237aa75032402319dd081e0c9f15cdd95b79a31b8245d63b17752756cb356df7c69390ac05b496aac3fce22c501614660a06a18a148ac332fc28f245b3029405edbd40f2887dc76be5cbcf107c3bdd4725b93359a91adf775d6b827ad0413ccdf30dd1541c23bef40136622bea484ff16cb8adca54467515d6df88ffff70f5295f4801c2eba2cfd3e2b3606c53bc4257e7dcc51983c1de7edc8a2313380b7bfb8ac42f2460fcce281b6ad11ccb7e4edba23227f0da9181cbeeb0380302134659799ca8704c1b8dba92557d84ef3222fcfacc7584d65084ba847952d4676894d6c9f90ae692128721b48697c93baeb988d639338450a2bdad89a44\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_resize_transformed%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549347689362\u0026X-Goog-Signature=5bd305a1286eae578a5c7aebe3fe718ba4a221d98b2aa38aae42d25572af2990f3cc845808576f97f350a2f799a740ceb9b661d6a02d1e22029f35a4ced0c1f82d6343b9fc3132b46a7754ba6571af9186be033831f98974b75967156e97fe3e3da348521624ef9714086bb9ea67ff1cbdf721b0fb922420ea14b1a991dbac5246ec53e5156a11bccb6298aea948bb3fcedd320e468f932f3f1cbd244d87fb9ea1b4b92c9517820e4cc3a2f013f0994ce33e8ca60fe42f2de3f59c15a7c4fb98123c7f889fb1966fd20a79237d8bb95c37c56c3f2e33236d9b1c6d6aa0b08ea6e583329cefe8e76a89d062877a5046fe8f222ed1d8ea263ecca5daea54300afc\" 
width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e4\u003c/th\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=b049178a1831bbce328a7282626401c0b3d66e067bc3c93853bf5c9fe6ac1ddc234e88ecebbe6486028bd62279c28e54f80d97abb36a1a2fb2f9c663d5eec265089ec5f08cc58bf809eaddd2e6af9c500b85b37b8adef00dc18d0dca26c2633d54aa3a3fd2a4ec63af6ac9c3c8d242a146b44a51bbee529f50c54e3021e0a4deea0b47b5dd948e5c21781269d94cbe77933d2ceae39b987dbda8153a46e706d39c0922a3c9a03ede7ced494b06427a57075c2d4b5271fd230eaa2bc5bcf0d174775bed1715ac93b197aed5b5c676744b913d81dbe8a898ce7b9e2b3ba3d76b66b88b6ada370a5d83ead38beaf767913705a3dabb881b5b99212fa5ebb76fd09e\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003ebob\u003c/td\u003e\n \u003ctd\u003eimage/png\u003c/td\u003e\n \u003ctd\u003e1591923\u003c/td\u003e\n \u003ctd\u003e2025-03-20 17:44:47+00:00\u003c/td\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549019796499\u0026X-Goog-Signature=a9e096f80e63f3faeb78319c3afc7d688da12dfad094c478b150478e52bfee60a9e5789c92d52370ec6e2435d645b8816bde1415e7b5427dd71789dbf4e2fc039e6798e530b517a24ef71e2cd7b73d670781f46b0aabeb4ea82a950d4a88e9b9c75611367a7a599943b6a2ef472470993f02210f0f974269b5f32770be346293f3fafd471d4f9fb25c7328197160571fecc0e0e01ae499003ad31f2d04543690a8f62d73311ea19570ea4c11e3eb87368daa4a26cbb59eb508146c8d339bf1969775c7b2c6da9eb25b583294d34563f1e1cd5d52439860a8d7e542f99d89167c46812c267340f62a71b1536bbdd1025a4468e38587d765d6ccf535104bdb1fc2\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_resize_transformed%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549190596101\u0026X-Goog-Signature=b67386fbd201acb470f54777f5ab2a294538f76e38e5acff818225e2b01c67d9d8f1f64d416882e9a83e959b429437a310533dbf5cb1421c850c8bda8050d769b5e76499f7b975839eceb3d09840780762ef4c78ffe531c9c3af389aa894d7e0c30fc2aa7b0568a8b487c13bba99bcdf8bd49bfa8a76c2cbf74bdbde02e074f807715ae5c8868b6077fe3184c8f7307cb99c6ef9e105f1950ac4481e33a35afe9df23c6f4918c1b6d593a0b847725f415845b490409ed28ce67c5dcb0f706d66f2cc134a882401b9115b789f5545882c62e544afbfe86a9d618c6686b1233947db6e6f2bfb20c0da6ab14115660789abe6714d8e0f36552284f5b3422671bb49\" width=\"300\"\u003e\u003c/td\u003e\n 
\u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_normalize_transformed%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549352085742\u0026X-Goog-Signature=13c5b59c12fe64f09b8bb0fe82174e94e17cdddd62ee636bf3e7a916db31f4964f9165310aeae3562001b44201b869410736744dab31745e3c792628c62463d76fc567a7325f5d3ca58ac364c8cca7c21a4ec10dcc3022e283f335c19c56f4ec069301b12d04d7df08a7fa860bb07872495a39a1ad375e4a80e82f3f1eedc60959302d0404ec3c05240cbdc589a061f2804a1553891a0d6c23257fe862f12d59e5e75f62dc90c246e823e6aa097361840440f27127e657817033ee81e10e08e18934df64c9157ae6eb34e651f357a321c3924a989ff77798cbfd7b12a84678c15e89a78b5686321795b31bd20f5ee8fbddb49d56ce8609ec2fd415b4fb6e3983\" width=\"300\"\u003e\u003c/td\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_resize_transformed%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203600Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1755549347302955\u0026X-Goog-Signature=78da0aa9889346f7b8215b8967fecc8cb31e536236289a4636e1337f9b1b080ca03c62364424dda708b698d2eeaeee81062ff60d4e002f5af66e51a38acc335aab9ba31b44e0f42be0be76d9fb5559322c1de12975a9073cee8756f653687494d605ab56fd0d177ef463fcc33532315e48601acdcb6b7cde0aea8baca1adc0454356ae1082ccc5ae61cd7494c0933c0cc68a3be7caf219050e22cb9c075efcdb00eb7231fd8964cc00af16e6f05d2e2461962a677b5b59b568296353778e2d98b343046e405a29b6dca77ac66d4f83988d3c3c64ec038e906212d5539b5143c52747b488ebcda797492b04a7651d05f51d80485bf68067b577d40d2a929497ed\" 
width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003c/tbody\u003e\n\u003c/table\u003e\n\u003cp\u003e5 rows × 9 columns\u003c/p\u003e\n\u003c/div\u003e[5 rows x 9 columns in total]", - "text/plain": " image author content_type \\\n0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n2 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n3 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n4 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n\n size updated \\\n0 1591240 2025-03-20 17:45:04+00:00 \n1 1182951 2025-03-20 17:45:02+00:00 \n2 1520884 2025-03-20 17:44:55+00:00 \n3 1235401 2025-03-20 17:45:19+00:00 \n4 1591923 2025-03-20 17:44:47+00:00 \n\n blurred \\\n0 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n1 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n2 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n3 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n4 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n\n resized \\\n0 {'uri': 'gs://bigframes_blob_test/image_resize... \n1 {'uri': 'gs://bigframes_blob_test/image_resize... \n2 {'uri': 'gs://bigframes_blob_test/image_resize... \n3 {'uri': 'gs://bigframes_blob_test/image_resize... \n4 {'uri': 'gs://bigframes_blob_test/image_resize... \n\n normalized \\\n0 {'uri': 'gs://bigframes_blob_test/image_normal... \n1 {'uri': 'gs://bigframes_blob_test/image_normal... \n2 {'uri': 'gs://bigframes_blob_test/image_normal... \n3 {'uri': 'gs://bigframes_blob_test/image_normal... \n4 {'uri': 'gs://bigframes_blob_test/image_normal... \n\n blur_resized \n0 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n1 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n2 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n3 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n4 {'uri': 'gs://bigframes_blob_test/image_blur_r... 
\n\n[5 rows x 9 columns]" - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } ] }, { - "id": "2d68a468", "cell_type": "markdown", - "source": "# 4. Use LLM models to ask questions and generate embeddings on images", + "id": "2d68a468", "metadata": {}, - "execution_count": null + "source": [ + "# 4. Use LLM models to ask questions and generate embeddings on images" + ] }, { - "id": "662054a0", "cell_type": "code", - "source": "from bigframes.ml import llm\ngemini = llm.GeminiTextGenerator()", + "execution_count": 10, + "id": "662054a0", "metadata": { - "trusted": true, "execution": { - "iopub.status.busy": "2025-08-18T20:36:13.954340Z", "iopub.execute_input": "2025-08-18T20:36:13.954686Z", + "iopub.status.busy": "2025-08-18T20:36:13.954340Z", "iopub.status.idle": "2025-08-18T20:36:43.225449Z", - "shell.execute_reply.started": "2025-08-18T20:36:13.954661Z", - "shell.execute_reply": "2025-08-18T20:36:43.224579Z" - } + "shell.execute_reply": "2025-08-18T20:36:43.224579Z", + "shell.execute_reply.started": "2025-08-18T20:36:13.954661Z" + }, + "trusted": true }, - "execution_count": 10, "outputs": [ { "name": "stderr", "output_type": "stream", - "text": "/usr/local/lib/python3.11/dist-packages/bigframes/core/log_adapter.py:175: FutureWarning: Since upgrading the default model can cause unintended breakages, the\ndefault model will be removed in BigFrames 3.0. Please supply an\nexplicit model to avoid this message.\n return method(*args, **kwargs)\n" + "text": [ + "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/core/logging/log_adapter.py:183: FutureWarning: Since upgrading the default model can cause unintended breakages, the\n", + "default model will be removed in BigFrames 3.0. 
Please supply an\n", + "explicit model to avoid this message.\n", + " return method(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/__init__.py:437: FutureWarning: You are using the BigFrames session default connection: bigframes-\n", + "default-connection, which can be different from the\n", + "BigQuery project default connection. This default\n", + "connection may change in the future.\n", + " warnings.warn(msg, category=FutureWarning)\n" + ] } + ], + "source": [ + "from bigframes.ml import llm\n", + "gemini = llm.GeminiTextGenerator()" ] }, { - "id": "a31730ff", "cell_type": "code", - "source": [ - "# Ask the same question on the images\n", - "df_image = df_image.head(2)\n", - "answer = gemini.predict(df_image, prompt=[\"what item is it?\", \"what color is the picture?\"])\n", - "render_images(answer[[\"ml_generate_text_llm_result\", \"image\"]])" - ], + "execution_count": 11, + "id": "a31730ff", "metadata": { - "trusted": true, "execution": { - "iopub.status.busy": "2025-08-18T20:36:43.227457Z", "iopub.execute_input": "2025-08-18T20:36:43.227798Z", + "iopub.status.busy": "2025-08-18T20:36:43.227457Z", "iopub.status.idle": "2025-08-18T20:37:25.238649Z", - "shell.execute_reply.started": "2025-08-18T20:36:43.227764Z", - "shell.execute_reply": "2025-08-18T20:37:25.237623Z" - } + "shell.execute_reply": "2025-08-18T20:37:25.237623Z", + "shell.execute_reply.started": "2025-08-18T20:36:43.227764Z" + }, + "trusted": true }, - "execution_count": 11, "outputs": [ { "name": "stderr", "output_type": "stream", - "text": "/usr/local/lib/python3.11/dist-packages/bigframes/core/array_value.py:108: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n`db_dtypes` is a preview feature and subject to change.\n warnings.warn(msg, bfe.PreviewWarning)\n" + "text": [ + 
"/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] }, { "data": { - "text/html": "\u003cdiv\u003e\n\u003cstyle scoped\u003e\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n\u003c/style\u003e\n\u003ctable border=\"1\" class=\"dataframe\"\u003e\n \u003cthead\u003e\n \u003ctr style=\"text-align: right;\"\u003e\n \u003cth\u003e\u003c/th\u003e\n \u003cth\u003eml_generate_text_llm_result\u003c/th\u003e\n \u003cth\u003eimage\u003c/th\u003e\n \u003c/tr\u003e\n \u003c/thead\u003e\n \u003ctbody\u003e\n \u003ctr\u003e\n \u003cth\u003e0\u003c/th\u003e\n \u003ctd\u003eThe item is a tin of K9 Guard Dog Paw Balm.\u003c/td\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203711Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=06718c570f3a86457a786ed768e5deede983ae58e79ea4c1ff7793a6d4f7693c111340cf05a4f7dd8a968340f4c2f2a992240dec3af83725b3983a8c47f663e5d800d2af7144f8d5b2b3097ac780fa66398dd95696691f6eddadfba15b77da3c16ee5d1b700378e8fc1e2d574eea0e7adf42fa23c9e28fda9aa9c97a3ec2e461afbbbd45a9c9a0c2a50f0cd561159cae73c1d7dcf6df215a7c39fb9e4c3966364ee7c5f95f16f0e733d02ddc6e1badfbce0edc89f426a79af2af8c66e743df3811d6e7990e3a692d78461939d5ab6b393e88bc32e9bcc56764f114898e6bab921346783ff42d17cbf2f04366a21834a568a52cf32a5df81654ddd000f2ac1c34\" width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e1\u003c/th\u003e\n \u003ctd\u003eThe item is a bottle of K9 Guard Dog Hot Spot Spray.\u003c/td\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203711Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=141c42af27dc86e89a3075f3af82c75ee5afb817e0850586a96cdb3ee180c22a8ed09f0a8e1a3c5d632846cbdfd10d71600c8f33a699b5a6c0dd6f34784b4d1d8d39a9ba9e85bff3c984c29b5cd628551149428d3d2f96253d994d0e4005527207c5fe11d681e840a98601ca7858ef9f9b4680a477621684c1f2295bb49a95b40d04fb6fae3ab4383210145da8dcd9723d442314c9d60225470f5d8b017073a5626ccf77a2fb644432ece39b38f3a2ada019f20e6a61fb026962521d4f73ac92229f575eb21d18c52d2200cb4f14b2f2dfb650c52023277d7656159e99212ce90729d946caf847704c5cdaf467202a3bef13d2f1bcd558f2647da4a1d049f36b\" width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003c/tbody\u003e\n\u003c/table\u003e\n\u003cp\u003e2 rows × 2 columns\u003c/p\u003e\n\u003c/div\u003e[2 rows x 2 columns in total]", - "text/plain": " ml_generate_text_llm_result \\\n0 The item is a tin of K9 Guard Dog Paw Balm. \n1 The item is a bottle of K9 Guard Dog Hot Spot ... \n\n image \n0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n\n[2 rows x 2 columns]" + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_text_llm_resultimage
0Please provide me with the picture! I need to see the image to tell you what the item is and what color the picture is.\\n
1To answer your question accurately, I need you to provide me with the picture you are referring to. Once you provide the picture, I can analyze it and tell you what item is in the picture and what color the picture is.
" + ], + "text/plain": [ + "" + ] }, - "execution_count": 11, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } + ], + "source": [ + "# Ask the same question on the images\n", + "df_image = df_image.head(2)\n", + "answer = gemini.predict(df_image, prompt=[\"what item is it?\", \"what color is the picture?\"])\n", + "render_images(answer[[\"ml_generate_text_llm_result\", \"image\"]])" ] }, { - "id": "f5d2a1ed", "cell_type": "code", - "source": "# Ask different questions\ndf_image[\"question\"] = [\"what item is it?\", \"what color is the picture?\"]", + "execution_count": 12, + "id": "f5d2a1ed", "metadata": { - "trusted": true, "execution": { - "iopub.status.busy": "2025-08-18T20:37:25.239607Z", "iopub.execute_input": "2025-08-18T20:37:25.239875Z", + "iopub.status.busy": "2025-08-18T20:37:25.239607Z", "iopub.status.idle": "2025-08-18T20:37:25.263034Z", - "shell.execute_reply.started": "2025-08-18T20:37:25.239847Z", - "shell.execute_reply": "2025-08-18T20:37:25.262002Z" - } + "shell.execute_reply": "2025-08-18T20:37:25.262002Z", + "shell.execute_reply.started": "2025-08-18T20:37:25.239847Z" + }, + "trusted": true }, - "execution_count": 12 + "outputs": [], + "source": [ + "# Ask different questions\n", + "df_image[\"question\"] = [\"what item is it?\", \"what color is the picture?\"]" + ] }, { - "id": "fb67bf8e", "cell_type": "code", - "source": [ - "answer_alt = gemini.predict(df_image, prompt=[df_image[\"question\"], df_image[\"image\"]])\n", - "render_images(answer_alt[[\"ml_generate_text_llm_result\", \"image\"]])" - ], + "execution_count": 13, + "id": "fb67bf8e", "metadata": { - "trusted": true, "execution": { - "iopub.status.busy": "2025-08-18T20:37:25.264072Z", "iopub.execute_input": "2025-08-18T20:37:25.264585Z", + "iopub.status.busy": "2025-08-18T20:37:25.264072Z", "iopub.status.idle": "2025-08-18T20:38:10.129667Z", - "shell.execute_reply.started": "2025-08-18T20:37:25.264518Z", - "shell.execute_reply": 
"2025-08-18T20:38:10.128677Z" - } + "shell.execute_reply": "2025-08-18T20:38:10.128677Z", + "shell.execute_reply.started": "2025-08-18T20:37:25.264518Z" + }, + "trusted": true }, - "execution_count": 13, "outputs": [ { "name": "stderr", "output_type": "stream", - "text": "/usr/local/lib/python3.11/dist-packages/bigframes/core/array_value.py:108: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n`db_dtypes` is a preview feature and subject to change.\n warnings.warn(msg, bfe.PreviewWarning)\n" + "text": [ + "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] }, { "data": { - "text/html": "\u003cdiv\u003e\n\u003cstyle scoped\u003e\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n\u003c/style\u003e\n\u003ctable border=\"1\" class=\"dataframe\"\u003e\n \u003cthead\u003e\n \u003ctr style=\"text-align: right;\"\u003e\n \u003cth\u003e\u003c/th\u003e\n \u003cth\u003eml_generate_text_llm_result\u003c/th\u003e\n \u003cth\u003eimage\u003c/th\u003e\n \u003c/tr\u003e\n \u003c/thead\u003e\n \u003ctbody\u003e\n \u003ctr\u003e\n 
\u003cth\u003e0\u003c/th\u003e\n \u003ctd\u003eThe item is a tin of K9 Guard Dog Paw Balm.\u003c/td\u003e\n \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203757Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=12cfd68cb145aef7edd54a3b0f03944424776133f09fc277c6bce2a8a267116ac6911b59615d0fce3ebf2e901d5ae847dbb414f9d524ab42b924a496a81bf0eff4b22c60b89adef7d863f85f9680c05d31afab795635e30ab38a297a221b6374df190bdbbdaed90c5bcd6b79f63a8d9c5f7c4db1c981b2a03fb5808feaa8e4d2e036a11f9d19c9b3fb94471562cbd2329e1a1ebc6e841111c5f90a847da63e362d6ff8d878ddc106aa13932143c2b8eb0e3c222a2bcb4ec1b46bc032678e872be7a3325e5e3db76b6dc81e0fc189bf0a4fc30e8c508fc1c12c5d61d2c9601339657a318a4b0abde83d5ca7f73387342de33399e2d8b62c67d549e09507f09c4e\" width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e1\u003c/th\u003e\n \u003ctd\u003eThe picture has colors such as white, gray, and a light blue (cyan).\u003c/td\u003e\n \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20250818%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20250818T203757Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=b597801312fce93103bf273c7669e665573880dca47521adb16cbb6864ab2f4a057cf060519acecc9be14695ea0c5d4848ea631f53dfe6df89a390789faa4d946a2828f7112f508be8f6b40ede1fa4cd565fc9738d73ddabcaf08e7467bd58b54a0482061fc0c522f9b74a7c4402aba788d9c45b6c0416f639a275a4d70cfa15ae258343eb3c17170ff9227b1ea6b020b5ed5e95f0db4df10292537bb19afdf03007c0b05c026343e3c7ea48c743b65c1cdf13422f4c02e258a623fafd8f083ac52afb9d5787b822913de2c58a03e93ad3323b44ff16327b689e1cc6ab7590a358a82b8f70f967898f4bfd41bdafd446e8dfd83b7704094d0146bc07a0934427\" width=\"300\"\u003e\u003c/td\u003e\n \u003c/tr\u003e\n \u003c/tbody\u003e\n\u003c/table\u003e\n\u003cp\u003e2 rows × 2 columns\u003c/p\u003e\n\u003c/div\u003e[2 rows x 2 columns in total]", - "text/plain": " ml_generate_text_llm_result \\\n0 The item is a tin of K9 Guard Dog Paw Balm. \n1 The picture has colors such as white, gray, an... \n\n image \n0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n\n[2 rows x 2 columns]" + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_text_llm_resultimage
0The item is a glass aquarium.
1Dark brown
" + ], + "text/plain": [ + "" + ] }, - "execution_count": 13, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } + ], + "source": [ + "answer_alt = gemini.predict(df_image, prompt=[df_image[\"question\"], df_image[\"image\"]])\n", + "render_images(answer_alt[[\"ml_generate_text_llm_result\", \"image\"]])" ] }, { - "id": "0cf33170", "cell_type": "code", - "source": "# Generate embeddings.\nembed_model = llm.MultimodalEmbeddingGenerator()\nembeddings = embed_model.predict(df_image[\"image\"])\nembeddings", + "execution_count": 14, + "id": "0cf33170", "metadata": { - "trusted": true, "execution": { - "iopub.status.busy": "2025-08-18T20:38:10.130617Z", "iopub.execute_input": "2025-08-18T20:38:10.130851Z", + "iopub.status.busy": "2025-08-18T20:38:10.130617Z", "iopub.status.idle": "2025-08-18T20:39:04.790416Z", - "shell.execute_reply.started": "2025-08-18T20:38:10.130833Z", - "shell.execute_reply": "2025-08-18T20:39:04.789398Z" - } + "shell.execute_reply": "2025-08-18T20:39:04.789398Z", + "shell.execute_reply.started": "2025-08-18T20:38:10.130833Z" + }, + "trusted": true }, - "execution_count": 14, "outputs": [ { "name": "stderr", "output_type": "stream", - "text": "/usr/local/lib/python3.11/dist-packages/bigframes/core/log_adapter.py:175: FutureWarning: Since upgrading the default model can cause unintended breakages, the\ndefault model will be removed in BigFrames 3.0. 
Please supply an\nexplicit model to avoid this message.\n return method(*args, **kwargs)\n/usr/local/lib/python3.11/dist-packages/bigframes/core/array_value.py:108: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n`db_dtypes` is a preview feature and subject to change.\n warnings.warn(msg, bfe.PreviewWarning)\n" + "text": [ + "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/core/logging/log_adapter.py:183: FutureWarning: Since upgrading the default model can cause unintended breakages, the\n", + "default model will be removed in BigFrames 3.0. Please supply an\n", + "explicit model to avoid this message.\n", + " return method(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/__init__.py:437: FutureWarning: You are using the BigFrames session default connection: bigframes-\n", + "default-connection, which can be different from the\n", + "BigQuery project default connection. 
This default\n", + "connection may change in the future.\n", + " warnings.warn(msg, category=FutureWarning)\n", + "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] }, { "data": { - "text/html": "\u003cdiv\u003e\n\u003cstyle scoped\u003e\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n\u003c/style\u003e\n\u003ctable border=\"1\" class=\"dataframe\"\u003e\n \u003cthead\u003e\n \u003ctr style=\"text-align: right;\"\u003e\n \u003cth\u003e\u003c/th\u003e\n \u003cth\u003eml_generate_embedding_result\u003c/th\u003e\n \u003cth\u003eml_generate_embedding_status\u003c/th\u003e\n \u003cth\u003eml_generate_embedding_start_sec\u003c/th\u003e\n \u003cth\u003eml_generate_embedding_end_sec\u003c/th\u003e\n \u003cth\u003econtent\u003c/th\u003e\n \u003c/tr\u003e\n \u003c/thead\u003e\n \u003ctbody\u003e\n \u003ctr\u003e\n \u003cth\u003e0\u003c/th\u003e\n \u003ctd\u003e[ 0.00638822 0.01666385 0.00451817 ... -0.02...\u003c/td\u003e\n \u003ctd\u003e\u003c/td\u003e\n \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2025-08-19T02:3...\u003c/td\u003e\n \u003c/tr\u003e\n \u003ctr\u003e\n \u003cth\u003e1\u003c/th\u003e\n \u003ctd\u003e[ 0.00973672 0.02148364 0.00244308 ... 
0.00...\u003c/td\u003e\n \u003ctd\u003e\u003c/td\u003e\n \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2025-08-19T02:3...\u003c/td\u003e\n \u003c/tr\u003e\n \u003c/tbody\u003e\n\u003c/table\u003e\n\u003cp\u003e2 rows × 5 columns\u003c/p\u003e\n\u003c/div\u003e[2 rows x 5 columns in total]", - "text/plain": " ml_generate_embedding_result \\\n0 [ 0.00638822 0.01666385 0.00451817 ... -0.02... \n1 [ 0.00973672 0.02148364 0.00244308 ... 0.00... \n\n ml_generate_embedding_status ml_generate_embedding_start_sec \\\n0 \u003cNA\u003e \n1 \u003cNA\u003e \n\n ml_generate_embedding_end_sec \\\n0 \u003cNA\u003e \n1 \u003cNA\u003e \n\n content \n0 {\"access_urls\":{\"expiry_time\":\"2025-08-19T02:3... \n1 {\"access_urls\":{\"expiry_time\":\"2025-08-19T02:3... \n\n[2 rows x 5 columns]" + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_embedding_resultml_generate_embedding_statusml_generate_embedding_start_secml_generate_embedding_end_seccontent
0[ 0.03416207 0.0419732 -0.0227391 ... -0.03...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-05-02T03:3...
1[ 0.01908903 0.0193082 -0.00221754 ... 0.00...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-05-02T03:3...
\n", + "

2 rows × 5 columns

\n", + "
[2 rows x 5 columns in total]" + ], + "text/plain": [ + " ml_generate_embedding_result \\\n", + "0 [ 0.03416207 0.0419732 -0.0227391 ... -0.03... \n", + "1 [ 0.01908903 0.0193082 -0.00221754 ... 0.00... \n", + "\n", + " ml_generate_embedding_status ml_generate_embedding_start_sec \\\n", + "0 \n", + "1 \n", + "\n", + " ml_generate_embedding_end_sec \\\n", + "0 \n", + "1 \n", + "\n", + " content \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-05-02T03:3... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-05-02T03:3... \n", + "\n", + "[2 rows x 5 columns]" + ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } + ], + "source": [ + "# Generate embeddings.\n", + "embed_model = llm.MultimodalEmbeddingGenerator()\n", + "embeddings = embed_model.predict(df_image[\"image\"])\n", + "embeddings" ] - }, - { - "id": "9fde6707", - "cell_type": "code", - "source": "", - "metadata": { - "trusted": true - }, - "execution_count": null } ], "metadata": { - "kernelspec": { - "language": "python", - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.11.13", - "mimetype": "text/x-python", - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "pygments_lexer": "ipython3", - "nbconvert_exporter": "python", - "file_extension": ".py" - }, "kaggle": { "accelerator": "none", "dataSources": [ { - "sourceId": 110281, "databundleVersionId": 13391012, + "sourceId": 110281, "sourceType": "competition" } ], "dockerImageVersionId": 31089, + "isGpuEnabled": false, "isInternetEnabled": true, "language": "python", - "sourceType": "notebook", - "isGpuEnabled": false + "sourceType": "notebook" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + 
"pygments_lexer": "ipython3", + "version": "3.11.13" } }, - "nbformat_minor": 4, - "nbformat": 4 + "nbformat": 4, + "nbformat_minor": 4 } From 9a9a83f22c777d6953b49a91f87f6a7fdd486d38 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 1 May 2026 23:25:09 +0000 Subject: [PATCH 38/39] format: format code --- packages/bigframes/bigframes/bigquery/_operations/ai.py | 2 -- packages/bigframes/bigframes/functions/function_template.py | 4 +++- packages/bigframes/bigframes/pandas/io/api.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/bigframes/bigframes/bigquery/_operations/ai.py b/packages/bigframes/bigframes/bigquery/_operations/ai.py index 7890c18aee66..97fb6515b55e 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/ai.py +++ b/packages/bigframes/bigframes/bigquery/_operations/ai.py @@ -1184,8 +1184,6 @@ def forecast( return ml_core.BaseBqml(df._session).ai_forecast(input_data=df, options=options) - - def _separate_context_and_series( prompt: PROMPT_TYPE, ) -> Tuple[List[str | None], List[series.Series]]: diff --git a/packages/bigframes/bigframes/functions/function_template.py b/packages/bigframes/bigframes/functions/function_template.py index 005a6fc91cdf..68160116a41c 100644 --- a/packages/bigframes/bigframes/functions/function_template.py +++ b/packages/bigframes/bigframes/functions/function_template.py @@ -367,7 +367,9 @@ def bigframes_handler(*args): if code_def.package_requirements: # Include package requirements as comments to help force a new # BigQuery UDF definition when only package requirements change. 
- packages_comment = "# Packages: " + ", ".join(sorted(code_def.package_requirements)) + packages_comment = "# Packages: " + ", ".join( + sorted(code_def.package_requirements) + ) udf_code_block.append(packages_comment) if not capture_references and signature.is_row_processor: diff --git a/packages/bigframes/bigframes/pandas/io/api.py b/packages/bigframes/bigframes/pandas/io/api.py index 757a7651a891..6c83095ab3cd 100644 --- a/packages/bigframes/bigframes/pandas/io/api.py +++ b/packages/bigframes/bigframes/pandas/io/api.py @@ -639,8 +639,8 @@ def _from_glob_path( def _get_bqclient_and_project() -> Tuple[bigquery.Client, str]: # Address circular imports in doctest due to bigframes/session/__init__.py # containing a lot of logic and samples. - from bigframes.session import clients import bigframes._config.auth + from bigframes.session import clients credentials, project = bigframes._config.auth.resolve_credentials_and_project( config.options.bigquery From b219d049dc231809971d2a6b7a614ace900673df Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 1 May 2026 23:26:47 +0000 Subject: [PATCH 39/39] chore: revert notebook changes (moved to dedicated notebook branch) --- .../notebooks/dataframes/anywidget_mode.ipynb | 278 ++-- .../generative_ai/ai_movie_poster.ipynb | 1390 ++++++++--------- ...uct-images-with-bigframes-multimodal.ipynb | 1132 +------------- ...with-bigframes-over-national-jukebox.ipynb | 652 ++++---- .../multimodal/multimodal_dataframe.ipynb | 973 ++++++++---- 5 files changed, 1804 insertions(+), 2621 deletions(-) diff --git a/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb b/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb index e92e4b295bdb..a0efa571a7d7 100644 --- a/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb +++ b/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb @@ -1,8 +1,11 @@ { "cells": [ { - "id": "d10bfca4", "cell_type": "code", + "execution_count": 1, + "id": "d10bfca4", + "metadata": {}, + 
"outputs": [], "source": [ "# Copyright 2025 Google LLC\n", "#\n", @@ -17,33 +20,30 @@ "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License." - ], - "metadata": {}, - "execution_count": 1, - "outputs": [] + ] }, { - "id": "acca43ae", "cell_type": "markdown", + "id": "acca43ae", + "metadata": {}, "source": [ "# Demo to Show Anywidget mode" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "ca22f059", "cell_type": "code", + "execution_count": 2, + "id": "ca22f059", + "metadata": {}, + "outputs": [], "source": [ "import bigframes.pandas as bpd" - ], - "metadata": {}, - "execution_count": 2, - "outputs": [] + ] }, { - "id": "04406a4d", "cell_type": "markdown", + "id": "04406a4d", + "metadata": {}, "source": [ "This notebook demonstrates the **anywidget** display mode for BigQuery DataFrames. This mode provides an interactive table experience for exploring your data directly within the notebook.\n", "\n", @@ -53,49 +53,41 @@ "- **Column Sorting:** Click column headers to toggle between ascending, descending, and unsorted views. Use **Shift + Click** to sort by multiple columns.\n", "- **Column Resizing:** Drag the dividers between column headers to adjust their width.\n", "- **Max Columns Control:** Limit the number of displayed columns to improve performance and readability for wide datasets." 
- ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "1bc5aaf3", "cell_type": "code", + "execution_count": 3, + "id": "1bc5aaf3", + "metadata": {}, + "outputs": [], "source": [ "bpd.options.bigquery.ordering_mode = \"partial\"\n", "bpd.options.display.render_mode = \"anywidget\"" - ], - "metadata": {}, - "execution_count": 3, - "outputs": [] + ] }, { - "id": "0a354c69", "cell_type": "markdown", + "id": "0a354c69", + "metadata": {}, "source": [ "Load Sample Data" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "interactive-df-header", "cell_type": "markdown", + "id": "interactive-df-header", + "metadata": {}, "source": [ "## 1. Interactive DataFrame Display\n", "Loading a dataset from BigQuery automatically renders the interactive widget." - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "f289d250", "cell_type": "code", - "source": [ - "df = bpd.read_gbq(\"bigquery-public-data.usa_names.usa_1910_2013\")\n", - "print(df)" - ], - "metadata": {}, "execution_count": 4, + "id": "f289d250", + "metadata": {}, "outputs": [ { "data": { @@ -131,16 +123,17 @@ "[5552452 rows x 5 columns]\n" ] } + ], + "source": [ + "df = bpd.read_gbq(\"bigquery-public-data.usa_names.usa_1910_2013\")\n", + "print(df)" ] }, { - "id": "220340b0", "cell_type": "code", - "source": [ - "df" - ], - "metadata": {}, "execution_count": 5, + "id": "220340b0", + "metadata": {}, "outputs": [ { "data": { @@ -282,28 +275,25 @@ "metadata": {}, "output_type": "execute_result" } + ], + "source": [ + "df" ] }, { - "id": "3a73e472", "cell_type": "markdown", + "id": "3a73e472", + "metadata": {}, "source": [ "## 2. Interactive Series Display\n", "BigQuery DataFrames `Series` objects now also support the full interactive widget experience, including pagination and formatting." 
- ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "42bb02ab", "cell_type": "code", - "source": [ - "test_series = df[\"year\"]\n", - "# Displaying the series triggers the interactive widget\n", - "print(test_series)" - ], - "metadata": {}, "execution_count": 6, + "id": "42bb02ab", + "metadata": {}, "outputs": [ { "data": { @@ -353,25 +343,26 @@ "[5552452 rows]\n" ] } + ], + "source": [ + "test_series = df[\"year\"]\n", + "# Displaying the series triggers the interactive widget\n", + "print(test_series)" ] }, { - "id": "7bcf1bb7", "cell_type": "markdown", + "id": "7bcf1bb7", + "metadata": {}, "source": [ "Display with Pagination" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "da23e0f3", "cell_type": "code", - "source": [ - "test_series" - ], - "metadata": {}, "execution_count": 7, + "id": "da23e0f3", + "metadata": {}, "outputs": [ { "data": { @@ -413,11 +404,15 @@ "metadata": {}, "output_type": "execute_result" } + ], + "source": [ + "test_series" ] }, { - "id": "sorting-intro", "cell_type": "markdown", + "id": "sorting-intro", + "metadata": {}, "source": [ "### Sorting by Column(s)\n", "You can sort the table by clicking on the headers of columns that have orderable data types (like numbers, strings, and dates). Non-orderable columns (like arrays or structs) do not have sorting controls.\n", @@ -433,58 +428,42 @@ "- **Shift + Click:** Hold the `Shift` key while clicking additional column headers to add them to the sort order. \n", "- Each column in a multi-sort also cycles through the three states (Ascending, Descending, Unsorted).\n", "- **Indicator visibility:** Sorting indicators (▲, ▼) are always visible for all columns currently included in the sort. The unsorted indicator (●) is only visible when you hover over an unsorted column header." 
- ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "adjustable-width-intro", "cell_type": "markdown", + "id": "adjustable-width-intro", + "metadata": {}, "source": [ "### Adjustable Column Widths\n", "You can easily adjust the width of any column in the table. Simply hover your mouse over the vertical dividers between column headers. When the cursor changes to a resize icon, click and drag to expand or shrink the column to your desired width. This allows for better readability and customization of your table view.\n", "\n", "### Control Maximum Columns\n", "You can control the number of columns displayed in the widget using the **Max columns** dropdown in the footer. This is useful for wide DataFrames where you want to focus on a subset of columns or improve rendering performance. Options include 3, 5, 7, 10, 20, or All." - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "bb15bab6", "cell_type": "markdown", + "id": "bb15bab6", + "metadata": {}, "source": [ "Programmatic Navigation Demo" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "programmatic-header", "cell_type": "markdown", + "id": "programmatic-header", + "metadata": {}, "source": [ "## 3. Programmatic Widget Control\n", "You can also instantiate the `TableWidget` directly for more control, such as checking page counts or driving navigation programmatically." 
- ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "6920d49b", "cell_type": "code", - "source": [ - "from bigframes.display.anywidget import TableWidget\n", - "import math\n", - " \n", - "# Create widget programmatically \n", - "widget = TableWidget(df)\n", - "print(f\"Total pages: {math.ceil(widget.row_count / widget.page_size)}\")\n", - " \n", - "# Display the widget\n", - "widget" - ], - "metadata": {}, "execution_count": 8, + "id": "6920d49b", + "metadata": {}, "outputs": [ { "name": "stdout", @@ -508,34 +487,32 @@ "metadata": {}, "output_type": "execute_result" } + ], + "source": [ + "from bigframes.display.anywidget import TableWidget\n", + "import math\n", + " \n", + "# Create widget programmatically \n", + "widget = TableWidget(df)\n", + "print(f\"Total pages: {math.ceil(widget.row_count / widget.page_size)}\")\n", + " \n", + "# Display the widget\n", + "widget" ] }, { - "id": "02cbd1be", "cell_type": "markdown", + "id": "02cbd1be", + "metadata": {}, "source": [ "Test Navigation Programmatically" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "12b68f15", "cell_type": "code", - "source": [ - "# Simulate button clicks programmatically\n", - "print(\"Current page:\", widget.page)\n", - "\n", - "# Go to next page\n", - "widget.page = 1\n", - "print(\"After next:\", widget.page)\n", - "\n", - "# Go to previous page\n", - "widget.page = 0\n", - "print(\"After prev:\", widget.page)" - ], - "metadata": {}, "execution_count": 9, + "id": "12b68f15", + "metadata": {}, "outputs": [ { "name": "stdout", @@ -546,30 +523,34 @@ "After prev: 0\n" ] } + ], + "source": [ + "# Simulate button clicks programmatically\n", + "print(\"Current page:\", widget.page)\n", + "\n", + "# Go to next page\n", + "widget.page = 1\n", + "print(\"After next:\", widget.page)\n", + "\n", + "# Go to previous page\n", + "widget.page = 0\n", + "print(\"After prev:\", widget.page)" ] }, { - "id": "9d310138", "cell_type": "markdown", + "id": "9d310138", + "metadata": 
{}, "source": [ "## 4. Edge Cases\n", "The widget handles small datasets gracefully, disabling unnecessary pagination controls." - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "a9d5d13a", "cell_type": "code", - "source": [ - "# Test with very small dataset\n", - "small_df = df.sort_values([\"name\", \"year\", \"state\"]).head(5)\n", - "small_widget = TableWidget(small_df)\n", - "print(f\"Small dataset pages: {math.ceil(small_widget.row_count / small_widget.page_size)}\")\n", - "small_widget" - ], - "metadata": {}, "execution_count": 10, + "id": "a9d5d13a", + "metadata": {}, "outputs": [ { "name": "stdout", @@ -593,45 +574,38 @@ "metadata": {}, "output_type": "execute_result" } + ], + "source": [ + "# Test with very small dataset\n", + "small_df = df.sort_values([\"name\", \"year\", \"state\"]).head(5)\n", + "small_widget = TableWidget(small_df)\n", + "print(f\"Small dataset pages: {math.ceil(small_widget.row_count / small_widget.page_size)}\")\n", + "small_widget" ] }, { - "id": "added-cell-2", "cell_type": "markdown", + "id": "added-cell-2", + "metadata": {}, "source": [ "### Displaying Generative AI results containing JSON\n", "The `AI.GENERATE` function in BigQuery returns results in a JSON column. While BigQuery's JSON type is not natively supported by the underlying Arrow `to_pandas_batches()` method used in anywidget mode ([Apache Arrow issue #45262](https://github.com/apache/arrow/issues/45262)), BigQuery Dataframes automatically converts JSON columns to strings for display. This allows you to view the results of generative AI functions seamlessly." - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "ai-header", "cell_type": "markdown", + "id": "ai-header", + "metadata": {}, "source": [ "## 5. Advanced Data Types (JSON/Structs)\n", "The `AI.GENERATE` function in BigQuery returns results in a JSON column. 
BigQuery Dataframes automatically handles complex types like JSON strings for display, allowing you to view generative AI results seamlessly." - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "added-cell-1", "cell_type": "code", - "source": [ - "bpd.read_gbq(\"\"\"\n", - " SELECT\n", - " AI.GENERATE(\n", - " prompt=>(\"Extract the values.\", OBJ.GET_ACCESS_URL(OBJ.FETCH_METADATA(OBJ.MAKE_REF(gcs_path, \"us.conn\")), \"r\")),\n", - " connection_id=>\"bigframes-dev.us.bigframes-default-connection\",\n", - " output_schema=>\"publication_date string, class_international string, application_number string, filing_date string\") AS result,\n", - " *\n", - " FROM `bigquery-public-data.labeled_patents.extracted_data`\n", - " LIMIT 5;\n", - "\"\"\")" - ], - "metadata": {}, "execution_count": 11, + "id": "added-cell-1", + "metadata": {}, "outputs": [ { "data": { @@ -836,6 +810,18 @@ "metadata": {}, "output_type": "execute_result" } + ], + "source": [ + "bpd._read_gbq_colab(\"\"\"\n", + " SELECT\n", + " AI.GENERATE(\n", + " prompt=>(\\\"Extract the values.\\\", OBJ.GET_ACCESS_URL(OBJ.FETCH_METADATA(OBJ.MAKE_REF(gcs_path, \\\"us.conn\\\")), \\\"r\\\")),\n", + " connection_id=>\\\"bigframes-dev.us.bigframes-default-connection\\\",\n", + " output_schema=>\\\"publication_date string, class_international string, application_number string, filing_date string\\\") AS result,\n", + " *\n", + " FROM `bigquery-public-data.labeled_patents.extracted_data`\n", + " LIMIT 5;\n", + "\"\"\")" ] } ], @@ -858,6 +844,6 @@ "version": "3.13.0" } }, - "nbformat_minor": 5, - "nbformat": 4 + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb b/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb index ce0be03197a8..b25e2b556e65 100644 --- a/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb +++ b/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb @@ -1,772 +1,732 @@ { - "cells": [ 
- { - "cell_type": "code", - "execution_count": 1, - "id": "7add2e44", - "metadata": { - "id": "XZpKUoHjXw3_" - }, - "outputs": [], - "source": [ - "# Copyright 2026 Google LLC\n", - "#\n", - "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "id": "ee509844", - "metadata": { - "id": "SEKzWP6jW9Oj" - }, - "source": [ - "# Analyzing movie posters with BigQuery Dataframe AI functions" - ] - }, - { - "cell_type": "markdown", - "id": "81b8de8d", - "metadata": {}, - "source": [ - "\n", - "\n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \"Colab Run in Colab\n", - " \n", - " \n", - " \n", - " \"GitHub\n", - " View on GitHub\n", - " \n", - " \n", - " \n", - " \"BQ\n", - " Open in BQ Studio\n", - " \n", - "
" - ] - }, - { - "cell_type": "markdown", - "id": "256b6c02", - "metadata": { - "id": "c9CCKXG5XTb-" - }, - "source": [ - "BigQuery Dataframe provides a Pythonic way to use AI functions directly with your dataframes. In this notebook, you will use these functions to analyze old\n", - "movie posters. These posters are images stored in a public Google Cloud Storage bucket: `gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters`" - ] - }, - { - "cell_type": "markdown", - "id": "3f71d3cb", - "metadata": { - "id": "CUJDa_7MPbL9" - }, - "source": [ - "## Set up" - ] - }, - { - "cell_type": "markdown", - "id": "547145f5", - "metadata": { - "id": "D3iYtBSkYpCK" - }, - "source": [ - "Before you begin, you need to\n", - "\n", - "* Set up your permissions for generative AI functions with [these instructions](https://docs.cloud.google.com/bigquery/docs/permissions-for-ai-functions)\n", - "* Set up your Cloud Resource connection by following [these instructions](https://docs.cloud.google.com/bigquery/docs/create-cloud-resource-connection)\n", - "\n", - "Once you have the permissions set up, import the `bigframes.pandas` package, and\n", - "set your cloud project ID." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "d9cd6da8", - "metadata": { - "id": "6nqoRHYbPAx3" - }, - "outputs": [], - "source": [ - "import bigframes.pandas as bpd\n", - "\n", - "MY_RPOJECT_ID = \"bigframes-dev\" # @param {type:\"string\"}\n", - "LOCATION = \"us\" # @param {type:\"string\"}\n", - "\n", - "bpd.options.bigquery.project = MY_RPOJECT_ID\n", - "bpd.options.bigquery.location = LOCATION" - ] - }, - { - "cell_type": "markdown", - "id": "015a63c1", - "metadata": { - "id": "2XHcNHtvPhNW" - }, - "source": [ - "## Load data" - ] - }, - { - "cell_type": "markdown", - "id": "254561e0", - "metadata": { - "id": "eS-9A7DijfoQ" - }, - "source": [ - "First, you load the data from the GCS bucket to a BigQuery Dataframe with the `from_glob_path` method:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "47acbbfe", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "ZNPzFjCyPap0", - "outputId": "346d20b2-d615-4094-d24e-2d40e5c90ee2" - }, - "outputs": [ + "cells": [ { - "data": { - "text/html": [ - "\n", - " Query processed 0 Bytes in a moment of slot time.\n", - " " - ], - "text/plain": [ - "" + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "XZpKUoHjXw3_" + }, + "outputs": [], + "source": [ + "# Copyright 2026 Google LLC\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." 
] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "SEKzWP6jW9Oj" + }, + "source": [ + "# Analyzing movie posters with BigQuery Dataframe AI functions" + ] }, { - "data": { - "text/html": [ - "\n", - " Query processed 0 Bytes in 16 seconds of slot time.\n", - " " - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \"Colab Run in Colab\n", + " \n", + " \n", + " \n", + " \"GitHub\n", + " View on GitHub\n", + " \n", + " \n", + " \n", + " \"BQ\n", + " Open in BQ Studio\n", + " \n", + "
" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/html": [ - "\n", - " Query processed 0 Bytes in 9 seconds of slot time.\n", - " " - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "c9CCKXG5XTb-" + }, + "source": [ + "BigQuery Dataframe provides a Pythonic way to use AI functions directly with your dataframes. In this notebook, you will use these functions to analyze old\n", + "movie posters. These posters are images stored in a public Google Cloud Storage bucket: `gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters`" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
poster
0
" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "CUJDa_7MPbL9" + }, + "source": [ + "## Set up" ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Replace with your own connection name.\n", - "MY_CONNECTION = 'bigframes-default-connection' # @param {type:\"string\"}\n", - "FULL_CONNECTION_ID = f\"{MY_RPOJECT_ID}.{LOCATION}.{MY_CONNECTION}\"\n", - "\n", - "import gcsfs\n", - "import bigframes\n", - "import bigframes.pandas as bpd\n", - "import bigframes.bigquery as bbq\n", - "import json\n", - "from IPython.display import HTML, display\n", - "\n", - "session = bpd.get_global_session()\n", - "\n", - "# Configure global display parameters \n", - "bigframes.options.display.blob_display_width = 200\n", - "\n", - "def get_runtime_json_str(series, mode=\"R\", with_metadata=False):\n", - " s = bbq.obj.fetch_metadata(series) if with_metadata else series\n", - " runtime = bbq.obj.get_access_url(s, mode=mode)\n", - " return bbq.to_json_string(runtime)\n", - "\n", - "def get_read_url(series):\n", - " runtime = bbq.obj.get_access_url(series, mode=\"R\")\n", - " return bbq.json_value(runtime, \"$.access_urls.read_url\")\n", - "\n", - "def render_images(df):\n", - " \"\"\"Helper to display BigFrames DataFrame with rendered image previews.\"\"\"\n", - " from bigframes import dtypes\n", - " if isinstance(df, bpd.Series):\n", - " df = df.to_frame()\n", - " \n", - " object_cols = [col for col, dtype in zip(df.columns, df.dtypes) if dtype == dtypes.OBJ_REF_DTYPE]\n", - " if not object_cols:\n", - " display(df)\n", - " return\n", - "\n", - " limit = bigframes.options.display.max_rows or 10\n", - " view_df = df.head(limit)\n", - " runtime_cols = {\n", - " col: get_runtime_json_str(view_df[col], mode=\"R\", with_metadata=False) \n", - " for col in object_cols\n", - " }\n", - " \n", - " pandas_json_df = bpd.DataFrame(runtime_cols).to_pandas()\n", - " final_pd = view_df.to_pandas()\n", - " width = 
bigframes.options.display.blob_display_width or 200\n", - " \n", - " def format_cell_html(raw_json):\n", - " if not raw_json: return \"\"\n", - " try:\n", - " obj_rt = json.loads(raw_json)\n", - " if \"access_urls\" not in obj_rt: return \"Error fetching URL\"\n", - " uri = obj_rt.get(\"objectref\", {}).get(\"uri\", \"\")\n", - " url = obj_rt[\"access_urls\"][\"read_url\"]\n", - " if str(uri).lower().endswith((\".png\", \".jpg\", \".jpeg\", \".webp\")):\n", - " return f''\n", - " return f'{uri}'\n", - " except: return \"Format Error\"\n", - "\n", - " for col in object_cols:\n", - " final_pd[col] = pandas_json_df[col].map(format_cell_html)\n", - " display(HTML(final_pd.to_html(escape=False)))\n", - "\n", - "# List files using gcsfs\n", - "fs = gcsfs.GCSFileSystem(anon=True)\n", - "uris = fs.glob(\"gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/*\")\n", - "\n", - "# Ensure URIs have gs:// prefix\n", - "uris = [u if u.startswith(\"gs://\") else f\"gs://{u}\" for u in uris]\n", - "\n", - "# Read the URIs into a BigQuery DataFrame\n", - "movies = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", - "\n", - "# Create the object reference column using the fully qualified connection ID\n", - "movies['poster'] = bbq.obj.make_ref(movies['uri'], authorizer=FULL_CONNECTION_ID)\n", - "movies = movies[['poster']]\n", - "render_images(movies.head(1))" - ] - }, - { - "cell_type": "markdown", - "id": "f1096d2f", - "metadata": { - "id": "EfkdDH08QnYw" - }, - "source": [ - "## Extract titles from posters" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "bb30d47c", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 }, - "id": "6CoZZ5tSQm1r", - "outputId": "1b3915ce-eb83-4be9-b1c1-d9a326dc9408" - }, - "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "D3iYtBSkYpCK" + }, + "source": [ + "Before you begin, you need to\n", + "\n", + "* Set up your permissions for generative AI functions with [these instructions](https://docs.cloud.google.com/bigquery/docs/permissions-for-ai-functions)\n", + "* Set up your Cloud Resource connection by following [these instructions](https://docs.cloud.google.com/bigquery/docs/create-cloud-resource-connection)\n", + "\n", + "Once you have the permissions set up, import the `bigframes.pandas` package, and\n", + "set your cloud project ID." + ] }, { - "data": { - "text/html": [ - "\n", - " Query started with request ID bigframes-dev:US.dc0385a0-1910-4dc4-b090-19d92db9bbcb.
SQL
WITH `bfcte_0` AS (\n",
-       "  SELECT\n",
-       "    *\n",
-       "  FROM UNNEST(ARRAY<STRUCT<`bfcol_0` STRING, `bfcol_1` INT64, `bfcol_2` INT64>>[STRUCT(\n",
-       "    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/au_secours.jpeg',\n",
-       "    0,\n",
-       "    0\n",
-       "  ), STRUCT(\n",
-       "    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/barque_sortant_du_port.jpeg',\n",
-       "    1,\n",
-       "    1\n",
-       "  ), STRUCT(\n",
-       "    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/battling_butler.jpg',\n",
-       "    2,\n",
-       "    2\n",
-       "  ), STRUCT(\n",
-       "    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/brown_of_harvard.jpeg',\n",
-       "    3,\n",
-       "    3\n",
-       "  ), STRUCT(\n",
-       "    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/der_student_von_prag.jpg',\n",
-       "    4,\n",
-       "    4\n",
-       "  )])\n",
-       ")\n",
-       "SELECT\n",
-       "  `bfcol_1` AS `bfuid_col_20`,\n",
-       "  TO_JSON_STRING(\n",
-       "    OBJ.GET_ACCESS_URL(OBJ.MAKE_REF(`bfcol_0`, 'bigframes-dev.us.bigframes-default-connection'), 'R')\n",
-       "  ) AS `bfuid_col_24`\n",
-       "FROM `bfcte_0`\n",
-       "ORDER BY\n",
-       "  `bfcol_2` ASC NULLS LAST\n",
-       "LIMIT 1
\n", - " " - ], - "text/plain": [ - "" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6nqoRHYbPAx3" + }, + "outputs": [], + "source": [ + "import bigframes.pandas as bpd\n", + "\n", + "MY_RPOJECT_ID = \"bigframes-dev\" # @param {type:\"string\"}\n", + "\n", + "bpd.options.bigquery.project = MY_RPOJECT_ID" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/html": [ - "\n", - " Query processed 0 Bytes in 44 seconds of slot time. [Job bigframes-dev:US.job_3KY0bZD8ZOVtXa1mDZrw6FBieAZk details]\n", - " " - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "2XHcNHtvPhNW" + }, + "source": [ + "## Load data" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
postertitle
0The movie title for this poster image is **Au secours!**
" - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "eS-9A7DijfoQ" + }, + "source": [ + "First, you load the data from the GCS bucket to a BigQuery Dataframe with the `from_glob_path` method:" ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import bigframes.bigquery as bbq\n", - "\n", - "movies['title'] = bbq.ai.generate(\n", - " (\"What is the movie title for this poster image?\", get_read_url(movies['poster']))\n", - ").struct.field(\"result\")\n", - "render_images(movies.head(1))" - ] - }, - { - "cell_type": "markdown", - "id": "eb9eb261", - "metadata": { - "id": "cFQHQ9S2lr6t" - }, - "source": [ - "Notice that `ai.generate()` has a `struct` return type, which holds not only the LLM response, but also the status. If you do not provide a field name for your answer, `\"result\"` will be the default name. You can access LLM response content with the struct accessor (e.g. `my_response.struct.filed(\"result\")`);." - ] - }, - { - "cell_type": "markdown", - "id": "ea29eb21", - "metadata": { - "id": "R8kkUhgoS5Xz" - }, - "source": [ - "## Get movie release year\n", - "\n", - "In the example below, you will use `ai.generate_int()` to find the release year for each movie poster:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "bf426247", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 976 }, - "id": "cKZdHq0XS1iW", - "outputId": "72cbad57-4518-4e1e-97bb-333d424dba73" - }, - "outputs": [ { - "data": { - "text/html": [ - "\n", - " Query processed 0 Bytes in a minute of slot time. 
[Job bigframes-dev:US.cdbe8ee8-3e39-4cb3-aaf8-060419f5b58a details]\n", - " " + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "ZNPzFjCyPap0", + "outputId": "346d20b2-d615-4094-d24e-2d40e5c90ee2" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.12/dist-packages/bigframes/core/global_session.py:113: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n", + " _global_session = bigframes.session.connect(\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " Query processed 0 Bytes in a moment of slot time. [Job bigframes-dev:US.48a27954-7a4a-4b9e-8176-ea227fd188ad details]\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/lib/python3.12/dist-packages/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " Query processed 1.3 kB in a minute of slot time. 
[Job bigframes-dev:US.09c48ecb-e041-4c18-a390-ca5a36fd07c3 details]\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " Query processed 1.2 kB in a moment of slot time.\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
poster
0
\n", + "

1 rows × 1 columns

\n", + "
[1 rows x 1 columns in total]" + ], + "text/plain": [ + " poster\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-03-27T02:0...\n", + "\n", + "[1 rows x 1 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } ], - "text/plain": [ - "" + "source": [ + "# Replace with your own connection name.\n", + "MY_CONNECTION = 'bigframes-default-connection' # @param {type:\"string\"}\n", + "\n", + "movies = bpd.from_glob_path(\n", + " \"gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/*\",\n", + " connection = MY_CONNECTION,\n", + " name='poster')\n", + "movies.head(1)" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/html": [ - "\n", - " Query processed 347 Bytes in a moment of slot time.\n", - " " - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "EfkdDH08QnYw" + }, + "source": [ + "## Extract titles from posters" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
postertitleyear
0{'uri': 'gs://cloud-samples-data/vertex-ai/dat...The movie title for the poster image is **Au S...1924
\n", - "

1 rows × 3 columns

\n", - "
[1 rows x 3 columns in total]" + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "6CoZZ5tSQm1r", + "outputId": "1b3915ce-eb83-4be9-b1c1-d9a326dc9408" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/lib/python3.12/dist-packages/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/lib/python3.12/dist-packages/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " Query processed 1.3 kB in 2 minutes of slot time. 
[Job bigframes-dev:US.4a08a15f-5a2f-463b-bba8-734858ec992b details]\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " Query processed 1.2 kB in a moment of slot time.\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
postertitle
0Der Student von Prag
\n", + "

1 rows × 2 columns

\n", + "
[1 rows x 2 columns in total]" + ], + "text/plain": [ + " poster title\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-03-27T02:0... Der Student von Prag\n", + "\n", + "[1 rows x 2 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } ], - "text/plain": [ - " poster \\\n", - "0 {'uri': 'gs://cloud-samples-data/vertex-ai/dat... \n", - "\n", - " title year \n", - "0 The movie title for the poster image is **Au S... 1924 \n", - "\n", - "[1 rows x 3 columns]" + "source": [ + "import bigframes.bigquery as bbq\n", + "\n", + "movies['title'] = bbq.ai.generate(\n", + " (\"What is the movie title for this poster? Name only\", movies['poster']),\n", + " endpoint='gemini-2.5-pro'\n", + ").struct.field(\"result\")\n", + "movies.head(1)" ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "movies['year'] = bbq.ai.generate_int(\n", - " (\"What is the release year for this movie?\", movies['title']),\n", - " endpoint='gemini-2.5-pro'\n", - ").struct.field(\"result\")\n", - "\n", - "movies.head(1)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "8bf12352", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 250 }, - "id": "yqRiNRY8_8fs", - "outputId": "efa60107-6883-4f5c-8e40-43c7287ea7fb" - }, - "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "cFQHQ9S2lr6t" + }, + "source": [ + "Notice that `ai.generate()` has a `struct` return 
type, which holds not only the LLM response, but also the status. If you do not provide a field name for your answer, `\"result\"` will be the default name. You can access LLM response content with the struct accessor (e.g. `my_response.struct.filed(\"result\")`);." + ] }, { - "data": { - "text/plain": [ - "poster structJob bigframes-dev:US.b60a151a-6cbc-405e-9c40-8a7461981a00 details]\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " Query processed 1.3 kB in a moment of slot time.\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
postertitleyear
0Der Student von Prag1913
\n", + "

1 rows × 3 columns

\n", + "
[1 rows x 3 columns in total]" + ], + "text/plain": [ + " poster title \\\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-03-27T02:0... Der Student von Prag \n", + "\n", + " year \n", + "0 1913 \n", + "\n", + "[1 rows x 3 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies['year'] = bbq.ai.generate_int(\n", + " (\"What is the release year for this movie?\", movies['title']),\n", + " endpoint='gemini-2.5-pro'\n", + ").struct.field(\"result\")\n", + "\n", + "movies.head(1)" + ] }, { - "data": { - "text/html": [ - "\n", - " Query started with request ID bigframes-dev:US.0d2028fb-8d80-4b0c-835f-e29362686899.
SQL
WITH `bfcte_0` AS (\n",
-       "  SELECT\n",
-       "    *\n",
-       "  FROM UNNEST(ARRAY<STRUCT<`bfcol_0` STRING, `bfcol_1` INT64, `bfcol_2` INT64>>[STRUCT(\n",
-       "    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/au_secours.jpeg',\n",
-       "    0,\n",
-       "    0\n",
-       "  ), STRUCT(\n",
-       "    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/barque_sortant_du_port.jpeg',\n",
-       "    1,\n",
-       "    1\n",
-       "  ), STRUCT(\n",
-       "    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/battling_butler.jpg',\n",
-       "    2,\n",
-       "    2\n",
-       "  ), STRUCT(\n",
-       "    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/brown_of_harvard.jpeg',\n",
-       "    3,\n",
-       "    3\n",
-       "  ), STRUCT(\n",
-       "    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/der_student_von_prag.jpg',\n",
-       "    4,\n",
-       "    4\n",
-       "  )])\n",
-       ")\n",
-       "SELECT\n",
-       "  `bfcol_1` AS `bfuid_col_52`,\n",
-       "  TO_JSON_STRING(\n",
-       "    OBJ.GET_ACCESS_URL(OBJ.MAKE_REF(`bfcol_0`, 'bigframes-dev.us.bigframes-default-connection'), 'R')\n",
-       "  ) AS `bfuid_col_58`\n",
-       "FROM `bfcte_0`\n",
-       "WHERE\n",
-       "  AI.IF(\n",
-       "    prompt => (\n",
-       "      'The movie ',\n",
-       "      AI.GENERATE(\n",
-       "        prompt => (\n",
-       "          'What is the movie title for this poster image?',\n",
-       "          JSON_VALUE(\n",
-       "            OBJ.GET_ACCESS_URL(OBJ.MAKE_REF(`bfcol_0`, 'bigframes-dev.us.bigframes-default-connection'), 'R'),\n",
-       "            '$.access_urls.read_url'\n",
-       "          )\n",
-       "        ),\n",
-       "        request_type => 'UNSPECIFIED'\n",
-       "      ).`result`,\n",
-       "      ' was made in US'\n",
-       "    )\n",
-       "  )\n",
-       "ORDER BY\n",
-       "  `bfcol_2` ASC NULLS LAST\n",
-       "LIMIT 1
\n", - " " + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 250 + }, + "id": "yqRiNRY8_8fs", + "outputId": "efa60107-6883-4f5c-8e40-43c7287ea7fb" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
posterstruct<uri: string, version: string, authorize...
titlestring[pyarrow]
yearInt64
\n", + "

" + ], + "text/plain": [ + "poster struct" + "source": [ + "movies.dtypes" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/html": [ - "\n", - " Query processed 0 Bytes in 3 minutes of slot time. [Job bigframes-dev:US.job_pEC4qGIM1vr98oTcLjp-HYQ6R9h_ details]\n", - " " - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "0WwbiMtdTXt5" + }, + "source": [ + "## Filter movie by production country\n", + "\n", + "In the next example, you will use `ai.if_()` to find the movies that were produced in the USA." ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
postertitleyear
3NaNThe movie title is **Brown of Harvard**.1926
" + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "xTE8dj3LThy6", + "outputId": "941e04d8-9f24-4309-a59e-35e8740c9c54" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/lib/python3.12/dist-packages/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " Query processed 1.3 kB in 6 minutes of slot time. [Job bigframes-dev:US.c9bb23f0-5ceb-4d6c-8241-960c496274ae details]\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " Query processed 1.2 kB in a moment of slot time.\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
postertitleyear
8Shoulder Arms1918
\n", + "

1 rows × 3 columns

\n", + "
[1 rows x 3 columns in total]" + ], + "text/plain": [ + " poster title year\n", + "8 {\"access_urls\":{\"expiry_time\":\"2026-03-27T02:0... Shoulder Arms 1918\n", + "\n", + "[1 rows x 3 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } ], - "text/plain": [ - "" + "source": [ + "us_movies = movies[bbq.ai.if_(\n", + " (\"The movie \", movies['title'], \" was made in US\")\n", + ")]\n", + "us_movies.head(1)" ] - }, - "metadata": {}, - "output_type": "display_data" } - ], - "source": [ - "us_movies = movies[bbq.ai.if_(\n", - " (\"The movie \", movies['title'], \" was made in US\")\n", - ")]\n", - "render_images(us_movies.head(1))" - ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } }, - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 0 + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/packages/bigframes/notebooks/kaggle/describe-product-images-with-bigframes-multimodal.ipynb b/packages/bigframes/notebooks/kaggle/describe-product-images-with-bigframes-multimodal.ipynb index 1a7de9b837f9..1c2e2b53a830 100644 --- a/packages/bigframes/notebooks/kaggle/describe-product-images-with-bigframes-multimodal.ipynb +++ b/packages/bigframes/notebooks/kaggle/describe-product-images-with-bigframes-multimodal.ipynb @@ -1,1131 +1 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "876eb80c", - "metadata": { - "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", - "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5" - }, - "source": [ - "# Describe product images with BigFrames multimodal DataFrames\n", - "\n", - "Based on notebook at 
https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\n", - "\n", - "This notebook is introducing BigFrames Multimodal features:\n", - "\n", - "1. Create Multimodal DataFrame\n", - "2. Combine unstructured data with structured data\n", - "3. Conduct image transformations\n", - "4. Use LLM models to ask questions and generate embeddings on images\n", - "5. PDF chunking function\n", - "\n", - "Install the bigframes package and upgrade other packages that are already included in Kaggle but have versions incompatible with bigframes." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "0506e15e", - "metadata": { - "trusted": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: bigframes in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (2.39.0)\n", - "Requirement already satisfied: google-cloud-automl in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (2.19.0)\n", - "Requirement already satisfied: google-cloud-translate in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (3.26.0)\n", - "Requirement already satisfied: google-ai-generativelanguage in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (0.11.0)\n", - "Requirement already satisfied: tensorflow in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (2.21.0)\n", - "Requirement already satisfied: cloudpickle>=2.0.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (3.1.2)\n", - 
"Requirement already satisfied: fsspec>=2023.3.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (2026.1.0)\n", - "Requirement already satisfied: gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (2026.1.0)\n", - "Requirement already satisfied: geopandas>=0.12.2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (1.1.3)\n", - "Requirement already satisfied: google-auth<3.0,>=2.15.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (2.49.1)\n", - "Requirement already satisfied: google-cloud-bigquery>=3.36.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from google-cloud-bigquery[bqstorage,pandas]>=3.36.0->bigframes) (3.41.0)\n", - "Requirement already satisfied: google-cloud-bigquery-storage<3.0.0,>=2.30.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (2.37.0)\n", - "Requirement already satisfied: google-cloud-functions>=1.12.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (1.23.0)\n", - "Requirement already satisfied: google-cloud-bigquery-connection>=1.12.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (1.21.0)\n", - "Requirement already satisfied: google-cloud-resource-manager>=1.10.3 in 
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (1.17.0)\n", - "Requirement already satisfied: google-cloud-storage>=2.0.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (3.10.1)\n", - "Requirement already satisfied: google-crc32c<2.0.0,>=1.0.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (1.8.0)\n", - "Requirement already satisfied: grpc-google-iam-v1>=0.14.2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (0.14.4)\n", - "Requirement already satisfied: numpy>=1.24.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (2.4.4)\n", - "Requirement already satisfied: pandas>=1.5.3 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (2.3.3)\n", - "Requirement already satisfied: pandas-gbq>=0.26.1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (0.34.1)\n", - "Requirement already satisfied: pyarrow>=15.0.2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (21.0.0)\n", - "Requirement already satisfied: pydata-google-auth>=1.8.2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (1.9.1)\n", - "Requirement already satisfied: requests>=2.27.1 in 
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (2.33.1)\n", - "Requirement already satisfied: shapely>=1.8.5 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (2.1.2)\n", - "Requirement already satisfied: tabulate>=0.9 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (0.10.0)\n", - "Requirement already satisfied: humanize>=4.6.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (4.15.0)\n", - "Requirement already satisfied: matplotlib>=3.7.1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (3.10.8)\n", - "Requirement already satisfied: db-dtypes>=1.4.2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (1.5.1)\n", - "Requirement already satisfied: pyiceberg>=0.7.1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (0.11.1)\n", - "Requirement already satisfied: atpublic<6,>=2.3 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (5.1)\n", - "Requirement already satisfied: python-dateutil<3,>=2.8.2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (2.9.0.post0)\n", - "Requirement already satisfied: pytz>=2022.7 in 
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (2026.1.post1)\n", - "Requirement already satisfied: toolz<2,>=0.11 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (1.1.0)\n", - "Requirement already satisfied: typing-extensions<5,>=4.5.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (4.15.0)\n", - "Requirement already satisfied: rich<14,>=12.4.4 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from bigframes) (13.9.4)\n", - "Requirement already satisfied: google-api-core<3.0.0,>=2.11.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from google-api-core[grpc]<3.0.0,>=2.11.0->google-cloud-automl) (2.30.2)\n", - "Requirement already satisfied: grpcio<2.0.0,>=1.33.2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from google-cloud-automl) (1.80.0)\n", - "Requirement already satisfied: proto-plus<2.0.0,>=1.22.3 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from google-cloud-automl) (1.27.2)\n", - "Requirement already satisfied: protobuf<8.0.0,>=4.25.8 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from google-cloud-automl) (6.33.6)\n", - "Requirement already satisfied: google-cloud-core<3.0.0,>=2.0.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from 
google-cloud-translate) (2.5.1)\n", - "Requirement already satisfied: absl-py>=1.0.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (2.4.0)\n", - "Requirement already satisfied: astunparse>=1.6.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (1.6.3)\n", - "Requirement already satisfied: flatbuffers>=25.9.23 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (25.12.19)\n", - "Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (0.7.0)\n", - "Requirement already satisfied: google_pasta>=0.1.1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (0.2.0)\n", - "Requirement already satisfied: libclang>=13.0.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (18.1.1)\n", - "Requirement already satisfied: opt_einsum>=2.3.2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (3.4.0)\n", - "Requirement already satisfied: packaging in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (26.0)\n", - "Requirement already satisfied: setuptools in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (82.0.1)\n", - "Requirement already 
satisfied: six>=1.12.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (1.17.0)\n", - "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (3.3.0)\n", - "Requirement already satisfied: wrapt>=1.11.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (2.1.2)\n", - "Requirement already satisfied: keras>=3.12.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (3.14.0)\n", - "Requirement already satisfied: h5py<3.15.0,>=3.11.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (3.14.0)\n", - "Requirement already satisfied: ml_dtypes<1.0.0,>=0.5.1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from tensorflow) (0.5.4)\n", - "Requirement already satisfied: wheel<1.0,>=0.23.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from astunparse>=1.6.0->tensorflow) (0.47.0)\n", - "Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0->bigframes) (3.13.5)\n", - "Requirement already satisfied: decorator>4.1.2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from 
gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0->bigframes) (5.2.1)\n", - "Requirement already satisfied: google-auth-oauthlib in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0->bigframes) (1.3.1)\n", - "Requirement already satisfied: google-cloud-storage-control in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0->bigframes) (1.11.0)\n", - "Requirement already satisfied: pyogrio>=0.7.2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from geopandas>=0.12.2->bigframes) (0.12.1)\n", - "Requirement already satisfied: pyproj>=3.5.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from geopandas>=0.12.2->bigframes) (3.7.2)\n", - "Requirement already satisfied: googleapis-common-protos<2.0.0,>=1.63.2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from google-api-core<3.0.0,>=2.11.0->google-api-core[grpc]<3.0.0,>=2.11.0->google-cloud-automl) (1.74.0)\n", - "Requirement already satisfied: grpcio-status<2.0.0,>=1.33.2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from google-api-core[grpc]<3.0.0,>=2.11.0->google-cloud-automl) (1.80.0)\n", - "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from google-auth<3.0,>=2.15.0->bigframes) (0.4.2)\n", - "Requirement already satisfied: cryptography>=38.0.3 in 
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from google-auth<3.0,>=2.15.0->bigframes) (46.0.7)\n", - "Requirement already satisfied: google-resumable-media<3.0.0,>=2.0.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from google-cloud-bigquery>=3.36.0->google-cloud-bigquery[bqstorage,pandas]>=3.36.0->bigframes) (2.8.2)\n", - "Requirement already satisfied: namex in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from keras>=3.12.0->tensorflow) (0.1.0)\n", - "Requirement already satisfied: optree in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from keras>=3.12.0->tensorflow) (0.19.0)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from matplotlib>=3.7.1->bigframes) (1.3.3)\n", - "Requirement already satisfied: cycler>=0.10 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from matplotlib>=3.7.1->bigframes) (0.12.1)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from matplotlib>=3.7.1->bigframes) (4.62.1)\n", - "Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from matplotlib>=3.7.1->bigframes) (1.5.0)\n", - "Requirement already satisfied: pillow>=8 in 
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from matplotlib>=3.7.1->bigframes) (12.2.0)\n", - "Requirement already satisfied: pyparsing>=3 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from matplotlib>=3.7.1->bigframes) (3.3.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pandas>=1.5.3->bigframes) (2026.1)\n", - "Requirement already satisfied: psutil>=5.9.8 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pandas-gbq>=0.26.1->bigframes) (7.2.2)\n", - "Requirement already satisfied: mmh3<6.0.0,>=4.0.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pyiceberg>=0.7.1->bigframes) (5.2.1)\n", - "Requirement already satisfied: click<9.0.0,>=7.1.1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pyiceberg>=0.7.1->bigframes) (8.3.2)\n", - "Requirement already satisfied: strictyaml<2.0.0,>=1.7.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pyiceberg>=0.7.1->bigframes) (1.7.3)\n", - "Requirement already satisfied: pydantic!=2.12.0,!=2.12.1,!=2.4.0,!=2.4.1,<3.0,>=2.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pyiceberg>=0.7.1->bigframes) (2.12.5)\n", - "Requirement already satisfied: tenacity<10.0.0,>=8.2.3 in 
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pyiceberg>=0.7.1->bigframes) (9.1.4)\n", - "Requirement already satisfied: pyroaring<2.0.0,>=1.0.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pyiceberg>=0.7.1->bigframes) (1.0.4)\n", - "Requirement already satisfied: cachetools<7.0,>=5.5 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pyiceberg>=0.7.1->bigframes) (6.2.6)\n", - "Requirement already satisfied: zstandard<1.0.0,>=0.13.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pyiceberg>=0.7.1->bigframes) (0.25.0)\n", - "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from requests>=2.27.1->bigframes) (3.4.7)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from requests>=2.27.1->bigframes) (3.11)\n", - "Requirement already satisfied: urllib3<3,>=1.26 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from requests>=2.27.1->bigframes) (2.6.3)\n", - "Requirement already satisfied: certifi>=2023.5.7 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from requests>=2.27.1->bigframes) (2026.2.25)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in 
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from rich<14,>=12.4.4->bigframes) (4.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from rich<14,>=12.4.4->bigframes) (2.20.0)\n", - "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0->bigframes) (2.6.1)\n", - "Requirement already satisfied: aiosignal>=1.4.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0->bigframes) (1.4.0)\n", - "Requirement already satisfied: attrs>=17.3.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0->bigframes) (26.1.0)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0->bigframes) (1.8.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0->bigframes) (6.7.1)\n", - "Requirement already satisfied: propcache>=0.2.0 in 
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0->bigframes) (0.4.1)\n", - "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0->bigframes) (1.23.0)\n", - "Requirement already satisfied: cffi>=2.0.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from cryptography>=38.0.3->google-auth<3.0,>=2.15.0->bigframes) (2.0.0)\n", - "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from google-auth-oauthlib->gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0->bigframes) (2.0.0)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from markdown-it-py>=2.2.0->rich<14,>=12.4.4->bigframes) (0.1.2)\n", - "Requirement already satisfied: pyasn1<0.7.0,>=0.6.1 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pyasn1-modules>=0.2.1->google-auth<3.0,>=2.15.0->bigframes) (0.6.3)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pydantic!=2.12.0,!=2.12.1,!=2.4.0,!=2.4.1,<3.0,>=2.0->pyiceberg>=0.7.1->bigframes) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.41.5 in 
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pydantic!=2.12.0,!=2.12.1,!=2.4.0,!=2.4.1,<3.0,>=2.0->pyiceberg>=0.7.1->bigframes) (2.41.5)\n", - "Requirement already satisfied: typing-inspection>=0.4.2 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from pydantic!=2.12.0,!=2.12.1,!=2.4.0,!=2.4.1,<3.0,>=2.0->pyiceberg>=0.7.1->bigframes) (0.4.2)\n", - "Requirement already satisfied: pycparser in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from cffi>=2.0.0->cryptography>=38.0.3->google-auth<3.0,>=2.15.0->bigframes) (3.0)\n", - "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/.venv/lib/python3.13/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib->gcsfs!=2025.5.0,!=2026.2.0,!=2026.3.0,>=2023.3.0->bigframes) (3.3.1)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m26.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], - "source": [ - "%pip install --upgrade bigframes google-cloud-automl google-cloud-translate google-ai-generativelanguage tensorflow " - ] - }, - { - "cell_type": "markdown", - "id": "c749e07c", - "metadata": {}, - "source": [ - "**Important:** restart the kernel by going to \"Run -> Restart & clear cell outputs\" before continuing.\n", - "\n", - "Configure bigframes to use your GCP project. 
First, go to \"Add-ons -> Google Cloud SDK\" and click the \"Attach\" button. Then," - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "5e00777d", - "metadata": { - "execution": { - "iopub.execute_input": "2025-08-18T20:17:14.873201Z", - "iopub.status.busy": "2025-08-18T20:17:14.872905Z", - "iopub.status.idle": "2025-08-18T20:17:14.946971Z", - "shell.execute_reply": "2025-08-18T20:17:14.945996Z", - "shell.execute_reply.started": "2025-08-18T20:17:14.873171Z" - }, - "trusted": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Not running on Kaggle, skipping Kaggle secrets initialization.\n" - ] - } - ], - "source": [ - "try:\n", - " from kaggle_secrets import UserSecretsClient\n", - " user_secrets = UserSecretsClient()\n", - " user_credential = user_secrets.get_gcloud_credential()\n", - " user_secrets.set_tensorflow_credential(user_credential)\n", - " print(\"Successfully authenticated using Kaggle secrets.\")\n", - "except ImportError:\n", - " print(\"Not running on Kaggle, skipping Kaggle secrets initialization.\")\n", - "except Exception as e:\n", - " print(f\"Could not initialize Kaggle secrets: {e}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "b2e171de", - "metadata": { - "execution": { - "iopub.execute_input": "2025-08-18T20:17:25.574192Z", - "iopub.status.busy": "2025-08-18T20:17:25.573874Z", - "iopub.status.idle": "2025-08-18T20:17:45.102002Z", - "shell.execute_reply": "2025-08-18T20:17:45.101140Z", - "shell.execute_reply.started": "2025-08-18T20:17:25.574168Z" - }, - "trusted": true - }, - "outputs": [], - "source": [ - "PROJECT = \"bigframes-dev\" # replace with your project. 
\n", - "# Refer to https://cloud.google.com/bigquery/docs/multimodal-data-dataframes-tutorial#required_roles for your required permissions\n", - "\n", - "LOCATION = \"us\" # replace with your location.\n", - "DATASET_ID = \"bigframes_samples\" # replace with your dataset ID.\n", - "OUTPUT_BUCKET = \"bigframes_blob_test\" # replace with your GCS bucket. \n", - "\n", - "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", - "\n", - "import bigframes\n", - "# Setup project\n", - "bigframes.options.bigquery.project = PROJECT\n", - "bigframes.options.bigquery.location = LOCATION\n", - "\n", - "# Display options\n", - "bigframes.options.display.blob_display_width = 300\n", - "bigframes.options.display.progress_bar = None\n", - "\n", - "import bigframes.pandas as bpd\n", - "import bigframes.bigquery as bbq\n", - "\n", - "def get_runtime_json_str(series, mode=\"R\", with_metadata=False):\n", - " \"\"\"Get runtime JSON from objectref.\"\"\"\n", - " s = bbq.obj.fetch_metadata(series) if with_metadata else series\n", - " runtime = bbq.obj.get_access_url(s, mode=mode)\n", - " return bbq.to_json_string(runtime)\n", - "\n", - "def get_metadata(series):\n", - " metadata_obj = bbq.obj.fetch_metadata(series)\n", - " return bbq.json_query(metadata_obj.struct.field(\"details\"), \"$.gcs_metadata\")\n", - "\n", - "def get_content_type(series):\n", - " return bbq.json_value(get_metadata(series), \"$.content_type\")\n", - "\n", - "def get_size(series):\n", - " return bbq.json_value(get_metadata(series), \"$.size\").astype(\"Int64\")\n", - "\n", - "def get_updated(series):\n", - " return bpd.to_datetime(bbq.json_value(get_metadata(series), \"$.updated\").astype(\"Int64\"), unit=\"us\", utc=True)\n", - "\n", - "from IPython.display import HTML, display\n", - "\n", - "def render_images(df):\n", - " \"\"\"Helper to display BigFrames DataFrame with rendered image previews.\"\"\"\n", - " import bigframes.pandas as bpd\n", - " import bigframes.bigquery as bbq\n", - " 
import bigframes\n", - " from bigframes import dtypes\n", - " import json\n", - " \n", - " if isinstance(df, bpd.Series):\n", - " df = df.to_frame()\n", - " \n", - " object_cols = [\n", - " col for col, dtype in zip(df.columns, df.dtypes)\n", - " if dtype == dtypes.OBJ_REF_DTYPE\n", - " ]\n", - " \n", - " if not object_cols:\n", - " display(df)\n", - " return\n", - "\n", - " limit = bigframes.options.display.max_rows or 10\n", - " view_df = df.head(limit)\n", - " \n", - " runtime_cols = {\n", - " col: get_runtime_json_str(view_df[col], mode=\"R\", with_metadata=False) \n", - " for col in object_cols\n", - " }\n", - " \n", - " pandas_json_df = bpd.DataFrame(runtime_cols).to_pandas()\n", - " final_pd = view_df.to_pandas()\n", - " \n", - " width = bigframes.options.display.blob_display_width or 300\n", - " IMAGE_EXTENSIONS = (\".png\", \".jpg\", \".jpeg\", \".gif\", \".webp\")\n", - " \n", - " def format_cell_html(raw_json):\n", - " if not raw_json:\n", - " return \"\"\n", - " try:\n", - " obj_rt = json.loads(raw_json)\n", - " if \"access_urls\" not in obj_rt:\n", - " err = obj_rt.get(\"errors\", [{\"message\": \"URL Generation Failed\"}])[0].get(\"message\")\n", - " return f'Error: {err}'\n", - " \n", - " uri = obj_rt.get(\"objectref\", {}).get(\"uri\", \"\")\n", - " url = obj_rt[\"access_urls\"][\"read_url\"]\n", - " \n", - " if uri and str(uri).lower().endswith(IMAGE_EXTENSIONS):\n", - " return f''\n", - " \n", - " return f'{uri if uri else \"view\"}'\n", - " except:\n", - " return \"Format Error\"\n", - "\n", - " for col in object_cols:\n", - " final_pd[col] = pandas_json_df[col].map(format_cell_html)\n", - " \n", - " display(HTML(final_pd.to_html(escape=False)))" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "d17afaf1", - "metadata": { - "execution": { - "iopub.execute_input": "2025-08-18T20:17:45.103530Z", - "iopub.status.busy": "2025-08-18T20:17:45.103249Z", - "iopub.status.idle": "2025-08-18T20:17:47.424586Z", - "shell.execute_reply": 
"2025-08-18T20:17:47.423762Z", - "shell.execute_reply.started": "2025-08-18T20:17:45.103499Z" - }, - "trusted": true - }, - "outputs": [], - "source": [ - "import gcsfs\n", - "import bigframes.bigquery as bbq\n", - "\n", - "# List files using gcsfs (public bucket)\n", - "fs = gcsfs.GCSFileSystem(anon=True)\n", - "uris = fs.glob(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\")\n", - "\n", - "# Ensure URIs have gs:// prefix\n", - "uris = [u if u.startswith(\"gs://\") else f\"gs://{u}\" for u in uris]\n", - "\n", - "# Read the URIs into a BigQuery DataFrame using UNNEST\n", - "# We take the first 5 for this example\n", - "df_image = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", - "\n", - "# Create the object reference column\n", - "df_image['image'] = bbq.obj.make_ref(df_image['uri'], authorizer=FULL_CONNECTION_ID)\n", - "df_image = df_image[['image']]" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "3e84b922", - "metadata": { - "execution": { - "iopub.execute_input": "2025-08-18T20:17:47.425873Z", - "iopub.status.busy": "2025-08-18T20:17:47.425578Z", - "iopub.status.idle": "2025-08-18T20:18:07.919961Z", - "shell.execute_reply": "2025-08-18T20:18:07.918942Z", - "shell.execute_reply.started": "2025-08-18T20:17:47.425844Z" - }, - "trusted": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", 
- " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
image
0
1
2
3
4
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Take only the 5 images to deal with. Preview the content of the Mutimodal DataFrame\n", - "df_image = df_image.head(5)\n", - "render_images(df_image)" - ] - }, - { - "cell_type": "markdown", - "id": "b0eaa73c", - "metadata": {}, - "source": [ - "# 2. Combine unstructured data with structured data\n", - "\n", - "Now you can put more information into the table to describe the files. Such as author info from inputs, or other metadata from the gcs object itself." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "7d64fb54", - "metadata": { - "execution": { - "iopub.execute_input": "2025-08-18T20:18:07.922593Z", - "iopub.status.busy": "2025-08-18T20:18:07.921884Z", - "iopub.status.idle": "2025-08-18T20:18:35.549725Z", - "shell.execute_reply": "2025-08-18T20:18:35.548942Z", - "shell.execute_reply.started": "2025-08-18T20:18:07.922551Z" - }, - "trusted": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - "
imageauthorcontent_typesizeupdated
0aliceimage/png7157662025-03-20 17:44:38+00:00
1bobimage/png11674062025-03-20 17:44:38+00:00
2bobimage/png11508922025-03-20 17:44:39+00:00
3aliceimage/png17365332025-03-20 17:44:39+00:00
4bobimage/png4397402025-03-20 17:44:39+00:00
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Combine unstructured data with structured data\n", - "df_image[\"author\"] = [\"alice\", \"bob\", \"bob\", \"alice\", \"bob\"] # type: ignore\n", - "df_image[\"content_type\"] = get_content_type(df_image[\"image\"])\n", - "df_image[\"size\"] = get_size(df_image[\"image\"])\n", - "df_image[\"updated\"] = get_updated(df_image[\"image\"])\n", - "render_images(df_image)" - ] - }, - { - "cell_type": "markdown", - "id": "a23ef0e4", - "metadata": {}, - "source": [ - "Then you can filter the rows based on the structured data. And for different content types, you can display them respectively or together." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "ce102df0", - "metadata": { - "execution": { - "iopub.execute_input": "2025-08-18T20:18:55.300314Z", - "iopub.status.busy": "2025-08-18T20:18:55.299993Z", - "iopub.status.idle": "2025-08-18T20:19:09.154492Z", - "shell.execute_reply": "2025-08-18T20:19:09.153315Z", - "shell.execute_reply.started": "2025-08-18T20:18:55.300289Z" - }, - "trusted": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
imageauthorcontent_typesizeupdated
0aliceimage/png7157662025-03-20 17:44:38+00:00
3aliceimage/png17365332025-03-20 17:44:39+00:00
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# filter images and display, you can also display audio and video types\n", - "filtered_df = df_image[df_image[\"author\"] == \"alice\"]\n", - "render_images(filtered_df)" - ] - }, - { - "cell_type": "markdown", - "id": "db2b3b12", - "metadata": {}, - "source": [ - "# 3. Conduct image transformations\n", - "\n", - "BigFrames Multimodal DataFrame provides image(and other) transformation functions. Such as image_blur, image_resize and image_normalize. The output can be saved to GCS folders or to BQ as bytes." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "283036f5", - "metadata": { - "execution": { - "iopub.execute_input": "2025-08-18T20:19:22.950652Z", - "iopub.status.busy": "2025-08-18T20:19:22.950277Z", - "iopub.status.idle": "2025-08-18T20:31:51.799997Z", - "shell.execute_reply": "2025-08-18T20:31:51.798840Z", - "shell.execute_reply.started": "2025-08-18T20:19:22.950625Z" - }, - "trusted": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/pandas/__init__.py:211: PreviewWarning: udf is in preview.\n", - " return global_session.with_default_session(\n", - "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dataframe.py:4695: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", - " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", - "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and 
pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
imageblurred
0
1
2
3
4
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "@bpd.udf(\n", - " input_types=[str, str, int, int],\n", - " output_type=str,\n", - " dataset=DATASET_ID,\n", - " name=\"image_blur_kaggle\",\n", - " bigquery_connection=FULL_CONNECTION_ID,\n", - " packages=[\"opencv-python-headless\", \"numpy\", \"requests\"],\n", - ")\n", - "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -> str:\n", - " import json\n", - " import cv2 as cv\n", - " import numpy as np\n", - " import requests\n", - " \n", - " src_obj = json.loads(src_rt)\n", - " if \"access_urls\" not in src_obj:\n", - " raise ValueError(f\"Missing 'access_urls' in source object. Response: {src_obj}\")\n", - " src_url = src_obj[\"access_urls\"][\"read_url\"]\n", - " \n", - " response = requests.get(src_url, timeout=30)\n", - " response.raise_for_status()\n", - " \n", - " img = cv.imdecode(np.frombuffer(response.content, np.uint8), cv.IMREAD_UNCHANGED)\n", - " if img is None:\n", - " raise ValueError(\"cv.imdecode failed\")\n", - " \n", - " img_blurred = cv.blur(img, ksize=(int(kx), int(ky)))\n", - " success, encoded = cv.imencode(\".jpeg\", img_blurred)\n", - " \n", - " if not success:\n", - " raise ValueError(\"cv.imencode failed\")\n", - " \n", - " if dst_rt: # GCS Output Mode\n", - " dst_obj = json.loads(dst_rt)\n", - " if \"access_urls\" not in dst_obj:\n", - " raise ValueError(f\"Missing 'access_urls' in destination object. 
Response: {dst_obj}\")\n", - " dst_url = dst_obj[\"access_urls\"][\"write_url\"]\n", - " \n", - " requests.put(dst_url, data=encoded.tobytes(), headers={\"Content-Type\": \"image/jpeg\"}, timeout=30).raise_for_status()\n", - " return dst_obj[\"objectref\"][\"uri\"]\n", - " return \"\"\n", - "\n", - "def apply_transformation(series, dst_folder, udf, *args, verbose=False):\n", - " import os\n", - " dst_folder = os.path.join(dst_folder, \"\")\n", - " metadata = bbq.obj.fetch_metadata(series)\n", - " current_uri = metadata.struct.field(\"uri\")\n", - " dst_uri = current_uri.str.replace(r\"^.*\\/(.*)$\", rf\"{dst_folder}\\1\", regex=True)\n", - " \n", - " # Bypass synchronous validation via JSON initialization\n", - " dst_blob_df = bpd.DataFrame({\"uri\": dst_uri})\n", - " dst_blob_df[\"authorizer\"] = FULL_CONNECTION_ID\n", - " dst_blob = bbq.obj.make_ref(bbq.to_json(bbq.struct(dst_blob_df)))\n", - "\n", - " df_transform = bpd.DataFrame({\n", - " \"src_rt\": get_runtime_json_str(series, mode=\"R\"),\n", - " \"dst_rt\": get_runtime_json_str(dst_blob, mode=\"RW\"),\n", - " })\n", - " res = df_transform[[\"src_rt\", \"dst_rt\"]].apply(udf, axis=1, args=args)\n", - " \n", - " if verbose:\n", - " return res\n", - " \n", - " res_df = bpd.DataFrame({\"uri\": res})\n", - " res_df[\"authorizer\"] = FULL_CONNECTION_ID\n", - " return bbq.obj.make_ref(bbq.to_json(bbq.struct(res_df)))\n", - "\n", - "# Apply Blur Transformation\n", - "df_image[\"blurred\"] = apply_transformation(\n", - " df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_blur_transformed/\",\n", - " image_blur, 20, 20\n", - ")\n", - "render_images(df_image[[\"image\", \"blurred\"]])" - ] - }, - { - "cell_type": "markdown", - "id": "2d68a468", - "metadata": {}, - "source": [ - "# 4. 
Use LLM models to ask questions and generate embeddings on images" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "662054a0", - "metadata": { - "execution": { - "iopub.execute_input": "2025-08-18T20:36:13.954686Z", - "iopub.status.busy": "2025-08-18T20:36:13.954340Z", - "iopub.status.idle": "2025-08-18T20:36:43.225449Z", - "shell.execute_reply": "2025-08-18T20:36:43.224579Z", - "shell.execute_reply.started": "2025-08-18T20:36:13.954661Z" - }, - "trusted": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/core/logging/log_adapter.py:183: FutureWarning: Since upgrading the default model can cause unintended breakages, the\n", - "default model will be removed in BigFrames 3.0. Please supply an\n", - "explicit model to avoid this message.\n", - " return method(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/__init__.py:437: FutureWarning: You are using the BigFrames session default connection: bigframes-\n", - "default-connection, which can be different from the\n", - "BigQuery project default connection. 
This default\n", - "connection may change in the future.\n", - " warnings.warn(msg, category=FutureWarning)\n" - ] - } - ], - "source": [ - "from bigframes.ml import llm\n", - "gemini = llm.GeminiTextGenerator()" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "a31730ff", - "metadata": { - "execution": { - "iopub.execute_input": "2025-08-18T20:36:43.227798Z", - "iopub.status.busy": "2025-08-18T20:36:43.227457Z", - "iopub.status.idle": "2025-08-18T20:37:25.238649Z", - "shell.execute_reply": "2025-08-18T20:37:25.237623Z", - "shell.execute_reply.started": "2025-08-18T20:36:43.227764Z" - }, - "trusted": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ml_generate_text_llm_resultimage
0Please provide me with the picture! I need to see the image to tell you what the item is and what color the picture is.\\n
1To answer your question accurately, I need you to provide me with the picture you are referring to. Once you provide the picture, I can analyze it and tell you what item is in the picture and what color the picture is.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Ask the same question on the images\n", - "df_image = df_image.head(2)\n", - "answer = gemini.predict(df_image, prompt=[\"what item is it?\", \"what color is the picture?\"])\n", - "render_images(answer[[\"ml_generate_text_llm_result\", \"image\"]])" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "f5d2a1ed", - "metadata": { - "execution": { - "iopub.execute_input": "2025-08-18T20:37:25.239875Z", - "iopub.status.busy": "2025-08-18T20:37:25.239607Z", - "iopub.status.idle": "2025-08-18T20:37:25.263034Z", - "shell.execute_reply": "2025-08-18T20:37:25.262002Z", - "shell.execute_reply.started": "2025-08-18T20:37:25.239847Z" - }, - "trusted": true - }, - "outputs": [], - "source": [ - "# Ask different questions\n", - "df_image[\"question\"] = [\"what item is it?\", \"what color is the picture?\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "fb67bf8e", - "metadata": { - "execution": { - "iopub.execute_input": "2025-08-18T20:37:25.264585Z", - "iopub.status.busy": "2025-08-18T20:37:25.264072Z", - "iopub.status.idle": "2025-08-18T20:38:10.129667Z", - "shell.execute_reply": "2025-08-18T20:38:10.128677Z", - "shell.execute_reply.started": "2025-08-18T20:37:25.264518Z" - }, - "trusted": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: 
JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ml_generate_text_llm_resultimage
0The item is a glass aquarium.
1Dark brown
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "answer_alt = gemini.predict(df_image, prompt=[df_image[\"question\"], df_image[\"image\"]])\n", - "render_images(answer_alt[[\"ml_generate_text_llm_result\", \"image\"]])" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "0cf33170", - "metadata": { - "execution": { - "iopub.execute_input": "2025-08-18T20:38:10.130851Z", - "iopub.status.busy": "2025-08-18T20:38:10.130617Z", - "iopub.status.idle": "2025-08-18T20:39:04.790416Z", - "shell.execute_reply": "2025-08-18T20:39:04.789398Z", - "shell.execute_reply.started": "2025-08-18T20:38:10.130833Z" - }, - "trusted": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/core/logging/log_adapter.py:183: FutureWarning: Since upgrading the default model can cause unintended breakages, the\n", - "default model will be removed in BigFrames 3.0. Please supply an\n", - "explicit model to avoid this message.\n", - " return method(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/session/__init__.py:437: FutureWarning: You are using the BigFrames session default connection: bigframes-\n", - "default-connection, which can be different from the\n", - "BigQuery project default connection. 
This default\n", - "connection may change in the future.\n", - " warnings.warn(msg, category=FutureWarning)\n", - "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ml_generate_embedding_resultml_generate_embedding_statusml_generate_embedding_start_secml_generate_embedding_end_seccontent
0[ 0.03416207 0.0419732 -0.0227391 ... -0.03...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-05-02T03:3...
1[ 0.01908903 0.0193082 -0.00221754 ... 0.00...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-05-02T03:3...
\n", - "

2 rows × 5 columns

\n", - "
[2 rows x 5 columns in total]" - ], - "text/plain": [ - " ml_generate_embedding_result \\\n", - "0 [ 0.03416207 0.0419732 -0.0227391 ... -0.03... \n", - "1 [ 0.01908903 0.0193082 -0.00221754 ... 0.00... \n", - "\n", - " ml_generate_embedding_status ml_generate_embedding_start_sec \\\n", - "0 \n", - "1 \n", - "\n", - " ml_generate_embedding_end_sec \\\n", - "0 \n", - "1 \n", - "\n", - " content \n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-05-02T03:3... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-05-02T03:3... \n", - "\n", - "[2 rows x 5 columns]" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Generate embeddings.\n", - "embed_model = llm.MultimodalEmbeddingGenerator()\n", - "embeddings = embed_model.predict(df_image[\"image\"])\n", - "embeddings" - ] - } - ], - "metadata": { - "kaggle": { - "accelerator": "none", - "dataSources": [ - { - "databundleVersionId": 13391012, - "sourceId": 110281, - "sourceType": "competition" - } - ], - "dockerImageVersionId": 31089, - "isGpuEnabled": false, - "isInternetEnabled": true, - "language": "python", - "sourceType": "notebook" - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.13" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} +{"metadata":{"kernelspec":{"language":"python","display_name":"Python 
3","name":"python3"},"language_info":{"name":"python","version":"3.11.13","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"none","dataSources":[{"sourceId":110281,"databundleVersionId":13391012,"sourceType":"competition"}],"dockerImageVersionId":31089,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":false}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"# Describe product images with BigFrames multimodal DataFrames\n\nBased on notebook at https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\n\nThis notebook is introducing BigFrames Multimodal features:\n\n1. Create Multimodal DataFrame\n2. Combine unstructured data with structured data\n3. Conduct image transformations\n4. Use LLM models to ask questions and generate embeddings on images\n5. PDF chunking function\n\nInstall the bigframes package and upgrade other packages that are already included in Kaggle but have versions incompatible with bigframes.","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19"}},{"cell_type":"code","source":"%pip install --upgrade bigframes google-cloud-automl google-cloud-translate google-ai-generativelanguage tensorflow ","metadata":{"trusted":true},"outputs":[],"execution_count":null},{"cell_type":"markdown","source":"**Important:** restart the kernel by going to \"Run -> Restart & clear cell outputs\" before continuing.\n\nConfigure bigframes to use your GCP project. First, go to \"Add-ons -> Google Cloud SDK\" and click the \"Attach\" button. 
Then,","metadata":{}},{"cell_type":"code","source":"from kaggle_secrets import UserSecretsClient\nuser_secrets = UserSecretsClient()\nuser_credential = user_secrets.get_gcloud_credential()\nuser_secrets.set_tensorflow_credential(user_credential)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-08-18T20:17:14.872905Z","iopub.execute_input":"2025-08-18T20:17:14.873201Z","iopub.status.idle":"2025-08-18T20:17:14.946971Z","shell.execute_reply.started":"2025-08-18T20:17:14.873171Z","shell.execute_reply":"2025-08-18T20:17:14.945996Z"}},"outputs":[],"execution_count":2},{"cell_type":"code","source":"PROJECT = \"bigframes-dev\" # replace with your project. \n# Refer to https://cloud.google.com/bigquery/docs/multimodal-data-dataframes-tutorial#required_roles for your required permissions\n\nOUTPUT_BUCKET = \"bigframes_blob_test\" # replace with your GCS bucket. \n# The connection (or bigframes-default-connection of the project) must have read/write permission to the bucket. \n# Refer to https://cloud.google.com/bigquery/docs/multimodal-data-dataframes-tutorial#grant-permissions for setting up connection service account permissions.\n# In this Notebook it uses bigframes-default-connection by default. 
You can also bring in your own connections in each method.\n\nimport bigframes\n# Setup project\nbigframes.options.bigquery.project = PROJECT\n\n# Display options\nbigframes.options.display.blob_display_width = 300\nbigframes.options.display.progress_bar = None\n\nimport bigframes.pandas as bpd","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-08-18T20:17:25.573874Z","iopub.execute_input":"2025-08-18T20:17:25.574192Z","iopub.status.idle":"2025-08-18T20:17:45.102002Z","shell.execute_reply.started":"2025-08-18T20:17:25.574168Z","shell.execute_reply":"2025-08-18T20:17:45.101140Z"}},"outputs":[],"execution_count":3},{"cell_type":"code","source":"# Create blob columns from wildcard path.\ndf_image = bpd.from_glob_path(\n \"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\", name=\"image\"\n)\n# Other ways are: from string uri column\n# df = bpd.DataFrame({\"uri\": [\"gs:///\", \"gs:///\"]})\n# df[\"blob_col\"] = df[\"uri\"].str.to_blob()\n\n# From an existing object table\n# df = bpd.read_gbq_object_table(\"\", name=\"blob_col\")","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-08-18T20:17:45.103249Z","iopub.execute_input":"2025-08-18T20:17:45.103530Z","iopub.status.idle":"2025-08-18T20:17:47.424586Z","shell.execute_reply.started":"2025-08-18T20:17:45.103499Z","shell.execute_reply":"2025-08-18T20:17:47.423762Z"}},"outputs":[{"name":"stderr","text":"/usr/local/lib/python3.11/dist-packages/bigframes/core/global_session.py:103: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n _global_session = bigframes.session.connect(\n","output_type":"stream"},{"name":"stdout","text":"Please ensure you have selected a BigQuery account in the Notebook Add-ons menu.\n","output_type":"stream"}],"execution_count":4},{"cell_type":"code","source":"# Take only the 5 images to deal with. 
Preview the content of the Mutimodal DataFrame\ndf_image = df_image.head(5)\ndf_image","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-08-18T20:17:47.425578Z","iopub.execute_input":"2025-08-18T20:17:47.425873Z","iopub.status.idle":"2025-08-18T20:18:07.919961Z","shell.execute_reply.started":"2025-08-18T20:17:47.425844Z","shell.execute_reply":"2025-08-18T20:18:07.918942Z"}},"outputs":[{"execution_count":5,"output_type":"execute_result","data":{"text/plain":" image\n0 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n1 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n2 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n3 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n4 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n\n[5 rows x 1 columns]","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
image
0
1
2
3
4
\n

5 rows × 1 columns

\n
[5 rows x 1 columns in total]"},"metadata":{}}],"execution_count":5},{"cell_type":"markdown","source":"# 2. Combine unstructured data with structured data\n\nNow you can put more information into the table to describe the files. Such as author info from inputs, or other metadata from the gcs object itself.","metadata":{}},{"cell_type":"code","source":"# Combine unstructured data with structured data\ndf_image[\"author\"] = [\"alice\", \"bob\", \"bob\", \"alice\", \"bob\"] # type: ignore\ndf_image[\"content_type\"] = df_image[\"image\"].blob.content_type()\ndf_image[\"size\"] = df_image[\"image\"].blob.size()\ndf_image[\"updated\"] = df_image[\"image\"].blob.updated()\ndf_image","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-08-18T20:18:07.921884Z","iopub.execute_input":"2025-08-18T20:18:07.922593Z","iopub.status.idle":"2025-08-18T20:18:35.549725Z","shell.execute_reply.started":"2025-08-18T20:18:07.922551Z","shell.execute_reply":"2025-08-18T20:18:35.548942Z"}},"outputs":[{"name":"stderr","text":"/usr/local/lib/python3.11/dist-packages/bigframes/bigquery/_operations/json.py:124: UserWarning: The `json_extract` is deprecated and will be removed in a future\nversion. Use `json_query` instead.\n warnings.warn(bfe.format_message(msg), category=UserWarning)\n/usr/local/lib/python3.11/dist-packages/bigframes/bigquery/_operations/json.py:124: UserWarning: The `json_extract` is deprecated and will be removed in a future\nversion. Use `json_query` instead.\n warnings.warn(bfe.format_message(msg), category=UserWarning)\n/usr/local/lib/python3.11/dist-packages/bigframes/bigquery/_operations/json.py:124: UserWarning: The `json_extract` is deprecated and will be removed in a future\nversion. Use `json_query` instead.\n warnings.warn(bfe.format_message(msg), category=UserWarning)\n","output_type":"stream"},{"execution_count":6,"output_type":"execute_result","data":{"text/plain":" image author content_type \\\n0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... 
alice image/png \n1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n2 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n3 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n4 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n\n size updated \n0 1591240 2025-03-20 17:45:04+00:00 \n1 1182951 2025-03-20 17:45:02+00:00 \n2 1520884 2025-03-20 17:44:55+00:00 \n3 1235401 2025-03-20 17:45:19+00:00 \n4 1591923 2025-03-20 17:44:47+00:00 \n\n[5 rows x 5 columns]","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
imageauthorcontent_typesizeupdated
0aliceimage/png15912402025-03-20 17:45:04+00:00
1bobimage/png11829512025-03-20 17:45:02+00:00
2bobimage/png15208842025-03-20 17:44:55+00:00
3aliceimage/png12354012025-03-20 17:45:19+00:00
4bobimage/png15919232025-03-20 17:44:47+00:00
\n

5 rows × 5 columns

\n
[5 rows x 5 columns in total]"},"metadata":{}}],"execution_count":6},{"cell_type":"markdown","source":"Then you can filter the rows based on the structured data. And for different content types, you can display them respectively or together.","metadata":{}},{"cell_type":"code","source":"# filter images and display, you can also display audio and video types\ndf_image[df_image[\"author\"] == \"alice\"][\"image\"].blob.display()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-08-18T20:18:55.299993Z","iopub.execute_input":"2025-08-18T20:18:55.300314Z","iopub.status.idle":"2025-08-18T20:19:09.154492Z","shell.execute_reply.started":"2025-08-18T20:18:55.300289Z","shell.execute_reply":"2025-08-18T20:19:09.153315Z"}},"outputs":[{"name":"stderr","text":"/usr/local/lib/python3.11/dist-packages/bigframes/bigquery/_operations/json.py:124: UserWarning: The `json_extract` is deprecated and will be removed in a future\nversion. Use `json_query` instead.\n warnings.warn(bfe.format_message(msg), category=UserWarning)\n","output_type":"stream"},{"output_type":"display_data","data":{"text/html":"","text/plain":""},"metadata":{}},{"output_type":"display_data","data":{"text/html":"","text/plain":""},"metadata":{}}],"execution_count":7},{"cell_type":"markdown","source":"# 3. Conduct image transformations\n\nBigFrames Multimodal DataFrame provides image(and other) transformation functions. Such as image_blur, image_resize and image_normalize. 
The output can be saved to GCS folders or to BQ as bytes.","metadata":{}},{"cell_type":"code","source":"df_image[\"blurred\"] = df_image[\"image\"].blob.image_blur(\n (20, 20), dst=f\"gs://{OUTPUT_BUCKET}/image_blur_transformed/\", engine=\"opencv\"\n)\ndf_image[\"resized\"] = df_image[\"image\"].blob.image_resize(\n (300, 200), dst=f\"gs://{OUTPUT_BUCKET}/image_resize_transformed/\", engine=\"opencv\"\n)\ndf_image[\"normalized\"] = df_image[\"image\"].blob.image_normalize(\n alpha=50.0,\n beta=150.0,\n norm_type=\"minmax\",\n dst=f\"gs://{OUTPUT_BUCKET}/image_normalize_transformed/\",\n engine=\"opencv\",\n)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-08-18T20:19:22.950277Z","iopub.execute_input":"2025-08-18T20:19:22.950652Z","iopub.status.idle":"2025-08-18T20:31:51.799997Z","shell.execute_reply.started":"2025-08-18T20:19:22.950625Z","shell.execute_reply":"2025-08-18T20:31:51.798840Z"}},"outputs":[{"name":"stderr","text":"/usr/local/lib/python3.11/dist-packages/bigframes/core/log_adapter.py:175: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n return method(*args, **kwargs)\n/usr/local/lib/python3.11/dist-packages/bigframes/core/log_adapter.py:175: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n return method(*args, **kwargs)\n/usr/local/lib/python3.11/dist-packages/bigframes/core/log_adapter.py:175: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n return method(*args, **kwargs)\n","output_type":"stream"}],"execution_count":8},{"cell_type":"code","source":"# You can also chain functions together\ndf_image[\"blur_resized\"] = df_image[\"blurred\"].blob.image_resize((300, 200), dst=f\"gs://{OUTPUT_BUCKET}/image_blur_resize_transformed/\", 
engine=\"opencv\")\ndf_image","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-08-18T20:31:51.802219Z","iopub.execute_input":"2025-08-18T20:31:51.802745Z","iopub.status.idle":"2025-08-18T20:36:13.953258Z","shell.execute_reply.started":"2025-08-18T20:31:51.802700Z","shell.execute_reply":"2025-08-18T20:36:13.951930Z"}},"outputs":[{"name":"stderr","text":"/usr/local/lib/python3.11/dist-packages/bigframes/core/log_adapter.py:175: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n return method(*args, **kwargs)\n","output_type":"stream"},{"execution_count":9,"output_type":"execute_result","data":{"text/plain":" image author content_type \\\n0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n2 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n3 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n4 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n\n size updated \\\n0 1591240 2025-03-20 17:45:04+00:00 \n1 1182951 2025-03-20 17:45:02+00:00 \n2 1520884 2025-03-20 17:44:55+00:00 \n3 1235401 2025-03-20 17:45:19+00:00 \n4 1591923 2025-03-20 17:44:47+00:00 \n\n blurred \\\n0 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n1 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n2 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n3 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n4 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n\n resized \\\n0 {'uri': 'gs://bigframes_blob_test/image_resize... \n1 {'uri': 'gs://bigframes_blob_test/image_resize... \n2 {'uri': 'gs://bigframes_blob_test/image_resize... \n3 {'uri': 'gs://bigframes_blob_test/image_resize... \n4 {'uri': 'gs://bigframes_blob_test/image_resize... \n\n normalized \\\n0 {'uri': 'gs://bigframes_blob_test/image_normal... \n1 {'uri': 'gs://bigframes_blob_test/image_normal... 
\n2 {'uri': 'gs://bigframes_blob_test/image_normal... \n3 {'uri': 'gs://bigframes_blob_test/image_normal... \n4 {'uri': 'gs://bigframes_blob_test/image_normal... \n\n blur_resized \n0 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n1 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n2 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n3 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n4 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n\n[5 rows x 9 columns]","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
imageauthorcontent_typesizeupdatedblurredresizednormalizedblur_resized
0aliceimage/png15912402025-03-20 17:45:04+00:00
1bobimage/png11829512025-03-20 17:45:02+00:00
2bobimage/png15208842025-03-20 17:44:55+00:00
3aliceimage/png12354012025-03-20 17:45:19+00:00
4bobimage/png15919232025-03-20 17:44:47+00:00
\n

5 rows × 9 columns

\n
[5 rows x 9 columns in total]"},"metadata":{}}],"execution_count":9},{"cell_type":"markdown","source":"# 4. Use LLM models to ask questions and generate embeddings on images","metadata":{}},{"cell_type":"code","source":"from bigframes.ml import llm\ngemini = llm.GeminiTextGenerator()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-08-18T20:36:13.954340Z","iopub.execute_input":"2025-08-18T20:36:13.954686Z","iopub.status.idle":"2025-08-18T20:36:43.225449Z","shell.execute_reply.started":"2025-08-18T20:36:13.954661Z","shell.execute_reply":"2025-08-18T20:36:43.224579Z"}},"outputs":[{"name":"stderr","text":"/usr/local/lib/python3.11/dist-packages/bigframes/core/log_adapter.py:175: FutureWarning: Since upgrading the default model can cause unintended breakages, the\ndefault model will be removed in BigFrames 3.0. Please supply an\nexplicit model to avoid this message.\n return method(*args, **kwargs)\n","output_type":"stream"}],"execution_count":10},{"cell_type":"code","source":"# Ask the same question on the images\ndf_image = df_image.head(2)\nanswer = gemini.predict(df_image, prompt=[\"what item is it?\", df_image[\"image\"]])\nanswer[[\"ml_generate_text_llm_result\", \"image\"]]","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-08-18T20:36:43.227457Z","iopub.execute_input":"2025-08-18T20:36:43.227798Z","iopub.status.idle":"2025-08-18T20:37:25.238649Z","shell.execute_reply.started":"2025-08-18T20:36:43.227764Z","shell.execute_reply":"2025-08-18T20:37:25.237623Z"}},"outputs":[{"name":"stderr","text":"/usr/local/lib/python3.11/dist-packages/bigframes/core/array_value.py:108: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n`db_dtypes` is a preview feature and subject to change.\n warnings.warn(msg, bfe.PreviewWarning)\n","output_type":"stream"},{"execution_count":11,"output_type":"execute_result","data":{"text/plain":" ml_generate_text_llm_result \\\n0 The item is a tin of K9 Guard Dog Paw Balm. 
\n1 The item is a bottle of K9 Guard Dog Hot Spot ... \n\n image \n0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n\n[2 rows x 2 columns]","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ml_generate_text_llm_resultimage
0The item is a tin of K9 Guard Dog Paw Balm.
1The item is a bottle of K9 Guard Dog Hot Spot Spray.
\n

2 rows × 2 columns

\n
[2 rows x 2 columns in total]"},"metadata":{}}],"execution_count":11},{"cell_type":"code","source":"# Ask different questions\ndf_image[\"question\"] = [\"what item is it?\", \"what color is the picture?\"]","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-08-18T20:37:25.239607Z","iopub.execute_input":"2025-08-18T20:37:25.239875Z","iopub.status.idle":"2025-08-18T20:37:25.263034Z","shell.execute_reply.started":"2025-08-18T20:37:25.239847Z","shell.execute_reply":"2025-08-18T20:37:25.262002Z"}},"outputs":[],"execution_count":12},{"cell_type":"code","source":"answer_alt = gemini.predict(df_image, prompt=[df_image[\"question\"], df_image[\"image\"]])\nanswer_alt[[\"ml_generate_text_llm_result\", \"image\"]]","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-08-18T20:37:25.264072Z","iopub.execute_input":"2025-08-18T20:37:25.264585Z","iopub.status.idle":"2025-08-18T20:38:10.129667Z","shell.execute_reply.started":"2025-08-18T20:37:25.264518Z","shell.execute_reply":"2025-08-18T20:38:10.128677Z"}},"outputs":[{"name":"stderr","text":"/usr/local/lib/python3.11/dist-packages/bigframes/core/array_value.py:108: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n`db_dtypes` is a preview feature and subject to change.\n warnings.warn(msg, bfe.PreviewWarning)\n","output_type":"stream"},{"execution_count":13,"output_type":"execute_result","data":{"text/plain":" ml_generate_text_llm_result \\\n0 The item is a tin of K9 Guard Dog Paw Balm. \n1 The picture has colors such as white, gray, an... \n\n image \n0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n\n[2 rows x 2 columns]","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ml_generate_text_llm_resultimage
0The item is a tin of K9 Guard Dog Paw Balm.
1The picture has colors such as white, gray, and a light blue (cyan).
\n

2 rows × 2 columns

\n
[2 rows x 2 columns in total]"},"metadata":{}}],"execution_count":13},{"cell_type":"code","source":"# Generate embeddings.\nembed_model = llm.MultimodalEmbeddingGenerator()\nembeddings = embed_model.predict(df_image[\"image\"])\nembeddings","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-08-18T20:38:10.130617Z","iopub.execute_input":"2025-08-18T20:38:10.130851Z","iopub.status.idle":"2025-08-18T20:39:04.790416Z","shell.execute_reply.started":"2025-08-18T20:38:10.130833Z","shell.execute_reply":"2025-08-18T20:39:04.789398Z"}},"outputs":[{"name":"stderr","text":"/usr/local/lib/python3.11/dist-packages/bigframes/core/log_adapter.py:175: FutureWarning: Since upgrading the default model can cause unintended breakages, the\ndefault model will be removed in BigFrames 3.0. Please supply an\nexplicit model to avoid this message.\n return method(*args, **kwargs)\n/usr/local/lib/python3.11/dist-packages/bigframes/core/array_value.py:108: PreviewWarning: JSON column interpretation as a custom PyArrow extention in\n`db_dtypes` is a preview feature and subject to change.\n warnings.warn(msg, bfe.PreviewWarning)\n","output_type":"stream"},{"execution_count":14,"output_type":"execute_result","data":{"text/plain":" ml_generate_embedding_result \\\n0 [ 0.00638822 0.01666385 0.00451817 ... -0.02... \n1 [ 0.00973672 0.02148364 0.00244308 ... 0.00... \n\n ml_generate_embedding_status ml_generate_embedding_start_sec \\\n0 \n1 \n\n ml_generate_embedding_end_sec \\\n0 \n1 \n\n content \n0 {\"access_urls\":{\"expiry_time\":\"2025-08-19T02:3... \n1 {\"access_urls\":{\"expiry_time\":\"2025-08-19T02:3... \n\n[2 rows x 5 columns]","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ml_generate_embedding_resultml_generate_embedding_statusml_generate_embedding_start_secml_generate_embedding_end_seccontent
0[ 0.00638822 0.01666385 0.00451817 ... -0.02...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2025-08-19T02:3...
1[ 0.00973672 0.02148364 0.00244308 ... 0.00...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2025-08-19T02:3...
\n

2 rows × 5 columns

\n
[2 rows x 5 columns in total]"},"metadata":{}}],"execution_count":14},{"cell_type":"code","source":"","metadata":{"trusted":true},"outputs":[],"execution_count":null}]} diff --git a/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb b/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb index ac10f68f639a..4faff4b8e768 100644 --- a/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb +++ b/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb @@ -1,8 +1,23 @@ { "cells": [ { - "id": "f4ece66a", "cell_type": "markdown", + "metadata": { + "@deathbeds/jupyterlab-fonts": { + "styles": { + "": { + "body[data-jp-deck-mode='presenting'] &": { + "zoom": "194%" + } + } + } + }, + "editable": true, + "slideshow": { + "slide_type": "subslide" + }, + "tags": [] + }, "source": [ "# Creating a searchable index of the National Jukebox\n", "\n", @@ -20,42 +35,42 @@ "To follow along, you'll need a Google Cloud project\n", "\n", "* Go to https://cloud.google.com/free to start a free trial." 
- ], + ] + }, + { + "cell_type": "markdown", "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { "body[data-jp-deck-mode='presenting'] &": { - "zoom": "194%" + "z-index": "0", + "zoom": "216%" } } } }, - "editable": true, "slideshow": { - "slide_type": "subslide" - }, - "tags": [] + "slide_type": "slide" + } }, - "execution_count": null - }, - { - "id": "bc01a1d3", - "cell_type": "markdown", "source": [ "The National Jukebox is a project of the USA Library of Congress to provide access to thousands of acoustic sound recordings from the very earliest days of the commercial record industry.\n", "\n", "* Learn more at https://www.loc.gov/collections/national-jukebox/about-this-collection/\n", "\n", "\"recording" - ], + ] + }, + { + "cell_type": "markdown", "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { "body[data-jp-deck-mode='presenting'] &": { "z-index": "0", - "zoom": "216%" + "zoom": "181%" } } } @@ -64,11 +79,6 @@ "slide_type": "slide" } }, - "execution_count": null - }, - { - "id": "4fc7c468", - "cell_type": "markdown", "source": [ "\n", "To search the National Jukebox, we combine powerful features of BigQuery:\n", @@ -86,32 +96,10 @@ "3. BigQuery DataFrames to use Python instead of SQL.\n", "\n", " https://cloud.google.com/bigquery/docs/bigquery-dataframes-introduction" - ], - "metadata": { - "@deathbeds/jupyterlab-fonts": { - "styles": { - "": { - "body[data-jp-deck-mode='presenting'] &": { - "z-index": "0", - "zoom": "181%" - } - } - } - }, - "slideshow": { - "slide_type": "slide" - } - }, - "execution_count": null + ] }, { - "id": "90f2e543", "cell_type": "markdown", - "source": [ - "## Getting started with BigQuery DataFrames (bigframes)\n", - "\n", - "Install the bigframes package." 
- ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -126,14 +114,15 @@ "slide_type": "slide" } }, - "execution_count": null + "source": [ + "## Getting started with BigQuery DataFrames (bigframes)\n", + "\n", + "Install the bigframes package." + ] }, { - "id": "56694cb4", "cell_type": "code", - "source": [ - "%pip install --upgrade bigframes google-cloud-automl google-cloud-translate google-ai-generativelanguage tensorflow " - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -153,17 +142,13 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "%pip install --upgrade bigframes google-cloud-automl google-cloud-translate google-ai-generativelanguage tensorflow " + ] }, { - "id": "fa84ad03", "cell_type": "markdown", - "source": [ - "**Important:** restart the kernel by going to \"Run -> Restart & clear cell outputs\" before continuing.\n", - "\n", - "Configure bigframes to use your GCP project. First, go to \"Add-ons -> Google Cloud SDK\" and click the \"Attach\" button. Then," - ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -176,17 +161,15 @@ } } }, - "execution_count": null + "source": [ + "**Important:** restart the kernel by going to \"Run -> Restart & clear cell outputs\" before continuing.\n", + "\n", + "Configure bigframes to use your GCP project. First, go to \"Add-ons -> Google Cloud SDK\" and click the \"Attach\" button. 
Then," + ] }, { - "id": "1fbd4f9e", "cell_type": "code", - "source": [ - "from kaggle_secrets import UserSecretsClient\n", - "user_secrets = UserSecretsClient()\n", - "user_credential = user_secrets.get_gcloud_credential()\n", - "user_secrets.set_tensorflow_credential(user_credential)" - ], + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-08-14T15:53:08.494636Z", @@ -197,21 +180,17 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "from kaggle_secrets import UserSecretsClient\n", + "user_secrets = UserSecretsClient()\n", + "user_credential = user_secrets.get_gcloud_credential()\n", + "user_secrets.set_tensorflow_credential(user_credential)" + ] }, { - "id": "0b0b1cd8", "cell_type": "code", - "source": [ - "import bigframes._config\n", - "import bigframes.pandas as bpd\n", - "\n", - "bpd.options.bigquery.location = \"US\"\n", - "\n", - "# Set to your GCP project ID.\n", - "bpd.options.bigquery.project = \"swast-scratch\"" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -231,17 +210,19 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "import bigframes._config\n", + "import bigframes.pandas as bpd\n", + "\n", + "bpd.options.bigquery.location = \"US\"\n", + "\n", + "# Set to your GCP project ID.\n", + "bpd.options.bigquery.project = \"swast-scratch\"" + ] }, { - "id": "32e58a7f", "cell_type": "markdown", - "source": [ - "## Reading data\n", - "\n", - "BigQuery DataFrames can read data from BigQuery, GCS, or even local sources. With `engine=\"bigquery\"`, BigQuery's distributed processing reads the file without it ever having to reach your local Python environment." 
- ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -256,19 +237,15 @@ "slide_type": "slide" } }, - "execution_count": null + "source": [ + "## Reading data\n", + "\n", + "BigQuery DataFrames can read data from BigQuery, GCS, or even local sources. With `engine=\"bigquery\"`, BigQuery's distributed processing reads the file without it ever having to reach your local Python environment." + ] }, { - "id": "e52aa9e8", "cell_type": "code", - "source": [ - "df = bpd.read_json(\n", - " \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/jukebox.jsonl\",\n", - " engine=\"bigquery\",\n", - " orient=\"records\",\n", - " lines=True,\n", - ")" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -288,16 +265,19 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "df = bpd.read_json(\n", + " \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/jukebox.jsonl\",\n", + " engine=\"bigquery\",\n", + " orient=\"records\",\n", + " lines=True,\n", + ")" + ] }, { - "id": "0c1fca97", "cell_type": "code", - "source": [ - "# Use `peek()` instead of `head()` to see arbitrary rows rather than the \"first\" rows.\n", - "df.peek()" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -320,15 +300,15 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "# Use `peek()` instead of `head()` to see arbitrary rows rather than the \"first\" rows.\n", + "df.peek()" + ] }, { - "id": "4a13e789", "cell_type": "code", - "source": [ - "df.shape" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -348,18 +328,14 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "df.shape" + ] }, { - "id": "26b8baba", "cell_type": "code", - "source": [ - "# For the purposes of a demo, select only a subset of rows.\n", - 
"df = df.sample(n=250)\n", - "df.cache()\n", - "df.shape" - ], + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-08-14T15:55:55.448664Z", @@ -370,32 +346,17 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "# For the purposes of a demo, select only a subset of rows.\n", + "df = df.sample(n=250)\n", + "df.cache()\n", + "df.shape" + ] }, { - "id": "af84cb21", "cell_type": "code", - "source": [ - "# As a side effect of how I extracted the song information from the HTML DOM,\n", - "# we ended up with lists in places where we only expect one item.\n", - "#\n", - "# We can \"explode\" to flatten these lists.\n", - "flattened = df.explode([\n", - " \"Recording Repository\",\n", - " \"Recording Label\",\n", - " \"Recording Take Number\",\n", - " \"Recording Date\",\n", - " \"Recording Matrix Number\",\n", - " \"Recording Catalog Number\",\n", - " \"Media Size\",\n", - " \"Recording Location\",\n", - " \"Summary\",\n", - " \"Rights Advisory\",\n", - " \"Title\",\n", - "])\n", - "flattened.peek()" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -418,15 +379,31 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "# As a side effect of how I extracted the song information from the HTML DOM,\n", + "# we ended up with lists in places where we only expect one item.\n", + "#\n", + "# We can \"explode\" to flatten these lists.\n", + "flattened = df.explode([\n", + " \"Recording Repository\",\n", + " \"Recording Label\",\n", + " \"Recording Take Number\",\n", + " \"Recording Date\",\n", + " \"Recording Matrix Number\",\n", + " \"Recording Catalog Number\",\n", + " \"Media Size\",\n", + " \"Recording Location\",\n", + " \"Summary\",\n", + " \"Rights Advisory\",\n", + " \"Title\",\n", + "])\n", + "flattened.peek()" + ] }, { - "id": "085deffd", "cell_type": "code", - "source": [ - "flattened.shape" - ], + 
"execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-08-14T15:56:06.546531Z", @@ -437,15 +414,13 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "flattened.shape" + ] }, { - "id": "f8e653ee", "cell_type": "markdown", - "source": [ - "To access unstructured data from BigQuery, create a URI pointing to a file in Google Cloud Storage (GCS). Then, construct a \"blob\" (also known as an \"Object Ref\" in BigQuery terms) so that BigQuery can read from GCS." - ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -462,14 +437,13 @@ }, "tags": [] }, - "execution_count": null + "source": [ + "To access unstructured data from BigQuery, create a URI pointing to a file in Google Cloud Storage (GCS). Then, construct a \"blob\" (also known as an \"Object Ref\" in BigQuery terms) so that BigQuery can read from GCS." + ] }, { - "id": "dbd1a844", "cell_type": "code", - "source": [ - "flattened = flattened.assign(**{\\n \"GCS Prefix\": \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/\",\\n \"GCS Stub\": flattened['URL'].str.extract(r'/(jukebox-[0-9]+)/'),\\n})\\nflattened[\"GCS URI\"] = flattened[\"GCS Prefix\"] + flattened[\"GCS Stub\"] + \".mp3\"" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -494,15 +468,18 @@ "tags": [], "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "flattened = flattened.assign(**{\n", + " \"GCS Prefix\": \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/\",\n", + " \"GCS Stub\": flattened['URL'].str.extract(r'/(jukebox-[0-9]+)/'),\n", + "})\n", + "flattened[\"GCS URI\"] = flattened[\"GCS Prefix\"] + flattened[\"GCS Stub\"] + \".mp3\"\n", + "flattened[\"GCS Blob\"] = flattened[\"GCS URI\"].str.to_blob()" + ] }, { - "id": "fae13ec5", "cell_type": "markdown", - "source": [ - "BigQuery (and BigQuery DataFrames) provide access to powerful models and 
multimodal capabilities. Here, we transcribe audio to text." - ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -519,14 +496,13 @@ }, "tags": [] }, - "execution_count": null + "source": [ + "BigQuery (and BigQuery DataFrames) provide access to powerful models and multimodal capabilities. Here, we transcribe audio to text." + ] }, { - "id": "f08f92b1", "cell_type": "code", - "source": [ - "# Code calling .blob.audio_transcribe() was removed to satisfy the goal of removing public Blob APIs." - ], + "execution_count": null, "metadata": { "editable": true, "execution": { @@ -542,15 +518,17 @@ "tags": [], "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "flattened[\"Transcription\"] = flattened[\"GCS Blob\"].blob.audio_transcribe(\n", + " model_name=\"gemini-2.5-flash\",\n", + " verbose=True,\n", + ")\n", + "flattened[\"Transcription\"]" + ] }, { - "id": "30969ae1", "cell_type": "markdown", - "source": [ - "Sometimes the model has transient errors. Check the status column to see if there are errors." - ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -565,16 +543,13 @@ "slide_type": "slide" } }, - "execution_count": null + "source": [ + "Sometimes the model has transient errors. Check the status column to see if there are errors." 
+ ] }, { - "id": "7d0dbc38", "cell_type": "code", - "source": [ - "print(f\"Successful rows: {(flattened['Transcription'].struct.field('status') == '').sum()}\")\n", - "print(f\"Failed rows: {(flattened['Transcription'].struct.field('status') != '').sum()}\")\n", - "flattened.shape" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -599,16 +574,16 @@ "tags": [], "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "print(f\"Successful rows: {(flattened['Transcription'].struct.field('status') == '').sum()}\")\n", + "print(f\"Failed rows: {(flattened['Transcription'].struct.field('status') != '').sum()}\")\n", + "flattened.shape" + ] }, { - "id": "6cddf53b", "cell_type": "code", - "source": [ - "# Show transcribed lyrics.\n", - "flattened[\"Transcription\"].struct.field(\"content\")" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -628,19 +603,15 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "# Show transcribed lyrics.\n", + "flattened[\"Transcription\"].struct.field(\"content\")" + ] }, { - "id": "ba0386cc", "cell_type": "code", - "source": [ - "# Find all instrumentatal songs\n", - "instrumental = flattened[flattened[\"Transcription\"].struct.field(\"content\") == \"\"]\n", - "print(instrumental.shape)\n", - "song = instrumental.peek(1)\n", - "song" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -663,22 +634,18 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "# Find all instrumentatal songs\n", + "instrumental = flattened[flattened[\"Transcription\"].struct.field(\"content\") == \"\"]\n", + "print(instrumental.shape)\n", + "song = instrumental.peek(1)\n", + "song" + ] }, { - "id": "61a883b2", "cell_type": "code", - "source": [ - "import gcsfs\n", - "import IPython.display\n", - "\n", 
- "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", - "with fs.open(song[\"GCS URI\"].iloc[0]) as song_file:\n", - " song_bytes = song_file.read()\n", - "\n", - "IPython.display.Audio(song_bytes)" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -703,19 +670,20 @@ "tags": [], "trusted": true }, - "execution_count": null, - "outputs": [] - }, - { - "id": "e8a25c46", - "cell_type": "markdown", + "outputs": [], "source": [ - "## Creating a searchable index\n", + "import gcsfs\n", + "import IPython.display\n", "\n", - "To be able to search by semantics rather than just text, generate embeddings and then create an index to efficiently search these.\n", + "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", + "with fs.open(song[\"GCS URI\"].iloc[0]) as song_file:\n", + " song_bytes = song_file.read()\n", "\n", - "See also, this example: https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/bq_dataframes_llm_vector_search.ipynb" - ], + "IPython.display.Audio(song_bytes)" + ] + }, + { + "cell_type": "markdown", "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -730,16 +698,17 @@ "slide_type": "slide" } }, - "execution_count": null + "source": [ + "## Creating a searchable index\n", + "\n", + "To be able to search by semantics rather than just text, generate embeddings and then create an index to efficiently search these.\n", + "\n", + "See also, this example: https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/bq_dataframes_llm_vector_search.ipynb" + ] }, { - "id": "ead0fa8c", "cell_type": "code", - "source": [ - "from bigframes.ml.llm import TextEmbeddingGenerator\n", - "\n", - "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -759,21 +728,16 @@ }, "trusted": true }, - "execution_count": null, - "outputs": 
[] + "outputs": [], + "source": [ + "from bigframes.ml.llm import TextEmbeddingGenerator\n", + "\n", + "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")" + ] }, { - "id": "5ed7776d", "cell_type": "code", - "source": [ - "df_to_index = (\n", - " flattened\n", - " .assign(content=flattened[\"Transcription\"].struct.field(\"content\"))\n", - " [flattened[\"Transcription\"].struct.field(\"content\") != \"\"]\n", - ")\n", - "embedding = text_model.predict(df_to_index)\n", - "embedding.peek(1)" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -793,18 +757,20 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "df_to_index = (\n", + " flattened\n", + " .assign(content=flattened[\"Transcription\"].struct.field(\"content\"))\n", + " [flattened[\"Transcription\"].struct.field(\"content\") != \"\"]\n", + ")\n", + "embedding = text_model.predict(df_to_index)\n", + "embedding.peek(1)" + ] }, { - "id": "c96e9832", "cell_type": "code", - "source": [ - "# Check the status column to look for errors.\n", - "print(f\"Successful rows: {(embedding['ml_generate_embedding_status'] == '').sum()}\")\n", - "print(f\"Failed rows: {(embedding['ml_generate_embedding_status'] != '').sum()}\")\n", - "embedding.shape" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -829,15 +795,16 @@ "tags": [], "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "# Check the status column to look for errors.\n", + "print(f\"Successful rows: {(embedding['ml_generate_embedding_status'] == '').sum()}\")\n", + "print(f\"Failed rows: {(embedding['ml_generate_embedding_status'] != '').sum()}\")\n", + "embedding.shape" + ] }, { - "id": "0e2a5d7b", "cell_type": "markdown", - "source": [ - "We're now ready to save this to a table." 
- ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -849,15 +816,13 @@ } } }, - "execution_count": null + "source": [ + "We're now ready to save this to a table." + ] }, { - "id": "51819a0c", "cell_type": "code", - "source": [ - "embedding_table_id = f\"{bpd.options.bigquery.project}.kaggle.national_jukebox\"\n", - "embedding.to_gbq(embedding_table_id, if_exists=\"replace\")" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -877,20 +842,14 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "embedding_table_id = f\"{bpd.options.bigquery.project}.kaggle.national_jukebox\"\n", + "embedding.to_gbq(embedding_table_id, if_exists=\"replace\")" + ] }, { - "id": "5e16fb14", "cell_type": "markdown", - "source": [ - "## Searching the database\n", - "\n", - "To search by semantics, we:\n", - "\n", - "1. Turn our search string into an embedding using the same model as our index.\n", - "2. Find the closest matches to the search string." - ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -905,17 +864,18 @@ "slide_type": "slide" } }, - "execution_count": null + "source": [ + "## Searching the database\n", + "\n", + "To search by semantics, we:\n", + "\n", + "1. Turn our search string into an embedding using the same model as our index.\n", + "2. Find the closest matches to the search string." 
+ ] }, { - "id": "1bad3317", "cell_type": "code", - "source": [ - "import bigframes.pandas as bpd\n", - "\n", - "df_written = bpd.read_gbq(embedding_table_id)\n", - "df_written.peek(1)" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -938,22 +898,17 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "import bigframes.pandas as bpd\n", + "\n", + "df_written = bpd.read_gbq(embedding_table_id)\n", + "df_written.peek(1)" + ] }, { - "id": "8aaaef1f", "cell_type": "code", - "source": [ - "from bigframes.ml.llm import TextEmbeddingGenerator\n", - "\n", - "search_string = \"walking home\"\n", - "\n", - "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")\n", - "search_df = bpd.DataFrame([search_string], columns=['search_string'])\n", - "search_embedding = text_model.predict(search_df)\n", - "search_embedding" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -973,24 +928,21 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "from bigframes.ml.llm import TextEmbeddingGenerator\n", + "\n", + "search_string = \"walking home\"\n", + "\n", + "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")\n", + "search_df = bpd.DataFrame([search_string], columns=['search_string'])\n", + "search_embedding = text_model.predict(search_df)\n", + "search_embedding" + ] }, { - "id": "908a2340", "cell_type": "code", - "source": [ - "import bigframes.bigquery as bbq\n", - "\n", - "vector_search_results = bbq.vector_search(\n", - " base_table=f\"swast-scratch.scipy2025.national_jukebox\",\n", - " column_to_search=\"ml_generate_embedding_result\",\n", - " query=search_embedding,\n", - " distance_type=\"COSINE\",\n", - " query_column_to_search=\"ml_generate_embedding_result\",\n", - " top_k=5,\n", - ")" - ], + "execution_count": null, "metadata": { 
"@deathbeds/jupyterlab-fonts": { "styles": { @@ -1015,15 +967,23 @@ "tags": [], "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "import bigframes.bigquery as bbq\n", + "\n", + "vector_search_results = bbq.vector_search(\n", + " base_table=f\"swast-scratch.scipy2025.national_jukebox\",\n", + " column_to_search=\"ml_generate_embedding_result\",\n", + " query=search_embedding,\n", + " distance_type=\"COSINE\",\n", + " query_column_to_search=\"ml_generate_embedding_result\",\n", + " top_k=5,\n", + ")" + ] }, { - "id": "f84ebe70", "cell_type": "code", - "source": [ - "vector_search_results.dtypes" - ], + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-08-14T16:05:50.566930Z", @@ -1034,16 +994,14 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "vector_search_results.dtypes" + ] }, { - "id": "eeff1c72", "cell_type": "code", - "source": [ - "results = vector_search_results[[\"Title\", \"Summary\", \"Names\", \"GCS URI\", \"Transcription\", \"distance\"]].sort_values(\"distance\").to_pandas()\n", - "results" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -1066,15 +1024,15 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "results = vector_search_results[[\"Title\", \"Summary\", \"Names\", \"GCS URI\", \"Transcription\", \"distance\"]].sort_values(\"distance\").to_pandas()\n", + "results" + ] }, { - "id": "7ec53675", "cell_type": "code", - "source": [ - "print(results[\"Transcription\"].struct.field(\"content\").iloc[0])" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -1094,22 +1052,14 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "print(results[\"Transcription\"].struct.field(\"content\").iloc[0])" + ] }, { - "id": "a96552fb", "cell_type": 
"code", - "source": [ - "import gcsfs\n", - "import IPython.display\n", - "\n", - "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", - "with fs.open(results[\"GCS URI\"].iloc[0]) as song_file:\n", - " song_bytes = song_file.read()\n", - "\n", - "IPython.display.Audio(song_bytes)" - ], + "execution_count": null, "metadata": { "editable": true, "execution": { @@ -1126,18 +1076,26 @@ "tags": [], "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "import gcsfs\n", + "import IPython.display\n", + "\n", + "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", + "with fs.open(results[\"GCS URI\"].iloc[0]) as song_file:\n", + " song_bytes = song_file.read()\n", + "\n", + "IPython.display.Audio(song_bytes)" + ] }, { - "id": "72af7c7f", "cell_type": "code", - "source": [], + "execution_count": null, "metadata": { "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [] } ], "metadata": { @@ -1174,6 +1132,6 @@ "version": "3.11.13" } }, - "nbformat_minor": 4, - "nbformat": 4 -} \ No newline at end of file + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb index cd363db6f362..8f3241259d5f 100644 --- a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb @@ -2,8 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, - "id": "9edad7a6", + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -24,7 +23,6 @@ }, { "cell_type": "markdown", - "id": "816ab253", "metadata": { "id": "YOrUAvz6DMw-" }, @@ -55,7 +53,6 @@ }, { "cell_type": "markdown", - "id": "77d821d4", "metadata": {}, "source": [ "This notebook is introducing BigFrames Multimodal features:\n", @@ -70,7 +67,6 @@ }, { "cell_type": "markdown", - "id": "75ab1c13", "metadata": { "id": 
"PEAJQQ6AFg-n" }, @@ -80,7 +76,6 @@ }, { "cell_type": "markdown", - "id": "750954c4", "metadata": {}, "source": [ "Install the latest bigframes package if bigframes version < 2.4.0" @@ -88,8 +83,7 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "2a6fafb1", + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -98,8 +92,7 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "df561d04", + "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -122,8 +115,6 @@ "# Refer to https://cloud.google.com/bigquery/docs/multimodal-data-dataframes-tutorial#grant-permissions for setting up connection service account permissions.\n", "# In this Notebook it uses bigframes-default-connection by default. You can also bring in your own connections in each method.\n", "\n", - "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", - "\n", "import bigframes\n", "# Setup project\n", "bigframes.options.bigquery.project = PROJECT\n", @@ -139,8 +130,7 @@ }, { "cell_type": "code", - "execution_count": 35, - "id": "35bd6e6e", + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -181,90 +171,22 @@ " return bbq.json_value(get_metadata(series), \"$.size\").astype(\"Int64\")\n", "\n", "def get_updated(series):\n", - " return bpd.to_datetime(bbq.json_value(get_metadata(series), \"$.updated\").astype(\"Int64\"), unit=\"us\", utc=True)\n", - "\n", - "from IPython.display import HTML, display\n", - "\n", - "def render_images(df):\n", - " \"\"\"Helper to display BigFrames DataFrame with rendered image previews.\"\"\"\n", - " import bigframes.pandas as bpd\n", - " import bigframes.bigquery as bbq\n", - " import bigframes\n", - " from bigframes import dtypes\n", - " import json\n", - " \n", - " if isinstance(df, bpd.Series):\n", - " df = df.to_frame()\n", - " \n", - " # 1. 
Auto-detect columns holding ObjectRefs\n", - " object_cols = [\n", - " col for col, dtype in zip(df.columns, df.dtypes)\n", - " if dtype == dtypes.OBJ_REF_DTYPE\n", - " ]\n", - " \n", - " if not object_cols:\n", - " display(df)\n", - " return\n", - "\n", - " limit = bigframes.options.display.max_rows or 10\n", - " view_df = df.head(limit)\n", - " \n", - " # 2. Bulk-fetch access runtime URLs ONLY (disable with_metadata to bypass potential \n", - " # race conditions on new files where BigQuery may error before async writes finalize)\n", - " runtime_cols = {\n", - " col: get_runtime_json_str(view_df[col], mode=\"R\", with_metadata=False) \n", - " for col in object_cols\n", - " }\n", - " \n", - " pandas_json_df = bpd.DataFrame(runtime_cols).to_pandas()\n", - " final_pd = view_df.to_pandas()\n", - " \n", - " width = bigframes.options.display.blob_display_width or 300\n", - " IMAGE_EXTENSIONS = (\".png\", \".jpg\", \".jpeg\", \".gif\", \".webp\")\n", - " \n", - " def format_cell_html(raw_json):\n", - " if not raw_json:\n", - " return \"\"\n", - " try:\n", - " obj_rt = json.loads(raw_json)\n", - " \n", - " if \"access_urls\" not in obj_rt:\n", - " err = obj_rt.get(\"errors\", [{\"message\": \"URL Generation Failed\"}])[0].get(\"message\")\n", - " return f'Error: {err}'\n", - " \n", - " uri = obj_rt.get(\"objectref\", {}).get(\"uri\", \"\")\n", - " url = obj_rt[\"access_urls\"][\"read_url\"]\n", - " \n", - " # Safely infer type from extension to guarantee immediate display availability\n", - " if uri and str(uri).lower().endswith(IMAGE_EXTENSIONS):\n", - " return f''\n", - " \n", - " return f'{uri if uri else \"view\"}'\n", - " except:\n", - " return \"Format Error\"\n", - "\n", - " for col in object_cols:\n", - " final_pd[col] = pandas_json_df[col].map(format_cell_html)\n", - " \n", - " display(HTML(final_pd.to_html(escape=False)))" + " return bpd.to_datetime(bbq.json_value(get_metadata(series), \"$.updated\").astype(\"Int64\"), unit=\"us\", utc=True)" ] }, { "cell_type": 
"markdown", - "id": "be9ce892", "metadata": { "id": "ifKOq7VZGtZy" }, "source": [ - "To create a Multimodal DataFrame, you can use `bigframes.bigquery.obj.make_ref` on a series of URIs. You can get the URIs from a BigQuery table or by listing them from Cloud Storage.\n", - "\n", - "In this example, we use `gcsfs` to list the files from Cloud Storage, and then use `read_gbq` to load them into a BigQuery DataFrame before creating the object reference." + "### 1. Create Multimodal DataFrame\n", + "There are several ways to create Multimodal DataFrame. The easiest way is from the wildcard paths." ] }, { "cell_type": "code", - "execution_count": 36, - "id": "871d02f4", + "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -274,29 +196,15 @@ }, "outputs": [], "source": [ - "import gcsfs\n", - "import bigframes.bigquery as bbq\n", - "\n", - "# List files using gcsfs (public bucket)\n", - "fs = gcsfs.GCSFileSystem(anon=True)\n", - "uris = fs.glob(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\")\n", - "\n", - "# Ensure URIs have gs:// prefix\n", - "uris = [u if u.startswith(\"gs://\") else f\"gs://{u}\" for u in uris]\n", - "\n", - "# Read the URIs into a BigQuery DataFrame using UNNEST\n", - "# We take the first 5 for this example\n", - "df_image = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", - "\n", - "# Create the object reference column\n", - "df_image['image'] = bbq.obj.make_ref(df_image['uri'], authorizer=FULL_CONNECTION_ID)\n", - "df_image = df_image[['image']]" + "# Create blob columns from wildcard path.\n", + "df_image = bpd.from_glob_path(\n", + " \"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\", name=\"image\"\n", + ")" ] }, { "cell_type": "code", - "execution_count": 37, - "id": "2e0436b0", + "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -310,15 +218,31 @@ "name": "stderr", "output_type": "stream", "text": [ - 
"/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" ] }, { "data": { "text/html": [ + "
\n", + "\n", "\n", " \n", " \n", @@ -329,44 +253,53 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", - "
0
1
2
3
4
" + "\n", + "

5 rows × 1 columns

\n", + "
[5 rows x 1 columns in total]" ], "text/plain": [ - "" + " image\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3...\n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3...\n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3...\n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3...\n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3...\n", + "\n", + "[5 rows x 1 columns]" ] }, + "execution_count": 6, "metadata": {}, - "output_type": "display_data" + "output_type": "execute_result" } ], "source": [ "# Take only the 5 images to deal with. Preview the content of the Mutimodal DataFrame\n", "df_image = df_image.head(5)\n", - "render_images(df_image)" + "df_image" ] }, { "cell_type": "markdown", - "id": "429b0117", "metadata": { "id": "b6RRZb3qPi_T" }, @@ -376,7 +309,6 @@ }, { "cell_type": "markdown", - "id": "991fa065", "metadata": { "id": "4YJCdmLtR-qu" }, @@ -386,8 +318,7 @@ }, { "cell_type": "code", - "execution_count": 38, - "id": "08722ec5", + "execution_count": 7, "metadata": { "id": "YYYVn7NDH0Me" }, @@ -396,15 +327,31 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in 
a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" ] }, { "data": { "text/html": [ + "
\n", + "\n", "\n", " \n", " \n", @@ -419,53 +366,70 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - "
0aliceimage/png7157662025-03-20 17:44:38+00:0015912402025-03-20 17:45:04+00:00
1bobimage/png11674062025-03-20 17:44:38+00:0011829512025-03-20 17:45:02+00:00
2bobimage/png11508922025-03-20 17:44:39+00:0015208842025-03-20 17:44:55+00:00
3aliceimage/png17365332025-03-20 17:44:39+00:0012354012025-03-20 17:45:19+00:00
4bobimage/png4397402025-03-20 17:44:39+00:0015919232025-03-20 17:44:47+00:00
" + "\n", + "

5 rows × 5 columns

\n", + "
[5 rows x 5 columns in total]" ], "text/plain": [ - "" + " image author content_type \\\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... alice image/png \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... bob image/png \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... bob image/png \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... alice image/png \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... bob image/png \n", + "\n", + " size updated \n", + "0 1591240 2025-03-20 17:45:04+00:00 \n", + "1 1182951 2025-03-20 17:45:02+00:00 \n", + "2 1520884 2025-03-20 17:44:55+00:00 \n", + "3 1235401 2025-03-20 17:45:19+00:00 \n", + "4 1591923 2025-03-20 17:44:47+00:00 \n", + "\n", + "[5 rows x 5 columns]" ] }, + "execution_count": 7, "metadata": {}, - "output_type": "display_data" + "output_type": "execute_result" } ], "source": [ @@ -475,12 +439,11 @@ "df_image[\"content_type\"] = get_content_type(df_image[\"image\"])\n", "df_image[\"size\"] = get_size(df_image[\"image\"])\n", "df_image[\"updated\"] = get_updated(df_image[\"image\"])\n", - "render_images(df_image)" + "df_image" ] }, { "cell_type": "markdown", - "id": "f90826f6", "metadata": {}, "source": [ "### 3. Conduct image transformations" @@ -488,7 +451,6 @@ }, { "cell_type": "markdown", - "id": "e24c9f8c", "metadata": {}, "source": [ "This section demonstrates how to perform image transformations like blur, resize, and normalize using custom BigQuery Python UDFs and the `opencv-python` library." 
@@ -496,8 +458,7 @@ }, { "cell_type": "code", - "execution_count": 39, - "id": "db665049", + "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -511,19 +472,37 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/pandas/__init__.py:211: PreviewWarning: udf is in preview.\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", " return global_session.with_default_session(\n", - "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dataframe.py:4695: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4655: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", - "/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" ] }, { "data": { "text/html": [ + "
\n", + "\n", "\n", " \n", " \n", @@ -535,38 +514,55 @@ " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - "
0
1
2
3
4
" + "\n", + "

5 rows × 2 columns

\n", + "
[5 rows x 2 columns in total]" ], "text/plain": [ - "" + " image \\\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "\n", + " blurred \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "\n", + "[5 rows x 2 columns]" ] }, + "execution_count": 8, "metadata": {}, - "output_type": "display_data" + "output_type": "execute_result" } ], "source": [ @@ -577,9 +573,9 @@ " input_types=[str, str, int, int],\n", " output_type=str,\n", " dataset=DATASET_ID,\n", - " name=\"image_blur_v2\",\n", + " name=\"image_blur\",\n", " bigquery_connection=FULL_CONNECTION_ID,\n", - " packages=[\"opencv-python-headless\", \"numpy\", \"requests\"],\n", + " packages=[\"opencv-python\", \"numpy\", \"requests\"],\n", ")\n", "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -> str:\n", " import json\n", @@ -589,8 +585,6 @@ " import base64\n", "\n", " src_obj = json.loads(src_rt)\n", - " if \"access_urls\" not in src_obj:\n", - " raise ValueError(f\"Missing 'access_urls' in source object. Response: {src_obj}\")\n", " src_url = src_obj[\"access_urls\"][\"read_url\"]\n", " \n", " response = requests.get(src_url, timeout=30)\n", @@ -610,8 +604,6 @@ " # Handle two output modes\n", " if dst_rt: # GCS/Series output mode\n", " dst_obj = json.loads(dst_rt)\n", - " if \"access_urls\" not in dst_obj:\n", - " raise ValueError(f\"Missing 'access_urls' in destination object. Verify authorizer permissions. 
Response: {dst_obj}\")\n", " dst_url = dst_obj[\"access_urls\"][\"write_url\"]\n", " \n", " requests.put(dst_url, data=encoded.tobytes(), headers={\"Content-Type\": \"image/jpeg\"}, timeout=30).raise_for_status()\n", @@ -630,13 +622,7 @@ " metadata = bbq.obj.fetch_metadata(series)\n", " current_uri = metadata.struct.field(\"uri\")\n", " dst_uri = current_uri.str.replace(r\"^.*\\/(.*)$\", rf\"{dst_folder}\\1\", regex=True)\n", - " \n", - " # To avoid synchronous 404 validation checks on files that don't exist yet, \n", - " # bypass the validator by explicitly constructing an objectref JSON.\n", - " dst_blob_df = bpd.DataFrame({\"uri\": dst_uri})\n", - " dst_blob_df[\"authorizer\"] = FULL_CONNECTION_ID\n", - " dst_blob = bbq.obj.make_ref(bbq.to_json(bbq.struct(dst_blob_df)))\n", - "\n", + " dst_blob = dst_uri.str.to_blob(connection=FULL_CONNECTION_ID)\n", " df_transform = bpd.DataFrame({\n", " \"src_rt\": get_runtime_json_str(series, mode=\"R\"),\n", " \"dst_rt\": get_runtime_json_str(dst_blob, mode=\"RW\"),\n", @@ -644,27 +630,18 @@ " res = df_transform[[\"src_rt\", \"dst_rt\"]].apply(\n", " udf, axis=1, args=args\n", " )\n", - " \n", - " if verbose:\n", - " return res\n", - " \n", - " # Final return MUST also use JSON bypass to eliminate temporary 404 validation \n", - " # errors from embedded ObjectRefs during fused query execution pipelines.\n", - " res_df = bpd.DataFrame({\"uri\": res})\n", - " res_df[\"authorizer\"] = FULL_CONNECTION_ID\n", - " return bbq.obj.make_ref(bbq.to_json(bbq.struct(res_df)))\n", + " return res if verbose else res.str.to_blob(connection=FULL_CONNECTION_ID)\n", "\n", "# Apply transformations\n", "df_image[\"blurred\"] = apply_transformation(\n", " df_image[\"image\"], f\"gs://{OUTPUT_BUCKET}/image_blur_transformed/\",\n", " image_blur, 20, 20\n", ")\n", - "render_images(df_image[[\"image\", \"blurred\"]])" + "df_image[[\"image\", \"blurred\"]]" ] }, { "cell_type": "markdown", - "id": "11fcc6ec", "metadata": { "id": "Euk5saeVVdTP" }, @@ 
-674,12 +651,22 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "793b2f45", + "execution_count": 9, "metadata": { "id": "mRUGfcaFVW-3" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:183: FutureWarning: Since upgrading the default model can cause unintended breakages, the\n", + "default model will be removed in BigFrames 3.0. Please supply an\n", + "explicit model to avoid this message.\n", + " return method(*args, **kwargs)\n" + ] + } + ], "source": [ "from bigframes.ml import llm\n", "gemini = llm.GeminiTextGenerator()" @@ -687,8 +674,7 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "13d7cb93", + "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -697,17 +683,113 @@ "id": "DNFP7CbjWdR9", "outputId": "3f90a062-0abc-4bce-f53c-db57b06a14b9" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_text_llm_resultimage
0The item is a container of K9 Guard Dog Paw Balm.
1The item is K9 Guard Dog Hot Spot Spray.
2The image contains three bags of food, likely for small animals like rabbits or guinea pigs. They are labeled \"Timoth Hay Lend Variety Plend\", \"Herbal Greeıs Mix Variety Blend\", and \"Berry & Blossom Treat Blend\", all under the brand \"Fluffy Buns.\" The bags are yellow, green, and purple, respectively. Each bag has a pile of its contents beneath it.
3The item is a cat tree.\\n
4The item is a bag of bird seed. Specifically, it's labeled \"Chirpy Seed\", \"Deluxe Bird Food\".\\n
\n", + "

5 rows × 2 columns

\n", + "
[5 rows x 2 columns in total]" + ], + "text/plain": [ + " ml_generate_text_llm_result \\\n", + "0 The item is a container of K9 Guard Dog Paw Balm. \n", + "1 The item is K9 Guard Dog Hot Spot Spray. \n", + "2 The image contains three bags of food, likely ... \n", + "3 The item is a cat tree.\\n \n", + "4 The item is a bag of bird seed. Specifically, ... \n", + "\n", + " image \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "\n", + "[5 rows x 2 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Ask the same question on the images\n", - "answer = gemini.predict(df_image, prompt=[\"what item is it?\", \"what color is the picture?\"])\n", - "render_images(answer[[\"ml_generate_text_llm_result\", \"image\"]])" + "answer = gemini.predict(df_image, prompt=[\"what item is it?\", df_image[\"image\"]])\n", + "answer[[\"ml_generate_text_llm_result\", \"image\"]]" ] }, { "cell_type": "code", - "execution_count": null, - "id": "68857305", + "execution_count": 11, "metadata": { "id": "IG3J3HsKhyBY" }, @@ -725,8 +807,7 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "829afc69", + "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -735,16 +816,112 @@ "id": "qKOb765IiVuD", "outputId": "731bafad-ea29-463f-c8c1-cb7acfd70e5d" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + 
"(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_text_llm_resultimage
0The item is a container of Dog Paw Balm.
1The picture contains many colors, including white, black, green, and a bright blue. The product label predominantly features a bright blue hue. The background is a solid gray.
2Here are the product names from the image:\\n\\n* **Timoth Hay Lend Variety Plend** is the product in the yellow bag.\\n* **Herbal Greeıs Mix Variety Blend** is the product in the green bag.\\n* **Berry & Blossom Treat Blend** is the product in the purple bag.
3Yes, it is for pets. It appears to be a cat tree or scratching post.\\n
4The image shows that the weight of the product is 15 oz/ 257g.
\n", + "

5 rows × 2 columns

\n", + "
[5 rows x 2 columns in total]" + ], + "text/plain": [ + " ml_generate_text_llm_result \\\n", + "0 The item is a container of Dog Paw Balm. \n", + "1 The picture contains many colors, including wh... \n", + "2 Here are the product names from the image:\\n\\n... \n", + "3 Yes, it is for pets. It appears to be a cat tr... \n", + "4 The image shows that the weight of the product... \n", + "\n", + " image \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "\n", + "[5 rows x 2 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "answer_alt = gemini.predict(df_image, prompt=[df_image[\"question\"], df_image[\"image\"]])\n", - "render_images(answer_alt[[\"ml_generate_text_llm_result\", \"image\"]])" + "answer_alt[[\"ml_generate_text_llm_result\", \"image\"]]" ] }, { "cell_type": "code", - "execution_count": null, - "id": "e75df430", + "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -753,7 +930,138 @@ "id": "KATVv2CO5RT1", "outputId": "6ec01f27-70b6-4f69-c545-e5e3c879480c" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:183: FutureWarning: Since upgrading the default model can cause unintended breakages, the\n", + "default model will be removed in BigFrames 3.0. 
Please supply an\n", + "explicit model to avoid this message.\n", + " return method(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_embedding_resultml_generate_embedding_statusml_generate_embedding_start_secml_generate_embedding_end_seccontent
0[ 0.00638822 0.01666385 0.00451817 ... -0.02...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
1[ 0.00973976 0.02148137 0.0024429 ... 0.00...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
2[ 0.01195884 0.02139394 0.05968047 ... -0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
3[-0.02621161 0.02797648 0.04416926 ... -0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
4[ 0.05918628 0.0125137 0.01907336 ... 0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
\n", + "

5 rows × 5 columns

\n", + "
[5 rows x 5 columns in total]" + ], + "text/plain": [ + " ml_generate_embedding_result \\\n", + "0 [ 0.00638822 0.01666385 0.00451817 ... -0.02... \n", + "1 [ 0.00973976 0.02148137 0.0024429 ... 0.00... \n", + "2 [ 0.01195884 0.02139394 0.05968047 ... -0.01... \n", + "3 [-0.02621161 0.02797648 0.04416926 ... -0.01... \n", + "4 [ 0.05918628 0.0125137 0.01907336 ... 0.01... \n", + "\n", + " ml_generate_embedding_status ml_generate_embedding_start_sec \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "\n", + " ml_generate_embedding_end_sec \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "\n", + " content \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", + "\n", + "[5 rows x 5 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Generate embeddings.\n", "embed_model = llm.MultimodalEmbeddingGenerator()\n", @@ -763,7 +1071,6 @@ }, { "cell_type": "markdown", - "id": "23892b0e", "metadata": { "id": "iRUi8AjG7cIf" }, @@ -775,10 +1082,18 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "136a18b8", + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", + " return global_session.with_default_session(\n" + ] + } + ], "source": [ "# Construct the canonical connection ID\n", "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", @@ -851,27 +1166,62 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "234a5f86", + "execution_count": 15, "metadata": {}, 
- "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
extracted_textchunked
0CritterCuisine Pro 5000 - Automatic Pet Feeder...[\"CritterCuisine Pro 5000 - Automatic Pet Feed...
\n", + "

1 rows × 2 columns

\n", + "
[1 rows x 2 columns in total]" + ], + "text/plain": [ + " extracted_text \\\n", + "0 CritterCuisine Pro 5000 - Automatic Pet Feeder... \n", + "\n", + " chunked \n", + "0 [\"CritterCuisine Pro 5000 - Automatic Pet Feed... \n", + "\n", + "[1 rows x 2 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "import gcsfs\n", - "import bigframes.bigquery as bbq\n", - "\n", - "# List files using gcsfs\n", - "fs = gcsfs.GCSFileSystem(anon=True)\n", - "uris = fs.glob(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*\")\n", - "\n", - "# Ensure URIs have gs:// prefix\n", - "uris = [u if u.startswith(\"gs://\") else f\"gs://{u}\" for u in uris]\n", - "\n", - "# Read the URIs into a BigQuery DataFrame\n", - "df_pdf = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", - "\n", - "# Create the object reference column\n", - "df_pdf['pdf'] = bbq.obj.make_ref(df_pdf['uri'], authorizer=FULL_CONNECTION_ID)\n", - "df_pdf = df_pdf[['pdf']]\n", + "df_pdf = bpd.from_glob_path(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*\", name=\"pdf\")\n", "\n", "# Generate a JSON string containing the runtime information (including signed read URLs)\n", "access_urls = get_runtime_json_str(df_pdf[\"pdf\"], mode=\"R\")\n", @@ -887,10 +1237,36 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "d80effbe", + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
0    CritterCuisine Pro 5000 - Automatic Pet Feeder...\n",
+       "0    on a level, stable surface to prevent tipping....\n",
+       "0    included)\\nto maintain the schedule during pow...\n",
+       "0    digits for Meal 1 will flash.\\n\u0000. Use the UP/D...\n",
+       "0    paperclip) for 5\\nseconds. This will reset all...\n",
+       "0    unit with a damp cloth. Do not immerse the bas...\n",
+       "0    continues,\\ncontact customer support.\\nE2: Foo...
" + ], + "text/plain": [ + "0 CritterCuisine Pro 5000 - Automatic Pet Feeder...\n", + "0 on a level, stable surface to prevent tipping....\n", + "0 included)\\nto maintain the schedule during pow...\n", + "0 digits for Meal 1 will flash.\\n\u0000. Use the UP/D...\n", + "0 paperclip) for 5\\nseconds. This will reset all...\n", + "0 unit with a damp cloth. Do not immerse the bas...\n", + "0 continues,\\ncontact customer support.\\nE2: Foo...\n", + "Name: chunked, dtype: string" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Explode the chunks to see each chunk as a separate row\n", "chunked = df_pdf[\"chunked\"].explode()\n", @@ -899,7 +1275,6 @@ }, { "cell_type": "markdown", - "id": "118cf1c7", "metadata": {}, "source": [ "### 6. Audio transcribe" @@ -907,42 +1282,44 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "1794c54f", + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ - "import gcsfs\n", - "import bigframes.bigquery as bbq\n", - "\n", "audio_gcs_path = \"gs://bigframes_blob_test/audio/*\"\n", - "\n", - "# List files using gcsfs\n", - "fs = gcsfs.GCSFileSystem()\n", - "uris = fs.glob(audio_gcs_path)\n", - "\n", - "# Ensure URIs have gs:// prefix\n", - "uris = [u if u.startswith(\"gs://\") else f\"gs://{u}\" for u in uris]\n", - "\n", - "# Read the URIs into a BigQuery DataFrame\n", - "# If the bucket is empty or doesn't exist, this will result in an empty DataFrame\n", - "if not uris:\n", - " # Fallback to a dummy list or just let it be empty\n", - " uris = [\"gs://bigframes_blob_test/audio/dummy.mp3\"]\n", - "\n", - "df = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", - "\n", - "# Create the object reference column\n", - "df['audio'] = bbq.obj.make_ref(df['uri'], authorizer=FULL_CONNECTION_ID)\n", - "df = df[['audio']]" + "df = bpd.from_glob_path(audio_gcs_path, name=\"audio\")" ] }, { "cell_type": "code", "execution_count": null, - "id": 
"c9f9d484", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
0    Now, as all books, not primarily intended as p...
" + ], + "text/plain": [ + "0 Now, as all books, not primarily intended as p...\n", + "Name: transcribed_content, dtype: string" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# The audio_transcribe function is a convenience wrapper around bigframes.bigquery.ai.generate.\n", "# Here's how to perform the same operation directly:\n", @@ -972,10 +1349,24 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "7209a62a", + "execution_count": 19, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
0    {'status': '', 'content': 'Now, as all books, ...
" + ], + "text/plain": [ + "0 {'status': '', 'content': 'Now, as all books, ...\n", + "Name: transcription_results, dtype: struct[pyarrow]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# To get verbose results (including status), we can extract both fields from the result struct.\n", "transcribed_content_series = transcribed_results.struct.field(\"result\")\n", @@ -994,7 +1385,6 @@ }, { "cell_type": "markdown", - "id": "c8351cc3", "metadata": {}, "source": [ "### 7. Extract EXIF metadata from images" @@ -1002,7 +1392,6 @@ }, { "cell_type": "markdown", - "id": "e59670b9", "metadata": {}, "source": [ "This section demonstrates how to extract EXIF metadata from images using a custom BigQuery Python UDF and the `Pillow` library." @@ -1010,10 +1399,18 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "fda362f4", + "execution_count": 20, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", + " return global_session.with_default_session(\n" + ] + } + ], "source": [ "# Construct the canonical connection ID\n", "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", @@ -1050,27 +1447,39 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "40bb6bc9", + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/utils.py:228: PreviewWarning: The JSON-related API `parse_json` is in preview. Its behavior may\n", + "change in future versions.\n", + " warnings.warn(bfe.format_message(msg), category=bfe.PreviewWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
0    {\"ExifOffset\":47,\"Make\":\"MyCamera\"}
" + ], + "text/plain": [ + "0 {\"ExifOffset\":47,\"Make\":\"MyCamera\"}\n", + "Name: blob_col, dtype: extension>[pyarrow]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "import gcsfs\n", - "import bigframes.bigquery as bbq\n", - "\n", "# Create a Multimodal DataFrame from the sample image URIs\n", - "fs = gcsfs.GCSFileSystem()\n", - "uris = fs.glob(\"gs://bigframes_blob_test/images_exif/*\")\n", - "\n", - "# Ensure URIs have gs:// prefix\n", - "uris = [u if u.startswith(\"gs://\") else f\"gs://{u}\" for u in uris]\n", - "\n", - "if not uris:\n", - " uris = [\"gs://bigframes_blob_test/images_exif/dummy.jpg\"]\n", - "\n", - "exif_image_df = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", - "exif_image_df['blob_col'] = bbq.obj.make_ref(exif_image_df['uri'], authorizer=FULL_CONNECTION_ID)\n", - "exif_image_df = exif_image_df[['blob_col']]\n", + "exif_image_df = bpd.from_glob_path(\n", + " \"gs://bigframes_blob_test/images_exif/*\",\n", + " name=\"blob_col\",\n", + ")\n", "\n", "# Generate a JSON string containing the runtime information (including signed read URLs)\n", "# This allows the UDF to download the images from Google Cloud Storage\n",