From 77bc4bf2518cecb2c2942c0b6a4d47ab398b6d1e Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Wed, 24 Jul 2024 07:03:20 +0000 Subject: [PATCH 1/4] fix: reduce redundant `remote_function` deployments --- .../functions/remote_function_template.py | 36 ++- .../remote_function_usecases.ipynb | 218 +++++++++--------- 2 files changed, 131 insertions(+), 123 deletions(-) diff --git a/bigframes/functions/remote_function_template.py b/bigframes/functions/remote_function_template.py index 68fe1b917d..f852b263cd 100644 --- a/bigframes/functions/remote_function_template.py +++ b/bigframes/functions/remote_function_template.py @@ -215,9 +215,9 @@ def udf_http_row_processor(request): def generate_udf_code(def_, directory): - """Generate serialized bytecode using cloudpickle given a udf.""" + """Generate serialized code using cloudpickle given a udf.""" udf_code_file_name = "udf.py" - udf_bytecode_file_name = "udf.cloudpickle" + udf_pickle_file_name = "udf.cloudpickle" # original code, only for debugging purpose udf_code = textwrap.dedent(inspect.getsource(def_)) @@ -225,13 +225,29 @@ def generate_udf_code(def_, directory): with open(udf_code_file_path, "w") as f: f.write(udf_code) - # serialized bytecode - udf_bytecode_file_path = os.path.join(directory, udf_bytecode_file_name) + # There is a known cell-id sensitivity of the cloudpickle serialization in + # notebooks https://github.com/cloudpipe/cloudpickle/issues/538. Because of + # this, if a cell contains a udf decorated with @remote_function, a unique + # cloudpickle code is generated every time the cell is run, creating new + # cloud artifacts every time. This is slow and wasteful. + # A workaround of the same can be achieved by replacing the filename in the + # code object to a static value + # https://github.com/cloudpipe/cloudpickle/issues/120#issuecomment-338510661. + # + # To respect the user code/environment let's make this modification on a + # copy of the udf, not on the original udf itself. 
+ def_copy = cloudpickle.loads(cloudpickle.dumps(def_)) + def_copy.__code__ = def_copy.__code__.replace( + co_filename="bigframes_place_holder_filename" + ) + + # serialized udf + udf_pickle_file_path = os.path.join(directory, udf_pickle_file_name) # TODO(b/345433300): try io.BytesIO to avoid writing to the file system - with open(udf_bytecode_file_path, "wb") as f: - cloudpickle.dump(def_, f, protocol=_pickle_protocol_version) + with open(udf_pickle_file_path, "wb") as f: + cloudpickle.dump(def_copy, f, protocol=_pickle_protocol_version) - return udf_code_file_name, udf_bytecode_file_name + return udf_code_file_name, udf_pickle_file_name def generate_cloud_function_main_code( @@ -252,15 +268,15 @@ def generate_cloud_function_main_code( """ # Pickle the udf with all its dependencies - udf_code_file, udf_bytecode_file = generate_udf_code(def_, directory) + udf_code_file, udf_pickle_file = generate_udf_code(def_, directory) code_blocks = [ f"""\ import cloudpickle # original udf code is in {udf_code_file} -# serialized udf code is in {udf_bytecode_file} -with open("{udf_bytecode_file}", "rb") as f: +# serialized udf code is in {udf_pickle_file} +with open("{udf_pickle_file}", "rb") as f: udf = cloudpickle.load(f) input_types = {repr(input_types)} diff --git a/notebooks/remote_functions/remote_function_usecases.ipynb b/notebooks/remote_functions/remote_function_usecases.ipynb index 3d7ae3e8c7..f510b0c644 100644 --- a/notebooks/remote_functions/remote_function_usecases.ipynb +++ b/notebooks/remote_functions/remote_function_usecases.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 28, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -30,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 2, "metadata": { "id": "Y6QAttCqqMM0" }, @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -51,18 
+51,10 @@ "outputId": "6e3308cf-8de0-4b89-9128-4c6ddf3598c0" }, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shobs/code/bigframes/venv/lib/python3.10/site-packages/IPython/core/interactiveshell.py:3550: UserWarning: Reading cached table from 2024-06-28 02:49:31.716256+00:00 to avoid incompatibilies with previous reads of this table. To read the latest version, set `use_cache=False` or close the current session with Session.close() or bigframes.pandas.close_session().\n", - " exec(code_obj, self.user_global_ns, self.user_ns)\n" - ] - }, { "data": { "text/html": [ - "Query job f72cda67-2a96-4cd2-a624-591c0d540fc9 is DONE. 582.8 kB processed. Open Job" + "Query job 87e10cee-43bc-409e-8731-a334f9fd066f is DONE. 582.8 kB processed. Open Job" ], "text/plain": [ "" @@ -74,7 +66,7 @@ { "data": { "text/html": [ - "Query job 65cf6ca3-73f0-49e6-84a8-1ff79af6ec75 is DONE. 82.0 kB processed. Open Job" + "Query job 3885405e-c2e9-478a-ad56-ad839ec78ba1 is DONE. 82.0 kB processed. 
Open Job" ], "text/plain": [ "" @@ -111,34 +103,34 @@ " \n", " \n", " \n", - " 50\n", - " Rays\n", - " Rangers\n", - " 181\n", + " 149\n", + " Athletics\n", + " Angels\n", + " 185\n", " \n", " \n", - " 72\n", - " Phillies\n", - " Pirates\n", - " 192\n", + " 397\n", + " Twins\n", + " Tigers\n", + " 171\n", " \n", " \n", - " 89\n", - " Mariners\n", - " Blue Jays\n", - " 183\n", + " 420\n", + " Rockies\n", + " Padres\n", + " 200\n", " \n", " \n", - " 351\n", - " Astros\n", - " Angels\n", - " 212\n", + " 479\n", + " Orioles\n", + " Yankees\n", + " 170\n", " \n", " \n", - " 382\n", - " Royals\n", + " 581\n", + " Padres\n", " Yankees\n", - " 259\n", + " 192\n", " \n", " \n", "\n", @@ -146,14 +138,14 @@ ], "text/plain": [ " homeTeamName awayTeamName duration_minutes\n", - "50 Rays Rangers 181\n", - "72 Phillies Pirates 192\n", - "89 Mariners Blue Jays 183\n", - "351 Astros Angels 212\n", - "382 Royals Yankees 259" + "149 Athletics Angels 185\n", + "397 Twins Tigers 171\n", + "420 Rockies Padres 200\n", + "479 Orioles Yankees 170\n", + "581 Padres Yankees 192" ] }, - "execution_count": 30, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -202,7 +194,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -215,7 +207,7 @@ { "data": { "text/html": [ - "Query job f039d478-8dc4-4b60-8eda-179955e06586 is DONE. 0 Bytes processed. Open Job" + "Query job 9be3f9a3-b3c0-4b0e-8bbc-c34ab4516454 is DONE. 0 Bytes processed. 
Open Job" ], "text/plain": [ "" @@ -228,7 +220,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-862150459da5240a6df1ce01c59b32d8-em4ibov0' and BQ remote function 'bigframes-dev._1b6c31ff1bcd5d2f6d86833cf8268317f1b12d57.bigframes_862150459da5240a6df1ce01c59b32d8_em4ibov0'.\n" + "Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-session8fa7c6-ef6a39eaeb1d6a012c567f071fb5fb73-4fkm' and BQ remote function 'bigframes-dev._1b6c31ff1bcd5d2f6d86833cf8268317f1b12d57.bigframes_session8fa7c6_ef6a39eaeb1d6a012c567f071fb5fb73_4fkm'.\n" ] } ], @@ -247,7 +239,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -260,7 +252,7 @@ { "data": { "text/html": [ - "Query job 23e95831-d913-4d2b-97f6-588fc7967455 is DONE. 58.3 kB processed. Open Job" + "Query job 3f9c58fc-8ff9-4cf0-b4d2-ad183c463fd7 is DONE. 58.3 kB processed. Open Job" ], "text/plain": [ "" @@ -272,7 +264,7 @@ { "data": { "text/html": [ - "Query job bb8b3d13-a521-4d45-b4c8-5686c944a9f2 is DONE. 157.2 kB processed. Open Job" + "Query job bb354fad-9428-4c3f-bfc2-61a8c2a0af7b is DONE. 157.2 kB processed. Open Job" ], "text/plain": [ "" @@ -284,7 +276,7 @@ { "data": { "text/html": [ - "Query job 2a4653f5-cc6b-4279-a45e-40f0f97090a7 is DONE. 98.8 kB processed. Open Job" + "Query job ce4017b8-f76f-45fa-884d-b64f67d193b1 is DONE. 98.8 kB processed. 
Open Job" ], "text/plain": [ "" @@ -369,7 +361,7 @@ "654 Astros Angels 143 medium" ] }, - "execution_count": 32, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -396,7 +388,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 6, "metadata": { "id": "2UEmTbu4znyS" }, @@ -409,7 +401,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -422,7 +414,7 @@ { "data": { "text/html": [ - "Query job 5d914fde-81ec-46eb-9219-9822f77dd9a2 is DONE. 0 Bytes processed. Open Job" + "Query job 6a963fef-8ee6-4cd6-9f3c-90ef276b55ff is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -435,7 +427,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-f3231b74ec807496f4894218d5d40ed5-688mx7hi' and BQ remote function 'bigframes-dev._1b6c31ff1bcd5d2f6d86833cf8268317f1b12d57.bigframes_f3231b74ec807496f4894218d5d40ed5_688mx7hi'.\n" + "Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-session8fa7c6-174f85737a8e9a5ab4cbc772455bd799-7q90' and BQ remote function 'bigframes-dev._1b6c31ff1bcd5d2f6d86833cf8268317f1b12d57.bigframes_session8fa7c6_174f85737a8e9a5ab4cbc772455bd799_7q90'.\n" ] } ], @@ -454,7 +446,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -467,7 +459,7 @@ { "data": { "text/html": [ - "Query job b0b39944-1e69-4185-97ba-985178ee241f is DONE. 58.3 kB processed. Open Job" + "Query job 0c98fafd-3a35-4427-928a-86def0d53e37 is DONE. 58.3 kB processed. Open Job" ], "text/plain": [ "" @@ -479,7 +471,7 @@ { "data": { "text/html": [ - "Query job 90d99515-eb5e-4bcd-bce5-292eea09770e is DONE. 147.7 kB processed. Open Job" + "Query job c7c8a62f-1c72-4a4f-bbf4-67b32cfe1f8c is DONE. 147.7 kB processed. 
Open Job" ], "text/plain": [ "" @@ -491,7 +483,7 @@ { "data": { "text/html": [ - "Query job eb31d033-c871-49c5-a75e-4427e376516f is DONE. 89.3 kB processed. Open Job" + "Query job 4f6608b6-d687-4c9a-9792-cee9a6c98424 is DONE. 89.3 kB processed. Open Job" ], "text/plain": [ "" @@ -576,7 +568,7 @@ "654 Astros Angels 143 M" ] }, - "execution_count": 35, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -607,7 +599,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 9, "metadata": { "id": "zlQfhcW41uzM" }, @@ -618,7 +610,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -631,7 +623,7 @@ { "data": { "text/html": [ - "Query job 2895676f-d15c-40fd-8cf2-3a0436291e6b is DONE. 0 Bytes processed. Open Job" + "Query job 0c29b064-4343-4c43-b2c1-32a21113ae83 is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -644,7 +636,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-9b20b0257558a42da610d8998022c25e-7k62x9l6' and BQ remote function 'bigframes-dev._1b6c31ff1bcd5d2f6d86833cf8268317f1b12d57.bigframes_9b20b0257558a42da610d8998022c25e_7k62x9l6'.\n" + "Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-session8fa7c6-3afd74d885105142f214bdc8fe461059-m7jx' and BQ remote function 'bigframes-dev._1b6c31ff1bcd5d2f6d86833cf8268317f1b12d57.bigframes_session8fa7c6_3afd74d885105142f214bdc8fe461059_m7jx'.\n" ] } ], @@ -659,7 +651,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -672,7 +664,7 @@ { "data": { "text/html": [ - "Query job 4efda755-2f54-4477-b48a-4a424c888559 is DONE. 58.3 kB processed. Open Job" + "Query job 6144a150-5f73-46ff-83eb-1639d5de6e7b is DONE. 58.3 kB processed. 
Open Job" ], "text/plain": [ "" @@ -684,7 +676,7 @@ { "data": { "text/html": [ - "Query job a8992776-c2e8-4c3e-ab75-dfc01c5de89f is DONE. 150.1 kB processed. Open Job" + "Query job 4d3393e3-976a-4cc0-938f-dfcc53b398b6 is DONE. 150.1 kB processed. Open Job" ], "text/plain": [ "" @@ -696,7 +688,7 @@ { "data": { "text/html": [ - "Query job 3ea299b0-27ad-432b-8dbf-81da3aae884f is DONE. 91.7 kB processed. Open Job" + "Query job bd94daa5-4d4f-4912-8437-81dbc7f4758c is DONE. 91.7 kB processed. Open Job" ], "text/plain": [ "" @@ -781,7 +773,7 @@ "654 Astros Angels 143 3h" ] }, - "execution_count": 38, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -812,7 +804,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 12, "metadata": { "id": "0G91fWiF3pKg" }, @@ -829,7 +821,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -842,7 +834,7 @@ { "data": { "text/html": [ - "Query job 411853db-bf83-4df8-af78-55b1ceb39cb1 is DONE. 0 Bytes processed. Open Job" + "Query job 56c2bb32-5dc7-426b-9e31-c8d246cff4ea is DONE. 0 Bytes processed. 
Open Job" ], "text/plain": [ "" @@ -855,7 +847,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-b54aa0aa752af6a3bd6d9d529dac373b-h4lgpy4y' and BQ remote function 'bigframes-dev._1b6c31ff1bcd5d2f6d86833cf8268317f1b12d57.bigframes_b54aa0aa752af6a3bd6d9d529dac373b_h4lgpy4y'.\n" + "Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-session8fa7c6-ad70713227cb8bc042079efcad7da546-jk4m' and BQ remote function 'bigframes-dev._1b6c31ff1bcd5d2f6d86833cf8268317f1b12d57.bigframes_session8fa7c6_ad70713227cb8bc042079efcad7da546_jk4m'.\n" ] } ], @@ -870,7 +862,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -883,7 +875,7 @@ { "data": { "text/html": [ - "Query job d04abfa5-e2f2-4936-a708-ed97ef429df3 is DONE. 58.3 kB processed. Open Job" + "Query job b305edeb-cfcf-4d8d-a604-ec1c3d92a620 is DONE. 58.3 kB processed. Open Job" ], "text/plain": [ "" @@ -895,7 +887,7 @@ { "data": { "text/html": [ - "Query job 2fc4edf0-7a86-4532-b8fb-bd3f5d153dcb is DONE. 157.4 kB processed. Open Job" + "Query job 1c58f01b-a1ae-4c58-a9cc-554572458b64 is DONE. 157.4 kB processed. Open Job" ], "text/plain": [ "" @@ -907,7 +899,7 @@ { "data": { "text/html": [ - "Query job f7e6e18c-70d7-4b4e-926a-03b3a1abd1fe is DONE. 99.0 kB processed. Open Job" + "Query job c8a54ac3-8d75-4868-8f7e-aa7608a4ad08 is DONE. 99.0 kB processed. Open Job" ], "text/plain": [ "" @@ -992,7 +984,7 @@ "654 Astros Angels 143 3 hrs" ] }, - "execution_count": 41, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -1018,7 +1010,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1031,7 +1023,7 @@ { "data": { "text/html": [ - "Query job c674e7b7-2349-4317-8f08-8bfd9aa99785 is DONE. 
0 Bytes processed. Open Job" + "Query job d766e0f8-ff75-4f88-8fef-b6da77f55d50 is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -1057,7 +1049,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 16, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1070,7 +1062,7 @@ { "data": { "text/html": [ - "Query job eb9384c9-de7d-4232-bdca-94b61b50ff89 is DONE. 60.5 kB processed. Open Job" + "Query job d7bde249-ad7e-49e1-9391-030acd28b7e7 is DONE. 60.5 kB processed. Open Job" ], "text/plain": [ "" @@ -1082,7 +1074,7 @@ { "data": { "text/html": [ - "Query job 11a736a5-96d1-4e62-90e2-576156131a94 is DONE. 388.3 kB processed. Open Job" + "Query job 0f87f6c0-18df-4a63-9314-d05e3fabb052 is DONE. 388.3 kB processed. Open Job" ], "text/plain": [ "" @@ -1094,7 +1086,7 @@ { "data": { "text/html": [ - "Query job c66a9ad1-60f7-4af1-ad7c-65e4eecbb035 is DONE. 330.0 kB processed. Open Job" + "Query job 7b14b63e-8833-40c0-87de-cb58101afefd is DONE. 330.0 kB processed. 
Open Job" ], "text/plain": [ "" @@ -1132,39 +1124,39 @@ " \n", " \n", " \n", - " 719\n", + " 351\n", " Astros\n", " Angels\n", - " 180\n", - " gAAAAABmflbKCFygsmoTzFkUCObFSBJG29Ksk8HEtk82ib...\n", + " 212\n", + " gAAAAABmoKZY3GepMRcGsaXKAGivl7fqn2hIt1dvwnPpDE...\n", " \n", " \n", - " 2295\n", + " 654\n", " Astros\n", " Angels\n", - " 204\n", - " gAAAAABmflbKv-XzIxcNS92RO4fXYIAwA0kGWsAy-tI5fm...\n", + " 143\n", + " gAAAAABmoKZYwY-MKnwwgU3enh3jPVYF2q3Djl3Tn2J8bW...\n", " \n", " \n", - " 1126\n", + " 2295\n", " Astros\n", " Angels\n", - " 176\n", - " gAAAAABmflbJdjgpqnfvmklU7Zg3NJUqlTMYMs44dLEkwg...\n", + " 204\n", + " gAAAAABmoKZYFe-AMB_hl9O-4KaKYb_UvxF-EhA6B6qSqu...\n", " \n", " \n", - " 294\n", + " 624\n", " Astros\n", " Angels\n", - " 189\n", - " gAAAAABmflbKmfBh4P3FnwyiIpVFek9TzF4GzwP_5rQmkv...\n", + " 173\n", + " gAAAAABmoKZYWpas2Cd8MXRWl6A4q7CCt9QDVU_-pFH7WB...\n", " \n", " \n", - " 351\n", + " 1843\n", " Astros\n", " Angels\n", - " 212\n", - " gAAAAABmflbJ_mzqao9i7BtoYlMpb6y3bV3x7-cYuWGxsT...\n", + " 182\n", + " gAAAAABmoKZYKDxZqGkwBxQlFMCbG02zYiOSdKVe5CMM5u...\n", " \n", " \n", "\n", @@ -1172,21 +1164,21 @@ ], "text/plain": [ " homeTeamName awayTeamName duration_minutes \\\n", - "719 Astros Angels 180 \n", - "2295 Astros Angels 204 \n", - "1126 Astros Angels 176 \n", - "294 Astros Angels 189 \n", "351 Astros Angels 212 \n", + "654 Astros Angels 143 \n", + "2295 Astros Angels 204 \n", + "624 Astros Angels 173 \n", + "1843 Astros Angels 182 \n", "\n", " homeTeamNameRedacted \n", - "719 gAAAAABmflbKCFygsmoTzFkUCObFSBJG29Ksk8HEtk82ib... \n", - "2295 gAAAAABmflbKv-XzIxcNS92RO4fXYIAwA0kGWsAy-tI5fm... \n", - "1126 gAAAAABmflbJdjgpqnfvmklU7Zg3NJUqlTMYMs44dLEkwg... \n", - "294 gAAAAABmflbKmfBh4P3FnwyiIpVFek9TzF4GzwP_5rQmkv... \n", - "351 gAAAAABmflbJ_mzqao9i7BtoYlMpb6y3bV3x7-cYuWGxsT... " + "351 gAAAAABmoKZY3GepMRcGsaXKAGivl7fqn2hIt1dvwnPpDE... \n", + "654 gAAAAABmoKZYwY-MKnwwgU3enh3jPVYF2q3Djl3Tn2J8bW... \n", + "2295 gAAAAABmoKZYFe-AMB_hl9O-4KaKYb_UvxF-EhA6B6qSqu... 
\n", + "624 gAAAAABmoKZYWpas2Cd8MXRWl6A4q7CCt9QDVU_-pFH7WB... \n", + "1843 gAAAAABmoKZYKDxZqGkwBxQlFMCbG02zYiOSdKVe5CMM5u... " ] }, - "execution_count": 43, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1211,7 +1203,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -1221,13 +1213,13 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "Query job 21b054a9-8fb2-418f-a17b-effdf5aba9b5 is DONE. 0 Bytes processed. Open Job" + "Query job a7bd215e-2ecf-4acb-9b75-b8a58da66a3a is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -1240,7 +1232,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-0879f72acd9b8ede460b69c5a8cc0dcb-edxlst27' and BQ remote function 'bigframes-dev._1b6c31ff1bcd5d2f6d86833cf8268317f1b12d57.bigframes_0879f72acd9b8ede460b69c5a8cc0dcb_edxlst27'.\n" + "Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-session8fa7c6-6ffb0fce0234ca767920e183b2b8f51e-8wyw' and BQ remote function 'bigframes-dev._1b6c31ff1bcd5d2f6d86833cf8268317f1b12d57.bigframes_session8fa7c6_6ffb0fce0234ca767920e183b2b8f51e_8wyw'.\n" ] } ], @@ -1255,13 +1247,13 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "Query job d67b7cb9-9813-4863-99d1-01cf45ab4949 is DONE. 58.3 kB processed. Open Job" + "Query job d3a30741-c64a-4eef-8fa3-5f1d5ab5d4f3 is DONE. 58.3 kB processed. Open Job" ], "text/plain": [ "" @@ -1273,7 +1265,7 @@ { "data": { "text/html": [ - "Query job 579ba853-a7b8-49df-9539-bf22f08d2370 is DONE. 162.2 kB processed. Open Job" + "Query job 74f99ed6-ddb3-415c-9e4b-f31c4d56f345 is DONE. 162.2 kB processed. 
Open Job" ], "text/plain": [ "" @@ -1285,7 +1277,7 @@ { "data": { "text/html": [ - "Query job 72f9eb5d-1c1a-4ce8-8f2f-1f5a8f7cec99 is DONE. 103.9 kB processed. Open Job" + "Query job b61e9036-0057-48c1-bc22-fc628d091ef2 is DONE. 103.9 kB processed. Open Job" ], "text/plain": [ "" @@ -1370,7 +1362,7 @@ "654 Astros Angels 143 2 hours" ] }, - "execution_count": 46, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } From 5867d1cacee58ec87a133273ab9c6b8d267e7cb1 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Fri, 26 Jul 2024 07:08:44 +0000 Subject: [PATCH 2/4] do filename override in the naming rather than pickling --- bigframes/functions/remote_function.py | 18 +- .../functions/remote_function_template.py | 18 +- .../remote_function_usecases.ipynb | 238 +++++++++--------- 3 files changed, 141 insertions(+), 133 deletions(-) diff --git a/bigframes/functions/remote_function.py b/bigframes/functions/remote_function.py index b95067983f..005e9b5e42 100644 --- a/bigframes/functions/remote_function.py +++ b/bigframes/functions/remote_function.py @@ -167,7 +167,23 @@ def get_remote_function_locations(bq_location): def _get_hash(def_, package_requirements=None): "Get hash (32 digits alphanumeric) of a function." - def_repr = cloudpickle.dumps(def_, protocol=_pickle_protocol_version) + # There is a known cell-id sensitivity of the cloudpickle serialization in + # notebooks https://github.com/cloudpipe/cloudpickle/issues/538. Because of + # this, if a cell contains a udf decorated with @remote_function, a unique + # cloudpickle code is generated every time the cell is run, creating new + # cloud artifacts every time. This is slow and wasteful. + # A workaround of the same can be achieved by replacing the filename in the + # code object to a static value + # https://github.com/cloudpipe/cloudpickle/issues/120#issuecomment-338510661. 
+ # + # To respect the user code/environment let's make this modification on a + # copy of the udf, not on the original udf itself. + def_copy = cloudpickle.loads(cloudpickle.dumps(def_)) + def_copy.__code__ = def_copy.__code__.replace( + co_filename="bigframes_place_holder_filename" + ) + + def_repr = cloudpickle.dumps(def_copy, protocol=_pickle_protocol_version) if package_requirements: for p in sorted(package_requirements): def_repr += p.encode() diff --git a/bigframes/functions/remote_function_template.py b/bigframes/functions/remote_function_template.py index f852b263cd..c666f41daa 100644 --- a/bigframes/functions/remote_function_template.py +++ b/bigframes/functions/remote_function_template.py @@ -225,27 +225,11 @@ def generate_udf_code(def_, directory): with open(udf_code_file_path, "w") as f: f.write(udf_code) - # There is a known cell-id sensitivity of the cloudpickle serialization in - # notebooks https://github.com/cloudpipe/cloudpickle/issues/538. Because of - # this, if a cell contains a udf decorated with @remote_function, a unique - # cloudpickle code is generated every time the cell is run, creating new - # cloud artifacts every time. This is slow and wasteful. - # A workaround of the same can be achieved by replacing the filename in the - # code object to a static value - # https://github.com/cloudpipe/cloudpickle/issues/120#issuecomment-338510661. - # - # To respect the user code/environment let's make this modification on a - # copy of the udf, not on the original udf itself. 
- def_copy = cloudpickle.loads(cloudpickle.dumps(def_)) - def_copy.__code__ = def_copy.__code__.replace( - co_filename="bigframes_place_holder_filename" - ) - # serialized udf udf_pickle_file_path = os.path.join(directory, udf_pickle_file_name) # TODO(b/345433300): try io.BytesIO to avoid writing to the file system with open(udf_pickle_file_path, "wb") as f: - cloudpickle.dump(def_copy, f, protocol=_pickle_protocol_version) + cloudpickle.dump(def_, f, protocol=_pickle_protocol_version) return udf_code_file_name, udf_pickle_file_name diff --git a/notebooks/remote_functions/remote_function_usecases.ipynb b/notebooks/remote_functions/remote_function_usecases.ipynb index f510b0c644..9317e4b8fe 100644 --- a/notebooks/remote_functions/remote_function_usecases.ipynb +++ b/notebooks/remote_functions/remote_function_usecases.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -30,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 21, "metadata": { "id": "Y6QAttCqqMM0" }, @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 22, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -51,10 +51,18 @@ "outputId": "6e3308cf-8de0-4b89-9128-4c6ddf3598c0" }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shobs/code/bigframes1/venv/lib/python3.10/site-packages/IPython/core/interactiveshell.py:3550: UserWarning: Reading cached table from 2024-07-24 08:01:12.491984+00:00 to avoid incompatibilies with previous reads of this table. To read the latest version, set `use_cache=False` or close the current session with Session.close() or bigframes.pandas.close_session().\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n" + ] + }, { "data": { "text/html": [ - "Query job 87e10cee-43bc-409e-8731-a334f9fd066f is DONE. 582.8 kB processed. 
Open Job" + "Query job 9d155f10-e37a-4d20-b2ff-02868ecb58f4 is DONE. 582.8 kB processed. Open Job" ], "text/plain": [ "" @@ -66,7 +74,7 @@ { "data": { "text/html": [ - "Query job 3885405e-c2e9-478a-ad56-ad839ec78ba1 is DONE. 82.0 kB processed. Open Job" + "Query job 5a524e70-12dc-4116-b416-04570bbf754e is DONE. 82.0 kB processed. Open Job" ], "text/plain": [ "" @@ -103,49 +111,49 @@ " \n", " \n", " \n", - " 149\n", - " Athletics\n", - " Angels\n", - " 185\n", + " 36\n", + " Reds\n", + " Cubs\n", + " 159\n", " \n", " \n", - " 397\n", - " Twins\n", - " Tigers\n", - " 171\n", + " 358\n", + " Dodgers\n", + " Diamondbacks\n", + " 223\n", " \n", " \n", - " 420\n", - " Rockies\n", - " Padres\n", - " 200\n", + " 416\n", + " Yankees\n", + " White Sox\n", + " 216\n", " \n", " \n", - " 479\n", - " Orioles\n", - " Yankees\n", - " 170\n", + " 523\n", + " Rays\n", + " Athletics\n", + " 187\n", " \n", " \n", - " 581\n", - " Padres\n", - " Yankees\n", - " 192\n", + " 594\n", + " Pirates\n", + " Brewers\n", + " 169\n", " \n", " \n", "\n", "" ], "text/plain": [ - " homeTeamName awayTeamName duration_minutes\n", - "149 Athletics Angels 185\n", - "397 Twins Tigers 171\n", - "420 Rockies Padres 200\n", - "479 Orioles Yankees 170\n", - "581 Padres Yankees 192" + " homeTeamName awayTeamName duration_minutes\n", + "36 Reds Cubs 159\n", + "358 Dodgers Diamondbacks 223\n", + "416 Yankees White Sox 216\n", + "523 Rays Athletics 187\n", + "594 Pirates Brewers 169" ] }, - "execution_count": 3, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -194,7 +202,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 23, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -207,7 +215,7 @@ { "data": { "text/html": [ - "Query job 9be3f9a3-b3c0-4b0e-8bbc-c34ab4516454 is DONE. 0 Bytes processed. Open Job" + "Query job ec8d958d-93ef-45ae-8150-6ccfa8feb89a is DONE. 0 Bytes processed. 
Open Job" ], "text/plain": [ "" @@ -220,7 +228,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-session8fa7c6-ef6a39eaeb1d6a012c567f071fb5fb73-4fkm' and BQ remote function 'bigframes-dev._1b6c31ff1bcd5d2f6d86833cf8268317f1b12d57.bigframes_session8fa7c6_ef6a39eaeb1d6a012c567f071fb5fb73_4fkm'.\n" + "Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-session54c8b0-e22dbecc9ec0374bda36bc23df3775b0-g8zp' and BQ remote function 'bigframes-dev._1b6c31ff1bcd5d2f6d86833cf8268317f1b12d57.bigframes_session54c8b0_e22dbecc9ec0374bda36bc23df3775b0_g8zp'.\n" ] } ], @@ -239,7 +247,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 24, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -252,7 +260,7 @@ { "data": { "text/html": [ - "Query job 3f9c58fc-8ff9-4cf0-b4d2-ad183c463fd7 is DONE. 58.3 kB processed. Open Job" + "Query job 4b116e3e-d4d3-4eb6-9764-0a29a7c5d036 is DONE. 58.3 kB processed. Open Job" ], "text/plain": [ "" @@ -264,7 +272,7 @@ { "data": { "text/html": [ - "Query job bb354fad-9428-4c3f-bfc2-61a8c2a0af7b is DONE. 157.2 kB processed. Open Job" + "Query job d62ac4f0-47c9-47ae-8611-c9ecf78f20c9 is DONE. 157.2 kB processed. Open Job" ], "text/plain": [ "" @@ -276,7 +284,7 @@ { "data": { "text/html": [ - "Query job ce4017b8-f76f-45fa-884d-b64f67d193b1 is DONE. 98.8 kB processed. Open Job" + "Query job 5f876ebb-2d95-4c68-9d84-947e02b37bad is DONE. 98.8 kB processed. 
Open Job" ], "text/plain": [ "" @@ -361,7 +369,7 @@ "654 Astros Angels 143 medium" ] }, - "execution_count": 5, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -388,7 +396,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 25, "metadata": { "id": "2UEmTbu4znyS" }, @@ -401,7 +409,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 26, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -414,7 +422,7 @@ { "data": { "text/html": [ - "Query job 6a963fef-8ee6-4cd6-9f3c-90ef276b55ff is DONE. 0 Bytes processed. Open Job" + "Query job 1909a652-5735-401b-8a77-674d8539ded0 is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -427,7 +435,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-session8fa7c6-174f85737a8e9a5ab4cbc772455bd799-7q90' and BQ remote function 'bigframes-dev._1b6c31ff1bcd5d2f6d86833cf8268317f1b12d57.bigframes_session8fa7c6_174f85737a8e9a5ab4cbc772455bd799_7q90'.\n" + "Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-session54c8b0-4191f0fce98d46cc09359de47e203236-e009' and BQ remote function 'bigframes-dev._1b6c31ff1bcd5d2f6d86833cf8268317f1b12d57.bigframes_session54c8b0_4191f0fce98d46cc09359de47e203236_e009'.\n" ] } ], @@ -446,7 +454,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 27, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -459,7 +467,7 @@ { "data": { "text/html": [ - "Query job 0c98fafd-3a35-4427-928a-86def0d53e37 is DONE. 58.3 kB processed. Open Job" + "Query job a942bdc5-6a6d-4db8-b2aa-a556197377b3 is DONE. 58.3 kB processed. Open Job" ], "text/plain": [ "" @@ -471,7 +479,7 @@ { "data": { "text/html": [ - "Query job c7c8a62f-1c72-4a4f-bbf4-67b32cfe1f8c is DONE. 147.7 kB processed. Open Job" + "Query job 175ae9d3-604f-495b-a167-8b06c0283bd2 is DONE. 
147.7 kB processed. Open Job" ], "text/plain": [ "" @@ -483,7 +491,7 @@ { "data": { "text/html": [ - "Query job 4f6608b6-d687-4c9a-9792-cee9a6c98424 is DONE. 89.3 kB processed. Open Job" + "Query job d331a785-e574-45c9-86c8-d29ddd79a4d1 is DONE. 89.3 kB processed. Open Job" ], "text/plain": [ "" @@ -568,7 +576,7 @@ "654 Astros Angels 143 M" ] }, - "execution_count": 8, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -599,7 +607,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 28, "metadata": { "id": "zlQfhcW41uzM" }, @@ -610,7 +618,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 29, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -623,7 +631,7 @@ { "data": { "text/html": [ - "Query job 0c29b064-4343-4c43-b2c1-32a21113ae83 is DONE. 0 Bytes processed. Open Job" + "Query job bbc0b78f-bc04-4bd5-b711-399786a51519 is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -636,7 +644,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-session8fa7c6-3afd74d885105142f214bdc8fe461059-m7jx' and BQ remote function 'bigframes-dev._1b6c31ff1bcd5d2f6d86833cf8268317f1b12d57.bigframes_session8fa7c6_3afd74d885105142f214bdc8fe461059_m7jx'.\n" + "Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-session54c8b0-cf31fc2d2c7fe111afa5526f5a9cdf06-gmmo' and BQ remote function 'bigframes-dev._1b6c31ff1bcd5d2f6d86833cf8268317f1b12d57.bigframes_session54c8b0_cf31fc2d2c7fe111afa5526f5a9cdf06_gmmo'.\n" ] } ], @@ -651,7 +659,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 30, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -664,7 +672,7 @@ { "data": { "text/html": [ - "Query job 6144a150-5f73-46ff-83eb-1639d5de6e7b is DONE. 58.3 kB processed. 
Open Job" + "Query job 991b54ed-9eaa-450f-9208-3e73404bb112 is DONE. 58.3 kB processed. Open Job" ], "text/plain": [ "" @@ -676,7 +684,7 @@ { "data": { "text/html": [ - "Query job 4d3393e3-976a-4cc0-938f-dfcc53b398b6 is DONE. 150.1 kB processed. Open Job" + "Query job 4e464a58-ac5b-42fd-91e3-92c115bdd273 is DONE. 150.1 kB processed. Open Job" ], "text/plain": [ "" @@ -688,7 +696,7 @@ { "data": { "text/html": [ - "Query job bd94daa5-4d4f-4912-8437-81dbc7f4758c is DONE. 91.7 kB processed. Open Job" + "Query job d340f55d-1511-431a-970d-a70ed4356935 is DONE. 91.7 kB processed. Open Job" ], "text/plain": [ "" @@ -773,7 +781,7 @@ "654 Astros Angels 143 3h" ] }, - "execution_count": 11, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -804,7 +812,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 31, "metadata": { "id": "0G91fWiF3pKg" }, @@ -821,7 +829,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 32, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -834,7 +842,7 @@ { "data": { "text/html": [ - "Query job 56c2bb32-5dc7-426b-9e31-c8d246cff4ea is DONE. 0 Bytes processed. Open Job" + "Query job 10d1afa3-349b-49a8-adbd-79a8309ce77c is DONE. 0 Bytes processed. 
Open Job" ], "text/plain": [ "" @@ -847,7 +855,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-session8fa7c6-ad70713227cb8bc042079efcad7da546-jk4m' and BQ remote function 'bigframes-dev._1b6c31ff1bcd5d2f6d86833cf8268317f1b12d57.bigframes_session8fa7c6_ad70713227cb8bc042079efcad7da546_jk4m'.\n" + "Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-session54c8b0-3c03836c2044bf625d02e25ccdbfe101-k1m4' and BQ remote function 'bigframes-dev._1b6c31ff1bcd5d2f6d86833cf8268317f1b12d57.bigframes_session54c8b0_3c03836c2044bf625d02e25ccdbfe101_k1m4'.\n" ] } ], @@ -862,7 +870,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 33, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -875,7 +883,7 @@ { "data": { "text/html": [ - "Query job b305edeb-cfcf-4d8d-a604-ec1c3d92a620 is DONE. 58.3 kB processed. Open Job" + "Query job 33aff336-48d6-4caa-8cae-f459d21b180e is DONE. 58.3 kB processed. Open Job" ], "text/plain": [ "" @@ -887,7 +895,7 @@ { "data": { "text/html": [ - "Query job 1c58f01b-a1ae-4c58-a9cc-554572458b64 is DONE. 157.4 kB processed. Open Job" + "Query job 561e0aa7-3962-4ef3-b308-a117a0ac3a7d is DONE. 157.4 kB processed. Open Job" ], "text/plain": [ "" @@ -899,7 +907,7 @@ { "data": { "text/html": [ - "Query job c8a54ac3-8d75-4868-8f7e-aa7608a4ad08 is DONE. 99.0 kB processed. Open Job" + "Query job 759dccf8-3d88-40e1-a38a-2a2064e1d269 is DONE. 99.0 kB processed. 
Open Job" ], "text/plain": [ "" @@ -984,7 +992,7 @@ "654 Astros Angels 143 3 hrs" ] }, - "execution_count": 14, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -1010,7 +1018,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 34, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1023,7 +1031,7 @@ { "data": { "text/html": [ - "Query job d766e0f8-ff75-4f88-8fef-b6da77f55d50 is DONE. 0 Bytes processed. Open Job" + "Query job e2a44878-2564-44a5-8dec-b7ea2f42afd4 is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -1049,7 +1057,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 35, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1062,7 +1070,7 @@ { "data": { "text/html": [ - "Query job d7bde249-ad7e-49e1-9391-030acd28b7e7 is DONE. 60.5 kB processed. Open Job" + "Query job bcfab000-ca19-4633-bf0e-45e7d053f3eb is DONE. 60.5 kB processed. Open Job" ], "text/plain": [ "" @@ -1074,7 +1082,7 @@ { "data": { "text/html": [ - "Query job 0f87f6c0-18df-4a63-9314-d05e3fabb052 is DONE. 388.3 kB processed. Open Job" + "Query job 139a6449-c07e-41ff-9aed-c6fdd633740a is DONE. 388.3 kB processed. Open Job" ], "text/plain": [ "" @@ -1086,7 +1094,7 @@ { "data": { "text/html": [ - "Query job 7b14b63e-8833-40c0-87de-cb58101afefd is DONE. 330.0 kB processed. Open Job" + "Query job 035fa2fb-0a55-4358-bb50-3ef915f5bf54 is DONE. 330.0 kB processed. 
Open Job" ], "text/plain": [ "" @@ -1124,61 +1132,61 @@ " \n", " \n", " \n", - " 351\n", - " Astros\n", - " Angels\n", - " 212\n", - " gAAAAABmoKZY3GepMRcGsaXKAGivl7fqn2hIt1dvwnPpDE...\n", + " 641\n", + " American League\n", + " National League\n", + " 185\n", + " gAAAAABmo0n2I391cbYwIYeg8lyJq1MSFZatrtpvuUD5v-...\n", " \n", " \n", - " 654\n", - " Astros\n", + " 349\n", " Angels\n", - " 143\n", - " gAAAAABmoKZYwY-MKnwwgU3enh3jPVYF2q3Djl3Tn2J8bW...\n", + " Astros\n", + " 187\n", + " gAAAAABmo0n2pX-siRwl2tIZA4m--swndC_b7vgGXrqSNM...\n", " \n", " \n", - " 2295\n", - " Astros\n", + " 2349\n", " Angels\n", - " 204\n", - " gAAAAABmoKZYFe-AMB_hl9O-4KaKYb_UvxF-EhA6B6qSqu...\n", + " Astros\n", + " 160\n", + " gAAAAABmo0n28Q9RwH62HvYRhTDpQ9lo8c6G8F5bnn7wgF...\n", " \n", " \n", - " 624\n", - " Astros\n", + " 557\n", " Angels\n", - " 173\n", - " gAAAAABmoKZYWpas2Cd8MXRWl6A4q7CCt9QDVU_-pFH7WB...\n", + " Astros\n", + " 166\n", + " gAAAAABmo0n2YlwHlSGQ0_XvXd-QVBtB_Lq2zUifu7vKhg...\n", " \n", " \n", - " 1843\n", - " Astros\n", + " 220\n", " Angels\n", - " 182\n", - " gAAAAABmoKZYKDxZqGkwBxQlFMCbG02zYiOSdKVe5CMM5u...\n", + " Astros\n", + " 162\n", + " gAAAAABmo0n2l8HMSGKYizxfEmRvGQy96mrjwx734-Rl_Z...\n", " \n", " \n", "\n", "" ], "text/plain": [ - " homeTeamName awayTeamName duration_minutes \\\n", - "351 Astros Angels 212 \n", - "654 Astros Angels 143 \n", - "2295 Astros Angels 204 \n", - "624 Astros Angels 173 \n", - "1843 Astros Angels 182 \n", + " homeTeamName awayTeamName duration_minutes \\\n", + "641 American League National League 185 \n", + "349 Angels Astros 187 \n", + "2349 Angels Astros 160 \n", + "557 Angels Astros 166 \n", + "220 Angels Astros 162 \n", "\n", " homeTeamNameRedacted \n", - "351 gAAAAABmoKZY3GepMRcGsaXKAGivl7fqn2hIt1dvwnPpDE... \n", - "654 gAAAAABmoKZYwY-MKnwwgU3enh3jPVYF2q3Djl3Tn2J8bW... \n", - "2295 gAAAAABmoKZYFe-AMB_hl9O-4KaKYb_UvxF-EhA6B6qSqu... \n", - "624 gAAAAABmoKZYWpas2Cd8MXRWl6A4q7CCt9QDVU_-pFH7WB... 
\n", - "1843 gAAAAABmoKZYKDxZqGkwBxQlFMCbG02zYiOSdKVe5CMM5u... " + "641 gAAAAABmo0n2I391cbYwIYeg8lyJq1MSFZatrtpvuUD5v-... \n", + "349 gAAAAABmo0n2pX-siRwl2tIZA4m--swndC_b7vgGXrqSNM... \n", + "2349 gAAAAABmo0n28Q9RwH62HvYRhTDpQ9lo8c6G8F5bnn7wgF... \n", + "557 gAAAAABmo0n2YlwHlSGQ0_XvXd-QVBtB_Lq2zUifu7vKhg... \n", + "220 gAAAAABmo0n2l8HMSGKYizxfEmRvGQy96mrjwx734-Rl_Z... " ] }, - "execution_count": 16, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -1203,7 +1211,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 36, "metadata": {}, "outputs": [], "source": [ @@ -1213,13 +1221,13 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "Query job a7bd215e-2ecf-4acb-9b75-b8a58da66a3a is DONE. 0 Bytes processed. Open Job" + "Query job af73ab2d-8d88-4cbe-863f-d35e48af84e1 is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -1232,7 +1240,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-session8fa7c6-6ffb0fce0234ca767920e183b2b8f51e-8wyw' and BQ remote function 'bigframes-dev._1b6c31ff1bcd5d2f6d86833cf8268317f1b12d57.bigframes_session8fa7c6_6ffb0fce0234ca767920e183b2b8f51e_8wyw'.\n" + "Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-session54c8b0-a5e21a4ad488ce8b90de19c3c8cd33b6-0ab2' and BQ remote function 'bigframes-dev._1b6c31ff1bcd5d2f6d86833cf8268317f1b12d57.bigframes_session54c8b0_a5e21a4ad488ce8b90de19c3c8cd33b6_0ab2'.\n" ] } ], @@ -1247,13 +1255,13 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "Query job d3a30741-c64a-4eef-8fa3-5f1d5ab5d4f3 is DONE. 58.3 kB processed. Open Job" + "Query job 0a9ac329-619d-4303-8dbd-176a576d4ce8 is DONE. 58.3 kB processed. 
Open Job" ], "text/plain": [ "" @@ -1265,7 +1273,7 @@ { "data": { "text/html": [ - "Query job 74f99ed6-ddb3-415c-9e4b-f31c4d56f345 is DONE. 162.2 kB processed. Open Job" + "Query job 456bb9b4-0576-4c04-b707-4a04496aa538 is DONE. 162.2 kB processed. Open Job" ], "text/plain": [ "" @@ -1277,7 +1285,7 @@ { "data": { "text/html": [ - "Query job b61e9036-0057-48c1-bc22-fc628d091ef2 is DONE. 103.9 kB processed. Open Job" + "Query job 37f59939-5d2c-4fb1-839b-282ae3702d3d is DONE. 103.9 kB processed. Open Job" ], "text/plain": [ "" @@ -1362,7 +1370,7 @@ "654 Astros Angels 143 2 hours" ] }, - "execution_count": 19, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } From 3dd30c37518a805736944f5a1c4a717d44de3d72 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Tue, 30 Jul 2024 00:12:11 +0000 Subject: [PATCH 3/4] update documentation --- bigframes/session/__init__.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 22ca63d25b..dfec83a56a 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -1629,15 +1629,21 @@ def remote_function( `True` by default, which will result in reusing an existing remote function and corresponding cloud function (if any) that was previously created for the same udf. + Please note that for an unnamed (i.e. created without an explicit + `name` argument) remote function, the BigQuery DataFrames + session id is attached in the cloud artifacts names. So for the + effective reuse across the sessions it is recommended to create + the remote function with an explicit `name`. Setting it to `False` would force creating a unique remote function. If the required remote function does not exist then it would be created irrespective of this param. name (str, Optional): - Explicit name of the persisted BigQuery remote function. 
Use it with - caution, because two users working in the same project and dataset - could overwrite each other's remote functions if they use the same - persistent name. When an explicit name is provided, any session - specific clean up (``bigframes.session.Session.close``/ + Explicit name of the persisted BigQuery remote function. Use it + with caution, because more than one user working in the same + project and dataset could overwrite each other's remote + functions if they use the same persistent name. When an explicit + name is provided, any session specific clean up ( + ``bigframes.session.Session.close``/ ``bigframes.pandas.close_session``/ ``bigframes.pandas.reset_session``/ ``bigframes.pandas.clean_up_by_session_id``) does not clean up From 3f36219e82e45558c7c3686ccdac1dfe58289782 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Tue, 30 Jul 2024 00:12:58 +0000 Subject: [PATCH 4/4] update documentation --- bigframes/functions/remote_function.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/bigframes/functions/remote_function.py b/bigframes/functions/remote_function.py index 005e9b5e42..d84fbcdbab 100644 --- a/bigframes/functions/remote_function.py +++ b/bigframes/functions/remote_function.py @@ -893,11 +893,16 @@ def remote_function( dynamically using the `bigquery_connection_client` assuming the user has necessary priviliges. The PROJECT_ID should be the same as the BigQuery connection project. reuse (bool, Optional): - Reuse the remote function if is already exists. - `True` by default, which results in reusing an existing remote + Reuse the remote function if it already exists. + `True` by default, which will result in reusing an existing remote function and corresponding cloud function (if any) that was previously created for the same udf. - Setting it to `False` forces the creation of a unique remote function. + Please note that for an unnamed (i.e. 
created without an explicit + `name` argument) remote function, the BigQuery DataFrames + session id is attached in the cloud artifacts names. So for the + effective reuse across the sessions it is recommended to create + the remote function with an explicit `name`. + Setting it to `False` would force creating a unique remote function. If the required remote function does not exist then it would be created irrespective of this param. name (str, Optional):