From ffe09284b71bb2376f587d3092dfe1c71cdb70d9 Mon Sep 17 00:00:00 2001 From: vlif Date: Fri, 22 Apr 2016 09:48:04 -0700 Subject: [PATCH 001/241] Remove no longer used max_trace_rate flag from Python Cloud Debugger ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=120556133 --- src/googleclouddebugger/rate_limit.cc | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/src/googleclouddebugger/rate_limit.cc b/src/googleclouddebugger/rate_limit.cc index babae25..7a15ace 100644 --- a/src/googleclouddebugger/rate_limit.cc +++ b/src/googleclouddebugger/rate_limit.cc @@ -19,12 +19,6 @@ #include "rate_limit.h" -DEFINE_int64( - max_trace_rate, - 25000, - "maximum number of Python trace callbacks per second before all " - "breakpoints are disabled"); - DEFINE_int32( max_condition_lines_rate, 5000, @@ -47,7 +41,6 @@ namespace cdbg { static const double kMaxTraceRateCapacityFactor = 10; static const double kConditionCostCapacityFactor = 0.1; -static std::unique_ptr g_trace_quota; static std::unique_ptr g_global_condition_quota; @@ -57,12 +50,6 @@ static int64 GetBaseConditionQuotaCapacity() { void LazyInitializeRateLimit() { - if (g_trace_quota == nullptr) { - g_trace_quota.reset(new LeakyBucket( - FLAGS_max_trace_rate * kMaxTraceRateCapacityFactor, - FLAGS_max_trace_rate)); - } - if (g_global_condition_quota == nullptr) { g_global_condition_quota.reset(new LeakyBucket( GetBaseConditionQuotaCapacity(), @@ -72,16 +59,10 @@ void LazyInitializeRateLimit() { void CleanupRateLimit() { - g_trace_quota = nullptr; g_global_condition_quota = nullptr; } -LeakyBucket* GetTraceQuota() { - return g_trace_quota.get(); -} - - LeakyBucket* GetGlobalConditionQuota() { return g_global_condition_quota.get(); } From 7d72a6e2b46112ee72bfee200ca8f2df7a51299b Mon Sep 17 00:00:00 2001 From: danielsb Date: Tue, 31 May 2016 11:49:21 -0700 Subject: [PATCH 002/241] Fix documentation of the enable_service_account_auth flag. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=123666355 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7e1362f..1b091e5 100644 --- a/README.md +++ b/README.md @@ -134,7 +134,7 @@ _Option A_: add this code to the beginning of your `main()` function: try: import googleclouddebugger googleclouddebugger.AttachDebugger( - enable_service_account=True, + enable_service_account_auth=True, project_id='my-gcp-project-id', project_number='123456789', service_account_email='123@developer.gserviceaccount.com', From 538a8a821395b1b98046ba9683765fc3b3504cbb Mon Sep 17 00:00:00 2001 From: danielsb Date: Tue, 31 May 2016 12:42:42 -0700 Subject: [PATCH 003/241] Add "LOGPOINT: " prefix to Python logpoints. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=123672037 --- src/googleclouddebugger/capture_collector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 6354890..cd5e9d6 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -467,7 +467,7 @@ def Log(self, frame): self._definition.get('logMessageFormat', ''), self._EvaluateExpressions(frame)) - self._log_message(message) + self._log_message('LOGPOINT: ' + message) return None def _EvaluateExpressions(self, frame): From 3af28025c2d83be2ef0e735acb1ad7a382576232 Mon Sep 17 00:00:00 2001 From: danielsb Date: Wed, 15 Jun 2016 16:49:00 -0700 Subject: [PATCH 004/241] Rename AttachDebugger to enable(), which is more Pythonic. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=125007958 --- README.md | 4 ++-- src/googleclouddebugger/__init__.py | 10 ++++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 1b091e5..92b82e5 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,7 @@ easy_install dist/google_python_cloud_debugger-*.egg # Attach Python Cloud Debugger try: import googleclouddebugger - googleclouddebugger.AttachDebugger() + googleclouddebugger.enable() except ImportError: pass ``` @@ -133,7 +133,7 @@ _Option A_: add this code to the beginning of your `main()` function: # Attach Python Cloud Debugger try: import googleclouddebugger - googleclouddebugger.AttachDebugger( + googleclouddebugger.enable( enable_service_account_auth=True, project_id='my-gcp-project-id', project_number='123456789', diff --git a/src/googleclouddebugger/__init__.py b/src/googleclouddebugger/__init__.py index ccbfc7a..2e1a4e0 100644 --- a/src/googleclouddebugger/__init__.py +++ b/src/googleclouddebugger/__init__.py @@ -39,9 +39,12 @@ __version__ = '1.9' _flags = None +_hub_client = None +_breakpoints_manager = None def _StartDebugger(): + """Configures and starts the debugger.""" global _hub_client global _breakpoints_manager @@ -55,7 +58,6 @@ def _StartDebugger(): capture_collector.log_warning_message = logging.warning capture_collector.log_error_message = logging.error - """Configures and starts the debugger.""" capture_collector.CaptureCollector.pretty_printers.append( appengine_pretty_printers.PrettyPrinter) @@ -112,7 +114,8 @@ def _DebuggerMain(): exec 'execfile(%r)' % app_path in globals, locals # pylint: disable=exec-used -def AttachDebugger(**kwargs): +# pylint: disable=invalid-name +def enable(**kwargs): """Starts the debugger for already running application. This function should only be called once. 
@@ -132,3 +135,6 @@ def AttachDebugger(**kwargs): _flags = kwargs _StartDebugger() + +# AttachDebugger is an alias for enable, preserved for compatibility. +AttachDebugger = enable From 7ff62ed8787959bf86839df11f483512887c7f95 Mon Sep 17 00:00:00 2001 From: danielsb Date: Mon, 20 Jun 2016 16:59:23 -0700 Subject: [PATCH 005/241] Increment minor version for the python debugger. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=125397044 --- src/googleclouddebugger/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/__init__.py b/src/googleclouddebugger/__init__.py index 2e1a4e0..94f94a0 100644 --- a/src/googleclouddebugger/__init__.py +++ b/src/googleclouddebugger/__init__.py @@ -36,7 +36,7 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. -__version__ = '1.9' +__version__ = '1.10' _flags = None _hub_client = None From 982efa6e3941a4027df52ef44f1ff683e4f6fb18 Mon Sep 17 00:00:00 2001 From: gigid Date: Thu, 23 Jun 2016 13:36:14 -0700 Subject: [PATCH 006/241] Changed python agent gcp hub client to use the auto generated labels. Prepared open source build of python debugger to use auto generated labels. Added copyright notices to labels.java and labels.py files, since we ship them inside the open source build (we auto generate them only in the kokoro build). 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=125712348 --- src/googleclouddebugger/gcp_hub_client.py | 12 ++++--- src/googleclouddebugger/labels.py | 42 +++++++++++++++++++++++ 2 files changed, 50 insertions(+), 4 deletions(-) create mode 100644 src/googleclouddebugger/labels.py diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index 68760f5..a3e3bb3 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -35,6 +35,7 @@ import oauth2client from oauth2client.contrib.gce import AppAssertionCredentials +import labels import cdbg_native as native import uniquifier_computer import googleclouddebugger @@ -56,13 +57,16 @@ # a map is optional environment variable that can be used to set the flag # (flags still take precedence). _DEBUGGEE_LABELS = { - 'module': 'GAE_MODULE_NAME', - 'version': 'GAE_MODULE_VERSION', - 'minorversion': 'GAE_MINOR_VERSION'} + labels.Debuggee.MODULE: 'GAE_MODULE_NAME', + labels.Debuggee.VERSION: 'GAE_MODULE_VERSION', + labels.Debuggee.MINOR_VERSION: 'GAE_MINOR_VERSION' +} # Debuggee labels used to format debuggee description (ordered). The minor # version is excluded for the sake of consistency with AppEngine UX. -_DESCRIPTION_LABELS = ['projectid', 'module', 'version'] +_DESCRIPTION_LABELS = [ + labels.Debuggee.PROJECT_ID, labels.Debuggee.MODULE, labels.Debuggee.VERSION +] class GcpHubClient(object): diff --git a/src/googleclouddebugger/labels.py b/src/googleclouddebugger/labels.py new file mode 100644 index 0000000..f289ed4 --- /dev/null +++ b/src/googleclouddebugger/labels.py @@ -0,0 +1,42 @@ +# Copyright 2015 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Defines the keys of the well known labels used by the cloud debugger. + +DO NOT EDIT - This file is auto-generated +""" + + +class Breakpoint(object): + REQUEST_LOG_ID = 'requestlogid' + + SET_ALL = frozenset([ + 'requestlogid', + ]) + +class Debuggee(object): + MINOR_VERSION = 'minorversion' + MODULE = 'module' + VERSION = 'version' + PROJECT_ID = 'projectid' + DOMAIN = 'domain' + + SET_ALL = frozenset([ + 'minorversion', + 'module', + 'version', + 'projectid', + 'domain', + ]) + From 13a450aeff76100b370c697dfde706baec0d04dc Mon Sep 17 00:00:00 2001 From: gigid Date: Fri, 24 Jun 2016 16:35:18 -0700 Subject: [PATCH 007/241] Modified python agent to include the request log id into the breakpoint labels for GAE Classic applications only. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=125829312 --- src/googleclouddebugger/capture_collector.py | 22 ++++++++++++++++++++ src/googleclouddebugger/labels.py | 22 ++++++++++---------- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index cd5e9d6..30a9711 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -24,6 +24,7 @@ import sys import types +import labels import cdbg_native as native # Externally defined functions to actually log a message. If these variables @@ -32,6 +33,9 @@ log_warning_message = None log_error_message = None +# Externally defined function to collect the request log id. 
+request_log_id_collector = None + _PRIMITIVE_TYPES = (int, long, float, complex, str, unicode, bool, types.NoneType) _DATE_TYPES = (datetime.date, datetime.time, datetime.timedelta) @@ -164,6 +168,8 @@ def Collect(self, top_frame): # didn't make it point to var_index of 0 ("buffer full") self.TrimVariableTable(i) + self._CaptureRequestLogId() + def CaptureFrameLocals(self, frame): """Captures local variables and arguments of the specified frame. @@ -381,6 +387,22 @@ def ProcessBufferFull(variables): ProcessBufferFull(self._var_table) ProcessBufferFull(self.breakpoint['evaluatedExpressions']) + def _CaptureRequestLogId(self): + """Captures the request log id if possible. + + The request log id is stored inside the breakpoint labels. + """ + # pylint: disable=not-callable + if callable(request_log_id_collector): + request_log_id = request_log_id_collector() + if request_log_id: + # We have a request_log_id, save it into the breakpoint labels + if 'labels' not in self.breakpoint: + self.breakpoint['labels'] = {} + + self.breakpoint['labels'][ + labels.Breakpoint.REQUEST_LOG_ID] = request_log_id + @staticmethod def _NormalizePath(path): """Converts an absolute path to a relative one. 
diff --git a/src/googleclouddebugger/labels.py b/src/googleclouddebugger/labels.py index f289ed4..72751ff 100644 --- a/src/googleclouddebugger/labels.py +++ b/src/googleclouddebugger/labels.py @@ -18,25 +18,25 @@ """ -class Breakpoint(object): - REQUEST_LOG_ID = 'requestlogid' - - SET_ALL = frozenset([ - 'requestlogid', - ]) - class Debuggee(object): MINOR_VERSION = 'minorversion' - MODULE = 'module' - VERSION = 'version' PROJECT_ID = 'projectid' + MODULE = 'module' DOMAIN = 'domain' + VERSION = 'version' SET_ALL = frozenset([ 'minorversion', - 'module', - 'version', 'projectid', + 'module', 'domain', + 'version', + ]) + +class Breakpoint(object): + REQUEST_LOG_ID = 'requestlogid' + + SET_ALL = frozenset([ + 'requestlogid', ]) From fdfad1ea9756cb9daf3cf8b24b22dd066e594ead Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Thu, 14 Jul 2016 09:42:50 -0700 Subject: [PATCH 008/241] Modify open-source README docs for Java and Python agents to explicitly list the dependencies and how to install them. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=126707026 --- README.md | 13 ++++++++++--- src/build.sh | 7 ++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 92b82e5..51060e7 100644 --- a/README.md +++ b/README.md @@ -59,9 +59,7 @@ and install the debugger agent with: easy_install google_python_cloud_debugger-py2.7-linux-x86_64.egg ``` -You can also build the agent from source code (OS dependencies are listed in -[build.sh](https://github.com/GoogleCloudPlatform/cloud-debug-python/blob/master/src/build.sh) -script): +You can also build the agent from source code: ```shell git clone https://github.com/GoogleCloudPlatform/cloud-debug-python.git @@ -70,6 +68,15 @@ cd cloud-debug-python/src/ easy_install dist/google_python_cloud_debugger-*.egg ``` +Note that the build script assumes some dependencies. 
To install these +dependencies on Debian, run this command: + +```shell +sudo apt-get -y -q --no-install-recommends install \ + curl ca-certificates gcc build-essential cmake \ + python python-dev libpython2.7 python-setuptools +``` + ## Setup ### Google Compute Engine diff --git a/src/build.sh b/src/build.sh index 2b3b16c..f9b61be 100644 --- a/src/build.sh +++ b/src/build.sh @@ -20,9 +20,10 @@ # debugger is currently only supported on Linux. # # The build script assumes Python, cmake, curl and gcc are installed. -# To install those on Debian, run this commandd: -# sudo apt-get install curl ca-certificates gcc build-essential cmake \ -# python python-dev libpython2.7 python-setuptools +# To install these dependencies on Debian, run this commandd: +# sudo apt-get -y -q --no-install-recommends install \ +# curl ca-certificates gcc build-essential cmake \ +# python python-dev libpython2.7 python-setuptools # # The Python Cloud Debugger agent uses glog and gflags libraries. We build them # first. Then we use setuptools to build the debugger agent. The entire From ec7cbb256e7beab806fe5346c306f7ebc13b1578 Mon Sep 17 00:00:00 2001 From: cpovirk Date: Wed, 20 Jul 2016 14:42:50 -0700 Subject: [PATCH 009/241] Set compatible_with=appengine on targets depended on by App Engine apps or by the App Engine runtime itself. This CL has no immediate impact on the targets, but it has two consequences moving forward: - When google3 moves to Java 8, compatible_with=appengine targets will remain on Java 7. (If this target is a non-java_* target, you can ignore this bullet.) - compatible_with=appengine targets can depend only on other compatible_with=appengine targets. (Fortunately, most common libraries will be compatible_with=appengine: http://gpaste/5720237664108544) This CL is one of thousands in a migration on the critical path for Java 8, so please respond promptly, and, if you are the wrong reviewer, please reassign the CL. 
More information: http://go/compatible_with_appengine_lsc Tested: blaze build --nobuild ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=127990496 --- src/googleclouddebugger/labels.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/googleclouddebugger/labels.py b/src/googleclouddebugger/labels.py index 72751ff..6e2999e 100644 --- a/src/googleclouddebugger/labels.py +++ b/src/googleclouddebugger/labels.py @@ -18,25 +18,25 @@ """ +class Breakpoint(object): + REQUEST_LOG_ID = 'requestlogid' + + SET_ALL = frozenset([ + 'requestlogid', + ]) + class Debuggee(object): - MINOR_VERSION = 'minorversion' + DOMAIN = 'domain' PROJECT_ID = 'projectid' MODULE = 'module' - DOMAIN = 'domain' VERSION = 'version' + MINOR_VERSION = 'minorversion' SET_ALL = frozenset([ - 'minorversion', + 'domain', 'projectid', 'module', - 'domain', 'version', - ]) - -class Breakpoint(object): - REQUEST_LOG_ID = 'requestlogid' - - SET_ALL = frozenset([ - 'requestlogid', + 'minorversion', ]) From 42d4393ac4f50384cf0ab191c8d1917f30647850 Mon Sep 17 00:00:00 2001 From: erezh Date: Fri, 22 Jul 2016 16:09:06 -0700 Subject: [PATCH 010/241] Automated g4 rollback of changelist 125829312. *** Reason for rollback *** Breaks Cloud Debugger for GAE *** Original change description *** Modified python agent to include the request log id into the breakpoint labels for GAE Classic applications only. 
*** ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=128221983 --- src/googleclouddebugger/capture_collector.py | 22 -------------------- 1 file changed, 22 deletions(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 30a9711..cd5e9d6 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -24,7 +24,6 @@ import sys import types -import labels import cdbg_native as native # Externally defined functions to actually log a message. If these variables @@ -33,9 +32,6 @@ log_warning_message = None log_error_message = None -# Externally defined function to collect the request log id. -request_log_id_collector = None - _PRIMITIVE_TYPES = (int, long, float, complex, str, unicode, bool, types.NoneType) _DATE_TYPES = (datetime.date, datetime.time, datetime.timedelta) @@ -168,8 +164,6 @@ def Collect(self, top_frame): # didn't make it point to var_index of 0 ("buffer full") self.TrimVariableTable(i) - self._CaptureRequestLogId() - def CaptureFrameLocals(self, frame): """Captures local variables and arguments of the specified frame. @@ -387,22 +381,6 @@ def ProcessBufferFull(variables): ProcessBufferFull(self._var_table) ProcessBufferFull(self.breakpoint['evaluatedExpressions']) - def _CaptureRequestLogId(self): - """Captures the request log id if possible. - - The request log id is stored inside the breakpoint labels. - """ - # pylint: disable=not-callable - if callable(request_log_id_collector): - request_log_id = request_log_id_collector() - if request_log_id: - # We have a request_log_id, save it into the breakpoint labels - if 'labels' not in self.breakpoint: - self.breakpoint['labels'] = {} - - self.breakpoint['labels'][ - labels.Breakpoint.REQUEST_LOG_ID] = request_log_id - @staticmethod def _NormalizePath(path): """Converts an absolute path to a relative one. 
From 9492633f186810db892f6d59a7047305f2983763 Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Fri, 12 Aug 2016 09:00:56 -0700 Subject: [PATCH 011/241] Fix Python E2E tests. See bug for an explanation of the problem. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=130105263 --- src/googleclouddebugger/gcp_hub_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index a3e3bb3..dbeb34e 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -188,7 +188,7 @@ def EnableGceAuth(self): service. It is done lazily from the worker thread. The motivation is to speed up initialization and be able to recover from failures. """ - self._credentials = AppAssertionCredentials(_CLOUD_PLATFORM_SCOPE) + self._credentials = AppAssertionCredentials() self._project_id = lambda: self._QueryGcpProject('project-id') self._project_number = lambda: self._QueryGcpProject('numeric-project-id') From ac04fbb33d2d75b63c8aaeece313b7707dce2c9f Mon Sep 17 00:00:00 2001 From: gigid Date: Mon, 15 Aug 2016 10:05:18 -0700 Subject: [PATCH 012/241] Modified python agent to include the request log id into the breakpoint labels for GAE Classic applications only. This is a re-submit of cl/125829312 that was rolled back. It should work now that we checked in the modified serializer to/from json/protobuf[2|3]. I will send another CL for the updated aeta python test; I prefer having the test go into a separate CL (so that @emrekultursay can run the plain test the day after we check in against nightly and also the updated one). 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=130292452 --- src/googleclouddebugger/capture_collector.py | 22 ++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index cd5e9d6..30a9711 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -24,6 +24,7 @@ import sys import types +import labels import cdbg_native as native # Externally defined functions to actually log a message. If these variables @@ -32,6 +33,9 @@ log_warning_message = None log_error_message = None +# Externally defined function to collect the request log id. +request_log_id_collector = None + _PRIMITIVE_TYPES = (int, long, float, complex, str, unicode, bool, types.NoneType) _DATE_TYPES = (datetime.date, datetime.time, datetime.timedelta) @@ -164,6 +168,8 @@ def Collect(self, top_frame): # didn't make it point to var_index of 0 ("buffer full") self.TrimVariableTable(i) + self._CaptureRequestLogId() + def CaptureFrameLocals(self, frame): """Captures local variables and arguments of the specified frame. @@ -381,6 +387,22 @@ def ProcessBufferFull(variables): ProcessBufferFull(self._var_table) ProcessBufferFull(self.breakpoint['evaluatedExpressions']) + def _CaptureRequestLogId(self): + """Captures the request log id if possible. + + The request log id is stored inside the breakpoint labels. + """ + # pylint: disable=not-callable + if callable(request_log_id_collector): + request_log_id = request_log_id_collector() + if request_log_id: + # We have a request_log_id, save it into the breakpoint labels + if 'labels' not in self.breakpoint: + self.breakpoint['labels'] = {} + + self.breakpoint['labels'][ + labels.Breakpoint.REQUEST_LOG_ID] = request_log_id + @staticmethod def _NormalizePath(path): """Converts an absolute path to a relative one. 
From 9fbff2157bac3d3b0a01f622c57784afa34b8558 Mon Sep 17 00:00:00 2001 From: danielsb Date: Mon, 15 Aug 2016 10:19:54 -0700 Subject: [PATCH 013/241] Fix location information for logpoint output. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=130294233 --- src/googleclouddebugger/__init__.py | 4 +- src/googleclouddebugger/capture_collector.py | 102 +++++++++++++------ 2 files changed, 71 insertions(+), 35 deletions(-) diff --git a/src/googleclouddebugger/__init__.py b/src/googleclouddebugger/__init__.py index 94f94a0..8c02fe3 100644 --- a/src/googleclouddebugger/__init__.py +++ b/src/googleclouddebugger/__init__.py @@ -54,9 +54,7 @@ def _StartDebugger(): _breakpoints_manager = breakpoints_manager.BreakpointsManager(_hub_client) # Set up loggers for logpoints. - capture_collector.log_info_message = logging.info - capture_collector.log_warning_message = logging.warning - capture_collector.log_error_message = logging.error + capture_collector.SetLogger(logging.getLogger()) capture_collector.CaptureCollector.pretty_printers.append( appengine_pretty_printers.PrettyPrinter) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 30a9711..79b284d 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -19,6 +19,7 @@ import copy import datetime import inspect +import logging import os import re import sys @@ -49,6 +50,70 @@ INVALID_EXPRESSION_INDEX = '' +def NormalizePath(path): + """Removes any Python system path prefix from the given path. + + Python keeps almost all paths absolute. This is not what we actually + want to return. This loops through system paths (directories in which + Python will load modules). If "path" is relative to one of them, the + directory prefix is removed. 
+ + Args: + path: absolute path to normalize (relative paths will not be altered) + + Returns: + Relative path if "path" is within one of the sys.path directories or + the input otherwise. + """ + path = os.path.normpath(path) + + for sys_path in sys.path: + if not sys_path: + continue + + # Append '/' at the end of the path if it's not there already. + sys_path = os.path.join(sys_path, '') + + if path.startswith(sys_path): + return path[len(sys_path):] + + return path + + +class LineNoFilter(logging.Filter): + """Enables overriding the path and line number in a logging record. + + The "extra" parameter in logging cannot override existing fields in log + record, so we can't use it to directly set pathname and lineno. Instead, + we add this filter to the default logger, and it looks for "cdbg_pathname" + and "cdbg_lineno", moving them to the pathname and lineno fields accordingly. + """ + + def filter(self, record): + # This method gets invoked for user-generated logging, so verify that this + # particular invocation came from our logging code. + if record.pathname != inspect.currentframe().f_code.co_filename: + return True + if hasattr(record, 'cdbg_pathname'): + record.pathname = record.cdbg_pathname + del record.cdbg_pathname + if hasattr(record, 'cdbg_lineno'): + record.lineno = record.cdbg_lineno + del record.cdbg_lineno + return True + + +def SetLogger(logger): + """Sets the logger object to use for all 'LOG' breakpoint actions.""" + global log_info_message + global log_warning_message + global log_error_message + log_info_message = logger.info + log_warning_message = logger.warning + log_error_message = logger.error + logger.addFilter(LineNoFilter()) + + class CaptureCollector(object): """Captures application state snapshot. 
@@ -145,7 +210,7 @@ def Collect(self, top_frame): breakpoint_frames.append({ 'function': code.co_name, 'location': { - 'path': CaptureCollector._NormalizePath(code.co_filename), + 'path': NormalizePath(code.co_filename), 'line': frame.f_lineno}, 'arguments': frame_arguments, 'locals': frame_locals}) @@ -403,36 +468,6 @@ def _CaptureRequestLogId(self): self.breakpoint['labels'][ labels.Breakpoint.REQUEST_LOG_ID] = request_log_id - @staticmethod - def _NormalizePath(path): - """Converts an absolute path to a relative one. - - Python keeps almost all paths absolute. This is not what we actually - want to return. This loops through system paths (directories in which - Python will load modules). If "path" is relative to one of them, the - directory prefix is removed. - - Args: - path: absolute path to normalize (relative paths will not be altered) - - Returns: - Relative path if "path" is within one of the sys.path directories or - the input otherwise. - """ - path = os.path.normpath(path) - - for sys_path in sys.path: - if not sys_path: - continue - - # Append '/' at the end of the path if it's not there already. - sys_path = os.path.join(sys_path, '') - - if path.startswith(sys_path): - return path[len(sys_path):] - - return path - class LogCollector(object): """Captures minimal application snapshot and logs it to application log. 
@@ -489,7 +524,10 @@ def Log(self, frame): self._definition.get('logMessageFormat', ''), self._EvaluateExpressions(frame)) - self._log_message('LOGPOINT: ' + message) + self._log_message('LOGPOINT: ' + message, extra={ + 'cdbg_pathname': NormalizePath(frame.f_code.co_filename), + 'cdbg_lineno': frame.f_lineno + }) return None def _EvaluateExpressions(self, frame): From ca43ea9d63e03a3f920f42b558b32153f1add019 Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Mon, 15 Aug 2016 10:33:05 -0700 Subject: [PATCH 014/241] Increment Python opensource version to 1.11 ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=130295829 --- src/googleclouddebugger/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/__init__.py b/src/googleclouddebugger/__init__.py index 8c02fe3..630ca8f 100644 --- a/src/googleclouddebugger/__init__.py +++ b/src/googleclouddebugger/__init__.py @@ -36,7 +36,7 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. -__version__ = '1.10' +__version__ = '1.11' _flags = None _hub_client = None From 2ffc9f9520507baa982de512da84af33866ccdc9 Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Mon, 15 Aug 2016 12:21:45 -0700 Subject: [PATCH 015/241] Automated g4 rollback of changelist 130294233. *** Reason for rollback *** Crashes with 'module' object has no attribute 'getLogger' *** Original change description *** Fix location information for logpoint output. 
*** ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=130309700 --- src/googleclouddebugger/__init__.py | 4 +- src/googleclouddebugger/capture_collector.py | 102 ++++++------------- 2 files changed, 35 insertions(+), 71 deletions(-) diff --git a/src/googleclouddebugger/__init__.py b/src/googleclouddebugger/__init__.py index 630ca8f..9a21d82 100644 --- a/src/googleclouddebugger/__init__.py +++ b/src/googleclouddebugger/__init__.py @@ -54,7 +54,9 @@ def _StartDebugger(): _breakpoints_manager = breakpoints_manager.BreakpointsManager(_hub_client) # Set up loggers for logpoints. - capture_collector.SetLogger(logging.getLogger()) + capture_collector.log_info_message = logging.info + capture_collector.log_warning_message = logging.warning + capture_collector.log_error_message = logging.error capture_collector.CaptureCollector.pretty_printers.append( appengine_pretty_printers.PrettyPrinter) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 79b284d..30a9711 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -19,7 +19,6 @@ import copy import datetime import inspect -import logging import os import re import sys @@ -50,70 +49,6 @@ INVALID_EXPRESSION_INDEX = '' -def NormalizePath(path): - """Removes any Python system path prefix from the given path. - - Python keeps almost all paths absolute. This is not what we actually - want to return. This loops through system paths (directories in which - Python will load modules). If "path" is relative to one of them, the - directory prefix is removed. - - Args: - path: absolute path to normalize (relative paths will not be altered) - - Returns: - Relative path if "path" is within one of the sys.path directories or - the input otherwise. 
- """ - path = os.path.normpath(path) - - for sys_path in sys.path: - if not sys_path: - continue - - # Append '/' at the end of the path if it's not there already. - sys_path = os.path.join(sys_path, '') - - if path.startswith(sys_path): - return path[len(sys_path):] - - return path - - -class LineNoFilter(logging.Filter): - """Enables overriding the path and line number in a logging record. - - The "extra" parameter in logging cannot override existing fields in log - record, so we can't use it to directly set pathname and lineno. Instead, - we add this filter to the default logger, and it looks for "cdbg_pathname" - and "cdbg_lineno", moving them to the pathname and lineno fields accordingly. - """ - - def filter(self, record): - # This method gets invoked for user-generated logging, so verify that this - # particular invocation came from our logging code. - if record.pathname != inspect.currentframe().f_code.co_filename: - return True - if hasattr(record, 'cdbg_pathname'): - record.pathname = record.cdbg_pathname - del record.cdbg_pathname - if hasattr(record, 'cdbg_lineno'): - record.lineno = record.cdbg_lineno - del record.cdbg_lineno - return True - - -def SetLogger(logger): - """Sets the logger object to use for all 'LOG' breakpoint actions.""" - global log_info_message - global log_warning_message - global log_error_message - log_info_message = logger.info - log_warning_message = logger.warning - log_error_message = logger.error - logger.addFilter(LineNoFilter()) - - class CaptureCollector(object): """Captures application state snapshot. 
@@ -210,7 +145,7 @@ def Collect(self, top_frame): breakpoint_frames.append({ 'function': code.co_name, 'location': { - 'path': NormalizePath(code.co_filename), + 'path': CaptureCollector._NormalizePath(code.co_filename), 'line': frame.f_lineno}, 'arguments': frame_arguments, 'locals': frame_locals}) @@ -468,6 +403,36 @@ def _CaptureRequestLogId(self): self.breakpoint['labels'][ labels.Breakpoint.REQUEST_LOG_ID] = request_log_id + @staticmethod + def _NormalizePath(path): + """Converts an absolute path to a relative one. + + Python keeps almost all paths absolute. This is not what we actually + want to return. This loops through system paths (directories in which + Python will load modules). If "path" is relative to one of them, the + directory prefix is removed. + + Args: + path: absolute path to normalize (relative paths will not be altered) + + Returns: + Relative path if "path" is within one of the sys.path directories or + the input otherwise. + """ + path = os.path.normpath(path) + + for sys_path in sys.path: + if not sys_path: + continue + + # Append '/' at the end of the path if it's not there already. + sys_path = os.path.join(sys_path, '') + + if path.startswith(sys_path): + return path[len(sys_path):] + + return path + class LogCollector(object): """Captures minimal application snapshot and logs it to application log. @@ -524,10 +489,7 @@ def Log(self, frame): self._definition.get('logMessageFormat', ''), self._EvaluateExpressions(frame)) - self._log_message('LOGPOINT: ' + message, extra={ - 'cdbg_pathname': NormalizePath(frame.f_code.co_filename), - 'cdbg_lineno': frame.f_lineno - }) + self._log_message('LOGPOINT: ' + message) return None def _EvaluateExpressions(self, frame): From 6c713aa7339b90641341b920c78d7f116b6a65a7 Mon Sep 17 00:00:00 2001 From: danielsb Date: Wed, 17 Aug 2016 15:29:34 -0700 Subject: [PATCH 016/241] Fix location information for logpoint output for GAE and open source environments. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=130571025 --- src/googleclouddebugger/__init__.py | 4 +- src/googleclouddebugger/capture_collector.py | 113 ++++++++++++++----- 2 files changed, 83 insertions(+), 34 deletions(-) diff --git a/src/googleclouddebugger/__init__.py b/src/googleclouddebugger/__init__.py index 9a21d82..630ca8f 100644 --- a/src/googleclouddebugger/__init__.py +++ b/src/googleclouddebugger/__init__.py @@ -54,9 +54,7 @@ def _StartDebugger(): _breakpoints_manager = breakpoints_manager.BreakpointsManager(_hub_client) # Set up loggers for logpoints. - capture_collector.log_info_message = logging.info - capture_collector.log_warning_message = logging.warning - capture_collector.log_error_message = logging.error + capture_collector.SetLogger(logging.getLogger()) capture_collector.CaptureCollector.pretty_printers.append( appengine_pretty_printers.PrettyPrinter) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 30a9711..ed61027 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -19,6 +19,7 @@ import copy import datetime import inspect +import logging import os import re import sys @@ -49,6 +50,82 @@ INVALID_EXPRESSION_INDEX = '' +def NormalizePath(path): + """Removes any Python system path prefix from the given path. + + Python keeps almost all paths absolute. This is not what we actually + want to return. This loops through system paths (directories in which + Python will load modules). If "path" is relative to one of them, the + directory prefix is removed. + + Args: + path: absolute path to normalize (relative paths will not be altered) + + Returns: + Relative path if "path" is within one of the sys.path directories or + the input otherwise. 
+ """ + path = os.path.normpath(path) + + for sys_path in sys.path: + if not sys_path: + continue + + # Append '/' at the end of the path if it's not there already. + sys_path = os.path.join(sys_path, '') + + if path.startswith(sys_path): + return path[len(sys_path):] + + return path + + +class LineNoFilter(logging.Filter): + """Enables overriding the path and line number in a logging record. + + The "extra" parameter in logging cannot override existing fields in log + record, so we can't use it to directly set pathname and lineno. Instead, + we add this filter to the default logger, and it looks for "cdbg_pathname" + and "cdbg_lineno", moving them to the pathname and lineno fields accordingly. + """ + + def filter(self, record): + # This method gets invoked for user-generated logging, so verify that this + # particular invocation came from our logging code. + if record.pathname != inspect.currentframe().f_code.co_filename: + return True + pathname, lineno = GetLoggingFileAndLine() + if pathname and lineno: + record.pathname = pathname + record.lineno = lineno + return True + + +def GetLoggingFileAndLine(): + """Search for and return the file and line number from the log collector.""" + frame = inspect.currentframe() + this_file = frame.f_code.co_filename + frame = frame.f_back + while frame: + if this_file == frame.f_code.co_filename: + if 'cdbg_logging_pathname' in frame.f_locals: + return (frame.f_locals['cdbg_logging_pathname'], + frame.f_locals.get('cdbg_logging_lineno', None)) + frame = frame.f_back + return (None, None) + + +def SetLogger(logger): + """Sets the logger object to use for all 'LOG' breakpoint actions.""" + global log_info_message + global log_warning_message + global log_error_message + log_info_message = logger.info + log_warning_message = logger.warning + log_error_message = logger.error + logger.addFilter(LineNoFilter()) + + class CaptureCollector(object): """Captures application state snapshot. 
@@ -145,7 +222,7 @@ def Collect(self, top_frame): breakpoint_frames.append({ 'function': code.co_name, 'location': { - 'path': CaptureCollector._NormalizePath(code.co_filename), + 'path': NormalizePath(code.co_filename), 'line': frame.f_lineno}, 'arguments': frame_arguments, 'locals': frame_locals}) @@ -403,36 +480,6 @@ def _CaptureRequestLogId(self): self.breakpoint['labels'][ labels.Breakpoint.REQUEST_LOG_ID] = request_log_id - @staticmethod - def _NormalizePath(path): - """Converts an absolute path to a relative one. - - Python keeps almost all paths absolute. This is not what we actually - want to return. This loops through system paths (directories in which - Python will load modules). If "path" is relative to one of them, the - directory prefix is removed. - - Args: - path: absolute path to normalize (relative paths will not be altered) - - Returns: - Relative path if "path" is within one of the sys.path directories or - the input otherwise. - """ - path = os.path.normpath(path) - - for sys_path in sys.path: - if not sys_path: - continue - - # Append '/' at the end of the path if it's not there already. - sys_path = os.path.join(sys_path, '') - - if path.startswith(sys_path): - return path[len(sys_path):] - - return path - class LogCollector(object): """Captures minimal application snapshot and logs it to application log. @@ -489,7 +536,11 @@ def Log(self, frame): self._definition.get('logMessageFormat', ''), self._EvaluateExpressions(frame)) + cdbg_logging_pathname = NormalizePath(frame.f_code.co_filename) + cdbg_logging_lineno = frame.f_lineno self._log_message('LOGPOINT: ' + message) + del cdbg_logging_pathname + del cdbg_logging_lineno return None def _EvaluateExpressions(self, frame): From 24f7eff089a1fff184baf239e27fb87dd83acc72 Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Fri, 19 Aug 2016 13:25:43 -0700 Subject: [PATCH 017/241] Automated g4 rollback of changelist 130292452. 
*** Reason for rollback *** Broken GAE nightly python_cloud_debugger_aeta_test. Too hard to debug. We should verify the CL in PDC first. *** Original change description *** Modified python agent to include the request log id into the breakpoint labels for GAE Classic applications only. This is a re-submit of cl/125829312 that was rolled back. It should work now that we checked in the modified serializer to/from json/protobuf[2|3]. I will send another CL for the updated aeta python test, i prefer having the test get in into a separate CL (so that @emrekultursay can run the plain test the day after we check in against nightly and also the updated one). *** ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=130785034 --- src/googleclouddebugger/capture_collector.py | 22 -------------------- 1 file changed, 22 deletions(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index ed61027..131a7ec 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -25,7 +25,6 @@ import sys import types -import labels import cdbg_native as native # Externally defined functions to actually log a message. If these variables @@ -34,9 +33,6 @@ log_warning_message = None log_error_message = None -# Externally defined function to collect the request log id. -request_log_id_collector = None - _PRIMITIVE_TYPES = (int, long, float, complex, str, unicode, bool, types.NoneType) _DATE_TYPES = (datetime.date, datetime.time, datetime.timedelta) @@ -245,8 +241,6 @@ def Collect(self, top_frame): # didn't make it point to var_index of 0 ("buffer full") self.TrimVariableTable(i) - self._CaptureRequestLogId() - def CaptureFrameLocals(self, frame): """Captures local variables and arguments of the specified frame. 
@@ -464,22 +458,6 @@ def ProcessBufferFull(variables): ProcessBufferFull(self._var_table) ProcessBufferFull(self.breakpoint['evaluatedExpressions']) - def _CaptureRequestLogId(self): - """Captures the request log id if possible. - - The request log id is stored inside the breakpoint labels. - """ - # pylint: disable=not-callable - if callable(request_log_id_collector): - request_log_id = request_log_id_collector() - if request_log_id: - # We have a request_log_id, save it into the breakpoint labels - if 'labels' not in self.breakpoint: - self.breakpoint['labels'] = {} - - self.breakpoint['labels'][ - labels.Breakpoint.REQUEST_LOG_ID] = request_log_id - class LogCollector(object): """Captures minimal application snapshot and logs it to application log. From e0501e0630a88a3ae37676951ea180143c69d10b Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Wed, 14 Sep 2016 15:14:20 -0700 Subject: [PATCH 018/241] Fix parsing of timestamp when nanos=0. One Platform team broke us with their change. They are rolling out a new version of ESF that omits '.000'. Details: https://groups.google.com/a/google.com/forum/#!searchin/api-discuss/timestamp%7Csort:relevance/api-discuss/-Ko9cvPV82o/yTEa0ARQBgAJ Our agents were unable to parse createTime field, and therefore, could not set a 24h deadline. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=133180683 --- src/googleclouddebugger/python_breakpoint.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index 9651dd2..1791bba 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -122,9 +122,14 @@ def GetBreakpointId(self): def GetExpirationTime(self): """Computes the timestamp at which this breakpoint will expire.""" + # TODO(emrekultursay): Move this to a common method. + if '.' 
not in self.definition['createTime']: + fmt = '%Y-%m-%dT%H:%M:%S%Z' + else: + fmt = '%Y-%m-%dT%H:%M:%S.%f%Z' + create_datetime = datetime.strptime( - self.definition['createTime'].replace('Z', 'UTC'), - '%Y-%m-%dT%H:%M:%S.%f%Z') + self.definition['createTime'].replace('Z', 'UTC'), fmt) return create_datetime + self.expiration_period def ExpireBreakpoint(self): From ea06e7e3765263da7b3bdc9f44a31cdbbdf39189 Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Mon, 19 Sep 2016 11:40:37 -0700 Subject: [PATCH 019/241] Move version to a separate file to break circular dependency of gcp_hub_client[test] and opensource/__init__ ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=133610884 --- src/googleclouddebugger/__init__.py | 7 ++----- src/googleclouddebugger/gcp_hub_client.py | 5 ++--- src/googleclouddebugger/version.py | 7 +++++++ src/setup.py | 4 ++-- 4 files changed, 13 insertions(+), 10 deletions(-) create mode 100644 src/googleclouddebugger/version.py diff --git a/src/googleclouddebugger/__init__.py b/src/googleclouddebugger/__init__.py index 630ca8f..b1ef688 100644 --- a/src/googleclouddebugger/__init__.py +++ b/src/googleclouddebugger/__init__.py @@ -31,12 +31,9 @@ import capture_collector import cdbg_native import gcp_hub_client +import version -# Versioning scheme: MAJOR.MINOR -# The major version should only change on breaking changes. Minor version -# changes go between regular updates. Instances running debuggers with -# different major versions will show up as two different debuggees. 
-__version__ = '1.11' +__version__ = version.__version__ _flags = None _hub_client = None diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index dbeb34e..a977442 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -38,7 +38,7 @@ import labels import cdbg_native as native import uniquifier_computer -import googleclouddebugger +import version # This module catches all exception. This is safe because it runs in # a daemon thread (so we are not blocking Ctrl+C). We need to catch all @@ -425,8 +425,7 @@ def _QueryGcpProject(self, resource): def _GetDebuggee(self): """Builds the debuggee structure.""" - version = googleclouddebugger.__version__ - major_version = version.split('.')[0] + major_version = version.__version__.split('.')[0] debuggee = { 'project': self._project_number(), diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py new file mode 100644 index 0000000..91d8bfe --- /dev/null +++ b/src/googleclouddebugger/version.py @@ -0,0 +1,7 @@ +"""Version of the Google Python Cloud Debugger.""" + +# Versioning scheme: MAJOR.MINOR +# The major version should only change on breaking changes. Minor version +# changes go between regular updates. Instances running debuggers with +# different major versions will show up as two different debuggees. +__version__ = '1.11' diff --git a/src/setup.py b/src/setup.py index f747b8a..1409b56 100644 --- a/src/setup.py +++ b/src/setup.py @@ -74,9 +74,9 @@ def ReadConfig(section, value, default): # Unfortunately we can't do that because "googleclouddebugger" depends on # "cdbg_native" that hasn't been built yet. 
version = None -with open('googleclouddebugger/__init__.py', 'r') as init_file: +with open('googleclouddebugger/version.py', 'r') as version_file: version_pattern = re.compile(r"^\s*__version__\s*=\s*'([0-9.]*)'") - for line in init_file: + for line in version_file: match = version_pattern.match(line) if match: version = match.groups()[0] From c5c4c1a8089a0cba377f181a533ade0013ac17a5 Mon Sep 17 00:00:00 2001 From: danielsb Date: Mon, 19 Sep 2016 18:42:54 -0700 Subject: [PATCH 020/241] Change service auth to use JSON files, since p12 keyfiles are not well supported in oauth2client any more. Note that the integration test still uses the old code, because it runs against //third_party, and oauth2client.service_account didn't support ServiceAccountCredentials yet in that version. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=133660532 --- README.md | 13 +++---- src/googleclouddebugger/__init__.py | 22 +++++++++--- src/googleclouddebugger/gcp_hub_client.py | 44 ++++++++++++++++++----- 3 files changed, 58 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 51060e7..4d4d9f1 100644 --- a/README.md +++ b/README.md @@ -122,17 +122,16 @@ created in [Google Developers Console](https://console.developers.google.com). If your application runs on Google Compute Engine, [metadata service authentication](#Google_Compute_Engine) is an easier option. -The first step for this setup is to create the service account in .p12 format. +The first step for this setup is to create the service account in .json format. Please see this [page](https://cloud.google.com/storage/docs/authentication?hl=en#generating-a-private-key) for detailed instructions. If you don't have a Google Cloud Platform project, you can create one for free on [Google Developers Console](https://console.developers.google.com). 
Once you have the service account, please note the service account e-mail, [project ID and project number](https://developers.google.com/console/help/new/#projectnumber). -Then copy the .p12 file to all the machines that run your application. +Then copy the .json file to all the machines that run your application. -Then, enable the debugger agent in a similary way as described in -the [previous](#Google_Compute_Engine) section: +Then, enable the debugger agent using one of these two options: _Option A_: add this code to the beginning of your `main()` function: @@ -144,8 +143,7 @@ try: enable_service_account_auth=True, project_id='my-gcp-project-id', project_number='123456789', - service_account_email='123@developer.gserviceaccount.com', - service_account_p12_file='/opt/cdbg/gcp.p12') + service_account_json_file='/opt/cdbg/gcp.json') except ImportError: pass ``` @@ -158,8 +156,7 @@ python \ --enable_service_account_auth=1 \ --project_id=my-gcp-project-id \ --project_number=123456789 \ - --service_account_email=123@developer.gserviceaccount.com \ - --service_account_p12_file=/opt/cdbg/gcp.p12 \ + --service_account_json_file=/opt/cdbg/gcp.json \ -- \ myapp.py diff --git a/src/googleclouddebugger/__init__.py b/src/googleclouddebugger/__init__.py index b1ef688..b25f2cc 100644 --- a/src/googleclouddebugger/__init__.py +++ b/src/googleclouddebugger/__init__.py @@ -60,11 +60,23 @@ def _StartDebugger(): _breakpoints_manager.SetActiveBreakpoints) _hub_client.on_idle = _breakpoints_manager.CheckBreakpointsExpiration if _flags.get('enable_service_account_auth') in ('1', 'true', True): - _hub_client.EnableServiceAccountAuth( - _flags['project_id'], - _flags['project_number'], - _flags['service_account_email'], - _flags['service_account_p12_file']) + if _flags.get('service_account_p12_file'): + try: + _hub_client.EnableServiceAccountAuthP12( + _flags['project_id'], + _flags['project_number'], + _flags['service_account_email'], + _flags['service_account_p12_file']) + except 
NotImplementedError as e: + raise NotImplementedError( + '{0}\nYou must specify project_id, project_number, and ' + 'service_account_json_file in order to use service account ' + 'authentication.'.format(e)) + else: + _hub_client.EnableServiceAccountAuthJson( + _flags['project_id'], + _flags['project_number'], + _flags['service_account_json_file']) else: _hub_client.EnableGceAuth() _hub_client.InitializeDebuggeeLabels(_flags) diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index a977442..b3eda3f 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -33,6 +33,7 @@ from backoff import Backoff import httplib2 import oauth2client +from oauth2client import service_account from oauth2client.contrib.gce import AppAssertionCredentials import labels @@ -165,19 +166,46 @@ def InitializeDebuggeeLabels(self, flags): self._debuggee_labels['projectid'] = self._project_id() - def EnableServiceAccountAuth(self, project_id, project_number, - email, p12_file): - """Selects to use the service account authentication. + def EnableServiceAccountAuthP12(self, project_id, project_number, + email, p12_file): + """Selects service account authentication with a p12 file. + Using this function is not recommended. Use EnableServiceAccountAuthJson + for authentication, instead. The p12 file format is no longer recommended. Args: project_id: GCP project ID (e.g. myproject). project_number: numberic GCP project ID (e.g. 72386324623). - email: service account identifier (...@developer.gserviceaccount.com). - p12_file: path to the file with the private key. + email: service account identifier for use with p12_file + (...@developer.gserviceaccount.com). + p12_file: (deprecated) path to an old-style p12 file with the + private key. + Raises: + NotImplementedError indicates that the installed version of oauth2client + does not support using a p12 file. 
""" - with open(p12_file, 'rb') as f: - self._credentials = oauth2client.client.SignedJwtAssertionCredentials( - email, f.read(), scope=_CLOUD_PLATFORM_SCOPE) + try: + with open(p12_file, 'rb') as f: + self._credentials = oauth2client.client.SignedJwtAssertionCredentials( + email, f.read(), scope=_CLOUD_PLATFORM_SCOPE) + except AttributeError: + raise NotImplementedError( + 'P12 key files are no longer supported. Please use a JSON ' + 'credentials file instead.') + self._project_id = lambda: project_id + self._project_number = lambda: project_number + + def EnableServiceAccountAuthJson(self, project_id, project_number, + auth_json_file): + """Selects service account authentication using Json credentials. + + Args: + project_id: GCP project ID (e.g. myproject). + project_number: numberic GCP project ID (e.g. 72386324623). + auth_json_file: the JSON keyfile + """ + self._credentials = ( + service_account.ServiceAccountCredentials + .from_json_keyfile_name(auth_json_file, scopes=_CLOUD_PLATFORM_SCOPE)) self._project_id = lambda: project_id self._project_number = lambda: project_number From e843c13300cfcbfc88b72512de4fe6ef0deee734 Mon Sep 17 00:00:00 2001 From: danielsb Date: Thu, 22 Sep 2016 11:59:45 -0700 Subject: [PATCH 021/241] Use items() instead of iteritems() on dict, because iteritems() can sometimes hit runtime errors due to GC resizing the dict. For some applications, it happens very predictably. See b/29322454 for more information. This change also adds some handling for RuntimeError exceptions, in case there are other unknown causes of this sort of problem. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=133981932 --- src/googleclouddebugger/capture_collector.py | 150 ++++++++++++------- 1 file changed, 93 insertions(+), 57 deletions(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 131a7ec..72bfa59 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -207,39 +207,62 @@ def Collect(self, top_frame): # Evaluate call stack. frame = top_frame breakpoint_frames = self.breakpoint['stackFrames'] - while frame and (len(breakpoint_frames) < self.max_frames): - code = frame.f_code - if len(breakpoint_frames) < self.max_expand_frames: - frame_arguments, frame_locals = self.CaptureFrameLocals(frame) - else: - frame_arguments = [] - frame_locals = [] - - breakpoint_frames.append({ - 'function': code.co_name, - 'location': { - 'path': NormalizePath(code.co_filename), - 'line': frame.f_lineno}, - 'arguments': frame_arguments, - 'locals': frame_locals}) - frame = frame.f_back - - # Evaluate watched expressions. - if 'expressions' in self.breakpoint: - self.breakpoint['evaluatedExpressions'] = [ - self._CaptureExpression(top_frame, expression) for expression - in self.breakpoint['expressions']] - - # Explore variables table in BFS fashion. The variables table will grow - # inside CaptureVariable as we encounter new references. - i = 1 - while (i < len(self._var_table)) and (self._total_size < self.max_size): - self._var_table[i] = self.CaptureVariable(self._var_table[i], 0, False) - i += 1 + # Number of entries in _var_table. Starts at 1 (index 0 is the 'buffer full' + # status value). 
+ num_vars = 1 + try: + while frame and (len(breakpoint_frames) < self.max_frames): + code = frame.f_code + if len(breakpoint_frames) < self.max_expand_frames: + frame_arguments, frame_locals = self.CaptureFrameLocals(frame) + else: + frame_arguments = [] + frame_locals = [] + + breakpoint_frames.append({ + 'function': code.co_name, + 'location': { + 'path': NormalizePath(code.co_filename), + 'line': frame.f_lineno}, + 'arguments': frame_arguments, + 'locals': frame_locals}) + frame = frame.f_back + + # Evaluate watched expressions. + if 'expressions' in self.breakpoint: + self.breakpoint['evaluatedExpressions'] = [ + self._CaptureExpression(top_frame, expression) for expression + in self.breakpoint['expressions']] + + # Explore variables table in BFS fashion. The variables table will grow + # inside CaptureVariable as we encounter new references. + while (num_vars < len(self._var_table)) and ( + self._total_size < self.max_size): + try: + self._var_table[num_vars] = self.CaptureVariable( + self._var_table[num_vars], 0, False) + num_vars += 1 + except RuntimeError as e: + # Capture details on the failure and let the outer handler convert it + # to a status. + raise RuntimeError( + 'Failed while capturing an object of type {0}: {1}'.format( + type(self._var_table[num_vars]), e)) + + except BaseException as e: # pylint: disable=broad-except + # The variable table will get serialized even though there was a + # failure. The results can be useful for diagnosing the internal + # error so just trim the excess values. 
+ self.breakpoint['status'] = { + 'isError': True, + 'description': { + 'format': ( + 'INTERNAL ERROR: Debugger failed to capture frame $0: $1'), + 'parameters': [str(len(breakpoint_frames)), str(e)]}} # Trim variables table and change make all references to variables that # didn't make it point to var_index of 0 ("buffer full") - self.TrimVariableTable(i) + self.TrimVariableTable(num_vars) def CaptureFrameLocals(self, frame): """Captures local variables and arguments of the specified frame. @@ -276,14 +299,18 @@ def CaptureNamedVariable(self, name, value, depth=1): Returns: Formatted captured data as per Variable proto with name. """ - if not hasattr(name, '__dict__'): - name = str(name) - else: # TODO(vlif): call str(name) with immutability verifier here. - name = str(id(name)) - self._total_size += len(name) - - v = self.CaptureVariable(value, depth) - v['name'] = name + try: + if not hasattr(name, '__dict__'): + name = str(name) + else: # TODO(vlif): call str(name) with immutability verifier here. + name = str(id(name)) + self._total_size += len(name) + + v = self.CaptureVariable(value, depth) + v['name'] = name + except RuntimeError as e: + raise RuntimeError( + 'INTERNAL ERROR while capturing {0}: {1}'.format(name, e)) return v def CaptureVariablesList(self, items, depth, empty_message): @@ -298,23 +325,29 @@ def CaptureVariablesList(self, items, depth, empty_message): List of formatted variable objects. 
""" v = [] - for name, value in items: - if (self._total_size >= self.max_size) or (len(v) >= self.max_list_items): - v.append({ - 'status': { - 'refers_to': 'VARIABLE_VALUE', - 'description': { - 'format': 'Only first $0 items were captured', - 'parameters': [str(len(v))]}}}) - break - v.append(self.CaptureNamedVariable(name, value, depth)) - - if not v: - return [{'status': { - 'is_error': False, - 'refers_to': 'VARIABLE_NAME', - 'description': {'format': empty_message}}}] - + try: + for name, value in items: + if (self._total_size >= self.max_size) or ( + len(v) >= self.max_list_items): + v.append({ + 'status': { + 'refers_to': 'VARIABLE_VALUE', + 'description': { + 'format': 'Only first $0 items were captured', + 'parameters': [str(len(v))]}}}) + break + v.append(self.CaptureNamedVariable(name, value, depth)) + + if not v: + return [{'status': { + 'is_error': False, + 'refers_to': 'VARIABLE_NAME', + 'description': {'format': empty_message}}}] + except RuntimeError as e: + raise RuntimeError( + 'Failed while capturing variables: {0}\n' + 'The following elements were successfully captured: {1}'.format( + e, ', '.join([c['name'] for c in v if 'name' in c]))) return v def CaptureVariable(self, value, depth=1, can_enqueue=True): @@ -350,7 +383,10 @@ def CaptureVariable(self, value, depth=1, can_enqueue=True): return {'value': r, 'type': 'datetime.'+ type(value).__name__} if isinstance(value, dict): - return {'members': self.CaptureVariablesList(value.iteritems(), + # Do not use iteritems() here. If GC happens during iteration (which it + # often can for dictionaries containing large variables), you will get a + # RunTimeError exception. 
+ return {'members': self.CaptureVariablesList(value.items(), depth + 1, EMPTY_DICTIONARY), 'type': 'dict'} @@ -648,7 +684,7 @@ def _EvaluateExpression(frame, expression): try: return (True, native.CallImmutable(frame, code)) - except BaseException as e: + except BaseException as e: # pylint: disable=broad-except return (False, { 'isError': True, 'refersTo': 'VARIABLE_VALUE', From bf4f0e426cf9844e4620b1e544f11a83f774a53b Mon Sep 17 00:00:00 2001 From: danielsb Date: Thu, 22 Sep 2016 14:44:59 -0700 Subject: [PATCH 022/241] Bump debugger version number to 1.12 ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=134002327 --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index 91d8bfe..06c8349 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. -__version__ = '1.11' +__version__ = '1.12' From d604aff2237e7cf2d4839a8c12e2406e3ec929b1 Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Mon, 26 Sep 2016 14:46:43 -0700 Subject: [PATCH 023/241] Capture request log id in Python Agent (3rd attempt with only test changes). Please note that this should now work due to CL/133743860. Old attempts: 2nd attempt: CL/130292452, rolled back at CL/130785034 1st attempt: CL/125829312, rolled back at CL/128221983 Testing: 1. Passed Guitar tests for GCP and Borg. 2. Passed E2E tests for GAE PDC.
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=134329565 --- src/googleclouddebugger/capture_collector.py | 22 ++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 72bfa59..5d36d55 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -25,6 +25,7 @@ import sys import types +import labels import cdbg_native as native # Externally defined functions to actually log a message. If these variables @@ -33,6 +34,9 @@ log_warning_message = None log_error_message = None +# Externally defined function to collect the request log id. +request_log_id_collector = None + _PRIMITIVE_TYPES = (int, long, float, complex, str, unicode, bool, types.NoneType) _DATE_TYPES = (datetime.date, datetime.time, datetime.timedelta) @@ -264,6 +268,8 @@ def Collect(self, top_frame): # didn't make it point to var_index of 0 ("buffer full") self.TrimVariableTable(num_vars) + self._CaptureRequestLogId() + def CaptureFrameLocals(self, frame): """Captures local variables and arguments of the specified frame. @@ -494,6 +500,22 @@ def ProcessBufferFull(variables): ProcessBufferFull(self._var_table) ProcessBufferFull(self.breakpoint['evaluatedExpressions']) + def _CaptureRequestLogId(self): + """Captures the request log id if possible. + + The request log id is stored inside the breakpoint labels. + """ + # pylint: disable=not-callable + if callable(request_log_id_collector): + request_log_id = request_log_id_collector() + if request_log_id: + # We have a request_log_id, save it into the breakpoint labels + if 'labels' not in self.breakpoint: + self.breakpoint['labels'] = {} + + self.breakpoint['labels'][ + labels.Breakpoint.REQUEST_LOG_ID] = request_log_id + class LogCollector(object): """Captures minimal application snapshot and logs it to application log. 
From 8295a648cb68c626a9fdcd6558fd90c40cadea69 Mon Sep 17 00:00:00 2001 From: danielsb Date: Fri, 30 Sep 2016 18:35:34 -0700 Subject: [PATCH 024/241] Allow more recursion for list formatting in logpoints. Also make the formatting more consistent with Python. In the course of fixing this, I also found and fixed an issue with the formatting of slice objects (they're primitives, and not enumerable). ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=134851559 --- src/googleclouddebugger/capture_collector.py | 36 +++++++++++++++----- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 5d36d55..4973f19 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -38,9 +38,9 @@ request_log_id_collector = None _PRIMITIVE_TYPES = (int, long, float, complex, str, unicode, bool, - types.NoneType) + types.NoneType, types.SliceType) _DATE_TYPES = (datetime.date, datetime.time, datetime.timedelta) -_VECTOR_TYPES = (types.TupleType, types.ListType, types.SliceType, set) +_VECTOR_TYPES = (types.TupleType, types.ListType, set) # TODO(vlif): move to messages.py module. EMPTY_DICTIONARY = 'Empty dictionary' @@ -50,6 +50,16 @@ INVALID_EXPRESSION_INDEX = '' +def _ListTypeFormatString(value): + """Returns the appropriate format string for formatting a list object.""" + + if isinstance(value, types.TupleType): + return '({0})' + if isinstance(value, set): + return '{{{0}}}' + return '[{0}]' + + def NormalizePath(path): """Removes any Python system path prefix from the given path. @@ -539,9 +549,15 @@ def __init__(self, definition): # are truncated. self.max_value_len = 256 - # Maximum number of items in a list to capture. + # Maximum recursion depth. + self.max_depth = 2 + + # Maximum number of items in a list to capture at the top level. 
self.max_list_items = 10 + # When capturing recursively, limit on the size of sublists. + self.max_sublist_items = 5 + # Select log function. level = self._definition.get('logLevel') if not level or level == 'INFO': @@ -637,20 +653,21 @@ def FormatDictItem(key_value): ': ' + self._FormatValue(value, level + 1)) - def LimitedEnumerate(items, formatter): + def LimitedEnumerate(items, formatter, level=0): """Returns items in the specified enumerable enforcing threshold.""" count = 0 + limit = self.max_sublist_items if level > 0 else self.max_list_items for item in items: - if count == self.max_list_items: + if count == limit: yield '...' break yield formatter(item) count += 1 - def FormatList(items, formatter): + def FormatList(items, formatter, level=0): """Formats a list using a custom item formatter enforcing threshold.""" - return ', '.join(LimitedEnumerate(items, formatter)) + return ', '.join(LimitedEnumerate(items, formatter, level=level)) if isinstance(value, _PRIMITIVE_TYPES): return _TrimString(repr(value), # Primitive type, always immutable. @@ -659,14 +676,15 @@ def FormatList(items, formatter): if isinstance(value, _DATE_TYPES): return str(value) - if level > 0: + if level > self.max_depth: return str(type(value)) if isinstance(value, dict): return '{' + FormatList(value.iteritems(), FormatDictItem) + '}' if isinstance(value, _VECTOR_TYPES): - return FormatList(value, lambda item: self._FormatValue(item, level + 1)) + return _ListTypeFormatString(value).format(FormatList( + value, lambda item: self._FormatValue(item, level + 1), level=level)) if isinstance(value, types.FunctionType): return 'function ' + value.func_name From cddfde9745c1c0520c59d5641fb952a7a9863711 Mon Sep 17 00:00:00 2001 From: danielsb Date: Fri, 14 Oct 2016 09:23:19 -0700 Subject: [PATCH 025/241] Python debugger: Indicate expiration using the refers_to field, to allow for better display. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=136163985 --- src/googleclouddebugger/python_breakpoint.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index 1791bba..5ab46ea 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -41,8 +41,10 @@ 'the snapshot to a less frequently called statement.') MUTABLE_CONDITION = ( 'Only immutable expressions can be used in snapshot conditions') -BREAKPOINT_EXPIRED = ( +SNAPSHOT_EXPIRED = ( 'The snapshot has expired') +LOGPOINT_EXPIRED = ( + 'The logpoint has expired') INTERNAL_ERROR = ( 'Internal error occurred') @@ -138,11 +140,15 @@ def ExpireBreakpoint(self): if not self._SetCompleted(): return + if self.definition.get('action') == 'LOG': + message = LOGPOINT_EXPIRED + else: + message = SNAPSHOT_EXPIRED self._CompleteBreakpoint({ 'status': { 'isError': True, - 'refersTo': 'UNSPECIFIED', - 'description': {'format': BREAKPOINT_EXPIRED}}}) + 'refersTo': 'BREAKPOINT_AGE', + 'description': {'format': message}}}) def _TryActivateBreakpoint(self): """Sets the breakpoint if the module has already been loaded. From 866fa5dc8fca95fc53178b954dc1997029638335 Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Mon, 7 Mar 2016 21:50:49 -0800 Subject: [PATCH 026/241] Add a finite timeout value to be used in GCE and MVM python agents. This is similar to CL/134140973. I observed that when a MVM instance is created, the very first API calls have trouble making HTTP calls, and these calls get stuck indefinitely due to infinite timeout. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=136677939 --- src/googleclouddebugger/gcp_hub_client.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index b3eda3f..50f1856 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -69,6 +69,10 @@ labels.Debuggee.PROJECT_ID, labels.Debuggee.MODULE, labels.Debuggee.VERSION ] +# HTTP timeout when accessing the cloud debugger API. It is selected to be +# longer than the typical controller.breakpoints.list hanging get latency +# of 40 seconds. +_HTTP_TIMEOUT_SECONDS = 100 class GcpHubClient(object): """Controller API client. @@ -264,7 +268,7 @@ def EnqueueBreakpointUpdate(self, breakpoint): self._new_updates.set() # Wake up the worker thread to send immediately. def _BuildService(self): - http = httplib2.Http() + http = httplib2.Http(timeout=_HTTP_TIMEOUT_SECONDS) http = self._credentials.authorize(http) api = apiclient.discovery.build('clouddebugger', 'v2', http=http) From 1ef53efd77672674830038e390035e49754f6050 Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Thu, 20 Oct 2016 11:04:17 -0700 Subject: [PATCH 027/241] Use successOnTimeout in GCE and MVM python agents. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=136734682 --- src/googleclouddebugger/gcp_hub_client.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index 50f1856..65dc873 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -353,7 +353,8 @@ def _ListActiveBreakpoints(self, service): """ try: response = service.debuggees().breakpoints().list( - debuggeeId=self._debuggee_id, waitToken=self._wait_token).execute() + debuggeeId=self._debuggee_id, waitToken=self._wait_token, + successOnTimeout=True).execute() breakpoints = response.get('breakpoints') or [] self._wait_token = response.get('nextWaitToken') if cmp(self._breakpoints, breakpoints) != 0: @@ -363,15 +364,14 @@ def _ListActiveBreakpoints(self, service): len(self._breakpoints), self._wait_token)) self.on_active_breakpoints_changed(copy.deepcopy(self._breakpoints)) except Exception as e: - if not isinstance(e, apiclient.errors.HttpError) or e.resp.status != 409: - native.LogInfo('Failed to query active breakpoints: ' + - traceback.format_exc()) + native.LogInfo('Failed to query active breakpoints: ' + + traceback.format_exc()) - # Forget debuggee ID to trigger repeated debuggee registration. Once the - # registration succeeds, the worker thread will retry this query - self._debuggee_id = None + # Forget debuggee ID to trigger repeated debuggee registration. 
Once the + # registration succeeds, the worker thread will retry this query + self._debuggee_id = None - return (True, self.list_backoff.Failed()) + return (True, self.list_backoff.Failed()) self.list_backoff.Succeeded() return (False, 0) From 596c48a3ea36dd53a61ce5ad43deb6c05298c907 Mon Sep 17 00:00:00 2001 From: danielsb Date: Tue, 8 Nov 2016 10:46:58 -0800 Subject: [PATCH 028/241] Remove the import hook after successfully setting a breakpoint, instead of waiting until it is completed. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=138536493 --- src/googleclouddebugger/python_breakpoint.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index 5ab46ea..6e8476f 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -208,6 +208,7 @@ def _TryActivateBreakpoint(self): condition, self._BreakpointEvent) + self._RemoveImportHook() return True def _FindCodeObject(self): From fcb5ce82ca4ecc7b82cd0ab495c4b195a4cd63e1 Mon Sep 17 00:00:00 2001 From: danielsb Date: Tue, 8 Nov 2016 11:17:22 -0800 Subject: [PATCH 029/241] Increment python debugger version number. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=138540763 --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index 06c8349..05e89ca 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. 
-__version__ = '1.12' +__version__ = '1.13' From 84d4febc099a4dd1d10cdc86f9828c7468ef447c Mon Sep 17 00:00:00 2001 From: danielsb Date: Wed, 30 Nov 2016 11:18:39 -0800 Subject: [PATCH 030/241] Prioritize collecting expression values above anything else, and remove the per-object limits on size (keep only the limit on total size). ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=140627529 --- src/googleclouddebugger/capture_collector.py | 93 ++++++++++++-------- 1 file changed, 58 insertions(+), 35 deletions(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 4973f19..bffdda2 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -136,6 +136,22 @@ def SetLogger(logger): logger.addFilter(LineNoFilter()) +class _CaptureLimits(object): + """Limits for variable capture. + + Args: + max_value_len: Maximum number of character to allow for a single string + value. Longer strings are truncated. + max_list_items: Maximum number of items in a list to capture. + max_depth: Maximum depth of dictionaries to capture. + """ + + def __init__(self, max_value_len=256, max_list_items=25, max_depth=5): + self.max_value_len = max_value_len + self.max_list_items = max_list_items + self.max_depth = max_depth + + class CaptureCollector(object): """Captures application state snapshot. @@ -198,15 +214,16 @@ def __init__(self, definition): # names is not counted. self.max_size = 32768 # 32 KB - # Maximum number of character to allow for a single value. Longer strings - # are truncated. - self.max_value_len = 256 + self.default_capture_limits = _CaptureLimits() - # Maximum number of items in a list to capture. - self.max_list_items = 25 - - # Maximum depth of dictionaries to capture. - self.max_depth = 5 + # When the user provides an expression, they've indicated that they're + # interested in some specific data. 
Use higher per-object capture limits + # for expressions. We don't want to globally increase capture limits, + # because in the case where the user has not indicated a preference, we + # don't want a single large object on the stack to use the entire max_size + # quota and hide the rest of the data. + self.expression_capture_limits = _CaptureLimits(max_value_len=32768, + max_list_items=32768) def Collect(self, top_frame): """Collects call stack, local variables and objects. @@ -225,6 +242,12 @@ def Collect(self, top_frame): # status value). num_vars = 1 try: + # Evaluate watched expressions. + if 'expressions' in self.breakpoint: + self.breakpoint['evaluatedExpressions'] = [ + self._CaptureExpression(top_frame, expression) for expression + in self.breakpoint['expressions']] + while frame and (len(breakpoint_frames) < self.max_frames): code = frame.f_code if len(breakpoint_frames) < self.max_expand_frames: @@ -242,19 +265,14 @@ def Collect(self, top_frame): 'locals': frame_locals}) frame = frame.f_back - # Evaluate watched expressions. - if 'expressions' in self.breakpoint: - self.breakpoint['evaluatedExpressions'] = [ - self._CaptureExpression(top_frame, expression) for expression - in self.breakpoint['expressions']] - # Explore variables table in BFS fashion. The variables table will grow # inside CaptureVariable as we encounter new references. while (num_vars < len(self._var_table)) and ( self._total_size < self.max_size): try: self._var_table[num_vars] = self.CaptureVariable( - self._var_table[num_vars], 0, False) + self._var_table[num_vars], 0, self.default_capture_limits, + can_enqueue=False) num_vars += 1 except RuntimeError as e: # Capture details on the failure and let the outer handler convert it @@ -290,7 +308,8 @@ def CaptureFrameLocals(self, frame): (arguments, locals) tuple. """ # Capture all local variables (including method arguments). 
- variables = {n: self.CaptureNamedVariable(n, v) + variables = {n: self.CaptureNamedVariable(n, v, 1, + self.default_capture_limits) for n, v in frame.f_locals.viewitems()} # Split between locals and arguments (keeping arguments in the right order). @@ -304,13 +323,14 @@ def CaptureFrameLocals(self, frame): return (frame_arguments, list(variables.viewvalues())) - def CaptureNamedVariable(self, name, value, depth=1): + def CaptureNamedVariable(self, name, value, depth, limits): """Appends name to the product of CaptureVariable. Args: name: name of the variable. value: data to capture depth: nested depth of dictionaries and vectors so far. + limits: Per-object limits for capturing variable data. Returns: Formatted captured data as per Variable proto with name. @@ -322,20 +342,21 @@ def CaptureNamedVariable(self, name, value, depth=1): name = str(id(name)) self._total_size += len(name) - v = self.CaptureVariable(value, depth) + v = self.CaptureVariable(value, depth, limits) v['name'] = name except RuntimeError as e: raise RuntimeError( 'INTERNAL ERROR while capturing {0}: {1}'.format(name, e)) return v - def CaptureVariablesList(self, items, depth, empty_message): + def CaptureVariablesList(self, items, depth, empty_message, limits): """Captures list of named items. Args: items: iterable of (name, value) tuples. depth: nested depth of dictionaries and vectors for items. empty_message: info status message to set if items is empty. + limits: Per-object limits for capturing variable data. Returns: List of formatted variable objects. 
@@ -344,7 +365,7 @@ def CaptureVariablesList(self, items, depth, empty_message): try: for name, value in items: if (self._total_size >= self.max_size) or ( - len(v) >= self.max_list_items): + len(v) >= limits.max_list_items): v.append({ 'status': { 'refers_to': 'VARIABLE_VALUE', @@ -352,7 +373,7 @@ def CaptureVariablesList(self, items, depth, empty_message): 'format': 'Only first $0 items were captured', 'parameters': [str(len(v))]}}}) break - v.append(self.CaptureNamedVariable(name, value, depth)) + v.append(self.CaptureNamedVariable(name, value, depth, limits)) if not v: return [{'status': { @@ -366,7 +387,7 @@ def CaptureVariablesList(self, items, depth, empty_message): e, ', '.join([c['name'] for c in v if 'name' in c]))) return v - def CaptureVariable(self, value, depth=1, can_enqueue=True): + def CaptureVariable(self, value, depth, limits, can_enqueue=True): """Captures a single nameless object into Variable message. TODO(vlif): safely evaluate iterable types. @@ -375,12 +396,13 @@ def CaptureVariable(self, value, depth=1, can_enqueue=True): Args: value: data to capture depth: nested depth of dictionaries and vectors so far. + limits: Per-object limits for capturing variable data. can_enqueue: allows referencing the object in variables table. Returns: Formatted captured data as per Variable proto. """ - if depth == self.max_depth: + if depth == limits.max_depth: return {'varTableIndex': 0} # Buffer full. if value is None: @@ -389,7 +411,8 @@ def CaptureVariable(self, value, depth=1, can_enqueue=True): if isinstance(value, _PRIMITIVE_TYPES): r = _TrimString(repr(value), # Primitive type, always immutable. - self.max_value_len) + min(limits.max_value_len, + self.max_size - self._total_size)) self._total_size += len(r) return {'value': r, 'type': type(value).__name__} @@ -402,16 +425,15 @@ def CaptureVariable(self, value, depth=1, can_enqueue=True): # Do not use iteritems() here. 
If GC happens during iteration (which it # often can for dictionaries containing large variables), you will get a # RunTimeError exception. - return {'members': self.CaptureVariablesList(value.items(), - depth + 1, - EMPTY_DICTIONARY), + return {'members': + self.CaptureVariablesList(value.items(), depth + 1, + EMPTY_DICTIONARY, limits), 'type': 'dict'} if isinstance(value, _VECTOR_TYPES): fields = self.CaptureVariablesList( (('[%d]' % i, x) for i, x in enumerate(value)), - depth + 1, - EMPTY_COLLECTION) + depth + 1, EMPTY_COLLECTION, limits) return {'members': fields, 'type': type(value).__name__} if isinstance(value, types.FunctionType): @@ -434,9 +456,9 @@ def CaptureVariable(self, value, depth=1, can_enqueue=True): continue fields, object_type = pretty_value - return {'members': self.CaptureVariablesList(fields, - depth + 1, - OBJECT_HAS_NO_FIELDS), + return {'members': + self.CaptureVariablesList(fields, depth + 1, OBJECT_HAS_NO_FIELDS, + limits), 'type': object_type} if not hasattr(value, '__dict__'): @@ -446,7 +468,7 @@ def CaptureVariable(self, value, depth=1, can_enqueue=True): return {'value': r} if value.__dict__: - v = self.CaptureVariable(value.__dict__, depth + 1) + v = self.CaptureVariable(value.__dict__, depth + 1, limits) else: v = {'members': [ @@ -481,7 +503,8 @@ def _CaptureExpression(self, frame, expression): if not rc: return {'name': expression, 'status': value} - return self.CaptureNamedVariable(expression, value) + return self.CaptureNamedVariable(expression, value, 0, + self.expression_capture_limits) def TrimVariableTable(self, new_size): """Trims the variable table in the formatted breakpoint message. 
@@ -504,11 +527,11 @@ def ProcessBufferFull(variables): ProcessBufferFull(members) del self._var_table[new_size:] + ProcessBufferFull(self.breakpoint['evaluatedExpressions']) for stack_frame in self.breakpoint['stackFrames']: ProcessBufferFull(stack_frame['arguments']) ProcessBufferFull(stack_frame['locals']) ProcessBufferFull(self._var_table) - ProcessBufferFull(self.breakpoint['evaluatedExpressions']) def _CaptureRequestLogId(self): """Captures the request log id if possible. From c0242179c4a0549134f0f5284e5ef3cff8709c02 Mon Sep 17 00:00:00 2001 From: aasun Date: Thu, 5 Jan 2017 18:31:42 -0800 Subject: [PATCH 031/241] [Python] Include class name in function name in the call stack ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=143733607 --- src/googleclouddebugger/capture_collector.py | 32 +++++++++++++++----- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index bffdda2..69eba7d 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -16,6 +16,7 @@ # TODO(vlif): rename this file to collector.py. +from collections import namedtuple import copy import datetime import inspect @@ -250,19 +251,25 @@ def Collect(self, top_frame): while frame and (len(breakpoint_frames) < self.max_frames): code = frame.f_code + function = code.co_name + frame_arguments = [] + frame_locals = [] if len(breakpoint_frames) < self.max_expand_frames: - frame_arguments, frame_locals = self.CaptureFrameLocals(frame) - else: - frame_arguments = [] - frame_locals = [] + frame_capture = self.CaptureFrameLocals(frame) + frame_arguments = frame_capture.arguments + frame_locals = frame_capture.locals + if frame_capture.classname: + function = frame_capture.classname + '.' 
+ code.co_name breakpoint_frames.append({ - 'function': code.co_name, + 'function': function, 'location': { 'path': NormalizePath(code.co_filename), - 'line': frame.f_lineno}, + 'line': frame.f_lineno + }, 'arguments': frame_arguments, - 'locals': frame_locals}) + 'locals': frame_locals + }) frame = frame.f_back # Explore variables table in BFS fashion. The variables table will grow @@ -317,11 +324,20 @@ def CaptureFrameLocals(self, frame): if frame.f_code.co_flags & inspect.CO_VARARGS: nargs += 1 if frame.f_code.co_flags & inspect.CO_VARKEYWORDS: nargs += 1 + # Retrieve class name of function if we think it is a member function + # This functions under the assumption that member functions will name their + # first parameter argument 'self' but has some edge-cases. + if nargs >= 1 and 'self' == frame.f_code.co_varnames[0]: + frame_class = frame.f_locals['self'].__class__.__name__ + else: + frame_class = None + frame_arguments = [] for argname in frame.f_code.co_varnames[:nargs]: if argname in variables: frame_arguments.append(variables.pop(argname)) - return (frame_arguments, list(variables.viewvalues())) + frame = namedtuple('frame', 'classname arguments locals') + return frame(frame_class, frame_arguments, list(variables.viewvalues())) def CaptureNamedVariable(self, name, value, depth, limits): """Appends name to the product of CaptureVariable. From 7b370d0c3daf2ded37ecc8b86ccba3e260f3147e Mon Sep 17 00:00:00 2001 From: aasun Date: Fri, 6 Jan 2017 15:17:08 -0800 Subject: [PATCH 032/241] Automated g4 rollback of changelist 143733607. 
*** Reason for rollback *** Failing guitar tests, not getting class name in MVM/GAE *** Original change description *** [Python] Include class name in function name in the call stack *** ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=143821844 --- src/googleclouddebugger/capture_collector.py | 32 +++++--------------- 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 69eba7d..bffdda2 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -16,7 +16,6 @@ # TODO(vlif): rename this file to collector.py. -from collections import namedtuple import copy import datetime import inspect @@ -251,25 +250,19 @@ def Collect(self, top_frame): while frame and (len(breakpoint_frames) < self.max_frames): code = frame.f_code - function = code.co_name - frame_arguments = [] - frame_locals = [] if len(breakpoint_frames) < self.max_expand_frames: - frame_capture = self.CaptureFrameLocals(frame) - frame_arguments = frame_capture.arguments - frame_locals = frame_capture.locals - if frame_capture.classname: - function = frame_capture.classname + '.' + code.co_name + frame_arguments, frame_locals = self.CaptureFrameLocals(frame) + else: + frame_arguments = [] + frame_locals = [] breakpoint_frames.append({ - 'function': function, + 'function': code.co_name, 'location': { 'path': NormalizePath(code.co_filename), - 'line': frame.f_lineno - }, + 'line': frame.f_lineno}, 'arguments': frame_arguments, - 'locals': frame_locals - }) + 'locals': frame_locals}) frame = frame.f_back # Explore variables table in BFS fashion. 
The variables table will grow @@ -324,20 +317,11 @@ def CaptureFrameLocals(self, frame): if frame.f_code.co_flags & inspect.CO_VARARGS: nargs += 1 if frame.f_code.co_flags & inspect.CO_VARKEYWORDS: nargs += 1 - # Retrieve class name of function if we think it is a member function - # This functions under the assumption that member functions will name their - # first parameter argument 'self' but has some edge-cases. - if nargs >= 1 and 'self' == frame.f_code.co_varnames[0]: - frame_class = frame.f_locals['self'].__class__.__name__ - else: - frame_class = None - frame_arguments = [] for argname in frame.f_code.co_varnames[:nargs]: if argname in variables: frame_arguments.append(variables.pop(argname)) - frame = namedtuple('frame', 'classname arguments locals') - return frame(frame_class, frame_arguments, list(variables.viewvalues())) + return (frame_arguments, list(variables.viewvalues())) def CaptureNamedVariable(self, name, value, depth, limits): """Appends name to the product of CaptureVariable. From 513b8aa788015067590fd4757e1fea9d16531e7f Mon Sep 17 00:00:00 2001 From: danielsb Date: Fri, 6 Jan 2017 16:15:30 -0800 Subject: [PATCH 033/241] Do a dummy call to strptime at startup to work around a concurrency issue on the first call to strptime. See http://bugs.python.org/issue7980 for discussion of the Python bug. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=143828002 --- src/googleclouddebugger/python_breakpoint.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index 6e8476f..e369efc 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -66,6 +66,13 @@ 'refersTo': 'BREAKPOINT_CONDITION', 'description': {'format': MUTABLE_CONDITION}})]) +# The implementation of datetime.strptime imports an undocumented module called +# _strptime. 
If it happens at the wrong time, we can get an exception about +# trying to import while another thread holds the import lock. This dummy call +# to strptime ensures that the module is loaded at startup. +# See http://bugs.python.org/issue7980 for discussion of the Python bug. +datetime.strptime('2017-01-01', '%Y-%m-%d') + class PythonBreakpoint(object): """Handles a single Python breakpoint. From ea0b5a4cd2e190d391e33cfbcad715d533179b0a Mon Sep 17 00:00:00 2001 From: danielsb Date: Tue, 10 Jan 2017 17:07:00 -0800 Subject: [PATCH 034/241] Populate the funcName and filename fields of the LogRecord with the correct values. The funcName field is propagated unchanged by appengine logging. The filename field is not currently used anywhere, but its value is currently inconsistent with pathname for logpoints. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=144147325 --- src/googleclouddebugger/capture_collector.py | 33 +++++++++++++------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index bffdda2..da4e502 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -104,25 +104,34 @@ def filter(self, record): # particular invocation came from our logging code. if record.pathname != inspect.currentframe().f_code.co_filename: return True - pathname, lineno = GetLoggingFileAndLine() - if pathname and lineno: + pathname, lineno, func_name = GetLoggingLocation() + if pathname: record.pathname = pathname + record.filename = os.path.basename(pathname) record.lineno = lineno + record.funcName = func_name return True -def GetLoggingFileAndLine(): - """Search for and return the file and line number from the log collector.""" +def GetLoggingLocation(): + """Search for and return the file and line number from the log collector. 
+ + Returns: + (pathname, lineno, func_name) The full path, line number, and function name + for the logpoint location. + """ frame = inspect.currentframe() this_file = frame.f_code.co_filename frame = frame.f_back while frame: if this_file == frame.f_code.co_filename: - if 'cdbg_logging_pathname' in frame.f_locals: - return (frame.f_locals['cdbg_logging_pathname'], - frame.f_locals.get('cdbg_logging_lineno', None)) + if 'cdbg_logging_location' in frame.f_locals: + ret = frame.f_locals['cdbg_logging_location'] + if len(ret) != 3: + return (None, None, None) + return ret frame = frame.f_back - return (None, None) + return (None, None, None) def SetLogger(logger): @@ -611,11 +620,11 @@ def Log(self, frame): self._definition.get('logMessageFormat', ''), self._EvaluateExpressions(frame)) - cdbg_logging_pathname = NormalizePath(frame.f_code.co_filename) - cdbg_logging_lineno = frame.f_lineno + cdbg_logging_location = ( + NormalizePath(frame.f_code.co_filename), frame.f_lineno, + frame.f_code.co_name) self._log_message('LOGPOINT: ' + message) - del cdbg_logging_pathname - del cdbg_logging_lineno + del cdbg_logging_location return None def _EvaluateExpressions(self, frame): From 2a2f1057d05008994028e671f5eae275d031db96 Mon Sep 17 00:00:00 2001 From: danielsb Date: Wed, 11 Jan 2017 11:50:42 -0800 Subject: [PATCH 035/241] Bump python debugger version number. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=144232316 --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index 05e89ca..1ff90e3 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. 
-__version__ = '1.13' +__version__ = '1.14' From cc1f33c71bdf9179a8e64bbfeb10643888f986f4 Mon Sep 17 00:00:00 2001 From: aasun Date: Wed, 18 Jan 2017 15:18:03 -0800 Subject: [PATCH 036/241] [Python] w/fixes Include class name in function name in the call stack ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=144889140 --- src/googleclouddebugger/capture_collector.py | 32 +++++++++++++++----- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index da4e502..75ed1aa 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -16,6 +16,7 @@ # TODO(vlif): rename this file to collector.py. +from collections import namedtuple import copy import datetime import inspect @@ -259,19 +260,25 @@ def Collect(self, top_frame): while frame and (len(breakpoint_frames) < self.max_frames): code = frame.f_code + function = code.co_name + frame_arguments = [] + frame_locals = [] if len(breakpoint_frames) < self.max_expand_frames: - frame_arguments, frame_locals = self.CaptureFrameLocals(frame) - else: - frame_arguments = [] - frame_locals = [] + frame_capture = self.CaptureFrameLocals(frame) + frame_arguments = frame_capture.arguments + frame_locals = frame_capture.locals + if frame_capture.classname: + function = frame_capture.classname + '.' + code.co_name breakpoint_frames.append({ - 'function': code.co_name, + 'function': function, 'location': { 'path': NormalizePath(code.co_filename), - 'line': frame.f_lineno}, + 'line': frame.f_lineno + }, 'arguments': frame_arguments, - 'locals': frame_locals}) + 'locals': frame_locals + }) frame = frame.f_back # Explore variables table in BFS fashion. 
The variables table will grow @@ -326,11 +333,20 @@ def CaptureFrameLocals(self, frame): if frame.f_code.co_flags & inspect.CO_VARARGS: nargs += 1 if frame.f_code.co_flags & inspect.CO_VARKEYWORDS: nargs += 1 + # Retrieve class name of function if we think it is a member function + # This functions under the assumption that member functions will name their + # first parameter argument 'self' but has some edge-cases. + if nargs >= 1 and 'self' == frame.f_code.co_varnames[0]: + frame_class = frame.f_locals['self'].__class__.__name__ + else: + frame_class = None + frame_arguments = [] for argname in frame.f_code.co_varnames[:nargs]: if argname in variables: frame_arguments.append(variables.pop(argname)) - return (frame_arguments, list(variables.viewvalues())) + frame = namedtuple('frame', 'classname arguments locals') + return frame(frame_class, frame_arguments, list(variables.viewvalues())) def CaptureNamedVariable(self, name, value, depth, limits): """Appends name to the product of CaptureVariable. From d25dbd5af7167d6dcb9f73f13cca0d63ae2149f0 Mon Sep 17 00:00:00 2001 From: aasun Date: Thu, 26 Jan 2017 18:46:38 -0800 Subject: [PATCH 037/241] [Python] Add class name to logging. Adding the class name has already been added to stack frame captures, however this feature has not been added to captured logpoints. In an effort to keep our captures consistent, we will apply the same method name capture logic to both the stack frame and logpoints. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=145750191 --- src/googleclouddebugger/capture_collector.py | 51 +++++++++++--------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 75ed1aa..3dfbd2d 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -16,7 +16,6 @@ # TODO(vlif): rename this file to collector.py. 
-from collections import namedtuple import copy import datetime import inspect @@ -260,18 +259,14 @@ def Collect(self, top_frame): while frame and (len(breakpoint_frames) < self.max_frames): code = frame.f_code - function = code.co_name - frame_arguments = [] - frame_locals = [] if len(breakpoint_frames) < self.max_expand_frames: - frame_capture = self.CaptureFrameLocals(frame) - frame_arguments = frame_capture.arguments - frame_locals = frame_capture.locals - if frame_capture.classname: - function = frame_capture.classname + '.' + code.co_name + frame_arguments, frame_locals = self.CaptureFrameLocals(frame) + else: + frame_arguments = [] + frame_locals = [] breakpoint_frames.append({ - 'function': function, + 'function': _GetFrameCodeObjectName(frame), 'location': { 'path': NormalizePath(code.co_filename), 'line': frame.f_lineno @@ -333,20 +328,11 @@ def CaptureFrameLocals(self, frame): if frame.f_code.co_flags & inspect.CO_VARARGS: nargs += 1 if frame.f_code.co_flags & inspect.CO_VARKEYWORDS: nargs += 1 - # Retrieve class name of function if we think it is a member function - # This functions under the assumption that member functions will name their - # first parameter argument 'self' but has some edge-cases. - if nargs >= 1 and 'self' == frame.f_code.co_varnames[0]: - frame_class = frame.f_locals['self'].__class__.__name__ - else: - frame_class = None - frame_arguments = [] for argname in frame.f_code.co_varnames[:nargs]: if argname in variables: frame_arguments.append(variables.pop(argname)) - frame = namedtuple('frame', 'classname arguments locals') - return frame(frame_class, frame_arguments, list(variables.viewvalues())) + return (frame_arguments, list(variables.viewvalues())) def CaptureNamedVariable(self, name, value, depth, limits): """Appends name to the product of CaptureVariable. 
@@ -636,9 +622,9 @@ def Log(self, frame): self._definition.get('logMessageFormat', ''), self._EvaluateExpressions(frame)) - cdbg_logging_location = ( - NormalizePath(frame.f_code.co_filename), frame.f_lineno, - frame.f_code.co_name) + cdbg_logging_location = (NormalizePath(frame.f_code.co_filename), + frame.f_lineno, _GetFrameCodeObjectName(frame)) + self._log_message('LOGPOINT: ' + message) del cdbg_logging_location return None @@ -781,6 +767,25 @@ def _EvaluateExpression(frame, expression): 'parameters': [e.message]}}) +def _GetFrameCodeObjectName(frame): + """Gets the code object name for the frame. + + Args: + frame: the frame to get the name from + + Returns: + The function name if the code is a static function or the class name with + the method name if it is an member function. + """ + # This functions under the assumption that member functions will name their + # first parameter argument 'self' but has some edge-cases. + if frame.f_code.co_argcount >= 1 and 'self' == frame.f_code.co_varnames[0]: + return (frame.f_locals['self'].__class__.__name__ + + '.' + frame.f_code.co_name) + else: + return frame.f_code.co_name + + def _FormatMessage(template, parameters): """Formats the message. From 716187fe877e967c7170246e62970a7c1625fe3e Mon Sep 17 00:00:00 2001 From: danielsb Date: Tue, 31 Jan 2017 15:44:55 -0800 Subject: [PATCH 038/241] Rename module->service. App Engine Flex has renamed several environment variables, so handle the new names. The compat and standard environments still use the old values, so continue to support them. Per offline discussion, I'm also adding GAE_DEPLOYMENT_ID as an alternative to GAE_MINOR_VERSION. GAE_MINOR_VERSION has already been removed in Flex.
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=146176224 --- src/googleclouddebugger/gcp_hub_client.py | 26 ++++++++++++++--------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index 65dc873..9843e4a 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -58,9 +58,9 @@ # a map is optional environment variable that can be used to set the flag # (flags still take precedence). _DEBUGGEE_LABELS = { - labels.Debuggee.MODULE: 'GAE_MODULE_NAME', - labels.Debuggee.VERSION: 'GAE_MODULE_VERSION', - labels.Debuggee.MINOR_VERSION: 'GAE_MINOR_VERSION' + labels.Debuggee.MODULE: ['GAE_SERVICE', 'GAE_MODULE_NAME'], + labels.Debuggee.VERSION: ['GAE_VERSION', 'GAE_MODULE_VERSION'], + labels.Debuggee.MINOR_VERSION: ['GAE_DEPLOYMENT_ID', 'GAE_MINOR_VERSION'] } # Debuggee labels used to format debuggee description (ordered). The minor @@ -74,6 +74,7 @@ # of 40 seconds. _HTTP_TIMEOUT_SECONDS = 100 + class GcpHubClient(object): """Controller API client. @@ -155,13 +156,18 @@ def InitializeDebuggeeLabels(self, flags): """ self._debuggee_labels = {} - for (label, env) in _DEBUGGEE_LABELS.iteritems(): - if env and env in os.environ: - # Special case for GAE_MODULE_NAME. We omit the "default" module - # to stay consistent with AppEngine. - if env == 'GAE_MODULE_NAME' and os.environ[env] == 'default': - continue - self._debuggee_labels[label] = os.environ[env] + for (label, var_names) in _DEBUGGEE_LABELS.iteritems(): + # var_names is a list of possible environment variables that may contain + # the label value. Find the first one that is set. + for name in var_names: + value = os.environ.get(name) + if value: + # Special case for module. We omit the "default" module + # to stay consistent with AppEngine. 
+ if label == labels.Debuggee.MODULE and value == 'default': + break + self._debuggee_labels[label] = value + break if flags: self._debuggee_labels.update( From 3814ed9df9c27bf0236344ce09e6f708f9ae1336 Mon Sep 17 00:00:00 2001 From: danielsb Date: Mon, 13 Feb 2017 10:56:15 -0800 Subject: [PATCH 039/241] Add the ability to capture bytearray values, rather than just the type. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=147365162 --- src/googleclouddebugger/capture_collector.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 3dfbd2d..b1739fd 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -37,8 +37,8 @@ # Externally defined function to collect the request log id. request_log_id_collector = None -_PRIMITIVE_TYPES = (int, long, float, complex, str, unicode, bool, - types.NoneType, types.SliceType) +_PRIMITIVE_TYPES = (int, long, float, complex, types.StringTypes, bool, + types.NoneType, types.SliceType, bytearray) _DATE_TYPES = (datetime.date, datetime.time, datetime.timedelta) _VECTOR_TYPES = (types.TupleType, types.ListType, set) From be608a1274207ea98739ff66495cfe99c5bc6286 Mon Sep 17 00:00:00 2001 From: maxgold Date: Tue, 28 Feb 2017 21:10:26 -0800 Subject: [PATCH 040/241] Extend truncation message and suggest usage of Expressions. Iterables and arrays will not be trimmed when used in Expressions. Suggest user to do it if we trim data for arguments or locals of call stack frames. I modify message for Java and Python where we fixed Expression truncation. For other languages I will update bugs and ask to do the same. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=148851700 --- src/googleclouddebugger/capture_collector.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index b1739fd..096eca1 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -381,7 +381,9 @@ def CaptureVariablesList(self, items, depth, empty_message, limits): 'status': { 'refers_to': 'VARIABLE_VALUE', 'description': { - 'format': 'Only first $0 items were captured', + 'format': + ('Only first $0 items were captured. Use in an ' + 'expression to see all items.'), 'parameters': [str(len(v))]}}}) break v.append(self.CaptureNamedVariable(name, value, depth, limits)) From 924e242784783084d9f913d6fb270023c574ba81 Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Tue, 4 Apr 2017 12:25:19 -0700 Subject: [PATCH 041/241] Delete dead rate-limit code from the days where we used trace-callbacks for breakpoints. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=152163756 --- src/googleclouddebugger/rate_limit.cc | 3 +-- src/googleclouddebugger/rate_limit.h | 12 ------------ 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/src/googleclouddebugger/rate_limit.cc b/src/googleclouddebugger/rate_limit.cc index 7a15ace..84fca7b 100644 --- a/src/googleclouddebugger/rate_limit.cc +++ b/src/googleclouddebugger/rate_limit.cc @@ -27,7 +27,7 @@ DEFINE_int32( namespace devtools { namespace cdbg { -// Define capacity of "trace_quota_" leaky bucket: +// Define capacity of leaky bucket: // capacity = fill_rate * capacity_factor // // The capacity is conceptually unrelated to fill rate, but we don't want to @@ -38,7 +38,6 @@ namespace cdbg { // debugger wil not impact the service throughput. 
Longer values will allow the // burst, and will only disable the breakpoint if CPU consumption due to // debugger is continuous for a prolonged period of time. -static const double kMaxTraceRateCapacityFactor = 10; static const double kConditionCostCapacityFactor = 0.1; static std::unique_ptr g_global_condition_quota; diff --git a/src/googleclouddebugger/rate_limit.h b/src/googleclouddebugger/rate_limit.h index ebe2737..30038fa 100644 --- a/src/googleclouddebugger/rate_limit.h +++ b/src/googleclouddebugger/rate_limit.h @@ -30,18 +30,6 @@ void LazyInitializeRateLimit(); // Release quota objects. void CleanupRateLimit(); -// Gets the global quota on number of trace calls per second. Once the quota is -// exceeded we disable all the breakpoints. This is because the overhead is -// due to having trace callback and a specific breakpoint doesn't impact -// much. -// We don't measure total time, because: -// 1. There is an overhead of calling the trace function in CPython. We -// can't measure it. -// 2. Most of these callbacks are too fast to reliably measure. -// The quota is not a function of number of CPUs because Python is inherently -// single threaded. -LeakyBucket* GetTraceQuota(); - // Condition and dynamic logging rate limits are defined as the maximum // number of lines of Python code per second to execute. These rate are enforced // as following: From 51cd9e0052594e1dfa00f00eed70fcf2c210c1b1 Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Tue, 4 Apr 2017 17:16:03 -0700 Subject: [PATCH 042/241] Increment minor version to 1.15 to prepare for a new release. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=152201064 --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index 1ff90e3..e425e3c 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. -__version__ = '1.14' +__version__ = '1.15' From ee1b2cdef97fd7bbbd19f43a9e46de53593e0085 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Thu, 6 Apr 2017 10:59:16 -0700 Subject: [PATCH 043/241] Add global total messages and total bytes rate limiting for python logpoints ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=152401375 --- src/googleclouddebugger/capture_collector.py | 27 ++++++++++- src/googleclouddebugger/native_module.cc | 48 +++++++++++++------- src/googleclouddebugger/python_breakpoint.py | 6 ++- src/googleclouddebugger/rate_limit.cc | 34 +++++++++++++- src/googleclouddebugger/rate_limit.h | 3 +- 5 files changed, 95 insertions(+), 23 deletions(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 096eca1..41ac174 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -23,6 +23,7 @@ import os import re import sys +import time import types import labels @@ -48,6 +49,9 @@ OBJECT_HAS_NO_FIELDS = 'Object has no fields' LOG_ACTION_NOT_SUPPORTED = 'Log action on a breakpoint not supported' INVALID_EXPRESSION_INDEX = '' +DYNAMIC_LOG_OUT_OF_QUOTA = ( + 'LOGPOINT: Logpoint is paused due to high log rate until log ' + 'quota is restored') def _ListTypeFormatString(value): @@ -594,6 +598,12 @@ def __init__(self, 
definition): # When capturing recursively, limit on the size of sublists. self.max_sublist_items = 5 + # Time to pause after dynamic log quota has run out. + self.quota_recovery_ms = 500 + + # The time when we first entered the quota period + self._quota_recovery_start_time = None + # Select log function. level = self._definition.get('logLevel') if not level or level == 'INFO': @@ -619,15 +629,28 @@ def Log(self, frame): return {'isError': True, 'description': {'format': LOG_ACTION_NOT_SUPPORTED}} + if self._quota_recovery_start_time: + ms_elapsed = (time.time() - self._quota_recovery_start_time) * 1000 + if ms_elapsed > self.quota_recovery_ms: + # We are out of the recovery period, clear the time and continue + self._quota_recovery_start_time = None + else: + # We are in the recovery period, exit + return + # Evaluate watched expressions. - message = _FormatMessage( + message = 'LOGPOINT: ' + _FormatMessage( self._definition.get('logMessageFormat', ''), self._EvaluateExpressions(frame)) cdbg_logging_location = (NormalizePath(frame.f_code.co_filename), frame.f_lineno, _GetFrameCodeObjectName(frame)) - self._log_message('LOGPOINT: ' + message) + if native.ApplyDynamicLogsQuota(len(message)): + self._log_message(message) + else: + self._quota_recovery_start_time = time.time() + self._log_message(DYNAMIC_LOG_OUT_OF_QUOTA) del cdbg_logging_location return None diff --git a/src/googleclouddebugger/native_module.cc b/src/googleclouddebugger/native_module.cc index 63995c7..37649cb 100644 --- a/src/googleclouddebugger/native_module.cc +++ b/src/googleclouddebugger/native_module.cc @@ -66,23 +66,6 @@ static const INTEGER_CONSTANT kIntegerConstants[] = { // Class to set zero overhead breakpoints. static BytecodeBreakpoint g_bytecode_breakpoint; -// Condition and dynamic logging rate limits are defined as the maximum -// amount of time in nanoseconds to spend on particular processing per -// second. These rate are enforced as following: -// 1. 
If a single breakpoint contributes to half the maximum rate, that -// breakpoint will be deactivated. -// 2. If all breakpoints combined hit the maximum rate, any breakpoint to -// exceed the limit gets disabled. -// -// The first rule ensures that in vast majority of scenarios expensive -// breakpoints will get deactivated. The second rule guarantees that in edge -// case scenarios the total amount of time spent in condition evaluation will -// not exceed the alotted limit. -// -// All limits ignore the number of CPUs since Python is inherently single -// threaded. -static std::unique_ptr g_global_condition_quota_; - // Initializes C++ flags and logging. // // This function should be called exactly once during debugger bootstrap. It @@ -376,6 +359,31 @@ static PyObject* CallImmutable(PyObject* self, PyObject* py_args) { return PyEval_EvalCode(code, frame->f_globals, frame->f_locals); } +// Applies the dynamic logs quota, which is limited by both total messages and +// total bytes. This should be called before doing the actual logging call. +// +// Args: +// num_bytes: number of bytes in the message to log. +// Returns: +// True if there is quota available, False otherwise. +static PyObject* ApplyDynamicLogsQuota(PyObject* self, PyObject* py_args) { + LazyInitializeRateLimit(); + int num_bytes = -1; + if (!PyArg_ParseTuple(py_args, "i", &num_bytes) || num_bytes < 1) { + Py_RETURN_FALSE; + } + + LeakyBucket* global_dynamic_log_limiter = GetGlobalDynamicLogQuota(); + LeakyBucket* global_dynamic_log_bytes_limiter = + GetGlobalDynamicLogBytesQuota(); + + if (global_dynamic_log_limiter->RequestTokens(1) && + global_dynamic_log_bytes_limiter->RequestTokens(num_bytes)) { + Py_RETURN_TRUE; + } else { + Py_RETURN_FALSE; + } +} static PyMethodDef g_module_functions[] = { { @@ -427,6 +435,12 @@ static PyMethodDef g_module_functions[] = { METH_VARARGS, "Invokes a Python callable object with immutability tracer." 
}, + { + "ApplyDynamicLogsQuota", + ApplyDynamicLogsQuota, + METH_VARARGS, + "Applies the dynamic log quota" + }, { nullptr, nullptr, 0, nullptr } // sentinel }; diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index e369efc..0d84adf 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -109,6 +109,9 @@ def __init__(self, definition, hub_client, breakpoints_manager): self._lock = Lock() self._completed = False + if self.definition.get('action') == 'LOG': + self._collector = capture_collector.LogCollector(self.definition) + if not self._TryActivateBreakpoint() and not self._completed: self._DeferBreakpoint() @@ -328,8 +331,7 @@ def _BreakpointEvent(self, event, frame): if event != native.BREAKPOINT_EVENT_HIT: error_status = _BREAKPOINT_EVENT_STATUS[event] elif self.definition.get('action') == 'LOG': - collector = capture_collector.LogCollector(self.definition) - error_status = collector.Log(frame) + error_status = self._collector.Log(frame) if not error_status: return # Log action successful, no need to clear the breakpoint. 
diff --git a/src/googleclouddebugger/rate_limit.cc b/src/googleclouddebugger/rate_limit.cc index 84fca7b..20d7bb1 100644 --- a/src/googleclouddebugger/rate_limit.cc +++ b/src/googleclouddebugger/rate_limit.cc @@ -24,6 +24,18 @@ DEFINE_int32( 5000, "maximum number of Python lines/sec to spend on condition evaluation"); +DEFINE_int32( + max_dynamic_log_rate, + 50, // maximum of 50 log entries per second on average + "maximum rate of dynamic log entries in this process; short bursts are " + "allowed to exceed this limit"); + +DEFINE_int32( + max_dynamic_log_bytes_rate, + 20480, // maximum of 20K bytes per second on average + "maximum rate of dynamic log bytes in this process; short bursts are " + "allowed to exceed this limit"); + namespace devtools { namespace cdbg { @@ -39,26 +51,39 @@ namespace cdbg { // burst, and will only disable the breakpoint if CPU consumption due to // debugger is continuous for a prolonged period of time. static const double kConditionCostCapacityFactor = 0.1; +static const double kDynamicLogCapacityFactor = 5; +static const double kDynamicLogBytesCapacityFactor = 2; static std::unique_ptr g_global_condition_quota; +static std::unique_ptr g_global_dynamic_log_quota; +static std::unique_ptr g_global_dynamic_log_bytes_quota; static int64 GetBaseConditionQuotaCapacity() { return FLAGS_max_condition_lines_rate * kConditionCostCapacityFactor; } - void LazyInitializeRateLimit() { if (g_global_condition_quota == nullptr) { g_global_condition_quota.reset(new LeakyBucket( GetBaseConditionQuotaCapacity(), FLAGS_max_condition_lines_rate)); + + g_global_dynamic_log_quota.reset(new LeakyBucket( + FLAGS_max_dynamic_log_rate * kDynamicLogCapacityFactor, + FLAGS_max_dynamic_log_rate)); + + g_global_dynamic_log_bytes_quota.reset(new LeakyBucket( + FLAGS_max_dynamic_log_bytes_rate * kDynamicLogBytesCapacityFactor, + FLAGS_max_dynamic_log_bytes_rate)); } } void CleanupRateLimit() { g_global_condition_quota = nullptr; + g_global_dynamic_log_quota = nullptr; + 
g_global_dynamic_log_bytes_quota = nullptr; } @@ -66,6 +91,13 @@ LeakyBucket* GetGlobalConditionQuota() { return g_global_condition_quota.get(); } +LeakyBucket* GetGlobalDynamicLogQuota() { + return g_global_dynamic_log_quota.get(); +} + +LeakyBucket* GetGlobalDynamicLogBytesQuota() { + return g_global_dynamic_log_bytes_quota.get(); +} std::unique_ptr CreatePerBreakpointConditionQuota() { return std::unique_ptr(new LeakyBucket( diff --git a/src/googleclouddebugger/rate_limit.h b/src/googleclouddebugger/rate_limit.h index 30038fa..c7db0c0 100644 --- a/src/googleclouddebugger/rate_limit.h +++ b/src/googleclouddebugger/rate_limit.h @@ -48,7 +48,8 @@ void CleanupRateLimit(); // single threaded. LeakyBucket* GetGlobalConditionQuota(); std::unique_ptr CreatePerBreakpointConditionQuota(); - +LeakyBucket* GetGlobalDynamicLogQuota(); +LeakyBucket* GetGlobalDynamicLogBytesQuota(); } // namespace cdbg } // namespace devtools From 42c5b9214acc57b39b05276d0ff7838c6251b9f5 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Thu, 13 Apr 2017 13:06:10 -0700 Subject: [PATCH 044/241] Increment python opensource version to prepare for release with logpoint rate limiting ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=153096018 --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index e425e3c..573356b 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. 
-__version__ = '1.15' +__version__ = '1.16' From cb25ed058e9f26dc42952f619ebf898b71a7513c Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Mon, 17 Apr 2017 14:09:08 -0700 Subject: [PATCH 045/241] Use the line number from the definition instead of the frame for the top frame ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=153386232 --- src/googleclouddebugger/capture_collector.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 41ac174..073c131 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -250,6 +250,7 @@ def Collect(self, top_frame): """ # Evaluate call stack. frame = top_frame + top_line = self.breakpoint['location']['line'] breakpoint_frames = self.breakpoint['stackFrames'] # Number of entries in _var_table. Starts at 1 (index 0 is the 'buffer full' # status value). @@ -262,6 +263,7 @@ def Collect(self, top_frame): in self.breakpoint['expressions']] while frame and (len(breakpoint_frames) < self.max_frames): + line = top_line if frame == top_frame else frame.f_lineno code = frame.f_code if len(breakpoint_frames) < self.max_expand_frames: frame_arguments, frame_locals = self.CaptureFrameLocals(frame) @@ -273,7 +275,7 @@ def Collect(self, top_frame): 'function': _GetFrameCodeObjectName(frame), 'location': { 'path': NormalizePath(code.co_filename), - 'line': frame.f_lineno + 'line': line }, 'arguments': frame_arguments, 'locals': frame_locals @@ -643,8 +645,9 @@ def Log(self, frame): self._definition.get('logMessageFormat', ''), self._EvaluateExpressions(frame)) - cdbg_logging_location = (NormalizePath(frame.f_code.co_filename), - frame.f_lineno, _GetFrameCodeObjectName(frame)) + line = self._definition['location']['line'] + cdbg_logging_location = (NormalizePath(frame.f_code.co_filename), line, + _GetFrameCodeObjectName(frame)) if 
native.ApplyDynamicLogsQuota(len(message)): self._log_message(message) From 65c1079102463f7b2050ce300eabc29f972695b0 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Wed, 19 Apr 2017 10:43:24 -0700 Subject: [PATCH 046/241] increment python agent version to prepare for release ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=153605200 --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index 573356b..ff27e2e 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. -__version__ = '1.16' +__version__ = '1.17' From 7390ef069b0150effb7ed4a426cc0dbc96633d56 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Fri, 12 May 2017 10:53:36 -0700 Subject: [PATCH 047/241] Add '$$' escaping for logpoints in python agent. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=155885468 --- src/googleclouddebugger/capture_collector.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 073c131..3ed6b0f 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -815,7 +815,7 @@ def _GetFrameCodeObjectName(frame): def _FormatMessage(template, parameters): - """Formats the message. + """Formats the message. Unescapes '$$' with '$'. Args: template: message template (e.g. 'a = $0, b = $1'). 
@@ -830,7 +830,8 @@ def GetParameter(m): except IndexError: return INVALID_EXPRESSION_INDEX - return re.sub(r'\$\d+', GetParameter, template) + parts = template.split('$$') + return '$'.join(re.sub(r'\$\d+', GetParameter, part) for part in parts) def _TrimString(s, max_len): From 809fe5f708ebbf3e393b202e307dd5f44319f255 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Mon, 15 May 2017 09:56:25 -0700 Subject: [PATCH 048/241] Exclude lambda and generator expressions from possible code objects that breakpoints can be set on. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=156066080 --- src/googleclouddebugger/module_explorer.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/googleclouddebugger/module_explorer.py b/src/googleclouddebugger/module_explorer.py index ef12312..5bcacf2 100644 --- a/src/googleclouddebugger/module_explorer.py +++ b/src/googleclouddebugger/module_explorer.py @@ -114,7 +114,10 @@ def _FindCodeObjectsReferents(module, start_objects, visit_recorder): List of code objects. """ def CheckIgnoreCodeObject(code_object): - """Checks if the code object originated from "module". + """Checks if the code object can be ignored. + + Code objects that are not implemented in the module, or are from a lambda or + generator expression can be ignored. If the module was precompiled, the code object may point to .py file, while the module says that it originated from .pyc file. We just strip extension @@ -124,8 +127,11 @@ def CheckIgnoreCodeObject(code_object): code_object: code object that we want to check against module. Returns: - False if code_object was implemented in module or True otherwise. + True if the code object can be ignored, False otherwise. 
""" + if code_object.co_name in ('', ''): + return True + code_object_file = os.path.splitext(code_object.co_filename)[0] module_file = os.path.splitext(module.__file__)[0] @@ -203,4 +209,3 @@ def Record(self, obj): self._visit_recorder_objects[obj_id] = obj return True - From 0c3631ab59bd5d16df32cea09c4e944079f8c5d0 Mon Sep 17 00:00:00 2001 From: cclaeys Date: Wed, 31 May 2017 18:56:35 -0700 Subject: [PATCH 049/241] Improve python module not found error message. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=157665300 --- src/googleclouddebugger/python_breakpoint.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index 0d84adf..31b79d4 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -29,7 +29,8 @@ BREAKPOINT_ONLY_SUPPORTS_PY_FILES = ( 'Only files with .py or .pyc extension are supported') MODULE_NOT_FOUND = ( - 'Python module not found') + 'Python module not found. 
Please ensure this file is present in the ' + 'version of the service you are trying to debug.') NO_CODE_FOUND_AT_LINE = ( 'No code found at line $0') GLOBAL_CONDITION_QUOTA_EXCEEDED = ( From f3597cc2e68cf7413516d21501b3380b73c05c83 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Mon, 5 Jun 2017 17:17:43 -0700 Subject: [PATCH 050/241] Allow unicode values for keys in dictionaries in the python agent ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=158083734 --- src/googleclouddebugger/capture_collector.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 3ed6b0f..8791f38 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -354,7 +354,10 @@ def CaptureNamedVariable(self, name, value, depth, limits): """ try: if not hasattr(name, '__dict__'): - name = str(name) + if isinstance(name, unicode): + name = name.encode('unicode_escape') + else: + name = str(name) else: # TODO(vlif): call str(name) with immutability verifier here. name = str(id(name)) self._total_size += len(name) From 6cce6c388e229a6b2cba786762882836fa5eaf6a Mon Sep 17 00:00:00 2001 From: bauerpower Date: Tue, 6 Jun 2017 18:46:37 -0700 Subject: [PATCH 051/241] Make the "Buffer full" message more helpful by telling the user they can add an expression to prioritize capturing the variable in question. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=158220466 --- src/googleclouddebugger/capture_collector.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 8791f38..164b37f 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -201,7 +201,11 @@ def __init__(self, definition): 'status': { 'isError': True, 'refersTo': 'VARIABLE_VALUE', - 'description': {'format': 'Buffer full'}}}] + 'description': { + 'format': 'Buffer full. Use an expression to see more data' + } + } + }] # Shortcut to variables table in the breakpoint message. self._var_table = self.breakpoint['variableTable'] From d39ae9261ebc4dbe55362470bc1ba8ae1a1e0dd7 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Fri, 9 Jun 2017 09:31:17 -0700 Subject: [PATCH 052/241] Give better error message when the agent can't find the line. Include the path and the nearest line above and below the line. Removed old implementation of hasSourceLine. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=158526641 --- src/googleclouddebugger/module_explorer.py | 44 ++++++++++++++++---- src/googleclouddebugger/native_module.cc | 42 ------------------- src/googleclouddebugger/python_breakpoint.py | 30 +++++++++---- 3 files changed, 60 insertions(+), 56 deletions(-) diff --git a/src/googleclouddebugger/module_explorer.py b/src/googleclouddebugger/module_explorer.py index 5bcacf2..ab18b74 100644 --- a/src/googleclouddebugger/module_explorer.py +++ b/src/googleclouddebugger/module_explorer.py @@ -19,8 +19,6 @@ import sys import types -import cdbg_native as native - # Maximum traversal depth when looking for all the code objects referenced by # a module or another code object. 
_MAX_REFERENTS_BFS_DEPTH = 15 @@ -44,16 +42,48 @@ def GetCodeObjectAtLine(module, line): line: 1-based line number of the statement. Returns: - Code object or None if not found. + (True, Code object) on success or (False, (prev_line, next_line)) on + failure, where prev_line and next_line are the closest lines with code above + and below the specified line, or None if they do not exist. """ if not hasattr(module, '__file__'): - return None + return (False, (None, None)) + + prev_line = 0 + next_line = sys.maxint for code_object in _GetModuleCodeObjects(module): - if native.HasSourceLine(code_object, line): - return code_object + for co_line_number in _GetLineNumbers(code_object): + if co_line_number == line: + return (True, code_object) + elif co_line_number < line: + prev_line = max(prev_line, co_line_number) + elif co_line_number > line: + next_line = min(next_line, co_line_number) + break + + prev_line = None if prev_line == 0 else prev_line + next_line = None if next_line == sys.maxint else next_line + return (False, (prev_line, next_line)) + - return None +def _GetLineNumbers(code_object): + """Generator for getting the line numbers of a code object. + + Args: + code_object: the code object. + + Yields: + The next line number in the code object. + """ + # Get the line number deltas, which are the odd number entries, from the + # lnotab. 
See + # https://svn.python.org/projects/python/branches/pep-0384/Objects/lnotab_notes.txt + line_incrs = (ord(c) for c in code_object.co_lnotab[1::2]) + current_line = code_object.co_firstlineno + for line_incr in line_incrs: + current_line += line_incr + yield current_line def _GetModuleCodeObjects(module): diff --git a/src/googleclouddebugger/native_module.cc b/src/googleclouddebugger/native_module.cc index 37649cb..4a11b26 100644 --- a/src/googleclouddebugger/native_module.cc +++ b/src/googleclouddebugger/native_module.cc @@ -187,41 +187,6 @@ static PyObject* LogError(PyObject* self, PyObject* py_args) { } -// Searches for a statement with the specified line number in the specified -// code object. -// -// Args: -// code_object: Python code object to analyze. -// line: 1-based line number to search. -// -// Returns: -// True if code_object includes a statement that maps to the specified -// source line or False otherwise. -static PyObject* HasSourceLine(PyObject* self, PyObject* py_args) { - PyCodeObject* code_object = nullptr; - int line = -1; - if (!PyArg_ParseTuple(py_args, "Oi", &code_object, &line)) { - return nullptr; - } - - if ((code_object == nullptr) || !PyCode_Check(code_object)) { - PyErr_SetString( - PyExc_TypeError, - "code_object must be a code object"); - return nullptr; - } - - CodeObjectLinesEnumerator enumerator(code_object); - do { - if (enumerator.line_number() == line) { - Py_RETURN_TRUE; - } - } while (enumerator.Next()); - - Py_RETURN_FALSE; -} - - // Sets a new breakpoint in Python code. The breakpoint may have an optional // condition to evaluate. When the breakpoint hits (and the condition matches) // a callable object will be invoked from that thread. @@ -410,13 +375,6 @@ static PyMethodDef g_module_functions[] = { METH_VARARGS, "ERROR level logging from Python code." }, - { - "HasSourceLine", - HasSourceLine, - METH_VARARGS, - "Checks whether Python code object includes the specified source " - "line number." 
- }, { "SetConditionalBreakpoint", SetConditionalBreakpoint, diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index 31b79d4..fe167ea 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -31,8 +31,11 @@ MODULE_NOT_FOUND = ( 'Python module not found. Please ensure this file is present in the ' 'version of the service you are trying to debug.') -NO_CODE_FOUND_AT_LINE = ( - 'No code found at line $0') +NO_CODE_FOUND_AT_LINE = 'No code found at line $0 in $1' +NO_CODE_FOUND_AT_LINE_ALT_LINE = ( + 'No code found at line $0 in $1. Try line $2.') +NO_CODE_FOUND_AT_LINE_TWO_ALT_LINES = ( + 'No code found at line $0 in $1. Try lines $2 or $3.') GLOBAL_CONDITION_QUOTA_EXCEEDED = ( 'Snapshot cancelled. The condition evaluation cost for all active ' 'snapshots might affect the application performance.') @@ -241,18 +244,31 @@ def _FindCodeObject(self): if not module: return None - code_object = module_explorer.GetCodeObjectAtLine(module, line) - if code_object is None: + status, val = module_explorer.GetCodeObjectAtLine(module, line) + if not status: + # module.__file__ must be defined or else it wouldn't have been returned + # from FindModule + params = [str(line), module.__file__] + alt_lines = (str(l) for l in val if l is not None) + params += alt_lines + + if len(params) == 4: + fmt = NO_CODE_FOUND_AT_LINE_TWO_ALT_LINES + elif len(params) == 3: + fmt = NO_CODE_FOUND_AT_LINE_ALT_LINE + else: + fmt = NO_CODE_FOUND_AT_LINE + self._CompleteBreakpoint({ 'status': { 'isError': True, 'refersTo': 'BREAKPOINT_SOURCE_LOCATION', 'description': { - 'format': NO_CODE_FOUND_AT_LINE, - 'parameters': [str(line)]}}}) + 'format': fmt, + 'parameters': params}}}) return None - return code_object + return val # Enables deferred breakpoints. 
def _DeferBreakpoint(self): From 3aed0960fc2e9b8189a1f649fcd5f63a4faded26 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Mon, 12 Jun 2017 14:34:08 -0700 Subject: [PATCH 053/241] Add more conditions in the BFS of module_explorer so there is less of a chance of hitting the _MAX_VISIT_OBJECTS limit. This fixes integration_test.py where sometimes it returns 'no code found at line' when it exhausted all the quota exploring the list of mock calls to the hub. Don't explore lists. Don't explore any object with a high number of referents if it is not the root module. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=158765504 --- src/googleclouddebugger/module_explorer.py | 41 +++++++++++++++------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/src/googleclouddebugger/module_explorer.py b/src/googleclouddebugger/module_explorer.py index ab18b74..f715a91 100644 --- a/src/googleclouddebugger/module_explorer.py +++ b/src/googleclouddebugger/module_explorer.py @@ -27,11 +27,17 @@ # objects implemented in a module. _MAX_VISIT_OBJECTS = 100000 +# Maximum referents an object can have before it is skipped in the BFS +# traversal. This is to prevent things like long objects or dictionaries that +# probably do not contain code objects from using the _MAX_VISIT_OBJECTS quota. +_MAX_OBJECT_REFERENTS = 1000 + # Object types to ignore when looking for the code objects. 
_BFS_IGNORE_TYPES = (types.ModuleType, types.NoneType, types.BooleanType, types.IntType, types.LongType, types.FloatType, types.StringType, types.UnicodeType, - types.BuiltinFunctionType, types.BuiltinMethodType) + types.BuiltinFunctionType, types.BuiltinMethodType, + types.ListType) def GetCodeObjectAtLine(module, line): @@ -184,25 +190,34 @@ def CheckIgnoreClass(cls): code_objects = set() current = start_objects + for obj in current: + visit_recorder.Record(current) + depth = 0 while current and depth < _MAX_REFERENTS_BFS_DEPTH: - referents = gc.get_referents(*current) - current = [] - for obj in referents: - if isinstance(obj, _BFS_IGNORE_TYPES) or not visit_recorder.Record(obj): + new_current = [] + for current_obj in current: + referents = gc.get_referents(current_obj) + if (current_obj is not module.__dict__ and + len(referents) > _MAX_OBJECT_REFERENTS): continue - if isinstance(obj, types.CodeType) and CheckIgnoreCodeObject(obj): - continue + for obj in referents: + if isinstance(obj, _BFS_IGNORE_TYPES) or not visit_recorder.Record(obj): + continue - if isinstance(obj, types.ClassType) and CheckIgnoreClass(obj): - continue + if isinstance(obj, types.CodeType) and CheckIgnoreCodeObject(obj): + continue + + if isinstance(obj, types.ClassType) and CheckIgnoreClass(obj): + continue - if isinstance(obj, types.CodeType): - code_objects.add(obj) - else: - current.append(obj) + if isinstance(obj, types.CodeType): + code_objects.add(obj) + else: + new_current.append(obj) + current = new_current depth += 1 return code_objects From 56615ec199a7dbaefd81efd7c568f6eeba731488 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Mon, 12 Jun 2017 15:03:26 -0700 Subject: [PATCH 054/241] Call repr on dictionary keys. This also solves the UnicodeDecodeError in the related bug. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=158769818 --- src/googleclouddebugger/capture_collector.py | 22 +++++++------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 164b37f..a0e02d7 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -358,10 +358,7 @@ def CaptureNamedVariable(self, name, value, depth, limits): """ try: if not hasattr(name, '__dict__'): - if isinstance(name, unicode): - name = name.encode('unicode_escape') - else: - name = str(name) + name = str(name) else: # TODO(vlif): call str(name) with immutability verifier here. name = str(id(name)) self._total_size += len(name) @@ -451,8 +448,9 @@ def CaptureVariable(self, value, depth, limits, can_enqueue=True): # Do not use iteritems() here. If GC happens during iteration (which it # often can for dictionaries containing large variables), you will get a # RunTimeError exception. 
+ items = [(repr(k), v) for (k, v) in value.items()] return {'members': - self.CaptureVariablesList(value.items(), depth + 1, + self.CaptureVariablesList(items, depth + 1, EMPTY_DICTIONARY, limits), 'type': 'dict'} @@ -493,16 +491,10 @@ def CaptureVariable(self, value, depth, limits, can_enqueue=True): self._total_size += len(r) return {'value': r} - if value.__dict__: - v = self.CaptureVariable(value.__dict__, depth + 1, limits) - else: - v = {'members': - [ - {'status': { - 'is_error': False, - 'refers_to': 'VARIABLE_NAME', - 'description': {'format': OBJECT_HAS_NO_FIELDS}}} - ]} + # Add an additional depth for the object itself + members = self.CaptureVariablesList(value.__dict__.items(), depth + 2, + OBJECT_HAS_NO_FIELDS, limits) + v = {'members': members} object_type = type(value) if hasattr(object_type, '__name__'): From 9821a1756ff6485f17f28e9576aa2ef4c69ad733 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Tue, 13 Jun 2017 10:17:55 -0700 Subject: [PATCH 055/241] Increment python agent version ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=158861802 --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index ff27e2e..4405fc4 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. -__version__ = '1.17' +__version__ = '1.18' From f19caa5c8d55d50437abaf52ab29bbf935038f02 Mon Sep 17 00:00:00 2001 From: erezh Date: Fri, 21 Jul 2017 14:26:17 -0700 Subject: [PATCH 056/241] Change Python GCP agent version scheme to match the recommended format. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=162785226 --- src/googleclouddebugger/gcp_hub_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index 9843e4a..743424a 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -469,7 +469,7 @@ def _GetDebuggee(self): 'project': self._project_number(), 'description': self._GetDebuggeeDescription(), 'labels': self._debuggee_labels, - 'agentVersion': 'google.com/python2.7-' + major_version + 'agentVersion': 'google.com/python27-gcp/v' + major_version } source_context = self._ReadAppJsonFile('source-context.json') From ce58bd5d5fe2effe69ebb2adb029ac05e0f79e12 Mon Sep 17 00:00:00 2001 From: erezh Date: Mon, 24 Jul 2017 10:12:08 -0700 Subject: [PATCH 057/241] Fix a bug where the agent was resetting the breakpoints list when the server wait expired. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=162952146 --- src/googleclouddebugger/gcp_hub_client.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index 743424a..9c8e9b6 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -361,15 +361,16 @@ def _ListActiveBreakpoints(self, service): response = service.debuggees().breakpoints().list( debuggeeId=self._debuggee_id, waitToken=self._wait_token, successOnTimeout=True).execute() - breakpoints = response.get('breakpoints') or [] - self._wait_token = response.get('nextWaitToken') - if cmp(self._breakpoints, breakpoints) != 0: - self._breakpoints = breakpoints - native.LogInfo( - 'Breakpoints list changed, %d active, wait token: %s' % ( - len(self._breakpoints), self._wait_token)) - 
self.on_active_breakpoints_changed(copy.deepcopy(self._breakpoints)) - except Exception as e: + if not response.get('waitExpired'): + self._wait_token = response.get('nextWaitToken') + breakpoints = response.get('breakpoints') or [] + if cmp(self._breakpoints, breakpoints) != 0: + self._breakpoints = breakpoints + native.LogInfo( + 'Breakpoints list changed, %d active, wait token: %s' % ( + len(self._breakpoints), self._wait_token)) + self.on_active_breakpoints_changed(copy.deepcopy(self._breakpoints)) + except BaseException: native.LogInfo('Failed to query active breakpoints: ' + traceback.format_exc()) @@ -433,7 +434,7 @@ def _TransmitBreakpointUpdates(self, service): # This is very common if multiple instances are sending final update # simultaneously. native.LogInfo('%s, breakpoint: %s' % (err, breakpoint['id'])) - except Exception: + except BaseException: native.LogWarning( 'Fatal error sending breakpoint %s update: %s' % ( breakpoint['id'], traceback.format_exc())) From 5c62a9687f7c6b93c583d0c9c95f8dc0e4e6c9a5 Mon Sep 17 00:00:00 2001 From: erezh Date: Mon, 24 Jul 2017 13:44:38 -0700 Subject: [PATCH 058/241] Remove extSourceContext from the Python GCP agent. It is not being used and it's the only agent that implements it. It inflates the size of the debuggee on the wire. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=162981207 --- src/googleclouddebugger/gcp_hub_client.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index 9c8e9b6..047fa88 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -477,12 +477,6 @@ def _GetDebuggee(self): if source_context: debuggee['sourceContexts'] = [source_context] - source_contexts = self._ReadAppJsonFile('source-contexts.json') - if source_contexts: - debuggee['extSourceContexts'] = source_contexts - elif source_context: - debuggee['extSourceContexts'] = [{'context': source_context}] - debuggee['uniquifier'] = self._ComputeUniquifier(debuggee) return debuggee @@ -519,8 +513,7 @@ def _ComputeUniquifier(self, debuggee): # Compute hash of application files if we don't have source context. This # way we can still distinguish between different deployments. if ('minorversion' not in debuggee.get('labels', []) and - 'sourceContexts' not in debuggee and - 'extSourceContexts' not in debuggee): + 'sourceContexts' not in debuggee): uniquifier_computer.ComputeApplicationUniquifier(uniquifier) return uniquifier.hexdigest() From cf88c253e71c902a486f30621a413859d2b59673 Mon Sep 17 00:00:00 2001 From: erezh Date: Mon, 24 Jul 2017 18:49:43 -0700 Subject: [PATCH 059/241] Fix a bug in the Python GCP agent where the uniquifier value was unstable across different agents or agent restarts. The main issue is that str(debuggee) does not create a stable string as python object attributes can appear in any order. The fix stops hashing any fields already included in the debuggee proto into the uniquifier . The server already hashes the entire debuggee to generate the debuggee-id. 
Keep calculating the uniquifier when the information in the debuggee is not unique enough (no minor version or source context) ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=163020134 --- src/googleclouddebugger/gcp_hub_client.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index 047fa88..d11d064 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -503,13 +503,6 @@ def _ComputeUniquifier(self, debuggee): """ uniquifier = hashlib.sha1() - # Project information. - uniquifier.update(self._project_id()) - uniquifier.update(self._project_number()) - - # Debuggee information. - uniquifier.update(str(debuggee)) - # Compute hash of application files if we don't have source context. This # way we can still distinguish between different deployments. if ('minorversion' not in debuggee.get('labels', []) and From e6b11dee14acf7b401183b91a124d3306d103fbd Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Wed, 26 Jul 2017 09:17:19 -0700 Subject: [PATCH 060/241] When a breakpoint path does not match an already loaded module, search all lazy-loadable paths and find the best match (rather than any match). This CL is a no-op for the end user. It just prepares infrastructure for a future CL. Example: Breakpoint path: 'a/b.py' os.path = ['/foo/baz', '/foo/baz'] Files in the system: ['/foo/baz/b.py', '/foo/bar/a/b.py'] Before: '/foo/baz/b.py' matches 'a/b.py' After: Both files match 'a/b.py', but '/foo/bar/a/b.py' is a better match. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=163214735 --- src/googleclouddebugger/deferred_modules.py | 76 +++++++++++++++----- src/googleclouddebugger/python_breakpoint.py | 11 ++- 2 files changed, 66 insertions(+), 21 deletions(-) diff --git a/src/googleclouddebugger/deferred_modules.py b/src/googleclouddebugger/deferred_modules.py index 816f7f5..f5519db 100644 --- a/src/googleclouddebugger/deferred_modules.py +++ b/src/googleclouddebugger/deferred_modules.py @@ -21,7 +21,7 @@ import cdbg_native as native -# Maximum number of directories that IsValidSourcePath will scan. +# Maximum number of directories that FindModulePath will scan. _DIRECTORY_LOOKUP_QUOTA = 250 # Callbacks to invoke when a module is imported. @@ -31,11 +31,12 @@ _real_import = None -def IsValidSourcePath(source_path): +# TODO(emrekultursay): Move this method out of deferred_modules.py file. +def FindModulePath(source_path): """Checks availability of a Python module. - This function checks if it is possible that a module will match the specified - path. We only use the file name and we ignore the directory. + This function checks if it is possible that a module (loaded or not) + will match the specified path. There is no absolutely correct way to do this. The application may just import a module from a string, or dynamically change sys.path. This function @@ -45,16 +46,14 @@ def IsValidSourcePath(source_path): There can be some edge cases when this code is going to scan a huge number of directories. This can be very expensive. To mitigate it, we limit the number of directories that can be scanned. If this threshold is reached, - false negatives are possible. + false negatives (i.e., missing modules in the output) are possible. Args: source_path: source path as specified in the breakpoint. Returns: - True if it is possible that a module matching source_path will ever be - loaded or false otherwise. 
+ A list containing the paths of modules that best match source_path. """ - def IsPackage(path): """Checks if the specified directory is a valid Python package.""" init_base_path = os.path.join(path, '__init__.py') @@ -81,9 +80,12 @@ def SubPackages(path): start_time = time.time() directory_lookups = [0] - file_name = _GetModuleName(source_path) - if not file_name: - return False + # For packages, module_name will be the name of the package (e.g., for + # 'a/b/c/__init__.py' it will be 'c'). Otherwise, module_name will be the + # name of the module (e.g., for 'a/b/c/foo.py' it will be 'foo'). + module_name = _GetModuleName(source_path) + if not module_name: + return [] # Recursively discover all the subpackages in all the Python paths. paths = set() @@ -104,23 +106,23 @@ def SubPackages(path): path, unused_name = os.path.split(file_path) if file_path else (None, None) paths.add(path or default_path) - try: - imp.find_module(file_name, list(paths)) - rc = True - except ImportError: - rc = False + # Normalize paths and remove duplicates. + paths = set(os.path.abspath(path) for path in paths) + + best_match = _FindBestMatch(source_path, module_name, paths) native.LogInfo( ('Look up for %s completed in %d directories, ' 'scanned %d directories (quota: %d), ' 'result: %r, total time: %f ms') % ( - file_name, + module_name, len(paths), directory_lookups[0], _DIRECTORY_LOOKUP_QUOTA, - rc, + best_match, (time.time() - start_time) * 1000)) - return rc + + return best_match def AddImportCallback(source_path, callback): @@ -236,3 +238,39 @@ def _InvokeImportCallback(module_name): for callback in callbacks.copy(): callback(module_name) + +# TODO(emrekultursay): Try reusing the Disambiguate method in module_lookup.py. 
+def _FindBestMatch(source_path, module_name, paths): + """Returns paths entries that have longest suffix match with source_path.""" + best = [] + best_suffix_len = 0 + for path in paths: + try: + (f, p, unused_d) = imp.find_module(module_name, [path]) + + # find_module returns f=None when it finds a package, in which case we + # should be finding common suffix against __init__.py in that package. + if not f: + p = os.path.join(p, '__init__.py') + + suffix_len = _CommonSuffix(source_path, p) + + if suffix_len > best_suffix_len: + best = [p] + best_suffix_len = suffix_len + elif suffix_len == best_suffix_len: + best.append(p) + + except ImportError: + pass # a module with the given name was not found inside path. + + return best + + +# TODO(emrekultursay): Remove duplicate copy in module_lookup.py. +def _CommonSuffix(path1, path2): + """Returns the number of common directory names at the tail of the paths.""" + return len(os.path.commonprefix([ + path1[::-1].split(os.sep), + path2[::-1].split(os.sep)])) + diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index fe167ea..0c632d0 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -116,6 +116,7 @@ def __init__(self, definition, hub_client, breakpoints_manager): if self.definition.get('action') == 'LOG': self._collector = capture_collector.LogCollector(self.definition) + # TODO(emrekultursay): Check both loaded and deferred modules. if not self._TryActivateBreakpoint() and not self._completed: self._DeferBreakpoint() @@ -295,16 +296,22 @@ def _DeferBreakpoint(self): 'description': {'format': BREAKPOINT_ONLY_SUPPORTS_PY_FILES}}}) return - if not deferred_modules.IsValidSourcePath(path): + # This is a best-effort lookup to identify any modules that may be loaded in + # the future. 
+ deferred_paths = deferred_modules.FindModulePath(path) + if not deferred_paths: self._CompleteBreakpoint({ 'status': { 'isError': True, 'refersTo': 'BREAKPOINT_SOURCE_LOCATION', 'description': {'format': MODULE_NOT_FOUND}}}) + return + # TODO(emrekultursay): Print error if there are multiple deferred_paths. + # TODO(emrekultursay): Use deferred_paths[0] instead of path. assert not self._import_hook_cleanup self._import_hook_cleanup = deferred_modules.AddImportCallback( - self.definition['location']['path'], + path, lambda unused_module_name: self._TryActivateBreakpoint()) def _RemoveImportHook(self): From 5a2b1a173efe9951c0854f90c0e05068fb111333 Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Thu, 27 Jul 2017 10:14:03 -0700 Subject: [PATCH 061/241] Complete the breakpoint with error if multiple deferred modules match the provided breakpoint path. Currently, this code is only triggered if there are *no* already-loaded modules that match the user-provided pattern. (Fixing that is coming later) ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=163355496 --- src/googleclouddebugger/deferred_modules.py | 3 +- src/googleclouddebugger/python_breakpoint.py | 48 +++++++++++++++++++- 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/src/googleclouddebugger/deferred_modules.py b/src/googleclouddebugger/deferred_modules.py index f5519db..8df84f2 100644 --- a/src/googleclouddebugger/deferred_modules.py +++ b/src/googleclouddebugger/deferred_modules.py @@ -121,8 +121,7 @@ def SubPackages(path): _DIRECTORY_LOOKUP_QUOTA, best_match, (time.time() - start_time) * 1000)) - - return best_match + return sorted(best_match) def AddImportCallback(source_path, callback): diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index 0c632d0..024f9f1 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -31,6 +31,10 @@ MODULE_NOT_FOUND = ( 
'Python module not found. Please ensure this file is present in the ' 'version of the service you are trying to debug.') +MULTIPLE_MODULES_FOUND2 = ( + 'Multiple modules matching $0 ($1, $2)') +MULTIPLE_MODULES_FOUND3_OR_MORE = ( + 'Multiple modules matching $0 ($1, $2, and $3 more)') NO_CODE_FOUND_AT_LINE = 'No code found at line $0 in $1' NO_CODE_FOUND_AT_LINE_ALT_LINE = ( 'No code found at line $0 in $1. Try line $2.') @@ -286,6 +290,38 @@ def _DeferBreakpoint(self): the module to get loaded. Once the module is loaded, the debugger will automatically try to activate the breakpoint. """ + + def StripCommonPrefixSegments(paths): + """Removes common prefix segments from a list of path strings.""" + # Find the longest common prefix in terms of characters. + common_prefix = os.path.commonprefix(paths) + # Truncate at last segment boundary. E.g. '/aa/bb1/x.py' and '/a/bb2/x.py' + # have '/aa/bb' as the common prefix, but we should strip '/aa/' instead. + # If there's no '/' found, returns -1+1=0. + common_prefix_len = common_prefix.rfind('/') + 1 + return [path[common_prefix_len:] for path in paths] + + def MultipleModulesFoundError(path, candidates): + """Generates an error message to be used when multiple matches are found. + + Args: + path: The breakpoint location path that the user provided. + candidates: List of paths that match the user provided path. Must + contain at least 2 entries (throws AssertionError otherwise). + + Returns: + A (format, parameters) tuple that should be used in the description + field of the breakpoint error status. 
+ """ + assert len(candidates) > 1 + params = [path] + StripCommonPrefixSegments(candidates[:2]) + if len(candidates) == 2: + fmt = MULTIPLE_MODULES_FOUND2 + else: + fmt = MULTIPLE_MODULES_FOUND3_OR_MORE + params.append(str(len(candidates) - 2)) + return fmt, params + path = self.definition['location']['path'] if os.path.splitext(path)[1] != '.py': @@ -307,7 +343,17 @@ def _DeferBreakpoint(self): 'description': {'format': MODULE_NOT_FOUND}}}) return - # TODO(emrekultursay): Print error if there are multiple deferred_paths. + if len(deferred_paths) > 1: + fmt, params = MultipleModulesFoundError(path, deferred_paths) + self._CompleteBreakpoint({ + 'status': { + 'isError': True, + 'refersTo': 'BREAKPOINT_SOURCE_LOCATION', + 'description': { + 'format': fmt, + 'parameters': params}}}) + return + # TODO(emrekultursay): Use deferred_paths[0] instead of path. assert not self._import_hook_cleanup self._import_hook_cleanup = deferred_modules.AddImportCallback( From 9822b8f7943f5ddd4c0205416f6990101ca24b13 Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Thu, 27 Jul 2017 12:34:09 -0700 Subject: [PATCH 062/241] Return multiple matches from module_lookup.FindModules(). This CL enables the caller (python_breakpoint.py) to take custom actions on the returned result (future CL). ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=163377134 --- src/googleclouddebugger/module_lookup.py | 75 +++++++++++++------- src/googleclouddebugger/python_breakpoint.py | 8 ++- 2 files changed, 54 insertions(+), 29 deletions(-) diff --git a/src/googleclouddebugger/module_lookup.py b/src/googleclouddebugger/module_lookup.py index 039fc4f..df547f5 100644 --- a/src/googleclouddebugger/module_lookup.py +++ b/src/googleclouddebugger/module_lookup.py @@ -22,34 +22,45 @@ import sys -def FindModule(source_path): - """Find the loaded module by source path. +def FindModules(source_path): + """Finds the loaded modules whose paths match the given source_path best. 
- If there are multiple possible matches, chooses the best match. + If there are multiple possible matches, returns them all. Args: source_path: source file path as specified in the breakpoint. Returns: - Module object that best matches the source_path or None if no match found. + List of module objects that best match the source_path or [] if no + match is found. """ - file_name, ext = os.path.splitext(os.path.basename(source_path)) + # The lookup is performed in two steps. First, we search all modules whose + # name match the given source_path's file name (i.e., ignore the leading + # directory). Then, we select the results whose directory matches the given + # input best. + dirname, basename = os.path.split(source_path) + file_name_root, ext = os.path.splitext(basename) if ext != '.py': - return None # ".py" extension is expected + return [] # ".py" extension is expected - candidates = _GetModulesByFileName(file_name) + candidates = _GetModulesByFileName(file_name_root) if not candidates: - return None + return [] if len(candidates) == 1: - return candidates[0] + return candidates - return candidates[_Disambiguate( - os.path.split(source_path)[0], - [os.path.split(module.__file__)[0] for module in candidates])] + if not dirname: + return candidates # No need disambiguate. + # Find the module that has the best path prefix. + indices = _Disambiguate( + dirname, + [os.path.dirname(module.__file__) for module in candidates]) + return [candidates[i] for i in indices] -def _GetModulesByFileName(lookup_file_name): + +def _GetModulesByFileName(lookup_file_name_root): """Gets list of all the loaded modules by file name (ignores directory).""" matches = [] @@ -58,15 +69,18 @@ def _GetModulesByFileName(lookup_file_name): if not hasattr(module, '__file__'): continue # This is a built-in module. 
- file_name, ext = os.path.splitext(os.path.basename(module.__file__)) - if file_name == lookup_file_name and (ext == '.py' or ext == '.pyc'): + file_name_root, ext = os.path.splitext(os.path.basename(module.__file__)) + + # TODO(emrekultursay): Verify why we are discarding .pyo files here. + if (file_name_root == lookup_file_name_root and + (ext == '.py' or ext == '.pyc')): matches.append(module) return matches def _Disambiguate(lookup_path, paths): - """Disambiguate multiple candidates based on the longest suffix. + """Disambiguates multiple candidates based on the longest suffix. Example when this disambiguation is needed: Breakpoint at: 'myproject/app/db/common.py' @@ -76,26 +90,33 @@ def _Disambiguate(lookup_path, paths): lookup_path = 'myproject/app/db' paths = ['/home/root/fe', '/home/root/db'] - The second path is clearly the best match, so this function will return 1. + The second path is clearly the best match, so this function will return [1]. Args: lookup_path: the source path of the searched module (without file name - and extension). - paths: candidate paths (each without file name and extension). + and extension). Must be non-empty. + paths: candidate paths (each without file name and extension). Must have + two or more elements. Returns: - Index of the best match or arbitrary index if this function can't - discriminate. + List of indices of the best matches. """ - best_index = 0 - best_len = 0 - for i in range(len(paths)): - current_len = _CommonSuffix(lookup_path, paths[i]) + assert lookup_path + assert len(paths) > 1 + + best_indices = [] + best_len = 1 # zero-length matches should be discarded. 
+ + for i, path in enumerate(paths): + current_len = _CommonSuffix(lookup_path, path) + if current_len > best_len: - best_index = i + best_indices = [i] best_len = current_len + elif current_len == best_len: + best_indices.append(i) - return best_index + return best_indices def _CommonSuffix(path1, path2): diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index 024f9f1..ceb9298 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -245,10 +245,14 @@ def _FindCodeObject(self): path = self.definition['location']['path'] line = self.definition['location']['line'] - module = module_lookup.FindModule(path) - if not module: + modules = module_lookup.FindModules(path) + if not modules: return None + # If there are multiple matches, We pick any one of the matching modules + # arbitrarily. TODO(emrekultursay): Return error instead. + module = modules[0] + status, val = module_explorer.GetCodeObjectAtLine(module, line) if not status: # module.__file__ must be defined or else it wouldn't have been returned From f1a0fcbd22887743ee00eaeed7f65204e61e87e4 Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Thu, 27 Jul 2017 17:30:05 -0700 Subject: [PATCH 063/241] Only accept .py extension. Reject other extensions earlier. Currently, we are already not accepting .pyc files: 1. We first check for loaded modules: Immediately returns 'no match found'. 2. We then try to defer: Defer code accepts only .py files. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=163413426 --- src/googleclouddebugger/python_breakpoint.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index ceb9298..582fe7d 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -27,7 +27,7 @@ # TODO(vlif): move to messages.py module. BREAKPOINT_ONLY_SUPPORTS_PY_FILES = ( - 'Only files with .py or .pyc extension are supported') + 'Only files with .py extension are supported') MODULE_NOT_FOUND = ( 'Python module not found. Please ensure this file is present in the ' 'version of the service you are trying to debug.') @@ -120,6 +120,15 @@ def __init__(self, definition, hub_client, breakpoints_manager): if self.definition.get('action') == 'LOG': self._collector = capture_collector.LogCollector(self.definition) + path = self.definition['location']['path'] + if os.path.splitext(path)[1] != '.py': + self._CompleteBreakpoint({ + 'status': { + 'isError': True, + 'refersTo': 'BREAKPOINT_SOURCE_LOCATION', + 'description': {'format': BREAKPOINT_ONLY_SUPPORTS_PY_FILES}}}) + return + # TODO(emrekultursay): Check both loaded and deferred modules. if not self._TryActivateBreakpoint() and not self._completed: self._DeferBreakpoint() @@ -328,14 +337,6 @@ def MultipleModulesFoundError(path, candidates): path = self.definition['location']['path'] - if os.path.splitext(path)[1] != '.py': - self._CompleteBreakpoint({ - 'status': { - 'isError': True, - 'refersTo': 'BREAKPOINT_SOURCE_LOCATION', - 'description': {'format': BREAKPOINT_ONLY_SUPPORTS_PY_FILES}}}) - return - # This is a best-effort lookup to identify any modules that may be loaded in # the future. 
deferred_paths = deferred_modules.FindModulePath(path) From a79a73fe5b596db2ac8c36297dacb28981ffdbee Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Thu, 27 Jul 2017 18:15:41 -0700 Subject: [PATCH 064/241] Close file opened by imp.find_module ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=163417672 --- src/googleclouddebugger/deferred_modules.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/googleclouddebugger/deferred_modules.py b/src/googleclouddebugger/deferred_modules.py index 8df84f2..0c781f8 100644 --- a/src/googleclouddebugger/deferred_modules.py +++ b/src/googleclouddebugger/deferred_modules.py @@ -245,12 +245,14 @@ def _FindBestMatch(source_path, module_name, paths): best_suffix_len = 0 for path in paths: try: - (f, p, unused_d) = imp.find_module(module_name, [path]) + fp, p, unused_d = imp.find_module(module_name, [path]) # find_module returns f=None when it finds a package, in which case we # should be finding common suffix against __init__.py in that package. - if not f: + if not fp: p = os.path.join(p, '__init__.py') + else: + fp.close() suffix_len = _CommonSuffix(source_path, p) From 1ab264a010902fa49e072f61e902da8e91cb6333 Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Tue, 1 Aug 2017 09:13:00 -0700 Subject: [PATCH 065/241] Trigger callbacks for modules loaded as a side effect of importing inner modules/packages. Example: (See comments inside CL for more examples). 1. User sets breakpoint in 'a/__init__.py'. 2. Package 'a' is not loaded yet. 3. Breakpoint is deferred. Callback registered for 'a'. 4. Some foo.py executes 'import a.b'. 5. Python loads both module 'a' and package 'b'. 6. Callbacks triggered by the 'import a.b' are called. Before this CL: We were only calling callback('b'). Therefore, the breakpoint was not hitting. After this CL: We will be calling callback('a') and callback('b'). Therefore, the breakpoint will hit. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=163836739 --- src/googleclouddebugger/deferred_modules.py | 36 +++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/src/googleclouddebugger/deferred_modules.py b/src/googleclouddebugger/deferred_modules.py index 0c781f8..a29035d 100644 --- a/src/googleclouddebugger/deferred_modules.py +++ b/src/googleclouddebugger/deferred_modules.py @@ -215,11 +215,43 @@ def _ImportHook(name, globals=None, locals=None, fromlist=None, level=-1): module = _real_import(name, globals, locals, fromlist, level) + # There are multiple ways to import modules which affects how the import + # hook is called. When importing a module, Python implicitly loads all + # the unloaded parents modules (e.g., import a.b loads a and b). Here are + # few examles of import statements and the __import__ hook calls: + # + # Example 1: import a + # __import__(name='a', fromlist=None) loads a + # + # Example 2: import a.b.c + # __import__(name='a.b.c', fromlist=None) loads a, b, c + # + # Example 3: import a.b.c, a.b.d + # __import__(name='a.b.c', fromlist=None) loads a, b, c + # __import__(name='a.b.d', fromlist=None) loads d (already loaded a, b) + # + # Example 4: from a import b + # __import__(name='a', fromlist=('b')) loads a, b + # + # Example 5: from a.b import c, d, e + # __import__(name='a.b', fromlist=('c', 'd', 'e')) loads a, b, c, d, e + # Note that from...import... cannot have dotted module after import. + # + # Note that we don't really know which modules were actually loaded by + # _real_import. Therefore, we try to invoke callbacks for every module + # that might get loaded, even if already loaded before calling _real_import. + # + # Also note that we only have the name of the modules, rather than their path. 
+ # This also causes superfluous callback executions because a callback + # might have been registered for not-yet loaded module 'x.y.c', but gets + # executed when module 'a.b.c' is loaded. + # Invoke callbacks for the imported module. No need to lock, since all # operations are atomic. - pos = name.rfind('.') + 1 - _InvokeImportCallback(name[pos:]) + for part in name.split('.'): + _InvokeImportCallback(part) + # TODO(emrekultursay): Consider handling 'from p import *' case. if fromlist: for module_name in fromlist: _InvokeImportCallback(module_name) From 013af3aea7f7f8cd5201d636024f77f1222ca37f Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Wed, 9 Aug 2017 14:30:55 -0700 Subject: [PATCH 066/241] Bug fix: Use abspath on the path returned by find_module (it can be relative), just to be safe. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=164774052 --- src/googleclouddebugger/deferred_modules.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/googleclouddebugger/deferred_modules.py b/src/googleclouddebugger/deferred_modules.py index a29035d..6417097 100644 --- a/src/googleclouddebugger/deferred_modules.py +++ b/src/googleclouddebugger/deferred_modules.py @@ -279,7 +279,11 @@ def _FindBestMatch(source_path, module_name, paths): try: fp, p, unused_d = imp.find_module(module_name, [path]) - # find_module returns f=None when it finds a package, in which case we + # find_module may return relative path (relative to current directory), + # which requires normalization. + p = os.path.abspath(p) + + # find_module returns fp=None when it finds a package, in which case we # should be finding common suffix against __init__.py in that package. if not fp: p = os.path.join(p, '__init__.py') From 89a7b78df6faad52ba8e75e0f3243c0253aa012d Mon Sep 17 00:00:00 2001 From: erezh Date: Wed, 9 Aug 2017 16:10:12 -0700 Subject: [PATCH 067/241] Fix a leak in the import callbacks dict. 
The dict entry was left forever even after all callbacks are removed. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=164788146 --- src/googleclouddebugger/deferred_modules.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/googleclouddebugger/deferred_modules.py b/src/googleclouddebugger/deferred_modules.py index 6417097..c297c10 100644 --- a/src/googleclouddebugger/deferred_modules.py +++ b/src/googleclouddebugger/deferred_modules.py @@ -17,6 +17,7 @@ import imp import os import sys # Must be imported, otherwise import hooks don't work. +import threading import time import cdbg_native as native @@ -26,6 +27,7 @@ # Callbacks to invoke when a module is imported. _import_callbacks = {} +_import_callbacks_lock = threading.Lock() # Original __import__ function if import hook is installed or None otherwise. _real_import = None @@ -148,17 +150,24 @@ def AddImportCallback(source_path, callback): """ def RemoveCallback(): - # Atomic operations, no need to lock. - callbacks = _import_callbacks.get(module_name) - if callbacks: - callbacks.remove(callback) + # This is a read-if-del operation on _import_callbacks. Lock to prevent + # callbacks from being inserted just before the key is deleted. Thus, it + # must be locked also when inserting a new entry below. On the other hand + # read only access, in the import hook, does not require a lock. + with _import_callbacks_lock: + callbacks = _import_callbacks.get(module_name) + if callbacks: + callbacks.remove(callback) + if not callbacks: + del _import_callbacks[module_name] module_name = _GetModuleName(source_path) if not module_name: return None - # Atomic operations, no need to lock. 
- _import_callbacks.setdefault(module_name, set()).add(callback) + with _import_callbacks_lock: + _import_callbacks.setdefault(module_name, set()).add(callback) + _InstallImportHook() return RemoveCallback From baa970a90a3435cf790a0aa0f76b5e3dfd1f07e1 Mon Sep 17 00:00:00 2001 From: erezh Date: Fri, 11 Aug 2017 12:42:05 -0700 Subject: [PATCH 068/241] Simplify import hook by using the module path (no ext), and invoking the callback only for modules actually loaded. This change addresses existing issues. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=165011824 --- src/googleclouddebugger/deferred_modules.py | 123 ++++++++++--------- src/googleclouddebugger/python_breakpoint.py | 5 +- 2 files changed, 65 insertions(+), 63 deletions(-) diff --git a/src/googleclouddebugger/deferred_modules.py b/src/googleclouddebugger/deferred_modules.py index c297c10..1f73c3a 100644 --- a/src/googleclouddebugger/deferred_modules.py +++ b/src/googleclouddebugger/deferred_modules.py @@ -29,6 +29,9 @@ _import_callbacks = {} _import_callbacks_lock = threading.Lock() +# Module fully qualified names detected by the finder at first-time load. +_import_loading_modules = set() + # Original __import__ function if import hook is installed or None otherwise. _real_import = None @@ -126,7 +129,7 @@ def SubPackages(path): return sorted(best_match) -def AddImportCallback(source_path, callback): +def AddImportCallback(abspath, callback): """Register import hook. This function overrides the default import process. Then whenever a module @@ -134,16 +137,15 @@ def AddImportCallback(source_path, callback): A module may be imported multiple times. Import event only means that the Python code contained an "import" statement. The actual loading and - initialization of a new module normally happens only once. After that the - module is just fetched from the cache. This function doesn't care whether a - module was loaded or fetched from cache. 
The callback will be triggered - all the same. + initialization of a new module normally happens only once, at which time + the callback will be invoked. This function does not validates the existence + of such a module and it's the responsibility of the caller. + + TODO(erezh): handle module reload. Args: - source_path: source file path identifying the monitored module name. If - the file is __init__.py, this function will monitor package import. - Otherwise it will monitor module import. - callback: callable to invoke upon module import. + abspath: python module file absolute path. + callback: callable to invoke upon module load. Returns: Function object to invoke to remove the installed callback. @@ -155,19 +157,15 @@ def RemoveCallback(): # must be locked also when inserting a new entry below. On the other hand # read only access, in the import hook, does not require a lock. with _import_callbacks_lock: - callbacks = _import_callbacks.get(module_name) + callbacks = _import_callbacks.get(path) if callbacks: callbacks.remove(callback) if not callbacks: - del _import_callbacks[module_name] - - module_name = _GetModuleName(source_path) - if not module_name: - return None + del _import_callbacks[path] + path, unused_ext = os.path.splitext(abspath) with _import_callbacks_lock: - _import_callbacks.setdefault(module_name, set()).add(callback) - + _import_callbacks.setdefault(path, set()).add(callback) _InstallImportHook() return RemoveCallback @@ -198,6 +196,26 @@ def _GetModuleName(source_path): return file_name +class MetaFinder(object): + """The finder is called with the full module name before it is loaded.""" + + def find_module(self, name, path=None): # pylint: disable=unused-argument,invalid-name + # Store the module fullname to be used by the import hook. + # At the time of this call the module is not loaded yet, and is only called + # the first time the module is loaded. 
For example, the following statement + # 'from a.b import c' will make 3 calls to find_module, assuming that none + # were loaded yet, with the names 'a', 'a.b' and 'a.b.c' + # + # Moreover, name might not be a true module name. Example: module 'b' in + # package 'a' calls 'import c', but 'c' is not a submodule of 'a'. The + # loader searches for relative submodules first and calls with name='a.c'. + # Then, it looks for modules on the search path and calls with name='c'. + # This code adds both 'a.c' and 'c' to the set. However, the import hook + # handles this case by looking up the module name in sys.modules. + _import_loading_modules.add(name) + return None + + def _InstallImportHook(): """Lazily installs import hook.""" @@ -212,6 +230,7 @@ def _InstallImportHook(): assert _real_import builtin.__import__ = _ImportHook + sys.meta_path.append(MetaFinder()) # pylint: disable=redefined-builtin, g-doc-args, g-doc-return-or-yield @@ -222,61 +241,45 @@ def _ImportHook(name, globals=None, locals=None, fromlist=None, level=-1): to __import__ that use keyword syntax will fail: __import('a', fromlist=[]). """ + # Really import modules. module = _real_import(name, globals, locals, fromlist, level) - # There are multiple ways to import modules which affects how the import - # hook is called. When importing a module, Python implicitly loads all - # the unloaded parents modules (e.g., import a.b loads a and b). 
Here are - # few examles of import statements and the __import__ hook calls: - # - # Example 1: import a - # __import__(name='a', fromlist=None) loads a - # - # Example 2: import a.b.c - # __import__(name='a.b.c', fromlist=None) loads a, b, c - # - # Example 3: import a.b.c, a.b.d - # __import__(name='a.b.c', fromlist=None) loads a, b, c - # __import__(name='a.b.d', fromlist=None) loads d (already loaded a, b) - # - # Example 4: from a import b - # __import__(name='a', fromlist=('b')) loads a, b - # - # Example 5: from a.b import c, d, e - # __import__(name='a.b', fromlist=('c', 'd', 'e')) loads a, b, c, d, e - # Note that from...import... cannot have dotted module after import. - # - # Note that we don't really know which modules were actually loaded by - # _real_import. Therefore, we try to invoke callbacks for every module - # that might get loaded, even if already loaded before calling _real_import. - # - # Also note that we only have the name of the modules, rather than their path. - # This also causes superfluous callback executions because a callback - # might have been registered for not-yet loaded module 'x.y.c', but gets - # executed when module 'a.b.c' is loaded. - - # Invoke callbacks for the imported module. No need to lock, since all - # operations are atomic. - for part in name.split('.'): - _InvokeImportCallback(part) - - # TODO(emrekultursay): Consider handling 'from p import *' case. - if fromlist: - for module_name in fromlist: - _InvokeImportCallback(module_name) + # Optimize common code path when no breakponts are set. + if not _import_callbacks: + _import_loading_modules.clear() + return module + + # Capture and clear the loading module names. + imp.acquire_lock() + loaded = frozenset(_import_loading_modules) + _import_loading_modules.clear() + imp.release_lock() + + # Invoke callbacks for the loaded modules. 
+ for m in loaded: + _InvokeImportCallback(sys.modules.get(m)) return module -def _InvokeImportCallback(module_name): +def _InvokeImportCallback(module): """Invokes import callbacks for the specified module.""" - callbacks = _import_callbacks.get(module_name) + + if not module: + return + + path = getattr(module, '__file__', None) + if not path: + return + + path, unused_ext = os.path.splitext(os.path.abspath(path)) + callbacks = _import_callbacks.get(path) if not callbacks: return # Common code path. # Clone the callbacks set, since it can change during enumeration. for callback in callbacks.copy(): - callback(module_name) + callback() # TODO(emrekultursay): Try reusing the Disambiguate method in module_lookup.py. diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index 582fe7d..0867a87 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -359,11 +359,10 @@ def MultipleModulesFoundError(path, candidates): 'parameters': params}}}) return - # TODO(emrekultursay): Use deferred_paths[0] instead of path. assert not self._import_hook_cleanup self._import_hook_cleanup = deferred_modules.AddImportCallback( - path, - lambda unused_module_name: self._TryActivateBreakpoint()) + deferred_paths[0], + self._TryActivateBreakpoint) def _RemoveImportHook(self): """Removes the import hook if one was installed.""" From 3e78e5ba05479e97c8b6a70293566ee3bba3a6a4 Mon Sep 17 00:00:00 2001 From: erezh Date: Fri, 11 Aug 2017 20:56:39 -0700 Subject: [PATCH 069/241] Refactoring: Split the code related to the import hook out of deferred_modules and into its own module imphook.
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=165055688 --- src/googleclouddebugger/deferred_modules.py | 141 +---------------- src/googleclouddebugger/imphook.py | 158 +++++++++++++++++++ src/googleclouddebugger/python_breakpoint.py | 3 +- 3 files changed, 161 insertions(+), 141 deletions(-) create mode 100644 src/googleclouddebugger/imphook.py diff --git a/src/googleclouddebugger/deferred_modules.py b/src/googleclouddebugger/deferred_modules.py index 1f73c3a..ff47b3d 100644 --- a/src/googleclouddebugger/deferred_modules.py +++ b/src/googleclouddebugger/deferred_modules.py @@ -16,8 +16,7 @@ import imp import os -import sys # Must be imported, otherwise import hooks don't work. -import threading +import sys import time import cdbg_native as native @@ -25,16 +24,6 @@ # Maximum number of directories that FindModulePath will scan. _DIRECTORY_LOOKUP_QUOTA = 250 -# Callbacks to invoke when a module is imported. -_import_callbacks = {} -_import_callbacks_lock = threading.Lock() - -# Module fully qualified names detected by the finder at first-time load. -_import_loading_modules = set() - -# Original __import__ function if import hook is installed or None otherwise. -_real_import = None - # TODO(emrekultursay): Move this method out of deferred_modules.py file. def FindModulePath(source_path): @@ -129,48 +118,6 @@ def SubPackages(path): return sorted(best_match) -def AddImportCallback(abspath, callback): - """Register import hook. - - This function overrides the default import process. Then whenever a module - corresponding to source_path is imported, the callback will be invoked. - - A module may be imported multiple times. Import event only means that the - Python code contained an "import" statement. The actual loading and - initialization of a new module normally happens only once, at which time - the callback will be invoked. This function does not validates the existence - of such a module and it's the responsibility of the caller. 
- - TODO(erezh): handle module reload. - - Args: - abspath: python module file absolute path. - callback: callable to invoke upon module load. - - Returns: - Function object to invoke to remove the installed callback. - """ - - def RemoveCallback(): - # This is a read-if-del operation on _import_callbacks. Lock to prevent - # callbacks from being inserted just before the key is deleted. Thus, it - # must be locked also when inserting a new entry below. On the other hand - # read only access, in the import hook, does not require a lock. - with _import_callbacks_lock: - callbacks = _import_callbacks.get(path) - if callbacks: - callbacks.remove(callback) - if not callbacks: - del _import_callbacks[path] - - path, unused_ext = os.path.splitext(abspath) - with _import_callbacks_lock: - _import_callbacks.setdefault(path, set()).add(callback) - _InstallImportHook() - - return RemoveCallback - - def _GetModuleName(source_path): """Gets the name of the module that corresponds to source_path. @@ -196,92 +143,6 @@ def _GetModuleName(source_path): return file_name -class MetaFinder(object): - """The finder is called with the full module name before it is loaded.""" - - def find_module(self, name, path=None): # pylint: disable=unused-argument,invalid-name - # Store the module fullname to be used by the import hook. - # At the time of this call the module is not loaded yet, and is only called - # the first time the module is loaded. For example, the following statement - # 'from a.b import c' will make 3 calls to find_module, assuming that none - # were loaded yet, with the names 'a', 'a.b' and 'a.b.c' - # - # Moreover, name might not be a true module name. Example: module 'b' in - # package 'a' calls 'import c', but 'c' is not a submodule of 'a'. The - # loader searches for relative submodules first and calls with name='a.c'. - # Then, it looks for modules on the search path and calls with name='c'. - # This code adds both 'a.c' and 'c' to the set. 
However, the import hook - # handles this case by looking up the module name in sys.modules. - _import_loading_modules.add(name) - return None - - -def _InstallImportHook(): - """Lazily installs import hook.""" - - global _real_import - - if _real_import: - return # Import hook already installed - - builtin = sys.modules['__builtin__'] - - _real_import = getattr(builtin, '__import__') - assert _real_import - - builtin.__import__ = _ImportHook - sys.meta_path.append(MetaFinder()) - - -# pylint: disable=redefined-builtin, g-doc-args, g-doc-return-or-yield -def _ImportHook(name, globals=None, locals=None, fromlist=None, level=-1): - """Callback when a module is being imported by Python interpreter. - - Argument names have to exactly match those of __import__. Otherwise calls - to __import__ that use keyword syntax will fail: __import('a', fromlist=[]). - """ - - # Really import modules. - module = _real_import(name, globals, locals, fromlist, level) - - # Optimize common code path when no breakponts are set. - if not _import_callbacks: - _import_loading_modules.clear() - return module - - # Capture and clear the loading module names. - imp.acquire_lock() - loaded = frozenset(_import_loading_modules) - _import_loading_modules.clear() - imp.release_lock() - - # Invoke callbacks for the loaded modules. - for m in loaded: - _InvokeImportCallback(sys.modules.get(m)) - - return module - - -def _InvokeImportCallback(module): - """Invokes import callbacks for the specified module.""" - - if not module: - return - - path = getattr(module, '__file__', None) - if not path: - return - - path, unused_ext = os.path.splitext(os.path.abspath(path)) - callbacks = _import_callbacks.get(path) - if not callbacks: - return # Common code path. - - # Clone the callbacks set, since it can change during enumeration. - for callback in callbacks.copy(): - callback() - - # TODO(emrekultursay): Try reusing the Disambiguate method in module_lookup.py. 
def _FindBestMatch(source_path, module_name, paths): """Returns paths entries that have longest suffix match with source_path.""" diff --git a/src/googleclouddebugger/imphook.py b/src/googleclouddebugger/imphook.py new file mode 100644 index 0000000..73d019b --- /dev/null +++ b/src/googleclouddebugger/imphook.py @@ -0,0 +1,158 @@ +# Copyright 2015 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Support for breakpoints on modules that haven't been loaded yet.""" + +import imp +import os +import sys # Must be imported, otherwise import hooks don't work. +import threading + +# Callbacks to invoke when a module is imported. +_import_callbacks = {} +_import_callbacks_lock = threading.Lock() + +# Module fully qualified names detected by the finder at first-time load. +_import_loading_modules = set() + +# Original __import__ function if import hook is installed or None otherwise. +_real_import = None + + +def AddImportCallback(abspath, callback): + """Register import hook. + + This function overrides the default import process. Then whenever a module + corresponding to source_path is imported, the callback will be invoked. + + A module may be imported multiple times. Import event only means that the + Python code contained an "import" statement. The actual loading and + initialization of a new module normally happens only once, at which time + the callback will be invoked. 
This function does not validates the existence + of such a module and it's the responsibility of the caller. + + TODO(erezh): handle module reload. + + Args: + abspath: python module file absolute path. + callback: callable to invoke upon module load. + + Returns: + Function object to invoke to remove the installed callback. + """ + + def RemoveCallback(): + # This is a read-if-del operation on _import_callbacks. Lock to prevent + # callbacks from being inserted just before the key is deleted. Thus, it + # must be locked also when inserting a new entry below. On the other hand + # read only access, in the import hook, does not require a lock. + with _import_callbacks_lock: + callbacks = _import_callbacks.get(path) + if callbacks: + callbacks.remove(callback) + if not callbacks: + del _import_callbacks[path] + + path, unused_ext = os.path.splitext(abspath) + with _import_callbacks_lock: + _import_callbacks.setdefault(path, set()).add(callback) + _InstallImportHook() + + return RemoveCallback + + +class MetaFinder(object): + """The finder is called with the full module name before it is loaded.""" + + def find_module(self, name, path=None): # pylint: disable=unused-argument,invalid-name + # Store the module fullname to be used by the import hook. + # At the time of this call the module is not loaded yet, and is only called + # the first time the module is loaded. For example, the following statement + # 'from a.b import c' will make 3 calls to find_module, assuming that none + # were loaded yet, with the names 'a', 'a.b' and 'a.b.c' + # + # Moreover, name might not be a true module name. Example: module 'b' in + # package 'a' calls 'import c', but 'c' is not a submodule of 'a'. The + # loader searches for relative submodules first and calls with name='a.c'. + # Then, it looks for modules on the search path and calls with name='c'. + # This code adds both 'a.c' and 'c' to the set. 
However, the import hook + # handles this case by looking up the module name in sys.modules. + _import_loading_modules.add(name) + return None + + +def _InstallImportHook(): + """Lazily installs import hook.""" + + global _real_import + + if _real_import: + return # Import hook already installed + + builtin = sys.modules['__builtin__'] + + _real_import = getattr(builtin, '__import__') + assert _real_import + + builtin.__import__ = _ImportHook + sys.meta_path.append(MetaFinder()) + + +# pylint: disable=redefined-builtin, g-doc-args, g-doc-return-or-yield +def _ImportHook(name, globals=None, locals=None, fromlist=None, level=-1): + """Callback when a module is being imported by Python interpreter. + + Argument names have to exactly match those of __import__. Otherwise calls + to __import__ that use keyword syntax will fail: __import('a', fromlist=[]). + """ + + # Really import modules. + module = _real_import(name, globals, locals, fromlist, level) + + # Optimize common code path when no breakponts are set. + if not _import_callbacks: + _import_loading_modules.clear() + return module + + # Capture and clear the loading module names. + imp.acquire_lock() + loaded = frozenset(_import_loading_modules) + _import_loading_modules.clear() + imp.release_lock() + + # Invoke callbacks for the loaded modules. + for m in loaded: + _InvokeImportCallback(sys.modules.get(m)) + + return module + + +def _InvokeImportCallback(module): + """Invokes import callbacks for the specified module.""" + + if not module: + return + + path = getattr(module, '__file__', None) + if not path: + return + + path, unused_ext = os.path.splitext(os.path.abspath(path)) + callbacks = _import_callbacks.get(path) + if not callbacks: + return # Common code path. + + # Clone the callbacks set, since it can change during enumeration. 
+ for callback in callbacks.copy(): + callback() diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index 0867a87..c2eeee6 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -22,6 +22,7 @@ import capture_collector import cdbg_native as native import deferred_modules +import imphook import module_explorer import module_lookup @@ -360,7 +361,7 @@ def MultipleModulesFoundError(path, candidates): return assert not self._import_hook_cleanup - self._import_hook_cleanup = deferred_modules.AddImportCallback( + self._import_hook_cleanup = imphook.AddImportCallback( deferred_paths[0], self._TryActivateBreakpoint) From 9df6e2b6e659c2c3a76e6eba1d124e07f8b279e8 Mon Sep 17 00:00:00 2001 From: erezh Date: Mon, 14 Aug 2017 12:57:49 -0700 Subject: [PATCH 070/241] Refactoring: Standardize the schema for breakpoint error message constants as: ERROR_<Single word from Status.Reference>_<short error name>_<num params> ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=165216876 --- src/googleclouddebugger/python_breakpoint.py | 54 ++++++++++---------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index c2eeee6..3c47261 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -27,53 +27,55 @@ import module_explorer import module_lookup # TODO(vlif): move to messages.py module. -BREAKPOINT_ONLY_SUPPORTS_PY_FILES = ( +# Use the following schema to define breakpoint error message constant: +# ERROR_<Single word from Status.Reference>_<short error name>_<num params> +ERROR_LOCATION_FILE_EXTENSION_0 = ( 'Only files with .py extension are supported') -MODULE_NOT_FOUND = ( +ERROR_LOCATION_MODULE_NOT_FOUND_0 = ( 'Python module not found.
Please ensure this file is present in the ' 'version of the service you are trying to debug.') -MULTIPLE_MODULES_FOUND2 = ( +ERROR_LOCATION_MULTIPLE_MODULES_3 = ( 'Multiple modules matching $0 ($1, $2)') -MULTIPLE_MODULES_FOUND3_OR_MORE = ( +ERROR_LOCATION_MULTIPLE_MODULES_4 = ( 'Multiple modules matching $0 ($1, $2, and $3 more)') -NO_CODE_FOUND_AT_LINE = 'No code found at line $0 in $1' -NO_CODE_FOUND_AT_LINE_ALT_LINE = ( +ERROR_LOCATION_NO_CODE_FOUND_AT_LINE_2 = 'No code found at line $0 in $1' +ERROR_LOCATION_NO_CODE_FOUND_AT_LINE_3 = ( 'No code found at line $0 in $1. Try line $2.') -NO_CODE_FOUND_AT_LINE_TWO_ALT_LINES = ( +ERROR_LOCATION_NO_CODE_FOUND_AT_LINE_4 = ( 'No code found at line $0 in $1. Try lines $2 or $3.') -GLOBAL_CONDITION_QUOTA_EXCEEDED = ( +ERROR_CONDITION_GLOBAL_QUOTA_EXCEEDED_0 = ( 'Snapshot cancelled. The condition evaluation cost for all active ' 'snapshots might affect the application performance.') -BREAKPOINT_CONDITION_QUOTA_EXCEEDED = ( +ERROR_CONDITION_BREAKPOINT_QUOTA_EXCEEDED_0 = ( 'Snapshot cancelled. The condition evaluation at this location might ' 'affect application performance. Please simplify the condition or move ' 'the snapshot to a less frequently called statement.') -MUTABLE_CONDITION = ( +ERROR_CONDITION_MUTABLE_0 = ( 'Only immutable expressions can be used in snapshot conditions') -SNAPSHOT_EXPIRED = ( +ERROR_AGE_SNAPSHOT_EXPIRED_0 = ( 'The snapshot has expired') -LOGPOINT_EXPIRED = ( +ERROR_AGE_LOGPOINT_EXPIRED_0 = ( 'The logpoint has expired') -INTERNAL_ERROR = ( +ERROR_UNSPECIFIED_INTERNAL_ERROR = ( 'Internal error occurred') # Status messages for different breakpoint events (except of "hit"). 
_BREAKPOINT_EVENT_STATUS = dict( [(native.BREAKPOINT_EVENT_ERROR, {'isError': True, - 'description': {'format': INTERNAL_ERROR}}), + 'description': {'format': ERROR_UNSPECIFIED_INTERNAL_ERROR}}), (native.BREAKPOINT_EVENT_GLOBAL_CONDITION_QUOTA_EXCEEDED, {'isError': True, 'refersTo': 'BREAKPOINT_CONDITION', - 'description': {'format': GLOBAL_CONDITION_QUOTA_EXCEEDED}}), + 'description': {'format': ERROR_CONDITION_GLOBAL_QUOTA_EXCEEDED_0}}), (native.BREAKPOINT_EVENT_BREAKPOINT_CONDITION_QUOTA_EXCEEDED, {'isError': True, 'refersTo': 'BREAKPOINT_CONDITION', - 'description': {'format': BREAKPOINT_CONDITION_QUOTA_EXCEEDED}}), + 'description': {'format': ERROR_CONDITION_BREAKPOINT_QUOTA_EXCEEDED_0}}), (native.BREAKPOINT_EVENT_CONDITION_EXPRESSION_MUTABLE, {'isError': True, 'refersTo': 'BREAKPOINT_CONDITION', - 'description': {'format': MUTABLE_CONDITION}})]) + 'description': {'format': ERROR_CONDITION_MUTABLE_0}})]) # The implementation of datetime.strptime imports an undocumented module called # _strptime. If it happens at the wrong time, we can get an exception about @@ -127,7 +129,7 @@ def __init__(self, definition, hub_client, breakpoints_manager): 'status': { 'isError': True, 'refersTo': 'BREAKPOINT_SOURCE_LOCATION', - 'description': {'format': BREAKPOINT_ONLY_SUPPORTS_PY_FILES}}}) + 'description': {'format': ERROR_LOCATION_FILE_EXTENSION_0}}}) return # TODO(emrekultursay): Check both loaded and deferred modules. 
@@ -170,9 +172,9 @@ def ExpireBreakpoint(self): return if self.definition.get('action') == 'LOG': - message = LOGPOINT_EXPIRED + message = ERROR_AGE_LOGPOINT_EXPIRED_0 else: - message = SNAPSHOT_EXPIRED + message = ERROR_AGE_SNAPSHOT_EXPIRED_0 self._CompleteBreakpoint({ 'status': { 'isError': True, @@ -272,11 +274,11 @@ def _FindCodeObject(self): params += alt_lines if len(params) == 4: - fmt = NO_CODE_FOUND_AT_LINE_TWO_ALT_LINES + fmt = ERROR_LOCATION_NO_CODE_FOUND_AT_LINE_4 elif len(params) == 3: - fmt = NO_CODE_FOUND_AT_LINE_ALT_LINE + fmt = ERROR_LOCATION_NO_CODE_FOUND_AT_LINE_3 else: - fmt = NO_CODE_FOUND_AT_LINE + fmt = ERROR_LOCATION_NO_CODE_FOUND_AT_LINE_2 self._CompleteBreakpoint({ 'status': { @@ -330,9 +332,9 @@ def MultipleModulesFoundError(path, candidates): assert len(candidates) > 1 params = [path] + StripCommonPrefixSegments(candidates[:2]) if len(candidates) == 2: - fmt = MULTIPLE_MODULES_FOUND2 + fmt = ERROR_LOCATION_MULTIPLE_MODULES_3 else: - fmt = MULTIPLE_MODULES_FOUND3_OR_MORE + fmt = ERROR_LOCATION_MULTIPLE_MODULES_4 params.append(str(len(candidates) - 2)) return fmt, params @@ -346,7 +348,7 @@ def MultipleModulesFoundError(path, candidates): 'status': { 'isError': True, 'refersTo': 'BREAKPOINT_SOURCE_LOCATION', - 'description': {'format': MODULE_NOT_FOUND}}}) + 'description': {'format': ERROR_LOCATION_MODULE_NOT_FOUND_0}}}) return if len(deferred_paths) > 1: From 58a86b85af5af7f31ae4fe1803a7f7bf1ffe5d96 Mon Sep 17 00:00:00 2001 From: erezh Date: Mon, 14 Aug 2017 13:38:15 -0700 Subject: [PATCH 071/241] Reject breakpoints with plain __init__.py path upfront. Users must specify a path directory to debug this file. The file __init__.py appears in every directory and using the plain name just ends up with an error. This cl, 1. Returns an appropriate error to the user. 2. Prevents returning error with the first __init__ found 3. Prevents a complete scan of the file system with large set of matching files. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=165222106 --- src/googleclouddebugger/python_breakpoint.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index 3c47261..1c0fcc5 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -34,6 +34,8 @@ ERROR_LOCATION_MODULE_NOT_FOUND_0 = ( 'Python module not found. Please ensure this file is present in the ' 'version of the service you are trying to debug.') +ERROR_LOCATION_MULTIPLE_MODULES_1 = ( + 'Multiple modules matching $0. Please specify the module path.') ERROR_LOCATION_MULTIPLE_MODULES_3 = ( 'Multiple modules matching $0 ($1, $2)') ERROR_LOCATION_MULTIPLE_MODULES_4 = ( @@ -85,6 +87,10 @@ datetime.strptime('2017-01-01', '%Y-%m-%d') +def _IsRootInitPy(path): + return path.lstrip(os.sep) == '__init__.py' + + class PythonBreakpoint(object): """Handles a single Python breakpoint. @@ -132,6 +138,16 @@ def __init__(self, definition, hub_client, breakpoints_manager): 'description': {'format': ERROR_LOCATION_FILE_EXTENSION_0}}}) return + if _IsRootInitPy(path): + self._CompleteBreakpoint({ + 'status': { + 'isError': True, + 'refersTo': 'BREAKPOINT_SOURCE_LOCATION', + 'description': { + 'format': ERROR_LOCATION_MULTIPLE_MODULES_1, + 'parameters': [path]}}}) + return + # TODO(emrekultursay): Check both loaded and deferred modules. if not self._TryActivateBreakpoint() and not self._completed: self._DeferBreakpoint() From 2c277d2e3f6b61e30bb8c877d2e83ab551478754 Mon Sep 17 00:00:00 2001 From: erezh Date: Thu, 17 Aug 2017 14:14:21 -0700 Subject: [PATCH 072/241] Replace the module search algorithm to find all modules including loaded and use pkgutil.iter_modules(), rather than scanning the disk directly and using imp.find_module(). 
This cl fixes several issues related to finding the right module, by using filename to compare rather than the module name. Searching loading packages paths as well as loaded modules path. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=165627426 --- src/googleclouddebugger/deferred_modules.py | 186 ------------------- src/googleclouddebugger/module_search.py | 121 ++++++++++++ src/googleclouddebugger/python_breakpoint.py | 6 +- 3 files changed, 125 insertions(+), 188 deletions(-) delete mode 100644 src/googleclouddebugger/deferred_modules.py create mode 100644 src/googleclouddebugger/module_search.py diff --git a/src/googleclouddebugger/deferred_modules.py b/src/googleclouddebugger/deferred_modules.py deleted file mode 100644 index ff47b3d..0000000 --- a/src/googleclouddebugger/deferred_modules.py +++ /dev/null @@ -1,186 +0,0 @@ -# Copyright 2015 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS-IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Support for breakpoints on modules that haven't been loaded yet.""" - -import imp -import os -import sys -import time - -import cdbg_native as native - -# Maximum number of directories that FindModulePath will scan. -_DIRECTORY_LOOKUP_QUOTA = 250 - - -# TODO(emrekultursay): Move this method out of deferred_modules.py file. -def FindModulePath(source_path): - """Checks availability of a Python module. - - This function checks if it is possible that a module (loaded or not) - will match the specified path. 
- - There is no absolutely correct way to do this. The application may just - import a module from a string, or dynamically change sys.path. This function - implements heuristics that should cover all reasonable cases with a good - performance. - - There can be some edge cases when this code is going to scan a huge number - of directories. This can be very expensive. To mitigate it, we limit the - number of directories that can be scanned. If this threshold is reached, - false negatives (i.e., missing modules in the output) are possible. - - Args: - source_path: source path as specified in the breakpoint. - - Returns: - A list containing the paths of modules that best match source_path. - """ - def IsPackage(path): - """Checks if the specified directory is a valid Python package.""" - init_base_path = os.path.join(path, '__init__.py') - return (os.path.isfile(init_base_path) or - os.path.isfile(init_base_path + 'c') or - os.path.isfile(init_base_path + 'o')) - - def SubPackages(path): - """Gets a list of all the directories of subpackages of path.""" - if os.path.isdir(path): - for name in os.listdir(path): - if '.' in name: - continue # This is definitely a file, package names can't have dots. - - if directory_lookups[0] >= _DIRECTORY_LOOKUP_QUOTA: - break - - directory_lookups[0] += 1 - - subpath = os.path.join(path, name) - if IsPackage(subpath): - yield subpath - - start_time = time.time() - directory_lookups = [0] - - # For packages, module_name will be the name of the package (e.g., for - # 'a/b/c/__init__.py' it will be 'c'). Otherwise, module_name will be the - # name of the module (e.g., for 'a/b/c/foo.py' it will be 'foo'). - module_name = _GetModuleName(source_path) - if not module_name: - return [] - - # Recursively discover all the subpackages in all the Python paths. 
- paths = set() - pending = set(sys.path) - while pending: - path = pending.pop() - paths.add(path) - pending |= frozenset(SubPackages(path)) - paths - - # Append all directories where some modules have already been loaded. There - # is a good chance that the file we are looking for will be there. This is - # only useful if a module got somehow loaded outside of sys.path. We don't - # include these paths in the recursive discovery of subpackages because it - # takes a lot of time in some edge cases and not worth it. - default_path = sys.path[0] - for unused_module_name, module in sys.modules.copy().iteritems(): - file_path = getattr(module, '__file__', None) - path, unused_name = os.path.split(file_path) if file_path else (None, None) - paths.add(path or default_path) - - # Normalize paths and remove duplicates. - paths = set(os.path.abspath(path) for path in paths) - - best_match = _FindBestMatch(source_path, module_name, paths) - - native.LogInfo( - ('Look up for %s completed in %d directories, ' - 'scanned %d directories (quota: %d), ' - 'result: %r, total time: %f ms') % ( - module_name, - len(paths), - directory_lookups[0], - _DIRECTORY_LOOKUP_QUOTA, - best_match, - (time.time() - start_time) * 1000)) - return sorted(best_match) - - -def _GetModuleName(source_path): - """Gets the name of the module that corresponds to source_path. - - Args: - source_path: file path to resolve into a module. - - Returns: - If the source file is __init__.py, this function will return the name - of the package (last directory before file name). Otherwise this function - return file name without extension. - """ - directory, name = os.path.split(source_path) - if name == '__init__.py': - if not directory.strip(os.sep): - return None # '__init__.py' is way too generic. We can't match it. 
- - directory, file_name = os.path.split(directory) - else: - file_name, ext = os.path.splitext(name) - if ext != '.py': - return None # ".py" extension is expected - - return file_name - - -# TODO(emrekultursay): Try reusing the Disambiguate method in module_lookup.py. -def _FindBestMatch(source_path, module_name, paths): - """Returns paths entries that have longest suffix match with source_path.""" - best = [] - best_suffix_len = 0 - for path in paths: - try: - fp, p, unused_d = imp.find_module(module_name, [path]) - - # find_module may return relative path (relative to current directory), - # which requires normalization. - p = os.path.abspath(p) - - # find_module returns fp=None when it finds a package, in which case we - # should be finding common suffix against __init__.py in that package. - if not fp: - p = os.path.join(p, '__init__.py') - else: - fp.close() - - suffix_len = _CommonSuffix(source_path, p) - - if suffix_len > best_suffix_len: - best = [p] - best_suffix_len = suffix_len - elif suffix_len == best_suffix_len: - best.append(p) - - except ImportError: - pass # a module with the given name was not found inside path. - - return best - - -# TODO(emrekultursay): Remove duplicate copy in module_lookup.py. -def _CommonSuffix(path1, path2): - """Returns the number of common directory names at the tail of the paths.""" - return len(os.path.commonprefix([ - path1[::-1].split(os.sep), - path2[::-1].split(os.sep)])) - diff --git a/src/googleclouddebugger/module_search.py b/src/googleclouddebugger/module_search.py new file mode 100644 index 0000000..c513a9f --- /dev/null +++ b/src/googleclouddebugger/module_search.py @@ -0,0 +1,121 @@ +# Copyright 2015 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Inclusive search for module files.""" + +import os +import pkgutil +import sys +import time + +import cdbg_native as native + + +def _CommonPathSuffixLen(paths): + """Returns the longest common path suffix len in a list of paths.""" + return len(os.path.commonprefix([path[::-1].split(os.sep) for path in paths])) + + +def _GetIsPackageAndModuleName(path_noext): + """Returns a tuple indicating whether the path is a package and a name.""" + + directory, name = os.path.split(path_noext) + if name != '__init__': + return (False, name) + # It is a package, return the package name. + return (True, os.path.basename(directory)) + + +# TODO(erezh): Ensure we handle whitespace in paths correctly including, +# extension, basename and dirname. +def FindMatchingFiles(location_path): + """Returns a list of absolute filenames of best matching modules/packages.""" + + def AddCandidate(mod_path): + suffix_len = _CommonPathSuffixLen([src_path, mod_path]) + if suffix_len < longest_suffix_len[0]: + return + if suffix_len > longest_suffix_len[0]: + candidates.clear() + longest_suffix_len[0] = suffix_len + candidates.add(mod_path) + + # We measure the time it takes to execute the scan. + start_time = time.time() + num_dirs_scanned = 0 + + # Remove the file extension and identify if it's a package. + src_path, src_ext = os.path.splitext(location_path) + assert src_ext == '.py' + (src_ispkg, src_name) = _GetIsPackageAndModuleName(src_path) + assert src_name + + # The set of module/package path w/ no extension. + # Using mutable vars to make them available in nested functions. 
+ # Init longest_suffix_len to 1 to avoid inserting zero length suffixes. + candidates = set() + longest_suffix_len = [1] + + # Search paths for modules and packages, init with system search paths. + search_paths = set(sys.path) + + # Add search paths from the already loaded packages and add matching modules + # or packages to the candidates list. + for module in sys.modules.values(): + # Extend the search paths with packages path and modules file directory. + # Note that __path__ only exist for packages and is a list of abs paths. + search_paths |= frozenset(getattr(module, '__path__', [])) + mod_path = os.path.splitext(getattr(module, '__file__', ''))[0] + if not mod_path: + continue + mod_path = os.path.abspath(mod_path) + search_paths.add(os.path.dirname(mod_path)) + # Add loaded modules to the candidates set. + if (src_ispkg, src_name) == _GetIsPackageAndModuleName(mod_path): + AddCandidate(mod_path) + + # Walk the aggregated search path and loook for modules or packages. + # By searching one path at the time we control the module file name + # without having to load it. + # TODO(erezh): consider using the alternative impl in cr/165133821 which + # only uses os file lookup and not using pkgutil. The alternative is faster + # but is making many more assuptions that this impl does not. + while search_paths: + num_dirs_scanned += 1 + path = search_paths.pop() + for unused_importer, mod_name, mod_ispkg in pkgutil.iter_modules([path]): + mod_path = os.path.join(path, mod_name) + if mod_ispkg: + search_paths.add(mod_path) + mod_path = os.path.join(mod_path, '__init__') + if (src_ispkg, src_name) == (mod_ispkg, mod_name): + AddCandidate(mod_path) + + # Sort the list to return a stable result to the user. + # TODO(erezh): No need to add the .py extenssion, this is done just for + # compatabilty with current code. Once refactored not to use file extension + # this code can be removed to just return the sorted candidates. 
+ candidates = sorted([path + '.py' for path in candidates]) + + # Log scan stats, without the files list to avoid very long output as well as + # the potential leak of system files that the user has no access to. + native.LogInfo( + ('Found %d files matching \'%s\' in %d scanned folders in %f ms') % ( + len(candidates), + location_path, + num_dirs_scanned, + (time.time() - start_time) * 1000)) + + # Return a sorted result for stable report to the user + return candidates diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index 1c0fcc5..883d07c 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -21,10 +21,10 @@ import capture_collector import cdbg_native as native -import deferred_modules import imphook import module_explorer import module_lookup +import module_search # TODO(vlif): move to messages.py module. # Use the following schema to define breakpoint error message constant: @@ -129,6 +129,8 @@ def __init__(self, definition, hub_client, breakpoints_manager): if self.definition.get('action') == 'LOG': self._collector = capture_collector.LogCollector(self.definition) + # TODO(erezh): Ensure we handle whitespace in paths correctly. + # including, extension, basename, location_path path = self.definition['location']['path'] if os.path.splitext(path)[1] != '.py': self._CompleteBreakpoint({ @@ -358,7 +360,7 @@ def MultipleModulesFoundError(path, candidates): # This is a best-effort lookup to identify any modules that may be loaded in # the future. - deferred_paths = deferred_modules.FindModulePath(path) + deferred_paths = module_search.FindMatchingFiles(path) if not deferred_paths: self._CompleteBreakpoint({ 'status': { From 24cfb6bf9b0b5c09ce96ceb4ad475a5cf833728b Mon Sep 17 00:00:00 2001 From: erezh Date: Thu, 17 Aug 2017 15:46:29 -0700 Subject: [PATCH 073/241] Search and validate all matching modules upfront when setting a new breakpoint. 
This cl makes sure that there are no better matching modules than those already loaded, as well as detects ambiguous file name. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=165640465 --- src/googleclouddebugger/python_breakpoint.py | 137 ++++++++----------- 1 file changed, 58 insertions(+), 79 deletions(-) diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index 883d07c..34b7e06 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -91,6 +91,39 @@ def _IsRootInitPy(path): return path.lstrip(os.sep) == '__init__.py' +def _StripCommonPathPrefix(paths): + """Removes path common prefix from a list of path strings.""" + # Find the longest common prefix in terms of characters. + common_prefix = os.path.commonprefix(paths) + # Truncate at last segment boundary. E.g. '/aa/bb1/x.py' and '/a/bb2/x.py' + # have '/aa/bb' as the common prefix, but we should strip '/aa/' instead. + # If there's no '/' found, returns -1+1=0. + common_prefix_len = common_prefix.rfind('/') + 1 + return [path[common_prefix_len:] for path in paths] + + +def _MultipleModulesFoundError(path, candidates): + """Generates an error message to be used when multiple matches are found. + + Args: + path: The breakpoint location path that the user provided. + candidates: List of paths that match the user provided path. Must + contain at least 2 entries (throws AssertionError otherwise). + + Returns: + A (format, parameters) tuple that should be used in the description + field of the breakpoint error status. + """ + assert len(candidates) > 1 + params = [path] + _StripCommonPathPrefix(candidates[:2]) + if len(candidates) == 2: + fmt = ERROR_LOCATION_MULTIPLE_MODULES_3 + else: + fmt = ERROR_LOCATION_MULTIPLE_MODULES_4 + params.append(str(len(candidates) - 2)) + return fmt, params + + class PythonBreakpoint(object): """Handles a single Python breakpoint. 
@@ -105,8 +138,7 @@ def __init__(self, definition, hub_client, breakpoints_manager): Tries to set the breakpoint. If the source location is invalid, the breakpoint is completed with an error message. If the source location is - valid, but the module hasn't been loaded yet, the breakpoint is initialized - as deferred. + valid, but the module hasn't been loaded yet, the breakpoint is deferred. Args: definition: breakpoint definition as it came from the backend. @@ -150,9 +182,32 @@ def __init__(self, definition, hub_client, breakpoints_manager): 'parameters': [path]}}}) return + # Find all module files matching the location path. + paths = module_search.FindMatchingFiles(path) + if not paths: + self._CompleteBreakpoint({ + 'status': { + 'isError': True, + 'refersTo': 'BREAKPOINT_SOURCE_LOCATION', + 'description': {'format': ERROR_LOCATION_MODULE_NOT_FOUND_0}}}) + return + + if len(paths) > 1: + fmt, params = _MultipleModulesFoundError(path, paths) + self._CompleteBreakpoint({ + 'status': { + 'isError': True, + 'refersTo': 'BREAKPOINT_SOURCE_LOCATION', + 'description': { + 'format': fmt, + 'parameters': params}}}) + return + # TODO(emrekultursay): Check both loaded and deferred modules. if not self._TryActivateBreakpoint() and not self._completed: - self._DeferBreakpoint() + self._import_hook_cleanup = imphook.AddImportCallback( + paths[0], + self._TryActivateBreakpoint) def Clear(self): """Clears the breakpoint and releases all breakpoint resources. @@ -309,82 +364,6 @@ def _FindCodeObject(self): return val - # Enables deferred breakpoints. - def _DeferBreakpoint(self): - """Defers breakpoint activation until the module has been loaded. - - This function first verifies that a module corresponding to breakpoint - location exists. This way if the user sets breakpoint in a file that - doesn't even exist, the debugger will not be waiting forever. If there - is definitely no module that matches this breakpoint, this function - completes the breakpoint with error status. 
- - Otherwise the debugger assumes that the module corresponding to breakpoint - location hasn't been loaded yet. The debugger will then start waiting for - the module to get loaded. Once the module is loaded, the debugger - will automatically try to activate the breakpoint. - """ - - def StripCommonPrefixSegments(paths): - """Removes common prefix segments from a list of path strings.""" - # Find the longest common prefix in terms of characters. - common_prefix = os.path.commonprefix(paths) - # Truncate at last segment boundary. E.g. '/aa/bb1/x.py' and '/a/bb2/x.py' - # have '/aa/bb' as the common prefix, but we should strip '/aa/' instead. - # If there's no '/' found, returns -1+1=0. - common_prefix_len = common_prefix.rfind('/') + 1 - return [path[common_prefix_len:] for path in paths] - - def MultipleModulesFoundError(path, candidates): - """Generates an error message to be used when multiple matches are found. - - Args: - path: The breakpoint location path that the user provided. - candidates: List of paths that match the user provided path. Must - contain at least 2 entries (throws AssertionError otherwise). - - Returns: - A (format, parameters) tuple that should be used in the description - field of the breakpoint error status. - """ - assert len(candidates) > 1 - params = [path] + StripCommonPrefixSegments(candidates[:2]) - if len(candidates) == 2: - fmt = ERROR_LOCATION_MULTIPLE_MODULES_3 - else: - fmt = ERROR_LOCATION_MULTIPLE_MODULES_4 - params.append(str(len(candidates) - 2)) - return fmt, params - - path = self.definition['location']['path'] - - # This is a best-effort lookup to identify any modules that may be loaded in - # the future. 
- deferred_paths = module_search.FindMatchingFiles(path) - if not deferred_paths: - self._CompleteBreakpoint({ - 'status': { - 'isError': True, - 'refersTo': 'BREAKPOINT_SOURCE_LOCATION', - 'description': {'format': ERROR_LOCATION_MODULE_NOT_FOUND_0}}}) - return - - if len(deferred_paths) > 1: - fmt, params = MultipleModulesFoundError(path, deferred_paths) - self._CompleteBreakpoint({ - 'status': { - 'isError': True, - 'refersTo': 'BREAKPOINT_SOURCE_LOCATION', - 'description': { - 'format': fmt, - 'parameters': params}}}) - return - - assert not self._import_hook_cleanup - self._import_hook_cleanup = imphook.AddImportCallback( - deferred_paths[0], - self._TryActivateBreakpoint) - def _RemoveImportHook(self): """Removes the import hook if one was installed.""" if self._import_hook_cleanup: From 79c3c1b58720efe459d05e179c3329fd71f1ae54 Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Fri, 18 Aug 2017 11:42:37 -0700 Subject: [PATCH 074/241] Add instructions on how to use Python Cloud Debugger with Django autoreload. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=165732080 --- README.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/README.md b/README.md index 4d4d9f1..37c7e47 100644 --- a/README.md +++ b/README.md @@ -113,6 +113,7 @@ sudo apt-get -y -q --no-install-recommends install \ python -m googleclouddebugger -- myapp.py + ### Service Account Service account authentication lets you run the debugger agent on any Linux @@ -160,3 +161,28 @@ python \ -- \ myapp.py + +### Django Web Framework + +You can use the Cloud Debugger to debug Django web framework applications. + + +The best way to enable the Cloud Debugger with Django is to add the following +code fragment to your `manage.py` file: + +```python +# Attach the Python Cloud debugger (only the main server process). 
+if os.environ.get('RUN_MAIN') or '--noreload' in sys.argv: + try: + import googleclouddebugger + googleclouddebugger.enable() + except ImportError: + pass +``` + + +Alternatively, you can pass the `--noreload` flag when running the Django +`manage.py` and use any one of the option A and B listed earlier. Note that +using the `--noreload` flag disables the autoreload feature in Django, which +means local changes to files will not be automatically picked up by Django. + From e99213893b90a6a06e5afb1b4af49a2dd50fd10b Mon Sep 17 00:00:00 2001 From: mattwach Date: Mon, 21 Aug 2017 12:48:43 -0700 Subject: [PATCH 075/241] Add data hiding capability to python agent. This CL adds the hiding logic but not the ability to load YAML - that will come in a future CL. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=165962232 --- src/googleclouddebugger/__init__.py | 6 +- .../breakpoints_manager.py | 8 +- src/googleclouddebugger/capture_collector.py | 37 +++++++++- .../glob_data_visibility_policy.py | 73 +++++++++++++++++++ src/googleclouddebugger/python_breakpoint.py | 10 ++- 5 files changed, 127 insertions(+), 7 deletions(-) create mode 100644 src/googleclouddebugger/glob_data_visibility_policy.py diff --git a/src/googleclouddebugger/__init__.py b/src/googleclouddebugger/__init__.py index b25f2cc..4d95551 100644 --- a/src/googleclouddebugger/__init__.py +++ b/src/googleclouddebugger/__init__.py @@ -48,7 +48,11 @@ def _StartDebugger(): cdbg_native.InitializeModule(_flags) _hub_client = gcp_hub_client.GcpHubClient() - _breakpoints_manager = breakpoints_manager.BreakpointsManager(_hub_client) + # TODO(mattwach): Provide a data_visibility_policy object if the user + # provided configuration for one. + data_visibility_policy = None + _breakpoints_manager = breakpoints_manager.BreakpointsManager( + _hub_client, data_visibility_policy) # Set up loggers for logpoints. 
capture_collector.SetLogger(logging.getLogger()) diff --git a/src/googleclouddebugger/breakpoints_manager.py b/src/googleclouddebugger/breakpoints_manager.py index 807349c..bfb91e0 100644 --- a/src/googleclouddebugger/breakpoints_manager.py +++ b/src/googleclouddebugger/breakpoints_manager.py @@ -34,10 +34,13 @@ class BreakpointsManager(object): Args: hub_client: queries active breakpoints from the backend and sends breakpoint updates back to the backend. + data_visibility_policy: An object used to determine the visibiliy + of a captured variable. May be None if no policy is available. """ - def __init__(self, hub_client): + def __init__(self, hub_client, data_visibility_policy): self._hub_client = hub_client + self.data_visibility_policy = data_visibility_policy # Lock to synchronize access to data across multiple threads. self._lock = RLock() @@ -71,7 +74,8 @@ def SetActiveBreakpoints(self, breakpoints_data): # Create new breakpoints. self._active.update([ (x['id'], - python_breakpoint.PythonBreakpoint(x, self._hub_client, self)) + python_breakpoint.PythonBreakpoint( + x, self._hub_client, self, self.data_visibility_policy)) for x in breakpoints_data if x['id'] in ids - self._active.viewkeys() - self._completed]) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index a0e02d7..2e839fa 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -186,13 +186,17 @@ class CaptureCollector(object): # string. pretty_printers = [] - def __init__(self, definition): + def __init__(self, definition, data_visibility_policy): """Class constructor. Args: definition: breakpoint definition that this class will augment with captured data. + data_visibility_policy: An object used to determine the visibiliy + of a captured variable. May be None if no policy is available. 
""" + self.data_visibility_policy = data_visibility_policy + self.breakpoint = copy.deepcopy(definition) self.breakpoint['stackFrames'] = [] @@ -363,13 +367,42 @@ def CaptureNamedVariable(self, name, value, depth, limits): name = str(id(name)) self._total_size += len(name) - v = self.CaptureVariable(value, depth, limits) + v = (self.CheckDataVisiblity(name) or + self.CaptureVariable(value, depth, limits)) v['name'] = name except RuntimeError as e: raise RuntimeError( 'INTERNAL ERROR while capturing {0}: {1}'.format(name, e)) return v + def CheckDataVisiblity(self, name): + """Returns a status object if the given name is not visible. + + Args: + name: Dot-separated symbol name + + Returns: + None if the name is visible. A variable structure with an error status + if the object is not visible. + """ + if not self.data_visibility_policy: + return None + + visible, reason = self.data_visibility_policy.IsDataVisible(name) + + if visible: + return None + + return { + 'status': { + 'is_error': False, + 'refers_to': 'VARIABLE_NAME', + 'description': { + 'format': reason + } + } + } + def CaptureVariablesList(self, items, depth, empty_message, limits): """Captures list of named items. diff --git a/src/googleclouddebugger/glob_data_visibility_policy.py b/src/googleclouddebugger/glob_data_visibility_policy.py new file mode 100644 index 0000000..31c729c --- /dev/null +++ b/src/googleclouddebugger/glob_data_visibility_policy.py @@ -0,0 +1,73 @@ +"""Determines the visibilty of python data and symbols. 
+ +Example Usage: + + blacklist_patterns = ( + 'com.private.*' + 'com.foo.bar' + ) + whitelist_patterns = ( + 'com.*' + ) + policy = GlobDataVisibilityPolicy(blacklist_patterns, whitelist_patterns) + + policy.IsDataVisible('org.foo.bar') -> (False, 'not whitelisted by config') + policy.IsDataVisible('com.foo.bar') -> (False, 'blacklisted by config') + policy.IsDataVisible('com.private.foo') -> (False, 'blacklisted by config') + policy.IsDataVisible('com.foo') -> (True, 'visible') +""" + +import fnmatch + + +# Possible visibility responses +RESPONSES = { + 'BLACKLISTED': 'blacklisted by config', + 'NOT_WHITELISTED': 'not whitelisted by config', + 'VISIBLE': 'visible', +} + + +class GlobDataVisibilityPolicy(object): + """Policy provides visibility policy details to the caller.""" + + def __init__(self, blacklist_patterns, whitelist_patterns): + self.blacklist_patterns = blacklist_patterns + self.whitelist_patterns = whitelist_patterns + + def IsDataVisible(self, path): + """Returns a tuple (visible, reason) stating if the data should be visible. + + Args: + path: A dot separated path that represents a package, class, method or + variable. The format is identical to pythons "import" statement. + + Returns: + (visible, reason) where visible is a boolean that is True if the data + should be visible. Reason is a string reason that can be displayed + to the user and indicates why data is visible or not visible. + """ + if _Matches(path, self.blacklist_patterns): + return (False, RESPONSES['BLACKLISTED']) + + if not _Matches(path, self.whitelist_patterns): + return (False, RESPONSES['NOT_WHITELISTED']) + + return (True, RESPONSES['VISIBLE']) + + +def _Matches(path, pattern_list): + """Returns true if path matches any patten found in pattern_list. + + Args: + path: A dot separated path to a package, class, method or variable + pattern_list: A list of wildcard patterns + + Returns: + True if path matches any wildcard found in pattern_list. 
+ """ + # TODO(mattwach): This code does not scale to large pattern_list + # sizes. For now, keep things logically simple but consider a + # more optimized solution in the future. + return any(fnmatch.fnmatchcase(path, pattern) for pattern in pattern_list) + diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index 34b7e06..8e97e2e 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -133,7 +133,8 @@ class PythonBreakpoint(object): to log a statement. """ - def __init__(self, definition, hub_client, breakpoints_manager): + def __init__(self, definition, hub_client, breakpoints_manager, + data_visibility_policy): """Class constructor. Tries to set the breakpoint. If the source location is invalid, the @@ -144,9 +145,13 @@ def __init__(self, definition, hub_client, breakpoints_manager): definition: breakpoint definition as it came from the backend. hub_client: asynchronously sends breakpoint updates to the backend. breakpoints_manager: parent object managing active breakpoints. + data_visibility_policy: An object used to determine the visibiliy + of a captured variable. May be None if no policy is available. """ self.definition = definition + self.data_visibility_policy = data_visibility_policy + # Breakpoint expiration time. 
self.expiration_period = timedelta(hours=24) @@ -419,7 +424,8 @@ def _BreakpointEvent(self, event, frame): self._CompleteBreakpoint({'status': error_status}) return - collector = capture_collector.CaptureCollector(self.definition) + collector = capture_collector.CaptureCollector( + self.definition, self.data_visibility_policy) collector.Collect(frame) self._CompleteBreakpoint(collector.breakpoint, is_incremental=False) From 7023ec8b3c793311d9561aa5c66e67ea4cd9834a Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Mon, 21 Aug 2017 17:58:31 -0700 Subject: [PATCH 076/241] Sanitize the breakpoint location path, ignore whitespace and absolute path. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=166001966 --- src/googleclouddebugger/python_breakpoint.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index 8e97e2e..fac23c3 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -87,10 +87,6 @@ datetime.strptime('2017-01-01', '%Y-%m-%d') -def _IsRootInitPy(path): - return path.lstrip(os.sep) == '__init__.py' - - def _StripCommonPathPrefix(paths): """Removes path common prefix from a list of path strings.""" # Find the longest common prefix in terms of characters. @@ -124,6 +120,11 @@ def _MultipleModulesFoundError(path, candidates): return fmt, params +def _SanitizePath(path): + """Removes leading/trailing whitespace, and leading path separator.""" + return path.strip().lstrip(os.sep) + + class PythonBreakpoint(object): """Handles a single Python breakpoint. @@ -166,9 +167,9 @@ def __init__(self, definition, hub_client, breakpoints_manager, if self.definition.get('action') == 'LOG': self._collector = capture_collector.LogCollector(self.definition) - # TODO(erezh): Ensure we handle whitespace in paths correctly. 
- # including, extension, basename, location_path - path = self.definition['location']['path'] + path = _SanitizePath(self.definition['location']['path']) + + # Only accept .py extension. if os.path.splitext(path)[1] != '.py': self._CompleteBreakpoint({ 'status': { @@ -177,7 +178,8 @@ def __init__(self, definition, hub_client, breakpoints_manager, 'description': {'format': ERROR_LOCATION_FILE_EXTENSION_0}}}) return - if _IsRootInitPy(path): + # A flat init file is too generic; path must include package name. + if path == '__init__.py': self._CompleteBreakpoint({ 'status': { 'isError': True, @@ -429,3 +431,4 @@ def _BreakpointEvent(self, event, frame): collector.Collect(frame) self._CompleteBreakpoint(collector.breakpoint, is_incremental=False) + From 1d4596ce9bef05279e31c07855f352a6e3654216 Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Mon, 21 Aug 2017 19:02:48 -0700 Subject: [PATCH 077/241] Use the full path to figure out the correct loaded module when setting the breakpoint, and avoid multiple module lookups. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=166007427 --- src/googleclouddebugger/imphook.py | 2 +- src/googleclouddebugger/module_lookup.py | 152 ------------------- src/googleclouddebugger/python_breakpoint.py | 145 ++++++++---------- 3 files changed, 68 insertions(+), 231 deletions(-) delete mode 100644 src/googleclouddebugger/module_lookup.py diff --git a/src/googleclouddebugger/imphook.py b/src/googleclouddebugger/imphook.py index 73d019b..250ed22 100644 --- a/src/googleclouddebugger/imphook.py +++ b/src/googleclouddebugger/imphook.py @@ -155,4 +155,4 @@ def _InvokeImportCallback(module): # Clone the callbacks set, since it can change during enumeration. 
for callback in callbacks.copy(): - callback() + callback(module) diff --git a/src/googleclouddebugger/module_lookup.py b/src/googleclouddebugger/module_lookup.py deleted file mode 100644 index df547f5..0000000 --- a/src/googleclouddebugger/module_lookup.py +++ /dev/null @@ -1,152 +0,0 @@ -# Copyright 2015 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS-IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Finds the loaded module by source path. - -The lookup is a fuzzy one, the source path coming from a breakpoint might -be a subpath of module path or may be longer than the module path. -""" - -import os -import sys - - -def FindModules(source_path): - """Finds the loaded modules whose paths match the given source_path best. - - If there are multiple possible matches, returns them all. - - Args: - source_path: source file path as specified in the breakpoint. - - Returns: - List of module objects that best match the source_path or [] if no - match is found. - """ - # The lookup is performed in two steps. First, we search all modules whose - # name match the given source_path's file name (i.e., ignore the leading - # directory). Then, we select the results whose directory matches the given - # input best. 
- dirname, basename = os.path.split(source_path) - file_name_root, ext = os.path.splitext(basename) - if ext != '.py': - return [] # ".py" extension is expected - - candidates = _GetModulesByFileName(file_name_root) - if not candidates: - return [] - - if len(candidates) == 1: - return candidates - - if not dirname: - return candidates # No need disambiguate. - - # Find the module that has the best path prefix. - indices = _Disambiguate( - dirname, - [os.path.dirname(module.__file__) for module in candidates]) - return [candidates[i] for i in indices] - - -def _GetModulesByFileName(lookup_file_name_root): - """Gets list of all the loaded modules by file name (ignores directory).""" - matches = [] - - # Clone modules dictionaries to allow new modules to load during iteration. - for unused_name, module in sys.modules.copy().iteritems(): - if not hasattr(module, '__file__'): - continue # This is a built-in module. - - file_name_root, ext = os.path.splitext(os.path.basename(module.__file__)) - - # TODO(emrekultursay): Verify why we are discarding .pyo files here. - if (file_name_root == lookup_file_name_root and - (ext == '.py' or ext == '.pyc')): - matches.append(module) - - return matches - - -def _Disambiguate(lookup_path, paths): - """Disambiguates multiple candidates based on the longest suffix. - - Example when this disambiguation is needed: - Breakpoint at: 'myproject/app/db/common.py' - Candidate modules: ['/home/root/fe/common.py', '/home/root/db/common.py'] - - In this example the input to this function will be: - lookup_path = 'myproject/app/db' - paths = ['/home/root/fe', '/home/root/db'] - - The second path is clearly the best match, so this function will return [1]. - - Args: - lookup_path: the source path of the searched module (without file name - and extension). Must be non-empty. - paths: candidate paths (each without file name and extension). Must have - two or more elements. - - Returns: - List of indices of the best matches. 
- """ - assert lookup_path - assert len(paths) > 1 - - best_indices = [] - best_len = 1 # zero-length matches should be discarded. - - for i, path in enumerate(paths): - current_len = _CommonSuffix(lookup_path, path) - - if current_len > best_len: - best_indices = [i] - best_len = current_len - elif current_len == best_len: - best_indices.append(i) - - return best_indices - - -def _CommonSuffix(path1, path2): - """Computes the number of common directory names at the tail of the paths. - - Examples: - * _CommonSuffix('a/x/y', 'b/x/y') = 2 - * _CommonSuffix('a/b/c', 'd/e/f') = 0 - * _CommonSuffix('a/b/c', 'a/b/x') = 0 - - Args: - path1: first directory path (should not have file name). - path2: second directory path (should not have file name). - - Returns: - Number of common consecutive directory segments from right. - """ - - # Normalize the paths just to be on the safe side - path1 = path1.strip(os.sep) - path2 = path2.strip(os.sep) - - counter = 0 - while path1 and path2: - path1, cur1 = os.path.split(path1) - path2, cur2 = os.path.split(path2) - - if cur1 != cur2 or not cur1: - break - - counter += 1 - - return counter diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index fac23c3..b13d569 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -17,13 +17,13 @@ from datetime import datetime from datetime import timedelta import os +import sys from threading import Lock import capture_collector import cdbg_native as native import imphook import module_explorer -import module_lookup import module_search # TODO(vlif): move to messages.py module. @@ -87,6 +87,28 @@ datetime.strptime('2017-01-01', '%Y-%m-%d') +def _GetLoadedModuleByPath(abspath): + """Returns the loaded module that matches abspath or None if not found.""" + + for module in sys.modules.values(): + path = getattr(module, '__file__', None) + if not path: + continue # This is a built-in module. 
+ + # module.__file__ may be relative to the current directory, so we first + # convert it into absolute path. + module_abspath = os.path.abspath(path) + + # Ignore file extension while comparing the file paths (e.g., /foo/bar.py vs + # /foo/bar.pyc should match). + if os.path.splitext(module_abspath)[0] == os.path.splitext(abspath)[0]: + return module + + +def _IsRootInitPy(path): + return path.lstrip(os.sep) == '__init__.py' + + def _StripCommonPathPrefix(paths): """Removes path common prefix from a list of path strings.""" # Find the longest common prefix in terms of characters. @@ -210,11 +232,14 @@ def __init__(self, definition, hub_client, breakpoints_manager, 'parameters': params}}}) return - # TODO(emrekultursay): Check both loaded and deferred modules. - if not self._TryActivateBreakpoint() and not self._completed: + # TODO(erezh): Handle the possible thread race condtion from lookup to hook. + module = _GetLoadedModuleByPath(paths[0]) + if module: + self._ActivateBreakpoint(module) + else: self._import_hook_cleanup = imphook.AddImportCallback( paths[0], - self._TryActivateBreakpoint) + self._ActivateBreakpoint) def Clear(self): """Clears the breakpoint and releases all breakpoint resources. @@ -261,26 +286,43 @@ def ExpireBreakpoint(self): 'refersTo': 'BREAKPOINT_AGE', 'description': {'format': message}}}) - def _TryActivateBreakpoint(self): - """Sets the breakpoint if the module has already been loaded. - - This function will complete the breakpoint with error if breakpoint - definition is incorrect. Examples: invalid line or bad condition. + def _ActivateBreakpoint(self, module): + """Sets the breakpoint in the loaded module, or complete with error.""" - If the code object corresponding to the source path can't be found, - this function returns False. In this case, the breakpoint is not - completed, since the breakpoint may be deferred. + # First remove the import hook (if installed). 
+ self._RemoveImportHook() - Returns: - True if breakpoint was set or false otherwise. False can be returned - for potentially deferred breakpoints or in case of a bad breakpoint - definition. The self._completed flag distinguishes between the two cases. - """ + line = self.definition['location']['line'] # Find the code object in which the breakpoint is being set. - code_object = self._FindCodeObject() - if not code_object: - return False + status, codeobj = module_explorer.GetCodeObjectAtLine(module, line) + if not status: + # First two parameters are common: the line of the breakpoint and the + # module we are trying to insert the breakpoint in. + # TODO(emrekultursay): Do not display the entire path of the file. Either + # strip some prefix, or display the path in the breakpoint. + params = [str(line), os.path.splitext(module.__file__)[0] + '.py'] + + # The next 0, 1, or 2 parameters are the alternative lines to set the + # breakpoint at, displayed for the user's convenience. + alt_lines = (str(l) for l in codeobj if l is not None) + params += alt_lines + + if len(params) == 4: + fmt = ERROR_LOCATION_NO_CODE_FOUND_AT_LINE_4 + elif len(params) == 3: + fmt = ERROR_LOCATION_NO_CODE_FOUND_AT_LINE_3 + else: + fmt = ERROR_LOCATION_NO_CODE_FOUND_AT_LINE_2 + + self._CompleteBreakpoint({ + 'status': { + 'isError': True, + 'refersTo': 'BREAKPOINT_SOURCE_LOCATION', + 'description': { + 'format': fmt, + 'parameters': params}}}) + return # Compile the breakpoint condition. 
condition = None @@ -297,7 +339,8 @@ def _TryActivateBreakpoint(self): 'description': { 'format': 'Invalid expression', 'parameters': [str(e)]}}}) - return False + return + except SyntaxError as e: self._CompleteBreakpoint({ 'status': { @@ -306,71 +349,17 @@ def _TryActivateBreakpoint(self): 'description': { 'format': 'Expression could not be compiled: $0', 'parameters': [e.msg]}}}) - return False - - line = self.definition['location']['line'] + return native.LogInfo('Creating new Python breakpoint %s in %s, line %d' % ( - self.GetBreakpointId(), code_object, line)) + self.GetBreakpointId(), codeobj, line)) self._cookie = native.SetConditionalBreakpoint( - code_object, + codeobj, line, condition, self._BreakpointEvent) - self._RemoveImportHook() - return True - - def _FindCodeObject(self): - """Finds the target code object for the breakpoint. - - This function completes breakpoint with error if the module was found, - but the line number is invalid. When code object is not found for the - breakpoint source location, this function just returns None. It does not - assume error, because it might be a deferred breakpoint. - - Returns: - Python code object object in which the breakpoint will be set or None if - module not found or if there is no code at the specified line. - """ - path = self.definition['location']['path'] - line = self.definition['location']['line'] - - modules = module_lookup.FindModules(path) - if not modules: - return None - - # If there are multiple matches, We pick any one of the matching modules - # arbitrarily. TODO(emrekultursay): Return error instead. 
- module = modules[0] - - status, val = module_explorer.GetCodeObjectAtLine(module, line) - if not status: - # module.__file__ must be defined or else it wouldn't have been returned - # from FindModule - params = [str(line), module.__file__] - alt_lines = (str(l) for l in val if l is not None) - params += alt_lines - - if len(params) == 4: - fmt = ERROR_LOCATION_NO_CODE_FOUND_AT_LINE_4 - elif len(params) == 3: - fmt = ERROR_LOCATION_NO_CODE_FOUND_AT_LINE_3 - else: - fmt = ERROR_LOCATION_NO_CODE_FOUND_AT_LINE_2 - - self._CompleteBreakpoint({ - 'status': { - 'isError': True, - 'refersTo': 'BREAKPOINT_SOURCE_LOCATION', - 'description': { - 'format': fmt, - 'parameters': params}}}) - return None - - return val - def _RemoveImportHook(self): """Removes the import hook if one was installed.""" if self._import_hook_cleanup: From 8c531de606426bcecf78393f94c7a02f05e397f8 Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Tue, 22 Aug 2017 10:49:55 -0700 Subject: [PATCH 078/241] Increment python agent version to 2.0 ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=166082121 --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index 4405fc4..6378a82 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. -__version__ = '1.18' +__version__ = '2.0' From 69b334f311f661a098a05eccf79130d50f4cd9fb Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Wed, 23 Aug 2017 11:51:43 -0700 Subject: [PATCH 079/241] Add two missing os.path.abspath calls in module exploration logic. 1) To dedup sys.path which may contain relative paths. 
2) To dedup the directories of packages (obtained via if getattr(module, '__path__') ) as the user can manually overwrite __path__ parameters of modules. See the unit tests for examples. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=166232776 --- src/googleclouddebugger/module_search.py | 40 +++++++++++++----------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/src/googleclouddebugger/module_search.py b/src/googleclouddebugger/module_search.py index c513a9f..203cd93 100644 --- a/src/googleclouddebugger/module_search.py +++ b/src/googleclouddebugger/module_search.py @@ -61,29 +61,33 @@ def AddCandidate(mod_path): (src_ispkg, src_name) = _GetIsPackageAndModuleName(src_path) assert src_name - # The set of module/package path w/ no extension. # Using mutable vars to make them available in nested functions. - # Init longest_suffix_len to 1 to avoid inserting zero length suffixes. + + # The set of module/package path w/ no extension. Contains absolute paths. candidates = set() + + # Init longest_suffix_len to 1 to avoid inserting zero length suffixes. longest_suffix_len = [1] # Search paths for modules and packages, init with system search paths. - search_paths = set(sys.path) + search_abspaths = set(os.path.abspath(path) for path in sys.path) # Add search paths from the already loaded packages and add matching modules # or packages to the candidates list. for module in sys.modules.values(): # Extend the search paths with packages path and modules file directory. - # Note that __path__ only exist for packages and is a list of abs paths. - search_paths |= frozenset(getattr(module, '__path__', [])) + # Note that __path__ only exist for packages, but does not have to be + # absolute path as the user can overwrite it. 
+ search_abspaths |= frozenset( + os.path.abspath(path) for path in getattr(module, '__path__', [])) mod_path = os.path.splitext(getattr(module, '__file__', ''))[0] if not mod_path: continue - mod_path = os.path.abspath(mod_path) - search_paths.add(os.path.dirname(mod_path)) + mod_abspath = os.path.abspath(mod_path) + search_abspaths.add(os.path.dirname(mod_abspath)) # Add loaded modules to the candidates set. - if (src_ispkg, src_name) == _GetIsPackageAndModuleName(mod_path): - AddCandidate(mod_path) + if (src_ispkg, src_name) == _GetIsPackageAndModuleName(mod_abspath): + AddCandidate(mod_abspath) # Walk the aggregated search path and loook for modules or packages. # By searching one path at the time we control the module file name @@ -91,22 +95,22 @@ def AddCandidate(mod_path): # TODO(erezh): consider using the alternative impl in cr/165133821 which # only uses os file lookup and not using pkgutil. The alternative is faster # but is making many more assuptions that this impl does not. - while search_paths: + while search_abspaths: num_dirs_scanned += 1 - path = search_paths.pop() - for unused_importer, mod_name, mod_ispkg in pkgutil.iter_modules([path]): - mod_path = os.path.join(path, mod_name) + abspath = search_abspaths.pop() + for unused_importer, mod_name, mod_ispkg in pkgutil.iter_modules([abspath]): + mod_abspath = os.path.join(abspath, mod_name) if mod_ispkg: - search_paths.add(mod_path) - mod_path = os.path.join(mod_path, '__init__') - if (src_ispkg, src_name) == (mod_ispkg, mod_name): - AddCandidate(mod_path) + search_abspaths.add(mod_abspath) + mod_abspath = os.path.join(mod_abspath, '__init__') + if src_ispkg == mod_ispkg and src_name == mod_name: + AddCandidate(mod_abspath) # Sort the list to return a stable result to the user. # TODO(erezh): No need to add the .py extenssion, this is done just for # compatabilty with current code. Once refactored not to use file extension # this code can be removed to just return the sorted candidates. 
- candidates = sorted([path + '.py' for path in candidates]) + candidates = sorted(abspath + '.py' for abspath in candidates) # Log scan stats, without the files list to avoid very long output as well as # the potential leak of system files that the user has no access to. From 7540a1a5e1be26907e94c129abd2fcd8894cad70 Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Wed, 23 Aug 2017 14:23:50 -0700 Subject: [PATCH 080/241] Increment python agent version to 2.1 ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=166254229 --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index 6378a82..f2f0450 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. -__version__ = '2.0' +__version__ = '2.1' From 29512231be1f04bffe88ae9fbe3bb61833c243f1 Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Fri, 25 Aug 2017 09:46:05 -0700 Subject: [PATCH 081/241] Removes meta_path based import hook mechanism. The meta_path mechanism causes breakpoints to be activated before the module is fully loaded. Instead, uses a combination of (module,name,fromlist) to identify all modules were touched by the import. Note: Let's also test this manually before submitting. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=166487604 --- src/googleclouddebugger/imphook.py | 113 ++++++++++++++++++++--------- 1 file changed, 79 insertions(+), 34 deletions(-) diff --git a/src/googleclouddebugger/imphook.py b/src/googleclouddebugger/imphook.py index 250ed22..edadacb 100644 --- a/src/googleclouddebugger/imphook.py +++ b/src/googleclouddebugger/imphook.py @@ -14,7 +14,6 @@ """Support for breakpoints on modules that haven't been loaded yet.""" -import imp import os import sys # Must be imported, otherwise import hooks don't work. import threading @@ -23,9 +22,6 @@ _import_callbacks = {} _import_callbacks_lock = threading.Lock() -# Module fully qualified names detected by the finder at first-time load. -_import_loading_modules = set() - # Original __import__ function if import hook is installed or None otherwise. _real_import = None @@ -72,26 +68,6 @@ def RemoveCallback(): return RemoveCallback -class MetaFinder(object): - """The finder is called with the full module name before it is loaded.""" - - def find_module(self, name, path=None): # pylint: disable=unused-argument,invalid-name - # Store the module fullname to be used by the import hook. - # At the time of this call the module is not loaded yet, and is only called - # the first time the module is loaded. For example, the following statement - # 'from a.b import c' will make 3 calls to find_module, assuming that none - # were loaded yet, with the names 'a', 'a.b' and 'a.b.c' - # - # Moreover, name might not be a true module name. Example: module 'b' in - # package 'a' calls 'import c', but 'c' is not a submodule of 'a'. The - # loader searches for relative submodules first and calls with name='a.c'. - # Then, it looks for modules on the search path and calls with name='c'. - # This code adds both 'a.c' and 'c' to the set. However, the import hook - # handles this case by looking up the module name in sys.modules. 
- _import_loading_modules.add(name) - return None - - def _InstallImportHook(): """Lazily installs import hook.""" @@ -106,7 +82,6 @@ def _InstallImportHook(): assert _real_import builtin.__import__ = _ImportHook - sys.meta_path.append(MetaFinder()) # pylint: disable=redefined-builtin, g-doc-args, g-doc-return-or-yield @@ -122,17 +97,87 @@ def _ImportHook(name, globals=None, locals=None, fromlist=None, level=-1): # Optimize common code path when no breakponts are set. if not _import_callbacks: - _import_loading_modules.clear() return module - # Capture and clear the loading module names. - imp.acquire_lock() - loaded = frozenset(_import_loading_modules) - _import_loading_modules.clear() - imp.release_lock() - - # Invoke callbacks for the loaded modules. - for m in loaded: + # When the _real_import statement above is executed, it can also trigger the + # loading of outer packages, if they are not loaded yet. Unfortunately, + # _real_import does not give us a list of packages/modules were loaded as + # a result of executing it. Therefore, we conservatively assume that they + # were all just loaded. + # + # To manually identify all modules that _real_import touches, we apply a + # method that combines 'module', 'name', and 'fromlist'. This method is a + # heuristic that is based on observation. + # + # Note that the list we obtain will contain false positives, i.e., modules + # that were already loaded. However, since these modules were already loaded, + # there can be no pending breakpoint callbacks on them, and therefore, the + # wasted computation will be limited to one dictionary lookup per module. + # + # Example: When module 'a.b.c' is imported, we need to activate deferred + # breakpoints in all of ['a', 'a.b', 'a.b.c']. If 'a' was already loaded, then + # _import_callbacks.get('a') will return nothing, and we will move on to + # 'a.b'. 
+ # + # To make the code simpler, we keep track of parts of the innermost module + # (i.e., 'a', 'b', 'c') and then combine them later. + + parts = module.__name__.split('.') + if fromlist: + # In case of 'from x import y', all modules in 'fromlist' can be directly + # found in the package identified by the returned 'module'. + # Note that we discard the 'name' field, because it is a substring of the + # name of the returned module. + + # Example 1: Using absolute path. + # from a.b import c + # name = 'a.b', fromlist=['c'], module= + # + # Example 2: Using relative path from inside package 'a'. + # from b import c + # name = 'b', fromlist=['c'], module= + # + # Example 3: Using relative path from inside package 'a'. + # from b.c import d + # name = 'b.c', fromlist=['d'], module= + pass + else: + # In case of 'import a.b', we append the 'name' field to the name of the + # returned module. Note that these two have one component in common, so + # we remove that one component from the start of 'name' before appending it. + + # Example 1: Use absolute path. + # import a + # name = 'a', fromlist=None, module= + # + # Example 2: Use absolute path. + # import a.b + # name = 'a.b', fromlist=None, module= + # + # Example 3: Use absolute path. + # import a.b.c.d + # name = 'a.b.c.d', fromlist=None, module= + # + # Example 4: Use relative path from inside package 'a'. + # import b.c + # name = 'b.c', fromlist=None, module='a.b' + parts += name.split('.')[1:] + + def GenerateModules(): + """Generates module names using parts and fromlist.""" + # If parts contains ['a', 'b', 'c'], then we generate ['a', 'a.b','a.b.c']. + current = None + for part in parts: + current = (current + '.' + part) if current else part + yield current + + # We then add entries in fromlist to the final package path (i.e., 'a.b.c') + # to obtain the innermost packages (i.e., 'a.b.c.d, a.b.c.e'). + if fromlist: + for f in fromlist: + yield current + '.' 
+ f + + for m in GenerateModules(): _InvokeImportCallback(sys.modules.get(m)) return module From 0e4b36db786bd36ebb50696342b328edbb4934e9 Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Thu, 31 Aug 2017 17:03:35 -0700 Subject: [PATCH 082/241] Handle symlinks in module search by deduplicating at the right place. 1. What do we need to dedup? Only directories, not files. Example 1: Need to deduplicate directories. Assume: sys.path = ['a/b', 'd'] Where: a/b/c.py d -> a/b (symlink) Then, a breakpoint at 'c.py:12' must have only 1 match: a/b/c.py Example 2: Assume: sys.path = ['.', 'a/b'] Where: a/b/c.py d.py -> a/b/c.py Then, a breakpoint at 'c.py:12' must have only 1 match: a/b/c.py And, a breakpoint at 'd.py:23' must have only 1 match: d.py (Python compiler/interpreter treats d.py as a separate module) 2. Where do we dedup? We can dedup in two places: (a) When gathering all directories we explore. (b) When adding a path to 'candidates' set. Solution (a) requires remembering all directories we have already explored. Solution (b) is simpler, but is computationally more expensive as we search same directories multiple times. This CL uses solution (b). ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=167213003 --- src/googleclouddebugger/imphook.py | 11 +++-- src/googleclouddebugger/module_search.py | 43 +++++++++++--------- src/googleclouddebugger/module_utils.py | 40 ++++++++++++++++++ src/googleclouddebugger/python_breakpoint.py | 6 +-- 4 files changed, 73 insertions(+), 27 deletions(-) create mode 100644 src/googleclouddebugger/module_utils.py diff --git a/src/googleclouddebugger/imphook.py b/src/googleclouddebugger/imphook.py index edadacb..f961018 100644 --- a/src/googleclouddebugger/imphook.py +++ b/src/googleclouddebugger/imphook.py @@ -18,6 +18,8 @@ import sys # Must be imported, otherwise import hooks don't work. import threading +import module_utils + # Callbacks to invoke when a module is imported. 
_import_callbacks = {} _import_callbacks_lock = threading.Lock() @@ -189,12 +191,13 @@ def _InvokeImportCallback(module): if not module: return - path = getattr(module, '__file__', None) - if not path: + mod_path = getattr(module, '__file__', None) + if not mod_path: return - path, unused_ext = os.path.splitext(os.path.abspath(path)) - callbacks = _import_callbacks.get(path) + mod_abspath = module_utils.GetAbsolutePath(mod_path) + mod_abspath, unused_ext = os.path.splitext(mod_abspath) + callbacks = _import_callbacks.get(mod_abspath) if not callbacks: return # Common code path. diff --git a/src/googleclouddebugger/module_search.py b/src/googleclouddebugger/module_search.py index 203cd93..6007515 100644 --- a/src/googleclouddebugger/module_search.py +++ b/src/googleclouddebugger/module_search.py @@ -20,6 +20,7 @@ import time import cdbg_native as native +import module_utils def _CommonPathSuffixLen(paths): @@ -43,13 +44,15 @@ def FindMatchingFiles(location_path): """Returns a list of absolute filenames of best matching modules/packages.""" def AddCandidate(mod_path): - suffix_len = _CommonPathSuffixLen([src_path, mod_path]) + # We must sanitize the module path before using it for proper deduplication. + mod_abspath = module_utils.GetAbsolutePath(mod_path) + suffix_len = _CommonPathSuffixLen([src_path, mod_abspath]) if suffix_len < longest_suffix_len[0]: return if suffix_len > longest_suffix_len[0]: candidates.clear() longest_suffix_len[0] = suffix_len - candidates.add(mod_path) + candidates.add(mod_abspath) # We measure the time it takes to execute the scan. start_time = time.time() @@ -63,31 +66,31 @@ def AddCandidate(mod_path): # Using mutable vars to make them available in nested functions. - # The set of module/package path w/ no extension. Contains absolute paths. + # The set of module/package path w/ no extension. Use AddCandidate() to insert + # into this set. candidates = set() # Init longest_suffix_len to 1 to avoid inserting zero length suffixes. 
longest_suffix_len = [1] # Search paths for modules and packages, init with system search paths. - search_abspaths = set(os.path.abspath(path) for path in sys.path) + search_paths = set(path for path in sys.path) # Add search paths from the already loaded packages and add matching modules # or packages to the candidates list. for module in sys.modules.values(): # Extend the search paths with packages path and modules file directory. - # Note that __path__ only exist for packages, but does not have to be - # absolute path as the user can overwrite it. - search_abspaths |= frozenset( - os.path.abspath(path) for path in getattr(module, '__path__', [])) + # Note that __path__ only exist for packages. + search_paths |= frozenset(getattr(module, '__path__', [])) mod_path = os.path.splitext(getattr(module, '__file__', ''))[0] + if not mod_path: continue - mod_abspath = os.path.abspath(mod_path) - search_abspaths.add(os.path.dirname(mod_abspath)) + + search_paths.add(os.path.dirname(mod_path)) # Add loaded modules to the candidates set. - if (src_ispkg, src_name) == _GetIsPackageAndModuleName(mod_abspath): - AddCandidate(mod_abspath) + if (src_ispkg, src_name) == _GetIsPackageAndModuleName(mod_path): + AddCandidate(mod_path) # Walk the aggregated search path and loook for modules or packages. # By searching one path at the time we control the module file name @@ -95,22 +98,22 @@ def AddCandidate(mod_path): # TODO(erezh): consider using the alternative impl in cr/165133821 which # only uses os file lookup and not using pkgutil. The alternative is faster # but is making many more assuptions that this impl does not. 
- while search_abspaths: + while search_paths: num_dirs_scanned += 1 - abspath = search_abspaths.pop() - for unused_importer, mod_name, mod_ispkg in pkgutil.iter_modules([abspath]): - mod_abspath = os.path.join(abspath, mod_name) + path = search_paths.pop() + for unused_importer, mod_name, mod_ispkg in pkgutil.iter_modules([path]): + mod_path = os.path.join(path, mod_name) if mod_ispkg: - search_abspaths.add(mod_abspath) - mod_abspath = os.path.join(mod_abspath, '__init__') + search_paths.add(mod_path) + mod_path = os.path.join(mod_path, '__init__') if src_ispkg == mod_ispkg and src_name == mod_name: - AddCandidate(mod_abspath) + AddCandidate(mod_path) # Sort the list to return a stable result to the user. # TODO(erezh): No need to add the .py extenssion, this is done just for # compatabilty with current code. Once refactored not to use file extension # this code can be removed to just return the sorted candidates. - candidates = sorted(abspath + '.py' for abspath in candidates) + candidates = sorted(path + '.py' for path in candidates) # Log scan stats, without the files list to avoid very long output as well as # the potential leak of system files that the user has no access to. diff --git a/src/googleclouddebugger/module_utils.py b/src/googleclouddebugger/module_utils.py new file mode 100644 index 0000000..a36dba8 --- /dev/null +++ b/src/googleclouddebugger/module_utils.py @@ -0,0 +1,40 @@ +# Copyright 2015 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Provides utility functions for module path processing. +""" + + +import os + + +def GetAbsolutePath(mod_path): + """Flattens symlinks and indirections in the module path. + + To uniquely identify each module file, the file path must be sanitized + by following all symbolic links and normalizing to an absolute path. + + Note that the module file (i.e., .py/.pyc/.pyo file) itself can be a + symbolic link, but we must *NOT* follow that symbolic link. + + Args: + mod_path: A path that represents a module file. + + Returns: + The sanitized version of mod_path. + """ + pkg_path, file_name = os.path.split(mod_path) + pkg_path = os.path.abspath(os.path.realpath(pkg_path)) + return os.path.join(pkg_path, file_name) + diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index b13d569..66ec44c 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -25,6 +25,7 @@ import imphook import module_explorer import module_search +import module_utils # TODO(vlif): move to messages.py module. # Use the following schema to define breakpoint error message constant: @@ -95,9 +96,8 @@ def _GetLoadedModuleByPath(abspath): if not path: continue # This is a built-in module. - # module.__file__ may be relative to the current directory, so we first - # convert it into absolute path. - module_abspath = os.path.abspath(path) + # module.__file__ may be relative or may contain symlinks inside it. + module_abspath = module_utils.GetAbsolutePath(path) # Ignore file extension while comparing the file paths (e.g., /foo/bar.py vs # /foo/bar.pyc should match). 
From 5fb6ad4f764bff36ccd7fa6d415eebe6882afb9b Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Fri, 1 Sep 2017 10:30:06 -0700 Subject: [PATCH 083/241] Increment python agent minor version to 2.2 ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=167292554 --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index f2f0450..aeee7f4 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. -__version__ = '2.1' +__version__ = '2.2' From 91ac5600717d32eeb99bde76752d1844f22d5637 Mon Sep 17 00:00:00 2001 From: mattwach Date: Mon, 11 Sep 2017 11:29:31 -0700 Subject: [PATCH 084/241] Add YAML Data Visibility Reader to python agent. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=168259028 --- src/googleclouddebugger/__init__.py | 30 +++- .../error_data_visibility_policy.py | 18 +++ .../yaml_data_visibility_config_reader.py | 128 ++++++++++++++++++ src/setup.py | 2 +- 4 files changed, 171 insertions(+), 7 deletions(-) create mode 100644 src/googleclouddebugger/error_data_visibility_policy.py create mode 100644 src/googleclouddebugger/yaml_data_visibility_config_reader.py diff --git a/src/googleclouddebugger/__init__.py b/src/googleclouddebugger/__init__.py index 4d95551..6221cf7 100644 --- a/src/googleclouddebugger/__init__.py +++ b/src/googleclouddebugger/__init__.py @@ -29,8 +29,11 @@ import appengine_pretty_printers import breakpoints_manager import capture_collector -import cdbg_native +import error_data_visibility_policy import gcp_hub_client +import glob_data_visibility_policy +import yaml_data_visibility_config_reader +import cdbg_native import version __version__ = version.__version__ @@ -48,11 +51,10 @@ def _StartDebugger(): cdbg_native.InitializeModule(_flags) _hub_client = gcp_hub_client.GcpHubClient() - # TODO(mattwach): Provide a data_visibility_policy object if the user - # provided configuration for one. - data_visibility_policy = None + + visibility_policy = _GetVisibilityPolicy() _breakpoints_manager = breakpoints_manager.BreakpointsManager( - _hub_client, data_visibility_policy) + _hub_client, visibility_policy) # Set up loggers for logpoints. 
capture_collector.SetLogger(logging.getLogger()) @@ -87,6 +89,22 @@ def _StartDebugger(): _hub_client.Start() +def _GetVisibilityPolicy(): + """If a debugger configuration is found, create a visibility policy.""" + try: + visibility_config = yaml_data_visibility_config_reader.OpenAndRead() + except yaml_data_visibility_config_reader.Error as err: + return error_data_visibility_policy.ErrorDataVisibilityPolicy( + 'Could not process debugger config: %s' % err) + + if visibility_config: + return glob_data_visibility_policy.GlobDataVisibilityPolicy( + visibility_config.blacklist_patterns, + visibility_config.whitelist_patterns) + + return None + + def _DebuggerMain(): """Starts the debugger and runs the application with debugger attached.""" global _flags @@ -132,7 +150,7 @@ def enable(**kwargs): This function should only be called once. Args: - flags: debugger configuration. + **kwargs: debugger configuration flags. Raises: RuntimeError: if called more than once. diff --git a/src/googleclouddebugger/error_data_visibility_policy.py b/src/googleclouddebugger/error_data_visibility_policy.py new file mode 100644 index 0000000..847ac50 --- /dev/null +++ b/src/googleclouddebugger/error_data_visibility_policy.py @@ -0,0 +1,18 @@ +"""Always returns the provided error on visibility requests. 
+ +Example Usage: + + policy = ErrorDataVisibilityPolicy('An error message') + + policy.IsDataVisible('org.foo.bar') -> (False, 'An error message') +""" + + +class ErrorDataVisibilityPolicy(object): + """Visibility policy that always returns an error to the caller.""" + + def __init__(self, error_message): + self.error_message = error_message + + def IsDataVisible(self, unused_path): + return (False, self.error_message) diff --git a/src/googleclouddebugger/yaml_data_visibility_config_reader.py b/src/googleclouddebugger/yaml_data_visibility_config_reader.py new file mode 100644 index 0000000..08f5306 --- /dev/null +++ b/src/googleclouddebugger/yaml_data_visibility_config_reader.py @@ -0,0 +1,128 @@ +"""Reads a YAML configuration file to determine visibility policy. + +Example Usage: + try: + config = yaml_data_visibility_config_reader.OpenAndRead(filename) + except yaml_data_visibility_config_reader.Error, e: + ... + + visibility_policy = GlobDataVisibilityPolicy( + config.blacklist_patterns, + config.whitelist_patterns) +""" + +import os +import sys +import yaml + + +class Error(Exception): + """Generic error class that other errors in this module inherit from.""" + pass + + +class YAMLLoadError(Error): + """Thrown when reading an opened file fails.""" + pass + + +class ParseError(Error): + """Thrown when there is a problem with the YAML structure.""" + pass + + +class UnknownConfigKeyError(Error): + """Thrown when the YAML contains an unsupported keyword.""" + pass + + +class NotAListError(Error): + """Thrown when a YAML key does not reference a list.""" + pass + + +class ElementNotAStringError(Error): + """Thrown when a YAML list element is not a string.""" + pass + + +class Config(object): + """Configuration object that Read() returns to the caller.""" + + def __init__(self, blacklist_patterns, whitelist_patterns): + self.blacklist_patterns = blacklist_patterns + self.whitelist_patterns = whitelist_patterns + + +def 
OpenAndRead(relative_path='debugger-config.yaml'): + """Attempts to find the yaml configuration file, then read it. + + Args: + relative_path: Optional relative path override. + + Returns: + A Config object if the open and read were successful, None if the file + does not exist (which is not considered an error). + + Raises: + Error (some subclass): As thrown by the called Read() function. + """ + + # Note: This logic follows the convention established by source-context.json + try: + with open(os.path.join(sys.path[0], relative_path), 'r') as f: + return Read(f) + except IOError: + return None + + +def Read(f): + """Reads and returns Config data from a yaml file. + + Args: + f: Yaml file to parse. + + Returns: + Config object as defined in this file. + + Raises: + Error (some subclass): If there is a problem loading or parsing the file. + """ + try: + yaml_data = yaml.load(f) + except yaml.YAMLError, e: + raise ParseError('%s' % e) + except IOError, e: + raise YAMLLoadError('%s' % e) + + _CheckData(yaml_data) + return Config( + yaml_data.get('blacklist', ()), + yaml_data.get('whitelist', ('*'))) + + +def _CheckData(yaml_data): + """Checks data for illegal keys and formatting.""" + legal_keys = set(('blacklist', 'whitelist')) + unknown_keys = set(yaml_data) - legal_keys + if unknown_keys: + raise UnknownConfigKeyError( + 'Unknown keys in configuration: %s' % unknown_keys) + + for key, data in yaml_data.iteritems(): + _AssertDataIsList(key, data) + + +def _AssertDataIsList(key, lst): + """Assert that lst contains list data and is not structured.""" + + # list and tuple are supported. Not supported are direct strings + # and dictionary; these indicate too much or two little structure. 
+ if not isinstance(lst, list) and not isinstance(lst, tuple): + raise NotAListError('%s must be a list' % key) + + # each list entry must be a string + for element in lst: + if not isinstance(element, str): + raise ElementNotAStringError('Unsupported list element %s found in %s', + (element, lst)) diff --git a/src/setup.py b/src/setup.py index 1409b56..446ecf2 100644 --- a/src/setup.py +++ b/src/setup.py @@ -101,7 +101,7 @@ def ReadConfig(section, value, default): url='https://github.com/GoogleCloudPlatform/cloud-debug-python', author='Google Inc.', version=version, - install_requires=['google-api-python-client'], + install_requires=['google-api-python-client', 'pyyaml'], packages=['googleclouddebugger'], ext_modules=[cdbg_native_module], license='Apache License, Version 2.0', From c7f0da80ddeee1cdcacafbde1f0d37dfc70c0ef3 Mon Sep 17 00:00:00 2001 From: erezh Date: Thu, 13 Jul 2017 10:01:58 -0700 Subject: [PATCH 085/241] Re-implement GetLoadedModuleByPath and move it to module_utils. The new implementation is 20x faster so it can be used inside the import hook in subsequent cl. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=169104195 --- src/googleclouddebugger/module_utils.py | 42 ++++++++++++++++++-- src/googleclouddebugger/python_breakpoint.py | 20 +--------- 2 files changed, 40 insertions(+), 22 deletions(-) diff --git a/src/googleclouddebugger/module_utils.py b/src/googleclouddebugger/module_utils.py index a36dba8..c25f0b6 100644 --- a/src/googleclouddebugger/module_utils.py +++ b/src/googleclouddebugger/module_utils.py @@ -12,11 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Provides utility functions for module path processing. 
-""" - +"""Provides utility functions for module path processing.""" import os +import sys def GetAbsolutePath(mod_path): @@ -38,3 +37,40 @@ def GetAbsolutePath(mod_path): pkg_path = os.path.abspath(os.path.realpath(pkg_path)) return os.path.join(pkg_path, file_name) + +def GetLoadedModuleByPath(abspath): + """Returns the loaded module that matches abspath or None if not found.""" + + def GenModuleNames(path): + """Generates all possible module names from path.""" + parts = path.lstrip(os.sep).split(os.sep) + + # For packages, remove the __init__ file name. + if parts[-1] == '__init__': + parts = parts[:-1] + + # Generate module names from part, starting with just the leaf name. + for i in xrange(len(parts) - 1, -1, -1): + yield '.'.join(parts[i::]) + + # If non where matching, it is possible that it's the main module. + yield '__main__' + + # The extenssion is not part of the module matching, remove it. + abspath = os.path.splitext(abspath)[0] + + # Lookup every possible module name for abspath, starting with the leaf name. + # It is much faster than scanning sys.modules and comparing module paths. + for mod_name in GenModuleNames(abspath): + module = sys.modules.get(mod_name) + if not module: + continue + + mod_path = getattr(module, '__file__', None) + if not mod_path: + continue + + # Get the absolute real path (no symlink) for this module. 
+ mod_path = os.path.splitext(GetAbsolutePath(mod_path))[0] + if mod_path == abspath: + return module diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index 66ec44c..df92288 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -17,7 +17,6 @@ from datetime import datetime from datetime import timedelta import os -import sys from threading import Lock import capture_collector @@ -88,23 +87,6 @@ datetime.strptime('2017-01-01', '%Y-%m-%d') -def _GetLoadedModuleByPath(abspath): - """Returns the loaded module that matches abspath or None if not found.""" - - for module in sys.modules.values(): - path = getattr(module, '__file__', None) - if not path: - continue # This is a built-in module. - - # module.__file__ may be relative or may contain symlinks inside it. - module_abspath = module_utils.GetAbsolutePath(path) - - # Ignore file extension while comparing the file paths (e.g., /foo/bar.py vs - # /foo/bar.pyc should match). - if os.path.splitext(module_abspath)[0] == os.path.splitext(abspath)[0]: - return module - - def _IsRootInitPy(path): return path.lstrip(os.sep) == '__init__.py' @@ -233,7 +215,7 @@ def __init__(self, definition, hub_client, breakpoints_manager, return # TODO(erezh): Handle the possible thread race condtion from lookup to hook. - module = _GetLoadedModuleByPath(paths[0]) + module = module_utils.GetLoadedModuleByPath(paths[0]) if module: self._ActivateBreakpoint(module) else: From a969c3a544b595af7cf9313bdc30c24cf2dda8d9 Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Thu, 5 Oct 2017 10:04:37 -0700 Subject: [PATCH 086/241] Capture end user id on python agent (Borg only). 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=171167614 --- src/googleclouddebugger/capture_collector.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 2e839fa..b9bbbdb 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -38,6 +38,9 @@ # Externally defined function to collect the request log id. request_log_id_collector = None +# Externally defined function to collect the end user id. +user_id_collector = lambda: (None, None) + _PRIMITIVE_TYPES = (int, long, float, complex, types.StringTypes, bool, types.NoneType, types.SliceType, bytearray) _DATE_TYPES = (datetime.date, datetime.time, datetime.timedelta) @@ -322,6 +325,7 @@ def Collect(self, top_frame): self.TrimVariableTable(num_vars) self._CaptureRequestLogId() + self._CaptureUserId() def CaptureFrameLocals(self, frame): """Captures local variables and arguments of the specified frame. @@ -600,6 +604,12 @@ def _CaptureRequestLogId(self): self.breakpoint['labels'][ labels.Breakpoint.REQUEST_LOG_ID] = request_log_id + def _CaptureUserId(self): + """Captures the user id of the end user, if possible.""" + user_kind, user_id = user_id_collector() + if user_kind and user_id: + self.breakpoint['evaluated_user_id'] = {'kind': user_kind, 'id': user_id} + class LogCollector(object): """Captures minimal application snapshot and logs it to application log. From 38fa44cfb958682484a2ebcc023256eef9fd902c Mon Sep 17 00:00:00 2001 From: erezh Date: Thu, 12 Oct 2017 14:33:01 -0700 Subject: [PATCH 087/241] Update GitHub readme files for Java and Python agents with correct links and info. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=172013236 --- README.md | 104 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 53 insertions(+), 51 deletions(-) diff --git a/README.md b/README.md index 37c7e47..9e0241e 100644 --- a/README.md +++ b/README.md @@ -1,47 +1,47 @@ -# Python Cloud Debugger +# Python Cloud Debugger Agent -Google [Cloud Debugger](https://cloud.google.com/tools/cloud-debugger/) for -Python 2.7. +Google [Cloud Debugger](https://cloud.google.com/debugger/) for Python 2.7. ## Overview -The Cloud Debugger lets you inspect the state of an application at any code -location without stopping or slowing it down. The debugger makes it easier to -view the application state without adding logging statements. +Cloud Debugger (also known as Stackdriver Debugger) lets you inspect the state +of a running cloud application, at any code location, without stopping or +slowing it down. It is not your traditional process debugger but rather an +always on, whole app debugger taking snapshots from any instance of the app. -You can use the Cloud Debugger on both production and staging instances of your -application. The debugger never pauses the application for more than a few -milliseconds. In most cases, this is not noticeable by users. The Cloud Debugger -gives a read-only experience. Application variables can't be changed through the -debugger. +Cloud Debugger is safe for use with production apps or during development. +The Python debugger agent only few milliseconds to the request latency when a +debug snapshot is captured. In most cases, this is not noticeable to users. +Furthermore, the Python debugger agent does not allow modification of +application state in any way, and has close to zero impact on the app instances. -The Cloud Debugger attaches to all instances of the application. The call stack -and the variables come from the first instance to take the snapshot. 
+Cloud Debugger attaches to all instances of the app providing the ability to +take debug snapshots and add logpoints. A snapshot captures the call-stack and +variables from any one instance that executes the snapshot location. A logpoint +writes a formatted message to the application log whenever any instance of the +app executes the logpoint location. -The Python Cloud Debugger is only supported on Linux at the moment. It was tested -on Debian Linux, but it should work on other distributions as well. +The Python debugger agent is only supported on Linux at the moment. It was +tested on Debian Linux, but it should work on other distributions as well. -The Cloud Debugger consists of 3 primary components: +Cloud Debugger consists of 3 primary components: -1. The debugger agent. This repo implements one for Python 2.7. -2. Cloud Debugger backend that stores the list of snapshots for each debuggee. - You can explore the API using the +1. The Python debugger agent (this repo implements one for Python 2.7). +2. Cloud Debugger service storing and managing snapshots/logpoints. + Explore the API's using [APIs Explorer](https://developers.google.com/apis-explorer/#p/clouddebugger/v2/). -3. User interface for the debugger implemented using the Cloud Debugger API. - Currently the only option for Python is the - [Google Developers Console](https://console.developers.google.com). The - UI requires that the source code is submitted to - [Google Cloud Repo](https://cloud.google.com/tools/repo/cloud-repositories/). - More options (including browsing local source files) are coming soon. +3. User interface, including a command line interface + [`gcloud debug`](https://cloud.google.com/sdk/gcloud/reference/debug/) and a + Web interface on + [Google Cloud Console](https://console.developers.google.com/debug/). + See the [online help](https://cloud.google.com/debugger/docs/debugging) on + how to use Google Cloud Console Debug page. 
-This document only focuses on the Python debugger agent. Please see the -this [page](https://cloud.google.com/tools/cloud-debugger/debugging) for -explanation how to debug an application with the Cloud Debugger. - -## Options for Getting Help +## Getting Help 1. StackOverflow: http://stackoverflow.com/questions/tagged/google-cloud-debugger -2. Google Group: cdbg-feedback@google.com +2. Send email to: [Cloud Debugger Feedback](mailto:cdbg-feedback@google.com) +3. Send Feedback from Google Cloud Console ## Installation @@ -79,20 +79,20 @@ sudo apt-get -y -q --no-install-recommends install \ ## Setup -### Google Compute Engine +### Google Cloud Platform 1. First, make sure that you created the VM with this option enabled: > Allow API access to all Google Cloud services in the same project. - This option lets the debugger agent authenticate with the machine account - of the Virtual Machine. + This option lets the Python debugger agent authenticate with the machine + account of the Virtual Machine. - It is possible to use Python Cloud Debugger without it. Please see the + It is possible to use the Python debugger agent without it. Please see the [next section](#Service_Account) for details. -1. Install the debugger agent as explained in the [Installation](#Installation) - section. +1. Install the Python debugger agent as explained in the + [Installation](#Installation) section. 2. Enable the debugger in your application using one of the two options: @@ -116,21 +116,23 @@ sudo apt-get -y -q --no-install-recommends install \ ### Service Account -Service account authentication lets you run the debugger agent on any Linux -machine, including outside of [Google Cloud Platform](https://cloud.google.com). -The debugger agent authenticates against the backend with the service account -created in [Google Developers Console](https://console.developers.google.com). 
-If your application runs on Google Compute Engine, -[metadata service authentication](#Google_Compute_Engine) is an easier option. +To use the Python debugger agent on machines not hosted by Google Cloud +Platform, the agent must use a Google Cloud Platform service-account credentials +to authenticate with the Cloud Debugger Service. + +Use the Google Cloud Console Service Accounts +[page](https://console.cloud.google.com/iam-admin/serviceaccounts/project) to +create a credentials file for an existing or new service-account. The +service-account must have at least the `Cloud Debugger Agent` role to be +accepted by the Cloud Debugger Service. +If you don't have a Google Cloud Platform project, you can create one for free +on [Google Cloud Console](https://console.cloud.google.com). -The first step for this setup is to create the service account in .json format. -Please see this [page](https://cloud.google.com/storage/docs/authentication?hl=en#generating-a-private-key) -for detailed instructions. If you don't have a Google Cloud Platform project, -you can create one for free on [Google Developers Console](https://console.developers.google.com). +Once you have the service-account JSON file, deploy it alongside the Python +debugger agent. Once you have the service account, please note the service account e-mail, -[project ID and project number](https://developers.google.com/console/help/new/#projectnumber). -Then copy the .json file to all the machines that run your application. +project ID and project number. 
Then, enable the debugger agent using one of these two options: @@ -144,7 +146,7 @@ try: enable_service_account_auth=True, project_id='my-gcp-project-id', project_number='123456789', - service_account_json_file='/opt/cdbg/gcp.json') + service_account_json_file='/opt/cdbg/gcp-svc.json') except ImportError: pass ``` @@ -157,7 +159,7 @@ python \ --enable_service_account_auth=1 \ --project_id=my-gcp-project-id \ --project_number=123456789 \ - --service_account_json_file=/opt/cdbg/gcp.json \ + --service_account_json_file=/opt/cdbg/gcp-svc.json \ -- \ myapp.py From e7e1e13aec8918bf96e81c03dcc712d924e249c7 Mon Sep 17 00:00:00 2001 From: lesv Date: Thu, 12 Oct 2017 17:08:04 -0700 Subject: [PATCH 088/241] Same license fix asked by OSPO that I sent to your GH repo. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=172035158 --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index b5d5055..497d805 100644 --- a/LICENSE +++ b/LICENSE @@ -187,7 +187,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2015 Google Inc. + Copyright [yyyy] [name of copyright owner Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. From c1d10c5fa9e438efebe85a4163846ab82cb5df78 Mon Sep 17 00:00:00 2001 From: erezh Date: Mon, 16 Oct 2017 13:36:19 -0700 Subject: [PATCH 089/241] Address import callback invoke for: 1. Premature callback invoke, before module is fully loaded. This happens in nested imports where the package callback is invoked prematurely. 2. When a module/package includes the __all__ attribute indicating modules to load. This CL invokes the import callbacks only on the outer import call per thread. This CL looks up if a module matching *any* of the pending callbacks is loaded, regardless of the actually imported name. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=172370587 --- src/googleclouddebugger/imphook.py | 128 ++++++----------------------- 1 file changed, 26 insertions(+), 102 deletions(-) diff --git a/src/googleclouddebugger/imphook.py b/src/googleclouddebugger/imphook.py index f961018..4d7c857 100644 --- a/src/googleclouddebugger/imphook.py +++ b/src/googleclouddebugger/imphook.py @@ -24,6 +24,9 @@ _import_callbacks = {} _import_callbacks_lock = threading.Lock() +# Per thread data holding information about the import call nest level. +_import_local = threading.local() + # Original __import__ function if import hook is installed or None otherwise. _real_import = None @@ -94,113 +97,34 @@ def _ImportHook(name, globals=None, locals=None, fromlist=None, level=-1): to __import__ that use keyword syntax will fail: __import('a', fromlist=[]). """ - # Really import modules. - module = _real_import(name, globals, locals, fromlist, level) + # This is the top call to import (no nesting), init the per-thread nest level. + if getattr(_import_local, 'nest_level', None) is None: + _import_local.nest_level = 0 + + _import_local.nest_level += 1 + + try: + # Really import modules. + module = _real_import(name, globals, locals, fromlist, level) + finally: + _import_local.nest_level -= 1 + + # No need to invoke the callbacks on nested import calls. + if _import_local.nest_level: + return module # Optimize common code path when no breakponts are set. if not _import_callbacks: return module - # When the _real_import statement above is executed, it can also trigger the - # loading of outer packages, if they are not loaded yet. Unfortunately, - # _real_import does not give us a list of packages/modules were loaded as - # a result of executing it. Therefore, we conservatively assume that they - # were all just loaded. - # - # To manually identify all modules that _real_import touches, we apply a - # method that combines 'module', 'name', and 'fromlist'. 
This method is a - # heuristic that is based on observation. - # - # Note that the list we obtain will contain false positives, i.e., modules - # that were already loaded. However, since these modules were already loaded, - # there can be no pending breakpoint callbacks on them, and therefore, the - # wasted computation will be limited to one dictionary lookup per module. - # - # Example: When module 'a.b.c' is imported, we need to activate deferred - # breakpoints in all of ['a', 'a.b', 'a.b.c']. If 'a' was already loaded, then - # _import_callbacks.get('a') will return nothing, and we will move on to - # 'a.b'. - # - # To make the code simpler, we keep track of parts of the innermost module - # (i.e., 'a', 'b', 'c') and then combine them later. - - parts = module.__name__.split('.') - if fromlist: - # In case of 'from x import y', all modules in 'fromlist' can be directly - # found in the package identified by the returned 'module'. - # Note that we discard the 'name' field, because it is a substring of the - # name of the returned module. - - # Example 1: Using absolute path. - # from a.b import c - # name = 'a.b', fromlist=['c'], module= - # - # Example 2: Using relative path from inside package 'a'. - # from b import c - # name = 'b', fromlist=['c'], module= - # - # Example 3: Using relative path from inside package 'a'. - # from b.c import d - # name = 'b.c', fromlist=['d'], module= - pass - else: - # In case of 'import a.b', we append the 'name' field to the name of the - # returned module. Note that these two have one component in common, so - # we remove that one component from the start of 'name' before appending it. - - # Example 1: Use absolute path. - # import a - # name = 'a', fromlist=None, module= - # - # Example 2: Use absolute path. - # import a.b - # name = 'a.b', fromlist=None, module= - # - # Example 3: Use absolute path. 
- # import a.b.c.d - # name = 'a.b.c.d', fromlist=None, module= - # - # Example 4: Use relative path from inside package 'a'. - # import b.c - # name = 'b.c', fromlist=None, module='a.b' - parts += name.split('.')[1:] - - def GenerateModules(): - """Generates module names using parts and fromlist.""" - # If parts contains ['a', 'b', 'c'], then we generate ['a', 'a.b','a.b.c']. - current = None - for part in parts: - current = (current + '.' + part) if current else part - yield current - - # We then add entries in fromlist to the final package path (i.e., 'a.b.c') - # to obtain the innermost packages (i.e., 'a.b.c.d, a.b.c.e'). - if fromlist: - for f in fromlist: - yield current + '.' + f - - for m in GenerateModules(): - _InvokeImportCallback(sys.modules.get(m)) - + _InvokeImportCallback() return module -def _InvokeImportCallback(module): - """Invokes import callbacks for the specified module.""" - - if not module: - return - - mod_path = getattr(module, '__file__', None) - if not mod_path: - return - - mod_abspath = module_utils.GetAbsolutePath(mod_path) - mod_abspath, unused_ext = os.path.splitext(mod_abspath) - callbacks = _import_callbacks.get(mod_abspath) - if not callbacks: - return # Common code path. - - # Clone the callbacks set, since it can change during enumeration. - for callback in callbacks.copy(): - callback(module) +def _InvokeImportCallback(): + """Invokes import callbacks for loaded modules.""" + for path, callbacks in _import_callbacks.items(): + module = module_utils.GetLoadedModuleByPath(path) + if module: + for callback in callbacks.copy(): + callback(module) From 85d69ec6897629385b66e93cebd098325c14f2e8 Mon Sep 17 00:00:00 2001 From: erezh Date: Mon, 16 Oct 2017 15:30:59 -0700 Subject: [PATCH 090/241] Normalize the location path by collapsing redundant separators and up-level references. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=172388428 --- src/googleclouddebugger/python_breakpoint.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index df92288..72e4153 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -124,9 +124,9 @@ def _MultipleModulesFoundError(path, candidates): return fmt, params -def _SanitizePath(path): - """Removes leading/trailing whitespace, and leading path separator.""" - return path.strip().lstrip(os.sep) +def _NormalizePath(path): + """Removes surrounding whitespace, leading separator and normalize.""" + return os.path.normpath(path.strip().lstrip(os.sep)) class PythonBreakpoint(object): @@ -150,7 +150,7 @@ def __init__(self, definition, hub_client, breakpoints_manager, definition: breakpoint definition as it came from the backend. hub_client: asynchronously sends breakpoint updates to the backend. breakpoints_manager: parent object managing active breakpoints. - data_visibility_policy: An object used to determine the visibiliy + data_visibility_policy: An object used to determine the visibility of a captured variable. May be None if no policy is available. """ self.definition = definition @@ -171,7 +171,7 @@ def __init__(self, definition, hub_client, breakpoints_manager, if self.definition.get('action') == 'LOG': self._collector = capture_collector.LogCollector(self.definition) - path = _SanitizePath(self.definition['location']['path']) + path = _NormalizePath(self.definition['location']['path']) # Only accept .py extension. if os.path.splitext(path)[1] != '.py': From f88a0f33ff05693853b29de1a1ac4daa282e8ec0 Mon Sep 17 00:00:00 2001 From: erezh Date: Tue, 17 Oct 2017 13:22:32 -0700 Subject: [PATCH 091/241] Use heuristics to resolve multiple files found. 1. 
If file is equal to one of the matching files after removing matching files common path. e.g., file__: app/views/base.py match1: app/views/base.py match2: lib/addressBookWidget/app/views/base.py 2. if sys.path[0] + file is equal to one of the matching files. in this case: sys.path[0], is the startup script directory = apps//rangers:v6-0.401616676850083236/ file__: graphy/bar_chart.py match1: apps//rangers:v6-0.401616676850083236/graphy/bar_chart.py match2: runtimes/python27_experiment/python27_lib/versions/1/google/appengine/_internal/graphy/bar_chart.py ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=172504250 --- src/googleclouddebugger/module_search.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/googleclouddebugger/module_search.py b/src/googleclouddebugger/module_search.py index 6007515..c811631 100644 --- a/src/googleclouddebugger/module_search.py +++ b/src/googleclouddebugger/module_search.py @@ -23,6 +23,13 @@ import module_utils +def _CommonPathPrefix(paths): + """The equivalent of Python 3 os.path.commonpath().""" + prefix = os.path.commonprefix(paths) + prefix_len = prefix.rfind(os.sep) + 1 + return prefix[:prefix_len] + + def _CommonPathSuffixLen(paths): """Returns the longest common path suffix len in a list of paths.""" return len(os.path.commonprefix([path[::-1].split(os.sep) for path in paths])) @@ -38,6 +45,20 @@ def _GetIsPackageAndModuleName(path_noext): return (True, os.path.basename(directory)) +def _ResolveMultiPath(path, paths): + """Returns a single path if path ambiguity can be resolved.""" + if len(paths) > 1: + candidate = os.path.join(_CommonPathPrefix(paths), path) + if candidate in paths: + return (candidate,) + + candidate = os.path.join(sys.path[0], path) + if candidate in paths: + return (candidate,) + + return paths + + # TODO(erezh): Ensure we handle whitespace in paths correctly including, # extension, basename and dirname. 
def FindMatchingFiles(location_path): @@ -109,6 +130,9 @@ def AddCandidate(mod_path): if src_ispkg == mod_ispkg and src_name == mod_name: AddCandidate(mod_path) + # Apply heuristics to resolve multiple matching paths into one. + candidates = _ResolveMultiPath(src_path, candidates) + # Sort the list to return a stable result to the user. # TODO(erezh): No need to add the .py extenssion, this is done just for # compatabilty with current code. Once refactored not to use file extension From 6258d83a4dd6356b0784bb485dd74df76a7ff07e Mon Sep 17 00:00:00 2001 From: mattwach Date: Tue, 17 Oct 2017 14:49:39 -0700 Subject: [PATCH 092/241] Modify capture_collector.CheckDataVisiblity to look at value type instead of name. The 'name' parameter can not be used as-is because it's only the "leaf" name and not the full path. Also improved the unit tests so they can identify this type of error (which made it to the integration test layer previously). Note that this implementation is not addressing a known bug / limitation: It's possible to specify an expression that looks inside of a blacklisted container and get the value. Fixing this should probably be done in a separate CL to manage the complexity. I don't currently think the "fix" will have an effect on the code written here. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=172518202 --- src/googleclouddebugger/capture_collector.py | 51 ++++++++++++++----- .../glob_data_visibility_policy.py | 4 ++ 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index b9bbbdb..85801ba 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -97,6 +97,33 @@ def NormalizePath(path): return path +def DetermineType(value): + """Determines the type of val, returning a "full path" string.
+ + For example: + DetermineType(5) -> __builtin__.int + DetermineType(Foo()) -> com.google.bar.Foo + + Args: + value: Any value, the value is irrelevant as only the type metadata + is checked + + Returns: + Type path string. None if type cannot be determined. + """ + + object_type = type(value) + if not hasattr(object_type, '__name__'): + return None + + type_string = getattr(object_type, '__module__', '') + if type_string: + type_string += '.' + + type_string += object_type.__name__ + return type_string + + class LineNoFilter(logging.Filter): """Enables overriding the path and line number in a logging record. @@ -371,7 +398,7 @@ def CaptureNamedVariable(self, name, value, depth, limits): name = str(id(name)) self._total_size += len(name) - v = (self.CheckDataVisiblity(name) or + v = (self.CheckDataVisiblity(value) or self.CaptureVariable(value, depth, limits)) v['name'] = name except RuntimeError as e: @@ -379,27 +406,29 @@ def CaptureNamedVariable(self, name, value, depth, limits): 'INTERNAL ERROR while capturing {0}: {1}'.format(name, e)) return v - def CheckDataVisiblity(self, name): + def CheckDataVisiblity(self, value): """Returns a status object if the given name is not visible. Args: - name: Dot-separated symbol name + value: The value to check. The actual value here is not important but the + value's metadata (e.g. package and type) will be checked. Returns: - None if the name is visible. A variable structure with an error status - if the object is not visible. + None if the value is visible. A variable structure with an error status + if the value should not be visible. 
""" if not self.data_visibility_policy: return None - visible, reason = self.data_visibility_policy.IsDataVisible(name) + visible, reason = self.data_visibility_policy.IsDataVisible( + DetermineType(value)) if visible: return None return { 'status': { - 'is_error': False, + 'is_error': True, 'refers_to': 'VARIABLE_NAME', 'description': { 'format': reason @@ -533,12 +562,8 @@ def CaptureVariable(self, value, depth, limits, can_enqueue=True): OBJECT_HAS_NO_FIELDS, limits) v = {'members': members} - object_type = type(value) - if hasattr(object_type, '__name__'): - type_string = getattr(object_type, '__module__', '') - if type_string: - type_string += '.' - type_string += object_type.__name__ + type_string = DetermineType(value) + if type_string: v['type'] = type_string return v diff --git a/src/googleclouddebugger/glob_data_visibility_policy.py b/src/googleclouddebugger/glob_data_visibility_policy.py index 31c729c..deb6d4d 100644 --- a/src/googleclouddebugger/glob_data_visibility_policy.py +++ b/src/googleclouddebugger/glob_data_visibility_policy.py @@ -22,6 +22,7 @@ # Possible visibility responses RESPONSES = { + 'UNKNOWN_TYPE': 'could not determine type', 'BLACKLISTED': 'blacklisted by config', 'NOT_WHITELISTED': 'not whitelisted by config', 'VISIBLE': 'visible', @@ -47,6 +48,9 @@ def IsDataVisible(self, path): should be visible. Reason is a string reason that can be displayed to the user and indicates why data is visible or not visible. """ + if path is None: + return (False, RESPONSES['UNKNOWN_TYPE']) + if _Matches(path, self.blacklist_patterns): return (False, RESPONSES['BLACKLISTED']) From ddb72fbece1ac94eb9e074c67e720bb759a1b1d2 Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Mon, 13 Nov 2017 11:17:35 -0800 Subject: [PATCH 093/241] Add try/except around CaptureCollector to catch any exceptions and convert them into breakpoint errors. This should cover unforeseen exceptions from: 1. from capture loop, 2. labels collector, 3. user id collector. 
We can remove this try/except once we figure out the root cause of the problem. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=175559530 --- src/googleclouddebugger/python_breakpoint.py | 21 +++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index 72e4153..201ee56 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -399,7 +399,26 @@ def _BreakpointEvent(self, event, frame): collector = capture_collector.CaptureCollector( self.definition, self.data_visibility_policy) - collector.Collect(frame) + + # TODO(b/69119299): This is a temporary try/except. All exceptions should be + # caught inside Collect and converted into breakpoint error messages. + try: + collector.Collect(frame) + except BaseException as e: # pylint: disable=broad-except + native.LogInfo('Internal error during data capture: %s' % repr(e)) + error_status = {'isError': True, + 'description': { + 'format': ('Internal error while capturing data: %s' % + repr(e))}} + self._CompleteBreakpoint({'status': error_status}) + return + except: # pylint: disable=bare-except + native.LogInfo('Unknown exception raised') + error_status = {'isError': True, + 'description': { + 'format': 'Unknown internal error'}} + self._CompleteBreakpoint({'status': error_status}) + return self._CompleteBreakpoint(collector.breakpoint, is_incremental=False) From 41afada1c6c965bb2b6685b564cd099d5065b226 Mon Sep 17 00:00:00 2001 From: mattwach Date: Mon, 13 Nov 2017 13:53:48 -0800 Subject: [PATCH 094/241] Dump suppressed python exceptions to StdErr. This was previously only done in DEBUG mode. Added simple throttling to avoid spamming stderr in tight loop scenarios.
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=175582602 --- src/googleclouddebugger/python_util.cc | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/googleclouddebugger/python_util.cc b/src/googleclouddebugger/python_util.cc index 5ba1b2b..7083c1a 100644 --- a/src/googleclouddebugger/python_util.cc +++ b/src/googleclouddebugger/python_util.cc @@ -19,6 +19,8 @@ #include "python_util.h" +#include + namespace devtools { namespace cdbg { @@ -183,6 +185,15 @@ Nullable ClearPythonException() { #ifndef NDEBUG PyErr_Print(); +#else + static constexpr time_t EXCEPTION_THROTTLE_SECONDS = 30; + static time_t last_exception_reported = 0; + + time_t current_time = time(nullptr); + if (current_time - last_exception_reported >= EXCEPTION_THROTTLE_SECONDS) { + last_exception_reported = current_time; + PyErr_Print(); + } #endif // NDEBUG PyErr_Clear(); From c4443a5de986f0f2e177ae6ba03ca23a32fb759d Mon Sep 17 00:00:00 2001 From: mattwach Date: Wed, 22 Nov 2017 10:03:15 -0800 Subject: [PATCH 095/241] Add a thread yield in module_search.FindMatchingFiles Without the yield, viceroy is unable to make enough progress in its other threads to serve content. We suspect this is true for any python process that has many directories to search. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=176669697 --- src/googleclouddebugger/module_search.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/googleclouddebugger/module_search.py b/src/googleclouddebugger/module_search.py index c811631..9165c3c 100644 --- a/src/googleclouddebugger/module_search.py +++ b/src/googleclouddebugger/module_search.py @@ -122,6 +122,8 @@ def AddCandidate(mod_path): while search_paths: num_dirs_scanned += 1 path = search_paths.pop() + # Allow other threads to run in case there are many search_paths.
+ time.sleep(0) for unused_importer, mod_name, mod_ispkg in pkgutil.iter_modules([path]): mod_path = os.path.join(path, mod_name) if mod_ispkg: From f293923ecf64c48da43d39ca3e1d89cad33f6072 Mon Sep 17 00:00:00 2001 From: mattwach Date: Wed, 30 Aug 2017 13:05:35 -0700 Subject: [PATCH 096/241] Rename debugger-config.yaml -> debugger-blacklist.yaml Prompted by bugbash feedback. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=177630141 --- src/googleclouddebugger/yaml_data_visibility_config_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/yaml_data_visibility_config_reader.py b/src/googleclouddebugger/yaml_data_visibility_config_reader.py index 08f5306..8c80801 100644 --- a/src/googleclouddebugger/yaml_data_visibility_config_reader.py +++ b/src/googleclouddebugger/yaml_data_visibility_config_reader.py @@ -54,7 +54,7 @@ def __init__(self, blacklist_patterns, whitelist_patterns): self.whitelist_patterns = whitelist_patterns -def OpenAndRead(relative_path='debugger-config.yaml'): +def OpenAndRead(relative_path='debugger-blacklist.yaml'): """Attempts to find the yaml configuration file, then read it. Args: From 759f6c9a65a230f31174c6020757e13cea2e3dac Mon Sep 17 00:00:00 2001 From: erezh Date: Fri, 15 Dec 2017 12:41:49 -0800 Subject: [PATCH 097/241] Release python agent version 2.3 ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=179227554 --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index aeee7f4..17e6254 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. 
-__version__ = '2.2' +__version__ = '2.3' From 0a5ddbe65559546cf71ed00e425e4adac19ce09c Mon Sep 17 00:00:00 2001 From: mattwach Date: Mon, 18 Dec 2017 09:28:36 -0800 Subject: [PATCH 098/241] Add License file to Python sources ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=179432115 --- .../error_data_visibility_policy.py | 14 ++++++++++++++ .../glob_data_visibility_policy.py | 14 ++++++++++++++ .../yaml_data_visibility_config_reader.py | 14 ++++++++++++++ 3 files changed, 42 insertions(+) diff --git a/src/googleclouddebugger/error_data_visibility_policy.py b/src/googleclouddebugger/error_data_visibility_policy.py index 847ac50..a604578 100644 --- a/src/googleclouddebugger/error_data_visibility_policy.py +++ b/src/googleclouddebugger/error_data_visibility_policy.py @@ -1,3 +1,17 @@ +# Copyright 2017 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Always returns the provided error on visibility requests. Example Usage: diff --git a/src/googleclouddebugger/glob_data_visibility_policy.py b/src/googleclouddebugger/glob_data_visibility_policy.py index deb6d4d..08b7a16 100644 --- a/src/googleclouddebugger/glob_data_visibility_policy.py +++ b/src/googleclouddebugger/glob_data_visibility_policy.py @@ -1,3 +1,17 @@ +# Copyright 2017 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Determines the visibilty of python data and symbols. Example Usage: diff --git a/src/googleclouddebugger/yaml_data_visibility_config_reader.py b/src/googleclouddebugger/yaml_data_visibility_config_reader.py index 8c80801..764f787 100644 --- a/src/googleclouddebugger/yaml_data_visibility_config_reader.py +++ b/src/googleclouddebugger/yaml_data_visibility_config_reader.py @@ -1,3 +1,17 @@ +# Copyright 2017 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Reads a YAML configuration file to determine visibility policy. Example Usage: From 37ff148aeddfe5a271aae79fae75ae902aed358d Mon Sep 17 00:00:00 2001 From: bauerpower Date: Wed, 3 Jan 2018 11:29:55 -0800 Subject: [PATCH 099/241] Adding environment label collector for Python. This collects all the borg information. 
trace/span ids can be added in a followup cl ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=180696208 --- src/googleclouddebugger/capture_collector.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 85801ba..178ad7b 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -41,6 +41,9 @@ # Externally defined function to collect the end user id. user_id_collector = lambda: (None, None) +# Externally defined function to collect the end user id. +breakpoint_labels_collector = lambda: {} + _PRIMITIVE_TYPES = (int, long, float, complex, types.StringTypes, bool, types.NoneType, types.SliceType, bytearray) _DATE_TYPES = (datetime.date, datetime.time, datetime.timedelta) @@ -351,6 +354,7 @@ def Collect(self, top_frame): # didn't make it point to var_index of 0 ("buffer full") self.TrimVariableTable(num_vars) + self._CaptureEnvironmentLabels() self._CaptureRequestLogId() self._CaptureUserId() @@ -613,6 +617,15 @@ def ProcessBufferFull(variables): ProcessBufferFull(stack_frame['locals']) ProcessBufferFull(self._var_table) + def _CaptureEnvironmentLabels(self): + """Captures information about the environment, if possible.""" + if 'labels' not in self.breakpoint: + self.breakpoint['labels'] = {} + + if callable(breakpoint_labels_collector): + for (key, value) in breakpoint_labels_collector().iteritems(): + self.breakpoint['labels'][key] = value + def _CaptureRequestLogId(self): """Captures the request log id if possible. 
@@ -623,9 +636,6 @@ def _CaptureRequestLogId(self): request_log_id = request_log_id_collector() if request_log_id: # We have a request_log_id, save it into the breakpoint labels - if 'labels' not in self.breakpoint: - self.breakpoint['labels'] = {} - self.breakpoint['labels'][ labels.Breakpoint.REQUEST_LOG_ID] = request_log_id From c2da9cbc823425932d38b9f28c77168ee00e36db Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Wed, 31 Jan 2018 12:18:23 -0800 Subject: [PATCH 100/241] Catch per-variable exceptions at a finer granularity so that unexpected runtime errors at certain variables do not corrupt the entire breakpoint. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=184028582 --- src/googleclouddebugger/capture_collector.py | 74 +++++++++++++------- 1 file changed, 49 insertions(+), 25 deletions(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 178ad7b..1017b2d 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -293,9 +293,6 @@ def Collect(self, top_frame): frame = top_frame top_line = self.breakpoint['location']['line'] breakpoint_frames = self.breakpoint['stackFrames'] - # Number of entries in _var_table. Starts at 1 (index 0 is the 'buffer full' - # status value). - num_vars = 1 try: # Evaluate watched expressions. if 'expressions' in self.breakpoint: @@ -323,33 +320,31 @@ def Collect(self, top_frame): }) frame = frame.f_back - # Explore variables table in BFS fashion. The variables table will grow - # inside CaptureVariable as we encounter new references. 
- while (num_vars < len(self._var_table)) and ( - self._total_size < self.max_size): - try: - self._var_table[num_vars] = self.CaptureVariable( - self._var_table[num_vars], 0, self.default_capture_limits, - can_enqueue=False) - num_vars += 1 - except RuntimeError as e: - # Capture details on the failure and let the outer handler convert it - # to a status. - raise RuntimeError( - 'Failed while capturing an object of type {0}: {1}'.format( - type(self._var_table[num_vars]), e)) - except BaseException as e: # pylint: disable=broad-except - # The variable table will get serialized even though there was a - # failure. The results can be useful for diagnosing the internal - # error so just trim the excess values. + # The variable table will get serialized even though there was a failure. + # The results can be useful for diagnosing the internal error. self.breakpoint['status'] = { 'isError': True, 'description': { - 'format': ( - 'INTERNAL ERROR: Debugger failed to capture frame $0: $1'), + 'format': ('INTERNAL ERROR: Failed while capturing locals ' + 'of frame $0: $1'), 'parameters': [str(len(breakpoint_frames)), str(e)]}} + # Number of entries in _var_table. Starts at 1 (index 0 is the 'buffer full' + # status value). + num_vars = 1 + + # Explore variables table in BFS fashion. The variables table will grow + # inside CaptureVariable as we encounter new references. + while (num_vars < len(self._var_table)) and ( + self._total_size < self.max_size): + self._var_table[num_vars] = self.CaptureVariable( + self._var_table[num_vars], 0, self.default_capture_limits, + can_enqueue=False) + + # Move on to the next entry in the variable table. 
+ num_vars += 1 + # Trim variables table and change make all references to variables that # didn't make it point to var_index of 0 ("buffer full") self.TrimVariableTable(num_vars) @@ -481,6 +476,35 @@ def CaptureVariablesList(self, items, depth, empty_message, limits): return v def CaptureVariable(self, value, depth, limits, can_enqueue=True): + """Try-Except wrapped version of CaptureVariableInternal.""" + try: + return self.CaptureVariableInternal(value, depth, limits, can_enqueue) + except RuntimeError as e: + # Record as an error in the variable, and continue iterating. + return { + 'status': { + 'is_error': True, + 'refers_to': 'VARIABLE_VALUE', + 'description': { + 'format': 'Failed while capturing variable: $0', + 'parameters': [str(e)] + } + } + } + except BaseException as e: # pylint: disable=broad-except + # Record as an internal error in the variable, and continue iterating. + return { + 'status': { + 'isError': True, + 'description': { + 'format': ('INTERNAL ERROR: Failed while capturing ' + 'variable: $0: $1'), + 'parameters': [str(e)] + } + } + } + + def CaptureVariableInternal(self, value, depth, limits, can_enqueue=True): """Captures a single nameless object into Variable message. TODO(vlif): safely evaluate iterable types. @@ -541,7 +565,7 @@ def CaptureVariable(self, value, depth, limits, can_enqueue=True): index = len(self._var_table) self._var_table_index[id(value)] = index self._var_table.append(value) - self._total_size += 4 # number of characters to accomodate a number. + self._total_size += 4 # number of characters to accommodate a number. return {'varTableIndex': index} for pretty_printer in CaptureCollector.pretty_printers: From 1e279f4b382a121cf8e4655fd91ed1070741a230 Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Tue, 6 Feb 2018 12:17:25 -0800 Subject: [PATCH 101/241] Add new module search algorithm for Python. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=184720075 --- src/googleclouddebugger/__init__.py | 7 +- .../breakpoints_manager.py | 14 +- src/googleclouddebugger/imphook.py | 15 +- src/googleclouddebugger/imphook2.py | 335 ++++++++++++++++++ src/googleclouddebugger/module_search2.py | 106 ++++++ src/googleclouddebugger/module_utils2.py | 77 ++++ src/googleclouddebugger/python_breakpoint.py | 35 +- 7 files changed, 582 insertions(+), 7 deletions(-) create mode 100644 src/googleclouddebugger/imphook2.py create mode 100644 src/googleclouddebugger/module_search2.py create mode 100644 src/googleclouddebugger/module_utils2.py diff --git a/src/googleclouddebugger/__init__.py b/src/googleclouddebugger/__init__.py index 6221cf7..adf9735 100644 --- a/src/googleclouddebugger/__init__.py +++ b/src/googleclouddebugger/__init__.py @@ -51,10 +51,13 @@ def _StartDebugger(): cdbg_native.InitializeModule(_flags) _hub_client = gcp_hub_client.GcpHubClient() - visibility_policy = _GetVisibilityPolicy() + use_new_module_search = _flags.get('use_new_module_search') + _breakpoints_manager = breakpoints_manager.BreakpointsManager( - _hub_client, visibility_policy) + _hub_client, + visibility_policy, + use_new_module_search=use_new_module_search) # Set up loggers for logpoints. capture_collector.SetLogger(logging.getLogger()) diff --git a/src/googleclouddebugger/breakpoints_manager.py b/src/googleclouddebugger/breakpoints_manager.py index bfb91e0..ee99df3 100644 --- a/src/googleclouddebugger/breakpoints_manager.py +++ b/src/googleclouddebugger/breakpoints_manager.py @@ -36,11 +36,17 @@ class BreakpointsManager(object): breakpoint updates back to the backend. data_visibility_policy: An object used to determine the visibiliy of a captured variable. May be None if no policy is available. + use_new_module_search: If true, the new module search algorithm will be + used. 
""" - def __init__(self, hub_client, data_visibility_policy): + def __init__(self, + hub_client, + data_visibility_policy, + use_new_module_search=False): self._hub_client = hub_client self.data_visibility_policy = data_visibility_policy + self.use_new_module_search = use_new_module_search # Lock to synchronize access to data across multiple threads. self._lock = RLock() @@ -75,7 +81,11 @@ def SetActiveBreakpoints(self, breakpoints_data): self._active.update([ (x['id'], python_breakpoint.PythonBreakpoint( - x, self._hub_client, self, self.data_visibility_policy)) + x, + self._hub_client, + self, + self.data_visibility_policy, + self.use_new_module_search)) for x in breakpoints_data if x['id'] in ids - self._active.viewkeys() - self._completed]) diff --git a/src/googleclouddebugger/imphook.py b/src/googleclouddebugger/imphook.py index 4d7c857..d142093 100644 --- a/src/googleclouddebugger/imphook.py +++ b/src/googleclouddebugger/imphook.py @@ -12,7 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Support for breakpoints on modules that haven't been loaded yet.""" +"""Support for breakpoints on modules that haven't been loaded yet. + +This is the old module import hook which: + 1. Takes full path of the module with file extension as input. + 2. At each (top-level-only) import statement: + a. Uses path to guess all possible names the module may be loaded as. + b. Checks sys.modules if there are any modules loaded with those names, + using exact path match between the __file__ attribute of the module + from sys.modules and the input path. + +For the new module import hook, see imphook2.py file. +""" import os import sys # Must be imported, otherwise import hooks don't work. @@ -35,7 +46,7 @@ def AddImportCallback(abspath, callback): """Register import hook. This function overrides the default import process. Then whenever a module - corresponding to source_path is imported, the callback will be invoked. 
+ corresponding to abspath is imported, the callback will be invoked. A module may be imported multiple times. Import event only means that the Python code contained an "import" statement. The actual loading and diff --git a/src/googleclouddebugger/imphook2.py b/src/googleclouddebugger/imphook2.py new file mode 100644 index 0000000..05eb233 --- /dev/null +++ b/src/googleclouddebugger/imphook2.py @@ -0,0 +1,335 @@ +# Copyright 2015 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Support for breakpoints on modules that haven't been loaded yet. + +This is the new module import hook which: + 1. Takes a partial path of the module file excluding the file extension as + input (can be as short as 'foo' or longer such as 'sys/path/pkg/foo'). + 2. At each (top-level-only) import statement: + a. Generates an estimate of the modules that might be loaded as a result + of this import (and all chained imports) using the arguments of the + import hook. The estimate is best-effort, it may contain extra entries + that are not of interest to us (e.g., outer packages that were already + loaded before this import), or may be missing some module names (not + all intricacies of Python module importer are handled). + b. Checks sys.modules if any of these modules have a file that matches the + given path, using suffix match. + +For the old module import hook, see imphook.py file. 
+""" + +import os +import sys # Must be imported, otherwise import hooks don't work. +import threading + +import module_utils2 + +# Callbacks to invoke when a module is imported. +_import_callbacks = {} +_import_callbacks_lock = threading.Lock() + +# Per thread data holding information about the import call nest level. +_import_local = threading.local() + +# Original __import__ function if import hook is installed or None otherwise. +_real_import = None + + +def AddImportCallbackBySuffix(path, callback): + """Register import hook. + + This function overrides the default import process. Then whenever a module + whose suffix matches path is imported, the callback will be invoked. + + A module may be imported multiple times. Import event only means that the + Python code contained an "import" statement. The actual loading and + initialization of a new module normally happens only once, at which time + the callback will be invoked. This function does not validates the existence + of such a module and it's the responsibility of the caller. + + TODO(erezh): handle module reload. + + Args: + path: python module file path. It may be missing the directories for the + outer packages, and therefore, requires suffix comparison to match + against loaded modules. If it contains all outer packages, it may + contain the sys.path as well. + It might contain an incorrect file extension (e.g., py vs. pyc). + callback: callable to invoke upon module load. + + Returns: + Function object to invoke to remove the installed callback. + """ + + def RemoveCallback(): + # This is a read-if-del operation on _import_callbacks. Lock to prevent + # callbacks from being inserted just before the key is deleted. Thus, it + # must be locked also when inserting a new entry below. On the other hand + # read only access, in the import hook, does not require a lock. 
+ with _import_callbacks_lock: + callbacks = _import_callbacks.get(path) + if callbacks: + callbacks.remove(callback) + if not callbacks: + del _import_callbacks[path] + + with _import_callbacks_lock: + _import_callbacks.setdefault(path, set()).add(callback) + _InstallImportHookBySuffix() + + return RemoveCallback + + +def _InstallImportHookBySuffix(): + """Lazily installs import hook.""" + + global _real_import + + if _real_import: + return # Import hook already installed + + builtin = sys.modules['__builtin__'] + + _real_import = getattr(builtin, '__import__') + assert _real_import + + builtin.__import__ = _ImportHookBySuffix + + +# pylint: disable=redefined-builtin, g-doc-args, g-doc-return-or-yield +def _ImportHookBySuffix( + name, globals=None, locals=None, fromlist=None, level=-1): + """Callback when an import statement is executed by the Python interpreter. + + Argument names have to exactly match those of __import__. Otherwise calls + to __import__ that use keyword syntax will fail: __import('a', fromlist=[]). + """ + # This is the top call to import (no nesting), init the per-thread nest level + # and names set. + if getattr(_import_local, 'nest_level', None) is None: + _import_local.nest_level = 0 + + if _import_local.nest_level == 0: + # Re-initialize names set at each top-level import to prevent any + # accidental unforeseen memory leak. + _import_local.names = set() + + _import_local.nest_level += 1 + + try: + # Really import modules. + module = _real_import(name, globals, locals, fromlist, level) + finally: + # This _real_import call may raise an exception (e.g., ImportError). + # However, there might be several modules already loaded before the + # exception was raised. For instance: + # a.py + # import b # success + # import c # ImportError exception. + # In this case, an 'import a' statement would have the side effect of + # importing module 'b'. This should trigger the import hooks for module + # 'b'. 
To achieve this, we always search/invoke import callbacks (i.e., + # even when an exception is raised). + # + # Important Note: Do not use 'return' inside the finally block. It will + # cause any pending exception to be discarded. + _import_local.nest_level -= 1 + + # To improve common code path performance, compute the loaded modules only + # if there are any import callbacks. + if _import_callbacks: + # Collect the names of all modules that might be newly loaded as a result + # of this import. Add them in a thread-local list. + _import_local.names |= _GenerateNames(name, fromlist, globals) + + # Invoke the callbacks only on the top-level import call. + if _import_local.nest_level == 0: + _InvokeImportCallbackBySuffix(_import_local.names) + + # To be safe, we clear the names set every time we exit a top level import. + if _import_local.nest_level == 0: + _import_local.names.clear() + + return module + + +def _GenerateNames(name, fromlist, globals): + """Generates the names of modules that might be loaded via this import. + + Args: + name: Argument as passed to the importer. + fromlist: Argument as passed to the importer. + globals: Argument as passed to the importer. + + Returns: + A set that contains the names of all modules that are loaded by the + currently executing import statement, as they would show up in sys.modules. + The returned set may contain module names that were already loaded before + the execution of this import statement. + The returned set may contain names that are not real modules. + """ + def GetCurrentPackage(globals): + """Finds the name of the package for the currently executing module.""" + if not globals: + return None + + # Get the name of the module/package that the current import is being + # executed in. + current = globals.get('__name__') + if not current: + return None + + # Check if the current module is really a module, or a package. 
+ current_file = globals.get('__file__') + if not current_file: + return None + + root = os.path.splitext(os.path.basename(current_file))[0] + if root == '__init__': + # The current import happened from a package. Return the package. + return current + else: + # The current import happened from a module. Return the package that + # contains the module. + return current.rpartition('.')[0] + + # A Python module can be addressed in two ways: + # 1. Using a path relative to the currently executing module's path. For + # instance, module p1/p2/m3.py imports p1/p2/p3/m4.py using 'import p3.m4'. + # 2. Using a path relative to sys.path. For instance, module p1/p2/m3.py + # imports p1/p2/p3/m4.py using 'import p1.p2.p3.m4'. + # + # The Python importer uses the 'globals' argument to identify the module that + # the current import is being performed in. The actual logic is very + # complicated, and we only approximate it here to limit the performance + # overhead (See import.c in the interpreter for details). Here, we only use + # the value of the globals['__name__'] for this purpose. + # + # Note: The Python importer prioritizes the current package over sys.path. For + # instance, if 'p1.p2.m3' imports 'm4', then 'p1.p2.m4' is a better match than + # the top level 'm4'. However, the debugger does not have to implement this, + # because breakpoint paths are not described relative to some other file. They + # are always assumed to be relative to the sys.path directories. If the user + # sets breakpoint inside 'm4.py', then we can map it to either the top level + # 'm4' or 'p1.p2.m4', i.e., both are valid matches. + curpkg = GetCurrentPackage(globals) + + names = set() + + # A Python module can be imported using two syntaxes: + # 1. import p1.p2.m3 + # 2. from p1.p2 import m3 + # + # When the regular 'import p1.p2.m3' syntax is used, the name of the module + # being imported is passed in the 'name' argument (e.g., name='p1.p2.m3', + # fromlist=None). 
+ # + # When the from-import syntax is used, then fromlist contains the leaf names + # of the modules, and name contains the containing package. For instance, if + # name='a.b', fromlist=['c', 'd'], then we add ['a.b.c', 'a.b.d']. + # + # Corner cases: + # 1. The fromlist syntax can be used to import a function from a module. + # For instance, 'from p1.p2.m3 import func'. + # 2. Sometimes, the importer is passed a dummy fromlist=['__doc__'] (see + # import.c in the interpreter for details). + # Due to these corner cases, the returned set may contain entries that are not + # names of real modules. + for from_entry in fromlist or []: + # Name relative to sys.path. + names.add(name + '.' + from_entry) + # Name relative to the currently executing module's package. + if curpkg: + names.add(curpkg + '.' + name + '.' + from_entry) + + # Generate all names from name. For instance, if name='a.b.c', then + # we need to add ['a.b.c', 'a.b', 'a']. + while name: + # Name relative to sys.path. + names.add(name) + # Name relative to currently executing module's package. + if curpkg: + names.add(curpkg + '.' + name) + name = name.rpartition('.')[0] + + return names + + +def _InvokeImportCallbackBySuffix(names): + """Invokes import callbacks for newly loaded modules. + + Uses a path suffix match to identify whether a loaded module matches the + file path provided by the user. + + Args: + names: A set of names for modules that are loaded by the current import. + The set may contain some superfluous entries that were already + loaded before this import, or some entries that do not correspond + to a module. The list is expected to be much smaller than the exact + sys.modules so that a linear search is not as costly. + """ + def GetModuleFromName(name, path): + """Returns the loaded module for this name/path, or None if not found. + + Args: + name: A string that may represent the name of a loaded Python module. 
+ path: If 'name' ends with '.*', then the last path component in 'path' is + used to identify what the wildcard may map to. Does not contain file + extension. + + Returns: + The loaded module for the given name and path, or None if a loaded module + was not found. + """ + # The from-import syntax can be used as 'from p1.p2 import *'. In this case, + # we cannot know what modules will match the wildcard. However, we know that + # the wildcard can only be used to import leaf modules. So, we guess that + # the leaf module will have the same name as the leaf file name the user + # provided. For instance, + # User input path = 'foo.py' + # Currently executing import: + # from pkg1.pkg2 import * + # Then, we combine: + # 1. 'pkg1.pkg2' from import's outer package and + # 2. Add 'foo' as our guess for the leaf module name. + # So, we will search for modules with name similar to 'pkg1.pkg2.foo'. + if name.endswith('.*'): + # Replace the final '*' with the name of the module we are looking for. + name = name.rpartition('.')[0] + '.' + path.split('/')[-1] + + # Check if the module was loaded. + return sys.modules.get(name) + + for path, callbacks in _import_callbacks.items(): + root = os.path.splitext(path)[0] + + nonempty_names = (n for n in names if n) + modules = (GetModuleFromName(name, root) for name in nonempty_names) + nonempty_modules = (m for m in modules if m) + + for module in nonempty_modules: + mod_root = os.path.splitext(module.__file__)[0] + + # If the module is relative, add the curdir prefix to convert it to + # absolute path. Note that we don't use os.path.abspath because it + # also normalizes the path (which has side effects we don't want). 
+ if not os.path.isabs(mod_root): + mod_root = os.path.join(os.curdir, mod_root) + + if module_utils2.IsPathSuffix(mod_root, root): + for callback in callbacks.copy(): + callback(module) + break + diff --git a/src/googleclouddebugger/module_search2.py b/src/googleclouddebugger/module_search2.py new file mode 100644 index 0000000..f7e5de8 --- /dev/null +++ b/src/googleclouddebugger/module_search2.py @@ -0,0 +1,106 @@ +# Copyright 2015 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Inclusive search for module files.""" + +import os +import sys + + +def Search(path): + """Search sys.path to find a source file that matches path. + + The provided input path may have an unknown number of irrelevant outer + directories (e.g., /garbage1/garbage2/real1/real2/x.py'). This function + does multiple search iterations until an actual Python module file that + matches the input path is found. At each iteration, it strips one leading + directory from the path and searches the directories at sys.path + for a match. + + Examples: + sys.path: ['/x1/x2', '/y1/y2'] + Search order: [.pyo|.pyc|.py] + /x1/x2/a/b/c + /x1/x2/b/c + /x1/x2/c + /y1/y2/a/b/c + /y1/y2/b/c + /y1/y2/c + Filesystem: ['/y1/y2/a/b/c.pyc'] + + 1) Search('a/b/c.py') + Returns '/y1/y2/a/b/c.pyc' + 2) Search('q/w/a/b/c.py') + Returns '/y1/y2/a/b/c.pyc' + 3) Search('q/w/c.py') + Returns 'q/w/c.py' + + The provided input path may also be relative to an unknown directory. 
+ The path may include some or all outer package names. + + Examples (continued): + + 4) Search('c.py') + Returns 'c.py' + 5) Search('b/c.py') + Returns 'b/c.py' + + Args: + path: Path that describes a source file. Must contain .py file extension. + Must not contain any leading os.sep character. + + Returns: + Full path to the matched source file, if a match is found. Otherwise, + returns the input path. + + Raises: + AssertionError: if the provided path is an absolute path, or if it does not + have a .py extension. + """ + def SearchCandidates(p): + """Generates all candidates for the fuzzy search of p.""" + while p: + yield p + (_, _, p) = p.partition(os.sep) + + # Verify that the os.sep is already stripped from the input. + assert not path.startswith(os.sep) + + # Strip the file extension, it will not be needed. + src_root, src_ext = os.path.splitext(path) + assert src_ext == '.py' + + # Search longer suffixes first. Move to shorter suffixes only if longer + # suffixes do not result in any matches. + for src_part in SearchCandidates(src_root): + # Search is done in sys.path order, which gives higher priority to earlier + # entries in sys.path list. + for sys_path in sys.path: + f = os.path.join(sys_path, src_part) + # The order in which we search the extensions does not matter. + for ext in ('.pyo', '.pyc', '.py'): + # The os.path.exists check internally follows symlinks and flattens + # relative paths, so we don't have to deal with it. + fext = f + ext + if os.path.exists(fext): + # Once we identify a matching file in the filesystem, we should + # preserve the (1) potentially-symlinked and (2) + # potentially-non-flattened file path (f+ext), because that's exactly + # how we expect it to appear in sys.modules when we search the file + # there. + return fext + + # A matching file was not found in sys.path directories. 
+ return path + diff --git a/src/googleclouddebugger/module_utils2.py b/src/googleclouddebugger/module_utils2.py new file mode 100644 index 0000000..b7d9f2f --- /dev/null +++ b/src/googleclouddebugger/module_utils2.py @@ -0,0 +1,77 @@ +# Copyright 2015 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Provides utility functions for module path processing.""" + +import os +import sys + + +def IsPathSuffix(mod_path, path): + """Checks whether path is a full path suffix of mod_path. + + Args: + mod_path: Must be an absolute path to a source file. Must not have + file extension. + path: A relative path. Must not have file extension. + + Returns: + True if path is a full path suffix of mod_path. False otherwise. + """ + return (mod_path.endswith(path) and + (len(mod_path) == len(path) or + mod_path[:-len(path)].endswith(os.sep))) + + +def GetLoadedModuleBySuffix(path): + """Searches sys.modules to find a module with the given file path. + + Args: + path: Path to the source file. It can be relative or absolute, as suffix + match can handle both. If absolute, it must have already been + sanitized. + + Algorithm: + The given path must be a full suffix of a loaded module to be a valid match. + File extensions are ignored when performing suffix match. 
+ + Example: + path: 'a/b/c.py' + modules: {'a': 'a.py', 'a.b': 'a/b.py', 'a.b.c': 'a/b/c.pyc'] + returns: module('a.b.c') + + Returns: + The module that corresponds to path, or None if such module was not + found. + """ + root = os.path.splitext(path)[0] + for module in sys.modules.values(): + mod_root = os.path.splitext(getattr(module, '__file__', ''))[0] + + if not mod_root: + continue + + # While mod_root can contain symlinks, we cannot eliminate them. This is + # because, we must perform exactly the same transformations on mod_root and + # path, yet path can be relative to an unknown directory which prevents + # identifying and eliminating symbolic links. + # + # Therefore, we only convert relative to absolute path. + if not os.path.isabs(mod_root): + mod_root = os.path.join(os.getcwd(), mod_root) + + if IsPathSuffix(mod_root, root): + return module + + return None diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index 201ee56..fbed878 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -22,9 +22,12 @@ import capture_collector import cdbg_native as native import imphook +import imphook2 import module_explorer import module_search +import module_search2 import module_utils +import module_utils2 # TODO(vlif): move to messages.py module. # Use the following schema to define breakpoint error message constant: @@ -126,6 +129,19 @@ def _MultipleModulesFoundError(path, candidates): def _NormalizePath(path): """Removes surrounding whitespace, leading separator and normalize.""" + # TODO(emrekultursay): Calling os.path.normpath "may change the meaning of a + # path that contains symbolic links" (e.g., "A/foo/../B" != "A/B" if foo is a + # symlink). This might cause trouble when matching against loaded module + # paths. We should try to avoid using it. 
+ # Example: + # > import symlink.a + # > symlink.a.__file__ + # symlink/a.py + # > import target.a + # > starget.a.__file__ + # target/a.py + # Python interpreter treats these as two separate modules. So, we also need to + # handle them the same way. return os.path.normpath(path.strip().lstrip(os.sep)) @@ -139,7 +155,7 @@ class PythonBreakpoint(object): """ def __init__(self, definition, hub_client, breakpoints_manager, - data_visibility_policy): + data_visibility_policy, use_new_module_search=False): """Class constructor. Tries to set the breakpoint. If the source location is invalid, the @@ -152,6 +168,8 @@ def __init__(self, definition, hub_client, breakpoints_manager, breakpoints_manager: parent object managing active breakpoints. data_visibility_policy: An object used to determine the visibility of a captured variable. May be None if no policy is available. + use_new_module_search: If true, the new module search algorithm will be + used. """ self.definition = definition @@ -193,6 +211,21 @@ def __init__(self, definition, hub_client, breakpoints_manager, 'parameters': [path]}}}) return + # If enabled, then use the new module search algorithm. + if use_new_module_search: + new_path = module_search2.Search(path) + new_module = module_utils2.GetLoadedModuleBySuffix(new_path) + + if new_module: + self._ActivateBreakpoint(new_module) + else: + self._import_hook_cleanup = imphook2.AddImportCallbackBySuffix( + new_path, + self._ActivateBreakpoint) + return + + # Otherwise, use the old module search algorithm. + # Find all module files matching the location path. paths = module_search.FindMatchingFiles(path) if not paths: From 1d9e02e1834e37b5fb11fb894d6f1967c1bd602f Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Sun, 11 Feb 2018 16:27:48 -0800 Subject: [PATCH 102/241] Skip over modules that don't have a __file__ field. 
Otherwise, we get this error in deferred modules: Traceback (most recent call last): File "devtools/cdbg/debuglets/python/imphook2.py", line 339, in _InvokeImportCallbackBySuffix mod_root = os.path.splitext(module.__file__)[0] AttributeError: 'module' object has no attribute '__file__' ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=185320164 --- src/googleclouddebugger/imphook2.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/googleclouddebugger/imphook2.py b/src/googleclouddebugger/imphook2.py index 05eb233..5bebed4 100644 --- a/src/googleclouddebugger/imphook2.py +++ b/src/googleclouddebugger/imphook2.py @@ -320,7 +320,12 @@ def GetModuleFromName(name, path): nonempty_modules = (m for m in modules if m) for module in nonempty_modules: - mod_root = os.path.splitext(module.__file__)[0] + # TODO(emrekultursay): Write unit test to cover None case. + mod_file = getattr(module, '__file__', None) + if not mod_file: + continue + + mod_root = os.path.splitext(mod_file)[0] # If the module is relative, add the curdir prefix to convert it to # absolute path. Note that we don't use os.path.abspath because it From 0dfe9e5f5503a398f90f6809da75492470b29d9d Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Wed, 14 Feb 2018 16:46:03 -0800 Subject: [PATCH 103/241] Enable new module search algorithm for GCE/Flex/opensource/other. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=185766513 --- src/googleclouddebugger/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/__init__.py b/src/googleclouddebugger/__init__.py index adf9735..d1e3e71 100644 --- a/src/googleclouddebugger/__init__.py +++ b/src/googleclouddebugger/__init__.py @@ -52,7 +52,7 @@ def _StartDebugger(): _hub_client = gcp_hub_client.GcpHubClient() visibility_policy = _GetVisibilityPolicy() - use_new_module_search = _flags.get('use_new_module_search') + use_new_module_search = _flags.get('use_new_module_search', True) _breakpoints_manager = breakpoints_manager.BreakpointsManager( _hub_client, From f06accc6c79e45854aa84fe582396edf1d07e70b Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Thu, 15 Feb 2018 09:14:05 -0800 Subject: [PATCH 104/241] Increment python agent minor version to 2.4 ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=185848633 --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index 17e6254..ccd0088 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. -__version__ = '2.3' +__version__ = '2.4' From 01849c0af95b6eb4fa2fb8a57cc9d6021db67db5 Mon Sep 17 00:00:00 2001 From: zhangxy Date: Fri, 16 Feb 2018 10:17:33 -0800 Subject: [PATCH 105/241] Renamespace symbols in third_party/absl/time/time.h and third_party/absl/time/clock.h from `base` to `absl`. LSC doc: go/lsc-absl-gplc. 
Tested: TAP sample presubmit queue http://test/OCL:185694001:BASE:185694166:1518627675307:e614761b ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=186012329 --- src/googleclouddebugger/__init__.py | 16 ++++++++-------- src/googleclouddebugger/breakpoints_manager.py | 2 +- src/googleclouddebugger/capture_collector.py | 4 ++-- src/googleclouddebugger/gcp_hub_client.py | 8 ++++---- src/googleclouddebugger/imphook.py | 2 +- src/googleclouddebugger/imphook2.py | 2 +- src/googleclouddebugger/module_search.py | 4 ++-- src/googleclouddebugger/python_breakpoint.py | 18 +++++++++--------- 8 files changed, 28 insertions(+), 28 deletions(-) diff --git a/src/googleclouddebugger/__init__.py b/src/googleclouddebugger/__init__.py index d1e3e71..1f6983f 100644 --- a/src/googleclouddebugger/__init__.py +++ b/src/googleclouddebugger/__init__.py @@ -26,15 +26,15 @@ import os import sys -import appengine_pretty_printers -import breakpoints_manager -import capture_collector -import error_data_visibility_policy -import gcp_hub_client -import glob_data_visibility_policy -import yaml_data_visibility_config_reader +from . import appengine_pretty_printers +from . import breakpoints_manager +from . import capture_collector +from . import error_data_visibility_policy +from . import gcp_hub_client +from . import glob_data_visibility_policy +from . import yaml_data_visibility_config_reader import cdbg_native -import version +from . import version __version__ = version.__version__ diff --git a/src/googleclouddebugger/breakpoints_manager.py b/src/googleclouddebugger/breakpoints_manager.py index ee99df3..3516030 100644 --- a/src/googleclouddebugger/breakpoints_manager.py +++ b/src/googleclouddebugger/breakpoints_manager.py @@ -17,7 +17,7 @@ from datetime import datetime from threading import RLock -import python_breakpoint +from . 
import python_breakpoint class BreakpointsManager(object): diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 1017b2d..8007faf 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -26,8 +26,8 @@ import time import types -import labels -import cdbg_native as native +from . import labels +from . import cdbg_native as native # Externally defined functions to actually log a message. If these variables # are not initialized, the log action for breakpoints is invalid. diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index d11d064..d656d95 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -36,10 +36,10 @@ from oauth2client import service_account from oauth2client.contrib.gce import AppAssertionCredentials -import labels -import cdbg_native as native -import uniquifier_computer -import version +from . import labels +from . import cdbg_native as native +from . import uniquifier_computer +from . import version # This module catches all exception. This is safe because it runs in # a daemon thread (so we are not blocking Ctrl+C). We need to catch all diff --git a/src/googleclouddebugger/imphook.py b/src/googleclouddebugger/imphook.py index d142093..71f5bb2 100644 --- a/src/googleclouddebugger/imphook.py +++ b/src/googleclouddebugger/imphook.py @@ -29,7 +29,7 @@ import sys # Must be imported, otherwise import hooks don't work. import threading -import module_utils +from . import module_utils # Callbacks to invoke when a module is imported. _import_callbacks = {} diff --git a/src/googleclouddebugger/imphook2.py b/src/googleclouddebugger/imphook2.py index 5bebed4..4de8e01 100644 --- a/src/googleclouddebugger/imphook2.py +++ b/src/googleclouddebugger/imphook2.py @@ -34,7 +34,7 @@ import sys # Must be imported, otherwise import hooks don't work. 
import threading -import module_utils2 +from . import module_utils2 # Callbacks to invoke when a module is imported. _import_callbacks = {} diff --git a/src/googleclouddebugger/module_search.py b/src/googleclouddebugger/module_search.py index 9165c3c..8d83c12 100644 --- a/src/googleclouddebugger/module_search.py +++ b/src/googleclouddebugger/module_search.py @@ -19,8 +19,8 @@ import sys import time -import cdbg_native as native -import module_utils +from . import cdbg_native as native +from . import module_utils def _CommonPathPrefix(paths): diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index fbed878..c0394be 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -19,15 +19,15 @@ import os from threading import Lock -import capture_collector -import cdbg_native as native -import imphook -import imphook2 -import module_explorer -import module_search -import module_search2 -import module_utils -import module_utils2 +from . import capture_collector +from . import cdbg_native as native +from . import imphook +from . import imphook2 +from . import module_explorer +from . import module_search +from . import module_search2 +from . import module_utils +from . import module_utils2 # TODO(vlif): move to messages.py module. # Use the following schema to define breakpoint error message constant: From 799f6492b007c5f37e294cf749e0e389240284df Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Tue, 20 Feb 2018 10:22:38 -0800 Subject: [PATCH 106/241] Python 3 debugger - Python side changes. Most of these are fairly simple changes to get the code in a state where it works under both Python 2 and Python 3. The only real confusion is the difference between 'str' vs 'bytes' in Python 3. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=186322439 --- .../appengine_pretty_printers.py | 4 ++- .../breakpoints_manager.py | 8 +++-- src/googleclouddebugger/capture_collector.py | 30 +++++++++++-------- src/googleclouddebugger/gcp_hub_client.py | 7 +++-- src/googleclouddebugger/imphook.py | 16 ++++++---- src/googleclouddebugger/imphook2.py | 14 +++++---- src/googleclouddebugger/module_explorer.py | 23 ++++++++------ src/googleclouddebugger/module_search.py | 2 +- src/googleclouddebugger/module_utils.py | 2 ++ src/googleclouddebugger/python_breakpoint.py | 3 +- .../yaml_data_visibility_config_reader.py | 22 +++++++++----- src/setup.py | 2 +- 12 files changed, 81 insertions(+), 52 deletions(-) diff --git a/src/googleclouddebugger/appengine_pretty_printers.py b/src/googleclouddebugger/appengine_pretty_printers.py index 0abed1c..16ef31d 100644 --- a/src/googleclouddebugger/appengine_pretty_printers.py +++ b/src/googleclouddebugger/appengine_pretty_printers.py @@ -14,6 +14,8 @@ """Formatters for well known objects that don't show up nicely by default.""" +import six + try: from google.appengine.ext import ndb # pylint: disable=g-import-not-at-top except ImportError: @@ -24,6 +26,6 @@ def PrettyPrinter(obj): """Pretty printers for AppEngine objects.""" if ndb and isinstance(obj, ndb.Model): - return obj.to_dict().iteritems(), 'ndb.Model(%s)' % type(obj).__name__ + return six.iteritems(obj.to_dict()), 'ndb.Model(%s)' % type(obj).__name__ return None diff --git a/src/googleclouddebugger/breakpoints_manager.py b/src/googleclouddebugger/breakpoints_manager.py index 3516030..7cea2e4 100644 --- a/src/googleclouddebugger/breakpoints_manager.py +++ b/src/googleclouddebugger/breakpoints_manager.py @@ -17,6 +17,8 @@ from datetime import datetime from threading import RLock +import six + from . 
import python_breakpoint @@ -74,7 +76,7 @@ def SetActiveBreakpoints(self, breakpoints_data): ids = set([x['id'] for x in breakpoints_data]) # Clear breakpoints that no longer show up in active breakpoints list. - for breakpoint_id in self._active.viewkeys() - ids: + for breakpoint_id in six.viewkeys(self._active) - ids: self._active.pop(breakpoint_id).Clear() # Create new breakpoints. @@ -87,7 +89,7 @@ def SetActiveBreakpoints(self, breakpoints_data): self.data_visibility_policy, self.use_new_module_search)) for x in breakpoints_data - if x['id'] in ids - self._active.viewkeys() - self._completed]) + if x['id'] in ids - six.viewkeys(self._active) - self._completed]) # Remove entries from completed_breakpoints_ that weren't listed in # breakpoints_data vector. These are confirmed to have been removed by the @@ -122,7 +124,7 @@ def CheckBreakpointsExpiration(self): expired_breakpoints = [] self._next_expiration = datetime.max - for breakpoint in self._active.itervalues(): + for breakpoint in six.itervalues(self._active): expiration_time = breakpoint.GetExpirationTime() if expiration_time <= current_time: expired_breakpoints.append(breakpoint) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 8007faf..f8f9ab8 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -26,6 +26,8 @@ import time import types +import six + from . import labels from . import cdbg_native as native @@ -44,10 +46,11 @@ # Externally defined function to collect the end user id. 
breakpoint_labels_collector = lambda: {} -_PRIMITIVE_TYPES = (int, long, float, complex, types.StringTypes, bool, - types.NoneType, types.SliceType, bytearray) +_PRIMITIVE_TYPES = (type(None), float, complex, bool, slice, bytearray, + six.text_type, + six.binary_type) + six.integer_types + six.string_types _DATE_TYPES = (datetime.date, datetime.time, datetime.timedelta) -_VECTOR_TYPES = (types.TupleType, types.ListType, set) +_VECTOR_TYPES = (tuple, list, set) # TODO(vlif): move to messages.py module. EMPTY_DICTIONARY = 'Empty dictionary' @@ -63,7 +66,7 @@ def _ListTypeFormatString(value): """Returns the appropriate format string for formatting a list object.""" - if isinstance(value, types.TupleType): + if isinstance(value, tuple): return '({0})' if isinstance(value, set): return '{{{0}}}' @@ -365,7 +368,7 @@ def CaptureFrameLocals(self, frame): # Capture all local variables (including method arguments). variables = {n: self.CaptureNamedVariable(n, v, 1, self.default_capture_limits) - for n, v in frame.f_locals.viewitems()} + for n, v in six.viewitems(frame.f_locals)} # Split between locals and arguments (keeping arguments in the right order). nargs = frame.f_code.co_argcount @@ -376,7 +379,7 @@ def CaptureFrameLocals(self, frame): for argname in frame.f_code.co_varnames[:nargs]: if argname in variables: frame_arguments.append(variables.pop(argname)) - return (frame_arguments, list(variables.viewvalues())) + return (frame_arguments, list(six.viewvalues(variables))) def CaptureNamedVariable(self, name, value, depth, limits): """Appends name to the product of CaptureVariable. 
@@ -555,9 +558,9 @@ def CaptureVariableInternal(self, value, depth, limits, can_enqueue=True): return {'members': fields, 'type': type(value).__name__} if isinstance(value, types.FunctionType): - self._total_size += len(value.func_name) + self._total_size += len(value.__name__) # TODO(vlif): set value to func_name and type to 'function' - return {'value': 'function ' + value.func_name} + return {'value': 'function ' + value.__name__} if can_enqueue: index = self._var_table_index.get(id(value)) @@ -647,7 +650,7 @@ def _CaptureEnvironmentLabels(self): self.breakpoint['labels'] = {} if callable(breakpoint_labels_collector): - for (key, value) in breakpoint_labels_collector().iteritems(): + for (key, value) in six.iteritems(breakpoint_labels_collector()): self.breakpoint['labels'][key] = value def _CaptureRequestLogId(self): @@ -843,14 +846,14 @@ def FormatList(items, formatter, level=0): return str(type(value)) if isinstance(value, dict): - return '{' + FormatList(value.iteritems(), FormatDictItem) + '}' + return '{' + FormatList(six.iteritems(value), FormatDictItem) + '}' if isinstance(value, _VECTOR_TYPES): return _ListTypeFormatString(value).format(FormatList( value, lambda item: self._FormatValue(item, level + 1), level=level)) if isinstance(value, types.FunctionType): - return 'function ' + value.func_name + return 'function ' + value.__name__ if hasattr(value, '__dict__') and value.__dict__: return self._FormatValue(value.__dict__, level) @@ -870,7 +873,8 @@ def _EvaluateExpression(frame, expression): """ try: code = compile(expression, '', 'eval') - except TypeError as e: # condition string contains null bytes. + except (TypeError, ValueError) as e: + # expression string contains null bytes. 
return (False, { 'isError': True, 'refersTo': 'VARIABLE_NAME', @@ -893,7 +897,7 @@ def _EvaluateExpression(frame, expression): 'refersTo': 'VARIABLE_VALUE', 'description': { 'format': 'Exception occurred: $0', - 'parameters': [e.message]}}) + 'parameters': [str(e)]}}) def _GetFrameCodeObjectName(frame): diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index d656d95..779e8e1 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -35,6 +35,7 @@ import oauth2client from oauth2client import service_account from oauth2client.contrib.gce import AppAssertionCredentials +import six from . import labels from . import cdbg_native as native @@ -102,7 +103,7 @@ def __init__(self): self._transmission_thread = None self._transmission_thread_startup_lock = threading.Lock() self._transmission_queue = deque(maxlen=100) - self._new_updates = threading.Event(False) + self._new_updates = threading.Event() # Disable logging in the discovery API to avoid excessive logging. class _ChildLogFilter(logging.Filter): @@ -156,7 +157,7 @@ def InitializeDebuggeeLabels(self, flags): """ self._debuggee_labels = {} - for (label, var_names) in _DEBUGGEE_LABELS.iteritems(): + for (label, var_names) in six.iteritems(_DEBUGGEE_LABELS): # var_names is a list of possible environment variables that may contain # the label value. Find the first one that is set. 
for name in var_names: @@ -171,7 +172,7 @@ def InitializeDebuggeeLabels(self, flags): if flags: self._debuggee_labels.update( - {name: value for (name, value) in flags.iteritems() + {name: value for (name, value) in six.iteritems(flags) if name in _DEBUGGEE_LABELS}) self._debuggee_labels['projectid'] = self._project_id() diff --git a/src/googleclouddebugger/imphook.py b/src/googleclouddebugger/imphook.py index 71f5bb2..aeb1ee8 100644 --- a/src/googleclouddebugger/imphook.py +++ b/src/googleclouddebugger/imphook.py @@ -26,9 +26,12 @@ """ import os -import sys # Must be imported, otherwise import hooks don't work. +# Must be imported, otherwise import hooks don't work. +import sys # pylint: disable=unused-import import threading +from six.moves import builtins # pylint: disable=redefined-builtin + from . import module_utils # Callbacks to invoke when a module is imported. @@ -92,12 +95,10 @@ def _InstallImportHook(): if _real_import: return # Import hook already installed - builtin = sys.modules['__builtin__'] - - _real_import = getattr(builtin, '__import__') + _real_import = getattr(builtins, '__import__') assert _real_import - builtin.__import__ = _ImportHook + builtins.__import__ = _ImportHook # pylint: disable=redefined-builtin, g-doc-args, g-doc-return-or-yield @@ -134,7 +135,10 @@ def _ImportHook(name, globals=None, locals=None, fromlist=None, level=-1): def _InvokeImportCallback(): """Invokes import callbacks for loaded modules.""" - for path, callbacks in _import_callbacks.items(): + # _import_callbacks might change during iteration because RemoveCallback() + # might delete items. Iterate over a copy to avoid a + # 'dictionary changed size during iteration' error. 
+ for path, callbacks in list(_import_callbacks.items()): module = module_utils.GetLoadedModuleByPath(path) if module: for callback in callbacks.copy(): diff --git a/src/googleclouddebugger/imphook2.py b/src/googleclouddebugger/imphook2.py index 4de8e01..7cd652a 100644 --- a/src/googleclouddebugger/imphook2.py +++ b/src/googleclouddebugger/imphook2.py @@ -34,6 +34,8 @@ import sys # Must be imported, otherwise import hooks don't work. import threading +from six.moves import builtins # pylint: disable=redefined-builtin + from . import module_utils2 # Callbacks to invoke when a module is imported. @@ -100,12 +102,10 @@ def _InstallImportHookBySuffix(): if _real_import: return # Import hook already installed - builtin = sys.modules['__builtin__'] - - _real_import = getattr(builtin, '__import__') + _real_import = getattr(builtins, '__import__') assert _real_import - builtin.__import__ = _ImportHookBySuffix + builtins.__import__ = _ImportHookBySuffix # pylint: disable=redefined-builtin, g-doc-args, g-doc-return-or-yield @@ -312,7 +312,10 @@ def GetModuleFromName(name, path): # Check if the module was loaded. return sys.modules.get(name) - for path, callbacks in _import_callbacks.items(): + # _import_callbacks might change during iteration because RemoveCallback() + # might delete items. Iterate over a copy to avoid a + # 'dictionary changed size during iteration' error. 
+ for path, callbacks in list(_import_callbacks.items()): root = os.path.splitext(path)[0] nonempty_names = (n for n in names if n) @@ -337,4 +340,3 @@ def GetModuleFromName(name, path): for callback in callbacks.copy(): callback(module) break - diff --git a/src/googleclouddebugger/module_explorer.py b/src/googleclouddebugger/module_explorer.py index f715a91..b5ba8d5 100644 --- a/src/googleclouddebugger/module_explorer.py +++ b/src/googleclouddebugger/module_explorer.py @@ -19,6 +19,8 @@ import sys import types +import six + # Maximum traversal depth when looking for all the code objects referenced by # a module or another code object. _MAX_REFERENTS_BFS_DEPTH = 15 @@ -33,11 +35,9 @@ _MAX_OBJECT_REFERENTS = 1000 # Object types to ignore when looking for the code objects. -_BFS_IGNORE_TYPES = (types.ModuleType, types.NoneType, types.BooleanType, - types.IntType, types.LongType, types.FloatType, - types.StringType, types.UnicodeType, - types.BuiltinFunctionType, types.BuiltinMethodType, - types.ListType) +_BFS_IGNORE_TYPES = (types.ModuleType, type(None), bool, float, six.binary_type, + six.text_type, types.BuiltinFunctionType, + types.BuiltinMethodType, list) + six.integer_types def GetCodeObjectAtLine(module, line): @@ -56,7 +56,7 @@ def GetCodeObjectAtLine(module, line): return (False, (None, None)) prev_line = 0 - next_line = sys.maxint + next_line = six.MAXSIZE for code_object in _GetModuleCodeObjects(module): for co_line_number in _GetLineNumbers(code_object): @@ -69,7 +69,7 @@ def GetCodeObjectAtLine(module, line): break prev_line = None if prev_line == 0 else prev_line - next_line = None if next_line == sys.maxint else next_line + next_line = None if next_line == six.MAXSIZE else next_line return (False, (prev_line, next_line)) @@ -85,7 +85,12 @@ def _GetLineNumbers(code_object): # Get the line number deltas, which are the odd number entries, from the # lnotab. 
See # https://svn.python.org/projects/python/branches/pep-0384/Objects/lnotab_notes.txt - line_incrs = (ord(c) for c in code_object.co_lnotab[1::2]) + # In Python 3, this is just a byte array. In Python 2 it is a string so the + # numerical values have to be extracted from the individual characters. + if six.PY3: + line_incrs = code_object.co_lnotab[1::2] + else: + line_incrs = (ord(c) for c in code_object.co_lnotab[1::2]) current_line = code_object.co_firstlineno for line_incr in line_incrs: current_line += line_incr @@ -209,7 +214,7 @@ def CheckIgnoreClass(cls): if isinstance(obj, types.CodeType) and CheckIgnoreCodeObject(obj): continue - if isinstance(obj, types.ClassType) and CheckIgnoreClass(obj): + if isinstance(obj, six.class_types) and CheckIgnoreClass(obj): continue if isinstance(obj, types.CodeType): diff --git a/src/googleclouddebugger/module_search.py b/src/googleclouddebugger/module_search.py index 8d83c12..ce9c763 100644 --- a/src/googleclouddebugger/module_search.py +++ b/src/googleclouddebugger/module_search.py @@ -25,7 +25,7 @@ def _CommonPathPrefix(paths): """The equivalent of Python 3 os.path.commonpath().""" - prefix = os.path.commonprefix(paths) + prefix = os.path.commonprefix(list(paths)) prefix_len = prefix.rfind(os.sep) + 1 return prefix[:prefix_len] diff --git a/src/googleclouddebugger/module_utils.py b/src/googleclouddebugger/module_utils.py index c25f0b6..606f459 100644 --- a/src/googleclouddebugger/module_utils.py +++ b/src/googleclouddebugger/module_utils.py @@ -17,6 +17,8 @@ import os import sys +from six.moves import xrange # pylint: disable=redefined-builtin + def GetAbsolutePath(mod_path): """Flattens symlinks and indirections in the module path. 
diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index c0394be..657375b 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -346,7 +346,8 @@ def _ActivateBreakpoint(self, module): condition = compile(self.definition.get('condition'), '', 'eval') - except TypeError as e: # condition string contains null bytes. + except (TypeError, ValueError) as e: + # condition string contains null bytes. self._CompleteBreakpoint({ 'status': { 'isError': True, diff --git a/src/googleclouddebugger/yaml_data_visibility_config_reader.py b/src/googleclouddebugger/yaml_data_visibility_config_reader.py index 764f787..32ab8ed 100644 --- a/src/googleclouddebugger/yaml_data_visibility_config_reader.py +++ b/src/googleclouddebugger/yaml_data_visibility_config_reader.py @@ -27,6 +27,7 @@ import os import sys +import six import yaml @@ -104,26 +105,31 @@ def Read(f): """ try: yaml_data = yaml.load(f) - except yaml.YAMLError, e: + except yaml.YAMLError as e: raise ParseError('%s' % e) - except IOError, e: + except IOError as e: raise YAMLLoadError('%s' % e) _CheckData(yaml_data) - return Config( - yaml_data.get('blacklist', ()), - yaml_data.get('whitelist', ('*'))) + + try: + to_str = lambda v: v.decode() if six.PY3 and isinstance(v, bytes) else v + return Config( + [to_str(val) for val in yaml_data.get(b'blacklist', ())], + [to_str(val) for val in yaml_data.get(b'whitelist', ('*'))]) + except UnicodeDecodeError as e: + raise YAMLLoadError('%s' % e) def _CheckData(yaml_data): """Checks data for illegal keys and formatting.""" - legal_keys = set(('blacklist', 'whitelist')) + legal_keys = set((b'blacklist', b'whitelist')) unknown_keys = set(yaml_data) - legal_keys if unknown_keys: raise UnknownConfigKeyError( 'Unknown keys in configuration: %s' % unknown_keys) - for key, data in yaml_data.iteritems(): + for key, data in six.iteritems(yaml_data): _AssertDataIsList(key, data) @@ 
-137,6 +143,6 @@ def _AssertDataIsList(key, lst): # each list entry must be a string for element in lst: - if not isinstance(element, str): + if not isinstance(element, (bytes, str)): raise ElementNotAStringError('Unsupported list element %s found in %s', (element, lst)) diff --git a/src/setup.py b/src/setup.py index 446ecf2..9f292ae 100644 --- a/src/setup.py +++ b/src/setup.py @@ -101,7 +101,7 @@ def ReadConfig(section, value, default): url='https://github.com/GoogleCloudPlatform/cloud-debug-python', author='Google Inc.', version=version, - install_requires=['google-api-python-client', 'pyyaml'], + install_requires=['google-api-python-client', 'pyyaml', 'six>=1.10.0'], packages=['googleclouddebugger'], ext_modules=[cdbg_native_module], license='Apache License, Version 2.0', From 6dc86696f119b0e8812a93fbca2bcf6371f5a01f Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Tue, 20 Feb 2018 12:24:48 -0800 Subject: [PATCH 107/241] Python 3 debugger - C++ side changes. There are 3 main changes in this CL: 1. Redefine common Python API functions 2. Update the immutability tracer 3. Update the bytecode manipulator, and add equivalent tests for Python 3. 
With this, most of the unit tests are passing under Python 3; the exceptions are: Every C++ test (complains about a memory leak at the end, but everything else passes) appengine_pretty_printers_test (depends on app engine) breakpoints_stress_test (depends on //third_party/py/test:python_runtime_tests) breakpoints_threads_test (When setting a breakpoint on IO_INCREMENT, the instruction needs to be upgraded to extended which is not supported for now) gcp_hub_client_test (depends on httplib2) integration_test (depends on httplib2) imphook_test (no /usr/grte/v4/k8-linux/bin/python3.6) module_explorer_test (no /usr/grte/v4/k8-linux/bin/python3.6) ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=186344036 --- .../bytecode_breakpoint.cc | 12 +- .../bytecode_manipulator.cc | 197 +++++---- .../bytecode_manipulator.h | 4 +- src/googleclouddebugger/common.h | 17 + .../conditional_breakpoint.cc | 4 + .../immutability_tracer.cc | 394 ++++++++++-------- src/googleclouddebugger/immutability_tracer.h | 3 +- src/googleclouddebugger/native_module.cc | 44 +- src/googleclouddebugger/python_util.cc | 24 +- src/googleclouddebugger/python_util.h | 2 +- 10 files changed, 417 insertions(+), 284 deletions(-) diff --git a/src/googleclouddebugger/bytecode_breakpoint.cc b/src/googleclouddebugger/bytecode_breakpoint.cc index 96700f7..8074760 100644 --- a/src/googleclouddebugger/bytecode_breakpoint.cc +++ b/src/googleclouddebugger/bytecode_breakpoint.cc @@ -187,7 +187,7 @@ BytecodeBreakpoint::PreparePatchCodeObject( data->original_code = ScopedPyObject::NewReference(code_object.get()->co_code); if ((data->original_code == nullptr) || - !PyString_CheckExact(data->original_code.get())) { + !PyBytes_CheckExact(data->original_code.get())) { LOG(ERROR) << "Code object has no code"; return nullptr; // Probably a built-in method or uninitialized code object. 
} @@ -226,14 +226,14 @@ void BytecodeBreakpoint::PatchCodeObject(CodeObjectBreakpoints* code) { return; } - std::vector bytecode = PyStringToByteArray(code->original_code.get()); + std::vector bytecode = PyBytesToByteArray(code->original_code.get()); bool has_lnotab = false; std::vector lnotab; if (!code->original_lnotab.is_null() && - PyString_CheckExact(code->original_lnotab.get())) { + PyBytes_CheckExact(code->original_lnotab.get())) { has_lnotab = true; - lnotab = PyStringToByteArray(code->original_lnotab.get()); + lnotab = PyBytesToByteArray(code->original_lnotab.get()); } BytecodeManipulator bytecode_manipulator( @@ -272,7 +272,7 @@ void BytecodeBreakpoint::PatchCodeObject(CodeObjectBreakpoints* code) { code_object->co_stacksize = code->original_stacksize + 1; code->zombie_refs.push_back(ScopedPyObject(code_object->co_code)); - ScopedPyObject bytecode_string(PyString_FromStringAndSize( + ScopedPyObject bytecode_string(PyBytes_FromStringAndSize( reinterpret_cast(bytecode_manipulator.bytecode().data()), bytecode_manipulator.bytecode().size())); DCHECK(!bytecode_string.is_null()); @@ -283,7 +283,7 @@ void BytecodeBreakpoint::PatchCodeObject(CodeObjectBreakpoints* code) { if (has_lnotab) { code->zombie_refs.push_back(ScopedPyObject(code_object->co_lnotab)); - ScopedPyObject lnotab_string(PyString_FromStringAndSize( + ScopedPyObject lnotab_string(PyBytes_FromStringAndSize( reinterpret_cast(bytecode_manipulator.lnotab().data()), bytecode_manipulator.lnotab().size())); DCHECK(!lnotab_string.is_null()); diff --git a/src/googleclouddebugger/bytecode_manipulator.cc b/src/googleclouddebugger/bytecode_manipulator.cc index 21030a8..79b13d7 100644 --- a/src/googleclouddebugger/bytecode_manipulator.cc +++ b/src/googleclouddebugger/bytecode_manipulator.cc @@ -31,18 +31,31 @@ enum PythonOpcodeType { YIELD_OPCODE }; -// Single Python instruction. There are 3 types of instructions: +// Single Python instruction. +// +// In Python 2.7, there are 3 types of instructions: // 1. 
Instruction without arguments (takes 1 byte). // 2. Instruction with a single 16 bit argument (takes 3 bytes). // 3. Instruction with a 32 bit argument (very uncommon; takes 6 bytes). +// +// In Python 3.6, there are 4 types of instructions: +// 1. Instructions without arguments, or a 8 bit argument (takes 2 bytes). +// 2. Instructions with a 16 bit argument (takes 4 bytes). +// 3. Instructions with a 24 bit argument (takes 6 bytes). +// 4. Instructions with a 32 bit argument (takes 8 bytes). +// +// To handle 32 bit arguments in Python 2, or 16-32 bit arguments in Python 3, +// a special instruction with an opcode of EXTENDED_ARG is prepended to the +// actual instruction. The argument of the EXTENDED_ARG instruction is combined +// with the argument of the next instruction to form the full argument. struct PythonInstruction { uint8 opcode; uint32 argument; - bool is_extended; + int size; }; // Special pseudo-instruction to indicate failures. -static const PythonInstruction kInvalidInstruction { 0xFF, 0xFFFFFFFF, false }; +static const PythonInstruction kInvalidInstruction { 0xFF, 0xFFFFFFFF, 0 }; // Creates an instance of PythonInstruction for instruction with no arguments. static PythonInstruction PythonInstructionNoArg(uint8 opcode) { @@ -51,7 +64,12 @@ static PythonInstruction PythonInstructionNoArg(uint8 opcode) { PythonInstruction instruction; instruction.opcode = opcode; instruction.argument = 0; - instruction.is_extended = false; + +#if PY_MAJOR_VERSION >= 3 + instruction.size = 2; +#else + instruction.size = 1; +#endif return instruction; } @@ -64,23 +82,22 @@ static PythonInstruction PythonInstructionArg(uint8 opcode, uint32 argument) { PythonInstruction instruction; instruction.opcode = opcode; instruction.argument = argument; - instruction.is_extended = (argument > 0xFFFF); - - return instruction; -} - -// Calculates the number of bytes that an instruction occupies. 
-static int GetInstructionSize(const PythonInstruction& instruction) { - if (instruction.is_extended) { - return 6; // Extended instruction with a 32 bit argument. - } - - if (HAS_ARG(instruction.opcode)) { - return 3; // Instruction with a single 16 bit argument. +#if PY_MAJOR_VERSION >= 3 + if (argument <= 0xFF) { + instruction.size = 2; + } else if (argument <= 0xFFFF) { + instruction.size = 4; + } else if (argument <= 0xFFFFFF) { + instruction.size = 6; + } else { + instruction.size = 8; } +#else + instruction.size = instruction.argument > 0xFFFF ? 6 : 3; +#endif - return 1; // Instruction without argument. + return instruction; } @@ -89,7 +106,7 @@ static int GetInstructionsSize( const std::vector& instructions) { int size = 0; for (auto it = instructions.begin(); it != instructions.end(); ++it) { - size += GetInstructionSize(*it); + size += it->size; } return size; @@ -126,16 +143,12 @@ static PythonOpcodeType GetOpcodeType(uint8 opcode) { // Gets the target offset of a branch instruction. static int GetBranchTarget(int offset, PythonInstruction instruction) { - const int argument_value = instruction.is_extended - ? 
static_cast(instruction.argument) - : static_cast(instruction.argument); - switch (GetOpcodeType(instruction.opcode)) { case BRANCH_DELTA_OPCODE: - return offset + GetInstructionSize(instruction) + argument_value; + return offset + instruction.size + instruction.argument; case BRANCH_ABSOLUTE_OPCODE: - return argument_value; + return instruction.argument; default: DCHECK(false) << "Not a branch instruction"; @@ -164,14 +177,35 @@ static void WritePythonBytecodeUInt16( static PythonInstruction ReadInstruction( const std::vector& bytecode, std::vector::const_iterator it) { - PythonInstruction instruction { 0, 0, false }; + PythonInstruction instruction{0, 0, 0}; + +#if PY_MAJOR_VERSION >= 3 + if (bytecode.end() - it < 2) { + LOG(ERROR) << "Buffer underflow"; + return kInvalidInstruction; + } + + while (it[0] == EXTENDED_ARG) { + instruction.argument = instruction.argument << 8 | it[1]; + it += 2; + instruction.size += 2; + if (bytecode.end() - it < 2) { + LOG(ERROR) << "Buffer underflow"; + return kInvalidInstruction; + } + } + instruction.opcode = it[0]; + instruction.argument = instruction.argument << 8 | it[1]; + instruction.size += 2; +#else if (it == bytecode.end()) { LOG(ERROR) << "Buffer underflow"; return kInvalidInstruction; } instruction.opcode = it[0]; + instruction.size = 1; auto it_arg = it + 1; if (instruction.opcode == EXTENDED_ARG) { @@ -186,7 +220,7 @@ static PythonInstruction ReadInstruction( instruction.argument = (static_cast(ReadPythonBytecodeUInt16(it_arg)) << 16) | ReadPythonBytecodeUInt16(it_ext); - instruction.is_extended = true; + instruction.size = 6; } else if (HAS_ARG(instruction.opcode)) { if (bytecode.end() - it < 3) { LOG(ERROR) << "Buffer underflow"; @@ -194,7 +228,9 @@ static PythonInstruction ReadInstruction( } instruction.argument = ReadPythonBytecodeUInt16(it_arg); + instruction.size = 3; } +#endif return instruction; } @@ -206,7 +242,20 @@ static PythonInstruction ReadInstruction( static int WriteInstruction( 
std::vector::iterator it, const PythonInstruction& instruction) { - if (instruction.is_extended) { +#if PY_MAJOR_VERSION >= 3 + uint32 arg = instruction.argument; + int size_written = 0; + // Start writing backwards from the real instruction, followed by any + // EXTENDED_ARG instructions if needed. + for (int i = instruction.size - 2; i >= 0; i -= 2) { + it[i] = size_written == 0 ? instruction.opcode : EXTENDED_ARG; + it[i + 1] = static_cast(arg); + arg = arg >> 8; + size_written += 2; + } + return size_written; +#else + if (instruction.size == 6) { it[0] = EXTENDED_ARG; WritePythonBytecodeUInt16(it + 1, instruction.argument >> 16); it[3] = instruction.opcode; @@ -227,6 +276,7 @@ static int WriteInstruction( return 1; } +#endif } @@ -238,7 +288,7 @@ static void WriteInstructions( it_instruction != instructions.end(); ++it_instruction) { const int instruction_size = WriteInstruction(it, *it_instruction); - DCHECK_EQ(instruction_size, GetInstructionSize(*it_instruction)); + DCHECK_EQ(instruction_size, it_instruction->size); it += instruction_size; } } @@ -277,7 +327,7 @@ BytecodeManipulator::BytecodeManipulator( break; } - it += GetInstructionSize(instruction); + it += instruction.size; } } @@ -334,65 +384,48 @@ bool BytecodeManipulator::InsertMethodCall( return false; } - const int instruction_size = GetInstructionSize(instruction); - // Fix targets in branch instructions. - switch (instruction.opcode) { - // Delta target argument. - case FOR_ITER: - case JUMP_FORWARD: - case SETUP_LOOP: - case SETUP_EXCEPT: - case SETUP_FINALLY: - case SETUP_WITH: { - int32 delta = instruction.is_extended - ? 
static_cast(instruction.argument) - : static_cast(instruction.argument); - - int32 target = current_offset + instruction_size + delta; + switch (GetOpcodeType(instruction.opcode)) { + case BRANCH_DELTA_OPCODE: { + int32 delta = static_cast(instruction.argument); + int32 target = current_offset + instruction.size + delta; + if (target > offset) { target += size; } - int32 fixed_delta = target - current_fixed_offset - instruction_size; + int32 fixed_delta = target - current_fixed_offset - instruction.size; if (delta != fixed_delta) { - if (instruction.is_extended) { - instruction.argument = static_cast(fixed_delta); - } else { - if (static_cast(delta) != delta) { - LOG(ERROR) << "Upgrading instruction to extended not supported"; - return false; - } - - instruction.argument = static_cast(fixed_delta); + PythonInstruction new_instruction = + PythonInstructionArg(instruction.opcode, fixed_delta); + if (new_instruction.size != instruction.size) { + LOG(ERROR) << "Upgrading instruction to extended not supported"; + return false; } - WriteInstruction(it, instruction); + WriteInstruction(it, new_instruction); } - break; } - // Absolute target argument. 
- case JUMP_IF_FALSE_OR_POP: - case JUMP_IF_TRUE_OR_POP: - case JUMP_ABSOLUTE: - case POP_JUMP_IF_FALSE: - case POP_JUMP_IF_TRUE: - case CONTINUE_LOOP: - if (static_cast(instruction.argument) > offset) { - instruction.argument += size; - if (!instruction.is_extended && (instruction.argument > 0xFFFF)) { + case BRANCH_ABSOLUTE_OPCODE: + if (static_cast(instruction.argument) > offset) { + PythonInstruction new_instruction = PythonInstructionArg( + instruction.opcode, instruction.argument + size); + if (new_instruction.size != instruction.size) { LOG(ERROR) << "Upgrading instruction to extended not supported"; return false; } - WriteInstruction(it, instruction); + WriteInstruction(it, new_instruction); } break; + + default: + break; } - it += instruction_size; + it += instruction.size; } if (!offset_valid) { @@ -401,7 +434,7 @@ bool BytecodeManipulator::InsertMethodCall( } // Insert the bytecode to invoke the callable. - data->bytecode.insert(data->bytecode.begin() + offset, size, STOP_CODE); + data->bytecode.insert(data->bytecode.begin() + offset, size, NOP); WriteInstructions(data->bytecode.begin() + offset, method_call_instructions); // Insert a new entry into line table to account for the new bytecode. 
@@ -436,17 +469,13 @@ bool BytecodeManipulator::AppendMethodCall( BytecodeManipulator::Data* data, int offset, int const_index) const { - PythonInstruction trampoline; - trampoline.opcode = JUMP_ABSOLUTE; - trampoline.is_extended = false; - trampoline.argument = data->bytecode.size(); - - const int trampoline_size = GetInstructionSize(trampoline); + PythonInstruction trampoline = + PythonInstructionArg(JUMP_ABSOLUTE, data->bytecode.size()); std::vector relocated_instructions; int relocated_size = 0; for (auto it = data->bytecode.begin() + offset; - relocated_size < trampoline_size; ) { + relocated_size < trampoline.size; ) { if (it >= data->bytecode.end()) { LOG(ERROR) << "Not enough instructions"; return false; @@ -476,8 +505,8 @@ bool BytecodeManipulator::AppendMethodCall( } relocated_instructions.push_back(instruction); - relocated_size += GetInstructionSize(instruction); - it += GetInstructionSize(instruction); + relocated_size += instruction.size; + it += instruction.size; } for (auto it = data->bytecode.begin(); it < data->bytecode.end(); ) { @@ -501,7 +530,7 @@ bool BytecodeManipulator::AppendMethodCall( // Suppose we insert breakpoint into offset 1. The new bytecode will be: // 0 LOAD_CONST 6 // 1 JUMP_ABSOLUTE 100 - // 4 STOP_CODE + // 4 NOP // 5 ... // ... // 100 NOP # First relocated instruction. @@ -522,7 +551,7 @@ bool BytecodeManipulator::AppendMethodCall( } } - it += GetInstructionSize(instruction); + it += instruction.size; } std::vector appendix = BuildMethodCall(const_index); @@ -542,14 +571,12 @@ bool BytecodeManipulator::AppendMethodCall( // Insert jump to trampoline. 
WriteInstruction(data->bytecode.begin() + offset, trampoline); std::fill( - data->bytecode.begin() + offset + trampoline_size, + data->bytecode.begin() + offset + trampoline.size, data->bytecode.begin() + offset + relocated_size, - STOP_CODE); + NOP); return true; } } // namespace cdbg } // namespace devtools - - diff --git a/src/googleclouddebugger/bytecode_manipulator.h b/src/googleclouddebugger/bytecode_manipulator.h index 7d88a15..3177bdd 100644 --- a/src/googleclouddebugger/bytecode_manipulator.h +++ b/src/googleclouddebugger/bytecode_manipulator.h @@ -50,7 +50,7 @@ namespace cdbg { // For example consider this Python code: // def test(): // yield 'hello' -// It's bytecode without any breakpoints is: +// Its bytecode without any breakpoints is: // 0 LOAD_CONST 1 ('hello') // 3 YIELD_VALUE // 4 POP_TOP @@ -61,8 +61,8 @@ namespace cdbg { // 3 YIELD_VALUE // 4 POP_TOP // 5 LOAD_CONST 0 (None) -// 9 LOAD_CONST 2 (cdbg_native._Callback) // 8 RETURN_VALUE +// 9 LOAD_CONST 2 (cdbg_native._Callback) // 12 CALL_FUNCTION 0 // 15 POP_TOP // 16 LOAD_CONST 1 ('hello') diff --git a/src/googleclouddebugger/common.h b/src/googleclouddebugger/common.h index 2cd1ed5..98d393f 100644 --- a/src/googleclouddebugger/common.h +++ b/src/googleclouddebugger/common.h @@ -61,4 +61,21 @@ using google::AddLogSink; using google::RemoveLogSink; +// Python 3 compatibility +#if PY_MAJOR_VERSION >= 3 +// Python 2 has both an 'int' and a 'long' type, and Python 3 only as an 'int' +// type which is the equivalent of Python 2's 'long'. +// PyInt* functions will refer to 'int' in Python 2 and 3. + #define PyInt_FromLong PyLong_FromLong + #define PyInt_AsLong PyLong_AsLong + #define PyInt_CheckExact PyLong_CheckExact + +// Python 3's 'bytes' type is the equivalent of Python 2's 'str' type, which are +// byte arrays. Python 3's 'str' type represents a unicode string. +// In this codebase: +// PyString* functions will refer to 'str' in Python 2 and 3. 
+// PyBytes* functions will refer to 'str' in Python 2 and 'bytes' in Python 3. + #define PyString_AsString PyUnicode_AsUTF8 +#endif + #endif // DEVTOOLS_CDBG_DEBUGLETS_PYTHON_COMMON_H_ diff --git a/src/googleclouddebugger/conditional_breakpoint.cc b/src/googleclouddebugger/conditional_breakpoint.cc index b7e9bec..eed062c 100644 --- a/src/googleclouddebugger/conditional_breakpoint.cc +++ b/src/googleclouddebugger/conditional_breakpoint.cc @@ -69,7 +69,11 @@ bool ConditionalBreakpoint::EvaluateCondition(PyFrameObject* frame) { { ScopedImmutabilityTracer immutability_tracer; result.reset(PyEval_EvalCode( +#if PY_MAJOR_VERSION >= 3 + reinterpret_cast(condition_.get()), +#else condition_.get(), +#endif frame->f_globals, frame->f_locals)); is_mutable_code_detected = immutability_tracer.IsMutableCodeDetected(); diff --git a/src/googleclouddebugger/immutability_tracer.cc b/src/googleclouddebugger/immutability_tracer.cc index 22d1d1f..fab0861 100644 --- a/src/googleclouddebugger/immutability_tracer.cc +++ b/src/googleclouddebugger/immutability_tracer.cc @@ -251,9 +251,9 @@ void ImmutabilityTracer::VerifyCodeObject(ScopedPyCodeObject code_object) { void ImmutabilityTracer::ProcessCodeLine( PyCodeObject* code_object, int line_number) { - int size = PyString_Size(code_object->co_code); + int size = PyBytes_Size(code_object->co_code); const uint8* opcodes = - reinterpret_cast(PyString_AsString(code_object->co_code)); + reinterpret_cast(PyBytes_AsString(code_object->co_code)); DCHECK(opcodes != nullptr); @@ -263,6 +263,7 @@ void ImmutabilityTracer::ProcessCodeLine( do { if (start_offset != -1) { ProcessCodeRange( + opcodes, opcodes + start_offset, enumerator.offset() - start_offset); start_offset = -1; @@ -274,189 +275,237 @@ void ImmutabilityTracer::ProcessCodeLine( } while (enumerator.Next()); if (start_offset != -1) { - ProcessCodeRange(opcodes + start_offset, size - start_offset); + ProcessCodeRange(opcodes, opcodes + start_offset, size - start_offset); } } +enum 
OpcodeMutableStatus { + OPCODE_MUTABLE, + OPCODE_NOT_MUTABLE, + OPCODE_MAYBE_MUTABLE +}; + +static OpcodeMutableStatus IsOpcodeMutable(const uint8 opcode) { + // Notes: + // * We allow changing local variables (i.e. STORE_FAST). Expression + // evaluation doesn't let changing local variables of the top frame + // because we use "Py_eval_input" when compiling the expression. Methods + // invoked by an expression can freely change local variables as it + // doesn't change the state of the program once the method exits. + // * We let opcodes calling methods like "PyObject_Repr". These will either + // be completely executed inside Python interpreter (with no side + // effects), or call object method (e.g. "__repr__"). In this case the + // tracer will kick in and will verify that the method has no side + // effects. + switch (opcode) { + case POP_TOP: + case ROT_TWO: + case ROT_THREE: + case DUP_TOP: + case NOP: + case UNARY_POSITIVE: + case UNARY_NEGATIVE: + case UNARY_INVERT: + case BINARY_POWER: + case BINARY_MULTIPLY: + case BINARY_MODULO: + case BINARY_ADD: + case BINARY_SUBTRACT: + case BINARY_SUBSCR: + case BINARY_FLOOR_DIVIDE: + case BINARY_TRUE_DIVIDE: + case INPLACE_FLOOR_DIVIDE: + case INPLACE_TRUE_DIVIDE: + case INPLACE_ADD: + case INPLACE_SUBTRACT: + case INPLACE_MULTIPLY: + case INPLACE_MODULO: + case BINARY_LSHIFT: + case BINARY_RSHIFT: + case BINARY_AND: + case BINARY_XOR: + case INPLACE_POWER: + case GET_ITER: + case INPLACE_LSHIFT: + case INPLACE_RSHIFT: + case INPLACE_AND: + case INPLACE_XOR: + case INPLACE_OR: + case BREAK_LOOP: + case RETURN_VALUE: + case YIELD_VALUE: + case POP_BLOCK: + case UNPACK_SEQUENCE: + case FOR_ITER: + case LOAD_CONST: + case LOAD_NAME: + case BUILD_TUPLE: + case BUILD_LIST: + case BUILD_SET: + case BUILD_MAP: + case LOAD_ATTR: + case COMPARE_OP: + case JUMP_FORWARD: + case JUMP_IF_FALSE_OR_POP: + case JUMP_IF_TRUE_OR_POP: + case POP_JUMP_IF_TRUE: + case POP_JUMP_IF_FALSE: + case LOAD_GLOBAL: + case CONTINUE_LOOP: + case 
SETUP_LOOP: + case LOAD_FAST: + case STORE_FAST: + case DELETE_FAST: + case CALL_FUNCTION: + case MAKE_FUNCTION: + case BUILD_SLICE: + case LOAD_DEREF: + case CALL_FUNCTION_KW: + case EXTENDED_ARG: +#if PY_MAJOR_VERSION >= 3 + case DUP_TOP_TWO: + case BINARY_MATRIX_MULTIPLY: + case INPLACE_MATRIX_MULTIPLY: + case GET_YIELD_FROM_ITER: + case YIELD_FROM: + case UNPACK_EX: + case CALL_FUNCTION_EX: + case LOAD_CLASSDEREF: + case BUILD_LIST_UNPACK: + case BUILD_MAP_UNPACK: + case BUILD_MAP_UNPACK_WITH_CALL: + case BUILD_TUPLE_UNPACK: + case BUILD_SET_UNPACK: + case FORMAT_VALUE: + case BUILD_CONST_KEY_MAP: + case BUILD_STRING: + case BUILD_TUPLE_UNPACK_WITH_CALL: +#else + case ROT_FOUR: + case DUP_TOPX: + case UNARY_NOT: + case UNARY_CONVERT: + case BINARY_DIVIDE: + case BINARY_OR: + case INPLACE_DIVIDE: + case SLICE+0: + case SLICE+1: + case SLICE+2: + case SLICE+3: + case LOAD_LOCALS: + case EXEC_STMT: + case JUMP_ABSOLUTE: + case CALL_FUNCTION_VAR: + case CALL_FUNCTION_VAR_KW: + case MAKE_CLOSURE: +#endif + return OPCODE_NOT_MUTABLE; + + case PRINT_EXPR: + case STORE_GLOBAL: + case DELETE_GLOBAL: + case IMPORT_STAR: + case IMPORT_NAME: + case IMPORT_FROM: + case SETUP_EXCEPT: + case SETUP_FINALLY: + // TODO(xinghuadou): allow changing fields of locally created objects/lists. 
+ case STORE_SUBSCR: + case DELETE_SUBSCR: + case STORE_NAME: + case DELETE_NAME: + case STORE_ATTR: + case DELETE_ATTR: + case LIST_APPEND: + case SET_ADD: + case MAP_ADD: + case STORE_DEREF: + // TODO(xinghuadou): allow exception handling + case RAISE_VARARGS: + case END_FINALLY: + case SETUP_WITH: + // TODO(xinghuadou): allow closures + case LOAD_CLOSURE: +#if PY_MAJOR_VERSION >= 3 + case GET_AITER: + case GET_ANEXT: + case BEFORE_ASYNC_WITH: + case LOAD_BUILD_CLASS: + case GET_AWAITABLE: + case WITH_CLEANUP_START: + case WITH_CLEANUP_FINISH: + case SETUP_ANNOTATIONS: + case POP_EXCEPT: + case STORE_ANNOTATION: + case DELETE_DEREF: + case SETUP_ASYNC_WITH: +#else + case STORE_SLICE+0: + case STORE_SLICE+1: + case STORE_SLICE+2: + case STORE_SLICE+3: + case DELETE_SLICE+0: + case DELETE_SLICE+1: + case DELETE_SLICE+2: + case DELETE_SLICE+3: + case STORE_MAP: + case PRINT_ITEM_TO: + case PRINT_ITEM: + case PRINT_NEWLINE_TO: + case PRINT_NEWLINE: + case BUILD_CLASS: + case WITH_CLEANUP: +#endif + return OPCODE_MUTABLE; + + default: + return OPCODE_MAYBE_MUTABLE; + } +} -void ImmutabilityTracer::ProcessCodeRange(const uint8* opcodes, int size) { +void ImmutabilityTracer::ProcessCodeRange(const uint8* code_start, + const uint8* opcodes, int size) { const uint8* end = opcodes + size; while (opcodes < end) { // Read opcode. const uint8 opcode = *opcodes; - ++opcodes; - - if (HAS_ARG(opcode)) { - DCHECK_LE(opcodes + 2, end); - opcodes += 2; - - // Opcode argument is: - // (static_cast(opcodes[1]) << 8) | opcodes[0]; - // and can extend to 32 bit if EXTENDED_ARG is used. - } - - // Notes: - // * We allow changing local variables (i.e. STORE_FAST). Expression - // evaluation doesn't let changing local variables of the top frame - // because we use "Py_eval_input" when compiling the expression. Methods - // invoked by an expression can freely change local variables as it - // doesn't change the state of the program once the method exits. 
- // * We let opcodes calling methods like "PyObject_Repr". These will either - // be completely executed inside Python interpreter (with no side - // effects), or call object method (e.g. "__repr__"). In this case the - // tracer will kick in and will verify that the method has no side - // effects. - switch (opcode) { - case NOP: - case LOAD_FAST: - case LOAD_CONST: - case STORE_FAST: - case POP_TOP: - case ROT_TWO: - case ROT_THREE: - case ROT_FOUR: - case DUP_TOP: - case DUP_TOPX: - case UNARY_POSITIVE: - case UNARY_NEGATIVE: - case UNARY_NOT: - case UNARY_CONVERT: - case UNARY_INVERT: - case BINARY_POWER: - case BINARY_MULTIPLY: - case BINARY_DIVIDE: - case BINARY_TRUE_DIVIDE: - case BINARY_FLOOR_DIVIDE: - case BINARY_MODULO: - case BINARY_ADD: - case BINARY_SUBTRACT: - case BINARY_SUBSCR: - case BINARY_LSHIFT: - case BINARY_RSHIFT: - case BINARY_AND: - case BINARY_XOR: - case BINARY_OR: - case INPLACE_POWER: - case INPLACE_MULTIPLY: - case INPLACE_DIVIDE: - case INPLACE_TRUE_DIVIDE: - case INPLACE_FLOOR_DIVIDE: - case INPLACE_MODULO: - case INPLACE_ADD: - case INPLACE_SUBTRACT: - case INPLACE_LSHIFT: - case INPLACE_RSHIFT: - case INPLACE_AND: - case INPLACE_XOR: - case INPLACE_OR: - case SLICE+0: - case SLICE+1: - case SLICE+2: - case SLICE+3: - case LOAD_LOCALS: - case RETURN_VALUE: - case YIELD_VALUE: - case EXEC_STMT: - case UNPACK_SEQUENCE: - case LOAD_NAME: - case LOAD_GLOBAL: - case DELETE_FAST: - case LOAD_DEREF: - case BUILD_TUPLE: - case BUILD_LIST: - case BUILD_SET: - case BUILD_MAP: - case LOAD_ATTR: - case COMPARE_OP: - case JUMP_FORWARD: - case POP_JUMP_IF_FALSE: - case POP_JUMP_IF_TRUE: - case JUMP_IF_FALSE_OR_POP: - case JUMP_IF_TRUE_OR_POP: - case JUMP_ABSOLUTE: - case GET_ITER: - case FOR_ITER: - case BREAK_LOOP: - case CONTINUE_LOOP: - case SETUP_LOOP: - case CALL_FUNCTION: - case CALL_FUNCTION_VAR: - case CALL_FUNCTION_KW: - case CALL_FUNCTION_VAR_KW: - case MAKE_FUNCTION: - case MAKE_CLOSURE: - case BUILD_SLICE: - case POP_BLOCK: + switch 
(IsOpcodeMutable(opcode)) { + case OPCODE_NOT_MUTABLE: + // We don't worry about the sizes of instructions with EXTENDED_ARG. + // The argument does not really matter and so EXTENDED_ARGs can be + // treated as just another instruction with an opcode. +#if PY_MAJOR_VERSION >= 3 + opcodes += 2; +#else + opcodes += HAS_ARG(opcode) ? 3 : 1; +#endif + DCHECK_LE(opcodes, end); break; - case EXTENDED_ARG: - // Go to the next opcode. The argument is going to be incorrect, - // but we don't really care. - break; - - // TODO(vlif): allow changing fields of locally created objects/lists. - case LIST_APPEND: - case SET_ADD: - case STORE_SLICE+0: - case STORE_SLICE+1: - case STORE_SLICE+2: - case STORE_SLICE+3: - case DELETE_SLICE+0: - case DELETE_SLICE+1: - case DELETE_SLICE+2: - case DELETE_SLICE+3: - case STORE_SUBSCR: - case DELETE_SUBSCR: - case STORE_NAME: - case DELETE_NAME: - case STORE_ATTR: - case DELETE_ATTR: - case STORE_DEREF: - case STORE_MAP: - case MAP_ADD: - mutable_code_detected_ = true; - return; - - case STORE_GLOBAL: - case DELETE_GLOBAL: - mutable_code_detected_ = true; - return; - - case PRINT_EXPR: - case PRINT_ITEM_TO: - case PRINT_ITEM: - case PRINT_NEWLINE_TO: - case PRINT_NEWLINE: - mutable_code_detected_ = true; - return; - - case BUILD_CLASS: - mutable_code_detected_ = true; - return; - - case IMPORT_NAME: - case IMPORT_STAR: - case IMPORT_FROM: - case SETUP_EXCEPT: - case SETUP_FINALLY: - case WITH_CLEANUP: - mutable_code_detected_ = true; - return; - - // TODO(vlif): allow exception handling. - case RAISE_VARARGS: - case END_FINALLY: - case SETUP_WITH: - mutable_code_detected_ = true; - return; - - // TODO(vlif): allow closures. - case LOAD_CLOSURE: + case OPCODE_MAYBE_MUTABLE: +#if PY_MAJOR_VERSION >= 3 + if (opcode == JUMP_ABSOLUTE) { + // Check for a jump to itself, which happens in "while True: pass". + // The tracer won't call our tracing function unless there is a jump + // backwards, or we reached a new line. 
In this case neither of those + // ever happens, so we can't rely on our tracing function to detect + // infinite loops. + // In this case EXTENDED_ARG doesn't matter either because if this + // instruction had one it would jump backwards and be caught tracing. + if (opcodes - code_start == opcodes[1]) { + mutable_code_detected_ = true; + return; + } + opcodes += 2; + DCHECK_LE(opcodes, end); + break; + } +#endif + LOG(WARNING) << "Unknown opcode " << static_cast(opcode); mutable_code_detected_ = true; return; - default: - LOG(WARNING) << "Unknown opcode " << static_cast(opcode); + case OPCODE_MUTABLE: mutable_code_detected_ = true; return; } @@ -505,4 +554,3 @@ void ImmutabilityTracer::SetMutableCodeException() { } // namespace cdbg } // namespace devtools - diff --git a/src/googleclouddebugger/immutability_tracer.h b/src/googleclouddebugger/immutability_tracer.h index 0035d94..49b351f 100644 --- a/src/googleclouddebugger/immutability_tracer.h +++ b/src/googleclouddebugger/immutability_tracer.h @@ -78,7 +78,8 @@ class ImmutabilityTracer { void ProcessCodeLine(PyCodeObject* code_object, int line_number); // Verifies immutability of block of opcodes. - void ProcessCodeRange(const uint8* opcodes, int size); + void ProcessCodeRange(const uint8* code_start, const uint8* opcodes, + int size); // Verifies that the called C function is whitelisted. 
void ProcessCCall(PyObject* function); diff --git a/src/googleclouddebugger/native_module.cc b/src/googleclouddebugger/native_module.cc index 4a11b26..f1e1ced 100644 --- a/src/googleclouddebugger/native_module.cc +++ b/src/googleclouddebugger/native_module.cc @@ -316,12 +316,16 @@ static PyObject* CallImmutable(PyObject* self, PyObject* py_args) { } PyFrameObject* frame = reinterpret_cast(obj_frame); - PyCodeObject* code = reinterpret_cast(obj_code); PyFrame_FastToLocals(frame); ScopedImmutabilityTracer immutability_tracer; - return PyEval_EvalCode(code, frame->f_globals, frame->f_locals); +#if PY_MAJOR_VERSION >= 3 + return PyEval_EvalCode(obj_code, frame->f_globals, frame->f_locals); +#else + return PyEval_EvalCode(reinterpret_cast(obj_code), + frame->f_globals, frame->f_locals); +#endif } // Applies the dynamic logs quota, which is limited by both total messages and @@ -403,17 +407,34 @@ static PyMethodDef g_module_functions[] = { }; -void InitDebuggerNativeModule() { +#if PY_MAJOR_VERSION >= 3 +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, /** m_base */ + CDBG_MODULE_NAME, /** m_name */ + "Native module for Python Cloud Debugger", /** m_doc */ + -1, /** m_size */ + g_module_functions, /** m_methods */ + NULL, /** m_slots */ + NULL, /** m_traverse */ + NULL, /** m_clear */ + NULL /** m_free */ +}; + +PyObject* InitDebuggerNativeModuleInternal() { + PyObject* module = PyModule_Create(&moduledef); +#else +PyObject* InitDebuggerNativeModuleInternal() { PyObject* module = Py_InitModule3( CDBG_MODULE_NAME, g_module_functions, "Native module for Python Cloud Debugger"); +#endif SetDebugletModule(module); if (!RegisterPythonType() || !RegisterPythonType()) { - return; + return nullptr; } // Add constants we want to share with the Python code. 
@@ -424,17 +445,28 @@ void InitDebuggerNativeModule() { PyInt_FromLong(kIntegerConstants[i].value))) { LOG(ERROR) << "Failed to constant " << kIntegerConstants[i].name << " to native module"; - return; + return nullptr; } } + + return module; +} + +void InitDebuggerNativeModule() { + InitDebuggerNativeModuleInternal(); } } // namespace cdbg } // namespace devtools - // This function is called to initialize the module. +#if PY_MAJOR_VERSION >= 3 +PyMODINIT_FUNC PyInit_cdbg_native() { + return devtools::cdbg::InitDebuggerNativeModuleInternal(); +} +#else PyMODINIT_FUNC initcdbg_native() { devtools::cdbg::InitDebuggerNativeModule(); } +#endif diff --git a/src/googleclouddebugger/python_util.cc b/src/googleclouddebugger/python_util.cc index 7083c1a..d86d8b9 100644 --- a/src/googleclouddebugger/python_util.cc +++ b/src/googleclouddebugger/python_util.cc @@ -46,9 +46,9 @@ void CodeObjectLinesEnumerator::Initialize( PyObject* lnotab) { offset_ = 0; line_number_ = firstlineno; - remaining_entries_ = PyString_Size(lnotab) / 2; + remaining_entries_ = PyBytes_Size(lnotab) / 2; next_entry_ = - reinterpret_cast(PyString_AsString(lnotab)); + reinterpret_cast(PyBytes_AsString(lnotab)); // If the line table starts with offset 0, the first line is not // "code_object->co_firstlineno", but the following line. 
@@ -102,8 +102,12 @@ void SetDebugletModule(PyObject* module) { PyTypeObject DefaultTypeDefinition(const char* type_name) { return { +#if PY_MAJOR_VERSION >= 3 + PyVarObject_HEAD_INIT(nullptr, /* ob_size */ 0) +#else PyObject_HEAD_INIT(nullptr) 0, /* ob_size */ +#endif type_name, /* tp_name */ 0, /* tp_basicsize */ 0, /* tp_itemsize */ @@ -231,8 +235,8 @@ string CodeObjectDebugString(PyCodeObject* code_object) { string str; if ((code_object->co_name != nullptr) && - PyString_CheckExact(code_object->co_name)) { - str += PyString_AS_STRING(code_object->co_name); + PyBytes_CheckExact(code_object->co_name)) { + str += PyBytes_AS_STRING(code_object->co_name); } else { str += ""; } @@ -241,21 +245,21 @@ string CodeObjectDebugString(PyCodeObject* code_object) { str += std::to_string(static_cast(code_object->co_firstlineno)); if ((code_object->co_filename != nullptr) && - PyString_CheckExact(code_object->co_filename)) { + PyBytes_CheckExact(code_object->co_filename)) { str += " at "; - str += PyString_AS_STRING(code_object->co_filename); + str += PyBytes_AS_STRING(code_object->co_filename); } return str; } -std::vector PyStringToByteArray(PyObject* obj) { - DCHECK(PyString_CheckExact(obj)); +std::vector PyBytesToByteArray(PyObject* obj) { + DCHECK(PyBytes_CheckExact(obj)); - const size_t bytecode_size = PyString_GET_SIZE(obj); + const size_t bytecode_size = PyBytes_GET_SIZE(obj); const uint8* const bytecode_data = - reinterpret_cast(PyString_AS_STRING(obj)); + reinterpret_cast(PyBytes_AS_STRING(obj)); return std::vector(bytecode_data, bytecode_data + bytecode_size); } diff --git a/src/googleclouddebugger/python_util.h b/src/googleclouddebugger/python_util.h index 1275194..7ab71c2 100644 --- a/src/googleclouddebugger/python_util.h +++ b/src/googleclouddebugger/python_util.h @@ -307,7 +307,7 @@ string CodeObjectDebugString(PyCodeObject* code_object); // Reads Python string as a byte array. The function does not verify that // "obj" is of a string type. 
-std::vector PyStringToByteArray(PyObject* obj); +std::vector PyBytesToByteArray(PyObject* obj); // Creates a new tuple by appending "items" to elements in "tuple". ScopedPyObject AppendTuple( From 27f642ef9825acc6251a72d97bbd8a807cac6b11 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Wed, 21 Feb 2018 11:11:46 -0800 Subject: [PATCH 108/241] More Python 3 compatibility fixes. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=186490046 --- src/googleclouddebugger/__init__.py | 4 ++-- src/googleclouddebugger/bytecode_manipulator.cc | 2 ++ src/googleclouddebugger/gcp_hub_client.py | 10 +++++----- src/googleclouddebugger/uniquifier_computer.py | 8 ++++---- src/setup.py | 9 +++++++-- 5 files changed, 20 insertions(+), 13 deletions(-) diff --git a/src/googleclouddebugger/__init__.py b/src/googleclouddebugger/__init__.py index 1f6983f..becf5e4 100644 --- a/src/googleclouddebugger/__init__.py +++ b/src/googleclouddebugger/__init__.py @@ -33,7 +33,7 @@ from . import gcp_hub_client from . import glob_data_visibility_policy from . import yaml_data_visibility_config_reader -import cdbg_native +from . import cdbg_native from . import version __version__ = version.__version__ @@ -143,7 +143,7 @@ def _DebuggerMain(): sys.modules['__main__'] = __main__ - exec 'execfile(%r)' % app_path in globals, locals # pylint: disable=exec-used + exec('execfile(%r)' % app_path, globals, locals) # pylint: disable=exec-used # pylint: disable=invalid-name diff --git a/src/googleclouddebugger/bytecode_manipulator.cc b/src/googleclouddebugger/bytecode_manipulator.cc index 79b13d7..10ab8c2 100644 --- a/src/googleclouddebugger/bytecode_manipulator.cc +++ b/src/googleclouddebugger/bytecode_manipulator.cc @@ -157,6 +157,7 @@ static int GetBranchTarget(int offset, PythonInstruction instruction) { } +#if PY_MAJOR_VERSION < 3 // Reads 16 bit value according to Python bytecode encoding. 
static uint16 ReadPythonBytecodeUInt16(std::vector::const_iterator it) { return it[0] | (static_cast(it[1]) << 8); @@ -170,6 +171,7 @@ static void WritePythonBytecodeUInt16( it[0] = static_cast(data); it[1] = data >> 8; } +#endif // Read instruction at the specified offset. Returns kInvalidInstruction diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index 779e8e1..bb8dd45 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -30,7 +30,6 @@ import apiclient from apiclient import discovery # pylint: disable=unused-import -from backoff import Backoff import httplib2 import oauth2client from oauth2client import service_account @@ -38,6 +37,7 @@ import six from . import labels +from . import backoff from . import cdbg_native as native from . import uniquifier_computer from . import version @@ -132,9 +132,9 @@ def filter(self, record): # # Delay before retrying failed request. - self.register_backoff = Backoff() # Register debuggee. - self.list_backoff = Backoff() # Query active breakpoints. - self.update_backoff = Backoff() # Update breakpoint. + self.register_backoff = backoff.Backoff() # Register debuggee. + self.list_backoff = backoff.Backoff() # Query active breakpoints. + self.update_backoff = backoff.Backoff() # Update breakpoint. 
# Maximum number of times that the message is re-transmitted before it # is assumed to be poisonous and discarded @@ -365,7 +365,7 @@ def _ListActiveBreakpoints(self, service): if not response.get('waitExpired'): self._wait_token = response.get('nextWaitToken') breakpoints = response.get('breakpoints') or [] - if cmp(self._breakpoints, breakpoints) != 0: + if self._breakpoints != breakpoints: self._breakpoints = breakpoints native.LogInfo( 'Breakpoints list changed, %d active, wait token: %s' % ( diff --git a/src/googleclouddebugger/uniquifier_computer.py b/src/googleclouddebugger/uniquifier_computer.py index 2b307f1..873b110 100644 --- a/src/googleclouddebugger/uniquifier_computer.py +++ b/src/googleclouddebugger/uniquifier_computer.py @@ -106,12 +106,12 @@ def IsPackage(path): def ProcessApplicationFile(path, relative_path): """Updates the hash with the specified application file.""" - hash_obj.update(relative_path) - hash_obj.update(':') + hash_obj.update(relative_path.encode()) + hash_obj.update(':'.encode()) try: - hash_obj.update(str(os.stat(path).st_size)) + hash_obj.update(str(os.stat(path).st_size).encode()) except BaseException: pass - hash_obj.update('\n') + hash_obj.update('\n'.encode()) ProcessDirectory(sys.path[0], '') diff --git a/src/setup.py b/src/setup.py index 9f292ae..98b4667 100644 --- a/src/setup.py +++ b/src/setup.py @@ -14,7 +14,12 @@ """Python Cloud Debugger build and packaging script.""" -import ConfigParser +# pylint: disable=g-statement-before-imports,g-import-not-at-top +try: + from ConfigParser import ConfigParser # Python 2 +except ImportError: + from configparser import ConfigParser # Python 3 +# pylint: enable=g-statement-before-imports,g-import-not-at-top from glob import glob import os import re @@ -34,7 +39,7 @@ def RemovePrefixes(optlist, bad_prefixes): def ReadConfig(section, value, default): try: - config = ConfigParser.ConfigParser() + config = ConfigParser() config.read('setup.cfg') return config.get(section, value) except: 
# pylint: disable=bare-except From 247e1b927f335ec0ab9b6b5edf410b14d98ab5b1 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Wed, 21 Feb 2018 17:56:46 -0800 Subject: [PATCH 109/241] Introduce new method call bytecode insert algorithm for Python 3 that allows upgrading instructions to use EXTENDED_ARG. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=186548912 --- .../bytecode_manipulator.cc | 252 ++++++++++++++++-- 1 file changed, 235 insertions(+), 17 deletions(-) diff --git a/src/googleclouddebugger/bytecode_manipulator.cc b/src/googleclouddebugger/bytecode_manipulator.cc index 10ab8c2..9fd4a7b 100644 --- a/src/googleclouddebugger/bytecode_manipulator.cc +++ b/src/googleclouddebugger/bytecode_manipulator.cc @@ -179,7 +179,7 @@ static void WritePythonBytecodeUInt16( static PythonInstruction ReadInstruction( const std::vector& bytecode, std::vector::const_iterator it) { - PythonInstruction instruction{0, 0, 0}; + PythonInstruction instruction { 0, 0, 0 }; #if PY_MAJOR_VERSION >= 3 if (bytecode.end() - it < 2) { @@ -360,6 +360,238 @@ bool BytecodeManipulator::InjectMethodCall( } +// Inserts an entry into the line number table for an insertion in the bytecode. +static void InsertAndUpdateLnotab(int offset, int size, + std::vector *lnotab) { + int current_offset = 0; + for (auto it = lnotab->begin(); it != lnotab->end(); it += 2) { + current_offset += it[0]; + + if (current_offset >= offset) { + int remaining_size = size; + while (remaining_size > 0) { + const int current_size = std::min(remaining_size, 0xFF); + it = lnotab->insert(it, static_cast(current_size)) + 1; + it = lnotab->insert(it, 0) + 1; + remaining_size -= current_size; + } + return; + } + } +} + + +// Use different algorithms to insert method calls for Python 2 and 3. 
+// Technically the algorithm for Python 3 will work with Python 2, but because +// it is more complicated and the issue of needing to upgrade branch +// instructions to use EXTENDED_ARG is less common, we stick with the existing +// algorithm for better safety. + + +#if PY_MAJOR_VERSION >= 3 + + +// Represents a branch instruction in the original bytecode that may need to +// have its offsets fixed and/or upgraded to use EXTENDED_ARG. +struct UpdatedInstruction { + PythonInstruction instruction; + size_t original_size; + int current_offset; +}; + + +// Represents space that needs to be reserved for an insertion operation. +struct Insertion { + int size; + int current_offset; +}; + +// Max number of outer loop iterations to do before failing in +// InsertAndUpdateBranchInstructions. +static const int kMaxInsertionIterations = 10; + +// Reserves space for instructions to be inserted into the bytecode, and +// calculates the new offsets and arguments of branch instructions. +// Returns true if the calculation was successful, and false if too many +// iterations were needed. +// +// When inserting some space for the method call bytecode, branch instructions +// may need to have their offsets updated. Some cases might require branch +// instructions to be 'upgraded' to use EXTENDED_ARG if the new argument crosses +// the argument value limit for its current size.. This in turn will require +// another insertion and possibly further updates. +// +// It won't be manageable to update the bytecode in place in such cases, as when +// performing an insertion we might need to perform more insertions and quickly +// lose our place. +// +// Instead, we perform process insertion operations one at a time, starting from +// the original argument. While processing an operation, if an instruction needs +// to be upgraded to use EXTENDED_ARG, then another insertion operation is +// pushed on the stack to be processed later. 
+// +// Example: +// Suppose we need to reserve space for 6 bytes at offset 40. We have a +// JUMP_ABSOLUTE 250 instruction at offset 0, and a JUMP_FORWARD 2 instruction +// at offset 40. +// insertions: [{6, 40}] +// instructions: [{JUMP_ABSOLUTE 250, 0}, {JUMP_FORWARD 2, 40}] +// +// The JUMP_ABSOLUTE argument needs to be moved forward to 256, since the +// insertion occurs before the target. This requires an EXTENDED_ARG, so another +// insertion operation with size=2 at offset=0 is pushed. +// The JUMP_FORWARD instruction will be after the space reserved, so we need to +// update its current offset to now be 46. The argument does not need to be +// changed, as the insertion is not between its offset and target. +// insertions: [{2, 0}] +// instructions: [{JUMP_ABSOLUTE 256, 0}, {JUMP_FORWARD 2, 46}] +// +// For the next insertion, The JUMP_ABSOLUTE instruction's offset does not +// change, since it has the same offset as the insertion, signaling that the +// insertion is for the instruction itself. The argument gets updated to 258 to +// account for the additional space. The JUMP_FORWARD instruction's offset needs +// to be updated, but not its argument, for the same reason as before. +// insertions: [] +// instructions: [{JUMP_ABSOLUTE 258, 0}, {JUMP_FORWARD 2, 48}] +// +// There are no more insertions so we are done. +static bool InsertAndUpdateBranchInstructions( + Insertion insertion, std::vector& instructions) { + std::vector insertions { insertion }; + + int iterations = 0; + while (insertions.size() && iterations < kMaxInsertionIterations) { + insertion = insertions.back(); + insertions.pop_back(); + + // Update the offsets of all insertions after. + for (auto it = insertions.begin(); it < insertions.end(); it++) { + if (it->current_offset >= insertion.current_offset) { + it->current_offset += insertion.size; + } + } + + // Update the offsets and arguments of the branches. 
+ for (auto it = instructions.begin(); + it < instructions.end(); it++) { + PythonInstruction instruction = it->instruction; + uint32 arg = instruction.argument; + bool need_to_update = false; + PythonOpcodeType opcode_type = GetOpcodeType(instruction.opcode); + if (opcode_type == BRANCH_DELTA_OPCODE) { + // For relative branches, the argument needs to be updated if the + // insertion is between the instruction and the target. + int32 target = it->current_offset + instruction.size + arg; + need_to_update = it->current_offset < insertion.current_offset && + insertion.current_offset < target; + } else if (opcode_type == BRANCH_ABSOLUTE_OPCODE) { + // For absolute branches, the argument needs to be updated if the + // insertion is before the target. + need_to_update = insertion.current_offset < arg; + } + + // If we are inserting the original method call instructions, we want to + // update the current_offset of any instructions at or after. If we are + // doing an EXTENDED_ARG insertion, we don't want to update the offset of + // instructions right at the offset, because that is the original + // instruction that the EXTENDED_ARG is for. + int offset_diff = it->current_offset - insertion.current_offset; + if ((iterations == 0 && offset_diff >= 0) || (offset_diff > 0)) { + it->current_offset += insertion.size; + } + + if (need_to_update) { + PythonInstruction new_instruction = + PythonInstructionArg(instruction.opcode, arg + insertion.size); + int size_diff = new_instruction.size - instruction.size; + if (size_diff > 0) { + insertions.push_back(Insertion { size_diff, it->current_offset }); + } + it->instruction = new_instruction; + } + } + iterations++; + } + + return insertions.size() == 0; +} + + +bool BytecodeManipulator::InsertMethodCall( + BytecodeManipulator::Data* data, + int offset, + int const_index) const { + std::vector updated_instructions; + bool offset_valid = false; + + // Gather all branch instructions.
+ for (auto it = data->bytecode.begin(); it < data->bytecode.end();) { + int current_offset = it - data->bytecode.begin(); + if (current_offset == offset) { + DCHECK(!offset_valid) << "Each offset should be visited only once"; + offset_valid = true; + } + + PythonInstruction instruction = ReadInstruction(data->bytecode, it); + if (instruction.opcode == kInvalidInstruction.opcode) { + return false; + } + + PythonOpcodeType opcode_type = GetOpcodeType(instruction.opcode); + if (opcode_type == BRANCH_DELTA_OPCODE || + opcode_type == BRANCH_ABSOLUTE_OPCODE) { + updated_instructions.push_back( + UpdatedInstruction { instruction, instruction.size, current_offset }); + } + + it += instruction.size; + } + + if (!offset_valid) { + LOG(ERROR) << "Offset " << offset << " is mid instruction or out of range"; + return false; + } + + // Calculate new branch instructions. + const std::vector method_call_instructions = + BuildMethodCall(const_index); + int method_call_size = GetInstructionsSize(method_call_instructions); + if (!InsertAndUpdateBranchInstructions({ method_call_size, offset }, + updated_instructions)) { + LOG(ERROR) << "Too many instruction argument upgrades required"; + return false; + } + + // Insert the method call. + data->bytecode.insert(data->bytecode.begin() + offset, method_call_size, NOP); + WriteInstructions(data->bytecode.begin() + offset, method_call_instructions); + if (has_lnotab_) { + InsertAndUpdateLnotab(offset, method_call_size, &data->lnotab); + } + + // Write new branch instructions. + // We can use current_offset directly since all insertions before would have + // been done by the time we reach the current instruction. 
+ for (auto it = updated_instructions.begin(); + it < updated_instructions.end(); it++) { + int size_diff = it->instruction.size - it->original_size; + uint32 offset = it->current_offset; + if (size_diff > 0) { + data->bytecode.insert(data->bytecode.begin() + offset, size_diff, NOP); + if (has_lnotab_) { + InsertAndUpdateLnotab(it->current_offset, size_diff, &data->lnotab); + } + } + WriteInstruction(data->bytecode.begin() + offset, it->instruction); + } + + return true; +} + + +#else + + bool BytecodeManipulator::InsertMethodCall( BytecodeManipulator::Data* data, int offset, @@ -441,26 +673,12 @@ bool BytecodeManipulator::InsertMethodCall( // Insert a new entry into line table to account for the new bytecode. if (has_lnotab_) { - int current_offset = 0; - for (auto it = data->lnotab.begin(); it != data->lnotab.end(); it += 2) { - current_offset += it[0]; - - if (current_offset >= offset) { - int remaining_size = size; - while (remaining_size > 0) { - const int current_size = std::min(remaining_size, 0xFF); - it = data->lnotab.insert(it, static_cast(current_size)) + 1; - it = data->lnotab.insert(it, 0) + 1; - remaining_size -= current_size; - } - - break; - } - } + InsertAndUpdateLnotab(offset, size, &data->lnotab); } return true; } +#endif // This method does not change line numbers table. The line numbers table From 1a6e87904b79727cbd2c264712e6fbe56fb3060e Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Thu, 22 Feb 2018 15:17:39 -0800 Subject: [PATCH 110/241] Build Python 3.6 agent in Kokoro script. Also stop copying distutils and setuptools since they are installed already. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=186682723 --- src/googleclouddebugger/bytecode_manipulator.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/googleclouddebugger/bytecode_manipulator.cc b/src/googleclouddebugger/bytecode_manipulator.cc index 9fd4a7b..96ede7b 100644 --- a/src/googleclouddebugger/bytecode_manipulator.cc +++ b/src/googleclouddebugger/bytecode_manipulator.cc @@ -395,7 +395,7 @@ static void InsertAndUpdateLnotab(int offset, int size, // have its offsets fixed and/or upgraded to use EXTENDED_ARG. struct UpdatedInstruction { PythonInstruction instruction; - size_t original_size; + int original_size; int current_offset; }; @@ -475,7 +475,7 @@ static bool InsertAndUpdateBranchInstructions( for (auto it = instructions.begin(); it < instructions.end(); it++) { PythonInstruction instruction = it->instruction; - uint32 arg = instruction.argument; + int32 arg = static_cast(instruction.argument); bool need_to_update = false; PythonOpcodeType opcode_type = GetOpcodeType(instruction.opcode); if (opcode_type == BRANCH_DELTA_OPCODE) { From 714a39856f5eabf0b747998a3fd5ee8c576db1c3 Mon Sep 17 00:00:00 2001 From: erezh Date: Fri, 23 Feb 2018 16:25:18 -0800 Subject: [PATCH 111/241] Rename Cloud Debugger roles to Stackdriver Debugger in open source docs. (following cr/186824115) ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=186840597 --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 9e0241e..4701c5e 100644 --- a/README.md +++ b/README.md @@ -123,8 +123,7 @@ to authenticate with the Cloud Debugger Service. Use the Google Cloud Console Service Accounts [page](https://console.cloud.google.com/iam-admin/serviceaccounts/project) to create a credentials file for an existing or new service-account. The -service-account must have at least the `Cloud Debugger Agent` role to be -accepted by the Cloud Debugger Service. 
+service-account must have at least the `Stackdriver Debugger Agent` role. If you don't have a Google Cloud Platform project, you can create one for free on [Google Cloud Console](https://console.cloud.google.com). From 968c7ec620562c1ffadf76b8880f77818a466d4a Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Tue, 27 Feb 2018 12:03:06 -0800 Subject: [PATCH 112/241] Treat YIELD_FROM opcode same as YIELD, and use the append strategy. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=187211122 --- src/googleclouddebugger/bytecode_manipulator.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/googleclouddebugger/bytecode_manipulator.cc b/src/googleclouddebugger/bytecode_manipulator.cc index 96ede7b..21878f1 100644 --- a/src/googleclouddebugger/bytecode_manipulator.cc +++ b/src/googleclouddebugger/bytecode_manipulator.cc @@ -117,6 +117,9 @@ static int GetInstructionsSize( static PythonOpcodeType GetOpcodeType(uint8 opcode) { switch (opcode) { case YIELD_VALUE: +#if PY_MAJOR_VERSION >= 3 + case YIELD_FROM: +#endif return YIELD_OPCODE; case FOR_ITER: @@ -324,7 +327,7 @@ BytecodeManipulator::BytecodeManipulator( break; } - if (instruction.opcode == YIELD_VALUE) { + if (GetOpcodeType(instruction.opcode) == YIELD_OPCODE) { strategy_ = STRATEGY_APPEND; break; } From 6a1068782443069429587e00e568d61b0c83fcc1 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Thu, 1 Mar 2018 14:14:34 -0800 Subject: [PATCH 113/241] Add support for Python 3 in //devtools/cdbg/e2e/python/gce:e2e_python_gce_test Unfortunately tools like virtualenv and pyenv won't work with this since they are designed for interactive shells and _vm.RunShellCommand starts a new one every time. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=187532256 --- src/build.sh | 3 ++- src/googleclouddebugger/__init__.py | 4 +++- src/googleclouddebugger/gcp_hub_client.py | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/build.sh b/src/build.sh index f9b61be..7c86c71 100644 --- a/src/build.sh +++ b/src/build.sh @@ -84,6 +84,7 @@ library_dirs=${ROOT}/build/third_party/lib" > ${ROOT}/setup.cfg # Build the Python Cloud Debugger agent. pushd ${ROOT} -python setup.py bdist_egg +# Use custom python command if variable is set +"${PYTHON:-python}" setup.py bdist_egg popd diff --git a/src/googleclouddebugger/__init__.py b/src/googleclouddebugger/__init__.py index becf5e4..b4ff721 100644 --- a/src/googleclouddebugger/__init__.py +++ b/src/googleclouddebugger/__init__.py @@ -143,7 +143,9 @@ def _DebuggerMain(): sys.modules['__main__'] = __main__ - exec('execfile(%r)' % app_path, globals, locals) # pylint: disable=exec-used + with open(app_path) as f: + code = compile(f.read(), app_path, 'exec') + exec(code, globals, locals) # pylint: disable=exec-used # pylint: disable=invalid-name diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index bb8dd45..6e9840e 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -461,6 +461,8 @@ def _QueryGcpProject(self, resource): 'HTTP error %s %s when querying local metadata service at %s' % (response['status'], content, url)) + if not isinstance(content, str): + content = content.decode() return content def _GetDebuggee(self): From 96880f41331e86fd4217cbc6111e8130b305eba6 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Fri, 2 Mar 2018 15:59:10 -0800 Subject: [PATCH 114/241] Fix some more unit tests under Python 3. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=187681812 --- src/googleclouddebugger/imphook2.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/googleclouddebugger/imphook2.py b/src/googleclouddebugger/imphook2.py index 7cd652a..4b87876 100644 --- a/src/googleclouddebugger/imphook2.py +++ b/src/googleclouddebugger/imphook2.py @@ -249,10 +249,13 @@ def GetCurrentPackage(globals): # names of real modules. for from_entry in fromlist or []: # Name relative to sys.path. - names.add(name + '.' + from_entry) + # For relative imports such as 'from . import x', name will be the empty + # string. Thus we should not prepend a '.' to the entry. + entry = (name + '.' + from_entry) if name else from_entry + names.add(entry) # Name relative to the currently executing module's package. if curpkg: - names.add(curpkg + '.' + name + '.' + from_entry) + names.add(curpkg + '.' + entry) # Generate all names from name. For instance, if name='a.b.c', then # we need to add ['a.b.c', 'a.b', 'a']. From 78c24ee22c82d3debf9b7320c6cfb2fbbf12280a Mon Sep 17 00:00:00 2001 From: erezh Date: Fri, 2 Mar 2018 16:43:48 -0800 Subject: [PATCH 115/241] Improve exception handing during variable capture. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=187687825 --- src/googleclouddebugger/capture_collector.py | 82 +++++++------------- 1 file changed, 30 insertions(+), 52 deletions(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index f8f9ab8..06413fd 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -393,19 +393,15 @@ def CaptureNamedVariable(self, name, value, depth, limits): Returns: Formatted captured data as per Variable proto with name. """ - try: - if not hasattr(name, '__dict__'): - name = str(name) - else: # TODO(vlif): call str(name) with immutability verifier here. 
- name = str(id(name)) - self._total_size += len(name) - - v = (self.CheckDataVisiblity(value) or - self.CaptureVariable(value, depth, limits)) - v['name'] = name - except RuntimeError as e: - raise RuntimeError( - 'INTERNAL ERROR while capturing {0}: {1}'.format(name, e)) + if not hasattr(name, '__dict__'): + name = str(name) + else: # TODO(vlif): call str(name) with immutability verifier here. + name = str(id(name)) + self._total_size += len(name) + + v = (self.CheckDataVisiblity(value) or + self.CaptureVariable(value, depth, limits)) + v['name'] = name return v def CheckDataVisiblity(self, value): @@ -451,57 +447,39 @@ def CaptureVariablesList(self, items, depth, empty_message, limits): List of formatted variable objects. """ v = [] - try: - for name, value in items: - if (self._total_size >= self.max_size) or ( - len(v) >= limits.max_list_items): - v.append({ - 'status': { - 'refers_to': 'VARIABLE_VALUE', - 'description': { - 'format': - ('Only first $0 items were captured. Use in an ' - 'expression to see all items.'), - 'parameters': [str(len(v))]}}}) - break - v.append(self.CaptureNamedVariable(name, value, depth, limits)) + for name, value in items: + if (self._total_size >= self.max_size) or ( + len(v) >= limits.max_list_items): + v.append({ + 'status': { + 'refers_to': 'VARIABLE_VALUE', + 'description': { + 'format': + ('Only first $0 items were captured. 
Use in an ' + 'expression to see all items.'), + 'parameters': [str(len(v))]}}}) + break + v.append(self.CaptureNamedVariable(name, value, depth, limits)) + + if not v: + return [{'status': { + 'is_error': False, + 'refers_to': 'VARIABLE_NAME', + 'description': {'format': empty_message}}}] - if not v: - return [{'status': { - 'is_error': False, - 'refers_to': 'VARIABLE_NAME', - 'description': {'format': empty_message}}}] - except RuntimeError as e: - raise RuntimeError( - 'Failed while capturing variables: {0}\n' - 'The following elements were successfully captured: {1}'.format( - e, ', '.join([c['name'] for c in v if 'name' in c]))) return v def CaptureVariable(self, value, depth, limits, can_enqueue=True): """Try-Except wrapped version of CaptureVariableInternal.""" try: return self.CaptureVariableInternal(value, depth, limits, can_enqueue) - except RuntimeError as e: - # Record as an error in the variable, and continue iterating. - return { - 'status': { - 'is_error': True, - 'refers_to': 'VARIABLE_VALUE', - 'description': { - 'format': 'Failed while capturing variable: $0', - 'parameters': [str(e)] - } - } - } except BaseException as e: # pylint: disable=broad-except - # Record as an internal error in the variable, and continue iterating. return { 'status': { 'isError': True, + 'refersTo': 'VARIABLE_VALUE', 'description': { - 'format': ('INTERNAL ERROR: Failed while capturing ' - 'variable: $0: $1'), + 'format': ('Failed to capture variable: $0'), 'parameters': [str(e)] } } From cba273ec4b215017a55e36840b2ac73ca8dc9ebf Mon Sep 17 00:00:00 2001 From: erezh Date: Fri, 2 Mar 2018 20:13:32 -0800 Subject: [PATCH 116/241] Use lowerCamelCase in Python agent code. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=187702795 --- src/googleclouddebugger/capture_collector.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 06413fd..7fd5e92 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -426,8 +426,8 @@ def CheckDataVisiblity(self, value): return { 'status': { - 'is_error': True, - 'refers_to': 'VARIABLE_NAME', + 'isError': True, + 'refersTo': 'VARIABLE_NAME', 'description': { 'format': reason } @@ -452,7 +452,7 @@ def CaptureVariablesList(self, items, depth, empty_message, limits): len(v) >= limits.max_list_items): v.append({ 'status': { - 'refers_to': 'VARIABLE_VALUE', + 'refersTo': 'VARIABLE_VALUE', 'description': { 'format': ('Only first $0 items were captured. Use in an ' @@ -463,8 +463,7 @@ def CaptureVariablesList(self, items, depth, empty_message, limits): if not v: return [{'status': { - 'is_error': False, - 'refers_to': 'VARIABLE_NAME', + 'refersTo': 'VARIABLE_NAME', 'description': {'format': empty_message}}}] return v @@ -648,7 +647,7 @@ def _CaptureUserId(self): """Captures the user id of the end user, if possible.""" user_kind, user_id = user_id_collector() if user_kind and user_id: - self.breakpoint['evaluated_user_id'] = {'kind': user_kind, 'id': user_id} + self.breakpoint['evaluatedUserId'] = {'kind': user_kind, 'id': user_id} class LogCollector(object): From 5f5d70980f42c6ca85156cbf4fc4c98b6ecf5061 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Mon, 5 Mar 2018 11:30:39 -0800 Subject: [PATCH 117/241] Report Python version in the debuggee. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=187896037 --- src/googleclouddebugger/gcp_hub_client.py | 5 +++-- src/setup.py | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index 6e9840e..1bcf436 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -467,13 +467,14 @@ def _QueryGcpProject(self, resource): def _GetDebuggee(self): """Builds the debuggee structure.""" - major_version = version.__version__.split('.')[0] + major_version = 'v' + version.__version__.split('.')[0] + python_version = 'python36-gcp' if six.PY3 else 'python27-gcp' debuggee = { 'project': self._project_number(), 'description': self._GetDebuggeeDescription(), 'labels': self._debuggee_labels, - 'agentVersion': 'google.com/python27-gcp/v' + major_version + 'agentVersion': 'google.com/%s/%s' % (python_version, major_version) } source_context = self._ReadAppJsonFile('source-context.json') diff --git a/src/setup.py b/src/setup.py index 98b4667..d4b0e75 100644 --- a/src/setup.py +++ b/src/setup.py @@ -113,6 +113,7 @@ def ReadConfig(section, value, default): keywords='google cloud debugger', classifiers=[ 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3.6', 'Development Status :: 3 - Alpha', 'Intended Audience :: Developers' ]) From d03fcbd8c862aff3326d6cb2a6d26c318ca0e045 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Wed, 14 Mar 2018 17:41:38 -0700 Subject: [PATCH 118/241] Fix bugs in the bytecode manipulator under Python 3. Calculate offset from scratch for each breakpoint insertion. Update line number table entries instead of inserting a new one. Account for unneeded EXTENDED_ARGS when writing new instructions.
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=189114944 --- .../bytecode_breakpoint.cc | 32 +++++-- src/googleclouddebugger/bytecode_breakpoint.h | 3 + .../bytecode_manipulator.cc | 87 ++++++++++++++----- 3 files changed, 93 insertions(+), 29 deletions(-) diff --git a/src/googleclouddebugger/bytecode_breakpoint.cc b/src/googleclouddebugger/bytecode_breakpoint.cc index 8074760..ed53eab 100644 --- a/src/googleclouddebugger/bytecode_breakpoint.cc +++ b/src/googleclouddebugger/bytecode_breakpoint.cc @@ -95,6 +95,7 @@ int BytecodeBreakpoint::SetBreakpoint( std::unique_ptr breakpoint(new Breakpoint); breakpoint->code_object = ScopedPyCodeObject::NewReference(code_object); + breakpoint->line = line; breakpoint->offset = lines_enumerator.offset(); breakpoint->hit_callable = PythonCallback::Wrap(hit_callback); breakpoint->error_callback = error_callback; @@ -251,15 +252,38 @@ void BytecodeBreakpoint::PatchCodeObject(CodeObjectBreakpoints* code) { for (auto it_entry = code->breakpoints.begin(); it_entry != code->breakpoints.end(); ++it_entry, ++const_index) { - const int offset = it_entry->first; + int offset = it_entry->first; + bool offset_found = true; const Breakpoint& breakpoint = *it_entry->second; DCHECK_EQ(offset, breakpoint.offset); callbacks.push_back(breakpoint.hit_callable.get()); - if (!bytecode_manipulator.InjectMethodCall(offset, const_index)) { +#if PY_MAJOR_VERSION >= 3 + // In Python 3, since we allow upgrading of instructions to use + // EXTENDED_ARG, the offsets for lines originally calculated might not be + // accurate, so we need to recalculate them each insertion. 
+ offset_found = false; + if (bytecode_manipulator.has_lnotab()) { + ScopedPyObject lnotab(PyBytes_FromStringAndSize( + reinterpret_cast(bytecode_manipulator.lnotab().data()), + bytecode_manipulator.lnotab().size())); + CodeObjectLinesEnumerator lines_enumerator(code_object->co_firstlineno, + lnotab.release()); + while (lines_enumerator.line_number() != breakpoint.line) { + if (!lines_enumerator.Next()) { + break; + } + offset = lines_enumerator.offset(); + } + offset_found = lines_enumerator.line_number() == breakpoint.line; + } +#endif + + if (!offset_found || + !bytecode_manipulator.InjectMethodCall(offset, const_index)) { LOG(WARNING) << "Failed to insert bytecode for breakpoint " - << breakpoint.cookie; + << breakpoint.cookie << " at line " << breakpoint.line; errors.push_back(breakpoint.error_callback); } } @@ -299,5 +323,3 @@ void BytecodeBreakpoint::PatchCodeObject(CodeObjectBreakpoints* code) { } // namespace cdbg } // namespace devtools - - diff --git a/src/googleclouddebugger/bytecode_breakpoint.h b/src/googleclouddebugger/bytecode_breakpoint.h index 5256125..bbc9da9 100644 --- a/src/googleclouddebugger/bytecode_breakpoint.h +++ b/src/googleclouddebugger/bytecode_breakpoint.h @@ -61,6 +61,9 @@ class BytecodeBreakpoint { // Method in which the breakpoint is set. ScopedPyCodeObject code_object; + // Line number on which the breakpoint is set. + int line; + // Offset to the instruction on which the breakpoint is set. int offset; diff --git a/src/googleclouddebugger/bytecode_manipulator.cc b/src/googleclouddebugger/bytecode_manipulator.cc index 21878f1..e6b72a6 100644 --- a/src/googleclouddebugger/bytecode_manipulator.cc +++ b/src/googleclouddebugger/bytecode_manipulator.cc @@ -363,27 +363,6 @@ bool BytecodeManipulator::InjectMethodCall( } -// Inserts an entry into the line number table for an insertion in the bytecode. 
-static void InsertAndUpdateLnotab(int offset, int size, - std::vector *lnotab) { - int current_offset = 0; - for (auto it = lnotab->begin(); it != lnotab->end(); it += 2) { - current_offset += it[0]; - - if (current_offset >= offset) { - int remaining_size = size; - while (remaining_size > 0) { - const int current_size = std::min(remaining_size, 0xFF); - it = lnotab->insert(it, static_cast(current_size)) + 1; - it = lnotab->insert(it, 0) + 1; - remaining_size -= current_size; - } - return; - } - } -} - - // Use different algorithms to insert method calls for Python 2 and 3. // Technically the algorithm for Python 3 will work with Python 2, but because // it is more complicated and the issue of needing to upgrade branch @@ -413,6 +392,42 @@ struct Insertion { // InsertAndUpdateBranchInstructions. static const int kMaxInsertionIterations = 10; + +// Updates the line number table for an insertion in the bytecode. +// This is different than what the Python 2 version of InsertMethodCall() does. +// It should be more accurate, but is confined to Python 3 only for safety. +// This handles the case of adding insertion for EXTENDED_ARG better. +// Example for inserting 2 bytes at offset 2: +// lnotab: [{2, 1}, {4, 1}] // {offset_delta, line_delta} +// Old algorithm: [{2, 0}, {2, 1}, {4, 1}] +// New algorithm: [{2, 1}, {6, 1}] +// In the old version, trying to get the offset to insert a breakpoint right +// before line 1 would result in an offset of 2, which is inaccurate as the +// instruction before is an EXTENDED_ARG which will now be applied to the first +// instruction inserted instead of its original target. 
+static void InsertAndUpdateLnotab(int offset, int size, + std::vector* lnotab) { + int current_offset = 0; + for (auto it = lnotab->begin(); it != lnotab->end(); it += 2) { + current_offset += it[0]; + + if (current_offset > offset) { + int remaining_size = it[0] + size; + int remaining_lines = it[1]; + it = lnotab->erase(it, it + 2); + while (remaining_size > 0xFF) { + it = lnotab->insert(it, 0xFF) + 1; + it = lnotab->insert(it, 0) + 1; + remaining_size -= 0xFF; + } + it = lnotab->insert(it, remaining_size) + 1; + it = lnotab->insert(it, remaining_lines) + 1; + return; + } + } +} + + // Reserves space for instructions to be inserted into the bytecode, and // calculates the new offsets and arguments of branch instructions. // Returns true if the calculation was successful, and false if too many @@ -484,7 +499,11 @@ static bool InsertAndUpdateBranchInstructions( if (opcode_type == BRANCH_DELTA_OPCODE) { // For relative branches, the argument needs to be updated if the // insertion is between the instruction and the target. - int32 target = it->current_offset + instruction.size + arg; + // The Python compiler sometimes prematurely adds EXTENDED_ARG with an + // argument of 0 even when it is not required. This needs to be taken + // into account when calculating the target of a branch instruction. 
+ int inst_size = std::max(instruction.size, it->original_size); + int32 target = it->current_offset + inst_size + arg; need_to_update = it->current_offset < insertion.current_offset && insertion.current_offset < target; } else if (opcode_type == BRANCH_ABSOLUTE_OPCODE) { @@ -578,12 +597,17 @@ bool BytecodeManipulator::InsertMethodCall( for (auto it = updated_instructions.begin(); it < updated_instructions.end(); it++) { int size_diff = it->instruction.size - it->original_size; - uint32 offset = it->current_offset; + int offset = it->current_offset; if (size_diff > 0) { data->bytecode.insert(data->bytecode.begin() + offset, size_diff, NOP); if (has_lnotab_) { InsertAndUpdateLnotab(it->current_offset, size_diff, &data->lnotab); } + } else if (size_diff < 0) { + // The Python compiler sometimes prematurely adds EXTENDED_ARG with an + // argument of 0 even when it is not required. Just leave it there, but + // start writing the instruction after them. + offset -= size_diff; } WriteInstruction(data->bytecode.begin() + offset, it->instruction); } @@ -676,7 +700,22 @@ bool BytecodeManipulator::InsertMethodCall( // Insert a new entry into line table to account for the new bytecode. 
if (has_lnotab_) { - InsertAndUpdateLnotab(offset, size, &data->lnotab); + int current_offset = 0; + for (auto it = data->lnotab.begin(); it != data->lnotab.end(); it += 2) { + current_offset += it[0]; + + if (current_offset >= offset) { + int remaining_size = size; + while (remaining_size > 0) { + const int current_size = std::min(remaining_size, 0xFF); + it = data->lnotab.insert(it, static_cast(current_size)) + 1; + it = data->lnotab.insert(it, 0) + 1; + remaining_size -= current_size; + } + + break; + } + } } return true; From 979c61dabd5aae7ddf830be0147ce579565224c0 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Thu, 15 Mar 2018 14:29:47 -0700 Subject: [PATCH 119/241] Increment Python debugger agent version to 2.5 ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=189246382 --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index ccd0088..fd05a61 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. -__version__ = '2.4' +__version__ = '2.5' From 83a13e77b120bfb0cfd0c50e3dbe0f99a302eb1d Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Fri, 16 Mar 2018 12:19:24 -0700 Subject: [PATCH 120/241] Add notes to GitHub README about Python 3 support. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=189375958 --- README.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4701c5e..d6fa908 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,8 @@ tested on Debian Linux, but it should work on other distributions as well. Cloud Debugger consists of 3 primary components: -1. 
The Python debugger agent (this repo implements one for Python 2.7). +1. The Python debugger agent (this repo implements one for CPython 2.7, and an + experimental one for CPython 3.6). 2. Cloud Debugger service storing and managing snapshots/logpoints. Explore the API's using [APIs Explorer](https://developers.google.com/apis-explorer/#p/clouddebugger/v2/). @@ -77,6 +78,15 @@ sudo apt-get -y -q --no-install-recommends install \ python python-dev libpython2.7 python-setuptools ``` +### Python 3 + +There is experimental support for Python 3.6. Python 3.0 to 3.5 are not +supported, and newer versions have not been tested. + +To build, the `python3.6` and `python3.6-dev` packages are additionally needed. +If Python 3.6 is not the default version of the 'python' command on your system, +run the build script as `PYTHON=python3.6 ./build.sh`. + ## Setup ### Google Cloud Platform From 81396e32800c7c57d137e5ad388f6be435b008f1 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Mon, 19 Mar 2018 17:07:22 -0700 Subject: [PATCH 121/241] Support breakpoint rollout in python agent. While doing this, I noticed that there is no e2e test for this for Java/C++ either, and had to test it manually. Should there be an e2e test? It would require more resources since it has to spin up more jobs. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=189666397 --- src/googleclouddebugger/breakpoints_manager.py | 5 +++-- src/googleclouddebugger/python_breakpoint.py | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/googleclouddebugger/breakpoints_manager.py b/src/googleclouddebugger/breakpoints_manager.py index 7cea2e4..4865eee 100644 --- a/src/googleclouddebugger/breakpoints_manager.py +++ b/src/googleclouddebugger/breakpoints_manager.py @@ -118,7 +118,7 @@ def CompleteBreakpoint(self, breakpoint_id): def CheckBreakpointsExpiration(self): """Completes all breakpoints that have been active for too long.""" with self._lock: - current_time = BreakpointsManager._GetCurrentTime() + current_time = BreakpointsManager.GetCurrentTime() if self._next_expiration > current_time: return @@ -134,8 +134,9 @@ def CheckBreakpointsExpiration(self): for breakpoint in expired_breakpoints: breakpoint.ExpireBreakpoint() + @staticmethod - def _GetCurrentTime(): + def GetCurrentTime(): """Wrapper around datetime.now() function. The datetime class is a built-in one and therefore not patchable by unit diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index 657375b..dad434c 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -82,6 +82,7 @@ 'refersTo': 'BREAKPOINT_CONDITION', 'description': {'format': ERROR_CONDITION_MUTABLE_0}})]) + # The implementation of datetime.strptime imports an undocumented module called # _strptime. If it happens at the wrong time, we can get an exception about # trying to import while another thread holds the import lock. This dummy call @@ -178,6 +179,7 @@ def __init__(self, definition, hub_client, breakpoints_manager, # Breakpoint expiration time. 
self.expiration_period = timedelta(hours=24) + self._hub_client = hub_client self._breakpoints_manager = breakpoints_manager self._cookie = None @@ -301,6 +303,7 @@ def ExpireBreakpoint(self): 'refersTo': 'BREAKPOINT_AGE', 'description': {'format': message}}}) + def _ActivateBreakpoint(self, module): """Sets the breakpoint in the loaded module, or complete with error.""" @@ -455,4 +458,3 @@ def _BreakpointEvent(self, event, frame): return self._CompleteBreakpoint(collector.breakpoint, is_incremental=False) - From 04c3119d323af037182b683173ece01a8cef2b77 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Thu, 22 Mar 2018 15:02:43 -0700 Subject: [PATCH 122/241] Don't decrement reference count if Python is not running. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=190131752 --- src/googleclouddebugger/python_util.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/googleclouddebugger/python_util.h b/src/googleclouddebugger/python_util.h index 7ab71c2..f4db534 100644 --- a/src/googleclouddebugger/python_util.h +++ b/src/googleclouddebugger/python_util.h @@ -69,7 +69,13 @@ class ScopedPyObjectT { } ~ScopedPyObjectT() { - reset(nullptr); + // Only do anything if Python is running. If not, we might get a + // segfault when we try to decrement the reference count of the underlying + // object when this destructor is run after Python itself has cleaned up. + // https://bugs.python.org/issue17703 + if (Py_IsInitialized()) { + reset(nullptr); + } } static ScopedPyObjectT NewReference(TPointer* obj) { From 8cfb266dcfcb9e3faad7d25eb344590e3685ca37 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Thu, 22 Mar 2018 15:40:05 -0700 Subject: [PATCH 123/241] Specify explicitly to not cache the discovery document to prevent warning log from import error.
https://stackoverflow.com/questions/40154672 ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=190137771 --- src/googleclouddebugger/gcp_hub_client.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index 1bcf436..ae55d07 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -278,7 +278,8 @@ def _BuildService(self): http = httplib2.Http(timeout=_HTTP_TIMEOUT_SECONDS) http = self._credentials.authorize(http) - api = apiclient.discovery.build('clouddebugger', 'v2', http=http) + api = apiclient.discovery.build( + 'clouddebugger', 'v2', http=http, cache_discovery=False) return api.controller() def _MainThreadProc(self): From ca49ba8d311d516137d678ab2514f2dbf7678b0c Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Thu, 22 Mar 2018 15:41:27 -0700 Subject: [PATCH 124/241] Increment Python debugger version. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=190137963 --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index fd05a61..59c306f 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. -__version__ = '2.5' +__version__ = '2.6' From 988f22fbb40fb3f4c4d98c80f68fc6983babeeb1 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Tue, 27 Mar 2018 10:58:47 -0700 Subject: [PATCH 125/241] Make the agent buildable on Python 3.7. There won't be any releases made for 3.7 yet. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=190639613 --- src/googleclouddebugger/gcp_hub_client.py | 7 +++++-- src/googleclouddebugger/immutability_tracer.cc | 8 ++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index ae55d07..8d978b1 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -21,6 +21,7 @@ import json import logging import os +import platform import sys import threading import time @@ -469,13 +470,15 @@ def _QueryGcpProject(self, resource): def _GetDebuggee(self): """Builds the debuggee structure.""" major_version = 'v' + version.__version__.split('.')[0] - python_version = 'python36-gcp' if six.PY3 else 'python27-gcp' + python_version = ''.join(platform.python_version().split('.')[:2]) + agent_version = ('google.com/python%s-gcp/%s' % (python_version, + major_version)) debuggee = { 'project': self._project_number(), 'description': self._GetDebuggeeDescription(), 'labels': self._debuggee_labels, - 'agentVersion': 'google.com/%s/%s' % (python_version, major_version) + 'agentVersion': agent_version, } source_context = self._ReadAppJsonFile('source-context.json') diff --git a/src/googleclouddebugger/immutability_tracer.cc b/src/googleclouddebugger/immutability_tracer.cc index fab0861..1744863 100644 --- a/src/googleclouddebugger/immutability_tracer.cc +++ b/src/googleclouddebugger/immutability_tracer.cc @@ -380,6 +380,11 @@ static OpcodeMutableStatus IsOpcodeMutable(const uint8 opcode) { case BUILD_CONST_KEY_MAP: case BUILD_STRING: case BUILD_TUPLE_UNPACK_WITH_CALL: +#if PY_VERSION_HEX >= 0x03070000 + // Added in Python 3.7. 
+ case LOAD_METHOD: + case CALL_METHOD: +#endif #else case ROT_FOUR: case DUP_TOPX: @@ -436,7 +441,10 @@ static OpcodeMutableStatus IsOpcodeMutable(const uint8 opcode) { case WITH_CLEANUP_FINISH: case SETUP_ANNOTATIONS: case POP_EXCEPT: +#if PY_VERSION_HEX < 0x03070000 + // Removed in Python 3.7. case STORE_ANNOTATION: +#endif case DELETE_DEREF: case SETUP_ASYNC_WITH: #else From 0fababae7dea123f8e8e5ce39ff4316bb736ba3b Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Thu, 29 Mar 2018 13:10:55 -0700 Subject: [PATCH 126/241] Replace importlib.import_module in Python 3. In Python 3, importlib.import_module does not call __import__ internally so our import hook would not get called when it is used. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=190974685 --- src/googleclouddebugger/imphook2.py | 127 +++++++++++++++++++++++----- 1 file changed, 105 insertions(+), 22 deletions(-) diff --git a/src/googleclouddebugger/imphook2.py b/src/googleclouddebugger/imphook2.py index 4b87876..4ad205e 100644 --- a/src/googleclouddebugger/imphook2.py +++ b/src/googleclouddebugger/imphook2.py @@ -30,10 +30,13 @@ For the old module import hook, see imphook.py file. """ +import importlib +import itertools import os import sys # Must be imported, otherwise import hooks don't work. import threading +import six from six.moves import builtins # pylint: disable=redefined-builtin from . import module_utils2 @@ -48,6 +51,10 @@ # Original __import__ function if import hook is installed or None otherwise. _real_import = None +# Original importlib.import_module function if import hook is installed or None +# otherwise. +_real_import_module = None + def AddImportCallbackBySuffix(path, callback): """Register import hook. 
@@ -96,7 +103,6 @@ def RemoveCallback(): def _InstallImportHookBySuffix(): """Lazily installs import hook.""" - global _real_import if _real_import: @@ -104,18 +110,20 @@ def _InstallImportHookBySuffix(): _real_import = getattr(builtins, '__import__') assert _real_import - builtins.__import__ = _ImportHookBySuffix + if six.PY3: + # In Python 2, importlib.import_module calls __import__ internally so + # overriding __import__ is enough. In Python 3, they are separate so it also + # needs to be overwritten. + global _real_import_module + _real_import_module = importlib.import_module + assert _real_import_module + importlib.import_module = _ImportModuleHookBySuffix -# pylint: disable=redefined-builtin, g-doc-args, g-doc-return-or-yield -def _ImportHookBySuffix( - name, globals=None, locals=None, fromlist=None, level=-1): - """Callback when an import statement is executed by the Python interpreter. - Argument names have to exactly match those of __import__. Otherwise calls - to __import__ that use keyword syntax will fail: __import('a', fromlist=[]). - """ +def _IncrementNestLevel(): + """Increments the per thread nest level of imports.""" # This is the top call to import (no nesting), init the per-thread nest level # and names set. if getattr(_import_local, 'nest_level', None) is None: @@ -128,6 +136,47 @@ def _ImportHookBySuffix( _import_local.nest_level += 1 + +# pylint: disable=redefined-builtin +def _ProcessImportBySuffix(name, fromlist, globals): + """Processes an import. + + Calculates the possible names generated from an import and invokes + registered callbacks if needed. + + Args: + name: Argument as passed to the importer. + fromlist: Argument as passed to the importer. + globals: Argument as passed to the importer. + """ + _import_local.nest_level -= 1 + + # To improve common code path performance, compute the loaded modules only + # if there are any import callbacks. 
+ if _import_callbacks: + # Collect the names of all modules that might be newly loaded as a result + # of this import. Add them in a thread-local list. + _import_local.names |= _GenerateNames(name, fromlist, globals) + + # Invoke the callbacks only on the top-level import call. + if _import_local.nest_level == 0: + _InvokeImportCallbackBySuffix(_import_local.names) + + # To be safe, we clear the names set every time we exit a top level import. + if _import_local.nest_level == 0: + _import_local.names.clear() + + +# pylint: disable=redefined-builtin, g-doc-args, g-doc-return-or-yield +def _ImportHookBySuffix( + name, globals=None, locals=None, fromlist=None, level=-1): + """Callback when an import statement is executed by the Python interpreter. + + Argument names have to exactly match those of __import__. Otherwise calls + to __import__ that use keyword syntax will fail: __import('a', fromlist=[]). + """ + _IncrementNestLevel() + try: # Really import modules. module = _real_import(name, globals, locals, fromlist, level) @@ -145,22 +194,56 @@ def _ImportHookBySuffix( # # Important Note: Do not use 'return' inside the finally block. It will # cause any pending exception to be discarded. - _import_local.nest_level -= 1 + _ProcessImportBySuffix(name, fromlist, globals) - # To improve common code path performance, compute the loaded modules only - # if there are any import callbacks. - if _import_callbacks: - # Collect the names of all modules that might be newly loaded as a result - # of this import. Add them in a thread-local list. - _import_local.names |= _GenerateNames(name, fromlist, globals) + return module - # Invoke the callbacks only on the top-level import call. - if _import_local.nest_level == 0: - _InvokeImportCallbackBySuffix(_import_local.names) - # To be safe, we clear the names set every time we exit a top level import. 
- if _import_local.nest_level == 0: - _import_local.names.clear() +def _ResolveRelativeImport(name, package): + """Resolves a relative import into an absolute path. + + This is mostly an adapted version of the logic found in the backported + version of import_module in Python 2.7. + https://github.com/python/cpython/blob/2.7/Lib/importlib/__init__.py + + Args: + name: relative name imported, such as '.a' or '..b.c' + package: absolute package path, such as 'a.b.c.d.e' + + Returns: + The absolute path of the name to be imported, or None if it is invalid. + Examples: + _ResolveRelativeImport('.c', 'a.b') -> 'a.b.c' + _ResolveRelativeImport('..c', 'a.b') -> 'a.c' + _ResolveRelativeImport('...c', 'a.c') -> None + """ + level = sum(1 for c in itertools.takewhile(lambda c: c == '.', name)) + if level == 1: + return package + name + else: + parts = package.split('.')[:-(level - 1)] + if not parts: + return None + parts.append(name[level:]) + return '.'.join(parts) + + +def _ImportModuleHookBySuffix(name, package=None): + """Callback when a module is imported through importlib.import_module.""" + _IncrementNestLevel() + + try: + # Really import modules. + module = _real_import_module(name, package) + finally: + if name.startswith('.'): + if package: + name = _ResolveRelativeImport(name, package) + else: + # Should not happen. Relative imports require the package argument. + name = None + if name: + _ProcessImportBySuffix(name, None, None) return module From 8a11d284e2b3f06f1a9433f9f9e37aeaf7f1feb9 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Fri, 30 Mar 2018 12:25:19 -0700 Subject: [PATCH 127/241] Handle cases where module.__file__ is set to None. I don't know why this would be the case, namespace modules don't have a __file__ attribute at all when I tried it, but they should just be skipped anyway. 
https://github.com/GoogleCloudPlatform/cloud-debug-python/issues/4 ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=191100585 --- src/googleclouddebugger/module_utils2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/module_utils2.py b/src/googleclouddebugger/module_utils2.py index b7d9f2f..996209f 100644 --- a/src/googleclouddebugger/module_utils2.py +++ b/src/googleclouddebugger/module_utils2.py @@ -57,7 +57,7 @@ def GetLoadedModuleBySuffix(path): """ root = os.path.splitext(path)[0] for module in sys.modules.values(): - mod_root = os.path.splitext(getattr(module, '__file__', ''))[0] + mod_root = os.path.splitext(getattr(module, '__file__', None) or '')[0] if not mod_root: continue From 9b0c8d1f53486f26aa558f808a08c9dcb39e0373 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Fri, 30 Mar 2018 13:21:40 -0700 Subject: [PATCH 128/241] Set default of the level argument of __import__ to 0 in Python 3. https://github.com/GoogleCloudPlatform/cloud-debug-python/issues/5 ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=191107673 --- src/googleclouddebugger/imphook2.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/googleclouddebugger/imphook2.py b/src/googleclouddebugger/imphook2.py index 4ad205e..ba94484 100644 --- a/src/googleclouddebugger/imphook2.py +++ b/src/googleclouddebugger/imphook2.py @@ -169,7 +169,7 @@ def _ProcessImportBySuffix(name, fromlist, globals): # pylint: disable=redefined-builtin, g-doc-args, g-doc-return-or-yield def _ImportHookBySuffix( - name, globals=None, locals=None, fromlist=None, level=-1): + name, globals=None, locals=None, fromlist=None, level=None): """Callback when an import statement is executed by the Python interpreter. Argument names have to exactly match those of __import__. 
Otherwise calls @@ -177,6 +177,15 @@ def _ImportHookBySuffix( """ _IncrementNestLevel() + if level is None: + # A level of 0 means absolute import, positive values means relative + # imports, and -1 means to try both an absolute and relative import. + # Since imports were disambiguated in Python 3, -1 is not a valid value. + # The default values are 0 and -1 for Python 3 and 2 respectively. + # https://docs.python.org/2/library/functions.html#__import__ + # https://docs.python.org/3/library/functions.html#__import__ + level = 0 if six.PY3 else -1 + try: # Really import modules. module = _real_import(name, globals, locals, fromlist, level) From e01407ffc16c80ad0a3bea49d07654156d8adc22 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Fri, 30 Mar 2018 16:23:28 -0700 Subject: [PATCH 129/241] Call list() on dict.items() in Python 3 to avoid 'dict changed size during iteration'. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=191133041 --- src/googleclouddebugger/capture_collector.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 7fd5e92..b5966d7 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -19,6 +19,7 @@ import copy import datetime import inspect +import itertools import logging import os import re @@ -566,7 +567,14 @@ def CaptureVariableInternal(self, value, depth, limits, can_enqueue=True): return {'value': r} # Add an additional depth for the object itself - members = self.CaptureVariablesList(value.__dict__.items(), depth + 2, + items = value.__dict__.items() + if six.PY3: + # Make a list of the iterator in Python 3, to avoid 'dict changed size + # during iteration' errors from GC happening in the middle. + # Only limits.max_list_items + 1 items are copied, anything past that will + # get ignored by CaptureVariablesList().
+ items = list(itertools.islice(items, limits.max_list_items + 1)) + members = self.CaptureVariablesList(items, depth + 2, OBJECT_HAS_NO_FIELDS, limits) v = {'members': members} From c53a3e6a3294ae83c1a15d8a24456fee906428a7 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Tue, 3 Apr 2018 12:57:25 -0700 Subject: [PATCH 130/241] Increment python agent version. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=191486696 --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index 59c306f..ce00ef0 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. -__version__ = '2.6' +__version__ = '2.7' From 2a634c917f99b05ea42909b847e4d3741d634800 Mon Sep 17 00:00:00 2001 From: marmstrong Date: Mon, 9 Apr 2018 07:36:13 -0700 Subject: [PATCH 131/241] Include standard headers when libc++ requires them. This CL applies the google3-build-missing-std-includes clang tidy check. 
More information: go/lsc-add-std-includes, http://g3doc/devtools/cymbal/clang_tidy/g3doc/checks/google3-build-missing-std-includes Tested: TAP sample presubmit queue http://test/OCL:192066538:BASE:192066604:1523229341298:b5321576 ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=192124980 --- src/googleclouddebugger/bytecode_manipulator.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/googleclouddebugger/bytecode_manipulator.cc b/src/googleclouddebugger/bytecode_manipulator.cc index e6b72a6..6bdb09a 100644 --- a/src/googleclouddebugger/bytecode_manipulator.cc +++ b/src/googleclouddebugger/bytecode_manipulator.cc @@ -18,6 +18,7 @@ #include "common.h" #include "bytecode_manipulator.h" +#include namespace devtools { namespace cdbg { From fb8bc64b4c269dbebf780c62c57474fcf9fede42 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Mon, 9 Apr 2018 10:52:42 -0700 Subject: [PATCH 132/241] Refactor auth in the Python agent. It now uses the google.auth library instead of the deprecated oauth2client. The project id is retrieved from the credentials, and the project number defaults to the project id if not specified. No flags are needed if application default credentials are available, but the --service_account_json_file, --project_id, and --project_number flags can still override the defaults. Support for P12 credentials has been removed. They are not mentioned anywhere in the docs anymore, and oauth2client itself removed the function we used for P12 credentials in 2016.
https://github.com/google/oauth2client/releases/tag/v2.0.0 ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=192151169 --- README.md | 96 ++++++++------- src/googleclouddebugger/__init__.py | 24 +--- src/googleclouddebugger/gcp_hub_client.py | 141 +++++++++------------- src/setup.py | 10 +- 4 files changed, 125 insertions(+), 146 deletions(-) diff --git a/README.md b/README.md index d6fa908..3ab465c 100644 --- a/README.md +++ b/README.md @@ -99,10 +99,10 @@ run the build script as `PYTHON=python3.6 ./build.sh`. account of the Virtual Machine. It is possible to use the Python debugger agent without it. Please see the - [next section](#Service_Account) for details. + [next section](#outside-google-cloud-platform) for details. 1. Install the Python debugger agent as explained in the - [Installation](#Installation) section. + [Installation](#installation) section. 2. Enable the debugger in your application using one of the two options: @@ -124,54 +124,69 @@ run the build script as `PYTHON=python3.6 ./build.sh`. -### Service Account +### Outside Google Cloud Platform To use the Python debugger agent on machines not hosted by Google Cloud -Platform, the agent must use a Google Cloud Platform service-account credentials -to authenticate with the Cloud Debugger Service. +Platform, you must set up credentials to authenticate with Google Cloud APIs. By +default, the debugger agent tries to find the [Application Default +Credentials](https://cloud.google.com/docs/authentication/production) on the +system. This can either be from your personal account or a dedicated service +account. -Use the Google Cloud Console Service Accounts -[page](https://console.cloud.google.com/iam-admin/serviceaccounts/project) to -create a credentials file for an existing or new service-account. The -service-account must have at least the `Stackdriver Debugger Agent` role. 
-If you don't have a Google Cloud Platform project, you can create one for free -on [Google Cloud Console](https://console.cloud.google.com). +#### Personal Account -Once you have the service-account JSON file, deploy it alongside the Python -debugger agent. +1. Set up Application Default Credentials through + [gcloud](https://cloud.google.com/sdk/gcloud/reference/auth/application-default/login). -Once you have the service account, please note the service account e-mail, -project ID and project number. + ```shell + gcloud auth application-default login + ``` -Then, enable the debugger agent using one of these two options: +2. Follow the rest of the steps in the [GCP](#google-cloud-platform) section. -_Option A_: add this code to the beginning of your `main()` function: +#### Service Account -```python -# Attach Python Cloud Debugger -try: - import googleclouddebugger - googleclouddebugger.enable( - enable_service_account_auth=True, - project_id='my-gcp-project-id', - project_number='123456789', - service_account_json_file='/opt/cdbg/gcp-svc.json') -except ImportError: - pass -``` +1. Use the Google Cloud Console Service Accounts + [page](https://console.cloud.google.com/iam-admin/serviceaccounts/project) + to create a credentials file for an existing or new service account. The + service account must have at least the `Stackdriver Debugger Agent` role. + +2. Once you have the service account credentials JSON file, deploy it alongside + the Python debugger agent. + +3. Set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable. + + ```shell + export GOOGLE_APPLICATION_CREDENTIALS=/path/to/credentials.json + ``` + + Alternatively, you can provide the path to the credentials file directly to + the debugger agent. 
+ + _Option A_: + + ```python + # Attach Python Cloud Debugger + try: + import googleclouddebugger + googleclouddebugger.enable( + service_account_json_file='/path/to/credentials.json') + except ImportError: + pass + ``` -_Option B_: run the debugger agent as a module: + _Option B_: + +
+    python \
+        -m googleclouddebugger \
+        --service_account_json_file=/path/to/credentials.json \
+        -- \
+        myapp.py
+    
+ +4. Follow the rest of the steps in the [GCP](#google-cloud-platform) section. -
-python \
-    -m googleclouddebugger \
-    --enable_service_account_auth=1 \
-    --project_id=my-gcp-project-id \
-    --project_number=123456789 \
-    --service_account_json_file=/opt/cdbg/gcp-svc.json \
-    -- \
-    myapp.py
-
### Django Web Framework @@ -196,4 +211,3 @@ Alternatively, you can pass the `--noreload` flag when running the Django `manage.py` and use any one of the option A and B listed earlier. Note that using the `--noreload` flag disables the autoreload feature in Django, which means local changes to files will not be automatically picked up by Django. - diff --git a/src/googleclouddebugger/__init__.py b/src/googleclouddebugger/__init__.py index b4ff721..1c7d288 100644 --- a/src/googleclouddebugger/__init__.py +++ b/src/googleclouddebugger/__init__.py @@ -68,26 +68,10 @@ def _StartDebugger(): _hub_client.on_active_breakpoints_changed = ( _breakpoints_manager.SetActiveBreakpoints) _hub_client.on_idle = _breakpoints_manager.CheckBreakpointsExpiration - if _flags.get('enable_service_account_auth') in ('1', 'true', True): - if _flags.get('service_account_p12_file'): - try: - _hub_client.EnableServiceAccountAuthP12( - _flags['project_id'], - _flags['project_number'], - _flags['service_account_email'], - _flags['service_account_p12_file']) - except NotImplementedError as e: - raise NotImplementedError( - '{0}\nYou must specify project_id, project_number, and ' - 'service_account_json_file in order to use service account ' - 'authentication.'.format(e)) - else: - _hub_client.EnableServiceAccountAuthJson( - _flags['project_id'], - _flags['project_number'], - _flags['service_account_json_file']) - else: - _hub_client.EnableGceAuth() + _hub_client.SetupAuth( + _flags.get('project_id'), + _flags.get('project_number'), + _flags.get('service_account_json_file')) _hub_client.InitializeDebuggeeLabels(_flags) _hub_client.Start() diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index 8d978b1..365bf33 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -30,19 +30,19 @@ import apiclient -from apiclient import discovery # pylint: disable=unused-import +import apiclient.discovery +import 
google_auth_httplib2 import httplib2 -import oauth2client -from oauth2client import service_account -from oauth2client.contrib.gce import AppAssertionCredentials import six +import google.auth +from google.oauth2 import service_account + from . import labels from . import backoff from . import cdbg_native as native from . import uniquifier_computer from . import version - # This module catches all exception. This is safe because it runs in # a daemon thread (so we are not blocking Ctrl+C). We need to catch all # the exception because HTTP client is unpredictable as far as every @@ -50,11 +50,7 @@ # pylint: disable=broad-except # API scope we are requesting when service account authentication is enabled. -_CLOUD_PLATFORM_SCOPE = 'https://www.googleapis.com/auth/cloud-platform' - -# Base URL for metadata service. Specific attributes are appended to this URL. -_LOCAL_METADATA_SERVICE_PROJECT_URL = ('http://metadata.google.internal/' - 'computeMetadata/v1/project/') +_CLOUD_PLATFORM_SCOPE = ['https://www.googleapis.com/auth/cloud-platform'] # Set of all known debuggee labels (passed down as flags). The value of # a map is optional environment variable that can be used to set the flag @@ -77,15 +73,19 @@ _HTTP_TIMEOUT_SECONDS = 100 +class NoProjectIdError(Exception): + """Used to indicate the project id cannot be determined.""" + + class GcpHubClient(object): """Controller API client. Registers the debuggee, queries the active breakpoints and sends breakpoint updates to the backend. - This class supports two types of authentication: metadata service and service - account. The mode is selected by calling EnableServiceAccountAuth or - EnableGceAuth method. + This class supports two types of authentication: application default + credentials or a manually provided JSON credentials file for a service + account. GcpHubClient creates a worker thread that communicates with the backend. 
The thread can be stopped with a Stop function, but it is optional since the @@ -126,7 +126,7 @@ def filter(self, record): return False return True self._log_filter = _ChildLogFilter({logging.INFO}) - discovery.logger.addFilter(self._log_filter) + apiclient.discovery.logger.addFilter(self._log_filter) # # Configuration options (constants only modified by unit test) @@ -150,9 +150,6 @@ def InitializeDebuggeeLabels(self, flags): Debuggee description is formatted from available flags. - Project ID is not set here. It is obtained from metadata service or - specified as a parameter to EnableServiceAccountAuth. - Args: flags: dictionary of debuglet command line flags. """ @@ -176,61 +173,47 @@ def InitializeDebuggeeLabels(self, flags): {name: value for (name, value) in six.iteritems(flags) if name in _DEBUGGEE_LABELS}) - self._debuggee_labels['projectid'] = self._project_id() + self._debuggee_labels['projectid'] = self._project_id - def EnableServiceAccountAuthP12(self, project_id, project_number, - email, p12_file): - """Selects service account authentication with a p12 file. + def SetupAuth(self, + project_id=None, + project_number=None, + service_account_json_file=None): + """Sets up authentication with Google APIs. + + This will use the credentials from service_account_json_file if provided, + falling back to application default credentials. + See https://cloud.google.com/docs/authentication/production. - Using this function is not recommended. Use EnableServiceAccountAuthJson - for authentication, instead. The p12 file format is no longer recommended. Args: - project_id: GCP project ID (e.g. myproject). - project_number: numberic GCP project ID (e.g. 72386324623). - email: service account identifier for use with p12_file - (...@developer.gserviceaccount.com). - p12_file: (deprecated) path to an old-style p12 file with the - private key. + project_id: GCP project ID (e.g. myproject). If not provided, will attempt + to retrieve it from the credentials. 
+ project_number: GCP project number (e.g. 72386324623). If not provided, + project_id will be used in its place. + service_account_json_file: JSON file to use for credentials. If not + provided, will default to application default credentials. Raises: - NotImplementedError indicates that the installed version of oauth2client - does not support using a p12 file. + NoProjectIdError: If the project id cannot be determined. """ - try: - with open(p12_file, 'rb') as f: - self._credentials = oauth2client.client.SignedJwtAssertionCredentials( - email, f.read(), scope=_CLOUD_PLATFORM_SCOPE) - except AttributeError: - raise NotImplementedError( - 'P12 key files are no longer supported. Please use a JSON ' - 'credentials file instead.') - self._project_id = lambda: project_id - self._project_number = lambda: project_number - - def EnableServiceAccountAuthJson(self, project_id, project_number, - auth_json_file): - """Selects service account authentication using Json credentials. + if service_account_json_file: + self._credentials = ( + service_account.Credentials.from_service_account_file( + service_account_json_file, scopes=_CLOUD_PLATFORM_SCOPE)) + if not project_id: + with open(service_account_json_file) as f: + project_id = json.load(f).get('project_id') + else: + self._credentials, credentials_project_id = google.auth.default( + scopes=_CLOUD_PLATFORM_SCOPE) + project_id = project_id or credentials_project_id - Args: - project_id: GCP project ID (e.g. myproject). - project_number: numberic GCP project ID (e.g. 72386324623). - auth_json_file: the JSON keyfile - """ - self._credentials = ( - service_account.ServiceAccountCredentials - .from_json_keyfile_name(auth_json_file, scopes=_CLOUD_PLATFORM_SCOPE)) - self._project_id = lambda: project_id - self._project_number = lambda: project_number - - def EnableGceAuth(self): - """Selects to use local metadata service for authentication. - - The project ID and project number are also retrieved from the metadata - service. 
It is done lazily from the worker thread. The motivation is to - speed up initialization and be able to recover from failures. - """ - self._credentials = AppAssertionCredentials() - self._project_id = lambda: self._QueryGcpProject('project-id') - self._project_number = lambda: self._QueryGcpProject('numeric-project-id') + if not project_id: + raise NoProjectIdError( + 'Unable to determine the project id from the API credentials. ' + 'Please specify the project id using the --project_id flag.') + + self._project_id = project_id + self._project_number = project_number or project_id def Start(self): """Starts the worker thread.""" @@ -277,7 +260,7 @@ def EnqueueBreakpointUpdate(self, breakpoint): def _BuildService(self): http = httplib2.Http(timeout=_HTTP_TIMEOUT_SECONDS) - http = self._credentials.authorize(http) + http = google_auth_httplib2.AuthorizedHttp(self._credentials, http) api = apiclient.discovery.build( 'clouddebugger', 'v2', http=http, cache_discovery=False) @@ -333,6 +316,13 @@ def _RegisterDebuggee(self, service): try: response = service.debuggees().register(body=request).execute() + # self._project_number will refer to the project id on initialization if + # the project number is not available. The project field in the debuggee + # will always refer to the project number. Update so the server will not + # have to do id->number translations in the future. 
+ project_number = response['debuggee'].get('project') + self._project_number = project_number or self._project_number + self._debuggee_id = response['debuggee']['id'] native.LogInfo('Debuggee registered successfully, ID: %s' % ( self._debuggee_id)) @@ -452,21 +442,6 @@ def _TransmitBreakpointUpdates(self, service): else: return (reconnect, self.update_backoff.Failed()) - def _QueryGcpProject(self, resource): - """Queries project resource on a local metadata service.""" - url = _LOCAL_METADATA_SERVICE_PROJECT_URL + resource - http = httplib2.Http() - response, content = http.request( - url, headers={'Metadata-Flavor': 'Google'}) - if response['status'] != '200': - raise RuntimeError( - 'HTTP error %s %s when querying local metadata service at %s' % - (response['status'], content, url)) - - if not isinstance(content, str): - content = content.decode() - return content - def _GetDebuggee(self): """Builds the debuggee structure.""" major_version = 'v' + version.__version__.split('.')[0] @@ -475,7 +450,7 @@ def _GetDebuggee(self): major_version)) debuggee = { - 'project': self._project_number(), + 'project': self._project_number, 'description': self._GetDebuggeeDescription(), 'labels': self._debuggee_labels, 'agentVersion': agent_version, diff --git a/src/setup.py b/src/setup.py index d4b0e75..8b9ea30 100644 --- a/src/setup.py +++ b/src/setup.py @@ -106,7 +106,13 @@ def ReadConfig(section, value, default): url='https://github.com/GoogleCloudPlatform/cloud-debug-python', author='Google Inc.', version=version, - install_requires=['google-api-python-client', 'pyyaml', 'six>=1.10.0'], + install_requires=[ + 'google-api-python-client', + 'google-auth>=1.0.0', + 'google-auth-httplib2', + 'pyyaml', + 'six>=1.10.0', + ], packages=['googleclouddebugger'], ext_modules=[cdbg_native_module], license='Apache License, Version 2.0', @@ -115,5 +121,5 @@ def ReadConfig(section, value, default): 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3.6', 
'Development Status :: 3 - Alpha', - 'Intended Audience :: Developers' + 'Intended Audience :: Developers', ]) From 627521f8dfe9f30f7b5c46db5b2891397020100d Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Mon, 9 Apr 2018 11:25:20 -0700 Subject: [PATCH 133/241] Increment Python agent version. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=192156908 --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index ce00ef0..bc7a0ee 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. -__version__ = '2.7' +__version__ = '2.8' From 63ba7fd1790c08c53d9a354ca886a91d02c77056 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Thu, 12 Apr 2018 17:26:09 -0700 Subject: [PATCH 134/241] Update Python agent README with note about the -m googleclouddebugger option. This note is easier to find and more general than the specific section on Django at the end. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=192698195 --- README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3ab465c..97bf2bf 100644 --- a/README.md +++ b/README.md @@ -101,10 +101,10 @@ run the build script as `PYTHON=python3.6 ./build.sh`. It is possible to use the Python debugger agent without it. Please see the [next section](#outside-google-cloud-platform) for details. -1. Install the Python debugger agent as explained in the +2. Install the Python debugger agent as explained in the [Installation](#installation) section. -2. Enable the debugger in your application using one of the two options: +3. 
Enable the debugger in your application using one of the two options: _Option A_: add this code to the beginning of your `main()` function: @@ -123,6 +123,10 @@ run the build script as `PYTHON=python3.6 ./build.sh`. python -m googleclouddebugger -- myapp.py + **Note:** This option does not work well with tools such as + `multiprocessing` or `gunicorn`. These tools spawn workers in separate + processes, but the debugger does not get enabled on these worker processes. + Please use _Option A_ instead. ### Outside Google Cloud Platform From 4501fa840a7a20b07853d0d5b878f2165e28ebb4 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Fri, 13 Apr 2018 09:41:59 -0700 Subject: [PATCH 135/241] Add example Dockerfile to show how to build and run the Python debugger on Alpine Linux. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=192781023 --- README.md | 6 ++++++ alpine/Dockerfile | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 alpine/Dockerfile diff --git a/README.md b/README.md index 97bf2bf..c13c977 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,12 @@ To build, the `python3.6` and `python3.6-dev` packages are additionally needed. If Python 3.6 is not the default version of the 'python' command on your system, run the build script as `PYTHON=python3.6 ./build.sh`. +### Alpine Linux + +The Python agent is not regularly tested on Alpine Linux, and support will be on +a best effort basis. The [Dockerfile](alpine/Dockerfile) shows how to build a +minimal image with the agent installed. + ## Setup ### Google Cloud Platform diff --git a/alpine/Dockerfile b/alpine/Dockerfile new file mode 100644 index 0000000..888e448 --- /dev/null +++ b/alpine/Dockerfile @@ -0,0 +1,36 @@ +# WARNING: Stackdriver Debugger is not regularly tested on the Alpine Linux +# platform and support will be on a best effort basis. +# Sample Alpine Linux image including Python and the Stackdriver Debugger agent. 
+# To build: +# docker build . # Python 2.7 +# docker build --build-arg PYTHON_VERSION=3 . # Python 3.6 +# The final image size should be around 50-60 MiB. + +# Stage 1: Build the agent. +FROM alpine:latest + +ARG PYTHON_VERSION=2 +ENV PYTHON_VERSION=$PYTHON_VERSION +ENV PYTHON=python${PYTHON_VERSION} + +RUN apk update +RUN apk add bash git curl gcc g++ make cmake ${PYTHON}-dev +RUN if [ $PYTHON_VERSION == "2" ]; then apk add py-setuptools; fi + +RUN git clone https://github.com/GoogleCloudPlatform/cloud-debug-python +RUN PYTHON=$PYTHON bash cloud-debug-python/src/build.sh + + +# Stage 2: Create minimal image with just Python and the debugger. +FROM alpine:latest + +ARG PYTHON_VERSION=2 +ENV PYTHON_VERSION=$PYTHON_VERSION +ENV PYTHON=python${PYTHON_VERSION} + +RUN apk --no-cache add $PYTHON libstdc++ +RUN if [ $PYTHON_VERSION == "2" ]; then apk add --no-cache py-setuptools; fi + +COPY --from=0 /cloud-debug-python/src/dist/*.egg . +RUN $PYTHON -m easy_install *.egg +RUN rm *.egg From 165d60bc2ff4c7067a351383c1cdb840ecda1f08 Mon Sep 17 00:00:00 2001 From: mattwach Date: Wed, 16 May 2018 16:34:07 -0700 Subject: [PATCH 136/241] Change TODO -> Note It's not clear if increasing scalability will be needed, thus a TODO is probably overstating things. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=196909307 --- src/googleclouddebugger/glob_data_visibility_policy.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/googleclouddebugger/glob_data_visibility_policy.py b/src/googleclouddebugger/glob_data_visibility_policy.py index 08b7a16..2664acc 100644 --- a/src/googleclouddebugger/glob_data_visibility_policy.py +++ b/src/googleclouddebugger/glob_data_visibility_policy.py @@ -84,8 +84,6 @@ def _Matches(path, pattern_list): Returns: True if path matches any wildcard found in pattern_list. """ - # TODO(mattwach): This code does not scale to large pattern_list - # sizes. 
For now, keep things logically simple but consider a - # more optimized solution in the future. + # Note: This code does not scale to large pattern_list sizes. return any(fnmatch.fnmatchcase(path, pattern) for pattern in pattern_list) From 49885517d3bdb94fa517250d7a7f7025c5aec497 Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Wed, 16 May 2018 17:06:00 -0700 Subject: [PATCH 137/241] Remove "use_new_module_search" option and all old-module-search files. This feature has been released and validated in GAE prod (and all other environments, as well). Follow-up CLs: 1. Remove the flag and tag on GAE (which will become no-op with this CL). 2. Move *2.py files back to *.py files. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=196913948 --- src/googleclouddebugger/__init__.py | 4 +- .../breakpoints_manager.py | 9 +- src/googleclouddebugger/imphook.py | 145 ----------------- src/googleclouddebugger/module_search.py | 154 ------------------ src/googleclouddebugger/module_utils.py | 78 --------- src/googleclouddebugger/python_breakpoint.py | 54 +----- 6 files changed, 10 insertions(+), 434 deletions(-) delete mode 100644 src/googleclouddebugger/imphook.py delete mode 100644 src/googleclouddebugger/module_search.py delete mode 100644 src/googleclouddebugger/module_utils.py diff --git a/src/googleclouddebugger/__init__.py b/src/googleclouddebugger/__init__.py index 1c7d288..a30b732 100644 --- a/src/googleclouddebugger/__init__.py +++ b/src/googleclouddebugger/__init__.py @@ -52,12 +52,10 @@ def _StartDebugger(): _hub_client = gcp_hub_client.GcpHubClient() visibility_policy = _GetVisibilityPolicy() - use_new_module_search = _flags.get('use_new_module_search', True) _breakpoints_manager = breakpoints_manager.BreakpointsManager( _hub_client, - visibility_policy, - use_new_module_search=use_new_module_search) + visibility_policy) # Set up loggers for logpoints. 
capture_collector.SetLogger(logging.getLogger()) diff --git a/src/googleclouddebugger/breakpoints_manager.py b/src/googleclouddebugger/breakpoints_manager.py index 4865eee..6a5496b 100644 --- a/src/googleclouddebugger/breakpoints_manager.py +++ b/src/googleclouddebugger/breakpoints_manager.py @@ -38,17 +38,13 @@ class BreakpointsManager(object): breakpoint updates back to the backend. data_visibility_policy: An object used to determine the visibiliy of a captured variable. May be None if no policy is available. - use_new_module_search: If true, the new module search algorithm will be - used. """ def __init__(self, hub_client, - data_visibility_policy, - use_new_module_search=False): + data_visibility_policy): self._hub_client = hub_client self.data_visibility_policy = data_visibility_policy - self.use_new_module_search = use_new_module_search # Lock to synchronize access to data across multiple threads. self._lock = RLock() @@ -86,8 +82,7 @@ def SetActiveBreakpoints(self, breakpoints_data): x, self._hub_client, self, - self.data_visibility_policy, - self.use_new_module_search)) + self.data_visibility_policy)) for x in breakpoints_data if x['id'] in ids - six.viewkeys(self._active) - self._completed]) diff --git a/src/googleclouddebugger/imphook.py b/src/googleclouddebugger/imphook.py deleted file mode 100644 index aeb1ee8..0000000 --- a/src/googleclouddebugger/imphook.py +++ /dev/null @@ -1,145 +0,0 @@ -# Copyright 2015 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS-IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Support for breakpoints on modules that haven't been loaded yet. - -This is the old module import hook which: - 1. Takes full path of the module with file extension as input. - 2. At each (top-level-only) import statement: - a. Uses path to guess all possible names the module may be loaded as. - b. Checks sys.modules if there are any modules loaded with those names, - using exact path match between the __file__ attribute of the module - from sys.modules and the input path. - -For the new module import hook, see imphook2.py file. -""" - -import os -# Must be imported, otherwise import hooks don't work. -import sys # pylint: disable=unused-import -import threading - -from six.moves import builtins # pylint: disable=redefined-builtin - -from . import module_utils - -# Callbacks to invoke when a module is imported. -_import_callbacks = {} -_import_callbacks_lock = threading.Lock() - -# Per thread data holding information about the import call nest level. -_import_local = threading.local() - -# Original __import__ function if import hook is installed or None otherwise. -_real_import = None - - -def AddImportCallback(abspath, callback): - """Register import hook. - - This function overrides the default import process. Then whenever a module - corresponding to abspath is imported, the callback will be invoked. - - A module may be imported multiple times. Import event only means that the - Python code contained an "import" statement. The actual loading and - initialization of a new module normally happens only once, at which time - the callback will be invoked. This function does not validates the existence - of such a module and it's the responsibility of the caller. - - TODO(erezh): handle module reload. - - Args: - abspath: python module file absolute path. - callback: callable to invoke upon module load. 
- - Returns: - Function object to invoke to remove the installed callback. - """ - - def RemoveCallback(): - # This is a read-if-del operation on _import_callbacks. Lock to prevent - # callbacks from being inserted just before the key is deleted. Thus, it - # must be locked also when inserting a new entry below. On the other hand - # read only access, in the import hook, does not require a lock. - with _import_callbacks_lock: - callbacks = _import_callbacks.get(path) - if callbacks: - callbacks.remove(callback) - if not callbacks: - del _import_callbacks[path] - - path, unused_ext = os.path.splitext(abspath) - with _import_callbacks_lock: - _import_callbacks.setdefault(path, set()).add(callback) - _InstallImportHook() - - return RemoveCallback - - -def _InstallImportHook(): - """Lazily installs import hook.""" - - global _real_import - - if _real_import: - return # Import hook already installed - - _real_import = getattr(builtins, '__import__') - assert _real_import - - builtins.__import__ = _ImportHook - - -# pylint: disable=redefined-builtin, g-doc-args, g-doc-return-or-yield -def _ImportHook(name, globals=None, locals=None, fromlist=None, level=-1): - """Callback when a module is being imported by Python interpreter. - - Argument names have to exactly match those of __import__. Otherwise calls - to __import__ that use keyword syntax will fail: __import('a', fromlist=[]). - """ - - # This is the top call to import (no nesting), init the per-thread nest level. - if getattr(_import_local, 'nest_level', None) is None: - _import_local.nest_level = 0 - - _import_local.nest_level += 1 - - try: - # Really import modules. - module = _real_import(name, globals, locals, fromlist, level) - finally: - _import_local.nest_level -= 1 - - # No need to invoke the callbacks on nested import calls. - if _import_local.nest_level: - return module - - # Optimize common code path when no breakponts are set. 
- if not _import_callbacks: - return module - - _InvokeImportCallback() - return module - - -def _InvokeImportCallback(): - """Invokes import callbacks for loaded modules.""" - # _import_callbacks might change during iteration because RemoveCallback() - # might delete items. Iterate over a copy to avoid a - # 'dictionary changed size during iteration' error. - for path, callbacks in list(_import_callbacks.items()): - module = module_utils.GetLoadedModuleByPath(path) - if module: - for callback in callbacks.copy(): - callback(module) diff --git a/src/googleclouddebugger/module_search.py b/src/googleclouddebugger/module_search.py deleted file mode 100644 index ce9c763..0000000 --- a/src/googleclouddebugger/module_search.py +++ /dev/null @@ -1,154 +0,0 @@ -# Copyright 2015 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS-IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Inclusive search for module files.""" - -import os -import pkgutil -import sys -import time - -from . import cdbg_native as native -from . 
import module_utils - - -def _CommonPathPrefix(paths): - """The equivalent of Python 3 os.path.commonpath().""" - prefix = os.path.commonprefix(list(paths)) - prefix_len = prefix.rfind(os.sep) + 1 - return prefix[:prefix_len] - - -def _CommonPathSuffixLen(paths): - """Returns the longest common path suffix len in a list of paths.""" - return len(os.path.commonprefix([path[::-1].split(os.sep) for path in paths])) - - -def _GetIsPackageAndModuleName(path_noext): - """Returns a tuple indicating whether the path is a package and a name.""" - - directory, name = os.path.split(path_noext) - if name != '__init__': - return (False, name) - # It is a package, return the package name. - return (True, os.path.basename(directory)) - - -def _ResolveMultiPath(path, paths): - """Returns a single path if path ambiguity can be resolved.""" - if len(paths) > 1: - candidate = os.path.join(_CommonPathPrefix(paths), path) - if candidate in paths: - return (candidate,) - - candidate = os.path.join(sys.path[0], path) - if candidate in paths: - return (candidate,) - - return paths - - -# TODO(erezh): Ensure we handle whitespace in paths correctly including, -# extension, basename and dirname. -def FindMatchingFiles(location_path): - """Returns a list of absolute filenames of best matching modules/packages.""" - - def AddCandidate(mod_path): - # We must sanitize the module path before using it for proper deduplication. - mod_abspath = module_utils.GetAbsolutePath(mod_path) - suffix_len = _CommonPathSuffixLen([src_path, mod_abspath]) - if suffix_len < longest_suffix_len[0]: - return - if suffix_len > longest_suffix_len[0]: - candidates.clear() - longest_suffix_len[0] = suffix_len - candidates.add(mod_abspath) - - # We measure the time it takes to execute the scan. - start_time = time.time() - num_dirs_scanned = 0 - - # Remove the file extension and identify if it's a package. 
- src_path, src_ext = os.path.splitext(location_path) - assert src_ext == '.py' - (src_ispkg, src_name) = _GetIsPackageAndModuleName(src_path) - assert src_name - - # Using mutable vars to make them available in nested functions. - - # The set of module/package path w/ no extension. Use AddCandidate() to insert - # into this set. - candidates = set() - - # Init longest_suffix_len to 1 to avoid inserting zero length suffixes. - longest_suffix_len = [1] - - # Search paths for modules and packages, init with system search paths. - search_paths = set(path for path in sys.path) - - # Add search paths from the already loaded packages and add matching modules - # or packages to the candidates list. - for module in sys.modules.values(): - # Extend the search paths with packages path and modules file directory. - # Note that __path__ only exist for packages. - search_paths |= frozenset(getattr(module, '__path__', [])) - mod_path = os.path.splitext(getattr(module, '__file__', ''))[0] - - if not mod_path: - continue - - search_paths.add(os.path.dirname(mod_path)) - # Add loaded modules to the candidates set. - if (src_ispkg, src_name) == _GetIsPackageAndModuleName(mod_path): - AddCandidate(mod_path) - - # Walk the aggregated search path and loook for modules or packages. - # By searching one path at the time we control the module file name - # without having to load it. - # TODO(erezh): consider using the alternative impl in cr/165133821 which - # only uses os file lookup and not using pkgutil. The alternative is faster - # but is making many more assuptions that this impl does not. - while search_paths: - num_dirs_scanned += 1 - path = search_paths.pop() - # Allow other threads to run in case there are many search_paths. 
- time.sleep(0) - for unused_importer, mod_name, mod_ispkg in pkgutil.iter_modules([path]): - mod_path = os.path.join(path, mod_name) - if mod_ispkg: - search_paths.add(mod_path) - mod_path = os.path.join(mod_path, '__init__') - if src_ispkg == mod_ispkg and src_name == mod_name: - AddCandidate(mod_path) - - # Apply heuristics to resolve multiple matching paths into one. - candidates = _ResolveMultiPath(src_path, candidates) - - # Sort the list to return a stable result to the user. - # TODO(erezh): No need to add the .py extenssion, this is done just for - # compatabilty with current code. Once refactored not to use file extension - # this code can be removed to just return the sorted candidates. - candidates = sorted(path + '.py' for path in candidates) - - # Log scan stats, without the files list to avoid very long output as well as - # the potential leak of system files that the user has no access to. - native.LogInfo( - ('Found %d files matching \'%s\' in %d scanned folders in %f ms') % ( - len(candidates), - location_path, - num_dirs_scanned, - (time.time() - start_time) * 1000)) - - # Return a sorted result for stable report to the user - return candidates diff --git a/src/googleclouddebugger/module_utils.py b/src/googleclouddebugger/module_utils.py deleted file mode 100644 index 606f459..0000000 --- a/src/googleclouddebugger/module_utils.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright 2015 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS-IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Provides utility functions for module path processing.""" - -import os -import sys - -from six.moves import xrange # pylint: disable=redefined-builtin - - -def GetAbsolutePath(mod_path): - """Flattens symlinks and indirections in the module path. - - To uniquely identify each module file, the file path must be sanitized - by following all symbolic links and normalizing to an absolute path. - - Note that the module file (i.e., .py/.pyc/.pyo file) itself can be a - symbolic link, but we must *NOT* follow that symbolic link. - - Args: - mod_path: A path that represents a module file. - - Returns: - The sanitized version of mod_path. - """ - pkg_path, file_name = os.path.split(mod_path) - pkg_path = os.path.abspath(os.path.realpath(pkg_path)) - return os.path.join(pkg_path, file_name) - - -def GetLoadedModuleByPath(abspath): - """Returns the loaded module that matches abspath or None if not found.""" - - def GenModuleNames(path): - """Generates all possible module names from path.""" - parts = path.lstrip(os.sep).split(os.sep) - - # For packages, remove the __init__ file name. - if parts[-1] == '__init__': - parts = parts[:-1] - - # Generate module names from part, starting with just the leaf name. - for i in xrange(len(parts) - 1, -1, -1): - yield '.'.join(parts[i::]) - - # If non where matching, it is possible that it's the main module. - yield '__main__' - - # The extenssion is not part of the module matching, remove it. - abspath = os.path.splitext(abspath)[0] - - # Lookup every possible module name for abspath, starting with the leaf name. - # It is much faster than scanning sys.modules and comparing module paths. 
- for mod_name in GenModuleNames(abspath): - module = sys.modules.get(mod_name) - if not module: - continue - - mod_path = getattr(module, '__file__', None) - if not mod_path: - continue - - # Get the absolute real path (no symlink) for this module. - mod_path = os.path.splitext(GetAbsolutePath(mod_path))[0] - if mod_path == abspath: - return module diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index dad434c..83b615c 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -21,12 +21,9 @@ from . import capture_collector from . import cdbg_native as native -from . import imphook from . import imphook2 from . import module_explorer -from . import module_search from . import module_search2 -from . import module_utils from . import module_utils2 # TODO(vlif): move to messages.py module. @@ -156,7 +153,7 @@ class PythonBreakpoint(object): """ def __init__(self, definition, hub_client, breakpoints_manager, - data_visibility_policy, use_new_module_search=False): + data_visibility_policy): """Class constructor. Tries to set the breakpoint. If the source location is invalid, the @@ -169,8 +166,6 @@ def __init__(self, definition, hub_client, breakpoints_manager, breakpoints_manager: parent object managing active breakpoints. data_visibility_policy: An object used to determine the visibility of a captured variable. May be None if no policy is available. - use_new_module_search: If true, the new module search algorithm will be - used. """ self.definition = definition @@ -213,49 +208,14 @@ def __init__(self, definition, hub_client, breakpoints_manager, 'parameters': [path]}}}) return - # If enabled, then use the new module search algorithm. 
- if use_new_module_search: - new_path = module_search2.Search(path) - new_module = module_utils2.GetLoadedModuleBySuffix(new_path) + new_path = module_search2.Search(path) + new_module = module_utils2.GetLoadedModuleBySuffix(new_path) - if new_module: - self._ActivateBreakpoint(new_module) - else: - self._import_hook_cleanup = imphook2.AddImportCallbackBySuffix( - new_path, - self._ActivateBreakpoint) - return - - # Otherwise, use the old module search algorithm. - - # Find all module files matching the location path. - paths = module_search.FindMatchingFiles(path) - if not paths: - self._CompleteBreakpoint({ - 'status': { - 'isError': True, - 'refersTo': 'BREAKPOINT_SOURCE_LOCATION', - 'description': {'format': ERROR_LOCATION_MODULE_NOT_FOUND_0}}}) - return - - if len(paths) > 1: - fmt, params = _MultipleModulesFoundError(path, paths) - self._CompleteBreakpoint({ - 'status': { - 'isError': True, - 'refersTo': 'BREAKPOINT_SOURCE_LOCATION', - 'description': { - 'format': fmt, - 'parameters': params}}}) - return - - # TODO(erezh): Handle the possible thread race condtion from lookup to hook. - module = module_utils.GetLoadedModuleByPath(paths[0]) - if module: - self._ActivateBreakpoint(module) + if new_module: + self._ActivateBreakpoint(new_module) else: - self._import_hook_cleanup = imphook.AddImportCallback( - paths[0], + self._import_hook_cleanup = imphook2.AddImportCallbackBySuffix( + new_path, self._ActivateBreakpoint) def Clear(self): From 7631fe409788d52762338d3e0583c32916870e71 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Wed, 23 May 2018 13:17:17 -0700 Subject: [PATCH 138/241] Document how to specify module/version for python agent on GitHub docs. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=197777361 --- README.md | 86 +++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 67 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index c13c977..163728d 100644 --- a/README.md +++ b/README.md @@ -9,9 +9,9 @@ of a running cloud application, at any code location, without stopping or slowing it down. It is not your traditional process debugger but rather an always on, whole app debugger taking snapshots from any instance of the app. -Cloud Debugger is safe for use with production apps or during development. -The Python debugger agent only few milliseconds to the request latency when a -debug snapshot is captured. In most cases, this is not noticeable to users. +Cloud Debugger is safe for use with production apps or during development. The +Python debugger agent adds only a few milliseconds to the request latency when a +debug snapshot is captured. In most cases, this is not noticeable to users. Furthermore, the Python debugger agent does not allow modification of application state in any way, and has close to zero impact on the app instances. @@ -28,19 +28,20 @@ Cloud Debugger consists of 3 primary components: 1. The Python debugger agent (this repo implements one for CPython 2.7, and an experimental one for CPython 3.6). -2. Cloud Debugger service storing and managing snapshots/logpoints. - Explore the API's using - [APIs Explorer](https://developers.google.com/apis-explorer/#p/clouddebugger/v2/). -3. User interface, including a command line interface - [`gcloud debug`](https://cloud.google.com/sdk/gcloud/reference/debug/) and a - Web interface on - [Google Cloud Console](https://console.developers.google.com/debug/). - See the [online help](https://cloud.google.com/debugger/docs/debugging) on - how to use Google Cloud Console Debug page. +2. Cloud Debugger service storing and managing snapshots/logpoints. 
Explore the + APIs using [APIs + Explorer](https://developers.google.com/apis-explorer/#p/clouddebugger/v2/). +3. User interface, including a command line interface [`gcloud + debug`](https://cloud.google.com/sdk/gcloud/reference/debug/) and a Web + interface on [Google Cloud + Console](https://console.developers.google.com/debug/). See the [online + help](https://cloud.google.com/debugger/docs/debugging) on how to use Google + Cloud Console Debug page. ## Getting Help -1. StackOverflow: http://stackoverflow.com/questions/tagged/google-cloud-debugger +1. StackOverflow: + http://stackoverflow.com/questions/tagged/google-cloud-debugger 2. Send email to: [Cloud Debugger Feedback](mailto:cdbg-feedback@google.com) 3. Send Feedback from Google Cloud Console @@ -118,7 +119,7 @@ minimal image with the agent installed. # Attach Python Cloud Debugger try: import googleclouddebugger - googleclouddebugger.enable() + googleclouddebugger.enable(module='[MODULE]', version='[VERSION]') except ImportError: pass ``` @@ -126,7 +127,9 @@ minimal image with the agent installed. _Option B_: run the debugger agent as a module:
-    python -m googleclouddebugger -- myapp.py
+    python \
+        -m googleclouddebugger --module=[MODULE] --version=[VERSION] -- \
+        myapp.py
     
**Note:** This option does not work well with tools such as @@ -134,6 +137,16 @@ minimal image with the agent installed. processes, but the debugger does not get enabled on these worker processes. Please use _Option A_ instead. + Where, in both cases: + + * `[MODULE]` is the name of your app. This, along with the version, is + used to identify the debug target in the UI.
+ Example values: `MyApp`, `Backend`, or `Frontend`. + + * `[VERSION]` is the app version (for example, the build ID). The UI + displays the running version as `[MODULE] - [VERSION]`.
+ Example values: `v1.0`, `build_147`, or `v20170714`. + ### Outside Google Cloud Platform To use the Python debugger agent on machines not hosted by Google Cloud @@ -180,6 +193,8 @@ account. try: import googleclouddebugger googleclouddebugger.enable( + module='[MODULE]', + version='[VERSION]', service_account_json_file='/path/to/credentials.json') except ImportError: pass @@ -190,6 +205,8 @@ account.
     python \
         -m googleclouddebugger \
+        --module=[MODULE] \
+        --version=[VERSION] \
         --service_account_json_file=/path/to/credentials.json \
         -- \
         myapp.py
@@ -197,12 +214,10 @@ account.
 
 4.  Follow the rest of the steps in the [GCP](#google-cloud-platform) section.
 
-
 ### Django Web Framework
 
 You can use the Cloud Debugger to debug Django web framework applications.
 
-
 The best way to enable the Cloud Debugger with Django is to add the following
 code fragment to your `manage.py` file:
 
@@ -211,13 +226,46 @@ code fragment to your `manage.py` file:
 if os.environ.get('RUN_MAIN') or '--noreload' in sys.argv:
   try:
     import googleclouddebugger
-    googleclouddebugger.enable()
+    googleclouddebugger.enable(module='[MODULE]', version='[VERSION]')
   except ImportError:
     pass
 ```
 
-
 Alternatively, you can pass the `--noreload` flag when running the Django
 `manage.py` and use any one of the option A and B listed earlier. Note that
 using the `--noreload` flag disables the autoreload feature in Django, which
 means local changes to files will not be automatically picked up by Django.
+
+## Flag Reference
+
+The agent offers various flags to configure its behavior. Flags can be specified
+as keyword arguments:
+
+```python
+googleclouddebugger.enable(flag_name='flag_value')
+```
+
+or as command line arguments when running the agent as a module:
+
+```shell
+python -m googleclouddebugger --flag_name=flag_value -- myapp.py
+```
+
+The following flags are available:
+
+`module`: A name for your app. This, along with the version, is used to identify
+the debug target in the UI. 
+Example values: `MyApp`, `Backend`, or `Frontend`. + +`version`: A version for your app. The UI displays the running version as +`[MODULE] - [VERSION]`.
+If not provided, the UI will display the generated debuggee ID instead.
+Example values: `v1.0`, `build_147`, or `v20170714`. + +`service_account_json_file`: Path to JSON credentials of a [service +account](https://cloud.google.com/iam/docs/service-accounts) to use for +authentication. If not provided, the agent will fall back to [Application +Default Credentials](https://cloud.google.com/docs/authentication/production) +which are automatically available on machines hosted on GCP, or can be set via +`gcloud auth application-default login` or the `GOOGLE_APPLICATION_CREDENTIALS` +environment variable. From 1289c13bd918801550bc22a1070e70d93df1c0df Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Thu, 24 May 2018 09:20:08 -0700 Subject: [PATCH 139/241] Capture exception.args instead of exception.__dict__. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=197903220 --- src/googleclouddebugger/capture_collector.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index b5966d7..01ffe66 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -540,6 +540,12 @@ def CaptureVariableInternal(self, value, depth, limits, can_enqueue=True): # TODO(vlif): set value to func_name and type to 'function' return {'value': 'function ' + value.__name__} + if isinstance(value, Exception): + fields = self.CaptureVariablesList( + (('[%d]' % i, x) for i, x in enumerate(value.args)), + depth + 1, EMPTY_COLLECTION, limits) + return {'members': fields, 'type': type(value).__name__} + if can_enqueue: index = self._var_table_index.get(id(value)) if index is None: From 806313c0019e42bec34d0b318b946ad8cac72858 Mon Sep 17 00:00:00 2001 From: emrekultursay Date: Thu, 14 Jun 2018 18:51:09 -0700 Subject: [PATCH 140/241] Support custom breakpoint expiration in Python agent. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=200655812 --- src/googleclouddebugger/python_breakpoint.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index 83b615c..27eab63 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -173,6 +173,10 @@ def __init__(self, definition, hub_client, breakpoints_manager, # Breakpoint expiration time. self.expiration_period = timedelta(hours=24) + if self.definition.get('expires_in'): + self.expiration_period = min( + timedelta(definition.get('expires_in').get('seconds', 0)), + self.expiration_period) self._hub_client = hub_client From 0fa32ca9b33c6a30ef00a55df2a0224b4c361b25 Mon Sep 17 00:00:00 2001 From: davidkao Date: Mon, 23 Jul 2018 16:03:04 -0700 Subject: [PATCH 141/241] Upgrade third_party/py/yaml to 3.13 and add the python3 variant. Just for clarification, 3.13 isn't listed in history notes (yet) on https://pyyaml.org/wiki/PyYAML but does seem to be the latest stable release based on the thread here: https://github.com/yaml/pyyaml/issues/193 This cl is largely modeled after cl/174091597, and does the following: + The package is reorganized to match the upstream package layout (a copybara setup should be easy now). + Use py2or3_library to support the two-separate-codebases approach that PyYAML uses for its Python 3 support, both for the library and the tests. + Use 'raise' instead of 'raise exc' (in the local modification to test_appliance.py files) for more useful tracebacks. + Drop the shell test that was failing (and possibly never worked) + Re-compile cythonized extension library using a newer version of cython (0.25.2 vs 0.20.1) + Add a test for yaml_with_c + Convert py_test to py2and3_test targets. + Revise tests/lib3 files to match local modifications made for the py2 tests and fix relative import behavior. 
+ Adjust the googet package definition to the new layout. + Update METADATA + Fix yaml_data_visibility_config_reader which was explicitly working around broken py3 yaml string readin (to bytes) behavior (done in cl/186322439). Tested: TAP global presubmit with reruns (remaining failures appear to be unrelated/already failing in TAP): http://test/OCL:204627853:BASE:205100169:1531958109170:974f06c5 Ran PyYAML's tests (for both Py2 and Py3): blaze test //third_party/py/yaml/... http://sponge/9442b74c-c8b1-4b18-8cfb-5be58a59f5c0 Note that google_tests_with_c.python2 is failing but it is also failing at ToT + cl/204635243 with the same error: http://sponge/94a3cdab-38f2-401d-badf-d7b35330a76b Also manually compared the generated googet package layout following instructions at http://g3doc/corp/winops/g3doc/configmgmt/packaging/googet/creating-packages#TestingaGoogetPackage ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=205740539 --- .../yaml_data_visibility_config_reader.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/googleclouddebugger/yaml_data_visibility_config_reader.py b/src/googleclouddebugger/yaml_data_visibility_config_reader.py index 32ab8ed..ebf4db7 100644 --- a/src/googleclouddebugger/yaml_data_visibility_config_reader.py +++ b/src/googleclouddebugger/yaml_data_visibility_config_reader.py @@ -113,17 +113,16 @@ def Read(f): _CheckData(yaml_data) try: - to_str = lambda v: v.decode() if six.PY3 and isinstance(v, bytes) else v return Config( - [to_str(val) for val in yaml_data.get(b'blacklist', ())], - [to_str(val) for val in yaml_data.get(b'whitelist', ('*'))]) + yaml_data.get('blacklist', ()), + yaml_data.get('whitelist', ('*'))) except UnicodeDecodeError as e: raise YAMLLoadError('%s' % e) def _CheckData(yaml_data): """Checks data for illegal keys and formatting.""" - legal_keys = set((b'blacklist', b'whitelist')) + legal_keys = set(('blacklist', 'whitelist')) unknown_keys = set(yaml_data) - 
legal_keys if unknown_keys: raise UnknownConfigKeyError( @@ -143,6 +142,6 @@ def _AssertDataIsList(key, lst): # each list entry must be a string for element in lst: - if not isinstance(element, (bytes, str)): + if not isinstance(element, str): raise ElementNotAStringError('Unsupported list element %s found in %s', (element, lst)) From f218724e6a48c0a665e9bb03accaf665004e592d Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Thu, 26 Jul 2018 14:26:29 -0700 Subject: [PATCH 142/241] Add pretty printer for protorpc.messages.Enum. For some reason __dict__ is empty. They do a bunch of weird metaclass stuff and I don't want to figure out why. http://google3/third_party/apphosting/python/protorpc/v1_0/protorpc/messages.py ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=206221417 --- src/googleclouddebugger/appengine_pretty_printers.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/googleclouddebugger/appengine_pretty_printers.py b/src/googleclouddebugger/appengine_pretty_printers.py index 16ef31d..036caad 100644 --- a/src/googleclouddebugger/appengine_pretty_printers.py +++ b/src/googleclouddebugger/appengine_pretty_printers.py @@ -16,6 +16,11 @@ import six +try: + from protorpc import messages # pylint: disable=g-import-not-at-top +except ImportError: + messages = None + try: from google.appengine.ext import ndb # pylint: disable=g-import-not-at-top except ImportError: @@ -28,4 +33,7 @@ def PrettyPrinter(obj): if ndb and isinstance(obj, ndb.Model): return six.iteritems(obj.to_dict()), 'ndb.Model(%s)' % type(obj).__name__ + if messages and isinstance(obj, messages.Enum): + return [('name', obj.name), ('number', obj.number)], type(obj).__name__ + return None From f665980509699ec6e6df9ff037056473d3356e66 Mon Sep 17 00:00:00 2001 From: xinghuadou Date: Tue, 14 Aug 2018 10:37:19 -0700 Subject: [PATCH 143/241] Increment python debugger version in preparation for releasing 3.7. 
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=208671429 --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index bc7a0ee..cf952cc 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. -__version__ = '2.8' +__version__ = '2.9' From a1019e08b598c9e4ee1468e00f5f00cae7339d6f Mon Sep 17 00:00:00 2001 From: Hamidreza Asaadi Date: Thu, 15 Nov 2018 11:17:52 -0500 Subject: [PATCH 144/241] experimental support for python 3.7 documented --- README.md | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 163728d..4b67011 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # Python Cloud Debugger Agent -Google [Cloud Debugger](https://cloud.google.com/debugger/) for Python 2.7. +Google [Cloud Debugger](https://cloud.google.com/debugger/) for Python 2.7 and +3.7 (experimental support) ## Overview @@ -27,16 +28,16 @@ tested on Debian Linux, but it should work on other distributions as well. Cloud Debugger consists of 3 primary components: 1. The Python debugger agent (this repo implements one for CPython 2.7, and an - experimental one for CPython 3.6). + experimental one for CPython 3.7). 2. Cloud Debugger service storing and managing snapshots/logpoints. Explore the - APIs using [APIs - Explorer](https://developers.google.com/apis-explorer/#p/clouddebugger/v2/). -3. User interface, including a command line interface [`gcloud - debug`](https://cloud.google.com/sdk/gcloud/reference/debug/) and a Web - interface on [Google Cloud - Console](https://console.developers.google.com/debug/). 
See the [online - help](https://cloud.google.com/debugger/docs/debugging) on how to use Google - Cloud Console Debug page. + APIs using + [APIs Explorer](https://developers.google.com/apis-explorer/#p/clouddebugger/v2/). +3. User interface, including a command line interface + [`gcloud debug`](https://cloud.google.com/sdk/gcloud/reference/debug/) and a + Web interface on + [Google Cloud Console](https://console.developers.google.com/debug/). See + the [online help](https://cloud.google.com/debugger/docs/debugging) on how + to use Google Cloud Console Debug page. ## Getting Help @@ -81,12 +82,12 @@ sudo apt-get -y -q --no-install-recommends install \ ### Python 3 -There is experimental support for Python 3.6. Python 3.0 to 3.5 are not -supported, and newer versions have not been tested. +There is experimental support for Python 3.6 and Python 3.7. Python 3.0 to 3.5 +are not supported, and newer versions have not been tested. -To build, the `python3.6` and `python3.6-dev` packages are additionally needed. -If Python 3.6 is not the default version of the 'python' command on your system, -run the build script as `PYTHON=python3.6 ./build.sh`. +To build, the `python3.7` and `python3.7-dev` packages are additionally needed. +If Python 3.7 is not the default version of the 'python' command on your system, +run the build script as `PYTHON=python3.7 ./build.sh`. ### Alpine Linux From 37af43efbcc790f615af9f0129978e8dcff2929b Mon Sep 17 00:00:00 2001 From: Cloud Debugger Team Date: Tue, 5 Feb 2019 13:38:31 -0500 Subject: [PATCH 145/241] Project import generated by Copybara. 
PiperOrigin-RevId: 232514149 Change-Id: Ie0c3361d099ea9812207a1ac703c41544ce386d0 --- src/build.sh | 0 src/googleclouddebugger/__init__.py | 4 +-- src/googleclouddebugger/__main__.py | 4 +++ .../breakpoints_manager.py | 8 +++++ src/googleclouddebugger/capture_collector.py | 2 +- src/googleclouddebugger/common.h | 2 -- src/googleclouddebugger/gcp_hub_client.py | 2 +- src/googleclouddebugger/labels.py | 8 ++--- src/googleclouddebugger/leaky_bucket.h | 1 - src/googleclouddebugger/module_explorer.py | 7 ++++ src/googleclouddebugger/native_module.cc | 10 +++--- src/googleclouddebugger/nullable.h | 1 - src/googleclouddebugger/python_breakpoint.py | 33 +++++++++++++++++++ 13 files changed, 64 insertions(+), 18 deletions(-) mode change 100644 => 100755 src/build.sh diff --git a/src/build.sh b/src/build.sh old mode 100644 new mode 100755 diff --git a/src/googleclouddebugger/__init__.py b/src/googleclouddebugger/__init__.py index a30b732..8d16481 100644 --- a/src/googleclouddebugger/__init__.py +++ b/src/googleclouddebugger/__init__.py @@ -33,7 +33,7 @@ from . import gcp_hub_client from . import glob_data_visibility_policy from . import yaml_data_visibility_config_reader -from . import cdbg_native +# MOE:insert from . import cdbg_native from . import version __version__ = version.__version__ @@ -48,7 +48,7 @@ def _StartDebugger(): global _hub_client global _breakpoints_manager - cdbg_native.InitializeModule(_flags) + # MOE:insert cdbg_native.InitializeModule(_flags) _hub_client = gcp_hub_client.GcpHubClient() visibility_policy = _GetVisibilityPolicy() diff --git a/src/googleclouddebugger/__main__.py b/src/googleclouddebugger/__main__.py index b6ef937..46a72e4 100644 --- a/src/googleclouddebugger/__main__.py +++ b/src/googleclouddebugger/__main__.py @@ -14,6 +14,10 @@ """Entry point for Python Cloud Debugger.""" # pylint: disable=invalid-name +# MOE:begin_strip +# This is executed when the debugger is started as a module. 
This is not +# how things are done in google3, so lint is unhappy about it. +# MOE:end_strip if __name__ == '__main__': import googleclouddebugger diff --git a/src/googleclouddebugger/breakpoints_manager.py b/src/googleclouddebugger/breakpoints_manager.py index 6a5496b..b4eba0c 100644 --- a/src/googleclouddebugger/breakpoints_manager.py +++ b/src/googleclouddebugger/breakpoints_manager.py @@ -129,6 +129,14 @@ def CheckBreakpointsExpiration(self): for breakpoint in expired_breakpoints: breakpoint.ExpireBreakpoint() + # MOE:begin_strip + def CheckCanariesApproval(self): + """Approves breakpoint canaries that have been healthy for long enough.""" + if getattr(self._hub_client, 'is_canary_task', False): + with self._lock: + for breakpoint in six.itervalues(self._active): + breakpoint.ApproveCanaryIfNeeded() + # MOE:end_strip @staticmethod def GetCurrentTime(): diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 01ffe66..38acb51 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -29,8 +29,8 @@ import six -from . import labels from . import cdbg_native as native +from . import labels # Externally defined functions to actually log a message. If these variables # are not initialized, the log action for breakpoints is invalid. diff --git a/src/googleclouddebugger/common.h b/src/googleclouddebugger/common.h index 98d393f..7d255eb 100644 --- a/src/googleclouddebugger/common.h +++ b/src/googleclouddebugger/common.h @@ -17,7 +17,6 @@ #ifndef DEVTOOLS_CDBG_DEBUGLETS_PYTHON_COMMON_H_ #define DEVTOOLS_CDBG_DEBUGLETS_PYTHON_COMMON_H_ - // // Open source includes and definition of common constants. 
// @@ -60,7 +59,6 @@ using google::LogSeverity; using google::AddLogSink; using google::RemoveLogSink; - // Python 3 compatibility #if PY_MAJOR_VERSION >= 3 // Python 2 has both an 'int' and a 'long' type, and Python 3 only as an 'int' diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index 365bf33..c399a06 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -38,9 +38,9 @@ import google.auth from google.oauth2 import service_account -from . import labels from . import backoff from . import cdbg_native as native +from . import labels from . import uniquifier_computer from . import version # This module catches all exception. This is safe because it runs in diff --git a/src/googleclouddebugger/labels.py b/src/googleclouddebugger/labels.py index 6e2999e..8070f82 100644 --- a/src/googleclouddebugger/labels.py +++ b/src/googleclouddebugger/labels.py @@ -14,7 +14,7 @@ """Defines the keys of the well known labels used by the cloud debugger. -DO NOT EDIT - This file is auto-generated +DO NOT EDIT """ @@ -23,7 +23,8 @@ class Breakpoint(object): SET_ALL = frozenset([ 'requestlogid', - ]) + ]) + class Debuggee(object): DOMAIN = 'domain' @@ -38,5 +39,4 @@ class Debuggee(object): 'module', 'version', 'minorversion', - ]) - + ]) diff --git a/src/googleclouddebugger/leaky_bucket.h b/src/googleclouddebugger/leaky_bucket.h index 0547539..4e0d400 100644 --- a/src/googleclouddebugger/leaky_bucket.h +++ b/src/googleclouddebugger/leaky_bucket.h @@ -63,7 +63,6 @@ class LeakyBucket { // indicates the current time in nanoseconds. int64 RefillBucket(int64 available_tokens, int64 current_time_ns); - // Atomically increment "tokens_". 
inline int64 AtomicIncrementTokens(int64 increment) { return tokens_ += increment; diff --git a/src/googleclouddebugger/module_explorer.py b/src/googleclouddebugger/module_explorer.py index b5ba8d5..8b32dfa 100644 --- a/src/googleclouddebugger/module_explorer.py +++ b/src/googleclouddebugger/module_explorer.py @@ -180,6 +180,13 @@ def CheckIgnoreCodeObject(code_object): if code_object_file == module_file: return False + # MOE:begin_strip + # "code_object.co_filename" has a google3 relative path in .par files, + # while "module.__file__" has a full path. + if (code_object_file.startswith('google3/') and + module_file.endswith(code_object_file)): + return False + # MOE:end_strip return True diff --git a/src/googleclouddebugger/native_module.cc b/src/googleclouddebugger/native_module.cc index f1e1ced..47ef4f6 100644 --- a/src/googleclouddebugger/native_module.cc +++ b/src/googleclouddebugger/native_module.cc @@ -16,7 +16,6 @@ // Ensure that Python.h is included before any other header. #include "common.h" - #include "bytecode_breakpoint.h" #include "common.h" #include "conditional_breakpoint.h" @@ -131,7 +130,6 @@ static PyObject* InitializeModule(PyObject* self, PyObject* py_args) { Py_RETURN_NONE; } - // Common code for LogXXX functions. // // The source file name and the source line are obtained automatically by @@ -356,10 +354,10 @@ static PyObject* ApplyDynamicLogsQuota(PyObject* self, PyObject* py_args) { static PyMethodDef g_module_functions[] = { { - "InitializeModule", - InitializeModule, - METH_VARARGS, - "Initialize C++ flags and logging." + "InitializeModule", + InitializeModule, + METH_VARARGS, + "Initialize C++ flags and logging." 
}, { "LogInfo", diff --git a/src/googleclouddebugger/nullable.h b/src/googleclouddebugger/nullable.h index a70ddb7..88703c3 100644 --- a/src/googleclouddebugger/nullable.h +++ b/src/googleclouddebugger/nullable.h @@ -17,7 +17,6 @@ #ifndef DEVTOOLS_CDBG_DEBUGLETS_PYTHON_NULLABLE_H_ #define DEVTOOLS_CDBG_DEBUGLETS_PYTHON_NULLABLE_H_ - #include "common.h" namespace devtools { diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index 27eab63..49c79ba 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -79,6 +79,13 @@ 'refersTo': 'BREAKPOINT_CONDITION', 'description': {'format': ERROR_CONDITION_MUTABLE_0}})]) +# MOE:begin_strip +# Time to wait after setting a canary breakpoint to approve it. +# Since on_idle is typically called every 40 seconds because of the hanging get +# on ListActiveBreakpoints, this is value is effectively rounded up to the +# nearest multiple of 40 seconds. +_CANARY_APPROVAL_TIME_DELTA = timedelta(seconds=35) +# MOE:end_strip # The implementation of datetime.strptime imports an undocumented module called # _strptime. If it happens at the wrong time, we can get an exception about @@ -178,6 +185,11 @@ def __init__(self, definition, hub_client, breakpoints_manager, timedelta(definition.get('expires_in').get('seconds', 0)), self.expiration_period) + # MOE:begin_strip + self.is_canary = self.definition.get('isCanary', False) + self._canary_approval_time = ( + breakpoints_manager.GetCurrentTime() + _CANARY_APPROVAL_TIME_DELTA) + # MOE:end_strip self._hub_client = hub_client self._breakpoints_manager = breakpoints_manager @@ -267,6 +279,20 @@ def ExpireBreakpoint(self): 'refersTo': 'BREAKPOINT_AGE', 'description': {'format': message}}}) + # MOE:begin_strip + def ApproveCanaryIfNeeded(self): + """Approves this canary breakpoint if the needed amount of time has passed. + + Each breakpoint will only be approved at most one time. 
+ """ + # This doesn't get MOEified out on App Engine, so make sure this is done + # only for StubbyHubClient. + if (self.is_canary and + self._canary_approval_time < self._breakpoints_manager.GetCurrentTime() + and getattr(self._hub_client, 'EnqueueBreakpointCanaryApproval', None)): + self.is_canary = False + self._hub_client.EnqueueBreakpointCanaryApproval(self.definition) + # MOE:end_strip def _ActivateBreakpoint(self, module): """Sets the breakpoint in the loaded module, or complete with error.""" @@ -342,6 +368,13 @@ def _ActivateBreakpoint(self, module): line, condition, self._BreakpointEvent) + # MOE:begin_strip + # This doesn't get MOEified out on App Engine, so make sure this is done + # only for StubbyHubClient. + if (self.is_canary and + getattr(self._hub_client, 'EnqueueBreakpointCanaryRegistration', None)): + self._hub_client.EnqueueBreakpointCanaryRegistration(self.definition) + # MOE:end_strip def _RemoveImportHook(self): """Removes the import hook if one was installed.""" From f7e47a7bdecb936fd0167f5e14ac7c1890ed72cb Mon Sep 17 00:00:00 2001 From: Hamid Asaadi Date: Tue, 5 Feb 2019 13:41:11 -0800 Subject: [PATCH 146/241] Migrate scrubbing hints from MOE to Copybara (Python code) Also all duplicate scrubbing from common/ package is removed PiperOrigin-RevId: 232549429 Change-Id: I87a92b71058d3472d2fb900163db2f38e0a2a3eb --- src/googleclouddebugger/__init__.py | 4 +-- src/googleclouddebugger/__main__.py | 5 --- .../breakpoints_manager.py | 9 ----- src/googleclouddebugger/module_explorer.py | 8 ----- src/googleclouddebugger/python_breakpoint.py | 36 ------------------- 5 files changed, 2 insertions(+), 60 deletions(-) diff --git a/src/googleclouddebugger/__init__.py b/src/googleclouddebugger/__init__.py index 8d16481..a30b732 100644 --- a/src/googleclouddebugger/__init__.py +++ b/src/googleclouddebugger/__init__.py @@ -33,7 +33,7 @@ from . import gcp_hub_client from . import glob_data_visibility_policy from . 
import yaml_data_visibility_config_reader -# MOE:insert from . import cdbg_native +from . import cdbg_native from . import version __version__ = version.__version__ @@ -48,7 +48,7 @@ def _StartDebugger(): global _hub_client global _breakpoints_manager - # MOE:insert cdbg_native.InitializeModule(_flags) + cdbg_native.InitializeModule(_flags) _hub_client = gcp_hub_client.GcpHubClient() visibility_policy = _GetVisibilityPolicy() diff --git a/src/googleclouddebugger/__main__.py b/src/googleclouddebugger/__main__.py index 46a72e4..1f55572 100644 --- a/src/googleclouddebugger/__main__.py +++ b/src/googleclouddebugger/__main__.py @@ -14,11 +14,6 @@ """Entry point for Python Cloud Debugger.""" # pylint: disable=invalid-name -# MOE:begin_strip -# This is executed when the debugger is started as a module. This is not -# how things are done in google3, so lint is unhappy about it. -# MOE:end_strip - if __name__ == '__main__': import googleclouddebugger googleclouddebugger._DebuggerMain() diff --git a/src/googleclouddebugger/breakpoints_manager.py b/src/googleclouddebugger/breakpoints_manager.py index b4eba0c..07f4094 100644 --- a/src/googleclouddebugger/breakpoints_manager.py +++ b/src/googleclouddebugger/breakpoints_manager.py @@ -129,15 +129,6 @@ def CheckBreakpointsExpiration(self): for breakpoint in expired_breakpoints: breakpoint.ExpireBreakpoint() - # MOE:begin_strip - def CheckCanariesApproval(self): - """Approves breakpoint canaries that have been healthy for long enough.""" - if getattr(self._hub_client, 'is_canary_task', False): - with self._lock: - for breakpoint in six.itervalues(self._active): - breakpoint.ApproveCanaryIfNeeded() - # MOE:end_strip - @staticmethod def GetCurrentTime(): """Wrapper around datetime.now() function. 
diff --git a/src/googleclouddebugger/module_explorer.py b/src/googleclouddebugger/module_explorer.py index 8b32dfa..75edb05 100644 --- a/src/googleclouddebugger/module_explorer.py +++ b/src/googleclouddebugger/module_explorer.py @@ -180,14 +180,6 @@ def CheckIgnoreCodeObject(code_object): if code_object_file == module_file: return False - # MOE:begin_strip - # "code_object.co_filename" has a google3 relative path in .par files, - # while "module.__file__" has a full path. - if (code_object_file.startswith('google3/') and - module_file.endswith(code_object_file)): - return False - # MOE:end_strip - return True def CheckIgnoreClass(cls): diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index 49c79ba..0e5869c 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -79,14 +79,6 @@ 'refersTo': 'BREAKPOINT_CONDITION', 'description': {'format': ERROR_CONDITION_MUTABLE_0}})]) -# MOE:begin_strip -# Time to wait after setting a canary breakpoint to approve it. -# Since on_idle is typically called every 40 seconds because of the hanging get -# on ListActiveBreakpoints, this is value is effectively rounded up to the -# nearest multiple of 40 seconds. -_CANARY_APPROVAL_TIME_DELTA = timedelta(seconds=35) -# MOE:end_strip - # The implementation of datetime.strptime imports an undocumented module called # _strptime. If it happens at the wrong time, we can get an exception about # trying to import while another thread holds the import lock. 
This dummy call @@ -185,12 +177,6 @@ def __init__(self, definition, hub_client, breakpoints_manager, timedelta(definition.get('expires_in').get('seconds', 0)), self.expiration_period) - # MOE:begin_strip - self.is_canary = self.definition.get('isCanary', False) - self._canary_approval_time = ( - breakpoints_manager.GetCurrentTime() + _CANARY_APPROVAL_TIME_DELTA) - # MOE:end_strip - self._hub_client = hub_client self._breakpoints_manager = breakpoints_manager self._cookie = None @@ -279,21 +265,6 @@ def ExpireBreakpoint(self): 'refersTo': 'BREAKPOINT_AGE', 'description': {'format': message}}}) - # MOE:begin_strip - def ApproveCanaryIfNeeded(self): - """Approves this canary breakpoint if the needed amount of time has passed. - - Each breakpoint will only be approved at most one time. - """ - # This doesn't get MOEified out on App Engine, so make sure this is done - # only for StubbyHubClient. - if (self.is_canary and - self._canary_approval_time < self._breakpoints_manager.GetCurrentTime() - and getattr(self._hub_client, 'EnqueueBreakpointCanaryApproval', None)): - self.is_canary = False - self._hub_client.EnqueueBreakpointCanaryApproval(self.definition) - # MOE:end_strip - def _ActivateBreakpoint(self, module): """Sets the breakpoint in the loaded module, or complete with error.""" @@ -368,13 +339,6 @@ def _ActivateBreakpoint(self, module): line, condition, self._BreakpointEvent) - # MOE:begin_strip - # This doesn't get MOEified out on App Engine, so make sure this is done - # only for StubbyHubClient. 
- if (self.is_canary and - getattr(self._hub_client, 'EnqueueBreakpointCanaryRegistration', None)): - self._hub_client.EnqueueBreakpointCanaryRegistration(self.definition) - # MOE:end_strip def _RemoveImportHook(self): """Removes the import hook if one was installed.""" From 2b2c1f7f461b9a04ddf7f4c16fefd1418cdb772e Mon Sep 17 00:00:00 2001 From: Hamid Asaadi Date: Fri, 22 Feb 2019 07:54:52 -0800 Subject: [PATCH 147/241] Bump version for python agents PiperOrigin-RevId: 235191810 Change-Id: Iee6096c756adf5c55f221f7dd07b0d1d804b0bef --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index cf952cc..6dd18e5 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. -__version__ = '2.9' +__version__ = '2.10' From 98ae44e9e0b52f6c6874753695e8e21c9598969a Mon Sep 17 00:00:00 2001 From: James McTavish Date: Fri, 22 Feb 2019 11:55:01 -0800 Subject: [PATCH 148/241] Add support for Cloud Run environment variables. The Python debug agent will now: Use K_SERVICE to populate the service context (if available). Use K_REVISION to populate the version context (if available). PiperOrigin-RevId: 235231863 Change-Id: Ia0cba9465b2b0b44f86411ae5692d41c184e353c --- src/googleclouddebugger/gcp_hub_client.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index c399a06..acc5ea0 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -56,8 +56,10 @@ # a map is optional environment variable that can be used to set the flag # (flags still take precedence). 
_DEBUGGEE_LABELS = { - labels.Debuggee.MODULE: ['GAE_SERVICE', 'GAE_MODULE_NAME'], - labels.Debuggee.VERSION: ['GAE_VERSION', 'GAE_MODULE_VERSION'], + labels.Debuggee.MODULE: ['GAE_SERVICE', 'GAE_MODULE_NAME', 'K_SERVICE'], + labels.Debuggee.VERSION: [ + 'GAE_VERSION', 'GAE_MODULE_VERSION', 'K_REVISION' + ], labels.Debuggee.MINOR_VERSION: ['GAE_DEPLOYMENT_ID', 'GAE_MINOR_VERSION'] } From 89ce3782c98b814838a3ecb5479ed3882368cbee Mon Sep 17 00:00:00 2001 From: Hamid Asaadi Date: Fri, 8 Mar 2019 12:08:06 -0800 Subject: [PATCH 149/241] Update external build metadata to state explicit support for Python 3.7. The OSS issue is here: https://github.com/GoogleCloudPlatform/cloud-debug-python/issues/8 PiperOrigin-RevId: 237494949 Change-Id: I7f0b17164baf6f51007ad523d44e0a25e10e8e89 --- src/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/setup.py b/src/setup.py index 8b9ea30..0564757 100644 --- a/src/setup.py +++ b/src/setup.py @@ -120,6 +120,7 @@ def ReadConfig(section, value, default): classifiers=[ 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', 'Development Status :: 3 - Alpha', 'Intended Audience :: Developers', ]) From 2f30fe874ca8e07cd3ec2872890a63e51e571f13 Mon Sep 17 00:00:00 2001 From: Cloud Debugger Team Date: Mon, 25 Mar 2019 08:35:48 -0700 Subject: [PATCH 150/241] Formatting fixes. PiperOrigin-RevId: 240144984 Change-Id: I30d7c49cc7bf2856e9effe1d9bdc19299db09740 --- src/googleclouddebugger/common.h | 1 - src/googleclouddebugger/native_module.cc | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/common.h b/src/googleclouddebugger/common.h index 7d255eb..75889d2 100644 --- a/src/googleclouddebugger/common.h +++ b/src/googleclouddebugger/common.h @@ -17,7 +17,6 @@ #ifndef DEVTOOLS_CDBG_DEBUGLETS_PYTHON_COMMON_H_ #define DEVTOOLS_CDBG_DEBUGLETS_PYTHON_COMMON_H_ -// // Open source includes and definition of common constants. 
// diff --git a/src/googleclouddebugger/native_module.cc b/src/googleclouddebugger/native_module.cc index 47ef4f6..86eaf4b 100644 --- a/src/googleclouddebugger/native_module.cc +++ b/src/googleclouddebugger/native_module.cc @@ -16,6 +16,7 @@ // Ensure that Python.h is included before any other header. #include "common.h" + #include "bytecode_breakpoint.h" #include "common.h" #include "conditional_breakpoint.h" From da3bb1401a18ff603a26bae2e2bd2eaebcc697d4 Mon Sep 17 00:00:00 2001 From: Jason Borg Date: Thu, 4 Apr 2019 14:08:38 -0700 Subject: [PATCH 151/241] Updated the python agent to not override any pre-existing labels in the breakpoint if there was a conflict with a label it wanted to add. Expected agent behaviour with respect to pre-existing breakpoint labels is that they are not removed or overridden by the agent. PiperOrigin-RevId: 242004674 Change-Id: I218c615b715ae74ea958f1e8d509a7e07668378f --- src/googleclouddebugger/capture_collector.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 38acb51..300b6db 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -642,7 +642,7 @@ def _CaptureEnvironmentLabels(self): if callable(breakpoint_labels_collector): for (key, value) in six.iteritems(breakpoint_labels_collector()): - self.breakpoint['labels'][key] = value + self._StoreLabel(key, value) def _CaptureRequestLogId(self): """Captures the request log id if possible. 
@@ -654,8 +654,7 @@ def _CaptureRequestLogId(self): request_log_id = request_log_id_collector() if request_log_id: # We have a request_log_id, save it into the breakpoint labels - self.breakpoint['labels'][ - labels.Breakpoint.REQUEST_LOG_ID] = request_log_id + self._StoreLabel(labels.Breakpoint.REQUEST_LOG_ID, request_log_id) def _CaptureUserId(self): """Captures the user id of the end user, if possible.""" @@ -663,6 +662,20 @@ def _CaptureUserId(self): if user_kind and user_id: self.breakpoint['evaluatedUserId'] = {'kind': user_kind, 'id': user_id} + def _StoreLabel(self, name, value): + """Stores the specified label in the breakpoint's labels. + + In the event of a duplicate label, favour the pre-existing labels. This + generally should not be an issue as the pre-existing client label names are + chosen with care and there should be no conflicts. + + Args: + name: The name of the label to be stored. + value: The value of the label to be stored. + """ + if name not in self.breakpoint['labels']: + self.breakpoint['labels'][name] = value + class LogCollector(object): """Captures minimal application snapshot and logs it to application log. From 4ca478a85929d42366e71b1dec1ca1a0f4b54abb Mon Sep 17 00:00:00 2001 From: Cloud Debugger Team Date: Tue, 7 May 2019 11:45:39 -0700 Subject: [PATCH 152/241] Replace unsafe YAML loads with safe loads. PiperOrigin-RevId: 247060974 Change-Id: I08dec7abea9b7ec06454c2c42bff838f3a02250a --- src/googleclouddebugger/yaml_data_visibility_config_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/yaml_data_visibility_config_reader.py b/src/googleclouddebugger/yaml_data_visibility_config_reader.py index ebf4db7..7f8e178 100644 --- a/src/googleclouddebugger/yaml_data_visibility_config_reader.py +++ b/src/googleclouddebugger/yaml_data_visibility_config_reader.py @@ -104,7 +104,7 @@ def Read(f): Error (some subclass): If there is a problem loading or parsing the file. 
""" try: - yaml_data = yaml.load(f) + yaml_data = yaml.safe_load(f) except yaml.YAMLError as e: raise ParseError('%s' % e) except IOError as e: From 8ff2fdbac8786177c2e5ac7eb949b51171786bc4 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Wed, 8 May 2019 12:07:06 -0700 Subject: [PATCH 153/241] Modernize exception handling syntax. PiperOrigin-RevId: 247262373 Change-Id: Ib6d0ceb49fed6c64c7942427a90e8ac33b560d4f --- src/googleclouddebugger/yaml_data_visibility_config_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/yaml_data_visibility_config_reader.py b/src/googleclouddebugger/yaml_data_visibility_config_reader.py index 7f8e178..198af80 100644 --- a/src/googleclouddebugger/yaml_data_visibility_config_reader.py +++ b/src/googleclouddebugger/yaml_data_visibility_config_reader.py @@ -17,7 +17,7 @@ Example Usage: try: config = yaml_data_visibility_config_reader.OpenAndRead(filename) - except yaml_data_visibility_config_reader.Error, e: + except yaml_data_visibility_config_reader.Error as e: ... visibility_policy = GlobDataVisibilityPolicy( From 4b4f28a04cf1ad7dcf0f7374a832738c5f9f5880 Mon Sep 17 00:00:00 2001 From: Hamid Asaadi Date: Thu, 9 May 2019 07:20:02 -0700 Subject: [PATCH 154/241] Bump python agent version for Cloud Run support. PiperOrigin-RevId: 247419773 Change-Id: I9c3d6d4ca97604946394f75030f9007d54725b83 --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index 6dd18e5..ac49d7c 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. 
-__version__ = '2.10' +__version__ = '2.11' From 2bbc6aa7339d3d07b17515949a566cd8af385d7d Mon Sep 17 00:00:00 2001 From: James McTavish Date: Thu, 9 May 2019 11:56:01 -0700 Subject: [PATCH 155/241] Fix some typos. PiperOrigin-RevId: 247469722 Change-Id: I43e56eacc93dbde4e4450ec56f40e35576e7e43c --- src/googleclouddebugger/capture_collector.py | 4 ++-- src/googleclouddebugger/gcp_hub_client.py | 2 +- src/googleclouddebugger/glob_data_visibility_policy.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 300b6db..47cd775 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -400,12 +400,12 @@ def CaptureNamedVariable(self, name, value, depth, limits): name = str(id(name)) self._total_size += len(name) - v = (self.CheckDataVisiblity(value) or + v = (self.CheckDataVisibility(value) or self.CaptureVariable(value, depth, limits)) v['name'] = name return v - def CheckDataVisiblity(self, value): + def CheckDataVisibility(self, value): """Returns a status object if the given name is not visible. Args: diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index acc5ea0..a6528fa 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -146,7 +146,7 @@ def filter(self, record): def InitializeDebuggeeLabels(self, flags): """Initialize debuggee labels from environment variables and flags. - The caller passes all the flags that the the debuglet got. This function + The caller passes all the flags that the debuglet got. This function will only use the flags used to label the debuggee. Flags take precedence over environment variables. 
diff --git a/src/googleclouddebugger/glob_data_visibility_policy.py b/src/googleclouddebugger/glob_data_visibility_policy.py index 2664acc..00255ef 100644 --- a/src/googleclouddebugger/glob_data_visibility_policy.py +++ b/src/googleclouddebugger/glob_data_visibility_policy.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Determines the visibilty of python data and symbols. +"""Determines the visibility of python data and symbols. Example Usage: From 16a30bccbe43b09bb973bb696f0869fa523c6c9c Mon Sep 17 00:00:00 2001 From: James McTavish Date: Wed, 19 Jun 2019 07:40:05 -0700 Subject: [PATCH 156/241] Retry breakpoint updates on socket error. Register and list breakpoint operations already behave in a sane manner on socket errors, but update breakpoint will currently drop the update and never retry it. PiperOrigin-RevId: 253993634 Change-Id: I253e6dc441fd2d306be27d8b728172019156ca44 --- src/googleclouddebugger/gcp_hub_client.py | 27 +++++++++++++++++------ 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index a6528fa..7316669 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -22,6 +22,7 @@ import logging import os import platform +import socket import sys import threading import time @@ -418,17 +419,29 @@ def _TransmitBreakpointUpdates(self, service): # not be retried. All other errors are assumed to be transient. 
status = err.resp.status is_transient = ((status >= 500) or (status == 408)) - if is_transient and retry_count < self.max_transmit_attempts - 1: - native.LogInfo('Failed to send breakpoint %s update: %s' % ( - breakpoint['id'], traceback.format_exc())) - retry_list.append((breakpoint, retry_count + 1)) - elif is_transient: - native.LogWarning( - 'Breakpoint %s retry count exceeded maximum' % breakpoint['id']) + if is_transient: + if retry_count < self.max_transmit_attempts - 1: + native.LogInfo('Failed to send breakpoint %s update: %s' % + (breakpoint['id'], traceback.format_exc())) + retry_list.append((breakpoint, retry_count + 1)) + else: + native.LogWarning('Breakpoint %s retry count exceeded maximum' % + breakpoint['id']) else: # This is very common if multiple instances are sending final update # simultaneously. native.LogInfo('%s, breakpoint: %s' % (err, breakpoint['id'])) + except socket.error as err: + if retry_count < self.max_transmit_attempts - 1: + native.LogInfo( + 'Socket error %d while sending breakpoint %s update: %s' % + (err.errno, breakpoint['id'], traceback.format_exc())) + retry_list.append((breakpoint, retry_count + 1)) + else: + native.LogWarning('Breakpoint %s retry count exceeded maximum' % + breakpoint['id']) + # Socket errors shouldn't persist like this; reconnect. 
+ reconnect = True except BaseException: native.LogWarning( 'Fatal error sending breakpoint %s update: %s' % ( From 153fce9cfab1b871581db7f495d579fbaae59c53 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Wed, 19 Jun 2019 10:32:24 -0700 Subject: [PATCH 157/241] Release python agent version 2.12 PiperOrigin-RevId: 254023705 Change-Id: Idbced6e7cd329e27e2c4f97946cb8a732c5cd46e --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index ac49d7c..0a1a6ff 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. -__version__ = '2.11' +__version__ = '2.12' From 29a38386482e08e89e6b1b4a3e6804a97bcbc79d Mon Sep 17 00:00:00 2001 From: Cloud Debugger Team Date: Tue, 23 Jul 2019 13:58:41 -0700 Subject: [PATCH 158/241] Internal change PiperOrigin-RevId: 259605458 Change-Id: I468180cd0ba5a8d65bcb227dde25081137efc8ee --- src/googleclouddebugger/leaky_bucket.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/googleclouddebugger/leaky_bucket.h b/src/googleclouddebugger/leaky_bucket.h index 4e0d400..3b72ef6 100644 --- a/src/googleclouddebugger/leaky_bucket.h +++ b/src/googleclouddebugger/leaky_bucket.h @@ -65,12 +65,12 @@ class LeakyBucket { // Atomically increment "tokens_". inline int64 AtomicIncrementTokens(int64 increment) { - return tokens_ += increment; + return tokens_.fetch_add(increment, std::memory_order_relaxed) + increment; } // Atomically load the value of "tokens_". 
inline int64 AtomicLoadTokens() const { - return tokens_; + return tokens_.load(std::memory_order_relaxed); } private: From 89c4d710733118dd4ee80a5c808e1509a7977288 Mon Sep 17 00:00:00 2001 From: Louis Ye Date: Thu, 25 Jul 2019 07:11:17 -0700 Subject: [PATCH 159/241] Add yelouis to cloud debugger Python agent copy.bara.sky file PiperOrigin-RevId: 259940524 Change-Id: Ic60e4d959e9d46aad6b37bf9464a9f6751657bf1 --- src/googleclouddebugger/bytecode_breakpoint.cc | 2 +- src/googleclouddebugger/bytecode_breakpoint.h | 4 ++-- src/googleclouddebugger/bytecode_manipulator.cc | 2 +- src/googleclouddebugger/capture_collector.py | 14 +++++++------- src/googleclouddebugger/conditional_breakpoint.cc | 2 +- src/googleclouddebugger/immutability_tracer.cc | 10 +++++----- src/googleclouddebugger/imphook2.py | 4 ++-- src/googleclouddebugger/python_breakpoint.py | 10 +++++----- src/googleclouddebugger/python_util.cc | 2 +- 9 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/googleclouddebugger/bytecode_breakpoint.cc b/src/googleclouddebugger/bytecode_breakpoint.cc index ed53eab..58447b1 100644 --- a/src/googleclouddebugger/bytecode_breakpoint.cc +++ b/src/googleclouddebugger/bytecode_breakpoint.cc @@ -48,7 +48,7 @@ void BytecodeBreakpoint::Detach() { it->second->breakpoints.clear(); PatchCodeObject(it->second); - // TODO(vlif): assert zombie_refs.empty() after garbage collection + // TODO: assert zombie_refs.empty() after garbage collection // for zombie refs is implemented. delete it->second; diff --git a/src/googleclouddebugger/bytecode_breakpoint.h b/src/googleclouddebugger/bytecode_breakpoint.h index bbc9da9..b3edadf 100644 --- a/src/googleclouddebugger/bytecode_breakpoint.h +++ b/src/googleclouddebugger/bytecode_breakpoint.h @@ -29,7 +29,7 @@ namespace cdbg { // Sets breakpoints in Python code with zero runtime overhead. // BytecodeBreakpoint rewrites Python bytecode to insert a breakpoint. The // implementation is specific to CPython 2.7. 
-// TODO(vlif): rename to BreakpointsEmulator when the original implementation +// TODO: rename to BreakpointsEmulator when the original implementation // of BreakpointsEmulator goes away. class BytecodeBreakpoint { public: @@ -94,7 +94,7 @@ class BytecodeBreakpoint { // constants. Instead we store these references in a special zombie pool. // Then once we know that no Python thread is executing the code object, // we can release all of them. - // TODO(vlif): implement garbage collection for zombie refs. + // TODO: implement garbage collection for zombie refs. std::vector zombie_refs; // Original value of PyCodeObject::co_stacksize before patching. diff --git a/src/googleclouddebugger/bytecode_manipulator.cc b/src/googleclouddebugger/bytecode_manipulator.cc index 6bdb09a..ade12c6 100644 --- a/src/googleclouddebugger/bytecode_manipulator.cc +++ b/src/googleclouddebugger/bytecode_manipulator.cc @@ -756,7 +756,7 @@ bool BytecodeManipulator::AppendMethodCall( // block. Unfortunately not all instructions can be moved: // 1. Instructions with relative offset can't be moved forward, because // the offset can't be negative. - // TODO(vlif): FORWARD_JUMP can be replaced with ABSOLUTE_JUMP. + // TODO: FORWARD_JUMP can be replaced with ABSOLUTE_JUMP. // 2. YIELD_VALUE can't be moved because generator object keeps the frame // object in between "yield" calls. If the breakpoint is added or // removed, subsequent calls into the generator will jump into invalid diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index 47cd775..fc79366 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -14,7 +14,7 @@ """Captures application state on a breakpoint hit.""" -# TODO(vlif): rename this file to collector.py. +# TODO: rename this file to collector.py. 
import copy import datetime @@ -53,7 +53,7 @@ _DATE_TYPES = (datetime.date, datetime.time, datetime.timedelta) _VECTOR_TYPES = (tuple, list, set) -# TODO(vlif): move to messages.py module. +# TODO: move to messages.py module. EMPTY_DICTIONARY = 'Empty dictionary' EMPTY_COLLECTION = 'Empty collection' OBJECT_HAS_NO_FIELDS = 'Object has no fields' @@ -396,7 +396,7 @@ def CaptureNamedVariable(self, name, value, depth, limits): """ if not hasattr(name, '__dict__'): name = str(name) - else: # TODO(vlif): call str(name) with immutability verifier here. + else: # TODO: call str(name) with immutability verifier here. name = str(id(name)) self._total_size += len(name) @@ -488,8 +488,8 @@ def CaptureVariable(self, value, depth, limits, can_enqueue=True): def CaptureVariableInternal(self, value, depth, limits, can_enqueue=True): """Captures a single nameless object into Variable message. - TODO(vlif): safely evaluate iterable types. - TODO(vlif): safely call str(value) + TODO: safely evaluate iterable types. + TODO: safely call str(value) Args: value: data to capture @@ -537,7 +537,7 @@ def CaptureVariableInternal(self, value, depth, limits, can_enqueue=True): if isinstance(value, types.FunctionType): self._total_size += len(value.__name__) - # TODO(vlif): set value to func_name and type to 'function' + # TODO: set value to func_name and type to 'function' return {'value': 'function ' + value.__name__} if isinstance(value, Exception): @@ -567,7 +567,7 @@ def CaptureVariableInternal(self, value, depth, limits, can_enqueue=True): 'type': object_type} if not hasattr(value, '__dict__'): - # TODO(vlif): keep "value" empty and populate the "type" field instead. + # TODO: keep "value" empty and populate the "type" field instead. 
r = str(type(value)) self._total_size += len(r) return {'value': r} diff --git a/src/googleclouddebugger/conditional_breakpoint.cc b/src/googleclouddebugger/conditional_breakpoint.cc index eed062c..ca531cc 100644 --- a/src/googleclouddebugger/conditional_breakpoint.cc +++ b/src/googleclouddebugger/conditional_breakpoint.cc @@ -80,7 +80,7 @@ bool ConditionalBreakpoint::EvaluateCondition(PyFrameObject* frame) { line_count = immutability_tracer.GetLineCount(); } - // TODO(vlif): clear breakpoint if condition evaluation failed due to + // TODO: clear breakpoint if condition evaluation failed due to // mutable code or timeout. auto eval_exception = ClearPythonException(); diff --git a/src/googleclouddebugger/immutability_tracer.cc b/src/googleclouddebugger/immutability_tracer.cc index 1744863..e360a39 100644 --- a/src/googleclouddebugger/immutability_tracer.cc +++ b/src/googleclouddebugger/immutability_tracer.cc @@ -414,7 +414,7 @@ static OpcodeMutableStatus IsOpcodeMutable(const uint8 opcode) { case IMPORT_FROM: case SETUP_EXCEPT: case SETUP_FINALLY: - // TODO(xinghuadou): allow changing fields of locally created objects/lists. + // TODO: allow changing fields of locally created objects/lists. 
case STORE_SUBSCR: case DELETE_SUBSCR: case STORE_NAME: @@ -425,11 +425,11 @@ static OpcodeMutableStatus IsOpcodeMutable(const uint8 opcode) { case SET_ADD: case MAP_ADD: case STORE_DEREF: - // TODO(xinghuadou): allow exception handling + // TODO: allow exception handling case RAISE_VARARGS: case END_FINALLY: case SETUP_WITH: - // TODO(xinghuadou): allow closures + // TODO: allow closures case LOAD_CLOSURE: #if PY_MAJOR_VERSION >= 3 case GET_AITER: @@ -523,7 +523,7 @@ void ImmutabilityTracer::ProcessCodeRange(const uint8* code_start, void ImmutabilityTracer::ProcessCCall(PyObject* function) { if (PyCFunction_Check(function)) { - // TODO(vlif): the application code can define its own "str" function + // TODO: the application code can define its own "str" function // that will do some evil things. Application can also override builtin // "str" method. If we want to protect against it, we should load pointers // to native functions when debugger initializes (which happens before @@ -553,7 +553,7 @@ void ImmutabilityTracer::ProcessCCall(PyObject* function) { void ImmutabilityTracer::SetMutableCodeException() { - // TODO(vlif): use custom type for this exception. This way we can provide + // TODO: use custom type for this exception. This way we can provide // a more detailed error message. PyErr_SetString( PyExc_SystemError, diff --git a/src/googleclouddebugger/imphook2.py b/src/googleclouddebugger/imphook2.py index ba94484..b4cf004 100644 --- a/src/googleclouddebugger/imphook2.py +++ b/src/googleclouddebugger/imphook2.py @@ -68,7 +68,7 @@ def AddImportCallbackBySuffix(path, callback): the callback will be invoked. This function does not validates the existence of such a module and it's the responsibility of the caller. - TODO(erezh): handle module reload. + TODO: handle module reload. Args: path: python module file path. 
It may be missing the directories for the @@ -418,7 +418,7 @@ def GetModuleFromName(name, path): nonempty_modules = (m for m in modules if m) for module in nonempty_modules: - # TODO(emrekultursay): Write unit test to cover None case. + # TODO: Write unit test to cover None case. mod_file = getattr(module, '__file__', None) if not mod_file: continue diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index 0e5869c..a3ed6bb 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -26,7 +26,7 @@ from . import module_search2 from . import module_utils2 -# TODO(vlif): move to messages.py module. +# TODO: move to messages.py module. # Use the following schema to define breakpoint error message constant: # ERROR___ ERROR_LOCATION_FILE_EXTENSION_0 = ( @@ -126,7 +126,7 @@ def _MultipleModulesFoundError(path, candidates): def _NormalizePath(path): """Removes surrounding whitespace, leading separator and normalize.""" - # TODO(emrekultursay): Calling os.path.normpath "may change the meaning of a + # TODO: Calling os.path.normpath "may change the meaning of a # path that contains symbolic links" (e.g., "A/foo/../B" != "A/B" if foo is a # symlink). This might cause trouble when matching against loaded module # paths. We should try to avoid using it. @@ -239,7 +239,7 @@ def GetBreakpointId(self): def GetExpirationTime(self): """Computes the timestamp at which this breakpoint will expire.""" - # TODO(emrekultursay): Move this to a common method. + # TODO: Move this to a common method. if '.' not in self.definition['createTime']: fmt = '%Y-%m-%dT%H:%M:%S%Z' else: @@ -278,7 +278,7 @@ def _ActivateBreakpoint(self, module): if not status: # First two parameters are common: the line of the breakpoint and the # module we are trying to insert the breakpoint in. - # TODO(emrekultursay): Do not display the entire path of the file. 
Either + # TODO: Do not display the entire path of the file. Either # strip some prefix, or display the path in the breakpoint. params = [str(line), os.path.splitext(module.__file__)[0] + '.py'] @@ -398,7 +398,7 @@ def _BreakpointEvent(self, event, frame): collector = capture_collector.CaptureCollector( self.definition, self.data_visibility_policy) - # TODO(b/69119299): This is a temporary try/except. All exceptions should be + # TODO: This is a temporary try/except. All exceptions should be # caught inside Collect and converted into breakpoint error messages. try: collector.Collect(frame) diff --git a/src/googleclouddebugger/python_util.cc b/src/googleclouddebugger/python_util.cc index d86d8b9..0533ad7 100644 --- a/src/googleclouddebugger/python_util.cc +++ b/src/googleclouddebugger/python_util.cc @@ -181,7 +181,7 @@ Nullable ClearPythonException() { return Nullable(); // return nullptr. } - // TODO(vlif): call str(exception_obj) with a verification of immutability + // TODO: call str(exception_obj) with a verification of immutability // that the object state is not being altered. 
auto exception_type = reinterpret_cast(exception_obj->ob_type); From 1dae55a099b50fddc975b8a40cbf5f4636a4e5d6 Mon Sep 17 00:00:00 2001 From: Louis Ye Date: Thu, 15 Aug 2019 07:11:35 -0700 Subject: [PATCH 160/241] Python debugger agent logs version upon initialization PiperOrigin-RevId: 263557006 Change-Id: Ib4a2306ca20b4b5188b46640ce46b1a4b99dec78 --- src/googleclouddebugger/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/googleclouddebugger/__init__.py b/src/googleclouddebugger/__init__.py index a30b732..e631edf 100644 --- a/src/googleclouddebugger/__init__.py +++ b/src/googleclouddebugger/__init__.py @@ -49,6 +49,8 @@ def _StartDebugger(): global _breakpoints_manager cdbg_native.InitializeModule(_flags) + cdbg_native.LogInfo('Initializing Cloud Debugger Python agent version: %s' % + __version__) _hub_client = gcp_hub_client.GcpHubClient() visibility_policy = _GetVisibilityPolicy() From e87c0077e7ad494548f61dc4375948e76bd886f1 Mon Sep 17 00:00:00 2001 From: Louis Ye Date: Mon, 9 Sep 2019 10:55:40 -0700 Subject: [PATCH 161/241] Handle module file being non-str type in python agent. 
PiperOrigin-RevId: 268031522 Change-Id: I06557c6191847b2433cde2f49116a4b16ddbcf13 --- src/googleclouddebugger/imphook2.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/googleclouddebugger/imphook2.py b/src/googleclouddebugger/imphook2.py index b4cf004..1aeb89f 100644 --- a/src/googleclouddebugger/imphook2.py +++ b/src/googleclouddebugger/imphook2.py @@ -422,6 +422,8 @@ def GetModuleFromName(name, path): mod_file = getattr(module, '__file__', None) if not mod_file: continue + if not isinstance(mod_file, str): + continue mod_root = os.path.splitext(mod_file)[0] From 3dc4c44f55b455dfbfcfa66cb88a5034046b96d3 Mon Sep 17 00:00:00 2001 From: Louis Ye Date: Mon, 9 Sep 2019 11:19:42 -0700 Subject: [PATCH 162/241] Bump cloud debugger Python agent to version 2.13 PiperOrigin-RevId: 268037333 Change-Id: Ifa5be1fb3a0be1a07dd66a07927c58df68abd30e --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index 0a1a6ff..221dfed 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. -__version__ = '2.12' +__version__ = '2.13' From 5a1590ed0efcc08b469db40cf7b4db39b65d20a4 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Wed, 11 Sep 2019 10:28:16 -0700 Subject: [PATCH 163/241] Adhere to the style guide for include formatting. 
PiperOrigin-RevId: 268488337 Change-Id: Ia8ee81617915be312e5e8c52c5d3d804f9b5db80 --- src/googleclouddebugger/bytecode_manipulator.cc | 1 + src/googleclouddebugger/bytecode_manipulator.h | 1 + src/googleclouddebugger/immutability_tracer.h | 1 + src/googleclouddebugger/python_callback.h | 1 + src/googleclouddebugger/python_util.h | 1 + src/googleclouddebugger/rate_limit.h | 1 + 6 files changed, 6 insertions(+) diff --git a/src/googleclouddebugger/bytecode_manipulator.cc b/src/googleclouddebugger/bytecode_manipulator.cc index ade12c6..e201fe0 100644 --- a/src/googleclouddebugger/bytecode_manipulator.cc +++ b/src/googleclouddebugger/bytecode_manipulator.cc @@ -18,6 +18,7 @@ #include "common.h" #include "bytecode_manipulator.h" + #include namespace devtools { diff --git a/src/googleclouddebugger/bytecode_manipulator.h b/src/googleclouddebugger/bytecode_manipulator.h index 3177bdd..5046506 100644 --- a/src/googleclouddebugger/bytecode_manipulator.h +++ b/src/googleclouddebugger/bytecode_manipulator.h @@ -18,6 +18,7 @@ #define DEVTOOLS_CDBG_DEBUGLETS_PYTHON_BYTECODE_MANIPULATOR_H_ #include + #include "common.h" namespace devtools { diff --git a/src/googleclouddebugger/immutability_tracer.h b/src/googleclouddebugger/immutability_tracer.h index 49b351f..6f9b9b1 100644 --- a/src/googleclouddebugger/immutability_tracer.h +++ b/src/googleclouddebugger/immutability_tracer.h @@ -18,6 +18,7 @@ #define DEVTOOLS_CDBG_DEBUGLETS_PYTHON_IMMUTABILITY_TRACER_H_ #include + #include "common.h" #include "python_util.h" diff --git a/src/googleclouddebugger/python_callback.h b/src/googleclouddebugger/python_callback.h index 9c86fb6..2e258f3 100644 --- a/src/googleclouddebugger/python_callback.h +++ b/src/googleclouddebugger/python_callback.h @@ -18,6 +18,7 @@ #define DEVTOOLS_CDBG_DEBUGLETS_PYTHON_PYTHON_CALLBACK_H_ #include + #include "common.h" #include "python_util.h" diff --git a/src/googleclouddebugger/python_util.h b/src/googleclouddebugger/python_util.h index f4db534..3c2ca56 100644 
--- a/src/googleclouddebugger/python_util.h +++ b/src/googleclouddebugger/python_util.h @@ -19,6 +19,7 @@ #include #include + #include "common.h" #include "nullable.h" diff --git a/src/googleclouddebugger/rate_limit.h b/src/googleclouddebugger/rate_limit.h index c7db0c0..a7cf976 100644 --- a/src/googleclouddebugger/rate_limit.h +++ b/src/googleclouddebugger/rate_limit.h @@ -18,6 +18,7 @@ #define DEVTOOLS_CDBG_DEBUGLETS_PYTHON_RATE_LIMIT_H_ #include + #include "leaky_bucket.h" #include "common.h" From 0ad6fc71dc6448f735321bc715df55101075ff2d Mon Sep 17 00:00:00 2001 From: Cloud Debugger Team Date: Fri, 4 Oct 2019 00:16:53 -0400 Subject: [PATCH 164/241] Internal change PiperOrigin-RevId: 272803181 Change-Id: I427e8044887ab867a905d1db5c6731406709e29d --- src/googleclouddebugger/python_util.cc | 16 ++++++---------- src/googleclouddebugger/python_util.h | 4 ++-- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/googleclouddebugger/python_util.cc b/src/googleclouddebugger/python_util.cc index 0533ad7..9007e44 100644 --- a/src/googleclouddebugger/python_util.cc +++ b/src/googleclouddebugger/python_util.cc @@ -174,18 +174,17 @@ bool RegisterPythonType(PyTypeObject* type) { return true; } - -Nullable ClearPythonException() { +Nullable ClearPythonException() { PyObject* exception_obj = PyErr_Occurred(); if (exception_obj == nullptr) { - return Nullable(); // return nullptr. + return Nullable(); // return nullptr. } // TODO: call str(exception_obj) with a verification of immutability // that the object state is not being altered. 
auto exception_type = reinterpret_cast(exception_obj->ob_type); - string msg = exception_type->tp_name; + std::string msg = exception_type->tp_name; #ifndef NDEBUG PyErr_Print(); @@ -202,10 +201,9 @@ Nullable ClearPythonException() { PyErr_Clear(); - return Nullable(msg); + return Nullable(msg); } - PyObject* GetDebugletModuleObject(const char* key) { PyObject* module_dict = PyModule_GetDict(GetDebugletModule()); if (module_dict == nullptr) { @@ -222,8 +220,7 @@ PyObject* GetDebugletModuleObject(const char* key) { return object; } - -string CodeObjectDebugString(PyCodeObject* code_object) { +std::string CodeObjectDebugString(PyCodeObject* code_object) { if (code_object == nullptr) { return ""; } @@ -232,7 +229,7 @@ string CodeObjectDebugString(PyCodeObject* code_object) { return ""; } - string str; + std::string str; if ((code_object->co_name != nullptr) && PyBytes_CheckExact(code_object->co_name)) { @@ -253,7 +250,6 @@ string CodeObjectDebugString(PyCodeObject* code_object) { return str; } - std::vector PyBytesToByteArray(PyObject* obj) { DCHECK(PyBytes_CheckExact(obj)); diff --git a/src/googleclouddebugger/python_util.h b/src/googleclouddebugger/python_util.h index 3c2ca56..139d9dc 100644 --- a/src/googleclouddebugger/python_util.h +++ b/src/googleclouddebugger/python_util.h @@ -303,14 +303,14 @@ ScopedPyObject NewNativePythonObject() { // Checks whether the previous call generated an exception. If not, returns // nullptr. Otherwise formats the exception to string. -Nullable ClearPythonException(); +Nullable ClearPythonException(); // Gets Python object from dictionary of a native module. Returns nullptr if not // found. In case of success returns borrowed reference. PyObject* GetDebugletModuleObject(const char* key); // Formats the name and the origin of the code object for logging. -string CodeObjectDebugString(PyCodeObject* code_object); +std::string CodeObjectDebugString(PyCodeObject* code_object); // Reads Python string as a byte array. 
The function does not verify that // "obj" is of a string type. From 9beb0b423c905343945f4f591955f7d8c76830b0 Mon Sep 17 00:00:00 2001 From: Cloud Debugger Team Date: Thu, 7 Nov 2019 14:03:50 -0500 Subject: [PATCH 165/241] Internal change PiperOrigin-RevId: 279123936 Change-Id: Ibb8550f7a8b4e516820b4c53d8ee22a1bbc5dc44 --- src/googleclouddebugger/leaky_bucket.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/googleclouddebugger/leaky_bucket.cc b/src/googleclouddebugger/leaky_bucket.cc index 83caa68..b574632 100644 --- a/src/googleclouddebugger/leaky_bucket.cc +++ b/src/googleclouddebugger/leaky_bucket.cc @@ -19,6 +19,9 @@ #include "leaky_bucket.h" +#include "third_party/absl/time/clock.h" +#include "third_party/absl/time/time.h" + #ifndef NACL_BUILD #include #include From 4ccc9bbf6bcb37be4cb5830b98ede3e99953a8f6 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Fri, 8 Nov 2019 16:00:03 -0500 Subject: [PATCH 166/241] Rolling back build breaking change PiperOrigin-RevId: 279373478 Change-Id: I135ea47b784d357c26b72563f3ba3b15f1edaea8 --- src/googleclouddebugger/leaky_bucket.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/googleclouddebugger/leaky_bucket.cc b/src/googleclouddebugger/leaky_bucket.cc index b574632..83caa68 100644 --- a/src/googleclouddebugger/leaky_bucket.cc +++ b/src/googleclouddebugger/leaky_bucket.cc @@ -19,9 +19,6 @@ #include "leaky_bucket.h" -#include "third_party/absl/time/clock.h" -#include "third_party/absl/time/time.h" - #ifndef NACL_BUILD #include #include From a4ee1e54d12183bfb39c2b9bc9b0d371a2f56a9c Mon Sep 17 00:00:00 2001 From: James McTavish Date: Fri, 22 Nov 2019 10:42:37 -0800 Subject: [PATCH 167/241] sort and group the #include directives PiperOrigin-RevId: 281997998 Change-Id: Ifb0380ab3ceb6bd76ff09b73914d95b412a4640a --- src/googleclouddebugger/bytecode_breakpoint.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/googleclouddebugger/bytecode_breakpoint.h 
b/src/googleclouddebugger/bytecode_breakpoint.h index b3edadf..f7ecccf 100644 --- a/src/googleclouddebugger/bytecode_breakpoint.h +++ b/src/googleclouddebugger/bytecode_breakpoint.h @@ -18,8 +18,9 @@ #define DEVTOOLS_CDBG_DEBUGLETS_PYTHON_BYTECODE_BREAKPOINT_H_ #include -#include #include +#include + #include "common.h" #include "python_util.h" From 19b1147bb5759f7ac2a47449041ec83f183141db Mon Sep 17 00:00:00 2001 From: Jason Borg Date: Thu, 28 Nov 2019 14:00:31 -0800 Subject: [PATCH 168/241] Add support for Python 3.8 opcodes. PiperOrigin-RevId: 282973931 Change-Id: Id021919aa0bc75fc7cb956d582e41ebdf026845d --- .../bytecode_manipulator.cc | 9 +++++++ .../immutability_tracer.cc | 25 ++++++++++++++++--- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/googleclouddebugger/bytecode_manipulator.cc b/src/googleclouddebugger/bytecode_manipulator.cc index e201fe0..e5d9ebb 100644 --- a/src/googleclouddebugger/bytecode_manipulator.cc +++ b/src/googleclouddebugger/bytecode_manipulator.cc @@ -126,10 +126,16 @@ static PythonOpcodeType GetOpcodeType(uint8 opcode) { case FOR_ITER: case JUMP_FORWARD: +#if PY_VERSION_HEX < 0x03080000 + // Removed in Python 3.8. case SETUP_LOOP: case SETUP_EXCEPT: +#endif case SETUP_FINALLY: case SETUP_WITH: +#if PY_VERSION_HEX >= 0x03080000 + case CALL_FINALLY: +#endif return BRANCH_DELTA_OPCODE; case JUMP_IF_FALSE_OR_POP: @@ -137,7 +143,10 @@ static PythonOpcodeType GetOpcodeType(uint8 opcode) { case JUMP_ABSOLUTE: case POP_JUMP_IF_FALSE: case POP_JUMP_IF_TRUE: +#if PY_VERSION_HEX < 0x03080000 + // Removed in Python 3.8. 
case CONTINUE_LOOP: +#endif return BRANCH_ABSOLUTE_OPCODE; default: diff --git a/src/googleclouddebugger/immutability_tracer.cc b/src/googleclouddebugger/immutability_tracer.cc index e360a39..4af1863 100644 --- a/src/googleclouddebugger/immutability_tracer.cc +++ b/src/googleclouddebugger/immutability_tracer.cc @@ -331,7 +331,6 @@ static OpcodeMutableStatus IsOpcodeMutable(const uint8 opcode) { case INPLACE_AND: case INPLACE_XOR: case INPLACE_OR: - case BREAK_LOOP: case RETURN_VALUE: case YIELD_VALUE: case POP_BLOCK: @@ -351,8 +350,6 @@ static OpcodeMutableStatus IsOpcodeMutable(const uint8 opcode) { case POP_JUMP_IF_TRUE: case POP_JUMP_IF_FALSE: case LOAD_GLOBAL: - case CONTINUE_LOOP: - case SETUP_LOOP: case LOAD_FAST: case STORE_FAST: case DELETE_FAST: @@ -362,6 +359,12 @@ static OpcodeMutableStatus IsOpcodeMutable(const uint8 opcode) { case LOAD_DEREF: case CALL_FUNCTION_KW: case EXTENDED_ARG: +#if PY_VERSION_HEX < 0x03080000 + // These were all removed in Python 3.8. + case BREAK_LOOP: + case CONTINUE_LOOP: + case SETUP_LOOP: +#endif #if PY_MAJOR_VERSION >= 3 case DUP_TOP_TWO: case BINARY_MATRIX_MULTIPLY: @@ -385,6 +388,10 @@ static OpcodeMutableStatus IsOpcodeMutable(const uint8 opcode) { case LOAD_METHOD: case CALL_METHOD: #endif +#if PY_VERSION_HEX >= 0x03080000 + // Added back in Python 3.8 (was in 2.7 as well) + case ROT_FOUR: +#endif #else case ROT_FOUR: case DUP_TOPX: @@ -412,7 +419,6 @@ static OpcodeMutableStatus IsOpcodeMutable(const uint8 opcode) { case IMPORT_STAR: case IMPORT_NAME: case IMPORT_FROM: - case SETUP_EXCEPT: case SETUP_FINALLY: // TODO: allow changing fields of locally created objects/lists. case STORE_SUBSCR: @@ -431,6 +437,10 @@ static OpcodeMutableStatus IsOpcodeMutable(const uint8 opcode) { case SETUP_WITH: // TODO: allow closures case LOAD_CLOSURE: +#if PY_VERSION_HEX < 0x03080000 + // Removed in Python 3.8. 
+ case SETUP_EXCEPT: +#endif #if PY_MAJOR_VERSION >= 3 case GET_AITER: case GET_ANEXT: @@ -447,6 +457,13 @@ static OpcodeMutableStatus IsOpcodeMutable(const uint8 opcode) { #endif case DELETE_DEREF: case SETUP_ASYNC_WITH: +#if PY_VERSION_HEX >= 0x03080000 + // Added in Python 3.8. + case BEGIN_FINALLY: + case END_ASYNC_FOR: + case CALL_FINALLY: + case POP_FINALLY: +#endif #else case STORE_SLICE+0: case STORE_SLICE+1: From b97ce3f0ec89c8cc66d9710198dc8588a3938de4 Mon Sep 17 00:00:00 2001 From: Jason Borg Date: Mon, 2 Dec 2019 11:26:43 -0800 Subject: [PATCH 169/241] Document Python 3.8 support. PiperOrigin-RevId: 283380219 Change-Id: I5a8b88706fa53343525a39e4625ce4a7f91f1ca2 --- README.md | 19 ++++++++++--------- src/setup.py | 1 + 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 4b67011..3b2f062 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Python Cloud Debugger Agent -Google [Cloud Debugger](https://cloud.google.com/debugger/) for Python 2.7 and -3.7 (experimental support) +Google [Cloud Debugger](https://cloud.google.com/debugger/) for Python 2.7, and +with experimental support for Python 3.6, Python 3.7 and Python 3.8. ## Overview @@ -27,8 +27,8 @@ tested on Debian Linux, but it should work on other distributions as well. Cloud Debugger consists of 3 primary components: -1. The Python debugger agent (this repo implements one for CPython 2.7, and an - experimental one for CPython 3.7). +1. The Python debugger agent (this repo implements one for CPython 2.7, and + experimental ones for CPython 3.6, 3.7 and 3.8). 2. Cloud Debugger service storing and managing snapshots/logpoints. Explore the APIs using [APIs Explorer](https://developers.google.com/apis-explorer/#p/clouddebugger/v2/). @@ -82,12 +82,13 @@ sudo apt-get -y -q --no-install-recommends install \ ### Python 3 -There is experimental support for Python 3.6 and Python 3.7. Python 3.0 to 3.5 -are not supported, and newer versions have not been tested. 
+There is experimental support for Python 3.6, Python 3.7 and Python 3.8. Python +3.0 to 3.5 are not supported, and newer versions have not been tested. -To build, the `python3.7` and `python3.7-dev` packages are additionally needed. -If Python 3.7 is not the default version of the 'python' command on your system, -run the build script as `PYTHON=python3.7 ./build.sh`. +To build for Python 3.x (x in [6-8]), the `python3.x` and `python3.x-dev` +packages are additionally needed. If Python 3.x is not the default version of +the 'python' command on your system, run the build script as `PYTHON=python3.x +./build.sh`. ### Alpine Linux diff --git a/src/setup.py b/src/setup.py index 0564757..46a229f 100644 --- a/src/setup.py +++ b/src/setup.py @@ -121,6 +121,7 @@ def ReadConfig(section, value, default): 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', 'Development Status :: 3 - Alpha', 'Intended Audience :: Developers', ]) From b635cebc58690dc11b34564660f47fa6f03f44cb Mon Sep 17 00:00:00 2001 From: Jason Borg Date: Mon, 2 Dec 2019 11:44:37 -0800 Subject: [PATCH 170/241] Release python agent version 2.14, with Python 3.8 support. PiperOrigin-RevId: 283383931 Change-Id: I230ea2382c535b2c07e17c5eccafc9088d85f181 --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index 221dfed..6700d1f 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. 
-__version__ = '2.13' +__version__ = '2.14' From ecd3dad7548aca87e6497e66db5bdfdb2c37d2e2 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Tue, 3 Dec 2019 11:39:55 -0800 Subject: [PATCH 171/241] Deprecate NACL_BUILD PiperOrigin-RevId: 283588727 Change-Id: I144c0bfacc6f5ecfab822fa561c654489c75fd17 --- src/googleclouddebugger/leaky_bucket.cc | 8 -------- src/googleclouddebugger/native_module.cc | 3 ++- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/src/googleclouddebugger/leaky_bucket.cc b/src/googleclouddebugger/leaky_bucket.cc index 83caa68..2495084 100644 --- a/src/googleclouddebugger/leaky_bucket.cc +++ b/src/googleclouddebugger/leaky_bucket.cc @@ -19,12 +19,8 @@ #include "leaky_bucket.h" -#ifndef NACL_BUILD #include #include -#else // NACL_BUILD -#include "third_party/apphosting/nacl/chromium/base/time.h" -#endif // NACL_BUILD #include #include @@ -33,13 +29,9 @@ namespace devtools { namespace cdbg { static int64 NowInNanoseconds() { -#ifndef NACL_BUILD timespec time; clock_gettime(CLOCK_MONOTONIC, &time); return 1000000000LL * time.tv_sec + time.tv_nsec; -#else // NACL_BUILD - return (base::Time::Now() - base::Time::UnixEpoch()).InMicroseconds() * 1000; -#endif // NACL_BUILD } diff --git a/src/googleclouddebugger/native_module.cc b/src/googleclouddebugger/native_module.cc index 86eaf4b..162854c 100644 --- a/src/googleclouddebugger/native_module.cc +++ b/src/googleclouddebugger/native_module.cc @@ -17,11 +17,12 @@ // Ensure that Python.h is included before any other header. 
#include "common.h" +#include "native_module.h" + #include "bytecode_breakpoint.h" #include "common.h" #include "conditional_breakpoint.h" #include "immutability_tracer.h" -#include "native_module.h" #include "python_callback.h" #include "python_util.h" #include "rate_limit.h" From e24f0741d8ad57d4e2a326093b1da65658ae3560 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Mon, 16 Dec 2019 14:23:11 -0800 Subject: [PATCH 172/241] Internal change PiperOrigin-RevId: 285849834 Change-Id: Ifa640fa77e94480afeaf2053455c4b4d1a66c653 --- src/googleclouddebugger/common.h | 1 + src/googleclouddebugger/leaky_bucket.cc | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/googleclouddebugger/common.h b/src/googleclouddebugger/common.h index 75889d2..6d98b81 100644 --- a/src/googleclouddebugger/common.h +++ b/src/googleclouddebugger/common.h @@ -29,6 +29,7 @@ #include #include +#include #include #include "glog/logging.h" diff --git a/src/googleclouddebugger/leaky_bucket.cc b/src/googleclouddebugger/leaky_bucket.cc index 2495084..a4fd7cd 100644 --- a/src/googleclouddebugger/leaky_bucket.cc +++ b/src/googleclouddebugger/leaky_bucket.cc @@ -19,9 +19,6 @@ #include "leaky_bucket.h" -#include -#include - #include #include From 2bd8d582398b4db659f5b1c289985c14859bd758 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Thu, 23 Apr 2020 08:12:57 -0700 Subject: [PATCH 173/241] Migrate from apiclient to googleapiclient. 
PiperOrigin-RevId: 308053810 Change-Id: If6c6fe702c36b38f031a7dbe5ff386620d883a99 --- src/googleclouddebugger/gcp_hub_client.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index 7316669..d05fb8f 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -30,9 +30,9 @@ -import apiclient -import apiclient.discovery import google_auth_httplib2 +import googleapiclient +import googleapiclient.discovery import httplib2 import six @@ -129,7 +129,7 @@ def filter(self, record): return False return True self._log_filter = _ChildLogFilter({logging.INFO}) - apiclient.discovery.logger.addFilter(self._log_filter) + googleapiclient.discovery.logger.addFilter(self._log_filter) # # Configuration options (constants only modified by unit test) @@ -265,7 +265,7 @@ def _BuildService(self): http = httplib2.Http(timeout=_HTTP_TIMEOUT_SECONDS) http = google_auth_httplib2.AuthorizedHttp(self._credentials, http) - api = apiclient.discovery.build( + api = googleapiclient.discovery.build( 'clouddebugger', 'v2', http=http, cache_discovery=False) return api.controller() @@ -414,7 +414,7 @@ def _TransmitBreakpointUpdates(self, service): native.LogInfo('Breakpoint %s update transmitted successfully' % ( breakpoint['id'])) - except apiclient.errors.HttpError as err: + except googleapiclient.errors.HttpError as err: # Treat 400 error codes (except timeout) as application error that will # not be retried. All other errors are assumed to be transient. 
status = err.resp.status From b539920fb22786650c18cd1735cff1142ac56f71 Mon Sep 17 00:00:00 2001 From: Louis Ye Date: Thu, 30 Apr 2020 07:30:25 -0700 Subject: [PATCH 174/241] Change console.developers.google.com/debug to console.cloud.google.com/debug PiperOrigin-RevId: 309221795 Change-Id: Ib0bdbe432575232dedd1267590e06e6401638505 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3b2f062..fa0ef7e 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ Cloud Debugger consists of 3 primary components: 3. User interface, including a command line interface [`gcloud debug`](https://cloud.google.com/sdk/gcloud/reference/debug/) and a Web interface on - [Google Cloud Console](https://console.developers.google.com/debug/). See + [Google Cloud Console](https://console.cloud.google.com/debug/). See the [online help](https://cloud.google.com/debugger/docs/debugging) on how to use Google Cloud Console Debug page. From 70dd605547fdcace80d1089eb3444d01d238ed17 Mon Sep 17 00:00:00 2001 From: Louis Ye Date: Thu, 30 Apr 2020 08:43:17 -0700 Subject: [PATCH 175/241] Update GCP documentation links PiperOrigin-RevId: 309232456 Change-Id: I63cab07e7d7367617232b1e757afb32bd0cb3c48 --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index fa0ef7e..740e60f 100644 --- a/README.md +++ b/README.md @@ -31,12 +31,12 @@ Cloud Debugger consists of 3 primary components: experimental ones for CPython 3.6, 3.7 and 3.8). 2. Cloud Debugger service storing and managing snapshots/logpoints. Explore the APIs using - [APIs Explorer](https://developers.google.com/apis-explorer/#p/clouddebugger/v2/). + [APIs Explorer](https://cloud.google.com/debugger/api/reference/rest/). 3. User interface, including a command line interface [`gcloud debug`](https://cloud.google.com/sdk/gcloud/reference/debug/) and a Web interface on - [Google Cloud Console](https://console.cloud.google.com/debug/). 
See - the [online help](https://cloud.google.com/debugger/docs/debugging) on how + [Google Cloud Console](https://console.cloud.google.com/debug/). See the + [online help](https://cloud.google.com/debugger/docs/using/snapshots) on how to use Google Cloud Console Debug page. ## Getting Help From d3bf11dba4fcbc218f6b32449d2d22028966a7df Mon Sep 17 00:00:00 2001 From: Louis Ye Date: Thu, 7 May 2020 08:59:17 -0700 Subject: [PATCH 176/241] Fix version number of google-auth for Python 2.7 PiperOrigin-RevId: 310369571 Change-Id: Ic2677ac85e739b1839e05660a9533334d030f431 --- src/setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/setup.py b/src/setup.py index 46a229f..0379fa2 100644 --- a/src/setup.py +++ b/src/setup.py @@ -23,6 +23,7 @@ from glob import glob import os import re +import sys from distutils import sysconfig from setuptools import Extension from setuptools import setup @@ -108,7 +109,8 @@ def ReadConfig(section, value, default): version=version, install_requires=[ 'google-api-python-client', - 'google-auth>=1.0.0', + 'google-auth==1.8.2' + if sys.version_info.major < 3 else 'google-auth>=1.0.0', 'google-auth-httplib2', 'pyyaml', 'six>=1.10.0', From a0e233077afe486ab852651ebc750ae75aae7e80 Mon Sep 17 00:00:00 2001 From: Louis Ye Date: Tue, 9 Jun 2020 14:20:56 -0700 Subject: [PATCH 177/241] Supports breakpoint canary in GCP The breakpoint canary feature is controlled using the new 'breakpoint_enable_canary' and 'breakpoint_allow_canary_override' parameters when calling enable() or the same-name flags when run as a module. 
PiperOrigin-RevId: 315558797 Change-Id: I13c7a2077055ecfb6e8c28c726a8315b60ce505e --- README.md | 6 ++++ src/googleclouddebugger/__init__.py | 3 ++ src/googleclouddebugger/gcp_hub_client.py | 38 +++++++++++++++++++++-- 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 740e60f..d4afe57 100644 --- a/README.md +++ b/README.md @@ -271,3 +271,9 @@ Default Credentials](https://cloud.google.com/docs/authentication/production) which are automatically available on machines hosted on GCP, or can be set via `gcloud auth application-default login` or the `GOOGLE_APPLICATION_CREDENTIALS` environment variable. + +`breakpoint_enable_canary`: Whether to enable the +[breakpoint canary feature](https://cloud.google.com/debugger/docs/using/snapshots#with_canarying). +It expects a boolean value (`True`/`False`) or a string, with `'True'` +interpreted as `True` and any other string interpreted as `False`). If not +provided, the breakpoint canarying will not be enabled. diff --git a/src/googleclouddebugger/__init__.py b/src/googleclouddebugger/__init__.py index e631edf..f9364eb 100644 --- a/src/googleclouddebugger/__init__.py +++ b/src/googleclouddebugger/__init__.py @@ -72,6 +72,9 @@ def _StartDebugger(): _flags.get('project_id'), _flags.get('project_number'), _flags.get('service_account_json_file')) + _hub_client.SetupCanaryMode( + _flags.get('breakpoint_enable_canary'), + _flags.get('breakpoint_allow_canary_override')) _hub_client.InitializeDebuggeeLabels(_flags) _hub_client.Start() diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index d05fb8f..5b95367 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -75,6 +75,15 @@ # of 40 seconds. _HTTP_TIMEOUT_SECONDS = 100 +# The map from the values of flags (breakpoint_enable_canary, +# breakpoint_allow_canary_override) to canary mode. 
+_CANARY_MODE_MAP = { + (True, True): 'CANARY_MODE_DEFAULT_ENABLED', + (True, False): 'CANARY_MODE_ALWAYS_ENABLED', + (False, True): 'CANARY_MODE_DEFAULT_DISABLED', + (False, False): 'CANARY_MODE_ALWAYS_DISABLED', +} + class NoProjectIdError(Exception): """Used to indicate the project id cannot be determined.""" @@ -101,6 +110,8 @@ def __init__(self): self._debuggee_labels = {} self._service_account_auth = False self._debuggee_id = None + self._agent_id = None + self._canary_mode = None self._wait_token = 'init' self._breakpoints = [] self._main_thread = None @@ -218,6 +229,21 @@ def SetupAuth(self, self._project_id = project_id self._project_number = project_number or project_id + def SetupCanaryMode(self, breakpoint_enable_canary, + breakpoint_allow_canary_override): + """Sets up canaryMode for the debuggee according to input parameters. + + Args: + breakpoint_enable_canary: str or bool, whether to enable breakpoint + canary. Any string except 'True' is interpreted as False. + breakpoint_allow_canary_override: str or bool, whether to allow the + individually set breakpoint to override the canary behavior. Any + string except 'True' is interpreted as False. 
+ """ + enable_canary = breakpoint_enable_canary in ('True', True) + allow_canary_override = breakpoint_allow_canary_override in ('True', True) + self._canary_mode = _CANARY_MODE_MAP[enable_canary, allow_canary_override] + def Start(self): """Starts the worker thread.""" self._shutdown = False @@ -327,8 +353,11 @@ def _RegisterDebuggee(self, service): self._project_number = project_number or self._project_number self._debuggee_id = response['debuggee']['id'] - native.LogInfo('Debuggee registered successfully, ID: %s' % ( - self._debuggee_id)) + self._agent_id = response['agentId'] + native.LogInfo( + 'Debuggee registered successfully, ID: %s, agent ID: %s, ' + 'canary mode: %s' % (self._debuggee_id, self._agent_id, + response['debuggee'].get('canaryMode'))) self.register_backoff.Succeeded() return (False, 0) # Proceed immediately to list active breakpoints. except BaseException: @@ -355,7 +384,9 @@ def _ListActiveBreakpoints(self, service): """ try: response = service.debuggees().breakpoints().list( - debuggeeId=self._debuggee_id, waitToken=self._wait_token, + debuggeeId=self._debuggee_id, + agentId=self._agent_id, + waitToken=self._wait_token, successOnTimeout=True).execute() if not response.get('waitExpired'): self._wait_token = response.get('nextWaitToken') @@ -469,6 +500,7 @@ def _GetDebuggee(self): 'description': self._GetDebuggeeDescription(), 'labels': self._debuggee_labels, 'agentVersion': agent_version, + 'canaryMode': self._canary_mode, } source_context = self._ReadAppJsonFile('source-context.json') From 93aad83f61e8e41a870239e15134d555254079db Mon Sep 17 00:00:00 2001 From: Louis Ye Date: Wed, 10 Jun 2020 09:55:08 -0700 Subject: [PATCH 178/241] Pin google-api-python-client version for agent dependencies in Python2 PiperOrigin-RevId: 315709983 Change-Id: Ia1e2527dcab204003015575843a5e7da6d4cac98 --- src/setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/setup.py b/src/setup.py index 0379fa2..100454b 100644 --- a/src/setup.py 
+++ b/src/setup.py @@ -108,7 +108,8 @@ def ReadConfig(section, value, default): author='Google Inc.', version=version, install_requires=[ - 'google-api-python-client', + 'google-api-python-client==1.8.4' + if sys.version_info.major < 3 else 'google-api-python-client', 'google-auth==1.8.2' if sys.version_info.major < 3 else 'google-auth>=1.0.0', 'google-auth-httplib2', From b34d5eeb684e1aab9c07343a6b2deadb4599b62f Mon Sep 17 00:00:00 2001 From: Louis Ye Date: Thu, 11 Jun 2020 10:14:23 -0700 Subject: [PATCH 179/241] Bump Python agent version to 2.15 PiperOrigin-RevId: 315923520 Change-Id: Ic9004306704e1fb07d89d05a1312346f2a56a527 --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index 6700d1f..da8e00f 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. -__version__ = '2.14' +__version__ = '2.15' From 9ab1853605ec972a91d129b488b184e61ac0edb4 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Tue, 13 Oct 2020 06:48:15 -0700 Subject: [PATCH 180/241] Remove references to experimental support for Python 3. Python 3.6-3.8 is fully supported. PiperOrigin-RevId: 336866588 Change-Id: I3361b2c07f85c211fb28b0537aa39d9a58e2c4a2 --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index d4afe57..29679ae 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Python Cloud Debugger Agent -Google [Cloud Debugger](https://cloud.google.com/debugger/) for Python 2.7, and -with experimental support for Python 3.6, Python 3.7 and Python 3.8. 
+Google [Cloud Debugger](https://cloud.google.com/debugger/) for Python 2.7, +Python 3.6, Python 3.7 and Python 3.8. ## Overview @@ -27,8 +27,8 @@ tested on Debian Linux, but it should work on other distributions as well. Cloud Debugger consists of 3 primary components: -1. The Python debugger agent (this repo implements one for CPython 2.7, and - experimental ones for CPython 3.6, 3.7 and 3.8). +1. The Python debugger agent (this repo implements one for CPython 2.7, 3.6, + 3.7 and 3.8). 2. Cloud Debugger service storing and managing snapshots/logpoints. Explore the APIs using [APIs Explorer](https://cloud.google.com/debugger/api/reference/rest/). @@ -82,8 +82,8 @@ sudo apt-get -y -q --no-install-recommends install \ ### Python 3 -There is experimental support for Python 3.6, Python 3.7 and Python 3.8. Python -3.0 to 3.5 are not supported, and newer versions have not been tested. +There is support for Python 3.6, Python 3.7 and Python 3.8. Python 3.0 to 3.5 +are not supported, and newer versions have not been tested. To build for Python 3.x (x in [6-8]), the `python3.x` and `python3.x-dev` packages are additionally needed. If Python 3.x is not the default version of From 1638b9fe309b0b1000f719d5978691e74eb7f755 Mon Sep 17 00:00:00 2001 From: Cloud Debugger Team Date: Thu, 4 Feb 2021 10:05:24 -0800 Subject: [PATCH 181/241] Migrate all flags under devtools/cdbg/debuglets/python to Abseil Flags and add portability workaround for the code to work in open source. 
PiperOrigin-RevId: 355651987 Change-Id: Ic2427ecfeab87eaa9dfb2a03a4f01889caa0557b --- src/googleclouddebugger/common.h | 30 +++++++++++++ .../immutability_tracer.cc | 8 ++-- src/googleclouddebugger/rate_limit.cc | 44 +++++++++---------- 3 files changed, 55 insertions(+), 27 deletions(-) diff --git a/src/googleclouddebugger/common.h b/src/googleclouddebugger/common.h index 6d98b81..59d5255 100644 --- a/src/googleclouddebugger/common.h +++ b/src/googleclouddebugger/common.h @@ -59,6 +59,36 @@ using google::LogSeverity; using google::AddLogSink; using google::RemoveLogSink; +// The open source build uses gflags, which uses the traditional (v1) flags APIs +// to define/declare/access command line flags. The internal build has upgraded +// to use v2 flags API (DEFINE_FLAG/DECLARE_FLAG/GetFlag/SetFlag), which is not +// supported by gflags yet (and absl is not released to open source yet). +// Here, we use simple, dummy v2 flags wrappers around v1 flags implementation. +// This allows us to use the same flags APIs both internally and externally. + +#define ABSL_FLAG(type, name, default_value, help) \ + DEFINE_##type(name, default_value, help) + +#define ABSL_DECLARE_FLAG(type, name) DECLARE_##type(name) + +namespace absl { +// Return the value of an old-style flag. Not thread-safe. +inline bool GetFlag(bool flag) { return flag; } +inline int32 GetFlag(int32 flag) { return flag; } +inline int64 GetFlag(int64 flag) { return flag; } +inline uint64 GetFlag(uint64 flag) { return flag; } +inline double GetFlag(double flag) { return flag; } +inline string GetFlag(const string& flag) { return flag; } + +// Change the value of an old-style flag. Not thread-safe. 
+inline void SetFlag(bool* f, bool v) { *f = v; } +inline void SetFlag(int32* f, int32 v) { *f = v; } +inline void SetFlag(int64* f, int64 v) { *f = v; } +inline void SetFlag(uint64* f, uint64 v) { *f = v; } +inline void SetFlag(double* f, double v) { *f = v; } +inline void SetFlag(string* f, const string& v) { *f = v; } +} // namespace absl + // Python 3 compatibility #if PY_MAJOR_VERSION >= 3 // Python 2 has both an 'int' and a 'long' type, and Python 3 only as an 'int' diff --git a/src/googleclouddebugger/immutability_tracer.cc b/src/googleclouddebugger/immutability_tracer.cc index 4af1863..8f7f14a 100644 --- a/src/googleclouddebugger/immutability_tracer.cc +++ b/src/googleclouddebugger/immutability_tracer.cc @@ -21,10 +21,8 @@ #include "python_util.h" -DEFINE_int32( - max_expression_lines, - 10000, - "maximum number of Python lines to allow in a single expression"); +ABSL_FLAG(int32, max_expression_lines, 10000, + "maximum number of Python lines to allow in a single expression"); namespace devtools { namespace cdbg { @@ -192,7 +190,7 @@ int ImmutabilityTracer::OnTraceCallbackInternal( break; } - if (line_count_ > FLAGS_max_expression_lines) { + if (line_count_ > absl::GetFlag(FLAGS_max_expression_lines)) { LOG(INFO) << "Expression evaluation exceeded quota"; mutable_code_detected_ = true; } diff --git a/src/googleclouddebugger/rate_limit.cc b/src/googleclouddebugger/rate_limit.cc index 20d7bb1..4fbb72c 100644 --- a/src/googleclouddebugger/rate_limit.cc +++ b/src/googleclouddebugger/rate_limit.cc @@ -19,22 +19,20 @@ #include "rate_limit.h" -DEFINE_int32( - max_condition_lines_rate, - 5000, +ABSL_FLAG( + int32, max_condition_lines_rate, 5000, "maximum number of Python lines/sec to spend on condition evaluation"); -DEFINE_int32( - max_dynamic_log_rate, +ABSL_FLAG( + int32, max_dynamic_log_rate, 50, // maximum of 50 log entries per second on average "maximum rate of dynamic log entries in this process; short bursts are " "allowed to exceed this limit"); 
-DEFINE_int32( - max_dynamic_log_bytes_rate, - 20480, // maximum of 20K bytes per second on average - "maximum rate of dynamic log bytes in this process; short bursts are " - "allowed to exceed this limit"); +ABSL_FLAG(int32, max_dynamic_log_bytes_rate, + 20480, // maximum of 20K bytes per second on average + "maximum rate of dynamic log bytes in this process; short bursts are " + "allowed to exceed this limit"); namespace devtools { namespace cdbg { @@ -60,22 +58,24 @@ static std::unique_ptr g_global_dynamic_log_bytes_quota; static int64 GetBaseConditionQuotaCapacity() { - return FLAGS_max_condition_lines_rate * kConditionCostCapacityFactor; + return absl::GetFlag(FLAGS_max_condition_lines_rate) * + kConditionCostCapacityFactor; } void LazyInitializeRateLimit() { if (g_global_condition_quota == nullptr) { - g_global_condition_quota.reset(new LeakyBucket( - GetBaseConditionQuotaCapacity(), - FLAGS_max_condition_lines_rate)); + g_global_condition_quota.reset( + new LeakyBucket(GetBaseConditionQuotaCapacity(), + absl::GetFlag(FLAGS_max_condition_lines_rate))); g_global_dynamic_log_quota.reset(new LeakyBucket( - FLAGS_max_dynamic_log_rate * kDynamicLogCapacityFactor, - FLAGS_max_dynamic_log_rate)); + absl::GetFlag(FLAGS_max_dynamic_log_rate) * kDynamicLogCapacityFactor, + absl::GetFlag(FLAGS_max_dynamic_log_rate))); - g_global_dynamic_log_bytes_quota.reset(new LeakyBucket( - FLAGS_max_dynamic_log_bytes_rate * kDynamicLogBytesCapacityFactor, - FLAGS_max_dynamic_log_bytes_rate)); + g_global_dynamic_log_bytes_quota.reset( + new LeakyBucket(absl::GetFlag(FLAGS_max_dynamic_log_bytes_rate) * + kDynamicLogBytesCapacityFactor, + absl::GetFlag(FLAGS_max_dynamic_log_bytes_rate))); } } @@ -100,9 +100,9 @@ LeakyBucket* GetGlobalDynamicLogBytesQuota() { } std::unique_ptr CreatePerBreakpointConditionQuota() { - return std::unique_ptr(new LeakyBucket( - GetBaseConditionQuotaCapacity() / 2, - FLAGS_max_condition_lines_rate / 2)); + return std::unique_ptr( + new 
LeakyBucket(GetBaseConditionQuotaCapacity() / 2, + absl::GetFlag(FLAGS_max_condition_lines_rate) / 2)); } } // namespace cdbg From 22a479acb19595a0d45ae9ac3e9f30ceb9f020da Mon Sep 17 00:00:00 2001 From: James McTavish Date: Fri, 5 Feb 2021 12:02:47 -0800 Subject: [PATCH 182/241] Add direct dependency on google-api-core. PiperOrigin-RevId: 355899018 Change-Id: Ibd876f085180604e1a469f47f3fe763e739b29c8 --- src/setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/setup.py b/src/setup.py index 100454b..bef4a37 100644 --- a/src/setup.py +++ b/src/setup.py @@ -113,6 +113,8 @@ def ReadConfig(section, value, default): 'google-auth==1.8.2' if sys.version_info.major < 3 else 'google-auth>=1.0.0', 'google-auth-httplib2', + 'google-api-core==1.15.0' + if sys.version_info.major < 3 else 'google-api-core', 'pyyaml', 'six>=1.10.0', ], From cbd4c609a52af8343837f5237ddb6955b992d933 Mon Sep 17 00:00:00 2001 From: Cloud Debugger Team Date: Thu, 25 Feb 2021 19:26:15 -0800 Subject: [PATCH 183/241] Internal change PiperOrigin-RevId: 359668609 Change-Id: Idff920d096ce536efc0684c53b86f83a0b650610 --- src/googleclouddebugger/leaky_bucket.cc | 36 +++++++++++-------------- src/googleclouddebugger/leaky_bucket.h | 27 ++++++++++--------- 2 files changed, 30 insertions(+), 33 deletions(-) diff --git a/src/googleclouddebugger/leaky_bucket.cc b/src/googleclouddebugger/leaky_bucket.cc index a4fd7cd..aa18ef0 100644 --- a/src/googleclouddebugger/leaky_bucket.cc +++ b/src/googleclouddebugger/leaky_bucket.cc @@ -20,19 +20,19 @@ #include "leaky_bucket.h" #include +#include #include namespace devtools { namespace cdbg { -static int64 NowInNanoseconds() { +static int64_t NowInNanoseconds() { timespec time; clock_gettime(CLOCK_MONOTONIC, &time); return 1000000000LL * time.tv_sec + time.tv_nsec; } - -LeakyBucket::LeakyBucket(int64 capacity, int64 fill_rate) +LeakyBucket::LeakyBucket(int64_t capacity, int64_t fill_rate) : capacity_(capacity), fractional_tokens_(0.0), fill_rate_(fill_rate), @@ 
-40,20 +40,19 @@ LeakyBucket::LeakyBucket(int64 capacity, int64 fill_rate) tokens_ = capacity; } - -bool LeakyBucket::RequestTokensSlow(int64 requested_tokens) { +bool LeakyBucket::RequestTokensSlow(int64_t requested_tokens) { // Getting the time outside the lock is significantly faster (reduces // contention, etc.). - const int64 current_time_ns = NowInNanoseconds(); + const int64_t current_time_ns = NowInNanoseconds(); std::lock_guard lock(mu_); - const int64 cur_tokens = AtomicLoadTokens(); + const int64_t cur_tokens = AtomicLoadTokens(); if (cur_tokens >= 0) { return true; } - const int64 available_tokens = + const int64_t available_tokens = RefillBucket(requested_tokens + cur_tokens, current_time_ns); if (available_tokens >= 0) { return true; @@ -66,17 +65,15 @@ bool LeakyBucket::RequestTokensSlow(int64 requested_tokens) { return false; } - -int64 LeakyBucket::RefillBucket( - int64 available_tokens, - int64 current_time_ns) { +int64_t LeakyBucket::RefillBucket(int64_t available_tokens, + int64_t current_time_ns) { if (current_time_ns <= fill_time_ns_) { // We check to see if the bucket has been refilled after we checked the // current time but before we grabbed mu_. If it has there's nothing to do. return AtomicLoadTokens(); } - const int64 elapsed_ns = current_time_ns - fill_time_ns_; + const int64_t elapsed_ns = current_time_ns - fill_time_ns_; fill_time_ns_ = current_time_ns; // Calculate the number of tokens we can add. Note elapsed is in ns while @@ -85,10 +82,10 @@ int64 LeakyBucket::RefillBucket( // don't add more than the capacity of leaky bucket. 
fractional_tokens_ += std::min(elapsed_ns * (fill_rate_ / 1e9), static_cast(capacity_)); - const int64 ideal_tokens_to_add = fractional_tokens_; + const int64_t ideal_tokens_to_add = fractional_tokens_; - const int64 max_tokens_to_add = capacity_ - available_tokens; - int64 real_tokens_to_add; + const int64_t max_tokens_to_add = capacity_ - available_tokens; + int64_t real_tokens_to_add; if (max_tokens_to_add < ideal_tokens_to_add) { fractional_tokens_ = 0.0; real_tokens_to_add = max_tokens_to_add; @@ -100,16 +97,15 @@ int64 LeakyBucket::RefillBucket( return AtomicIncrementTokens(real_tokens_to_add); } - -void LeakyBucket::TakeTokens(int64 tokens) { - const int64 remaining = AtomicIncrementTokens(-tokens); +void LeakyBucket::TakeTokens(int64_t tokens) { + const int64_t remaining = AtomicIncrementTokens(-tokens); if (remaining < 0) { // (Try to) refill the bucket. If we don't do this, we could just // keep decreasing forever without refilling. We need to be // refilling at least as frequently as every capacity_ / // fill_rate_ seconds. Otherwise, we waste tokens. - const int64 current_time_ns = NowInNanoseconds(); + const int64_t current_time_ns = NowInNanoseconds(); std::lock_guard lock(mu_); RefillBucket(remaining, current_time_ns); diff --git a/src/googleclouddebugger/leaky_bucket.h b/src/googleclouddebugger/leaky_bucket.h index 3b72ef6..4dd8d27 100644 --- a/src/googleclouddebugger/leaky_bucket.h +++ b/src/googleclouddebugger/leaky_bucket.h @@ -18,6 +18,7 @@ #define DEVTOOLS_CDBG_COMMON_LEAKY_BUCKET_H_ #include +#include #include // NOLINT #include "common.h" @@ -32,7 +33,7 @@ class LeakyBucket { public: // "capacity": The max number of tokens the bucket can hold at any point. // "fill_rate": The rate which the bucket fills in tokens per second. - LeakyBucket(int64 capacity, int64 fill_rate); + LeakyBucket(int64_t capacity, int64_t fill_rate); ~LeakyBucket() {} @@ -46,30 +47,30 @@ class LeakyBucket { // tokens are being acquired. 
Suddenly, infinite demand arrives. // At most "capacity_" tokens will be granted immediately. Subsequent // requests will only be admitted based on the fill rate. - inline bool RequestTokens(int64 requested_tokens); + inline bool RequestTokens(int64_t requested_tokens); // Takes tokens from bucket, possibly sending the number of tokens in the // bucket negative. - void TakeTokens(int64 tokens); + void TakeTokens(int64_t tokens); private: // The slow path of RequestTokens. Grabs a lock and may refill tokens_ // using the fill rate and time passed since last fill. - bool RequestTokensSlow(int64 requested_tokens); + bool RequestTokensSlow(int64_t requested_tokens); // Refills the bucket with newly added tokens since last update and returns // the current amount of tokens in the bucket. 'available_tokens' indicates // the number of tokens in the bucket before refilling. 'current_time_ns' // indicates the current time in nanoseconds. - int64 RefillBucket(int64 available_tokens, int64 current_time_ns); + int64_t RefillBucket(int64_t available_tokens, int64_t current_time_ns); // Atomically increment "tokens_". - inline int64 AtomicIncrementTokens(int64 increment) { + inline int64_t AtomicIncrementTokens(int64_t increment) { return tokens_.fetch_add(increment, std::memory_order_relaxed) + increment; } // Atomically load the value of "tokens_". - inline int64 AtomicLoadTokens() const { + inline int64_t AtomicLoadTokens() const { return tokens_.load(std::memory_order_relaxed); } @@ -84,33 +85,33 @@ class LeakyBucket { // // Tokens can be momentarily negative, either via TakeTokens or // during a normal RequestTokens that was not satisfied. - std::atomic tokens_; + std::atomic tokens_; // Capacity of the bucket. - const int64 capacity_; + const int64_t capacity_; // Although the main token count is an integer we also track fractional tokens // for increased precision. double fractional_tokens_; // Fill rate in tokens per second. 
- const int64 fill_rate_; + const int64_t fill_rate_; // Time in nanoseconds of the last refill. - int64 fill_time_ns_; + int64_t fill_time_ns_; DISALLOW_COPY_AND_ASSIGN(LeakyBucket); }; // Inline fast-path. -inline bool LeakyBucket::RequestTokens(int64 requested_tokens) { +inline bool LeakyBucket::RequestTokens(int64_t requested_tokens) { if (requested_tokens > capacity_) { return false; } // Try and grab some tokens. remaining is how many tokens are // left after subtracting out requested tokens. - int64 remaining = AtomicIncrementTokens(-requested_tokens); + int64_t remaining = AtomicIncrementTokens(-requested_tokens); if (remaining >= 0) { // We had at least as much as we needed. return true; From 91a4145e0cce3a58d787c0e3f476ec3d44836cc2 Mon Sep 17 00:00:00 2001 From: Dmitry Tsarkov Date: Thu, 11 Mar 2021 07:51:41 -0800 Subject: [PATCH 184/241] Internal change. PiperOrigin-RevId: 362291112 Change-Id: Iaf651b304d22a9af3b5107830f52d967e4ce8278 --- .../bytecode_breakpoint.cc | 6 ++- .../bytecode_manipulator.cc | 49 ++++++++----------- .../bytecode_manipulator.h | 15 +++--- .../conditional_breakpoint.cc | 4 +- .../immutability_tracer.cc | 21 ++++---- src/googleclouddebugger/immutability_tracer.h | 11 +++-- src/googleclouddebugger/native_module.cc | 35 +++++-------- src/googleclouddebugger/python_util.cc | 16 +++--- src/googleclouddebugger/python_util.h | 13 ++--- src/googleclouddebugger/rate_limit.cc | 5 +- 10 files changed, 81 insertions(+), 94 deletions(-) diff --git a/src/googleclouddebugger/bytecode_breakpoint.cc b/src/googleclouddebugger/bytecode_breakpoint.cc index 58447b1..40939ee 100644 --- a/src/googleclouddebugger/bytecode_breakpoint.cc +++ b/src/googleclouddebugger/bytecode_breakpoint.cc @@ -19,6 +19,8 @@ #include "bytecode_breakpoint.h" +#include + #include "bytecode_manipulator.h" #include "python_callback.h" #include "python_util.h" @@ -227,10 +229,10 @@ void BytecodeBreakpoint::PatchCodeObject(CodeObjectBreakpoints* code) { return; } - std::vector 
bytecode = PyBytesToByteArray(code->original_code.get()); + std::vector bytecode = PyBytesToByteArray(code->original_code.get()); bool has_lnotab = false; - std::vector lnotab; + std::vector lnotab; if (!code->original_lnotab.is_null() && PyBytes_CheckExact(code->original_lnotab.get())) { has_lnotab = true; diff --git a/src/googleclouddebugger/bytecode_manipulator.cc b/src/googleclouddebugger/bytecode_manipulator.cc index e5d9ebb..3277a5a 100644 --- a/src/googleclouddebugger/bytecode_manipulator.cc +++ b/src/googleclouddebugger/bytecode_manipulator.cc @@ -20,6 +20,7 @@ #include "bytecode_manipulator.h" #include +#include namespace devtools { namespace cdbg { @@ -51,8 +52,8 @@ enum PythonOpcodeType { // actual instruction. The argument of the EXTENDED_ARG instruction is combined // with the argument of the next instruction to form the full argument. struct PythonInstruction { - uint8 opcode; - uint32 argument; + uint8_t opcode; + uint32_t argument; int size; }; @@ -60,7 +61,7 @@ struct PythonInstruction { static const PythonInstruction kInvalidInstruction { 0xFF, 0xFFFFFFFF, 0 }; // Creates an instance of PythonInstruction for instruction with no arguments. -static PythonInstruction PythonInstructionNoArg(uint8 opcode) { +static PythonInstruction PythonInstructionNoArg(uint8_t opcode) { DCHECK(!HAS_ARG(opcode)); PythonInstruction instruction; @@ -76,9 +77,9 @@ static PythonInstruction PythonInstructionNoArg(uint8 opcode) { return instruction; } - // Creates an instance of PythonInstruction for instruction with an argument. -static PythonInstruction PythonInstructionArg(uint8 opcode, uint32 argument) { +static PythonInstruction PythonInstructionArg(uint8_t opcode, + uint32_t argument) { DCHECK(HAS_ARG(opcode)); PythonInstruction instruction; @@ -102,7 +103,6 @@ static PythonInstruction PythonInstructionArg(uint8 opcode, uint32 argument) { return instruction; } - // Calculates the size of a set of instructions. 
static int GetInstructionsSize( const std::vector& instructions) { @@ -116,7 +116,7 @@ static int GetInstructionsSize( // Classification of an opcode. -static PythonOpcodeType GetOpcodeType(uint8 opcode) { +static PythonOpcodeType GetOpcodeType(uint8_t opcode) { switch (opcode) { case YIELD_VALUE: #if PY_MAJOR_VERSION >= 3 @@ -154,7 +154,6 @@ static PythonOpcodeType GetOpcodeType(uint8 opcode) { } } - // Gets the target offset of a branch instruction. static int GetBranchTarget(int offset, PythonInstruction instruction) { switch (GetOpcodeType(instruction.opcode)) { @@ -191,8 +190,8 @@ static void WritePythonBytecodeUInt16( // Read instruction at the specified offset. Returns kInvalidInstruction // buffer underflow. static PythonInstruction ReadInstruction( - const std::vector& bytecode, - std::vector::const_iterator it) { + const std::vector& bytecode, + std::vector::const_iterator it) { PythonInstruction instruction { 0, 0, 0 }; #if PY_MAJOR_VERSION >= 3 @@ -251,21 +250,19 @@ static PythonInstruction ReadInstruction( return instruction; } - // Writes instruction to the specified destination. The caller is responsible // to make sure the target vector has enough space. Returns size of an // instruction. -static int WriteInstruction( - std::vector::iterator it, - const PythonInstruction& instruction) { +static int WriteInstruction(std::vector::iterator it, + const PythonInstruction& instruction) { #if PY_MAJOR_VERSION >= 3 - uint32 arg = instruction.argument; + uint32_t arg = instruction.argument; int size_written = 0; // Start writing backwards from the real instruction, followed by any // EXTENDED_ARG instructions if needed. for (int i = instruction.size - 2; i >= 0; i -= 2) { it[i] = size_written == 0 ? instruction.opcode : EXTENDED_ARG; - it[i + 1] = static_cast(arg); + it[i + 1] = static_cast(arg); arg = arg >> 8; size_written += 2; } @@ -295,10 +292,9 @@ static int WriteInstruction( #endif } - // Write set of instructions to the specified destination. 
static void WriteInstructions( - std::vector::iterator it, + std::vector::iterator it, const std::vector& instructions) { for (auto it_instruction = instructions.begin(); it_instruction != instructions.end(); @@ -309,7 +305,6 @@ static void WriteInstructions( } } - // Returns set of instructions to invoke a method with no arguments. The // method is assumed to be defined in the specified item of a constants tuple. static std::vector BuildMethodCall(int const_index) { @@ -321,11 +316,9 @@ static std::vector BuildMethodCall(int const_index) { return instructions; } - -BytecodeManipulator::BytecodeManipulator( - std::vector bytecode, - const bool has_lnotab, - std::vector lnotab) +BytecodeManipulator::BytecodeManipulator(std::vector bytecode, + const bool has_lnotab, + std::vector lnotab) : has_lnotab_(has_lnotab) { data_.bytecode = std::move(bytecode); data_.lnotab = std::move(lnotab); @@ -347,7 +340,6 @@ BytecodeManipulator::BytecodeManipulator( } } - bool BytecodeManipulator::InjectMethodCall( int offset, int callable_const_index) { @@ -417,7 +409,7 @@ static const int kMaxInsertionIterations = 10; // instruction before is an EXTENDED_ARG which will now be applied to the first // instruction inserted instead of its original target. static void InsertAndUpdateLnotab(int offset, int size, - std::vector* lnotab) { + std::vector* lnotab) { int current_offset = 0; for (auto it = lnotab->begin(); it != lnotab->end(); it += 2) { current_offset += it[0]; @@ -438,7 +430,6 @@ static void InsertAndUpdateLnotab(int offset, int size, } } - // Reserves space for instructions to be inserted into the bytecode, and // calculates the new offsets and arguments of branch instructions. 
// Returns true if the calculation was successful, and false if too many @@ -504,7 +495,7 @@ static bool InsertAndUpdateBranchInstructions( for (auto it = instructions.begin(); it < instructions.end(); it++) { PythonInstruction instruction = it->instruction; - int32 arg = static_cast(instruction.argument); + int32_t arg = static_cast(instruction.argument); bool need_to_update = false; PythonOpcodeType opcode_type = GetOpcodeType(instruction.opcode); if (opcode_type == BRANCH_DELTA_OPCODE) { @@ -514,7 +505,7 @@ static bool InsertAndUpdateBranchInstructions( // argument of 0 even when it is not required. This needs to be taken // into account when calculating the target of a branch instruction. int inst_size = std::max(instruction.size, it->original_size); - int32 target = it->current_offset + inst_size + arg; + int32_t target = it->current_offset + inst_size + arg; need_to_update = it->current_offset < insertion.current_offset && insertion.current_offset < target; } else if (opcode_type == BRANCH_ABSOLUTE_OPCODE) { diff --git a/src/googleclouddebugger/bytecode_manipulator.h b/src/googleclouddebugger/bytecode_manipulator.h index 5046506..d3a7de4 100644 --- a/src/googleclouddebugger/bytecode_manipulator.h +++ b/src/googleclouddebugger/bytecode_manipulator.h @@ -17,6 +17,7 @@ #ifndef DEVTOOLS_CDBG_DEBUGLETS_PYTHON_BYTECODE_MANIPULATOR_H_ #define DEVTOOLS_CDBG_DEBUGLETS_PYTHON_BYTECODE_MANIPULATOR_H_ +#include #include #include "common.h" @@ -70,19 +71,17 @@ namespace cdbg { // 19 JUMP_ABSOLUTE 3 class BytecodeManipulator { public: - BytecodeManipulator( - std::vector bytecode, - const bool has_lnotab, - std::vector lnotab); + BytecodeManipulator(std::vector bytecode, const bool has_lnotab, + std::vector lnotab); // Gets the transformed method bytecode. - const std::vector& bytecode() const { return data_.bytecode; } + const std::vector& bytecode() const { return data_.bytecode; } // Returns true if this class was initialized with line numbers table. 
bool has_lnotab() const { return has_lnotab_; } // Gets the method line numbers table or empty vector if not available. - const std::vector& lnotab() const { return data_.lnotab; } + const std::vector& lnotab() const { return data_.lnotab; } // Rewrites the method bytecode to invoke callable at the specified offset. // Return false if the method call could not be inserted. The bytecode @@ -108,10 +107,10 @@ class BytecodeManipulator { struct Data { // Bytecode of a transformed method. - std::vector bytecode; + std::vector bytecode; // Method line numbers table or empty vector if "has_lnotab_" is false. - std::vector lnotab; + std::vector lnotab; }; // Insert space into the bytecode. This space is later used to add new diff --git a/src/googleclouddebugger/conditional_breakpoint.cc b/src/googleclouddebugger/conditional_breakpoint.cc index ca531cc..9d66474 100644 --- a/src/googleclouddebugger/conditional_breakpoint.cc +++ b/src/googleclouddebugger/conditional_breakpoint.cc @@ -19,6 +19,8 @@ #include "conditional_breakpoint.h" +#include + #include "immutability_tracer.h" #include "rate_limit.h" @@ -64,7 +66,7 @@ bool ConditionalBreakpoint::EvaluateCondition(PyFrameObject* frame) { ScopedPyObject result; bool is_mutable_code_detected = false; - int32 line_count = 0; + int32_t line_count = 0; { ScopedImmutabilityTracer immutability_tracer; diff --git a/src/googleclouddebugger/immutability_tracer.cc b/src/googleclouddebugger/immutability_tracer.cc index 8f7f14a..950ac65 100644 --- a/src/googleclouddebugger/immutability_tracer.cc +++ b/src/googleclouddebugger/immutability_tracer.cc @@ -19,6 +19,8 @@ #include "immutability_tracer.h" +#include + #include "python_util.h" ABSL_FLAG(int32, max_expression_lines, 10000, @@ -250,8 +252,8 @@ void ImmutabilityTracer::ProcessCodeLine( PyCodeObject* code_object, int line_number) { int size = PyBytes_Size(code_object->co_code); - const uint8* opcodes = - reinterpret_cast(PyBytes_AsString(code_object->co_code)); + const uint8_t* opcodes 
= + reinterpret_cast(PyBytes_AsString(code_object->co_code)); DCHECK(opcodes != nullptr); @@ -283,7 +285,7 @@ enum OpcodeMutableStatus { OPCODE_MAYBE_MUTABLE }; -static OpcodeMutableStatus IsOpcodeMutable(const uint8 opcode) { +static OpcodeMutableStatus IsOpcodeMutable(const uint8_t opcode) { // Notes: // * We allow changing local variables (i.e. STORE_FAST). Expression // evaluation doesn't let changing local variables of the top frame @@ -486,12 +488,12 @@ static OpcodeMutableStatus IsOpcodeMutable(const uint8 opcode) { } } -void ImmutabilityTracer::ProcessCodeRange(const uint8* code_start, - const uint8* opcodes, int size) { - const uint8* end = opcodes + size; +void ImmutabilityTracer::ProcessCodeRange(const uint8_t* code_start, + const uint8_t* opcodes, int size) { + const uint8_t* end = opcodes + size; while (opcodes < end) { // Read opcode. - const uint8 opcode = *opcodes; + const uint8_t opcode = *opcodes; switch (IsOpcodeMutable(opcode)) { case OPCODE_NOT_MUTABLE: // We don't worry about the sizes of instructions with EXTENDED_ARG. 
@@ -524,7 +526,7 @@ void ImmutabilityTracer::ProcessCodeRange(const uint8* code_start, break; } #endif - LOG(WARNING) << "Unknown opcode " << static_cast(opcode); + LOG(WARNING) << "Unknown opcode " << static_cast(opcode); mutable_code_detected_ = true; return; @@ -535,7 +537,6 @@ void ImmutabilityTracer::ProcessCodeRange(const uint8* code_start, } } - void ImmutabilityTracer::ProcessCCall(PyObject* function) { if (PyCFunction_Check(function)) { // TODO: the application code can define its own "str" function @@ -549,7 +550,7 @@ void ImmutabilityTracer::ProcessCCall(PyObject* function) { auto c_function = reinterpret_cast(function); const char* name = c_function->m_ml->ml_name; - for (uint32 i = 0; i < arraysize(kWhitelistedCFunctions); ++i) { + for (uint32_t i = 0; i < arraysize(kWhitelistedCFunctions); ++i) { if (!strcmp(name, kWhitelistedCFunctions[i])) { return; } diff --git a/src/googleclouddebugger/immutability_tracer.h b/src/googleclouddebugger/immutability_tracer.h index 6f9b9b1..e0cbd4d 100644 --- a/src/googleclouddebugger/immutability_tracer.h +++ b/src/googleclouddebugger/immutability_tracer.h @@ -17,6 +17,7 @@ #ifndef DEVTOOLS_CDBG_DEBUGLETS_PYTHON_IMMUTABILITY_TRACER_H_ #define DEVTOOLS_CDBG_DEBUGLETS_PYTHON_IMMUTABILITY_TRACER_H_ +#include #include #include "common.h" @@ -56,7 +57,7 @@ class ImmutabilityTracer { // Gets the number of lines executed while the tracer was enabled. Native // functions calls are counted as a single line. - int32 GetLineCount() const { return line_count_; } + int32_t GetLineCount() const { return line_count_; } private: // Python tracer callback function. @@ -79,7 +80,7 @@ class ImmutabilityTracer { void ProcessCodeLine(PyCodeObject* code_object, int line_number); // Verifies immutability of block of opcodes. - void ProcessCodeRange(const uint8* code_start, const uint8* opcodes, + void ProcessCodeRange(const uint8_t* code_start, const uint8_t* opcodes, int size); // Verifies that the called C function is whitelisted. 
@@ -107,11 +108,11 @@ class ImmutabilityTracer { // Original value of PyThreadState::tracing. We revert it to 0 to enforce // trace callback on this thread, even if the whole thing was executed from // within another trace callback (that caught the breakpoint). - int32 original_thread_state_tracing_; + int32_t original_thread_state_tracing_; // Counts the number of lines executed while the tracer was enabled. Native // functions calls are counted as a single line. - int32 line_count_; + int32_t line_count_; // Set to true after immutable statement is detected. When it happens we // want to stop execution of the entire construct entirely. @@ -144,7 +145,7 @@ class ScopedImmutabilityTracer { // Gets the number of lines executed while the tracer was enabled. Native // functions calls are counted as a single line. - int32 GetLineCount() const { return Instance()->GetLineCount(); } + int32_t GetLineCount() const { return Instance()->GetLineCount(); } private: ImmutabilityTracer* Instance() { diff --git a/src/googleclouddebugger/native_module.cc b/src/googleclouddebugger/native_module.cc index 162854c..4a66c4f 100644 --- a/src/googleclouddebugger/native_module.cc +++ b/src/googleclouddebugger/native_module.cc @@ -19,6 +19,8 @@ #include "native_module.h" +#include + #include "bytecode_breakpoint.h" #include "common.h" #include "conditional_breakpoint.h" @@ -38,31 +40,18 @@ const LogSeverity LOG_SEVERITY_ERROR = ::google::ERROR; struct INTEGER_CONSTANT { const char* name; - int32 value; + int32_t value; }; static const INTEGER_CONSTANT kIntegerConstants[] = { - { - "BREAKPOINT_EVENT_HIT", - static_cast(BreakpointEvent::Hit) - }, - { - "BREAKPOINT_EVENT_ERROR", - static_cast(BreakpointEvent::Error) - }, - { - "BREAKPOINT_EVENT_GLOBAL_CONDITION_QUOTA_EXCEEDED", - static_cast(BreakpointEvent::GlobalConditionQuotaExceeded) - }, - { - "BREAKPOINT_EVENT_BREAKPOINT_CONDITION_QUOTA_EXCEEDED", - static_cast(BreakpointEvent::BreakpointConditionQuotaExceeded) - }, - { - 
"BREAKPOINT_EVENT_CONDITION_EXPRESSION_MUTABLE", - static_cast(BreakpointEvent::ConditionExpressionMutable) - } -}; + {"BREAKPOINT_EVENT_HIT", static_cast(BreakpointEvent::Hit)}, + {"BREAKPOINT_EVENT_ERROR", static_cast(BreakpointEvent::Error)}, + {"BREAKPOINT_EVENT_GLOBAL_CONDITION_QUOTA_EXCEEDED", + static_cast(BreakpointEvent::GlobalConditionQuotaExceeded)}, + {"BREAKPOINT_EVENT_BREAKPOINT_CONDITION_QUOTA_EXCEEDED", + static_cast(BreakpointEvent::BreakpointConditionQuotaExceeded)}, + {"BREAKPOINT_EVENT_CONDITION_EXPRESSION_MUTABLE", + static_cast(BreakpointEvent::ConditionExpressionMutable)}}; // Class to set zero overhead breakpoints. static BytecodeBreakpoint g_bytecode_breakpoint; @@ -438,7 +427,7 @@ PyObject* InitDebuggerNativeModuleInternal() { } // Add constants we want to share with the Python code. - for (uint32 i = 0; i < arraysize(kIntegerConstants); ++i) { + for (uint32_t i = 0; i < arraysize(kIntegerConstants); ++i) { if (PyModule_AddObject( module, kIntegerConstants[i].name, diff --git a/src/googleclouddebugger/python_util.cc b/src/googleclouddebugger/python_util.cc index 9007e44..90b67ce 100644 --- a/src/googleclouddebugger/python_util.cc +++ b/src/googleclouddebugger/python_util.cc @@ -21,6 +21,8 @@ #include +#include + namespace devtools { namespace cdbg { @@ -47,8 +49,7 @@ void CodeObjectLinesEnumerator::Initialize( offset_ = 0; line_number_ = firstlineno; remaining_entries_ = PyBytes_Size(lnotab) / 2; - next_entry_ = - reinterpret_cast(PyBytes_AsString(lnotab)); + next_entry_ = reinterpret_cast(PyBytes_AsString(lnotab)); // If the line table starts with offset 0, the first line is not // "code_object->co_firstlineno", but the following line. 
@@ -239,7 +240,7 @@ std::string CodeObjectDebugString(PyCodeObject* code_object) { } str += ':'; - str += std::to_string(static_cast(code_object->co_firstlineno)); + str += std::to_string(static_cast(code_object->co_firstlineno)); if ((code_object->co_filename != nullptr) && PyBytes_CheckExact(code_object->co_filename)) { @@ -250,16 +251,15 @@ std::string CodeObjectDebugString(PyCodeObject* code_object) { return str; } -std::vector PyBytesToByteArray(PyObject* obj) { +std::vector PyBytesToByteArray(PyObject* obj) { DCHECK(PyBytes_CheckExact(obj)); const size_t bytecode_size = PyBytes_GET_SIZE(obj); - const uint8* const bytecode_data = - reinterpret_cast(PyBytes_AS_STRING(obj)); - return std::vector(bytecode_data, bytecode_data + bytecode_size); + const uint8_t* const bytecode_data = + reinterpret_cast(PyBytes_AS_STRING(obj)); + return std::vector(bytecode_data, bytecode_data + bytecode_size); } - // Creates a new tuple by appending "items" to elements in "tuple". ScopedPyObject AppendTuple( PyObject* tuple, diff --git a/src/googleclouddebugger/python_util.h b/src/googleclouddebugger/python_util.h index 139d9dc..57b5425 100644 --- a/src/googleclouddebugger/python_util.h +++ b/src/googleclouddebugger/python_util.h @@ -17,6 +17,7 @@ #ifndef DEVTOOLS_CDBG_DEBUGLETS_PYTHON_PYTHON_UTIL_H_ #define DEVTOOLS_CDBG_DEBUGLETS_PYTHON_PYTHON_UTIL_H_ +#include #include #include @@ -183,10 +184,10 @@ class CodeObjectLinesEnumerator { bool Next(); // Gets the bytecode offset of the current line. - int32 offset() const { return offset_; } + int32_t offset() const { return offset_; } // Gets the current source code line number. - int32 line_number() const { return line_number_; } + int32_t line_number() const { return line_number_; } private: void Initialize(int firstlineno, PyObject* lnotab); @@ -196,13 +197,13 @@ class CodeObjectLinesEnumerator { int remaining_entries_; // Pointer to the next entry of line table. 
- const uint8* next_entry_; + const uint8_t* next_entry_; // Bytecode offset of the current line. - int32 offset_; + int32_t offset_; // Current source code line number - int32 line_number_; + int32_t line_number_; DISALLOW_COPY_AND_ASSIGN(CodeObjectLinesEnumerator); }; @@ -314,7 +315,7 @@ std::string CodeObjectDebugString(PyCodeObject* code_object); // Reads Python string as a byte array. The function does not verify that // "obj" is of a string type. -std::vector PyBytesToByteArray(PyObject* obj); +std::vector PyBytesToByteArray(PyObject* obj); // Creates a new tuple by appending "items" to elements in "tuple". ScopedPyObject AppendTuple( diff --git a/src/googleclouddebugger/rate_limit.cc b/src/googleclouddebugger/rate_limit.cc index 4fbb72c..80b7c47 100644 --- a/src/googleclouddebugger/rate_limit.cc +++ b/src/googleclouddebugger/rate_limit.cc @@ -19,6 +19,8 @@ #include "rate_limit.h" +#include + ABSL_FLAG( int32, max_condition_lines_rate, 5000, "maximum number of Python lines/sec to spend on condition evaluation"); @@ -56,8 +58,7 @@ static std::unique_ptr g_global_condition_quota; static std::unique_ptr g_global_dynamic_log_quota; static std::unique_ptr g_global_dynamic_log_bytes_quota; - -static int64 GetBaseConditionQuotaCapacity() { +static int64_t GetBaseConditionQuotaCapacity() { return absl::GetFlag(FLAGS_max_condition_lines_rate) * kConditionCostCapacityFactor; } From 67e3b8c3dede37ca39fd495dc1e042b3a8044631 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Thu, 8 Apr 2021 10:46:07 -0700 Subject: [PATCH 185/241] Add support for Python 3.9 Thanks to https://github.com/ermakov-oleg for initiating this contribution. 
PiperOrigin-RevId: 367461664 Change-Id: Ifd56e622a969ed171395b29516a45175202964d3 --- .../bytecode_manipulator.cc | 3 +- .../immutability_tracer.cc | 32 +++++++++++++++++-- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/src/googleclouddebugger/bytecode_manipulator.cc b/src/googleclouddebugger/bytecode_manipulator.cc index 3277a5a..9c646e3 100644 --- a/src/googleclouddebugger/bytecode_manipulator.cc +++ b/src/googleclouddebugger/bytecode_manipulator.cc @@ -133,7 +133,8 @@ static PythonOpcodeType GetOpcodeType(uint8_t opcode) { #endif case SETUP_FINALLY: case SETUP_WITH: -#if PY_VERSION_HEX >= 0x03080000 +#if PY_VERSION_HEX >= 0x03080000 && PY_VERSION_HEX < 0x03090000 + // Added in Python 3.8 and removed in 3.9 case CALL_FINALLY: #endif return BRANCH_DELTA_OPCODE; diff --git a/src/googleclouddebugger/immutability_tracer.cc b/src/googleclouddebugger/immutability_tracer.cc index 950ac65..6cfa66c 100644 --- a/src/googleclouddebugger/immutability_tracer.cc +++ b/src/googleclouddebugger/immutability_tracer.cc @@ -374,15 +374,25 @@ static OpcodeMutableStatus IsOpcodeMutable(const uint8_t opcode) { case UNPACK_EX: case CALL_FUNCTION_EX: case LOAD_CLASSDEREF: +#if PY_VERSION_HEX < 0x03090000 + // Removed in Python 3.9. case BUILD_LIST_UNPACK: case BUILD_MAP_UNPACK: case BUILD_MAP_UNPACK_WITH_CALL: case BUILD_TUPLE_UNPACK: + case BUILD_TUPLE_UNPACK_WITH_CALL: case BUILD_SET_UNPACK: +#endif +#if PY_VERSION_HEX > 0x03090000 + // Added in Python 3.9. + case LIST_TO_TUPLE: + case IS_OP: + case CONTAINS_OP: + case JUMP_IF_NOT_EXC_MATCH: +#endif case FORMAT_VALUE: case BUILD_CONST_KEY_MAP: case BUILD_STRING: - case BUILD_TUPLE_UNPACK_WITH_CALL: #if PY_VERSION_HEX >= 0x03070000 // Added in Python 3.7. 
case LOAD_METHOD: @@ -433,7 +443,6 @@ static OpcodeMutableStatus IsOpcodeMutable(const uint8_t opcode) { case STORE_DEREF: // TODO: allow exception handling case RAISE_VARARGS: - case END_FINALLY: case SETUP_WITH: // TODO: allow closures case LOAD_CLOSURE: @@ -447,8 +456,12 @@ static OpcodeMutableStatus IsOpcodeMutable(const uint8_t opcode) { case BEFORE_ASYNC_WITH: case LOAD_BUILD_CLASS: case GET_AWAITABLE: +#if PY_VERSION_HEX < 0x03090000 + // Removed in 3.9. case WITH_CLEANUP_START: case WITH_CLEANUP_FINISH: + case END_FINALLY: +#endif case SETUP_ANNOTATIONS: case POP_EXCEPT: #if PY_VERSION_HEX < 0x03070000 @@ -459,11 +472,24 @@ static OpcodeMutableStatus IsOpcodeMutable(const uint8_t opcode) { case SETUP_ASYNC_WITH: #if PY_VERSION_HEX >= 0x03080000 // Added in Python 3.8. - case BEGIN_FINALLY: case END_ASYNC_FOR: +#endif +#if PY_VERSION_HEX >= 0x03080000 && PY_VERSION_HEX < 0x03090000 + // Added in Python 3.8 and removed in 3.9 + case BEGIN_FINALLY: case CALL_FINALLY: case POP_FINALLY: #endif +#if PY_VERSION_HEX >= 0x03090000 + // Added in 3.9. + case DICT_MERGE: + case DICT_UPDATE: + case LIST_EXTEND: + case SET_UPDATE: + case RERAISE: + case WITH_EXCEPT_START: + case LOAD_ASSERTION_ERROR: +#endif #else case STORE_SLICE+0: case STORE_SLICE+1: From 37c4486e3944ee24e123d4dc4b8a7bc940a6d671 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Thu, 8 Apr 2021 13:45:55 -0700 Subject: [PATCH 186/241] Add poetry support. Thanks to https://github.com/ermakov-oleg for this contribution. 
Resolves https://github.com/GoogleCloudPlatform/cloud-debug-python/issues/24 PiperOrigin-RevId: 367498809 Change-Id: I46983c94ba82829b6a2bc328789a9166a4fe6561 --- src/setup.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/setup.py b/src/setup.py index bef4a37..80c1414 100644 --- a/src/setup.py +++ b/src/setup.py @@ -108,10 +108,10 @@ def ReadConfig(section, value, default): author='Google Inc.', version=version, install_requires=[ - 'google-api-python-client==1.8.4' - if sys.version_info.major < 3 else 'google-api-python-client', - 'google-auth==1.8.2' - if sys.version_info.major < 3 else 'google-auth>=1.0.0', + 'google-api-python-client==1.8.4; python_version < "3.0"', + 'google-api-python-client; python_version > "3.0"', + 'google-auth==1.8.2; python_version < "3.0"', + 'google-auth>=1.0.0; python_version > "3.0"', 'google-auth-httplib2', 'google-api-core==1.15.0' if sys.version_info.major < 3 else 'google-api-core', From b3737e11816b0c7c55ab02db4a94c7e80e36c553 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Fri, 9 Apr 2021 06:26:35 -0700 Subject: [PATCH 187/241] Release python agent version 2.16, with Python 3.9 support PiperOrigin-RevId: 367621582 Change-Id: I1ffa31552c54e489ef3d2677c324fe0463ebb16d --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index da8e00f..84e8243 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. 
-__version__ = '2.15' +__version__ = '2.16' From 9f20afe9cce98185bfeb89be1cc205976538b7da Mon Sep 17 00:00:00 2001 From: James McTavish Date: Fri, 9 Apr 2021 10:24:18 -0700 Subject: [PATCH 188/241] Update readme to indicate support for Python 3.9 PiperOrigin-RevId: 367657091 Change-Id: I2b66dac0f86688cac47410c6954354e593e6fab9 --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 29679ae..836a7ee 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Python Cloud Debugger Agent Google [Cloud Debugger](https://cloud.google.com/debugger/) for Python 2.7, -Python 3.6, Python 3.7 and Python 3.8. +Python 3.6, Python 3.7, Python 3.8 and Python 3.9. ## Overview @@ -28,7 +28,7 @@ tested on Debian Linux, but it should work on other distributions as well. Cloud Debugger consists of 3 primary components: 1. The Python debugger agent (this repo implements one for CPython 2.7, 3.6, - 3.7 and 3.8). + 3.7, 3.8 and 3.9). 2. Cloud Debugger service storing and managing snapshots/logpoints. Explore the APIs using [APIs Explorer](https://cloud.google.com/debugger/api/reference/rest/). @@ -82,8 +82,8 @@ sudo apt-get -y -q --no-install-recommends install \ ### Python 3 -There is support for Python 3.6, Python 3.7 and Python 3.8. Python 3.0 to 3.5 -are not supported, and newer versions have not been tested. +There is support for Python 3.6, Python 3.7, Python 3.8 and Python 3.9. Python +3.0 to 3.5 are not supported, and newer versions have not been tested. To build for Python 3.x (x in [6-8]), the `python3.x` and `python3.x-dev` packages are additionally needed. If Python 3.x is not the default version of From f3cf7d17508a46301eb80a9295f474069a8ce890 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Tue, 20 Apr 2021 11:27:26 -0700 Subject: [PATCH 189/241] Add Python 3.9 to classifiers. 
PiperOrigin-RevId: 369482619 Change-Id: I1d9196b4bebb5ed539d2502b2d89aa7df2631a94 --- src/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/setup.py b/src/setup.py index 80c1414..e4f5873 100644 --- a/src/setup.py +++ b/src/setup.py @@ -127,6 +127,7 @@ def ReadConfig(section, value, default): 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', 'Development Status :: 3 - Alpha', 'Intended Audience :: Developers', ]) From 0040dbe72168752e82ba1300913a03ef70545f53 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Tue, 20 Apr 2021 12:49:32 -0700 Subject: [PATCH 190/241] Use conditional requirements for google-api-core. PiperOrigin-RevId: 369500403 Change-Id: I0eca3f902a45b8b8ef1f8648f0c7e6797bdef6b0 --- src/setup.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/setup.py b/src/setup.py index e4f5873..97b9d3d 100644 --- a/src/setup.py +++ b/src/setup.py @@ -23,7 +23,6 @@ from glob import glob import os import re -import sys from distutils import sysconfig from setuptools import Extension from setuptools import setup @@ -113,8 +112,8 @@ def ReadConfig(section, value, default): 'google-auth==1.8.2; python_version < "3.0"', 'google-auth>=1.0.0; python_version > "3.0"', 'google-auth-httplib2', - 'google-api-core==1.15.0' - if sys.version_info.major < 3 else 'google-api-core', + 'google-api-core==1.15.0; python_version < "3.0"', + 'google-api-core; python_version > "3.0"', 'pyyaml', 'six>=1.10.0', ], From cb1015f7aeb232e6163f64e39bf8cb3b13756b33 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Wed, 21 Apr 2021 09:06:00 -0700 Subject: [PATCH 191/241] Release v2.17 PiperOrigin-RevId: 369667244 Change-Id: I59ff9da471a0940b1e6acba03f42af9eca3d4834 --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py 
index 84e8243..d70e9d2 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. -__version__ = '2.16' +__version__ = '2.17' From fe3d134bb862ce3bbd19b98820f5d6fbb28be465 Mon Sep 17 00:00:00 2001 From: Cloud Debugger Team Date: Thu, 10 Jun 2021 12:43:15 -0700 Subject: [PATCH 192/241] Internal changes PiperOrigin-RevId: 378712533 Change-Id: Id4b8bc599fd81af78fc4daf6a0840ee4c859861a --- src/googleclouddebugger/immutability_tracer.cc | 2 +- src/googleclouddebugger/rate_limit.cc | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/googleclouddebugger/immutability_tracer.cc b/src/googleclouddebugger/immutability_tracer.cc index 6cfa66c..1e64cfd 100644 --- a/src/googleclouddebugger/immutability_tracer.cc +++ b/src/googleclouddebugger/immutability_tracer.cc @@ -23,7 +23,7 @@ #include "python_util.h" -ABSL_FLAG(int32, max_expression_lines, 10000, +ABSL_FLAG(int32_t, max_expression_lines, 10000, "maximum number of Python lines to allow in a single expression"); namespace devtools { diff --git a/src/googleclouddebugger/rate_limit.cc b/src/googleclouddebugger/rate_limit.cc index 80b7c47..caad939 100644 --- a/src/googleclouddebugger/rate_limit.cc +++ b/src/googleclouddebugger/rate_limit.cc @@ -22,16 +22,16 @@ #include ABSL_FLAG( - int32, max_condition_lines_rate, 5000, + int32_t, max_condition_lines_rate, 5000, "maximum number of Python lines/sec to spend on condition evaluation"); ABSL_FLAG( - int32, max_dynamic_log_rate, + int32_t, max_dynamic_log_rate, 50, // maximum of 50 log entries per second on average "maximum rate of dynamic log entries in this process; short bursts are " "allowed to exceed this limit"); -ABSL_FLAG(int32, max_dynamic_log_bytes_rate, +ABSL_FLAG(int32_t, 
max_dynamic_log_bytes_rate, 20480, // maximum of 20K bytes per second on average "maximum rate of dynamic log bytes in this process; short bursts are " "allowed to exceed this limit"); From 676ddadcbc403c24b18b16138d8e2e226f22cbec Mon Sep 17 00:00:00 2001 From: James McTavish Date: Tue, 22 Jun 2021 09:14:18 -0700 Subject: [PATCH 193/241] Internal code transformation change PiperOrigin-RevId: 380821292 Change-Id: I0f7cf8285f816dc7ed3986aaf248c55d3b434094 --- src/googleclouddebugger/immutability_tracer.cc | 2 +- src/googleclouddebugger/rate_limit.cc | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/googleclouddebugger/immutability_tracer.cc b/src/googleclouddebugger/immutability_tracer.cc index 1e64cfd..6cfa66c 100644 --- a/src/googleclouddebugger/immutability_tracer.cc +++ b/src/googleclouddebugger/immutability_tracer.cc @@ -23,7 +23,7 @@ #include "python_util.h" -ABSL_FLAG(int32_t, max_expression_lines, 10000, +ABSL_FLAG(int32, max_expression_lines, 10000, "maximum number of Python lines to allow in a single expression"); namespace devtools { diff --git a/src/googleclouddebugger/rate_limit.cc b/src/googleclouddebugger/rate_limit.cc index caad939..80b7c47 100644 --- a/src/googleclouddebugger/rate_limit.cc +++ b/src/googleclouddebugger/rate_limit.cc @@ -22,16 +22,16 @@ #include ABSL_FLAG( - int32_t, max_condition_lines_rate, 5000, + int32, max_condition_lines_rate, 5000, "maximum number of Python lines/sec to spend on condition evaluation"); ABSL_FLAG( - int32_t, max_dynamic_log_rate, + int32, max_dynamic_log_rate, 50, // maximum of 50 log entries per second on average "maximum rate of dynamic log entries in this process; short bursts are " "allowed to exceed this limit"); -ABSL_FLAG(int32_t, max_dynamic_log_bytes_rate, +ABSL_FLAG(int32, max_dynamic_log_bytes_rate, 20480, // maximum of 20K bytes per second on average "maximum rate of dynamic log bytes in this process; short bursts are " "allowed to exceed this limit"); From 
cb67477ece61e5665143ef1f7a91fff4686cd0db Mon Sep 17 00:00:00 2001 From: Mahmoud Taha Date: Tue, 20 Jul 2021 19:05:50 +0000 Subject: [PATCH 194/241] Add GCF platform detection in the python agent Detects GCF using environment variables set, otherwise returns 'default'. Functionality is the same as the nodejs agent. PiperOrigin-RevId: 385587486 Change-Id: I72ed22e7d6266e9e014a0dfb9a614496c322b94b --- src/googleclouddebugger/application_info.py | 51 +++++++++++++++++++++ src/googleclouddebugger/gcp_hub_client.py | 21 +++++++-- src/googleclouddebugger/labels.py | 6 ++- 3 files changed, 74 insertions(+), 4 deletions(-) create mode 100644 src/googleclouddebugger/application_info.py diff --git a/src/googleclouddebugger/application_info.py b/src/googleclouddebugger/application_info.py new file mode 100644 index 0000000..f054996 --- /dev/null +++ b/src/googleclouddebugger/application_info.py @@ -0,0 +1,51 @@ +# Copyright 2015 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module to fetch information regarding the current application. + +Some examples of the information the methods in this module fetch are platform +and region of the application. +""" + +import enum +import os + +# These environment variables will be set automatically by cloud functions +# depending on the runtime. If one of these values is set, we can infer that +# the current environment is GCF. 
Reference: +# https://cloud.google.com/functions/docs/env-var#runtime_environment_variables_set_automatically +_GCF_ENV_VARIABLES = ['FUNCTION_NAME', 'FUNCTION_TARGET'] + + +class PlatformType(enum.Enum): + """The type of platform the application is running on. + + TODO: Define this enum in a common format for all agents to + share. This enum needs to be maintained between the labels code generator + and other agents, until there is a unified way to generate it. + """ + CLOUD_FUNCTION = 'cloud_function' + DEFAULT = 'default' + + +def GetPlatform(): + """Returns PlatformType for the current application.""" + + # Check if it's a cloud function. + for name in _GCF_ENV_VARIABLES: + if name in os.environ: + return PlatformType.CLOUD_FUNCTION + + # If we weren't able to identify the platform, fall back to default value. + return PlatformType.DEFAULT diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index 5b95367..8e89b04 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -43,6 +43,7 @@ from . import cdbg_native as native from . import labels from . import uniquifier_computer +from . import application_info from . import version # This module catches all exception. This is safe because it runs in # a daemon thread (so we are not blocking Ctrl+C). We need to catch all @@ -57,9 +58,12 @@ # a map is optional environment variable that can be used to set the flag # (flags still take precedence). 
_DEBUGGEE_LABELS = { - labels.Debuggee.MODULE: ['GAE_SERVICE', 'GAE_MODULE_NAME', 'K_SERVICE'], + labels.Debuggee.MODULE: [ + 'GAE_SERVICE', 'GAE_MODULE_NAME', 'K_SERVICE', 'FUNCTION_NAME' + ], labels.Debuggee.VERSION: [ - 'GAE_VERSION', 'GAE_MODULE_VERSION', 'K_REVISION' + 'GAE_VERSION', 'GAE_MODULE_VERSION', 'K_REVISION', + 'X_GOOGLE_FUNCTION_VERSION' ], labels.Debuggee.MINOR_VERSION: ['GAE_DEPLOYMENT_ID', 'GAE_MINOR_VERSION'] } @@ -182,12 +186,23 @@ def InitializeDebuggeeLabels(self, flags): self._debuggee_labels[label] = value break + # Special case when FUNCTION_NAME is set and X_GOOGLE_FUNCTION_VERSION + # isn't set. We set the version to 'unversioned' to be consistent with other + # agents. + # TODO: Stop assigning 'unversioned' to a GCF and find the + # actual version. + if ('FUNCTION_NAME' in os.environ and + labels.Debuggee.VERSION not in self._debuggee_labels): + self._debuggee_labels[labels.Debuggee.VERSION] = 'unversioned' + if flags: self._debuggee_labels.update( {name: value for (name, value) in six.iteritems(flags) if name in _DEBUGGEE_LABELS}) - self._debuggee_labels['projectid'] = self._project_id + self._debuggee_labels[labels.Debuggee.PROJECT_ID] = self._project_id + self._debuggee_labels[ + labels.Debuggee.PLATFORM] = application_info.GetPlatform().value def SetupAuth(self, project_id=None, diff --git a/src/googleclouddebugger/labels.py b/src/googleclouddebugger/labels.py index 8070f82..041b50e 100644 --- a/src/googleclouddebugger/labels.py +++ b/src/googleclouddebugger/labels.py @@ -14,7 +14,9 @@ """Defines the keys of the well known labels used by the cloud debugger. -DO NOT EDIT +TODO: Define these strings in a common format for all agents to +share. This file needs to be maintained with the code generator file +being used in the UI, until the labels are unified. 
""" @@ -32,6 +34,7 @@ class Debuggee(object): MODULE = 'module' VERSION = 'version' MINOR_VERSION = 'minorversion' + PLATFORM = 'platform' SET_ALL = frozenset([ 'domain', @@ -39,4 +42,5 @@ class Debuggee(object): 'module', 'version', 'minorversion', + 'platform', ]) From 0dca04a35c5f85cfdc60c4b639c9be0668931733 Mon Sep 17 00:00:00 2001 From: Mahmoud Taha Date: Tue, 20 Jul 2021 19:06:03 +0000 Subject: [PATCH 195/241] Detect the region in the python agent for GCF Detects the region using 'FUNCTION_REGION' environment variable, which is only found in old runtimes. For newer runtimes, queries the metadata server for the region. Functionality is the same as the nodejs agent. PiperOrigin-RevId: 385593923 Change-Id: I987cf39e09af404296af4173a1c38310a76d770a --- src/googleclouddebugger/application_info.py | 28 +++++++++++++++++++-- src/googleclouddebugger/gcp_hub_client.py | 10 ++++++-- src/googleclouddebugger/labels.py | 2 ++ 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/src/googleclouddebugger/application_info.py b/src/googleclouddebugger/application_info.py index f054996..9909f37 100644 --- a/src/googleclouddebugger/application_info.py +++ b/src/googleclouddebugger/application_info.py @@ -20,12 +20,17 @@ import enum import os +import requests # These environment variables will be set automatically by cloud functions # depending on the runtime. If one of these values is set, we can infer that # the current environment is GCF. 
Reference: # https://cloud.google.com/functions/docs/env-var#runtime_environment_variables_set_automatically -_GCF_ENV_VARIABLES = ['FUNCTION_NAME', 'FUNCTION_TARGET'] +_GCF_EXISTENCE_ENV_VARIABLES = ['FUNCTION_NAME', 'FUNCTION_TARGET'] +_GCF_REGION_ENV_VARIABLE = 'FUNCTION_REGION' + +_GCP_METADATA_REGION_URL = 'http://metadata/computeMetadata/v1/instance/region' +_GCP_METADATA_HEADER = {'Metadata-Flavor': 'Google'} class PlatformType(enum.Enum): @@ -43,9 +48,28 @@ def GetPlatform(): """Returns PlatformType for the current application.""" # Check if it's a cloud function. - for name in _GCF_ENV_VARIABLES: + for name in _GCF_EXISTENCE_ENV_VARIABLES: if name in os.environ: return PlatformType.CLOUD_FUNCTION # If we weren't able to identify the platform, fall back to default value. return PlatformType.DEFAULT + + +def GetRegion(): + """Returns region of the current application.""" + + # If it's running cloud function with an old runtime. + if _GCF_REGION_ENV_VARIABLE in os.environ: + return os.environ.get(_GCF_REGION_ENV_VARIABLE) + + # Otherwise try fetching it from the metadata server. + try: + response = requests.get(_GCP_METADATA_REGION_URL, + headers=_GCP_METADATA_HEADER) + response.raise_for_status() + # Example of response text: projects/id/regions/us-central1. So we strip + # everything before the last /. 
+ return response.text.split('/')[-1] + except requests.exceptions.RequestException: + return None diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index 8e89b04..6214479 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -201,8 +201,14 @@ def InitializeDebuggeeLabels(self, flags): if name in _DEBUGGEE_LABELS}) self._debuggee_labels[labels.Debuggee.PROJECT_ID] = self._project_id - self._debuggee_labels[ - labels.Debuggee.PLATFORM] = application_info.GetPlatform().value + + platform_enum = application_info.GetPlatform() + self._debuggee_labels[labels.Debuggee.PLATFORM] = platform_enum.value + + if platform_enum == application_info.PlatformType.CLOUD_FUNCTION: + region = application_info.GetRegion() + if region: + self._debuggee_labels[labels.Debuggee.REGION] = region def SetupAuth(self, project_id=None, diff --git a/src/googleclouddebugger/labels.py b/src/googleclouddebugger/labels.py index 041b50e..d22129a 100644 --- a/src/googleclouddebugger/labels.py +++ b/src/googleclouddebugger/labels.py @@ -35,6 +35,7 @@ class Debuggee(object): VERSION = 'version' MINOR_VERSION = 'minorversion' PLATFORM = 'platform' + REGION = 'region' SET_ALL = frozenset([ 'domain', @@ -43,4 +44,5 @@ class Debuggee(object): 'version', 'minorversion', 'platform', + 'region', ]) From 7747becebe21d4bb356e27cf6e919308bde10da4 Mon Sep 17 00:00:00 2001 From: Mahmoud Taha Date: Tue, 20 Jul 2021 19:06:07 +0000 Subject: [PATCH 196/241] Add dependency on enum34 for Python 2.7 agent PiperOrigin-RevId: 385640592 Change-Id: Iebac8549b91a588a6ca12df7ebaf0fde3d7317d1 --- src/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/setup.py b/src/setup.py index 97b9d3d..c6a1c4d 100644 --- a/src/setup.py +++ b/src/setup.py @@ -107,6 +107,7 @@ def ReadConfig(section, value, default): author='Google Inc.', version=version, install_requires=[ + 'enum34; python_version < "3.4"', 
'google-api-python-client==1.8.4; python_version < "3.0"', 'google-api-python-client; python_version > "3.0"', 'google-auth==1.8.2; python_version < "3.0"', From 54b9ce8e315f523f8f39c15bb716397ddc86e16d Mon Sep 17 00:00:00 2001 From: Mahmoud Taha Date: Tue, 20 Jul 2021 19:06:12 +0000 Subject: [PATCH 197/241] Release v2.18 PiperOrigin-RevId: 385793574 Change-Id: I22824e3a202cab07e98b6e30bc33479c7641b22e --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index d70e9d2..cb89582 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. -__version__ = '2.17' +__version__ = '2.18' From 21c1f672a9d81c3f311853261cc4034559123c23 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Mon, 13 Jun 2022 13:53:16 -0400 Subject: [PATCH 198/241] Add support to build release candidates (#39) * Remove Python 2.7 support from setup.py * Add a script to build wheels for all supported Python versions * Patch a bugfix for create/activate breakpoint and add related tests. 
* Add Python unit tests * Disable tests that don't run on all supported versions of Python --- .gitignore | 8 +- build-dist.sh | 4 + requirements.txt | 5 + requirements_dev.txt | 3 + src/build-wheels.sh | 80 + src/build.sh | 2 +- .../bytecode_breakpoint.cc | 56 +- src/googleclouddebugger/bytecode_breakpoint.h | 81 +- src/googleclouddebugger/native_module.cc | 45 +- src/googleclouddebugger/python_breakpoint.py | 4 +- src/setup.py | 18 +- tests/application_info_test.py | 72 + tests/backoff_test.py | 35 + tests/breakpoints_manager_test.py | 200 +++ tests/capture_collector_test.py | 1462 +++++++++++++++++ tests/error_data_visibility_policy_test.py | 17 + tests/gcp_hub_client_test.py | 495 ++++++ tests/glob_data_visibility_policy_test.py | 39 + tests/imphook2_test.py | 521 ++++++ tests/integration_test_disabled.py | 596 +++++++ tests/integration_test_helper.py | 5 + tests/labels_test.py | 29 + tests/module_explorer_test_disabled.py | 319 ++++ tests/module_search2_test.py | 127 ++ tests/module_utils2_test.py | 177 ++ tests/native_module_test.py | 302 ++++ tests/python_breakpoint_test_disabled.py | 607 +++++++ tests/python_test_util.py | 186 +++ tests/uniquifier_computer_test.py | 125 ++ ...yaml_data_visibility_config_reader_test.py | 117 ++ 30 files changed, 5692 insertions(+), 45 deletions(-) create mode 100755 build-dist.sh create mode 100644 requirements.txt create mode 100644 requirements_dev.txt create mode 100755 src/build-wheels.sh create mode 100644 tests/application_info_test.py create mode 100644 tests/backoff_test.py create mode 100644 tests/breakpoints_manager_test.py create mode 100644 tests/capture_collector_test.py create mode 100644 tests/error_data_visibility_policy_test.py create mode 100644 tests/gcp_hub_client_test.py create mode 100644 tests/glob_data_visibility_policy_test.py create mode 100644 tests/imphook2_test.py create mode 100644 tests/integration_test_disabled.py create mode 100644 tests/integration_test_helper.py create mode 100644 
tests/labels_test.py create mode 100644 tests/module_explorer_test_disabled.py create mode 100644 tests/module_search2_test.py create mode 100644 tests/module_utils2_test.py create mode 100644 tests/native_module_test.py create mode 100644 tests/python_breakpoint_test_disabled.py create mode 100644 tests/python_test_util.py create mode 100644 tests/uniquifier_computer_test.py create mode 100644 tests/yaml_data_visibility_config_reader_test.py diff --git a/.gitignore b/.gitignore index 139597f..7064ec3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,6 @@ - - +/dist/ +/src/build/ +/src/dist/ +/src/setup.cfg +__pycache__/ +*.egg-info/ diff --git a/build-dist.sh b/build-dist.sh new file mode 100755 index 0000000..b0fc6d3 --- /dev/null +++ b/build-dist.sh @@ -0,0 +1,4 @@ +DOCKER_IMAGE='quay.io/pypa/manylinux2010_x86_64' + +docker pull "$DOCKER_IMAGE" +docker container run -t --rm -v "$(pwd)":/io "$DOCKER_IMAGE" /io/src/build-wheels.sh diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..5bb7131 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +six +google-auth-httplib2 +google-api-python-client +google-api-core +pyyaml diff --git a/requirements_dev.txt b/requirements_dev.txt new file mode 100644 index 0000000..14662f3 --- /dev/null +++ b/requirements_dev.txt @@ -0,0 +1,3 @@ +-r requirements.txt +absl-py +pytest diff --git a/src/build-wheels.sh b/src/build-wheels.sh new file mode 100755 index 0000000..8ad6c29 --- /dev/null +++ b/src/build-wheels.sh @@ -0,0 +1,80 @@ +#!/bin/bash -e + +GFLAGS_URL=https://github.com/gflags/gflags/archive/v2.1.2.tar.gz +GLOG_URL=https://github.com/google/glog/archive/v0.3.4.tar.gz + +SUPPORTED_VERSIONS=(cp36-cp36m cp37-cp37m cp38-cp38 cp39-cp39) + +ROOT=$(cd $(dirname "${BASH_SOURCE[0]}") >/dev/null; /bin/pwd -P) + +# Parallelize the build over N threads where N is the number of cores * 1.5. +PARALLEL_BUILD_OPTION="-j $(($(nproc 2> /dev/null || echo 4)*3/2))" + +# Clean up any previous build files. 
+rm -rf ${ROOT}/build ${ROOT}/dist ${ROOT}/setup.cfg + +# Create directory for third-party libraries. +mkdir -p ${ROOT}/build/third_party + +# Build and install gflags to build/third_party. +pushd ${ROOT}/build/third_party +curl -Lk ${GFLAGS_URL} -o gflags.tar.gz +tar xzvf gflags.tar.gz +cd gflags-* +mkdir build +cd build +cmake -DCMAKE_CXX_FLAGS=-fpic \ + -DGFLAGS_NAMESPACE=google \ + -DCMAKE_INSTALL_PREFIX:PATH=${ROOT}/build/third_party \ + .. +make ${PARALLEL_BUILD_OPTION} +make install +popd + +# Build and install glog to build/third_party. +pushd ${ROOT}/build/third_party +curl -L ${GLOG_URL} -o glog.tar.gz +tar xzvf glog.tar.gz +cd glog-* +./configure --with-pic \ + --prefix=${ROOT}/build/third_party \ + --with-gflags=${ROOT}/build/third_party +make ${PARALLEL_BUILD_OPTION} +make install +popd + +# Extract build version from version.py +grep "^ *__version__ *=" "/io/src/googleclouddebugger/version.py" | grep -Eo "[0-9.]+" > "version.txt" +AGENT_VERSION=$(cat "version.txt") +echo "Building distribution packages for python agent version ${AGENT_VERSION}" + +# Create setup.cfg file and point to the third_party libraries we just build. +echo "[global] +verbose=1 + +[build_ext] +include_dirs=${ROOT}/build/third_party/include +library_dirs=${ROOT}/build/third_party/lib" > ${ROOT}/setup.cfg + +# Build the Python Cloud Debugger agent. 
+pushd ${ROOT} + +for PY_VERSION in ${SUPPORTED_VERSIONS[@]}; do + echo "Building the ${PY_VERSION} agent" + "/opt/python/${PY_VERSION}/bin/pip" install -r /io/requirements_dev.txt + "/opt/python/${PY_VERSION}/bin/pip" wheel /io/src --no-deps -w /tmp/dist/ + PACKAGE_NAME="google_python_cloud_debugger-${AGENT_VERSION}" + WHL_FILENAME="${PACKAGE_NAME}-${PY_VERSION}-linux_x86_64.whl" + auditwheel repair "/tmp/dist/${WHL_FILENAME}" -w /io/dist/ + + echo "Running tests" + "/opt/python/${PY_VERSION}/bin/pip" install google-python-cloud-debugger --no-index -f /io/dist + "/opt/python/${PY_VERSION}/bin/pytest" /io/tests +done + +popd + +# Clean up temporary directories. +rm -rf ${ROOT}/build ${ROOT}/setup.cfg +echo "Build artifacts are in the dist directory" + diff --git a/src/build.sh b/src/build.sh index 7c86c71..19837d3 100755 --- a/src/build.sh +++ b/src/build.sh @@ -85,6 +85,6 @@ library_dirs=${ROOT}/build/third_party/lib" > ${ROOT}/setup.cfg # Build the Python Cloud Debugger agent. pushd ${ROOT} # Use custom python command if variable is set -"${PYTHON:-python}" setup.py bdist_egg +"${PYTHON:-python3}" setup.py bdist_wheel popd diff --git a/src/googleclouddebugger/bytecode_breakpoint.cc b/src/googleclouddebugger/bytecode_breakpoint.cc index 40939ee..8b782d7 100644 --- a/src/googleclouddebugger/bytecode_breakpoint.cc +++ b/src/googleclouddebugger/bytecode_breakpoint.cc @@ -66,7 +66,7 @@ void BytecodeBreakpoint::Detach() { } -int BytecodeBreakpoint::SetBreakpoint( +int BytecodeBreakpoint::CreateBreakpoint( PyCodeObject* code_object, int line, std::function hit_callback, @@ -102,6 +102,7 @@ int BytecodeBreakpoint::SetBreakpoint( breakpoint->hit_callable = PythonCallback::Wrap(hit_callback); breakpoint->error_callback = error_callback; breakpoint->cookie = cookie; + breakpoint->status = BreakpointStatus::kInactive; code_object_breakpoints->breakpoints.insert( std::make_pair(breakpoint->offset, breakpoint.get())); @@ -109,15 +110,44 @@ int 
BytecodeBreakpoint::SetBreakpoint( DCHECK(cookie_map_[cookie] == nullptr); cookie_map_[cookie] = breakpoint.release(); - PatchCodeObject(code_object_breakpoints); - return cookie; } +void BytecodeBreakpoint::ActivateBreakpoint(int cookie) { + if (cookie == -1) return; // no-op if invalid cookie. + + auto it_breakpoint = cookie_map_.find(cookie); + if (it_breakpoint == cookie_map_.end()) { + LOG(WARNING) << "Trying to activate a breakpoint with an unknown cookie: " + << cookie; + return; // No breakpoint with this cookie. + } + + auto it_code = patches_.find(it_breakpoint->second->code_object); + if (it_code != patches_.end()) { + CodeObjectBreakpoints* code = it_code->second; + // Ensure that there is a new breakpoint that was added. + if (it_breakpoint->second->status == BreakpointStatus::kInactive) { + // Set breakpoint to active. + it_breakpoint->second->status = BreakpointStatus::kActive; + // Patch code. + PatchCodeObject(code); + } else { + LOG(WARNING) << "Breakpoint with cookie: " << cookie + << " has already been activated"; + } + } else { + LOG(DFATAL) << "Missing code object"; + } +} void BytecodeBreakpoint::ClearBreakpoint(int cookie) { + if (cookie == -1) return; // no-op if invalid cookie + auto it_breakpoint = cookie_map_.find(cookie); if (it_breakpoint == cookie_map_.end()) { + LOG(WARNING) << "Trying to clear a breakpoint with an unknown cookie: " + << cookie; return; // No breakpoint with this cookie. } @@ -141,6 +171,9 @@ void BytecodeBreakpoint::ClearBreakpoint(int cookie) { DCHECK_EQ(1, erase_count); + // Set breakpoint as done, as it was removed from code->breakpoints map. 
+ it_breakpoint->second->status = BreakpointStatus::kDone; + PatchCodeObject(code); if (code->breakpoints.empty() && code->zombie_refs.empty()) { @@ -148,13 +181,22 @@ void BytecodeBreakpoint::ClearBreakpoint(int cookie) { patches_.erase(it_code); } } else { - DCHECK(false) << "Missing code object"; + LOG(DFATAL) << "Missing code object"; } delete it_breakpoint->second; cookie_map_.erase(it_breakpoint); } +BreakpointStatus BytecodeBreakpoint::GetBreakpointStatus(int cookie) { + auto it_breakpoint = cookie_map_.find(cookie); + if (it_breakpoint == cookie_map_.end()) { + // No breakpoint with this cookie. + return BreakpointStatus::kUnknown; + } + + return it_breakpoint->second->status; +} BytecodeBreakpoint::CodeObjectBreakpoints* BytecodeBreakpoint::PreparePatchCodeObject( @@ -254,6 +296,9 @@ void BytecodeBreakpoint::PatchCodeObject(CodeObjectBreakpoints* code) { for (auto it_entry = code->breakpoints.begin(); it_entry != code->breakpoints.end(); ++it_entry, ++const_index) { + // Skip breakpoint if it still hasn't been activated. + if (it_entry->second->status == BreakpointStatus::kInactive) continue; + int offset = it_entry->first; bool offset_found = true; const Breakpoint& breakpoint = *it_entry->second; @@ -287,6 +332,9 @@ void BytecodeBreakpoint::PatchCodeObject(CodeObjectBreakpoints* code) { LOG(WARNING) << "Failed to insert bytecode for breakpoint " << breakpoint.cookie << " at line " << breakpoint.line; errors.push_back(breakpoint.error_callback); + it_entry->second->status = BreakpointStatus::kError; + } else { + it_entry->second->status = BreakpointStatus::kActive; } } diff --git a/src/googleclouddebugger/bytecode_breakpoint.h b/src/googleclouddebugger/bytecode_breakpoint.h index f7ecccf..057766f 100644 --- a/src/googleclouddebugger/bytecode_breakpoint.h +++ b/src/googleclouddebugger/bytecode_breakpoint.h @@ -27,6 +27,43 @@ namespace devtools { namespace cdbg { +// Enum representing the status of a breakpoint. 
State tracking is helpful +// for testing and debugging the bytecode breakpoints. +// ======================================================================= +// State transition map: +// +// (start) kUnknown +// |- [CreateBreakpoint] +// | +// | +// | [ActivateBreakpoint] [PatchCodeObject] +// v | | +// kInactive ----> kActive <---> kError +// | | | +// |-------| | |-------| +// | | | +// |- |- |- [ClearBreakpoint] +// v v v +// kDone +// +// ======================================================================= +enum class BreakpointStatus { + // Unknown status for the breakpoint + kUnknown = 0, + + // Breakpoint is created and is patched in the bytecode. + kActive, + + // Breakpoint is created but is currently not patched in the bytecode. + kInactive, + + // Breakpoint has been cleared. + kDone, + + // Breakpoint is created but failed to be activated (patched in the bytecode). + kError +}; + // Sets breakpoints in Python code with zero runtime overhead. // BytecodeBreakpoint rewrites Python bytecode to insert a breakpoint. The // implementation is specific to CPython 2.7. @@ -41,21 +78,36 @@ class BytecodeBreakpoint { // Clears all the set breakpoints. void Detach(); - // Sets a new breakpoint in the specified code object. More than one - // breakpoint can be set at the same source location. When the breakpoint - // hits, the "callback" parameter is invoked. Every time this class fails to - // install the breakpoint, "error_callback" is invoked. Returns cookie used - // to clear the breakpoint. - int SetBreakpoint( - PyCodeObject* code_object, - int line, - std::function hit_callback, - std::function error_callback); - - // Removes a previously set breakpoint. If the cookie is invalid, this - // function does nothing. + // Creates a new breakpoint in the specified code object. More than one + // breakpoint can be created at the same source location. When the breakpoint + // hits, the "callback" parameter is invoked. 
Every time this method fails to + // create the breakpoint, "error_callback" is invoked and a cookie value of + // -1 is returned. If it succeeds in creating the breakpoint, returns the + // unique cookie used to activate and clear the breakpoint. Note this method + // only creates the breakpoint, to activate it you must call + // "ActivateBreakpoint". + int CreateBreakpoint(PyCodeObject* code_object, int line, + std::function hit_callback, + std::function error_callback); + + // Activates a previously created breakpoint. If it fails to set any + // breakpoint, the error callback will be invoked. This method is kept + // separate from "CreateBreakpoint" to ensure that the cookie is available + // before the "error_callback" is invoked. Calling this method with a cookie + // value of -1 is a no-op. Note that any breakpoints in the same function that + // previously failed to activate will retry to activate during this call. + // TODO: Provide a method "ActivateAllBreakpoints" to optimize + // the code and patch the code once, instead of multiple times. + void ActivateBreakpoint(int cookie); + + // Removes a previously set breakpoint. Calling this method with a cookie + // value of -1 is a no-op. Note that any breakpoints in the same function that + // previously failed to activate will retry to activate during this call. void ClearBreakpoint(int cookie); + // Get the status of a breakpoint. + BreakpointStatus GetBreakpointStatus(int cookie); + private: // Information about the breakpoint. struct Breakpoint { @@ -77,6 +129,9 @@ class BytecodeBreakpoint { // Breakpoint ID used to clear the breakpoint. int cookie; + + // Status of the breakpoint. 
+ BreakpointStatus status; }; // Set of breakpoints in a particular code object and original data of diff --git a/src/googleclouddebugger/native_module.cc b/src/googleclouddebugger/native_module.cc index 4a66c4f..60a9a8a 100644 --- a/src/googleclouddebugger/native_module.cc +++ b/src/googleclouddebugger/native_module.cc @@ -176,7 +176,7 @@ static PyObject* LogError(PyObject* self, PyObject* py_args) { } -// Sets a new breakpoint in Python code. The breakpoint may have an optional +// Creates a new breakpoint in Python code. The breakpoint may have an optional // condition to evaluate. When the breakpoint hits (and the condition matches) // a callable object will be invoked from that thread. // @@ -196,7 +196,8 @@ static PyObject* LogError(PyObject* self, PyObject* py_args) { // Returns: // Integer cookie identifying this breakpoint. It needs to be specified when // clearing the breakpoint. -static PyObject* SetConditionalBreakpoint(PyObject* self, PyObject* py_args) { +static PyObject* CreateConditionalBreakpoint(PyObject* self, + PyObject* py_args) { PyCodeObject* code_object = nullptr; int line = -1; PyCodeObject* condition = nullptr; @@ -238,7 +239,7 @@ static PyObject* SetConditionalBreakpoint(PyObject* self, PyObject* py_args) { int cookie = -1; - cookie = g_bytecode_breakpoint.SetBreakpoint( + cookie = g_bytecode_breakpoint.CreateBreakpoint( code_object, line, std::bind( @@ -255,11 +256,11 @@ static PyObject* SetConditionalBreakpoint(PyObject* self, PyObject* py_args) { } -// Clears the breakpoint previously set by "SetConditionalBreakpoint". Must be -// called exactly once per each call to "SetConditionalBreakpoint". +// Clears a breakpoint previously created by "CreateConditionalBreakpoint". Must +// be called exactly once per each call to "CreateConditionalBreakpoint". // // Args: -// cookie: breakpoint identifier returned by "SetConditionalBreakpoint". +// cookie: breakpoint identifier returned by "CreateConditionalBreakpoint". 
static PyObject* ClearConditionalBreakpoint(PyObject* self, PyObject* py_args) { int cookie = -1; if (!PyArg_ParseTuple(py_args, "i", &cookie)) { @@ -271,6 +272,24 @@ static PyObject* ClearConditionalBreakpoint(PyObject* self, PyObject* py_args) { Py_RETURN_NONE; } +// Activates a previously created breakpoint by "CreateConditionalBreakpoint" +// and that haven't been cleared yet using "ClearConditionalBreakpoint". +// TODO: Optimize breakpoint activation by having one method +// "ActivateAllConditionalBreakpoints" for all previously created breakpoints. +// +// Args: +// cookie: breakpoint identifier returned by "CreateConditionalBreakpoint". +static PyObject* ActivateConditionalBreakpoint(PyObject* self, + PyObject* py_args) { + int cookie = -1; + if (!PyArg_ParseTuple(py_args, "i", &cookie)) { + return nullptr; + } + + g_bytecode_breakpoint.ActivateBreakpoint(cookie); + + Py_RETURN_NONE; +} // Invokes a Python callable object with immutability tracer. // @@ -369,16 +388,22 @@ static PyMethodDef g_module_functions[] = { "ERROR level logging from Python code." }, { - "SetConditionalBreakpoint", - SetConditionalBreakpoint, + "CreateConditionalBreakpoint", + CreateConditionalBreakpoint, + METH_VARARGS, + "Creates a new breakpoint in Python code." + }, + { + "ActivateConditionalBreakpoint", + ActivateConditionalBreakpoint, METH_VARARGS, - "Sets a new breakpoint in Python code." + "Activates previously created breakpoint in Python code." }, { "ClearConditionalBreakpoint", ClearConditionalBreakpoint, METH_VARARGS, - "Clears previously set breakpoint in Python code." + "Clears previously created breakpoint in Python code." 
}, { "CallImmutable", diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index a3ed6bb..6a96a34 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -334,12 +334,14 @@ def _ActivateBreakpoint(self, module): native.LogInfo('Creating new Python breakpoint %s in %s, line %d' % ( self.GetBreakpointId(), codeobj, line)) - self._cookie = native.SetConditionalBreakpoint( + self._cookie = native.CreateConditionalBreakpoint( codeobj, line, condition, self._BreakpointEvent) + native.ActivateConditionalBreakpoint(self._cookie) + def _RemoveImportHook(self): """Removes the import hook if one was installed.""" if self._import_hook_cleanup: diff --git a/src/setup.py b/src/setup.py index c6a1c4d..64a5d46 100644 --- a/src/setup.py +++ b/src/setup.py @@ -14,12 +14,7 @@ """Python Cloud Debugger build and packaging script.""" -# pylint: disable=g-statement-before-imports,g-import-not-at-top -try: - from ConfigParser import ConfigParser # Python 2 -except ImportError: - from configparser import ConfigParser # Python 3 -# pylint: enable=g-statement-before-imports,g-import-not-at-top +from configparser import ConfigParser from glob import glob import os import re @@ -107,14 +102,10 @@ def ReadConfig(section, value, default): author='Google Inc.', version=version, install_requires=[ - 'enum34; python_version < "3.4"', - 'google-api-python-client==1.8.4; python_version < "3.0"', - 'google-api-python-client; python_version > "3.0"', - 'google-auth==1.8.2; python_version < "3.0"', - 'google-auth>=1.0.0; python_version > "3.0"', + 'google-api-python-client', + 'google-auth>=1.0.0', 'google-auth-httplib2', - 'google-api-core==1.15.0; python_version < "3.0"', - 'google-api-core; python_version > "3.0"', + 'google-api-core', 'pyyaml', 'six>=1.10.0', ], @@ -123,7 +114,6 @@ def ReadConfig(section, value, default): license='Apache License, Version 2.0', keywords='google cloud debugger', 
classifiers=[ - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', diff --git a/tests/application_info_test.py b/tests/application_info_test.py new file mode 100644 index 0000000..d247a25 --- /dev/null +++ b/tests/application_info_test.py @@ -0,0 +1,72 @@ +"""Tests for application_info.""" + +import os +from unittest import mock + +import requests + +from googleclouddebugger import application_info +from absl.testing import absltest + + +class ApplicationInfoTest(absltest.TestCase): + + def test_get_platform_default(self): + """Returns default platform when no platform is detected.""" + self.assertEqual(application_info.PlatformType.DEFAULT, + application_info.GetPlatform()) + + def test_get_platform_gcf_name(self): + """Returns cloud_function when the FUNCTION_NAME env variable is set.""" + try: + os.environ['FUNCTION_NAME'] = 'function-name' + self.assertEqual(application_info.PlatformType.CLOUD_FUNCTION, + application_info.GetPlatform()) + finally: + del os.environ['FUNCTION_NAME'] + + def test_get_platform_gcf_target(self): + """Returns cloud_function when the FUNCTION_TARGET env variable is set.""" + try: + os.environ['FUNCTION_TARGET'] = 'function-target' + self.assertEqual(application_info.PlatformType.CLOUD_FUNCTION, + application_info.GetPlatform()) + finally: + del os.environ['FUNCTION_TARGET'] + + def test_get_region_none(self): + """Returns None when no region is detected.""" + self.assertIsNone(application_info.GetRegion()) + + def test_get_region_gcf(self): + """Returns correct region when the FUNCTION_REGION env variable is set.""" + try: + os.environ['FUNCTION_REGION'] = 'function-region' + self.assertEqual('function-region', + application_info.GetRegion()) + finally: + del os.environ['FUNCTION_REGION'] + + @mock.patch('requests.get') + def test_get_region_metadata_server(self, mock_requests_get): + """Returns correct region if found in 
metadata server.""" + success_response = mock.Mock(requests.Response) + success_response.status_code = 200 + success_response.text = 'a/b/function-region' + mock_requests_get.return_value = success_response + + self.assertEqual('function-region', application_info.GetRegion()) + + @mock.patch('requests.get') + def test_get_region_metadata_server_fail(self, mock_requests_get): + """Returns None if region not found in metadata server.""" + exception = requests.exceptions.HTTPError() + failed_response = mock.Mock(requests.Response) + failed_response.status_code = 400 + failed_response.raise_for_status.side_effect = exception + mock_requests_get.return_value = failed_response + + self.assertIsNone(application_info.GetRegion()) + +if __name__ == '__main__': + absltest.main() diff --git a/tests/backoff_test.py b/tests/backoff_test.py new file mode 100644 index 0000000..262976c --- /dev/null +++ b/tests/backoff_test.py @@ -0,0 +1,35 @@ +"""Unit test for backoff module.""" + +from absl.testing import absltest + +from googleclouddebugger import backoff + + +class BackoffTest(absltest.TestCase): + """Unit test for backoff module.""" + + def setUp(self): + self._backoff = backoff.Backoff(10, 100, 1.5) + + def testInitial(self): + self.assertEqual(10, self._backoff.Failed()) + + def testIncrease(self): + self._backoff.Failed() + self.assertEqual(15, self._backoff.Failed()) + + def testMaximum(self): + for _ in range(100): + self._backoff.Failed() + + self.assertEqual(100, self._backoff.Failed()) + + def testResetOnSuccess(self): + for _ in range(4): + self._backoff.Failed() + self._backoff.Succeeded() + self.assertEqual(10, self._backoff.Failed()) + + +if __name__ == '__main__': + absltest.main() diff --git a/tests/breakpoints_manager_test.py b/tests/breakpoints_manager_test.py new file mode 100644 index 0000000..36ec946 --- /dev/null +++ b/tests/breakpoints_manager_test.py @@ -0,0 +1,200 @@ +"""Unit test for breakpoints_manager module.""" + +from datetime import datetime +from 
datetime import timedelta +from unittest import mock + +from absl.testing import absltest + +from googleclouddebugger import breakpoints_manager + + +class BreakpointsManagerTest(absltest.TestCase): + """Unit test for breakpoints_manager module.""" + + def setUp(self): + self._breakpoints_manager = breakpoints_manager.BreakpointsManager( + self, None) + + path = 'googleclouddebugger.breakpoints_manager.' + breakpoint_class = path + 'python_breakpoint.PythonBreakpoint' + + patcher = mock.patch(breakpoint_class) + self._mock_breakpoint = patcher.start() + self.addCleanup(patcher.stop) + + def testEmpty(self): + self.assertEmpty(self._breakpoints_manager._active) + + def testSetSingle(self): + self._breakpoints_manager.SetActiveBreakpoints([{'id': 'ID1'}]) + self._mock_breakpoint.assert_has_calls([ + mock.call({'id': 'ID1'}, self, self._breakpoints_manager, None)]) + self.assertLen(self._breakpoints_manager._active, 1) + + def testSetDouble(self): + self._breakpoints_manager.SetActiveBreakpoints([{'id': 'ID1'}]) + self._mock_breakpoint.assert_has_calls([ + mock.call({'id': 'ID1'}, self, self._breakpoints_manager, None)]) + self.assertLen(self._breakpoints_manager._active, 1) + + self._breakpoints_manager.SetActiveBreakpoints( + [{'id': 'ID1'}, {'id': 'ID2'}]) + self._mock_breakpoint.assert_has_calls([ + mock.call({'id': 'ID1'}, self, self._breakpoints_manager, None), + mock.call({'id': 'ID2'}, self, self._breakpoints_manager, None)]) + self.assertLen(self._breakpoints_manager._active, 2) + + def testSetRepeated(self): + self._breakpoints_manager.SetActiveBreakpoints([{'id': 'ID1'}]) + self._breakpoints_manager.SetActiveBreakpoints([{'id': 'ID1'}]) + self.assertEqual(1, self._mock_breakpoint.call_count) + + def testClear(self): + self._breakpoints_manager.SetActiveBreakpoints([{'id': 'ID1'}]) + self._breakpoints_manager.SetActiveBreakpoints([]) + self.assertEqual(1, self._mock_breakpoint.return_value.Clear.call_count) + 
self.assertEmpty(self._breakpoints_manager._active) + + def testCompleteInvalidId(self): + self._breakpoints_manager.CompleteBreakpoint('ID_INVALID') + + def testComplete(self): + self._breakpoints_manager.SetActiveBreakpoints([{'id': 'ID1'}]) + self._breakpoints_manager.CompleteBreakpoint('ID1') + self.assertEqual(1, self._mock_breakpoint.return_value.Clear.call_count) + + def testSetCompleted(self): + self._breakpoints_manager.CompleteBreakpoint('ID1') + self._breakpoints_manager.SetActiveBreakpoints([{'id': 'ID1'}]) + self.assertEqual(0, self._mock_breakpoint.call_count) + + def testCompletedCleanup(self): + self._breakpoints_manager.CompleteBreakpoint('ID1') + self._breakpoints_manager.SetActiveBreakpoints([]) + self._breakpoints_manager.SetActiveBreakpoints([{'id': 'ID1'}]) + self.assertEqual(1, self._mock_breakpoint.call_count) + + def testMultipleSetDelete(self): + self._breakpoints_manager.SetActiveBreakpoints( + [{'id': 'ID1'}, {'id': 'ID2'}, {'id': 'ID3'}, {'id': 'ID4'}]) + self.assertLen(self._breakpoints_manager._active, 4) + + self._breakpoints_manager.SetActiveBreakpoints( + [{'id': 'ID1'}, {'id': 'ID2'}, {'id': 'ID3'}, {'id': 'ID4'}]) + self.assertLen(self._breakpoints_manager._active, 4) + + self._breakpoints_manager.SetActiveBreakpoints([]) + self.assertEmpty(self._breakpoints_manager._active) + + def testCombination(self): + self._breakpoints_manager.SetActiveBreakpoints( + [{'id': 'ID1'}, {'id': 'ID2'}, {'id': 'ID3'}]) + self.assertLen(self._breakpoints_manager._active, 3) + + self._breakpoints_manager.CompleteBreakpoint('ID2') + self.assertEqual(1, self._mock_breakpoint.return_value.Clear.call_count) + self.assertLen(self._breakpoints_manager._active, 2) + + self._breakpoints_manager.SetActiveBreakpoints( + [{'id': 'ID2'}, {'id': 'ID3'}, {'id': 'ID4'}]) + self.assertEqual(2, self._mock_breakpoint.return_value.Clear.call_count) + self.assertLen(self._breakpoints_manager._active, 2) + + self._breakpoints_manager.CompleteBreakpoint('ID2') + 
self.assertEqual(2, self._mock_breakpoint.return_value.Clear.call_count) + self.assertLen(self._breakpoints_manager._active, 2) + + self._breakpoints_manager.SetActiveBreakpoints([]) + self.assertEqual(4, self._mock_breakpoint.return_value.Clear.call_count) + self.assertEmpty(self._breakpoints_manager._active) + + def testCheckExpirationNoBreakpoints(self): + self._breakpoints_manager.CheckBreakpointsExpiration() + + def testCheckNotExpired(self): + self._breakpoints_manager.SetActiveBreakpoints( + [{'id': 'ID1'}, {'id': 'ID2'}]) + self._mock_breakpoint.return_value.GetExpirationTime.return_value = ( + datetime.utcnow() + timedelta(minutes=1)) + self._breakpoints_manager.CheckBreakpointsExpiration() + self.assertEqual( + 0, + self._mock_breakpoint.return_value.ExpireBreakpoint.call_count) + + def testCheckExpired(self): + self._breakpoints_manager.SetActiveBreakpoints( + [{'id': 'ID1'}, {'id': 'ID2'}]) + self._mock_breakpoint.return_value.GetExpirationTime.return_value = ( + datetime.utcnow() - timedelta(minutes=1)) + self._breakpoints_manager.CheckBreakpointsExpiration() + self.assertEqual( + 2, + self._mock_breakpoint.return_value.ExpireBreakpoint.call_count) + + def testCheckExpirationReset(self): + self._breakpoints_manager.SetActiveBreakpoints([{'id': 'ID1'}]) + self._mock_breakpoint.return_value.GetExpirationTime.return_value = ( + datetime.utcnow() + timedelta(minutes=1)) + self._breakpoints_manager.CheckBreakpointsExpiration() + self.assertEqual( + 0, + self._mock_breakpoint.return_value.ExpireBreakpoint.call_count) + + self._breakpoints_manager.SetActiveBreakpoints( + [{'id': 'ID1'}, {'id': 'ID2'}]) + self._mock_breakpoint.return_value.GetExpirationTime.return_value = ( + datetime.utcnow() - timedelta(minutes=1)) + self._breakpoints_manager.CheckBreakpointsExpiration() + self.assertEqual( + 2, + self._mock_breakpoint.return_value.ExpireBreakpoint.call_count) + + def testCheckExpirationCacheNegative(self): + base = datetime(2015, 1, 1) + + with 
mock.patch.object(breakpoints_manager.BreakpointsManager, + 'GetCurrentTime') as mock_time: + mock_time.return_value = base + + self._breakpoints_manager.SetActiveBreakpoints([{'id': 'ID1'}]) + self._mock_breakpoint.return_value.GetExpirationTime.return_value = ( + base + timedelta(minutes=1)) + + self._breakpoints_manager.CheckBreakpointsExpiration() + self.assertEqual( + 0, + self._mock_breakpoint.return_value.ExpireBreakpoint.call_count) + + # The nearest expiration time is cached, so this should have no effect. + self._mock_breakpoint.return_value.GetExpirationTime.return_value = ( + base - timedelta(minutes=1)) + self._breakpoints_manager.CheckBreakpointsExpiration() + self.assertEqual( + 0, + self._mock_breakpoint.return_value.ExpireBreakpoint.call_count) + + def testCheckExpirationCachePositive(self): + base = datetime(2015, 1, 1) + + with mock.patch.object(breakpoints_manager.BreakpointsManager, + 'GetCurrentTime') as mock_time: + self._breakpoints_manager.SetActiveBreakpoints([{'id': 'ID1'}]) + self._mock_breakpoint.return_value.GetExpirationTime.return_value = ( + base + timedelta(minutes=1)) + + mock_time.return_value = base + self._breakpoints_manager.CheckBreakpointsExpiration() + self.assertEqual( + 0, + self._mock_breakpoint.return_value.ExpireBreakpoint.call_count) + + mock_time.return_value = base + timedelta(minutes=2) + self._breakpoints_manager.CheckBreakpointsExpiration() + self.assertEqual( + 1, + self._mock_breakpoint.return_value.ExpireBreakpoint.call_count) + + +if __name__ == '__main__': + absltest.main() diff --git a/tests/capture_collector_test.py b/tests/capture_collector_test.py new file mode 100644 index 0000000..935173f --- /dev/null +++ b/tests/capture_collector_test.py @@ -0,0 +1,1462 @@ +"""Unit test for capture_collector module.""" + +import copy +import datetime +import inspect +import logging +import os +import time +from unittest import mock + +import six + +from absl.testing import absltest + +from googleclouddebugger import 
capture_collector +from googleclouddebugger import labels + +LOGPOINT_PAUSE_MSG = ( + 'LOGPOINT: Logpoint is paused due to high log rate until log ' + 'quota is restored') + + +def CaptureCollectorWithDefaultLocation( + definition, data_visibility_policy=None): + """Makes a LogCollector with a default location. + + Args: + definition: the rest of the breakpoint definition + data_visibility_policy: optional visibility policy + + Returns: + A LogCollector + """ + definition['location'] = {'path': 'capture_collector_test.py', 'line': 10} + return capture_collector.CaptureCollector(definition, data_visibility_policy) + + +def LogCollectorWithDefaultLocation(definition): + """Makes a LogCollector with a default location. + + Args: + definition: the rest of the breakpoint definition + + Returns: + A LogCollector + """ + definition['location'] = {'path': 'capture_collector_test.py', 'line': 10} + return capture_collector.LogCollector(definition) + + +class CaptureCollectorTest(absltest.TestCase): + """Unit test for capture collector.""" + + def tearDown(self): + capture_collector.CaptureCollector.pretty_printers = [] + + def testCallStackUnlimitedFrames(self): + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.max_frames = 1000 + self._collector.Collect(inspect.currentframe()) + + self.assertGreater(len(self._collector.breakpoint['stackFrames']), 1) + self.assertLess(len(self._collector.breakpoint['stackFrames']), 100) + + def testCallStackLimitedFrames(self): + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.max_frames = 2 + self._collector.Collect(inspect.currentframe()) + + self.assertLen(self._collector.breakpoint['stackFrames'], 2) + + top_frame = self._collector.breakpoint['stackFrames'][0] + self.assertEqual('CaptureCollectorTest.testCallStackLimitedFrames', + top_frame['function']) + self.assertIn('capture_collector_test.py', top_frame['location']['path']) + 
self.assertGreater(top_frame['location']['line'], 1) + + frame_below = self._collector.breakpoint['stackFrames'][1] + frame_below_line = inspect.currentframe().f_back.f_lineno + self.assertEqual(frame_below_line, frame_below['location']['line']) + + def testCallStackLimitedExpandedFrames(self): + def CountLocals(frame): + return len(frame['arguments']) + len(frame['locals']) + + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.max_frames = 3 + self._collector.max_expand_frames = 2 + self._collector.Collect(inspect.currentframe()) + + frames = self._collector.breakpoint['stackFrames'] + self.assertLen(frames, 3) + self.assertGreater(CountLocals(frames[0]), 0) + self.assertGreater(CountLocals(frames[1]), 1) + self.assertEqual(0, CountLocals(frames[2])) + + def testSimpleArguments(self): + + def Method(unused_a, unused_b): + self._collector.Collect(inspect.currentframe()) + top_frame = self._collector.breakpoint['stackFrames'][0] + self.assertListEqual( + [{'name': 'unused_a', 'value': '158', 'type': 'int'}, + {'name': 'unused_b', 'value': "'hello'", 'type': 'str'}], + top_frame['arguments']) + self.assertEqual('Method', top_frame['function']) + + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + Method(158, 'hello') + + def testMethodWithFirstArgumentNamedSelf(self): + this = self + + def Method(self, unused_a, unused_b): # pylint: disable=unused-argument + this._collector.Collect(inspect.currentframe()) + top_frame = this._collector.breakpoint['stackFrames'][0] + this.assertListEqual( + [{'name': 'self', 'value': "'world'", 'type': 'str'}, + {'name': 'unused_a', 'value': '158', 'type': 'int'}, + {'name': 'unused_b', 'value': "'hello'", 'type': 'str'}], + top_frame['arguments']) + # This is the incorrect function name, but we are validating that no + # exceptions are thrown here. 
+ this.assertEqual('str.Method', top_frame['function']) + + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + Method('world', 158, 'hello') + + def testMethodWithArgumentNamedSelf(self): + this = self + + def Method(unused_a, unused_b, self): # pylint: disable=unused-argument + this._collector.Collect(inspect.currentframe()) + top_frame = this._collector.breakpoint['stackFrames'][0] + this.assertListEqual( + [{'name': 'unused_a', 'value': '158', 'type': 'int'}, + {'name': 'unused_b', 'value': "'hello'", 'type': 'str'}, + {'name': 'self', 'value': "'world'", 'type': 'str'}], + top_frame['arguments']) + this.assertEqual('Method', top_frame['function']) + + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + Method(158, 'hello', 'world') + + def testClassMethod(self): + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.Collect(inspect.currentframe()) + top_frame = self._collector.breakpoint['stackFrames'][0] + self.assertListEqual( + [{'name': 'self', 'varTableIndex': 1}], + top_frame['arguments']) + self.assertEqual('CaptureCollectorTest.testClassMethod', + top_frame['function']) + + def testClassMethodWithOptionalArguments(self): + + def Method(unused_a, unused_optional='notneeded'): + self._collector.Collect(inspect.currentframe()) + top_frame = self._collector.breakpoint['stackFrames'][0] + self.assertListEqual( + [{'name': 'unused_a', 'varTableIndex': 1}, + {'name': 'unused_optional', 'value': "'notneeded'", 'type': 'str'}], + top_frame['arguments']) + self.assertEqual('Method', top_frame['function']) + + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + Method(self) + + def testClassMethodWithPositionalArguments(self): + + def Method(*unused_pos): + self._collector.Collect(inspect.currentframe()) + top_frame = self._collector.breakpoint['stackFrames'][0] + self.assertListEqual( + [{'name': 'unused_pos', + 'type': 'tuple', + 'members': [{'name': '[0]', 
'value': '1', 'type': 'int'}]}], + top_frame['arguments']) + self.assertEqual('Method', top_frame['function']) + + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + Method(1) + + def testClassMethodWithKeywords(self): + + def Method(**unused_kwd): + self._collector.Collect(inspect.currentframe()) + top_frame = self._collector.breakpoint['stackFrames'][0] + self.assertCountEqual( + [{'name': "'first'", 'value': '1', 'type': 'int'}, + {'name': "'second'", 'value': '2', 'type': 'int'}], + top_frame['arguments'][0]['members']) + self.assertEqual('Method', top_frame['function']) + + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + Method(first=1, second=2) + + def testNoLocalVariables(self): + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.Collect(inspect.currentframe()) + top_frame = self._collector.breakpoint['stackFrames'][0] + self.assertEmpty(top_frame['locals']) + self.assertEqual('CaptureCollectorTest.testNoLocalVariables', + top_frame['function']) + + def testRuntimeError(self): + class BadDict(dict): + + def __init__(self, d): + d['foo'] = 'bar' + super(BadDict, self).__init__(d) + + def __getattribute__(self, attr): + raise RuntimeError('Bogus error') + + class BadType(object): + + def __init__(self): + self.__dict__ = BadDict(self.__dict__) + + unused_a = BadType() + + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.Collect(inspect.currentframe()) + + var_a = self._Pack(self._LocalByName('unused_a')) + self.assertDictEqual( + { + 'name': 'unused_a', + 'status': { + 'isError': True, + 'refersTo': 'VARIABLE_VALUE', + 'description': { + 'format': 'Failed to capture variable: $0', + 'parameters': ['Bogus error'] + }, + } + }, var_a) + + def testBadDictionary(self): + class BadDict(dict): + + def items(self): + raise AttributeError('attribute error') + + class BadType(object): + + def __init__(self): + self.good = 1 + self.bad = 
BadDict() + + unused_a = BadType() + + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.Collect(inspect.currentframe()) + + var_a = self._Pack(self._LocalByName('unused_a')) + members = var_a['members'] + self.assertLen(members, 2) + self.assertIn({'name': 'good', 'value': '1', 'type': 'int'}, members) + self.assertIn( + { + 'name': 'bad', + 'status': { + 'isError': True, + 'refersTo': 'VARIABLE_VALUE', + 'description': { + 'format': 'Failed to capture variable: $0', + 'parameters': ['attribute error'] + }, + } + }, members) + + def testLocalVariables(self): + unused_a = 8 + unused_b = True + unused_nothing = None + unused_s = 'hippo' + + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.Collect(inspect.currentframe()) + top_frame = self._collector.breakpoint['stackFrames'][0] + self.assertLen(top_frame['arguments'], 1) # just self. + self.assertCountEqual( + [{'name': 'unused_a', 'value': '8', 'type': 'int'}, + {'name': 'unused_b', 'value': 'True', 'type': 'bool'}, + {'name': 'unused_nothing', 'value': 'None'}, + {'name': 'unused_s', 'value': "'hippo'", 'type': 'str'}], + top_frame['locals']) + + def testLocalVariablesWithBlacklist(self): + unused_a = capture_collector.LineNoFilter() + unused_b = 5 + + # Side effect logic for the mock data visibility object + def IsDataVisible(name): + path_prefix = 'googleclouddebugger.capture_collector.' 
+ if name == path_prefix + 'LineNoFilter': + return (False, 'data blocked') + return (True, None) + + mock_policy = mock.MagicMock() + mock_policy.IsDataVisible.side_effect = IsDataVisible + + self._collector = CaptureCollectorWithDefaultLocation( + {'id': 'BP_ID'}, + mock_policy) + self._collector.Collect(inspect.currentframe()) + top_frame = self._collector.breakpoint['stackFrames'][0] + # Should be blocked + self.assertIn( + { + 'name': 'unused_a', + 'status': { + 'description': {'format': 'data blocked'}, + 'refersTo': 'VARIABLE_NAME', + 'isError': True + } + }, + top_frame['locals']) + # Should not be blocked + self.assertIn( + { + 'name': 'unused_b', + 'value': '5', + 'type': 'int' + }, + top_frame['locals']) + + def testWatchedExpressionsBlacklisted(self): + class TestClass(object): + + def __init__(self): + self.a = 5 + + unused_a = TestClass() + + # Side effect logic for the mock data visibility object + def IsDataVisible(name): + if name == 'capture_collector_test.TestClass': + return (False, 'data blocked') + return (True, None) + mock_policy = mock.MagicMock() + mock_policy.IsDataVisible.side_effect = IsDataVisible + + self._collector = CaptureCollectorWithDefaultLocation( + { + 'id': 'BP_ID', + 'expressions': ['unused_a', 'unused_a.a'] + }, + mock_policy) + self._collector.Collect(inspect.currentframe()) + # Class should be blocked + self.assertIn( + { + 'name': 'unused_a', + 'status': { + 'description': {'format': 'data blocked'}, + 'refersTo': 'VARIABLE_NAME', + 'isError': True + } + }, + self._collector.breakpoint['evaluatedExpressions']) + # TODO: Explicit member SHOULD also be blocked but this is + # currently not implemented. After fixing the implementation, change + # the test below to assert that it's blocked too. 
+ self.assertIn( + { + 'name': 'unused_a.a', + 'type': 'int', + 'value': '5' + }, + self._collector.breakpoint['evaluatedExpressions']) + + def testLocalsNonTopFrame(self): + + def Method(): + self._collector.Collect(inspect.currentframe()) + self.assertListEqual( + [{'name': 'self', 'varTableIndex': 1}], + self._collector.breakpoint['stackFrames'][1]['arguments']) + self.assertCountEqual( + [{'name': 'unused_a', 'value': '47', 'type': 'int'}, + {'name': 'Method', 'value': 'function Method'}], + self._collector.breakpoint['stackFrames'][1]['locals']) + + unused_a = 47 + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + Method() + + def testDictionaryMaxDepth(self): + d = {} + t = d + for _ in range(10): + t['inner'] = {} + t = t['inner'] + + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.default_capture_limits.max_depth = 3 + self._collector.Collect(inspect.currentframe()) + self.assertDictEqual( + {'name': 'd', + 'type': 'dict', + 'members': [{'name': "'inner'", + 'type': 'dict', + 'members': [{'name': "'inner'", 'varTableIndex': 0}]}]}, + self._LocalByName('d')) + + def testVectorMaxDepth(self): + l = [] + t = l + for _ in range(10): + t.append([]) + t = t[0] + + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.default_capture_limits.max_depth = 3 + self._collector.Collect(inspect.currentframe()) + self.assertDictEqual( + {'name': 'l', + 'type': 'list', + 'members': [{'name': '[0]', + 'type': 'list', + 'members': [{'name': '[0]', 'varTableIndex': 0}]}]}, + self._LocalByName('l')) + + def testStringTrimming(self): + unused_s = '123456789' + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.default_capture_limits.max_value_len = 8 + self._collector.Collect(inspect.currentframe()) + self.assertListEqual( + [{'name': 'unused_s', 'value': "'12345678...", 'type': 'str'}], + 
self._collector.breakpoint['stackFrames'][0]['locals']) + + def testBytearrayTrimming(self): + unused_bytes = bytearray(range(20)) + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.default_capture_limits.max_value_len = 20 + self._collector.Collect(inspect.currentframe()) + self.assertListEqual( + [{'name': 'unused_bytes', 'value': r"bytearray(b'\x00\x01\...", + 'type': 'bytearray'}], + self._collector.breakpoint['stackFrames'][0]['locals']) + + def testObject(self): + + class MyClass(object): + + def __init__(self): + self.a = 1 + self.b = 2 + + unused_my = MyClass() + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.Collect(inspect.currentframe()) + var_index = self._LocalByName('unused_my')['varTableIndex'] + self.assertEqual( + __name__ + '.MyClass', + self._collector.breakpoint['variableTable'][var_index]['type']) + self.assertCountEqual( + [{'name': 'a', 'value': '1', 'type': 'int'}, + {'name': 'b', 'value': '2', 'type': 'int'}], + self._collector.breakpoint['variableTable'][var_index]['members']) + + def testBufferFullLocalRef(self): + + class MyClass(object): + + def __init__(self, data): + self.data = data + + def Method(): + unused_m1 = MyClass('1' * 10000) + unused_m2 = MyClass('2' * 10000) + unused_m3 = MyClass('3' * 10000) + unused_m4 = MyClass('4' * 10000) + unused_m5 = MyClass('5' * 10000) + unused_m6 = MyClass('6' * 10000) + + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.max_frames = 1 + self._collector.max_size = 48000 + self._collector.default_capture_limits.max_value_len = 10009 + self._collector.Collect(inspect.currentframe()) + + # Verify that 5 locals fit and 1 is out of buffer. 
+ count = {True: 0, False: 0} # captured, not captured + for local in self._collector.breakpoint['stackFrames'][0]['locals']: + var_index = local['varTableIndex'] + self.assertLess(var_index, + len(self._collector.breakpoint['variableTable'])) + if local['name'].startswith('unused_m'): + count[var_index != 0] += 1 + self.assertDictEqual({True: 5, False: 1}, count) + + Method() + + def testBufferFullDictionaryRef(self): + + class MyClass(object): + + def __init__(self, data): + self.data = data + + def Method(): + unused_d1 = {'a': MyClass('1' * 10000)} + unused_d2 = {'b': MyClass('2' * 10000)} + + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.max_frames = 1 + self._collector.max_size = 9000 + self._collector.default_capture_limits.max_value_len = 10009 + self._collector.Collect(inspect.currentframe()) + + # Verify that one of {d1,d2} could fit and the other didn't. + var_indexes = [self._LocalByName(n)['members'][0]['varTableIndex'] == 0 + for n in ['unused_d1', 'unused_d2']] + self.assertEqual(1, sum(var_indexes)) + + Method() + + def testClassCrossReference(self): + + class MyClass(object): + pass + + m1 = MyClass() + m2 = MyClass() + m1.other = m2 + m2.other = m1 + + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.Collect(inspect.currentframe()) + + m1_var_index = self._LocalByName('m1')['varTableIndex'] + m2_var_index = self._LocalByName('m2')['varTableIndex'] + + var_table = self._collector.breakpoint['variableTable'] + self.assertDictEqual( + {'type': __name__ + '.MyClass', + 'members': [{'name': 'other', 'varTableIndex': m1_var_index}]}, + var_table[m2_var_index]) + self.assertDictEqual( + {'type': __name__ + '.MyClass', + 'members': [{'name': 'other', 'varTableIndex': m2_var_index}]}, + var_table[m1_var_index]) + + def testCaptureVector(self): + unused_my_list = [1, 2, 3, 4, 5] + unused_my_slice = unused_my_list[1:4] + + self._collector = 
CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.Collect(inspect.currentframe()) + + self.assertDictEqual( + {'name': 'unused_my_list', + 'type': 'list', + 'members': [{'name': '[0]', 'value': '1', 'type': 'int'}, + {'name': '[1]', 'value': '2', 'type': 'int'}, + {'name': '[2]', 'value': '3', 'type': 'int'}, + {'name': '[3]', 'value': '4', 'type': 'int'}, + {'name': '[4]', 'value': '5', 'type': 'int'}]}, + self._LocalByName('unused_my_list')) + self.assertDictEqual( + {'name': 'unused_my_slice', + 'type': 'list', + 'members': [{'name': '[0]', 'value': '2', 'type': 'int'}, + {'name': '[1]', 'value': '3', 'type': 'int'}, + {'name': '[2]', 'value': '4', 'type': 'int'}]}, + self._LocalByName('unused_my_slice')) + + def testCaptureDictionary(self): + unused_my_dict = { + 'first': 1, + 3.14: 'pi', + (5, 6): 7, + frozenset([5, 6]): 'frozen', + 'vector': ['odin', 'dva', 'tri'], + 'inner': {1: 'one'}, + 'empty': {}} + + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.Collect(inspect.currentframe()) + + frozenset_name = 'frozenset({5, 6})' if six.PY3 else 'frozenset([5, 6])' + self.assertCountEqual( + [{'name': "'first'", 'value': '1', 'type': 'int'}, + {'name': '3.14', 'value': "'pi'", 'type': 'str'}, + {'name': '(5, 6)', 'value': '7', 'type': 'int'}, + {'name': frozenset_name, 'value': "'frozen'", 'type': 'str'}, + {'name': "'vector'", + 'type': 'list', + 'members': [{'name': '[0]', 'value': "'odin'", 'type': 'str'}, + {'name': '[1]', 'value': "'dva'", 'type': 'str'}, + {'name': '[2]', 'value': "'tri'", 'type': 'str'}]}, + {'name': "'inner'", + 'type': 'dict', + 'members': [{'name': '1', 'value': "'one'", 'type': 'str'}]}, + {'name': "'empty'", + 'type': 'dict', + 'members': [ + {'status': { + 'refersTo': 'VARIABLE_NAME', + 'description': {'format': 'Empty dictionary'}}}]}], + self._LocalByName('unused_my_dict')['members']) + + def testEscapeDictionaryKey(self): + unused_dict = {} + unused_dict[u'\xe0'] = 
u'\xe0' + unused_dict['\x88'] = '\x88' + + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.Collect(inspect.currentframe()) + + unicode_type = 'str' if six.PY3 else 'unicode' + unicode_name = "'\xe0'" if six.PY3 else "u'\\xe0'" + unicode_value = "'\xe0'" if six.PY3 else "u'\\xe0'" + + self.assertCountEqual( + [{'type': 'str', 'name': "'\\x88'", 'value': "'\\x88'"}, + {'type': unicode_type, 'name': unicode_name, 'value': unicode_value}], + self._LocalByName('unused_dict')['members']) + + def testOversizedList(self): + unused_big_list = ['x'] * 10000 + + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.Collect(inspect.currentframe()) + + members = self._LocalByName('unused_big_list')['members'] + + self.assertLen(members, 26) + self.assertDictEqual({'name': '[7]', 'value': "'x'", 'type': 'str'}, + members[7]) + self.assertDictEqual( + {'status': { + 'refersTo': 'VARIABLE_VALUE', + 'description': { + 'format': + ('Only first $0 items were captured. Use in an expression' + ' to see all items.'), + 'parameters': ['25']}}}, + members[25]) + + def testOversizedDictionary(self): + unused_big_dict = {'item' + str(i): i**2 for i in range(26)} + + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.Collect(inspect.currentframe()) + + members = self._LocalByName('unused_big_dict')['members'] + + self.assertLen(members, 26) + self.assertDictEqual( + {'status': { + 'refersTo': 'VARIABLE_VALUE', + 'description': { + 'format': + ('Only first $0 items were captured. 
Use in an expression' + ' to see all items.'), + 'parameters': ['25']}}}, + members[25]) + + def testEmptyDictionary(self): + unused_empty_dict = {} + + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.Collect(inspect.currentframe()) + + self.assertEqual( + {'name': 'unused_empty_dict', + 'type': 'dict', + 'members': [{ + 'status': { + 'refersTo': 'VARIABLE_NAME', + 'description': {'format': 'Empty dictionary'}}}]}, + self._LocalByName('unused_empty_dict')) + + def testEmptyCollection(self): + for unused_c, object_type in [([], 'list'), ((), 'tuple'), (set(), 'set')]: + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.Collect(inspect.currentframe()) + + self.assertEqual( + {'name': 'unused_c', + 'type': object_type, + 'members': [{ + 'status': { + 'refersTo': 'VARIABLE_NAME', + 'description': {'format': 'Empty collection'}}}]}, + self._Pack(self._LocalByName('unused_c'))) + + def testEmptyClass(self): + + class EmptyObject(object): + pass + + unused_empty_object = EmptyObject() + + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.Collect(inspect.currentframe()) + + self.assertEqual( + {'name': 'unused_empty_object', + 'type': __name__ + '.EmptyObject', + 'members': [{ + 'status': { + 'refersTo': 'VARIABLE_NAME', + 'description': {'format': 'Object has no fields'}}}]}, + self._Pack(self._LocalByName('unused_empty_object'))) + + def testWatchedExpressionsSuccess(self): + unused_dummy_a = 'x' + unused_dummy_b = {1: 2, 3: 'a'} + + self._collector = CaptureCollectorWithDefaultLocation({ + 'id': 'BP_ID', + 'expressions': ['1+2', 'unused_dummy_a*8', 'unused_dummy_b']}) + self._collector.Collect(inspect.currentframe()) + self.assertListEqual( + [{'name': '1+2', 'value': '3', 'type': 'int'}, + {'name': 'unused_dummy_a*8', 'value': "'xxxxxxxx'", 'type': 'str'}, + {'name': 'unused_dummy_b', + 'type': 'dict', + 'members': [{'name': '1', 'value': '2', 'type': 
'int'}, + {'name': '3', 'value': "'a'", 'type': 'str'}]}], + self._collector.breakpoint['evaluatedExpressions']) + + def testOversizedStringExpression(self): + # This test checks that string expressions are collected first, up to the + # max size. The last 18 characters of the string will be missing due to the + # size for the name (14 bytes), type name (3 bytes), and the opening quote + # (1 byte). This test may be sensitive to minor changes in the collector + # code. If it turns out to break easily, consider simply verifying + # that the first 400 characters are collected, since that should suffice to + # ensure that we're not using the normal limit of 256 bytes. + self._collector = CaptureCollectorWithDefaultLocation({ + 'id': 'BP_ID', + 'expressions': ['unused_dummy_a']}) + self._collector.max_size = 500 + unused_dummy_a = '|'.join(['%04d' % i for i in range(5, 510, 5)]) + self._collector.Collect(inspect.currentframe()) + self.assertListEqual( + [{'name': 'unused_dummy_a', + 'type': 'str', + 'value': "'{0}...".format(unused_dummy_a[0:-18])}], + self._collector.breakpoint['evaluatedExpressions']) + + def testOversizedListExpression(self): + self._collector = CaptureCollectorWithDefaultLocation({ + 'id': 'BP_ID', + 'expressions': ['unused_dummy_a']}) + unused_dummy_a = list(range(0, 100)) + self._collector.Collect(inspect.currentframe()) + # Verify that the list did not get truncated. 
+ self.assertListEqual( + [{'name': 'unused_dummy_a', 'type': 'list', 'members': [ + {'type': 'int', 'value': str(a), 'name': '[{0}]'.format(a)} + for a in unused_dummy_a]}], + self._collector.breakpoint['evaluatedExpressions']) + + def testExpressionNullBytes(self): + self._collector = CaptureCollectorWithDefaultLocation({ + 'id': 'BP_ID', + 'expressions': ['\0']}) + self._collector.Collect(inspect.currentframe()) + + evaluated_expressions = self._collector.breakpoint['evaluatedExpressions'] + self.assertLen(evaluated_expressions, 1) + self.assertTrue(evaluated_expressions[0]['status']['isError']) + + def testSyntaxErrorExpression(self): + self._collector = CaptureCollectorWithDefaultLocation({ + 'id': 'BP_ID', + 'expressions': ['2+']}) + self._collector.Collect(inspect.currentframe()) + + evaluated_expressions = self._collector.breakpoint['evaluatedExpressions'] + self.assertLen(evaluated_expressions, 1) + self.assertTrue(evaluated_expressions[0]['status']['isError']) + self.assertEqual( + 'VARIABLE_NAME', + evaluated_expressions[0]['status']['refersTo']) + + def testExpressionException(self): + unused_dummy_a = 1 + unused_dummy_b = 0 + self._collector = CaptureCollectorWithDefaultLocation({ + 'id': 'BP_ID', + 'expressions': ['unused_dummy_a/unused_dummy_b']}) + self._collector.Collect(inspect.currentframe()) + + zero_division_msg = ('division by zero' + if six.PY3 else 'integer division or modulo by zero') + + self.assertListEqual( + [{'name': 'unused_dummy_a/unused_dummy_b', + 'status': { + 'isError': True, + 'refersTo': 'VARIABLE_VALUE', + 'description': { + 'format': 'Exception occurred: $0', + 'parameters': [zero_division_msg]}}}], + self._collector.breakpoint['evaluatedExpressions']) + + def testMutableExpression(self): + + def ChangeA(): + self._a += 1 + + self._a = 0 + ChangeA() + self._collector = CaptureCollectorWithDefaultLocation({ + 'id': 'BP_ID', + 'expressions': ['ChangeA()']}) + self._collector.Collect(inspect.currentframe()) + + 
self.assertEqual(1, self._a) + self.assertListEqual( + [{'name': 'ChangeA()', + 'status': { + 'isError': True, + 'refersTo': 'VARIABLE_VALUE', + 'description': { + 'format': 'Exception occurred: $0', + 'parameters': [('Only immutable methods can be ' + 'called from expressions')]}}}], + self._collector.breakpoint['evaluatedExpressions']) + + def testPrettyPrinters(self): + + class MyClass(object): + pass + + def PrettyPrinter1(obj): + if obj != unused_obj1: + return None + return ((('name1_%d' % i, '1_%d' % i) for i in range(2)), 'pp-type1') + + def PrettyPrinter2(obj): + if obj != unused_obj2: + return None + return ((('name2_%d' % i, '2_%d' % i) for i in range(3)), 'pp-type2') + + capture_collector.CaptureCollector.pretty_printers += [ + PrettyPrinter1, PrettyPrinter2] + + unused_obj1 = MyClass() + unused_obj2 = MyClass() + unused_obj3 = MyClass() + + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.Collect(inspect.currentframe()) + + obj_vars = [self._Pack(self._LocalByName('unused_obj%d' % i)) + for i in range(1, 4)] + + self.assertListEqual( + [ + {'name': 'unused_obj1', + 'type': 'pp-type1', + 'members': [ + {'name': 'name1_0', 'value': "'1_0'", 'type': 'str'}, + {'name': 'name1_1', 'value': "'1_1'", 'type': 'str'}]}, + {'name': 'unused_obj2', + 'type': 'pp-type2', + 'members': [ + {'name': 'name2_0', 'value': "'2_0'", 'type': 'str'}, + {'name': 'name2_1', 'value': "'2_1'", 'type': 'str'}, + {'name': 'name2_2', 'value': "'2_2'", 'type': 'str'}]}, + {'name': 'unused_obj3', + 'type': __name__ + '.MyClass', + 'members': [ + {'status': { + 'refersTo': 'VARIABLE_NAME', + 'description': {'format': 'Object has no fields'}}}]}], + obj_vars) + + def testDateTime(self): + unused_datetime = datetime.datetime(2014, 6, 11, 2, 30) + unused_date = datetime.datetime(1980, 3, 1) + unused_time = datetime.time(18, 43, 11) + unused_timedelta = datetime.timedelta(days=3, microseconds=8237) + + self._collector = 
CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.Collect(inspect.currentframe()) + + self.assertDictEqual( + {'name': 'unused_datetime', + 'type': 'datetime.datetime', + 'value': '2014-06-11 02:30:00'}, + self._Pack(self._LocalByName('unused_datetime'))) + + self.assertDictEqual( + {'name': 'unused_date', + 'type': 'datetime.datetime', + 'value': '1980-03-01 00:00:00'}, + self._Pack(self._LocalByName('unused_date'))) + + self.assertDictEqual( + {'name': 'unused_time', + 'type': 'datetime.time', + 'value': '18:43:11'}, + self._Pack(self._LocalByName('unused_time'))) + + self.assertDictEqual( + {'name': 'unused_timedelta', + 'type': 'datetime.timedelta', + 'value': '3 days, 0:00:00.008237'}, + self._Pack(self._LocalByName('unused_timedelta'))) + + def testException(self): + unused_exception = ValueError('arg1', 2, [3]) + + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.Collect(inspect.currentframe()) + obj = self._Pack(self._LocalByName('unused_exception')) + + self.assertEqual('unused_exception', obj['name']) + self.assertEqual('ValueError', obj['type']) + self.assertListEqual([ + {'value': "'arg1'", 'type': 'str', 'name': '[0]'}, + {'value': '2', 'type': 'int', 'name': '[1]'}, + {'members': [{'value': '3', 'type': 'int', 'name': '[0]'}], + 'type': 'list', + 'name': '[2]'}], obj['members']) + + def testRequestLogIdCapturing(self): + capture_collector.request_log_id_collector = lambda: 'test_log_id' + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.Collect(inspect.currentframe()) + + self.assertIn('labels', self._collector.breakpoint) + self.assertEqual( + 'test_log_id', + self._collector.breakpoint['labels'][labels.Breakpoint.REQUEST_LOG_ID]) + + def testRequestLogIdCapturingNoId(self): + capture_collector.request_log_id_collector = lambda: None + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + 
self._collector.Collect(inspect.currentframe()) + + def testRequestLogIdCapturingNoCollector(self): + capture_collector.request_log_id_collector = None + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.Collect(inspect.currentframe()) + + def testUserIdSuccess(self): + capture_collector.user_id_collector = lambda: ('mdb_user', 'noogler') + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.Collect(inspect.currentframe()) + + self.assertIn('evaluatedUserId', self._collector.breakpoint) + self.assertEqual( + { + 'kind': 'mdb_user', + 'id': 'noogler' + }, + self._collector.breakpoint['evaluatedUserId']) + + def testUserIdIsNone(self): + capture_collector.user_id_collector = lambda: (None, None) + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.Collect(inspect.currentframe()) + + self.assertNotIn('evaluatedUserId', self._collector.breakpoint) + + def testUserIdNoKind(self): + capture_collector.user_id_collector = lambda: (None, 'noogler') + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.Collect(inspect.currentframe()) + + self.assertNotIn('evaluatedUserId', self._collector.breakpoint) + + def testUserIdNoValue(self): + capture_collector.user_id_collector = lambda: ('mdb_user', None) + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) + self._collector.Collect(inspect.currentframe()) + + self.assertNotIn('evaluatedUserId', self._collector.breakpoint) + + def _LocalByName(self, name, frame=0): + for local in self._collector.breakpoint['stackFrames'][frame]['locals']: + if local['name'] == name: + return local + self.fail('Local %s not found in frame %d' % (name, frame)) + + def _Pack(self, variable): + """Embeds variables referenced through var_index.""" + packed_variable = copy.copy(variable) + + var_index = variable.get('varTableIndex') + if var_index is not None: + packed_variable.update( 
+ self._collector.breakpoint['variableTable'][var_index]) + del packed_variable['varTableIndex'] + + if 'members' in packed_variable: + packed_variable['members'] = [self._Pack(member) for member + in packed_variable['members']] + + return packed_variable + + +class LogCollectorTest(absltest.TestCase): + """Unit test for log collector.""" + + def setUp(self): + self._logger = logging.getLogger('test') + + class LogVerifier(logging.Handler): + + def __init__(self): + super(LogVerifier, self).__init__() + self._received_records = [] + + def emit(self, record): + self._received_records.append(record) + + def GotMessage(self, msg, level=logging.INFO, line_number=10, + func_name=None): + """Checks that the given message was logged correctly. + + This method verifies both the contents and the source location of the + message match expectations. + + Args: + msg: The expected message + level: The expected logging level. + line_number: The expected line number. + func_name: If specified, the expected log record must have a funcName + equal to this value. + Returns: + True iff the oldest unverified message matches the given attributes. 
+ """ + record = self._received_records.pop(0) + frame = inspect.currentframe().f_back + if level != record.levelno: + logging.error('Expected log level %d, got %d (%s)', + level, record.levelno, record.levelname) + return False + if msg != record.msg: + logging.error('Expected msg "%s", received "%s"', msg, record.msg) + return False + pathname = capture_collector.NormalizePath(frame.f_code.co_filename) + if pathname != record.pathname: + logging.error('Expected pathname "%s", received "%s"', pathname, + record.pathname) + return False + if os.path.basename(pathname) != record.filename: + logging.error('Expected filename "%s", received "%s"', + os.path.basename(pathname), record.filename) + return False + if func_name and func_name != record.funcName: + logging.error('Expected function "%s", received "%s"', + func_name, record.funcName) + return False + if line_number and record.lineno != line_number: + logging.error('Expected lineno %d, received %d', + line_number, record.lineno) + return False + for attr in ['cdbg_pathname', 'cdbg_lineno']: + if hasattr(record, attr): + logging.error('Attribute %s still present in log record', attr) + return False + return True + + def CheckMessageSafe(self, msg): + """Checks that the given message was logged correctly. + + Unlike GotMessage, this will only check the contents, and will not log + an error or pop the record if the message does not match. + + Args: + msg: The expected message + Returns: + True iff the oldest unverified message matches the given attributes. 
+ """ + record = self._received_records[0] + if msg != record.msg: + print(record.msg) + return False + self._received_records.pop(0) + return True + + self._verifier = LogVerifier() + self._logger.addHandler(self._verifier) + self._logger.setLevel(logging.INFO) + capture_collector.SetLogger(self._logger) + + # Give some time for the global quota to recover + time.sleep(0.1) + + def tearDown(self): + self._logger.removeHandler(self._verifier) + + def ResetGlobalLogQuota(self): + # The global log quota takes up to 5 seconds to fully fill back up to + # capacity (kDynamicLogCapacityFactor is 5). The capacity is 5 times the per + # second fill rate. The best we can do is a sleep, since the global + # leaky_bucket instance is inaccessible to the test. + time.sleep(5.0) + + def ResetGlobalLogBytesQuota(self): + # The global log bytes quota takes up to 2 seconds to fully fill back up to + # capacity (kDynamicLogBytesCapacityFactor is 2). The capacity is twice the + # per second fill rate. The best we can do is a sleep, since the global + # leaky_bucket instance is inaccessible to the test. + time.sleep(2.0) + + def testLogQuota(self): + # Attempt to get to a known starting state by letting the global quota fully + # recover so the ordering of tests ideally doesn't affect this test. 
+ self.ResetGlobalLogQuota() + bucket_max_capacity = 250 + collector = LogCollectorWithDefaultLocation( + {'logMessageFormat': '$0', 'expressions': ['i']}) + for i in range(0, bucket_max_capacity * 2): + self.assertIsNone(collector.Log(inspect.currentframe())) + if not self._verifier.CheckMessageSafe('LOGPOINT: %s' % i): + self.assertGreaterEqual( + i, bucket_max_capacity, + 'Log quota exhausted earlier than expected') + self.assertTrue(self._verifier.CheckMessageSafe(LOGPOINT_PAUSE_MSG), + 'Quota hit message not logged') + time.sleep(0.6) + self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue(self._verifier.CheckMessageSafe('LOGPOINT: %s' % i), + 'Logging not resumed after quota recovery time') + return + self.fail('Logging was never paused when quota was exceeded') + + def testLogBytesQuota(self): + # Attempt to get to a known starting state by letting the global quota fully + # recover so the ordering of tests ideally doesn't affect this test. + self.ResetGlobalLogBytesQuota() + + # Default capacity is 40960, though based on how the leaky bucket is + # implemented, it can allow effectively twice that amount to go out in a + # very short time frame. So the third 30k message should pause. 
+ msg = ' ' * 30000 + collector = LogCollectorWithDefaultLocation( + {'logMessageFormat': msg}) + self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue(self._verifier.GotMessage('LOGPOINT: ' + msg)) + self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue(self._verifier.GotMessage('LOGPOINT: ' + msg)) + self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue(self._verifier.CheckMessageSafe(LOGPOINT_PAUSE_MSG), + 'Quota hit message not logged') + time.sleep(0.6) + collector._definition['logMessageFormat'] = 'hello' + self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue( + self._verifier.GotMessage('LOGPOINT: hello'), + 'Logging was not resumed after quota recovery time') + + def testMissingLogLevel(self): + # Missing is equivalent to INFO. + collector = LogCollectorWithDefaultLocation( + {'logMessageFormat': 'hello'}) + self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue(self._verifier.GotMessage('LOGPOINT: hello')) + + def testUndefinedLogLevel(self): + capture_collector.log_info_message = None + collector = LogCollectorWithDefaultLocation({'logLevel': 'INFO'}) + self.assertDictEqual( + {'isError': True, + 'description': {'format': 'Log action on a breakpoint not supported'}}, + collector.Log(inspect.currentframe())) + + def testLogInfo(self): + collector = LogCollectorWithDefaultLocation( + {'logLevel': 'INFO', 'logMessageFormat': 'hello'}) + collector._definition['location']['line'] = 20 + self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue( + self._verifier.GotMessage( + 'LOGPOINT: hello', + func_name='LogCollectorTest.testLogInfo', + line_number=20)) + + def testLogWarning(self): + collector = LogCollectorWithDefaultLocation( + {'logLevel': 'WARNING', 'logMessageFormat': 'hello'}) + self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue( + self._verifier.GotMessage( + 'LOGPOINT: hello', + level=logging.WARNING, + 
func_name='LogCollectorTest.testLogWarning')) + + def testLogError(self): + collector = LogCollectorWithDefaultLocation( + {'logLevel': 'ERROR', 'logMessageFormat': 'hello'}) + self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue( + self._verifier.GotMessage( + 'LOGPOINT: hello', + level=logging.ERROR, + func_name='LogCollectorTest.testLogError')) + + def testBadExpression(self): + collector = LogCollectorWithDefaultLocation( + {'logLevel': 'INFO', + 'logMessageFormat': 'a=$0, b=$1', + 'expressions': ['-', '+']}) + self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue(self._verifier.GotMessage( + 'LOGPOINT: a=, b=')) + + def testDollarEscape(self): + unused_integer = 12345 + + collector = LogCollectorWithDefaultLocation({ + 'logLevel': 'INFO', + 'logMessageFormat': '$ $$ $$$ $$$$ $0 $$0 $$$0 $$$$0 $1 hello', + 'expressions': ['unused_integer'] + }) + self.assertIsNone(collector.Log(inspect.currentframe())) + msg = 'LOGPOINT: $ $ $$ $$ 12345 $0 $12345 $$0 hello' + self.assertTrue(self._verifier.GotMessage(msg)) + + def testInvalidExpressionIndex(self): + collector = LogCollectorWithDefaultLocation( + {'logLevel': 'INFO', + 'logMessageFormat': 'a=$0'}) + self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue(self._verifier.GotMessage('LOGPOINT: a=')) + + def testException(self): + collector = LogCollectorWithDefaultLocation( + {'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['[][1]']}) + self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue(self._verifier.GotMessage( + 'LOGPOINT: ')) + + def testMutableExpression(self): + + def MutableMethod(): # pylint: disable=unused-variable + self.abc = None + + collector = LogCollectorWithDefaultLocation( + {'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['MutableMethod()']}) + self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue(self._verifier.GotMessage( + 'LOGPOINT: ')) + + def 
testNone(self): + unused_none = None + + collector = LogCollectorWithDefaultLocation( + {'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_none']}) + self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue(self._verifier.GotMessage('LOGPOINT: None')) + + def testPrimitives(self): + unused_boolean = True + unused_integer = 12345 + unused_string = 'hello' + + collector = LogCollectorWithDefaultLocation( + {'logLevel': 'INFO', + 'logMessageFormat': '$0,$1,$2', + 'expressions': ['unused_boolean', 'unused_integer', 'unused_string']}) + self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue(self._verifier.GotMessage("LOGPOINT: True,12345,'hello'")) + + def testLongString(self): + unused_string = '1234567890' + + collector = LogCollectorWithDefaultLocation( + {'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_string']}) + collector.max_value_len = 9 + self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue(self._verifier.GotMessage("LOGPOINT: '123456789...")) + + def testLongBytes(self): + unused_bytes = bytearray([i for i in range(20)]) + + collector = LogCollectorWithDefaultLocation( + {'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_bytes']}) + collector.max_value_len = 20 + self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue(self._verifier.GotMessage( + r"LOGPOINT: bytearray(b'\x00\x01\...")) + + def testDate(self): + unused_datetime = datetime.datetime(2014, 6, 11, 2, 30) + unused_date = datetime.datetime(1980, 3, 1) + unused_time = datetime.time(18, 43, 11) + unused_timedelta = datetime.timedelta(days=3, microseconds=8237) + + collector = LogCollectorWithDefaultLocation( + {'logLevel': 'INFO', + 'logMessageFormat': '$0;$1;$2;$3', + 'expressions': ['unused_datetime', 'unused_date', + 'unused_time', 'unused_timedelta']}) + self.assertIsNone(collector.Log(inspect.currentframe())) + 
self.assertTrue(self._verifier.GotMessage( + 'LOGPOINT: 2014-06-11 02:30:00;1980-03-01 00:00:00;' + '18:43:11;3 days, 0:00:00.008237')) + + def testSet(self): + unused_set = set(['a']) + + collector = LogCollectorWithDefaultLocation( + {'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_set']}) + self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue(self._verifier.GotMessage("LOGPOINT: {'a'}")) + + def testTuple(self): + unused_tuple = (1, 2, 3, 4, 5) + + collector = LogCollectorWithDefaultLocation( + {'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_tuple']}) + self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue(self._verifier.GotMessage('LOGPOINT: (1, 2, 3, 4, 5)')) + + def testList(self): + unused_list = ['a', 'b', 'c'] + + collector = LogCollectorWithDefaultLocation( + {'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_list']}) + self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue(self._verifier.GotMessage("LOGPOINT: ['a', 'b', 'c']")) + + def testOversizedList(self): + unused_list = [1, 2, 3, 4] + + collector = LogCollectorWithDefaultLocation( + {'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_list']}) + collector.max_list_items = 3 + self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue(self._verifier.GotMessage('LOGPOINT: [1, 2, 3, ...]')) + + def testSlice(self): + unused_slice = slice(1, 10) + + collector = LogCollectorWithDefaultLocation( + {'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_slice']}) + collector.max_list_items = 3 + self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue(self._verifier.GotMessage('LOGPOINT: slice(1, 10, None)')) + + def testMap(self): + unused_map = {'a': 1} + + collector = LogCollectorWithDefaultLocation( + {'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_map']}) + 
self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue(self._verifier.GotMessage("LOGPOINT: {'a': 1}")) + + def testObject(self): + + class MyClass(object): + + def __init__(self): + self.some = 'thing' + + unused_my = MyClass() + + collector = LogCollectorWithDefaultLocation( + {'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_my']}) + self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue(self._verifier.GotMessage("LOGPOINT: {'some': 'thing'}")) + + def testNestedBelowLimit(self): + unused_list = [1, [2], [1, 2, 3], [1, [1, 2, 3]], 5] + + collector = LogCollectorWithDefaultLocation( + {'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_list']}) + self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue(self._verifier.GotMessage( + 'LOGPOINT: [1, [2], [1, 2, 3], [1, [1, 2, 3]], 5]')) + + def testNestedAtLimits(self): + unused_list = [ + 1, [1, 2, 3, 4, 5], [[1, 2, 3, 4, 5], 2, 3, 4, 5], 4, 5, 6, 7, 8, 9] + + collector = LogCollectorWithDefaultLocation( + {'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_list']}) + self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue(self._verifier.GotMessage( + 'LOGPOINT: [1, [1, 2, 3, 4, 5], [[1, 2, 3, 4, 5], 2, 3, 4, 5], ' + '4, 5, 6, 7, 8, 9]')) + + def testNestedRecursionLimit(self): + unused_list = [1, [[2, [3]], 4], 5] + + collector = LogCollectorWithDefaultLocation( + {'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_list']}) + self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue(self._verifier.GotMessage( + 'LOGPOINT: [1, [[2, %s], 4], 5]' % type([]))) + + def testNestedRecursionItemLimits(self): + unused_list = [1, [1, [1, [2], 3, 4], 3, 4], 3, 4] + + list_type = "" if six.PY3 else "" + collector = LogCollectorWithDefaultLocation( + {'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_list']}) + 
collector.max_list_items = 3 + collector.max_sublist_items = 3 + self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertTrue(self._verifier.GotMessage( + 'LOGPOINT: [1, [1, [1, %s, 3, ...], 3, ...], 3, ...]' % list_type)) + + def testDetermineType(self): + builtin_prefix = 'builtins.' if six.PY3 else '__builtin__.' + path_prefix = 'googleclouddebugger.capture_collector.' + test_data = ( + (builtin_prefix + 'int', 5), + (builtin_prefix + 'str', 'hello'), + (builtin_prefix + 'function', capture_collector.DetermineType), + (path_prefix + 'LineNoFilter', + capture_collector.LineNoFilter()), + ) + + for type_string, value in test_data: + self.assertEqual(type_string, capture_collector.DetermineType(value)) + + +if __name__ == '__main__': + absltest.main() diff --git a/tests/error_data_visibility_policy_test.py b/tests/error_data_visibility_policy_test.py new file mode 100644 index 0000000..6c8b6d9 --- /dev/null +++ b/tests/error_data_visibility_policy_test.py @@ -0,0 +1,17 @@ +"""Tests for googleclouddebugger.error_data_visibility_policy.""" + +from absl.testing import absltest +from googleclouddebugger import error_data_visibility_policy + + +class ErrorDataVisibilityPolicyTest(absltest.TestCase): + + def testIsDataVisible(self): + policy = error_data_visibility_policy.ErrorDataVisibilityPolicy( + 'An error message.') + + self.assertEqual((False, 'An error message.'), policy.IsDataVisible('foo')) + + +if __name__ == '__main__': + absltest.main() diff --git a/tests/gcp_hub_client_test.py b/tests/gcp_hub_client_test.py new file mode 100644 index 0000000..7fc7720 --- /dev/null +++ b/tests/gcp_hub_client_test.py @@ -0,0 +1,495 @@ +"""Unit test for gcp_hub_client_test module.""" + +import datetime +import errno +import os +import socket +import sys +import tempfile +import time +from unittest import mock + +from googleapiclient import discovery +from googleapiclient.errors import HttpError +from googleclouddebugger import version + +import google.auth +from 
google.oauth2 import service_account +from absl.testing import absltest +from absl.testing import parameterized + +from googleclouddebugger import gcp_hub_client + + +TEST_DEBUGGEE_ID = 'gcp:debuggee-id' +TEST_AGENT_ID = 'abc-123-d4' +TEST_PROJECT_ID = 'test-project-id' +TEST_PROJECT_NUMBER = '123456789' +TEST_SERVICE_ACCOUNT_EMAIL = 'a@developer.gserviceaccount.com' + + +class HttpResponse(object): + + def __init__(self, status): + self.status = status + self.reason = None + + +def HttpErrorTimeout(): + return HttpError(HttpResponse(408), b'Fake timeout') + + +def HttpConnectionReset(): + return socket.error(errno.ECONNRESET, 'Fake connection reset') + + +class GcpHubClientTest(parameterized.TestCase): + """Simulates service account authentication.""" + + def setUp(self): + version.__version__ = 'test' + + self._client = gcp_hub_client.GcpHubClient() + + for backoff in [self._client.register_backoff, + self._client.list_backoff, + self._client.update_backoff]: + backoff.min_interval_sec /= 100000.0 + backoff.max_interval_sec /= 100000.0 + backoff._current_interval_sec /= 100000.0 + + self._client.on_idle = self._OnIdle + self._client.on_active_breakpoints_changed = mock.Mock() + + patcher = mock.patch.object(google.auth, 'default') + self._default_auth_mock = patcher.start() + self._default_auth_mock.return_value = (None, TEST_PROJECT_ID) + self.addCleanup(patcher.stop) + + self._service = mock.Mock() + self._iterations = 0 + + patcher = mock.patch.object(discovery, 'build') + self._mock_build = patcher.start() + self._mock_build.return_value = self._service + self.addCleanup(patcher.stop) + + controller = self._service.controller.return_value + debuggees = controller.debuggees.return_value + breakpoints = debuggees.breakpoints.return_value + self._register_call = debuggees.register + self._register_execute = self._register_call.return_value.execute + self._list_call = breakpoints.list + self._list_execute = self._list_call.return_value.execute + 
self._update_execute = breakpoints.update.return_value.execute + + # Default responses for API requests. + self._register_execute.return_value = { + 'debuggee': { + 'id': TEST_DEBUGGEE_ID, + 'project': TEST_PROJECT_NUMBER, + }, + 'agentId': TEST_AGENT_ID, + } + self._list_execute.return_value = {} + + self._start_time = datetime.datetime.utcnow() + + def tearDown(self): + self._client.Stop() + + def testDefaultAuth(self): + self._client.SetupAuth() + + self._default_auth_mock.assert_called_with( + scopes=['https://www.googleapis.com/auth/cloud-platform']) + self.assertEqual(TEST_PROJECT_ID, self._client._project_id) + self.assertEqual(TEST_PROJECT_ID, self._client._project_number) + + def testOverrideProjectIdNumber(self): + project_id = 'project2' + project_number = '456' + self._client.SetupAuth(project_id=project_id, project_number=project_number) + + self._default_auth_mock.assert_called_with( + scopes=['https://www.googleapis.com/auth/cloud-platform']) + self.assertEqual(project_id, self._client._project_id) + self.assertEqual(project_number, self._client._project_number) + + def testServiceAccountJsonAuth(self): + with mock.patch.object( + service_account.Credentials, + 'from_service_account_file') as from_service_account_file: + json_file = tempfile.NamedTemporaryFile() + with open(json_file.name, 'w') as f: + f.write('{"project_id": "%s"}' % TEST_PROJECT_ID) + self._client.SetupAuth(service_account_json_file=json_file.name) + + self._default_auth_mock.assert_not_called() + from_service_account_file.assert_called_with( + json_file.name, + scopes=['https://www.googleapis.com/auth/cloud-platform']) + self.assertEqual(TEST_PROJECT_ID, self._client._project_id) + self.assertEqual(TEST_PROJECT_ID, self._client._project_number) + + def testNoProjectId(self): + self._default_auth_mock.return_value = (None, None) + + with self.assertRaises(gcp_hub_client.NoProjectIdError): + self._Start() + + def testContinuousSuccess(self): + self._Start() + 
self._SkipIterations(10) + self.assertTrue(self._mock_build.called) + self.assertEqual(TEST_PROJECT_NUMBER, self._client._project_number) + + def testBreakpointsChanged(self): + self._Start() + self._SkipIterations(5) + self.assertEqual(0, self._client.on_active_breakpoints_changed.call_count) + + self._list_execute.return_value = ({'breakpoints': [{'id': 'bp1'}]}) + self._SkipIterations() + self.assertEqual(1, self._client.on_active_breakpoints_changed.call_count) + + self._list_execute.return_value = ({'breakpoints': [{'id': 'bp2'}]}) + self._SkipIterations() + self.assertEqual(2, self._client.on_active_breakpoints_changed.call_count) + + self._list_execute.return_value = ({'breakpoints': [{}]}) + self._SkipIterations() + self.assertEqual(3, self._client.on_active_breakpoints_changed.call_count) + + @parameterized.named_parameters( + ('DefaultEnabled', True, True, 'CANARY_MODE_DEFAULT_ENABLED'), + ('AlwaysEnabled', True, False, 'CANARY_MODE_ALWAYS_ENABLED'), + ('DefaultDisabled', False, True, 'CANARY_MODE_DEFAULT_DISABLED'), + ('AlwaysDisabled', False, False, 'CANARY_MODE_ALWAYS_DISABLED'), + ('AlwaysEnabledWithStringFlags', 'True', + 'a-value-should-be-treated-as-false', 'CANARY_MODE_ALWAYS_ENABLED')) + def testRegisterDebuggeeCanaryMode(self, breakpoint_enable_canary, + breakpoint_allow_canary_override, + expected_canary_mode): + self._client.SetupCanaryMode(breakpoint_enable_canary, + breakpoint_allow_canary_override) + self._Start() + self._SkipIterations(5) + self.assertEqual( + expected_canary_mode, + self._register_call.call_args[1]['body']['debuggee']['canaryMode']) + + def testRegisterDebuggeeFailure(self): + self._register_execute.side_effect = HttpErrorTimeout() + self._Start() + self._SkipIterations(5) + self.assertGreaterEqual(self._register_execute.call_count, 5) + + def testListActiveBreakpointsFailure(self): + self._Start() + self._SkipIterations(5) + self.assertEqual(1, self._register_execute.call_count) + + # If the these 2 lines are executed 
between _ListActiveBreakpoints() and + # on_idle() in _MainThreadProc, then there will be 1 iteration incremented + # where _ListActiveBreakpoints is still a success and registration is not + # required, leading to only 4 _register_execute calls instead of 5. + self._list_execute.side_effect = HttpErrorTimeout() + self._SkipIterations(5) + + self.assertGreaterEqual(self._register_execute.call_count, 4) + + def testListActiveBreakpointsNoUpdate(self): + self._Start() + self._SkipIterations(5) + self.assertEqual(1, self._register_execute.call_count) + self.assertEqual(0, self._client.on_active_breakpoints_changed.call_count) + + self._list_execute.return_value = ({'breakpoints': [{'id': 'bp1'}]}) + self._SkipIterations() + self.assertEqual(1, self._client.on_active_breakpoints_changed.call_count) + + self._list_execute.return_value = ({'waitExpired': 'True'}) + self._SkipIterations(20) + self.assertEqual(1, self._register_execute.call_count) + self.assertEqual(1, self._client.on_active_breakpoints_changed.call_count) + + def testListActiveBreakpointsSendAgentId(self): + self._Start() + self._SkipIterations(5) + self.assertEqual(1, self._register_execute.call_count) + self.assertGreater(self._list_execute.call_count, 0) + self.assertEqual(TEST_AGENT_ID, self._list_call.call_args[1]['agentId']) + + def testTransmitBreakpointUpdateSuccess(self): + self._Start() + self._client.EnqueueBreakpointUpdate({'id': 'A'}) + while not self._update_execute.call_count: + self._SkipIterations() + self.assertEmpty(self._client._transmission_queue) + + def testPoisonousMessage(self): + self._update_execute.side_effect = HttpErrorTimeout() + self._Start() + self._SkipIterations(5) + self._client.EnqueueBreakpointUpdate({'id': 'A'}) + while self._update_execute.call_count < 10: + self._SkipIterations() + self._SkipIterations(10) + self.assertEmpty(self._client._transmission_queue) + + def testTransmitBreakpointUpdateSocketError(self): + # It would be nice to ensure that the retries will 
succeed if the error + # stops, but that would make this test setup flaky. + self._update_execute.side_effect = HttpConnectionReset() + self._Start() + self._client.EnqueueBreakpointUpdate({'id': 'A'}) + while self._update_execute.call_count < 10: + self._SkipIterations() + self._SkipIterations(10) + self.assertEmpty(self._client._transmission_queue) + + def _TestInitializeLabels(self, module_var, version_var, minor_var): + self._Start() + + self._client.InitializeDebuggeeLabels( + {'module': 'my_module', + 'version': '1', + 'minorversion': '23', + 'something_else': 'irrelevant'}) + self.assertEqual( + {'projectid': 'test-project-id', + 'module': 'my_module', + 'version': '1', + 'minorversion': '23', + 'platform': 'default'}, + self._client._debuggee_labels) + self.assertEqual( + 'test-project-id-my_module-1', + self._client._GetDebuggeeDescription()) + + uniquifier1 = self._client._ComputeUniquifier({ + 'labels': self._client._debuggee_labels}) + self.assertTrue(uniquifier1) # Not empty string. 
+ + try: + os.environ[module_var] = 'env_module' + os.environ[version_var] = '213' + os.environ[minor_var] = '3476734' + self._client.InitializeDebuggeeLabels(None) + self.assertEqual( + {'projectid': 'test-project-id', + 'module': 'env_module', + 'version': '213', + 'minorversion': '3476734', + 'platform': 'default'}, + self._client._debuggee_labels) + self.assertEqual( + 'test-project-id-env_module-213', + self._client._GetDebuggeeDescription()) + + os.environ[module_var] = 'default' + os.environ[version_var] = '213' + os.environ[minor_var] = '3476734' + self._client.InitializeDebuggeeLabels({'minorversion': 'something else'}) + self.assertEqual( + {'projectid': 'test-project-id', + 'version': '213', + 'minorversion': 'something else', + 'platform': 'default'}, + self._client._debuggee_labels) + self.assertEqual( + 'test-project-id-213', + self._client._GetDebuggeeDescription()) + + finally: + del os.environ[module_var] + del os.environ[version_var] + del os.environ[minor_var] + + def testInitializeLegacyDebuggeeLabels(self): + self._TestInitializeLabels( + 'GAE_MODULE_NAME', 'GAE_MODULE_VERSION', 'GAE_MINOR_VERSION') + + def testInitializeDebuggeeLabels(self): + self._TestInitializeLabels( + 'GAE_SERVICE', 'GAE_VERSION', 'GAE_DEPLOYMENT_ID') + + def testInitializeCloudRunDebuggeeLabels(self): + self._Start() + + try: + os.environ['K_SERVICE'] = 'env_module' + os.environ['K_REVISION'] = '213' + self._client.InitializeDebuggeeLabels(None) + self.assertEqual({ + 'projectid': 'test-project-id', + 'module': 'env_module', + 'version': '213', + 'platform': 'default' + }, self._client._debuggee_labels) + self.assertEqual('test-project-id-env_module-213', + self._client._GetDebuggeeDescription()) + + finally: + del os.environ['K_SERVICE'] + del os.environ['K_REVISION'] + + def testInitializeCloudFunctionDebuggeeLabels(self): + self._Start() + + try: + os.environ['FUNCTION_NAME'] = 'fcn-name' + os.environ['X_GOOGLE_FUNCTION_VERSION'] = '213' + 
self._client.InitializeDebuggeeLabels(None) + self.assertEqual({ + 'projectid': 'test-project-id', + 'module': 'fcn-name', + 'version': '213', + 'platform': 'cloud_function' + }, self._client._debuggee_labels) + self.assertEqual('test-project-id-fcn-name-213', + self._client._GetDebuggeeDescription()) + + finally: + del os.environ['FUNCTION_NAME'] + del os.environ['X_GOOGLE_FUNCTION_VERSION'] + + def testInitializeCloudFunctionUnversionedDebuggeeLabels(self): + self._Start() + + try: + os.environ['FUNCTION_NAME'] = 'fcn-name' + self._client.InitializeDebuggeeLabels(None) + self.assertEqual({ + 'projectid': 'test-project-id', + 'module': 'fcn-name', + 'version': 'unversioned', + 'platform': 'cloud_function' + }, self._client._debuggee_labels) + self.assertEqual('test-project-id-fcn-name-unversioned', + self._client._GetDebuggeeDescription()) + + finally: + del os.environ['FUNCTION_NAME'] + + def testInitializeCloudFunctionWithRegionDebuggeeLabels(self): + self._Start() + + try: + os.environ['FUNCTION_NAME'] = 'fcn-name' + os.environ['FUNCTION_REGION'] = 'fcn-region' + self._client.InitializeDebuggeeLabels(None) + self.assertEqual({ + 'projectid': 'test-project-id', + 'module': 'fcn-name', + 'version': 'unversioned', + 'platform': 'cloud_function', + 'region': 'fcn-region' + }, self._client._debuggee_labels) + self.assertEqual('test-project-id-fcn-name-unversioned', + self._client._GetDebuggeeDescription()) + + finally: + del os.environ['FUNCTION_NAME'] + del os.environ['FUNCTION_REGION'] + + def testAppFilesUniquifierNoMinorVersion(self): + """Verify that uniquifier_computer is used if minor version not defined.""" + self._Start() + + root = tempfile.mkdtemp('', 'fake_app_') + sys.path.insert(0, root) + try: + uniquifier1 = self._client._ComputeUniquifier({}) + + with open(os.path.join(root, 'app.py'), 'w') as f: + f.write('hello') + uniquifier2 = self._client._ComputeUniquifier({}) + finally: + del sys.path[0] + + self.assertNotEqual(uniquifier1, uniquifier2) + + 
def testAppFilesUniquifierWithMinorVersion(self): + """Verify that uniquifier_computer not used if minor version is defined.""" + self._Start() + + root = tempfile.mkdtemp('', 'fake_app_') + + os.environ['GAE_MINOR_VERSION'] = '12345' + sys.path.insert(0, root) + try: + self._client.InitializeDebuggeeLabels(None) + + uniquifier1 = self._client._GetDebuggee()['uniquifier'] + + with open(os.path.join(root, 'app.py'), 'w') as f: + f.write('hello') + uniquifier2 = self._client._GetDebuggee()['uniquifier'] + finally: + del os.environ['GAE_MINOR_VERSION'] + del sys.path[0] + + self.assertEqual(uniquifier1, uniquifier2) + + def testSourceContext(self): + self._Start() + + root = tempfile.mkdtemp('', 'fake_app_') + source_context_path = os.path.join(root, 'source-context.json') + + sys.path.insert(0, root) + try: + debuggee_no_source_context1 = self._client._GetDebuggee() + + with open(source_context_path, 'w') as f: + f.write('not a valid JSON') + debuggee_bad_source_context = self._client._GetDebuggee() + + with open(os.path.join(root, 'fake_app.py'), 'w') as f: + f.write('pretend') + debuggee_no_source_context2 = self._client._GetDebuggee() + + with open(source_context_path, 'w') as f: + f.write('{"what": "source context"}') + debuggee_with_source_context = self._client._GetDebuggee() + + os.remove(source_context_path) + finally: + del sys.path[0] + + self.assertNotIn('sourceContexts', debuggee_no_source_context1) + self.assertNotIn('sourceContexts', debuggee_bad_source_context) + self.assertListEqual([{'what': 'source context'}], + debuggee_with_source_context['sourceContexts']) + + uniquifiers = set() + uniquifiers.add(debuggee_no_source_context1['uniquifier']) + uniquifiers.add(debuggee_with_source_context['uniquifier']) + uniquifiers.add(debuggee_bad_source_context['uniquifier']) + self.assertLen(uniquifiers, 1) + uniquifiers.add(debuggee_no_source_context2['uniquifier']) + self.assertLen(uniquifiers, 2) + + def _Start(self): + self._client.SetupAuth() + 
self._client.Start() + + def _OnIdle(self): + self._iterations += 1 + + def _SkipIterations(self, n=1): + target = self._iterations + n + while self._iterations < target: + self._CheckTestTimeout() + time.sleep(0.01) + + def _CheckTestTimeout(self): + elapsed_time = datetime.datetime.utcnow() - self._start_time + if elapsed_time > datetime.timedelta(seconds=15): + self.fail('Test case timed out while waiting for state transition') + + +if __name__ == '__main__': + absltest.main() diff --git a/tests/glob_data_visibility_policy_test.py b/tests/glob_data_visibility_policy_test.py new file mode 100644 index 0000000..acf9625 --- /dev/null +++ b/tests/glob_data_visibility_policy_test.py @@ -0,0 +1,39 @@ +"""Tests for glob_data_visibility_policy.""" + +from absl.testing import absltest +from googleclouddebugger import glob_data_visibility_policy + +RESPONSES = glob_data_visibility_policy.RESPONSES +UNKNOWN_TYPE = (False, RESPONSES['UNKNOWN_TYPE']) +BLACKLISTED = (False, RESPONSES['BLACKLISTED']) +NOT_WHITELISTED = (False, RESPONSES['NOT_WHITELISTED']) +VISIBLE = (True, RESPONSES['VISIBLE']) + + +class GlobDataVisibilityPolicyTest(absltest.TestCase): + + def testIsDataVisible(self): + blacklist_patterns = ( + 'wl1.private1', + 'wl2.*', + '*.private2', + '', + ) + whitelist_patterns = ( + 'wl1.*', + 'wl2.*' + ) + + policy = glob_data_visibility_policy.GlobDataVisibilityPolicy( + blacklist_patterns, whitelist_patterns) + + self.assertEqual(BLACKLISTED, policy.IsDataVisible('wl1.private1')) + self.assertEqual(BLACKLISTED, policy.IsDataVisible('wl2.foo')) + self.assertEqual(BLACKLISTED, policy.IsDataVisible('foo.private2')) + self.assertEqual(NOT_WHITELISTED, policy.IsDataVisible('wl3.foo')) + self.assertEqual(VISIBLE, policy.IsDataVisible('wl1.foo')) + self.assertEqual(UNKNOWN_TYPE, policy.IsDataVisible(None)) + + +if __name__ == '__main__': + absltest.main() diff --git a/tests/imphook2_test.py b/tests/imphook2_test.py new file mode 100644 index 0000000..38f5cce --- /dev/null 
+++ b/tests/imphook2_test.py @@ -0,0 +1,521 @@ +"""Unit test for imphook2 module.""" + +import importlib +import os +import sys +import tempfile + +import six +from absl.testing import absltest + +from googleclouddebugger import imphook2 + + +class ImportHookTest2(absltest.TestCase): + """Tests for the new module import hook.""" + + def setUp(self): + self._test_package_dir = tempfile.mkdtemp('', 'imphook_') + sys.path.append(self._test_package_dir) + + self._import_callbacks_log = [] + self._callback_cleanups = [] + + def tearDown(self): + sys.path.remove(self._test_package_dir) + + for cleanup in self._callback_cleanups: + cleanup() + + # Assert no hooks or entries remained in the set. + self.assertEmpty(imphook2._import_callbacks) + + def testPackageImport(self): + self._Hook(self._CreateFile('testpkg1/__init__.py')) + import testpkg1 # pylint: disable=g-import-not-at-top,unused-variable + self.assertEqual(['testpkg1/__init__.py'], self._import_callbacks_log) + + def testModuleImport(self): + self._CreateFile('testpkg2/__init__.py') + self._Hook(self._CreateFile('testpkg2/my.py')) + import testpkg2.my # pylint: disable=g-import-not-at-top,unused-variable + self.assertEqual(['testpkg2/my.py'], self._import_callbacks_log) + + def testUnrelatedImport(self): + self._CreateFile('testpkg3/__init__.py') + self._Hook(self._CreateFile('testpkg3/first.py')) + self._CreateFile('testpkg3/second.py') + import testpkg3.second # pylint: disable=g-import-not-at-top,unused-variable + self.assertEmpty(self._import_callbacks_log) + + def testDoubleImport(self): + self._Hook(self._CreateFile('testpkg4/__init__.py')) + import testpkg4 # pylint: disable=g-import-not-at-top,unused-variable + import testpkg4 # pylint: disable=g-import-not-at-top,unused-variable + self.assertEqual( + ['testpkg4/__init__.py', + 'testpkg4/__init__.py'], + sorted(self._import_callbacks_log)) + + def testRemoveCallback(self): + cleanup = self._Hook(self._CreateFile('testpkg4b/__init__.py')) + cleanup() + 
import testpkg4b # pylint: disable=g-import-not-at-top,unused-variable + self.assertEmpty(self._import_callbacks_log) + + def testRemoveCallbackAfterImport(self): + cleanup = self._Hook(self._CreateFile('testpkg5/__init__.py')) + import testpkg5 # pylint: disable=g-import-not-at-top,unused-variable + cleanup() + import testpkg5 # pylint: disable=g-import-not-at-top,unused-variable + self.assertEqual(['testpkg5/__init__.py'], self._import_callbacks_log) + + def testTransitiveImport(self): + self._CreateFile('testpkg6/__init__.py') + self._Hook(self._CreateFile('testpkg6/first.py', 'import second')) + self._Hook(self._CreateFile('testpkg6/second.py', 'import third')) + self._Hook(self._CreateFile('testpkg6/third.py')) + import testpkg6.first # pylint: disable=g-import-not-at-top,unused-variable + self.assertEqual( + ['testpkg6/first.py', + 'testpkg6/second.py', + 'testpkg6/third.py'], + sorted(self._import_callbacks_log)) + + def testPackageDotModuleImport(self): + self._Hook(self._CreateFile('testpkg8/__init__.py')) + self._Hook(self._CreateFile('testpkg8/my.py')) + import testpkg8.my # pylint: disable=g-import-not-at-top,unused-variable + self.assertEqual( + ['testpkg8/__init__.py', + 'testpkg8/my.py'], + sorted(self._import_callbacks_log)) + + def testNestedPackageDotModuleImport(self): + self._Hook(self._CreateFile('testpkg9a/__init__.py')) + self._Hook(self._CreateFile('testpkg9a/testpkg9b/__init__.py')) + self._CreateFile('testpkg9a/testpkg9b/my.py') + import testpkg9a.testpkg9b.my # pylint: disable=g-import-not-at-top,unused-variable + self.assertEqual( + ['testpkg9a/__init__.py', + 'testpkg9a/testpkg9b/__init__.py'], + sorted(self._import_callbacks_log)) + + def testFromImport(self): + self._Hook(self._CreateFile('testpkg10/__init__.py')) + self._CreateFile('testpkg10/my.py') + from testpkg10 import my # pylint: disable=g-import-not-at-top,unused-variable + self.assertEqual(['testpkg10/__init__.py'], self._import_callbacks_log) + + def 
testTransitiveFromImport(self): + self._CreateFile('testpkg7/__init__.py') + self._Hook(self._CreateFile( + 'testpkg7/first.py', + 'from testpkg7 import second')) + self._Hook(self._CreateFile('testpkg7/second.py')) + from testpkg7 import first # pylint: disable=g-import-not-at-top,unused-variable + self.assertEqual( + ['testpkg7/first.py', + 'testpkg7/second.py'], + sorted(self._import_callbacks_log)) + + def testFromNestedPackageImportModule(self): + self._Hook(self._CreateFile('testpkg11a/__init__.py')) + self._Hook(self._CreateFile('testpkg11a/testpkg11b/__init__.py')) + self._Hook(self._CreateFile('testpkg11a/testpkg11b/my.py')) + self._Hook(self._CreateFile('testpkg11a/testpkg11b/your.py')) + from testpkg11a.testpkg11b import my, your # pylint: disable=g-import-not-at-top,unused-variable,g-multiple-import + self.assertEqual( + ['testpkg11a/__init__.py', + 'testpkg11a/testpkg11b/__init__.py', + 'testpkg11a/testpkg11b/my.py', + 'testpkg11a/testpkg11b/your.py'], + sorted(self._import_callbacks_log)) + + def testDoubleNestedImport(self): + self._Hook(self._CreateFile('testpkg12a/__init__.py')) + self._Hook(self._CreateFile('testpkg12a/testpkg12b/__init__.py')) + self._Hook(self._CreateFile('testpkg12a/testpkg12b/my.py')) + from testpkg12a.testpkg12b import my # pylint: disable=g-import-not-at-top,unused-variable,g-multiple-import + from testpkg12a.testpkg12b import my # pylint: disable=g-import-not-at-top,unused-variable,g-multiple-import + self.assertEqual( + ['testpkg12a/__init__.py', + 'testpkg12a/__init__.py', + 'testpkg12a/testpkg12b/__init__.py', + 'testpkg12a/testpkg12b/__init__.py', + 'testpkg12a/testpkg12b/my.py', + 'testpkg12a/testpkg12b/my.py'], + sorted(self._import_callbacks_log)) + + def testFromPackageImportStar(self): + self._Hook(self._CreateFile('testpkg13a/__init__.py')) + self._Hook(self._CreateFile('testpkg13a/my1.py')) + self._Hook(self._CreateFile('testpkg13a/your1.py')) + # Star imports are only allowed at the top level, not inside a 
function in + # Python 3. Doing so would be a SyntaxError. + exec('from testpkg13a import *') # pylint: disable=exec-used + self.assertEqual(['testpkg13a/__init__.py'], self._import_callbacks_log) + + def testFromPackageImportStarWith__all__(self): + self._Hook(self._CreateFile('testpkg14a/__init__.py', '__all__=["my1"]')) + self._Hook(self._CreateFile('testpkg14a/my1.py')) + self._Hook(self._CreateFile('testpkg14a/your1.py')) + exec('from testpkg14a import *') # pylint: disable=exec-used + self.assertEqual( + ['testpkg14a/__init__.py', + 'testpkg14a/my1.py'], + sorted(self._import_callbacks_log)) + + def testImportFunction(self): + self._Hook(self._CreateFile('testpkg27/__init__.py')) + __import__('testpkg27') + self.assertEqual(['testpkg27/__init__.py'], self._import_callbacks_log) + + def testImportLib(self): + self._Hook(self._CreateFile('zero.py')) + self._Hook(self._CreateFile('testpkg15a/__init__.py')) + self._Hook(self._CreateFile('testpkg15a/first.py')) + self._Hook(self._CreateFile( + 'testpkg15a/testpkg15b/__init__.py', + 'assert False, "unexpected import"')) + self._Hook(self._CreateFile('testpkg15a/testpkg15c/__init__.py')) + self._Hook(self._CreateFile('testpkg15a/testpkg15c/second.py')) + + # Import top level module. + importlib.import_module('zero') + self.assertEqual(['zero.py'], self._import_callbacks_log) + self._import_callbacks_log = [] + + # Import top level package. + importlib.import_module('testpkg15a') + self.assertEqual(['testpkg15a/__init__.py'], self._import_callbacks_log) + self._import_callbacks_log = [] + + # Import package.module. + importlib.import_module('testpkg15a.first') + self.assertEqual( + ['testpkg15a/__init__.py', + 'testpkg15a/first.py'], + sorted(self._import_callbacks_log)) + self._import_callbacks_log = [] + + # Relative module import from package context. 
+ importlib.import_module('.first', 'testpkg15a') + self.assertEqual( + ['testpkg15a/__init__.py', + 'testpkg15a/first.py'], + sorted(self._import_callbacks_log)) + self._import_callbacks_log = [] + + # Relative module import from package context with '..'. + if six.PY3: + # In Python 3, the parent module has to be loaded before a relative import + importlib.import_module('testpkg15a.testpkg15c') + self._import_callbacks_log = [] + importlib.import_module('..first', 'testpkg15a.testpkg15c') + else: + importlib.import_module('..first', 'testpkg15a.testpkg15b') + self.assertEqual( + ['testpkg15a/__init__.py', + # TODO: Importlib may or may not load testpkg15b, + # depending on the implementation. Currently on blaze, it does not + # load testpkg15b, but a similar non-blaze code on my workstation + # loads testpkg15b. We should verify this behavior. + # 'testpkg15a/testpkg15b/__init__.py', + 'testpkg15a/first.py'], + sorted(self._import_callbacks_log)) + self._import_callbacks_log = [] + + # Relative module import from nested package context. + importlib.import_module('.second', 'testpkg15a.testpkg15c') + self.assertEqual( + ['testpkg15a/__init__.py', + 'testpkg15a/testpkg15c/__init__.py', + 'testpkg15a/testpkg15c/second.py'], + sorted(self._import_callbacks_log)) + self._import_callbacks_log = [] + + def testRemoveImportHookFromCallback(self): + def RunCleanup(unused_mod): + cleanup() + + cleanup = self._Hook( + self._CreateFile('testpkg15/__init__.py'), RunCleanup) + import testpkg15 # pylint: disable=g-import-not-at-top,unused-variable + import testpkg15 # pylint: disable=g-import-not-at-top,unused-variable + import testpkg15 # pylint: disable=g-import-not-at-top,unused-variable + + # The first import should have removed the hook, so expect only one entry. + self.assertEqual(['testpkg15/__init__.py'], self._import_callbacks_log) + + def testInitImportNoPrematureCallback(self): + # Verifies that the callback is not invoked before the package is fully + # loaded. 
Thus, assuring that all the module code is available for lookup. + def CheckFullyLoaded(module): + self.assertEqual(1, getattr(module, 'validate', None), 'premature call') + + self._Hook(self._CreateFile('testpkg16/my1.py')) + self._Hook(self._CreateFile('testpkg16/__init__.py', + 'import my1\nvalidate = 1'), CheckFullyLoaded) + import testpkg16.my1 # pylint: disable=g-import-not-at-top,unused-variable + + self.assertEqual( + ['testpkg16/__init__.py', + 'testpkg16/my1.py'], + sorted(self._import_callbacks_log)) + + def testCircularImportNoPrematureCallback(self): + # Verifies that the callback is not invoked before the first module is fully + # loaded. Thus, assuring that all the module code is available for lookup. + def CheckFullyLoaded(module): + self.assertEqual(1, getattr(module, 'validate', None), 'premature call') + + self._CreateFile('testpkg17/__init__.py') + self._Hook( + self._CreateFile( + 'testpkg17/c1.py', + 'import testpkg17.c2\nvalidate = 1', False), + CheckFullyLoaded) + self._Hook( + self._CreateFile( + 'testpkg17/c2.py', + 'import testpkg17.c1\nvalidate = 1', False), + CheckFullyLoaded) + + import testpkg17.c1 # pylint: disable=g-import-not-at-top,unused-variable + + self.assertEqual( + ['testpkg17/c1.py', + 'testpkg17/c2.py'], + sorted(self._import_callbacks_log)) + + def testImportException(self): + # An exception is thrown by the builtin importer during import. + self._CreateFile('testpkg18/__init__.py') + self._Hook(self._CreateFile('testpkg18/bad.py', 'assert False, "bad file"')) + self._Hook(self._CreateFile('testpkg18/good.py')) + + try: + import testpkg18.bad # pylint: disable=g-import-not-at-top,unused-variable + except AssertionError: + pass + + import testpkg18.good # pylint: disable=g-import-not-at-top,unused-variable + + self.assertEqual(['testpkg18/good.py'], self._import_callbacks_log) + + def testImportNestedException(self): + # An import exception is thrown and caught inside a module being imported. 
+ self._CreateFile('testpkg19/__init__.py') + self._Hook(self._CreateFile('testpkg19/m19.py', + 'try: import m19b\nexcept ImportError: pass')) + + import testpkg19.m19 # pylint: disable=g-import-not-at-top,unused-variable + + self.assertEqual(['testpkg19/m19.py'], self._import_callbacks_log) + + def testModuleImportByPathSuffix(self): + # Import module by providing only a suffix of the module's file path. + self._CreateFile('testpkg20a/__init__.py') + self._CreateFile('testpkg20a/testpkg20b/__init__.py') + self._CreateFile('testpkg20a/testpkg20b/my1.py') + self._CreateFile('testpkg20a/testpkg20b/my2.py') + self._CreateFile('testpkg20a/testpkg20b/my3.py') + + # Import just by the name of the module file. + self._Hook('my1.py') + import testpkg20a.testpkg20b.my1 # pylint: disable=g-import-not-at-top,unused-variable + self.assertEqual(['my1.py'], self._import_callbacks_log) + self._import_callbacks_log = [] + + # Import with only one of the enclosing package names. + self._Hook('testpkg20b/my2.py') + import testpkg20a.testpkg20b.my2 # pylint: disable=g-import-not-at-top,unused-variable + self.assertEqual(['testpkg20b/my2.py'], self._import_callbacks_log) + self._import_callbacks_log = [] + + # Import with all enclosing packages (the typical case). 
+ self._Hook('testpkg20b/my3.py') + import testpkg20a.testpkg20b.my3 # pylint: disable=g-import-not-at-top,unused-variable + self.assertEqual(['testpkg20b/my3.py'], self._import_callbacks_log) + self._import_callbacks_log = [] + + def testFromImportImportsFunction(self): + self._CreateFile('testpkg21a/__init__.py') + self._CreateFile('testpkg21a/testpkg21b/__init__.py') + self._CreateFile( + 'testpkg21a/testpkg21b/mod.py', + ('def func1():\n' + ' return 5\n' + '\n' + 'def func2():\n' + ' return 7\n')) + + self._Hook('mod.py') + from testpkg21a.testpkg21b.mod import func1, func2 # pylint: disable=g-import-not-at-top,unused-variable,g-multiple-import + self.assertEqual(['mod.py'], self._import_callbacks_log) + + def testImportSibling(self): + self._CreateFile('testpkg22/__init__.py') + self._CreateFile( + 'testpkg22/first.py', + 'import second') + self._CreateFile('testpkg22/second.py') + + self._Hook('testpkg22/second.py') + import testpkg22.first # pylint: disable=g-import-not-at-top,unused-variable + self.assertEqual(['testpkg22/second.py'], self._import_callbacks_log) + + def testImportSiblingSamePackage(self): + self._CreateFile('testpkg23/__init__.py') + self._CreateFile('testpkg23/testpkg23/__init__.py') + self._CreateFile( + 'testpkg23/first.py', + 'import testpkg23.second') # This refers to testpkg23.testpkg23.second + self._CreateFile('testpkg23/testpkg23/second.py') + + self._Hook('testpkg23/testpkg23/second.py') + import testpkg23.first # pylint: disable=g-import-not-at-top,unused-variable + self.assertEqual( + ['testpkg23/testpkg23/second.py'], + self._import_callbacks_log) + + def testImportSiblingFromInit(self): + self._Hook(self._CreateFile('testpkg23a/__init__.py', 'import testpkg23b')) + self._Hook(self._CreateFile( + 'testpkg23a/testpkg23b/__init__.py', + 'import testpkg23c')) + self._Hook(self._CreateFile('testpkg23a/testpkg23b/testpkg23c/__init__.py')) + import testpkg23a # pylint: disable=g-import-not-at-top,unused-variable + self.assertEqual( + 
['testpkg23a/__init__.py', + 'testpkg23a/testpkg23b/__init__.py', + 'testpkg23a/testpkg23b/testpkg23c/__init__.py'], + sorted(self._import_callbacks_log)) + + def testThreadLocalCleanup(self): + self._CreateFile('testpkg24/__init__.py') + self._CreateFile('testpkg24/foo.py', 'import bar') + self._CreateFile('testpkg24/bar.py') + + # Create a hook for any arbitrary module. Doesn't need to hit. + self._Hook('xxx/yyy.py') + + import testpkg24.foo # pylint: disable=g-import-not-at-top,unused-variable + + self.assertEqual(imphook2._import_local.nest_level, 0) + self.assertEmpty(imphook2._import_local.names) + + def testThreadLocalCleanupWithCaughtImportError(self): + self._CreateFile('testpkg25/__init__.py') + self._CreateFile( + 'testpkg25/foo.py', + 'import bar\n' # success. + 'import baz') # success. + self._CreateFile('testpkg25/bar.py') + self._CreateFile( + 'testpkg25/baz.py', + 'try:\n' + ' import testpkg25b\n' + 'except ImportError:\n' + ' pass') + + # Create a hook for any arbitrary module. Doesn't need to hit. + self._Hook('xxx/yyy.py') + + # Successful import at top level. Failed import at inner level. + import testpkg25.foo # pylint: disable=g-import-not-at-top,unused-variable + + self.assertEqual(imphook2._import_local.nest_level, 0) + self.assertEmpty(imphook2._import_local.names) + + def testThreadLocalCleanupWithUncaughtImportError(self): + self._CreateFile('testpkg26/__init__.py') + self._CreateFile( + 'testpkg26/foo.py', + 'import bar\n' # success. + 'import baz') # fail. + self._CreateFile('testpkg26/bar.py') + + # Create a hook for bar, which is imported before the failing import. + self._Hook('testpkg26/bar.py') + + # Inner import will fail, and exception will be propagated here. + try: + import testpkg26.foo # pylint: disable=g-import-not-at-top,unused-variable + except ImportError: + pass + + # The hook for bar should be invoked, as bar is already loaded. 
+ self.assertEqual(['testpkg26/bar.py'], self._import_callbacks_log) + + self.assertEqual(imphook2._import_local.nest_level, 0) + self.assertEmpty(imphook2._import_local.names) + + def testCleanup(self): + cleanup1 = self._Hook('a/b/c.py') + cleanup2 = self._Hook('a/b/c.py') + cleanup3 = self._Hook('a/d/f.py') + cleanup4 = self._Hook('a/d/g.py') + cleanup5 = self._Hook('a/d/c.py') + self.assertLen(imphook2._import_callbacks, 4) + + cleanup1() + self.assertLen(imphook2._import_callbacks, 4) + cleanup2() + self.assertLen(imphook2._import_callbacks, 3) + cleanup3() + self.assertLen(imphook2._import_callbacks, 2) + cleanup4() + self.assertLen(imphook2._import_callbacks, 1) + cleanup5() + self.assertLen(imphook2._import_callbacks, 0) + + def _CreateFile(self, path, content='', rewrite_imports_if_py3=True): + full_path = os.path.join(self._test_package_dir, path) + directory, unused_name = os.path.split(full_path) + + if not os.path.isdir(directory): + os.makedirs(directory) + + def RewriteImport(line): + """Converts import statements to relative form. + + Examples: + import x => from . import x + import x.y.z => from .x.y import z + print('') => print('') + + Args: + line: str, the line to convert. + + Returns: + str, the converted import statement or original line. + """ + original_line_length = len(line) + line = line.lstrip(' ') + indent = ' ' * (original_line_length - len(line)) + if line.startswith('import'): + pkg, _, mod = line.split(' ')[1].rpartition('.') + line = 'from .%s import %s' % (pkg, mod) + return indent + line + + with open(full_path, 'w') as writer: + if six.PY3 and rewrite_imports_if_py3: + content = '\n'.join(RewriteImport(l) for l in content.split('\n')) + writer.write(content) + + return path + + # TODO: add test for the module param in the callback. 
+ def _Hook(self, path, callback=lambda m: None): + cleanup = imphook2.AddImportCallbackBySuffix( + path, + lambda mod: (self._import_callbacks_log.append(path), callback(mod))) + self.assertTrue(cleanup, path) + self._callback_cleanups.append(cleanup) + return cleanup + + +if __name__ == '__main__': + absltest.main() diff --git a/tests/integration_test_disabled.py b/tests/integration_test_disabled.py new file mode 100644 index 0000000..434c5e2 --- /dev/null +++ b/tests/integration_test_disabled.py @@ -0,0 +1,596 @@ +"""Complete tests of the debugger mocking the backend.""" + +# TODO: Get this test to work well on all supported versions of Python. + +from datetime import datetime +from datetime import timedelta +import functools +import inspect +import itertools +import os +import sys +import time +from unittest import mock + +from googleapiclient import discovery +import googleclouddebugger as cdbg + +from six.moves import queue + +import google.auth +from absl.testing import absltest + +from googleclouddebugger import capture_collector +from googleclouddebugger import labels +import python_test_util + +_TEST_DEBUGGEE_ID = 'gcp:integration-test-debuggee-id' +_TEST_AGENT_ID = 'agent-id-123-abc' +_TEST_PROJECT_ID = 'test-project-id' +_TEST_PROJECT_NUMBER = '123456789' + +# Time to sleep before returning the result of an API call. +# Without a delay, the agent will continuously call ListActiveBreakpoints, +# and the mock object will use a lot of memory to record all the calls. +_REQUEST_DELAY_SECS = 0.01 + + +class IntegrationTest(absltest.TestCase): + """Complete tests of the debugger mocking the backend. + + These tests employ all the components of the debugger. The actual + communication channel with the backend is mocked. This allows the test + to quickly inject breakpoints and read results. It also makes the test + standalone and independent of the actual backend. + + Uses the new module search algorithm (b/70226488). 
+ """ + + class FakeHub(object): + """Starts the debugger with a mocked communication channel.""" + + def __init__(self): + # Breakpoint updates posted by the debugger that haven't been processed + # by the test case code. + self._incoming_breakpoint_updates = queue.Queue() + + # Running counter used to generate unique breakpoint IDs. + self._id_counter = itertools.count() + + self._service = mock.Mock() + + patcher = mock.patch.object(discovery, 'build') + self._mock_build = patcher.start() + self._mock_build.return_value = self._service + + patcher = mock.patch.object(google.auth, 'default') + self._default_auth_mock = patcher.start() + self._default_auth_mock.return_value = None, _TEST_PROJECT_ID + + controller = self._service.controller.return_value + debuggees = controller.debuggees.return_value + breakpoints = debuggees.breakpoints.return_value + + # Simulate a time delay for calls to the mock API. + def ReturnWithDelay(val): + def GetVal(): + time.sleep(_REQUEST_DELAY_SECS) + return val + return GetVal + + self._register_execute = debuggees.register.return_value.execute + self._register_execute.side_effect = ReturnWithDelay( + {'debuggee': {'id': _TEST_DEBUGGEE_ID}, 'agentId': _TEST_AGENT_ID}) + + self._active_breakpoints = {'breakpoints': []} + self._list_execute = breakpoints.list.return_value.execute + self._list_execute.side_effect = ReturnWithDelay(self._active_breakpoints) + + breakpoints.update = self._UpdateBreakpoint + + # Start the debugger. + cdbg.enable() + + # Increase the polling rate to speed up the test. + cdbg._hub_client.min_interval_sec = 0.001 # Poll every 1 ms + + def SetBreakpoint(self, tag, template=None): + """Sets a new breakpoint in this source file. + + The line number is identified by tag. The optional template may specify + other breakpoint parameters such as condition and watched expressions. + + Args: + tag: label for a source line. + template: optional breakpoint parameters. 
+ """ + path, line = python_test_util.ResolveTag(sys.modules[__name__], tag) + self.SetBreakpointAtPathLine(path, line, template) + + def SetBreakpointAtFile(self, filename, tag, template=None): + """Sets a breakpoint in a file with the given filename. + + The line number is identified by tag. The optional template may specify + other breakpoint parameters such as condition and watched expressions. + + Args: + filename: the name of the file inside which the tag will be searched. + Must be in the same directory as the current file. + tag: label for a source line. + template: optional breakpoint parameters. + + Raises: + Exception: when the given tag does not uniquely identify a line. + """ + # TODO: Move part of this to python_test_utils.py file. + # Find the full path of filename, using the directory of the current file. + module_path = inspect.getsourcefile(sys.modules[__name__]) + directory, unused_name = os.path.split(module_path) + path = os.path.join(directory, filename) + + # Similar to ResolveTag(), but for a module that's not loaded yet. + tags = python_test_util.GetSourceFileTags(path) + if tag not in tags: + raise Exception('tag %s not found' % tag) + lines = tags[tag] + if len(lines) != 1: + raise Exception('tag %s is ambiguous (lines: %s)' % (tag, lines)) + + self.SetBreakpointAtPathLine(path, lines[0], template) + + def SetBreakpointAtPathLine(self, path, line, template=None): + """Sets a new breakpoint at path:line.""" + breakpoint = { + 'id': 'BP_%d' % next(self._id_counter), + 'createTime': + python_test_util.DateTimeToTimestamp(datetime.utcnow()), + 'location': {'path': path, 'line': line}} + breakpoint.update(template or {}) + + self.SetActiveBreakpoints(self.GetActiveBreakpoints() + [breakpoint]) + + def GetActiveBreakpoints(self): + """Returns current list of active breakpoints.""" + return self._active_breakpoints['breakpoints'] + + def SetActiveBreakpoints(self, breakpoints): + """Sets a new list of active breakpoints. 
+ + Args: + breakpoints: list of breakpoints to return to the debuglet. + """ + self._active_breakpoints['breakpoints'] = breakpoints + begin_count = self._list_execute.call_count + while self._list_execute.call_count < begin_count + 2: + time.sleep(_REQUEST_DELAY_SECS) + + def GetNextResult(self): + """Waits for the next breakpoint update from the debuglet. + + Returns: + First breakpoint update sent by the debuglet that hasn't been + processed yet. + + Raises: + AssertionError: if waiting for breakpoint update times out. + """ + try: + return self._incoming_breakpoint_updates.get(True, 15) + except queue.Empty: + raise AssertionError('Timed out waiting for breakpoint update') + + def TryGetNextResult(self): + """Returns the first unprocessed breakpoint update from the debuglet. + + Returns: + First breakpoint update sent by the debuglet that hasn't been + processed yet. If no updates are pending, returns None. + """ + try: + return self._incoming_breakpoint_updates.get_nowait() + except queue.Empty: + return None + + def _UpdateBreakpoint(self, **keywords): + """Fake implementation of service.debuggees().breakpoints().update().""" + + class FakeBreakpointUpdateCommand(object): + + def __init__(self, q): + self._breakpoint = keywords['body']['breakpoint'] + self._queue = q + + def execute(self): # pylint: disable=invalid-name + self._queue.put(self._breakpoint) + + return FakeBreakpointUpdateCommand(self._incoming_breakpoint_updates) + +# We only need to attach the debugger exactly once. The IntegrationTest class + # is created for each test case, so we need to keep this state global. 
+ _hub = FakeHub() + + def _FakeLog(self, message, extra=None): + del extra # unused + self._info_log.append(message) + + def setUp(self): + self._info_log = [] + capture_collector.log_info_message = self._FakeLog + + def tearDown(self): + IntegrationTest._hub.SetActiveBreakpoints([]) + + while True: + breakpoint = IntegrationTest._hub.TryGetNextResult() + if breakpoint is None: + break + self.fail('Unexpected incoming breakpoint update: %s' % breakpoint) + + def testBackCompat(self): + # Verify that the old AttachDebugger() is the same as enable() + self.assertEqual(cdbg.enable, cdbg.AttachDebugger) + + def testBasic(self): + def Trigger(): + print('Breakpoint trigger') # BPTAG: BASIC + + IntegrationTest._hub.SetBreakpoint('BASIC') + Trigger() + result = IntegrationTest._hub.GetNextResult() + self.assertEqual('Trigger', result['stackFrames'][0]['function']) + self.assertEqual('IntegrationTest.testBasic', + result['stackFrames'][1]['function']) + + # Verify that any pre existing labels present in the breakpoint are preserved + # by the agent. + def testExistingLabelsSurvive(self): + def Trigger(): + print('Breakpoint trigger with labels') # BPTAG: EXISTING_LABELS_SURVIVE + + IntegrationTest._hub.SetBreakpoint( + 'EXISTING_LABELS_SURVIVE', + {'labels': {'label_1': 'value_1', 'label_2': 'value_2'}}) + Trigger() + result = IntegrationTest._hub.GetNextResult() + self.assertIn('labels', result.keys()) + self.assertIn('label_1', result['labels']) + self.assertIn('label_2', result['labels']) + self.assertEqual('value_1', result['labels']['label_1']) + self.assertEqual('value_2', result['labels']['label_2']) + + # Verify that any pre existing labels present in the breakpoint have priority + # if they 'collide' with labels in the agent. 
+ def testExistingLabelsPriority(self): + def Trigger(): + print('Breakpoint trigger with labels') # BPTAG: EXISTING_LABELS_PRIORITY + + current_labels_collector = capture_collector.breakpoint_labels_collector + capture_collector.breakpoint_labels_collector = \ + lambda: {'label_1': 'value_1', 'label_2': 'value_2'} + + IntegrationTest._hub.SetBreakpoint( + 'EXISTING_LABELS_PRIORITY', + {'labels': {'label_1': 'value_foobar', 'label_3': 'value_3'}}) + + Trigger() + + capture_collector.breakpoint_labels_collector = current_labels_collector + + # In this case, label_1 was in both the agent and the pre existing labels, + # the pre existing value of value_foobar should be preserved. + result = IntegrationTest._hub.GetNextResult() + self.assertIn('labels', result.keys()) + self.assertIn('label_1', result['labels']) + self.assertIn('label_2', result['labels']) + self.assertIn('label_3', result['labels']) + self.assertEqual('value_foobar', result['labels']['label_1']) + self.assertEqual('value_2', result['labels']['label_2']) + self.assertEqual('value_3', result['labels']['label_3']) + + def testRequestLogIdLabel(self): + def Trigger(): + print('Breakpoint trigger req id label') # BPTAG: REQUEST_LOG_ID_LABEL + + current_request_log_id_collector = \ + capture_collector.request_log_id_collector + capture_collector.request_log_id_collector = lambda: 'foo_bar_id' + + IntegrationTest._hub.SetBreakpoint('REQUEST_LOG_ID_LABEL') + + Trigger() + + capture_collector.request_log_id_collector = \ + current_request_log_id_collector + + result = IntegrationTest._hub.GetNextResult() + self.assertIn('labels', result.keys()) + self.assertIn(labels.Breakpoint.REQUEST_LOG_ID, result['labels']) + self.assertEqual( + 'foo_bar_id', result['labels'][labels.Breakpoint.REQUEST_LOG_ID]) + + # Tests the issue in b/30876465 + def testSameLine(self): + def Trigger(): + print('Breakpoint trigger same line') # BPTAG: SAME_LINE + + num_breakpoints = 5 + _, line = 
python_test_util.ResolveTag(sys.modules[__name__], 'SAME_LINE') + for _ in range(0, num_breakpoints): + IntegrationTest._hub.SetBreakpoint('SAME_LINE') + Trigger() + results = [] + for _ in range(0, num_breakpoints): + results.append(IntegrationTest._hub.GetNextResult()) + lines = [result['stackFrames'][0]['location']['line'] for result in results] + self.assertListEqual(lines, [line] * num_breakpoints) + + def testCallStack(self): + def Method1(): + Method2() + + def Method2(): + Method3() + + def Method3(): + Method4() + + def Method4(): + Method5() + + def Method5(): + return 0 # BPTAG: CALL_STACK + + IntegrationTest._hub.SetBreakpoint('CALL_STACK') + Method1() + result = IntegrationTest._hub.GetNextResult() + self.assertEqual( + ['Method5', + 'Method4', + 'Method3', + 'Method2', + 'Method1', + 'IntegrationTest.testCallStack'], + [frame['function'] for frame in result['stackFrames']][:6]) + + def testInnerMethod(self): + def Inner1(): + def Inner2(): + def Inner3(): + print('Inner3') # BPTAG: INNER3 + Inner3() + Inner2() + + IntegrationTest._hub.SetBreakpoint('INNER3') + Inner1() + result = IntegrationTest._hub.GetNextResult() + self.assertEqual('Inner3', result['stackFrames'][0]['function']) + + def testClassMethodWithDecorator(self): + + def MyDecorator(handler): + + def Caller(self): + return handler(self) + + return Caller + + class BaseClass(object): + pass + + class MyClass(BaseClass): + + @MyDecorator + def Get(self): + param = {} # BPTAG: METHOD_WITH_DECORATOR + return str(param) + + IntegrationTest._hub.SetBreakpoint('METHOD_WITH_DECORATOR') + self.assertEqual('{}', MyClass().Get()) + result = IntegrationTest._hub.GetNextResult() + self.assertEqual('MyClass.Get', result['stackFrames'][0]['function']) + self.assertEqual('MyClass.Caller', result['stackFrames'][1]['function']) + self.assertEqual( + {'name': 'self', + 'type': __name__ + '.MyClass', + 'members': [ + {'status': { + 'refersTo': 'VARIABLE_NAME', + 'description': {'format': 'Object has no 
fields'}}}]}, + python_test_util.PackFrameVariable(result, 'self', + collection='arguments')) + + def testGlobalDecorator(self): + IntegrationTest._hub.SetBreakpoint('WRAPPED_GLOBAL_METHOD') + self.assertEqual('hello', WrappedGlobalMethod()) + result = IntegrationTest._hub.GetNextResult() + + self.assertNotIn('status', result) + + def testNoLambdaExpression(self): + def Trigger(): + cube = lambda x: x**3 # BPTAG: LAMBDA + cube(18) + + num_breakpoints = 5 + for _ in range(0, num_breakpoints): + IntegrationTest._hub.SetBreakpoint('LAMBDA') + Trigger() + results = [] + for _ in range(0, num_breakpoints): + results.append(IntegrationTest._hub.GetNextResult()) + functions = [result['stackFrames'][0]['function'] for result in results] + self.assertListEqual(functions, ['Trigger'] * num_breakpoints) + + def testNoGeneratorExpression(self): + def Trigger(): + gen = (i for i in range(0, 5)) # BPTAG: GENEXPR + next(gen) + next(gen) + next(gen) + next(gen) + next(gen) + + num_breakpoints = 1 + for _ in range(0, num_breakpoints): + IntegrationTest._hub.SetBreakpoint('GENEXPR') + Trigger() + results = [] + for _ in range(0, num_breakpoints): + results.append(IntegrationTest._hub.GetNextResult()) + functions = [result['stackFrames'][0]['function'] for result in results] + self.assertListEqual(functions, ['Trigger'] * num_breakpoints) + + def testTryBlock(self): + def Method(a): + try: + return a * a # BPTAG: TRY_BLOCK + except Exception as unused_e: # pylint: disable=broad-except + return a + + IntegrationTest._hub.SetBreakpoint('TRY_BLOCK') + Method(11) + result = IntegrationTest._hub.GetNextResult() + self.assertEqual('Method', result['stackFrames'][0]['function']) + self.assertEqual( + [{'name': 'a', 'value': '11', 'type': 'int'}], + result['stackFrames'][0]['arguments']) + + def testFrameArguments(self): + def Method(a, b): + return a + str(b) # BPTAG: FRAME_ARGUMENTS + IntegrationTest._hub.SetBreakpoint('FRAME_ARGUMENTS') + Method('hello', 87) + result = 
IntegrationTest._hub.GetNextResult() + self.assertEqual( + [{'name': 'a', 'value': "'hello'", 'type': 'str'}, + {'name': 'b', 'value': '87', 'type': 'int'}], + result['stackFrames'][0]['arguments']) + self.assertEqual('self', result['stackFrames'][1]['arguments'][0]['name']) + + def testFrameLocals(self): + class Number(object): + + def __init__(self): + self.n = 57 + + def Method(a): + b = a ** 2 + c = str(a) * 3 + return c + str(b) # BPTAG: FRAME_LOCALS + IntegrationTest._hub.SetBreakpoint('FRAME_LOCALS') + x = {'a': 1, 'b': Number()} + Method(8) + result = IntegrationTest._hub.GetNextResult() + self.assertEqual( + {'name': 'b', 'value': '64', 'type': 'int'}, + python_test_util.PackFrameVariable(result, 'b')) + self.assertEqual( + {'name': 'c', 'value': "'888'", 'type': 'str'}, + python_test_util.PackFrameVariable(result, 'c')) + self.assertEqual( + {'name': 'x', + 'type': 'dict', + 'members': [{'name': "'a'", 'value': '1', 'type': 'int'}, + {'name': "'b'", + 'type': __name__ + '.Number', + 'members': [{'name': 'n', + 'value': '57', + 'type': 'int'}]}]}, + python_test_util.PackFrameVariable(result, 'x', frame=1)) + return x + + def testRecursion(self): + def RecursiveMethod(i): + if i == 0: + return 0 # BPTAG: RECURSION + return RecursiveMethod(i - 1) + + IntegrationTest._hub.SetBreakpoint('RECURSION') + RecursiveMethod(5) + result = IntegrationTest._hub.GetNextResult() + + for frame in range(5): + self.assertEqual( + {'name': 'i', 'value': str(frame), 'type': 'int'}, + python_test_util.PackFrameVariable(result, 'i', frame, 'arguments')) + + def testWatchedExpressions(self): + def Trigger(): + + class MyClass(object): + + def __init__(self): + self.a = 1 + self.b = 'bbb' + + unused_my = MyClass() + print('Breakpoint trigger') # BPTAG: WATCHED_EXPRESSION + + IntegrationTest._hub.SetBreakpoint( + 'WATCHED_EXPRESSION', + {'expressions': ['unused_my']}) + Trigger() + result = IntegrationTest._hub.GetNextResult() + + self.assertEqual( + {'name': 'unused_my', + 'type': 
__name__ + '.MyClass', + 'members': [{'name': 'a', 'value': '1', 'type': 'int'}, + {'name': 'b', 'value': "'bbb'", 'type': 'str'}]}, + python_test_util.PackWatchedExpression(result, 0)) + + def testBreakpointExpiration(self): # BPTAG: BREAKPOINT_EXPIRATION + created_time = datetime.utcnow() - timedelta(hours=25) + IntegrationTest._hub.SetBreakpoint( + 'BREAKPOINT_EXPIRATION', + {'createTime': python_test_util.DateTimeToTimestamp(created_time)}) + result = IntegrationTest._hub.GetNextResult() + + self.assertTrue(result['status']['isError']) + + def testLogAction(self): + def Trigger(): + for i in range(3): + print('Log me %d' % i) # BPTAG: LOG + + IntegrationTest._hub.SetBreakpoint( + 'LOG', + {'action': 'LOG', + 'logLevel': 'INFO', + 'logMessageFormat': 'hello $0', + 'expressions': ['i']}) + Trigger() + self.assertListEqual(['LOGPOINT: hello 0', 'LOGPOINT: hello 1', + 'LOGPOINT: hello 2'], self._info_log) + + def testDeferred(self): + def Trigger(): + import integration_test_helper # pylint: disable=g-import-not-at-top + integration_test_helper.Trigger() + + IntegrationTest._hub.SetBreakpointAtFile( + 'integration_test_helper.py', + 'DEFERRED') + + Trigger() + result = IntegrationTest._hub.GetNextResult() + self.assertEqual('Trigger', result['stackFrames'][0]['function']) + self.assertEqual('Trigger', result['stackFrames'][1]['function']) + self.assertEqual('IntegrationTest.testDeferred', + result['stackFrames'][2]['function']) + + +def MyGlobalDecorator(fn): + @functools.wraps(fn) + def Wrapper(*args, **kwargs): + return fn(*args, **kwargs) + return Wrapper + + +@MyGlobalDecorator +def WrappedGlobalMethod(): + return 'hello' # BPTAG: WRAPPED_GLOBAL_METHOD + + +if __name__ == '__main__': + absltest.main() diff --git a/tests/integration_test_helper.py b/tests/integration_test_helper.py new file mode 100644 index 0000000..5a7e04b --- /dev/null +++ b/tests/integration_test_helper.py @@ -0,0 +1,5 @@ +"""Helper module for integration test to validate deferred 
breakpoints.""" + + +def Trigger(): + print('bp trigger') # BPTAG: DEFERRED diff --git a/tests/labels_test.py b/tests/labels_test.py new file mode 100644 index 0000000..b7b01dd --- /dev/null +++ b/tests/labels_test.py @@ -0,0 +1,29 @@ +"""Tests for googleclouddebugger.labels""" + +from absl.testing import absltest +from googleclouddebugger import labels + + +class LabelsTest(absltest.TestCase): + + def testDefinesLabelsCorrectly(self): + self.assertEqual(labels.Breakpoint.REQUEST_LOG_ID, 'requestlogid') + + self.assertEqual(labels.Debuggee.DOMAIN, 'domain') + self.assertEqual(labels.Debuggee.PROJECT_ID, 'projectid') + self.assertEqual(labels.Debuggee.MODULE, 'module') + self.assertEqual(labels.Debuggee.VERSION, 'version') + self.assertEqual(labels.Debuggee.MINOR_VERSION, 'minorversion') + self.assertEqual(labels.Debuggee.PLATFORM, 'platform') + self.assertEqual(labels.Debuggee.REGION, 'region') + + def testProvidesAllLabelsSet(self): + self.assertIsNotNone(labels.Breakpoint.SET_ALL) + self.assertLen(labels.Breakpoint.SET_ALL, 1) + + self.assertIsNotNone(labels.Debuggee.SET_ALL) + self.assertLen(labels.Debuggee.SET_ALL, 7) + + +if __name__ == '__main__': + absltest.main() diff --git a/tests/module_explorer_test_disabled.py b/tests/module_explorer_test_disabled.py new file mode 100644 index 0000000..e451a79 --- /dev/null +++ b/tests/module_explorer_test_disabled.py @@ -0,0 +1,319 @@ +"""Unit test for module_explorer module.""" + +# TODO: Get this test to run properly on all supported versions of Python + +import dis +import inspect +import os +import py_compile +import shutil +import sys +import tempfile + +import six +from absl.testing import absltest + +from googleclouddebugger import module_explorer +import python_test_util + + +class ModuleExplorerTest(absltest.TestCase): + """Unit test for module_explorer module.""" + + def setUp(self): + self._module = sys.modules[__name__] + self._code_objects = module_explorer._GetModuleCodeObjects(self._module) + + # 
Populate line cache for this module (needed for .par test). + inspect.getsourcelines(self.testCodeObjectAtLine) + + def testGlobalMethod(self): + """Verify that global method is found.""" + self.assertIn(six.get_function_code(_GlobalMethod), self._code_objects) + + def testInnerMethodOfGlobalMethod(self): + """Verify that inner method defined in a global method is found.""" + self.assertIn(_GlobalMethod(), self._code_objects) + + def testInstanceClassMethod(self): + """Verify that instance class method is found.""" + self.assertIn( + six.get_function_code(self.testInstanceClassMethod), self._code_objects) + + def testInnerMethodOfInstanceClassMethod(self): + """Verify that inner method defined in a class instance method is found.""" + + def InnerMethod(): + pass + + self.assertIn(six.get_function_code(InnerMethod), self._code_objects) + + def testStaticMethod(self): + """Verify that static class method is found.""" + self.assertIn( + six.get_function_code(ModuleExplorerTest._StaticMethod), + self._code_objects) + + def testInnerMethodOfStaticMethod(self): + """Verify that inner method defined in a static class method is found.""" + self.assertIn(ModuleExplorerTest._StaticMethod(), self._code_objects) + + def testNonModuleClassMethod(self): + """Verify that instance method defined in a base class is not added.""" + self.assertNotIn(six.get_function_code(self.assertTrue), self._code_objects) + + def testDeepInnerMethod(self): + """Verify that inner of inner of inner, etc. 
method is found.""" + def Inner1(): + def Inner2(): + def Inner3(): + def Inner4(): + def Inner5(): + pass + return six.get_function_code(Inner5) + return Inner4() + return Inner3() + return Inner2() + + self.assertIn(Inner1(), self._code_objects) + + def testNoLambdaExpression(self): + """Verify that code of lambda expression is not included.""" + + self.assertNotIn(_MethodWithLambdaExpression(), self._code_objects) + + def testNoGeneratorExpression(self): + """Verify that code of generator expression is not included.""" + + self.assertNotIn(_MethodWithGeneratorExpression(), self._code_objects) + + def testMethodOfInnerClass(self): + """Verify that method of inner class is found.""" + + class InnerClass(object): + + def InnerClassMethod(self): + pass + + self.assertIn( + six.get_function_code(InnerClass().InnerClassMethod), + self._code_objects) + + def testMethodOfInnerOldStyleClass(self): + """Verify that method of inner old style class is found.""" + + class InnerClass(): + + def InnerClassMethod(self): + pass + + self.assertIn( + six.get_function_code(InnerClass().InnerClassMethod), + self._code_objects) + + def testGlobalMethodWithClosureDecorator(self): + co = self._GetCodeObjectAtLine(self._module, + 'GLOBAL_METHOD_WITH_CLOSURE_DECORATOR') + self.assertTrue(co) + self.assertEqual('GlobalMethodWithClosureDecorator', co.co_name) + + def testClassMethodWithClosureDecorator(self): + co = self._GetCodeObjectAtLine(self._module, + 'GLOBAL_CLASS_METHOD_WITH_CLOSURE_DECORATOR') + self.assertTrue(co) + self.assertEqual('FnWithClosureDecorator', co.co_name) + + def testGlobalMethodWithClassDecorator(self): + co = self._GetCodeObjectAtLine(self._module, + 'GLOBAL_METHOD_WITH_CLASS_DECORATOR') + self.assertTrue(co) + self.assertEqual('GlobalMethodWithClassDecorator', co.co_name) + + def testClassMethodWithClassDecorator(self): + co = self._GetCodeObjectAtLine(self._module, + 'GLOBAL_CLASS_METHOD_WITH_CLASS_DECORATOR') + self.assertTrue(co) + 
self.assertEqual('FnWithClassDecorator', co.co_name) + + def testSameFileName(self): + """Verify that all found code objects are defined in the same file.""" + path = next(iter(self._code_objects)).co_filename + self.assertTrue(path) + + for code_object in self._code_objects: + self.assertEqual(path, code_object.co_filename) + + def testCodeObjectAtLine(self): + """Verify that query of code object at a specified source line.""" + test_cases = [ + (six.get_function_code(self.testCodeObjectAtLine), + 'TEST_CODE_OBJECT_AT_ASSERT'), + (ModuleExplorerTest._StaticMethod(), 'INNER_OF_STATIC_METHOD'), + (_GlobalMethod(), 'INNER_OF_GLOBAL_METHOD')] + + for code_object, tag in test_cases: + self.assertEqual( # BPTAG: TEST_CODE_OBJECT_AT_ASSERT + code_object, + self._GetCodeObjectAtLine(code_object, tag)) + + def testCodeObjectWithoutModule(self): + """Verify no crash/hang when module has no file name.""" + global global_code_object # pylint: disable=global-variable-undefined + global_code_object = compile('2+3', '', 'exec') + + self.assertFalse( + module_explorer.GetCodeObjectAtLine(self._module, 111111)[0]) + +# TODO: Re-enable this test, without hardcoding a python version into it. +# def testCodeExtensionMismatch(self): +# """Verify module match when code object points to .py and module to .pyc.""" +# test_dir = tempfile.mkdtemp('', 'module_explorer_') +# sys.path.append(test_dir) +# try: +# # Create and compile module, remove the .py file and leave the .pyc file. 
+# module_path = os.path.join(test_dir, 'module.py') +# with open(module_path, 'w') as f: +# f.write('def f():\n pass') +# py_compile.compile(module_path) +# if six.PY3: +# module_pyc_path = os.path.join(test_dir, '__pycache__', +# 'module.cpython-37.pyc') +# os.rename(module_pyc_path, module_path + 'c') +# os.remove(module_path) +# +# import module # pylint: disable=g-import-not-at-top +# self.assertEqual('.py', +# os.path.splitext( +# six.get_function_code(module.f).co_filename)[1]) +# self.assertEqual('.pyc', os.path.splitext(module.__file__)[1]) +# +# func_code = six.get_function_code(module.f) +# self.assertEqual(func_code, +# module_explorer.GetCodeObjectAtLine( +# module, +# next(dis.findlinestarts(func_code))[1])[1]) +# finally: +# sys.path.remove(test_dir) +# shutil.rmtree(test_dir) + + def testMaxVisitObjects(self): + default_quota = module_explorer._MAX_VISIT_OBJECTS + try: + module_explorer._MAX_VISIT_OBJECTS = 10 + self.assertLess( + len(module_explorer._GetModuleCodeObjects(self._module)), + len(self._code_objects)) + finally: + module_explorer._MAX_VISIT_OBJECTS = default_quota + + def testMaxReferentsBfsDepth(self): + default_quota = module_explorer._MAX_REFERENTS_BFS_DEPTH + try: + module_explorer._MAX_REFERENTS_BFS_DEPTH = 1 + self.assertLess( + len(module_explorer._GetModuleCodeObjects(self._module)), + len(self._code_objects)) + finally: + module_explorer._MAX_REFERENTS_BFS_DEPTH = default_quota + + def testMaxObjectReferents(self): + class A(object): + pass + + default_quota = module_explorer._MAX_VISIT_OBJECTS + default_referents_quota = module_explorer._MAX_OBJECT_REFERENTS + try: + global large_dict + large_dict = {A(): 0 for i in range(0, 5000)} + + # First test with a referents limit too large, it will visit large_dict + # and exhaust the _MAX_VISIT_OBJECTS quota before finding all the code + # objects + module_explorer._MAX_VISIT_OBJECTS = 5000 + module_explorer._MAX_OBJECT_REFERENTS = sys.maxsize + self.assertLess( + 
len(module_explorer._GetModuleCodeObjects(self._module)), + len(self._code_objects)) + + # Now test with a referents limit that prevents large_dict from being + # explored, all the code objects should be found now that the large dict + # is skipped and isn't taking up the _MAX_VISIT_OBJECTS quota + module_explorer._MAX_OBJECT_REFERENTS = default_referents_quota + self.assertItemsEqual( + module_explorer._GetModuleCodeObjects(self._module), + self._code_objects) + finally: + module_explorer._MAX_VISIT_OBJECTS = default_quota + module_explorer._MAX_OBJECT_REFERENTS = default_referents_quota + large_dict = None + + @staticmethod + def _StaticMethod(): + def InnerMethod(): + pass # BPTAG: INNER_OF_STATIC_METHOD + + return six.get_function_code(InnerMethod) + + def _GetCodeObjectAtLine(self, fn, tag): + """Wrapper over GetCodeObjectAtLine for tags in this module.""" + unused_path, line = python_test_util.ResolveTag(fn, tag) + return module_explorer.GetCodeObjectAtLine(self._module, line)[1] + + +def _GlobalMethod(): + def InnerMethod(): + pass # BPTAG: INNER_OF_GLOBAL_METHOD + + return six.get_function_code(InnerMethod) + + +def ClosureDecorator(handler): + def Caller(*args): + return handler(*args) + + return Caller + + +class ClassDecorator(object): + + def __init__(self, fn): + self._fn = fn + + def __call__(self, *args): + return self._fn(*args) + + +@ClosureDecorator +def GlobalMethodWithClosureDecorator(): + return True # BPTAG: GLOBAL_METHOD_WITH_CLOSURE_DECORATOR + + +@ClassDecorator +def GlobalMethodWithClassDecorator(): + return True # BPTAG: GLOBAL_METHOD_WITH_CLASS_DECORATOR + + +class GlobalClass(object): + + @ClosureDecorator + def FnWithClosureDecorator(self): + return True # BPTAG: GLOBAL_CLASS_METHOD_WITH_CLOSURE_DECORATOR + + @ClassDecorator + def FnWithClassDecorator(self): + return True # BPTAG: GLOBAL_CLASS_METHOD_WITH_CLASS_DECORATOR + + +def _MethodWithLambdaExpression(): + return six.get_function_code(lambda x: x**3) + + +def 
_MethodWithGeneratorExpression(): + return (i for i in range(0, 2)).gi_code + +# Used for testMaxObjectReferents, need to be in global scope or else the module +# explorer would not explore this +large_dict = None + +if __name__ == '__main__': + absltest.main() diff --git a/tests/module_search2_test.py b/tests/module_search2_test.py new file mode 100644 index 0000000..51bbc99 --- /dev/null +++ b/tests/module_search2_test.py @@ -0,0 +1,127 @@ +"""Unit test for module_search2 module.""" + +import os +import sys +import tempfile + +from absl.testing import absltest + +from googleclouddebugger import module_search2 + + +# TODO: Add tests for whitespace in location path including in, +# extension, basename, path +class SearchModulesTest(absltest.TestCase): + + def setUp(self): + self._test_package_dir = tempfile.mkdtemp('', 'package_') + sys.path.append(self._test_package_dir) + + def tearDown(self): + sys.path.remove(self._test_package_dir) + + def testSearchValidSourcePath(self): + # These modules are on the sys.path. + self.assertEndsWith( + module_search2.Search( + 'googleclouddebugger/module_search2.py'), + '/site-packages/googleclouddebugger/module_search2.py') + + # inspect and dis are libraries with no real file. So, we + # can no longer match them by file path. + + def testSearchInvalidSourcePath(self): + # This is an invalid module that doesn't exist anywhere. + self.assertEqual(module_search2.Search('aaaaa.py'), 'aaaaa.py') + + # This module exists, but the search input is missing the outer package + # name. + self.assertEqual( + module_search2.Search('absltest.py'), + 'absltest.py') + + def testSearchInvalidExtension(self): + # Test that the module rejects invalid extension in the input. + with self.assertRaises(AssertionError): + module_search2.Search('module_search2.x') + + def testSearchPathStartsWithSep(self): + # Test that module rejects invalid leading os.sep char in the input. 
+ with self.assertRaises(AssertionError): + module_search2.Search('/module_search2') + + def testSearchRelativeSysPath(self): + # An entry in sys.path is in relative form, and represents the same + # directory as another absolute entry in sys.path. + for directory in ['', 'a', 'a/b']: + self._CreateFile(os.path.join(directory, '__init__.py')) + self._CreateFile('a/b/first.py') + + try: + # Inject a relative path into sys.path that refers to a directory already + # in sys.path. It should produce the same result as the non-relative form. + testdir_alias = os.path.join(self._test_package_dir, 'a/../a') + + # Add 'a/../a' to sys.path so that 'b/first.py' is reachable. + sys.path.insert(0, testdir_alias) + + # Returned result should have a successful file match and relative + # paths should be kept as-is. + result = module_search2.Search('b/first.py') + self.assertEndsWith(result, 'a/../a/b/first.py') + + finally: + sys.path.remove(testdir_alias) + + def testSearchSymLinkInSysPath(self): + # An entry in sys.path is a symlink. + for directory in ['', 'a', 'a/b']: + self._CreateFile(os.path.join(directory, '__init__.py'), '') + self._CreateFile('a/b/first.py') + self._CreateSymLink('a', 'link') + + try: + # Add 'link/' to sys.path so that 'b/first.py' is reachable. + sys.path.append(os.path.join(self._test_package_dir, 'link')) + + # Returned result should have a successful file match and symbolic + # links should be kept. 
+ self.assertEndsWith( + module_search2.Search('b/first.py'), + 'link/b/first.py') + finally: + sys.path.remove(os.path.join(self._test_package_dir, 'link')) + + def _CreateFile(self, path, contents='assert False "Unexpected import"\n'): + full_path = os.path.join(self._test_package_dir, path) + directory, unused_name = os.path.split(full_path) + + if not os.path.isdir(directory): + os.makedirs(directory) + + with open(full_path, 'w') as writer: + writer.write(contents) + + return path + + def _CreateSymLink(self, source, link_name): + full_source_path = os.path.join(self._test_package_dir, source) + full_link_path = os.path.join(self._test_package_dir, link_name) + os.symlink(full_source_path, full_link_path) + + # Since we cannot use os.path.samefile or os.path.realpath to eliminate + # symlinks reliably, we only check suffix equivalence of file paths in these + # unit tests. + def _AssertEndsWith(self, match, path): + """Asserts exactly one match ending with path.""" + self.assertLen(match, 1) + self.assertEndsWith(match[0], path) + + def _AssertEqFile(self, match, path): + """Asserts exactly one match equals to the file created with _CreateFile.""" + self.assertLen(match, 1) + self.assertEqual(match[0], os.path.join(self._test_package_dir, path)) + + +if __name__ == '__main__': + absltest.main() diff --git a/tests/module_utils2_test.py b/tests/module_utils2_test.py new file mode 100644 index 0000000..82110ea --- /dev/null +++ b/tests/module_utils2_test.py @@ -0,0 +1,177 @@ +"""Tests for googleclouddebugger.module_utils2.""" + +import os +import sys +import tempfile + +from absl.testing import absltest + +from googleclouddebugger import module_utils2 + + +class TestModule(object): + """Dummy class with __name__ and __file__ attributes.""" + + def __init__(self, name, path): + self.__name__ = name + self.__file__ = path + + +def _AddSysModule(name, path): + sys.modules[name] = TestModule(name, path) + + +class ModuleUtilsTest(absltest.TestCase): + + def 
setUp(self): + self._test_package_dir = tempfile.mkdtemp('', 'package_') + self.modules = sys.modules.copy() + + def tearDown(self): + sys.modules = self.modules + self.modules = None + + def _CreateFile(self, path): + full_path = os.path.join(self._test_package_dir, path) + directory, unused_name = os.path.split(full_path) + + if not os.path.isdir(directory): + os.makedirs(directory) + + with open(full_path, 'w') as writer: + writer.write('') + + return full_path + + def _CreateSymLink(self, source, link_name): + full_source_path = os.path.join(self._test_package_dir, source) + full_link_path = os.path.join(self._test_package_dir, link_name) + os.symlink(full_source_path, full_link_path) + return full_link_path + + def _AssertEndsWith(self, a, b, msg=None): + """Assert that string a ends with string b.""" + if not a.endswith(b): + standard_msg = '%s does not end with %s' % (a, b) + self.fail(self._formatMessage(msg, standard_msg)) + + def testSimpleLoadedModuleFromSuffix(self): + # Lookup simple module. + _AddSysModule('m1', '/a/b/p1/m1.pyc') + for suffix in [ + 'm1.py', + 'm1.pyc', + 'm1.pyo', + 'p1/m1.py', + 'b/p1/m1.py', + 'a/b/p1/m1.py', + '/a/b/p1/m1.py']: + m1 = module_utils2.GetLoadedModuleBySuffix(suffix) + self.assertTrue(m1, 'Module not found') + self.assertEqual('/a/b/p1/m1.pyc', m1.__file__) + + # Lookup simple package, no ext. + _AddSysModule('p1', '/a/b/p1/__init__.pyc') + for suffix in [ + 'p1/__init__.py', + 'b/p1/__init__.py', + 'a/b/p1/__init__.py', + '/a/b/p1/__init__.py']: + p1 = module_utils2.GetLoadedModuleBySuffix(suffix) + self.assertTrue(p1, 'Package not found') + self.assertEqual('/a/b/p1/__init__.pyc', p1.__file__) + + # Lookup via bad suffix. + for suffix in [ + 'm2.py', + 'p2/m1.py', + 'b2/p1/m1.py', + 'a2/b/p1/m1.py', + '/a2/b/p1/m1.py']: + m1 = module_utils2.GetLoadedModuleBySuffix(suffix) + self.assertFalse(m1, 'Module found unexpectedly') + + def testComplexLoadedModuleFromSuffix(self): + # Lookup complex module. 
+ _AddSysModule('b.p1.m1', '/a/b/p1/m1.pyc') + for suffix in [ + 'm1.py', + 'p1/m1.py', + 'b/p1/m1.py', + 'a/b/p1/m1.py', + '/a/b/p1/m1.py']: + m1 = module_utils2.GetLoadedModuleBySuffix(suffix) + self.assertTrue(m1, 'Module not found') + self.assertEqual('/a/b/p1/m1.pyc', m1.__file__) + + # Lookup complex package, no ext. + _AddSysModule('a.b.p1', '/a/b/p1/__init__.pyc') + for suffix in [ + 'p1/__init__.py', + 'b/p1/__init__.py', + 'a/b/p1/__init__.py', + '/a/b/p1/__init__.py']: + p1 = module_utils2.GetLoadedModuleBySuffix(suffix) + self.assertTrue(p1, 'Package not found') + self.assertEqual('/a/b/p1/__init__.pyc', p1.__file__) + + def testSimilarLoadedModuleFromSuffix(self): + # Lookup similar module, no ext. + _AddSysModule('m1', '/a/b/p2/m1.pyc') + _AddSysModule('p1.m1', '/a/b1/p1/m1.pyc') + _AddSysModule('b.p1.m1', '/a1/b/p1/m1.pyc') + _AddSysModule('a.b.p1.m1', '/a/b/p1/m1.pyc') + + m1 = module_utils2.GetLoadedModuleBySuffix('/a/b/p1/m1.py') + self.assertTrue(m1, 'Module not found') + self.assertEqual('/a/b/p1/m1.pyc', m1.__file__) + + # Lookup similar package, no ext. + _AddSysModule('p1', '/a1/b1/p1/__init__.pyc') + _AddSysModule('b.p1', '/a1/b/p1/__init__.pyc') + _AddSysModule('a.b.p1', '/a/b/p1/__init__.pyc') + p1 = module_utils2.GetLoadedModuleBySuffix('/a/b/p1/__init__.py') + self.assertTrue(p1, 'Package not found') + self.assertEqual('/a/b/p1/__init__.pyc', p1.__file__) + + def testDuplicateLoadedModuleFromSuffix(self): + # Lookup name dup module and package. + _AddSysModule('m1', '/m1/__init__.pyc') + _AddSysModule('m1.m1', '/m1/m1.pyc') + _AddSysModule('m1.m1.m1', '/m1/m1/m1/__init__.pyc') + _AddSysModule('m1.m1.m1.m1', '/m1/m1/m1/m1.pyc') + + # Ambiguous request, multiple modules might have matched. + m1 = module_utils2.GetLoadedModuleBySuffix('/m1/__init__.py') + self.assertTrue(m1, 'Package not found') + self.assertIn( + m1.__file__, + ['/m1/__init__.pyc', '/m1/m1/m1/__init__.pyc']) + + # Ambiguous request, multiple modules might have matched. 
+ m1m1 = module_utils2.GetLoadedModuleBySuffix('/m1/m1.py') + self.assertTrue(m1m1, 'Module not found') + self.assertIn( + m1m1.__file__, + ['/m1/m1.pyc', '/m1/m1/m1/m1.pyc']) + + # Not ambiguous. Only 1 match possible. + m1m1m1 = module_utils2.GetLoadedModuleBySuffix('/m1/m1/m1/__init__.py') + self.assertTrue(m1m1m1, 'Package not found') + self.assertEqual('/m1/m1/m1/__init__.pyc', m1m1m1.__file__) + + # Not ambiguous. Only 1 match possible. + m1m1m1m1 = module_utils2.GetLoadedModuleBySuffix('/m1/m1/m1/m1.py') + self.assertTrue(m1m1m1m1, 'Module not found') + self.assertEqual('/m1/m1/m1/m1.pyc', m1m1m1m1.__file__) + + def testMainLoadedModuleFromSuffix(self): + # Lookup complex module. + _AddSysModule('__main__', '/a/b/p/m.pyc') + m1 = module_utils2.GetLoadedModuleBySuffix('/a/b/p/m.py') + self.assertTrue(m1, 'Module not found') + self.assertEqual('/a/b/p/m.pyc', m1.__file__) + + +if __name__ == '__main__': + absltest.main() diff --git a/tests/native_module_test.py b/tests/native_module_test.py new file mode 100644 index 0000000..d235d7f --- /dev/null +++ b/tests/native_module_test.py @@ -0,0 +1,302 @@ +"""Unit tests for native module.""" + +import inspect +import sys +import threading +import time + +import six + +from absl.testing import absltest + +from googleclouddebugger import cdbg_native as native +import python_test_util + + +def _DoHardWork(base): + for i in range(base): + if base * i < 0: + return True + return False + + +class NativeModuleTest(absltest.TestCase): + """Unit tests for native module.""" + + def setUp(self): + # Lock for thread safety. + self._lock = threading.Lock() + + # Count hit count for the breakpoints we set. + self._breakpoint_counter = 0 + + # Registers breakpoint events other than breakpoint hit. + self._breakpoint_events = [] + + # Keep track of breakpoints we set to reset them on cleanup. + self._cookies = [] + + def tearDown(self): + # Verify that we didn't get any breakpoint events that the test did + # not expect. 
+ self.assertEqual([], self._PopBreakpointEvents()) + + self._ClearAllBreakpoints() + + def testUnconditionalBreakpoint(self): + def Trigger(): + unused_lock = threading.Lock() + print('Breakpoint trigger') # BPTAG: UNCONDITIONAL_BREAKPOINT + + self._SetBreakpoint(Trigger, 'UNCONDITIONAL_BREAKPOINT') + Trigger() + self.assertEqual(1, self._breakpoint_counter) + + def testConditionalBreakpoint(self): + def Trigger(): + d = {} + for i in range(1, 10): + d[i] = i**2 # BPTAG: CONDITIONAL_BREAKPOINT + + self._SetBreakpoint(Trigger, 'CONDITIONAL_BREAKPOINT', 'i % 3 == 1') + Trigger() + self.assertEqual(3, self._breakpoint_counter) + + def testClearBreakpoint(self): + """Set two breakpoints on the same line, then clear one.""" + + def Trigger(): + print('Breakpoint trigger') # BPTAG: CLEAR_BREAKPOINT + + self._SetBreakpoint(Trigger, 'CLEAR_BREAKPOINT') + self._SetBreakpoint(Trigger, 'CLEAR_BREAKPOINT') + native.ClearConditionalBreakpoint(self._cookies.pop()) + Trigger() + self.assertEqual(1, self._breakpoint_counter) + + def testMissingModule(self): + def Test(): + native.CreateConditionalBreakpoint(None, 123123, None, + self._BreakpointEvent) + + self.assertRaises(TypeError, Test) + + def testBadModule(self): + def Test(): + native.CreateConditionalBreakpoint('str', 123123, None, + self._BreakpointEvent) + + self.assertRaises(TypeError, Test) + + def testInvalidCondition(self): + def Test(): + native.CreateConditionalBreakpoint(sys.modules[__name__], 123123, '2+2', + self._BreakpointEvent) + + self.assertRaises(TypeError, Test) + + def testMissingCallback(self): + def Test(): + native.CreateConditionalBreakpoint('code.py', 123123, None, None) + + self.assertRaises(TypeError, Test) + + def testInvalidCallback(self): + def Test(): + native.CreateConditionalBreakpoint('code.py', 123123, None, {}) + + self.assertRaises(TypeError, Test) + + def testMissingCookie(self): + self.assertRaises( + TypeError, + lambda: native.ClearConditionalBreakpoint(None)) + + def 
testInvalidCookie(self): + native.ClearConditionalBreakpoint(387873457) + + def testMutableCondition(self): + def Trigger(): + def MutableMethod(): + self._evil = True + return True + print('MutableMethod = %s' % MutableMethod) # BPTAG: MUTABLE_CONDITION + + self._SetBreakpoint(Trigger, 'MUTABLE_CONDITION', 'MutableMethod()') + Trigger() + self.assertEqual( + [native.BREAKPOINT_EVENT_CONDITION_EXPRESSION_MUTABLE], + self._PopBreakpointEvents()) + + def testGlobalConditionQuotaExceeded(self): + def Trigger(): + print('Breakpoint trigger') # BPTAG: GLOBAL_CONDITION_QUOTA + + self._SetBreakpoint(Trigger, 'GLOBAL_CONDITION_QUOTA', '_DoHardWork(1000)') + Trigger() + self._ClearAllBreakpoints() + + self.assertListEqual( + [native.BREAKPOINT_EVENT_GLOBAL_CONDITION_QUOTA_EXCEEDED], + self._PopBreakpointEvents()) + + # Sleep for some time to let the quota recover. + time.sleep(0.1) + + def testBreakpointConditionQuotaExceeded(self): + def Trigger(): + print('Breakpoint trigger') # BPTAG: PER_BREAKPOINT_CONDITION_QUOTA + + time.sleep(1) + + # Per-breakpoint quota is lower than the global one. Exponentially + # increase the complexity of a condition until we hit it. + base = 100 + while True: + self._SetBreakpoint( + Trigger, + 'PER_BREAKPOINT_CONDITION_QUOTA', + '_DoHardWork(%d)' % base) + Trigger() + self._ClearAllBreakpoints() + + events = self._PopBreakpointEvents() + if events: + self.assertEqual( + [native.BREAKPOINT_EVENT_BREAKPOINT_CONDITION_QUOTA_EXCEEDED], + events) + break + + base *= 1.2 + time.sleep(0.1) + + # Sleep for some time to let the quota recover. 
+ time.sleep(0.1) + + def testImmutableCallSuccess(self): + def Add(a, b, c): + return a + b + c + + def Magic(): + return 'cake' + + self.assertEqual( + '643535', + self._CallImmutable(inspect.currentframe(), 'str(643535)')) + self.assertEqual( + 786 + 23 + 891, + self._CallImmutable(inspect.currentframe(), 'Add(786, 23, 891)')) + self.assertEqual( + 'cake', + self._CallImmutable(inspect.currentframe(), 'Magic()')) + return Add or Magic + + def testImmutableCallMutable(self): + def Change(): + dictionary['bad'] = True + + dictionary = {} + frame = inspect.currentframe() + self.assertRaises( + SystemError, + lambda: self._CallImmutable(frame, 'Change()')) + self.assertEqual({}, dictionary) + return Change + + def testImmutableCallExceptionPropagation(self): + def Divide(a, b): + return a / b + + frame = inspect.currentframe() + self.assertRaises( + ZeroDivisionError, + lambda: self._CallImmutable(frame, 'Divide(1, 0)')) + return Divide + + def testImmutableCallInvalidFrame(self): + self.assertRaises( + TypeError, + lambda: native.CallImmutable(None, lambda: 1)) + self.assertRaises( + TypeError, + lambda: native.CallImmutable('not a frame', lambda: 1)) + + def testImmutableCallInvalidCallable(self): + frame = inspect.currentframe() + self.assertRaises( + TypeError, + lambda: native.CallImmutable(frame, None)) + self.assertRaises( + TypeError, + lambda: native.CallImmutable(frame, 'not a callable')) + + def _SetBreakpoint(self, method, tag, condition=None): + """Sets a breakpoint in this source file. + + The line number is identified by tag. This function does not verify that + the source line is in the specified method. + + The breakpoint may have an optional condition. + + Args: + method: method in which the breakpoint will be set. + tag: label for a source line. + condition: optional breakpoint condition. 
+ """ + unused_path, line = python_test_util.ResolveTag(type(self), tag) + + compiled_condition = None + if condition is not None: + compiled_condition = compile(condition, '', 'eval') + + cookie = native.CreateConditionalBreakpoint( + six.get_function_code(method), line, compiled_condition, + self._BreakpointEvent) + + self._cookies.append(cookie) + native.ActivateConditionalBreakpoint(cookie) + + def _ClearAllBreakpoints(self): + """Removes all previously set breakpoints.""" + for cookie in self._cookies: + native.ClearConditionalBreakpoint(cookie) + + def _CallImmutable(self, frame, expression): + """Wrapper over native.ImmutableCall for callable.""" + return native.CallImmutable( + frame, + compile(expression, '', 'eval')) + + def _BreakpointEvent(self, event, frame): + """Callback on breakpoint event. + + See thread_breakpoints.h for more details of possible events. + + Args: + event: breakpoint event (see kIntegerConstants in native_module.cc). + frame: Python stack frame of breakpoint hit or None for other events. 
+ """ + with self._lock: + if event == native.BREAKPOINT_EVENT_HIT: + self.assertTrue(inspect.isframe(frame)) + self._breakpoint_counter += 1 + else: + self._breakpoint_events.append(event) + + def _PopBreakpointEvents(self): + """Gets and resets the list of breakpoint events received so far.""" + with self._lock: + events = self._breakpoint_events + self._breakpoint_events = [] + return events + + def _HasBreakpointEvents(self): + """Checks whether there are unprocessed breakpoint events.""" + with self._lock: + if self._breakpoint_events: + return True + return False + + +if __name__ == '__main__': + absltest.main() diff --git a/tests/python_breakpoint_test_disabled.py b/tests/python_breakpoint_test_disabled.py new file mode 100644 index 0000000..2de4d61 --- /dev/null +++ b/tests/python_breakpoint_test_disabled.py @@ -0,0 +1,607 @@ +"""Unit test for python_breakpoint module.""" + +# TODO: Get this test to work with all supported versions of Python. + +from datetime import datetime +from datetime import timedelta +import inspect +import os +import sys +import tempfile + +from absl.testing import absltest + +from googleclouddebugger import cdbg_native as native +from googleclouddebugger import imphook2 +from googleclouddebugger import python_breakpoint +import python_test_util + + +class PythonBreakpointTest(absltest.TestCase): + """Unit test for python_breakpoint module.""" + + def setUp(self): + self._test_package_dir = tempfile.mkdtemp('', 'package_') + sys.path.append(self._test_package_dir) + + path, line = python_test_util.ResolveTag(type(self), 'CODE_LINE') + + self._base_time = datetime(year=2015, month=1, day=1) # BPTAG: CODE_LINE + self._template = { + 'id': 'BP_ID', + 'createTime': python_test_util.DateTimeToTimestamp(self._base_time), + 'location': {'path': path, 'line': line}} + self._completed = set() + self._update_queue = [] + + def tearDown(self): + sys.path.remove(self._test_package_dir) + + def CompleteBreakpoint(self, breakpoint_id): + """Mock 
method of BreakpointsManager.""" + self._completed.add(breakpoint_id) + + def GetCurrentTime(self): + """Mock method of BreakpointsManager.""" + return self._base_time + + def EnqueueBreakpointUpdate(self, breakpoint): + """Mock method of HubClient.""" + self._update_queue.append(breakpoint) + + def testClear(self): + breakpoint = python_breakpoint.PythonBreakpoint( + self._template, self, self, None) + breakpoint.Clear() + self.assertFalse(breakpoint._cookie) + + def testId(self): + breakpoint = python_breakpoint.PythonBreakpoint( + self._template, self, self, None) + breakpoint.Clear() + self.assertEqual('BP_ID', breakpoint.GetBreakpointId()) + + def testNullBytesInCondition(self): + python_breakpoint.PythonBreakpoint( + dict(self._template, condition='\0'), + self, + self, + None) + self.assertEqual(set(['BP_ID']), self._completed) + self.assertLen(self._update_queue, 1) + self.assertTrue(self._update_queue[0]['status']['isError']) + self.assertTrue(self._update_queue[0]['isFinalState']) + + # Test only applies to the old module search algorithm. When using new module + # search algorithm, this test is same as testDeferredBreakpoint. 
+ def testUnknownModule(self): + pass + + def testDeferredBreakpoint(self): + with open(os.path.join(self._test_package_dir, 'defer_print.py'), 'w') as f: + f.write('def DoPrint():\n') + f.write(' print("Hello from deferred module")\n') + + python_breakpoint.PythonBreakpoint( + dict(self._template, location={'path': 'defer_print.py', 'line': 2}), + self, + self, + None) + + self.assertFalse(self._completed) + self.assertEmpty(self._update_queue) + + import defer_print # pylint: disable=g-import-not-at-top + defer_print.DoPrint() + + self.assertEqual(set(['BP_ID']), self._completed) + self.assertLen(self._update_queue, 1) + self.assertGreater(len(self._update_queue[0]['stackFrames']), 3) + self.assertEqual( + 'DoPrint', + self._update_queue[0]['stackFrames'][0]['function']) + self.assertTrue(self._update_queue[0]['isFinalState']) + + self.assertEmpty(imphook2._import_callbacks) + + # Old module search algorithm rejects multiple matches. This test verifies + # that the new module search algorithm searches sys.path sequentially, and + # selects the first match (just like the Python importer). + def testSearchUsingSysPathOrder(self): + for i in range(2, 0, -1): + # Create directories and add them to sys.path. + test_dir = os.path.join(self._test_package_dir, ('inner2_%s' % i)) + os.mkdir(test_dir) + sys.path.append(test_dir) + with open(os.path.join(test_dir, 'mod2.py'), 'w') as f: + f.write('def DoPrint():\n') + f.write(' x = %s\n' % i) + f.write(' return x') + + # Loads inner2_2/mod2.py because it comes first in sys.path. + import mod2 # pylint: disable=g-import-not-at-top + + # Search will proceed in sys.path order, and the first match in sys.path + # will uniquely identify the full path of the module as inner2_2/mod2.py. 
+ python_breakpoint.PythonBreakpoint( + dict(self._template, location={'path': 'mod2.py', 'line': 3}), + self, + self, + None) + + self.assertEqual(2, mod2.DoPrint()) + + self.assertEqual(set(['BP_ID']), self._completed) + self.assertLen(self._update_queue, 1) + self.assertGreater(len(self._update_queue[0]['stackFrames']), 3) + self.assertEqual( + 'DoPrint', + self._update_queue[0]['stackFrames'][0]['function']) + self.assertTrue(self._update_queue[0]['isFinalState']) + self.assertEqual( + 'x', + self._update_queue[0]['stackFrames'][0]['locals'][0]['name']) + self.assertEqual( + '2', + self._update_queue[0]['stackFrames'][0]['locals'][0]['value']) + + self.assertEmpty(imphook2._import_callbacks) + + # Old module search algorithm rejects multiple matches. This test verifies + # that when the new module search cannot find any match in sys.path, it + # defers the breakpoint, and then selects the first dynamically-loaded + # module that matches the given path. + def testMultipleDeferredMatches(self): + for i in range(2, 0, -1): + # Create packages, but do not add them to sys.path. + test_dir = os.path.join(self._test_package_dir, ('inner3_%s' % i)) + os.mkdir(test_dir) + with open(os.path.join(test_dir, '__init__.py'), 'w') as f: + pass + with open(os.path.join(test_dir, 'defer_print3.py'), 'w') as f: + f.write('def DoPrint():\n') + f.write(' x = %s\n' % i) + f.write(' return x') + + # This breakpoint will be deferred. It can match any one of the modules + # created above. + python_breakpoint.PythonBreakpoint( + dict(self._template, location={'path': 'defer_print3.py', 'line': 3}), + self, + self, + None) + + # Lazy import module. Activates breakpoint on the loaded module. 
+ import inner3_1.defer_print3 # pylint: disable=g-import-not-at-top + self.assertEqual(1, inner3_1.defer_print3.DoPrint()) + + self.assertEqual(set(['BP_ID']), self._completed) + self.assertLen(self._update_queue, 1) + self.assertGreater(len(self._update_queue[0]['stackFrames']), 3) + self.assertEqual( + 'DoPrint', + self._update_queue[0]['stackFrames'][0]['function']) + self.assertTrue(self._update_queue[0]['isFinalState']) + self.assertEqual( + 'x', + self._update_queue[0]['stackFrames'][0]['locals'][0]['name']) + self.assertEqual( + '1', + self._update_queue[0]['stackFrames'][0]['locals'][0]['value']) + + self.assertEmpty(imphook2._import_callbacks) + + def testNeverLoadedBreakpoint(self): + open(os.path.join(self._test_package_dir, 'never_print.py'), 'w').close() + + breakpoint = python_breakpoint.PythonBreakpoint( + dict(self._template, location={'path': 'never_print.py', 'line': 99}), + self, + self, + None) + breakpoint.Clear() + + self.assertFalse(self._completed) + self.assertEmpty(self._update_queue) + + def testDeferredNoCodeAtLine(self): + open(os.path.join(self._test_package_dir, 'defer_empty.py'), 'w').close() + + python_breakpoint.PythonBreakpoint( + dict(self._template, location={'path': 'defer_empty.py', 'line': 10}), + self, + self, + None) + + self.assertFalse(self._completed) + self.assertEmpty(self._update_queue) + + import defer_empty # pylint: disable=g-import-not-at-top,unused-variable + + self.assertEqual(set(['BP_ID']), self._completed) + self.assertLen(self._update_queue, 1) + self.assertTrue(self._update_queue[0]['isFinalState']) + status = self._update_queue[0]['status'] + self.assertEqual(status['isError'], True) + self.assertEqual(status['refersTo'], 'BREAKPOINT_SOURCE_LOCATION') + desc = status['description'] + self.assertEqual(desc['format'], 'No code found at line $0 in $1') + params = desc['parameters'] + self.assertIn('defer_empty.py', params[1]) + self.assertEqual(params[0], '10') + self.assertEmpty(imphook2._import_callbacks) 
+ + def testDeferredBreakpointCancelled(self): + open(os.path.join(self._test_package_dir, 'defer_cancel.py'), 'w').close() + + breakpoint = python_breakpoint.PythonBreakpoint( + dict(self._template, location={'path': 'defer_cancel.py', 'line': 11}), + self, + self, + None) + breakpoint.Clear() + + self.assertFalse(self._completed) + self.assertEmpty(imphook2._import_callbacks) + unused_no_code_line_above = 0 # BPTAG: NO_CODE_LINE_ABOVE + + # BPTAG: NO_CODE_LINE + def testNoCodeAtLine(self): + unused_no_code_line_below = 0 # BPTAG: NO_CODE_LINE_BELOW + path, line = python_test_util.ResolveTag(sys.modules[__name__], + 'NO_CODE_LINE') + path, line_above = python_test_util.ResolveTag(sys.modules[__name__], + 'NO_CODE_LINE_ABOVE') + path, line_below = python_test_util.ResolveTag(sys.modules[__name__], + 'NO_CODE_LINE_BELOW') + + python_breakpoint.PythonBreakpoint( + dict(self._template, location={'path': path, 'line': line}), + self, + self, + None) + self.assertEqual(set(['BP_ID']), self._completed) + self.assertLen(self._update_queue, 1) + self.assertTrue(self._update_queue[0]['isFinalState']) + status = self._update_queue[0]['status'] + self.assertEqual(status['isError'], True) + self.assertEqual(status['refersTo'], 'BREAKPOINT_SOURCE_LOCATION') + desc = status['description'] + self.assertEqual(desc['format'], + 'No code found at line $0 in $1. 
Try lines $2 or $3.') + params = desc['parameters'] + self.assertEqual(params[0], str(line)) + self.assertIn(path, params[1]) + self.assertEqual(params[2], str(line_above)) + self.assertEqual(params[3], str(line_below)) + + def testBadExtension(self): + for path in ['unknown.so', 'unknown', 'unknown.java', 'unknown.pyc']: + python_breakpoint.PythonBreakpoint( + dict(self._template, location={'path': path, 'line': 83}), + self, + self, + None) + self.assertEqual(set(['BP_ID']), self._completed) + self.assertLen(self._update_queue, 1) + self.assertTrue(self._update_queue[0]['isFinalState']) + self.assertEqual( + {'isError': True, + 'refersTo': 'BREAKPOINT_SOURCE_LOCATION', + 'description': { + 'format': ('Only files with .py extension are supported')}}, + self._update_queue[0]['status']) + self._update_queue = [] + + def testRootInitFile(self): + for path in ['__init__.py', '/__init__.py', '////__init__.py', + ' __init__.py ', ' //__init__.py']: + python_breakpoint.PythonBreakpoint( + dict(self._template, location={'path': path, 'line': 83}), + self, + self, + None) + self.assertEqual(set(['BP_ID']), self._completed) + self.assertLen(self._update_queue, 1) + self.assertTrue(self._update_queue[0]['isFinalState']) + self.assertEqual( + {'isError': True, + 'refersTo': 'BREAKPOINT_SOURCE_LOCATION', + 'description': { + 'format': + 'Multiple modules matching $0. ' + 'Please specify the module path.', + 'parameters': ['__init__.py'] + }}, + self._update_queue[0]['status']) + self._update_queue = [] + + # Old module search algorithm rejects because there are too many matches. + # The new algorithm selects the very first match in sys.path. + def testNonRootInitFile(self): + # Neither 'a' nor 'a/b' are real packages accessible via sys.path. + # Therefore, module search falls back to search '__init__.py', which matches + # the first entry in sys.path, which we artifically inject below. 
+ test_dir = os.path.join(self._test_package_dir, 'inner4') + os.mkdir(test_dir) + with open(os.path.join(test_dir, '__init__.py'), 'w') as f: + f.write('def DoPrint():\n') + f.write(' print("Hello")') + sys.path.insert(0, test_dir) + + import inner4 # pylint: disable=g-import-not-at-top,unused-variable + + for path in ['/a/__init__.py', 'a/__init__.py', 'a/b/__init__.py']: + python_breakpoint.PythonBreakpoint( + dict(self._template, location={'path': path, 'line': 2}), + self, + self, + None) + + inner4.DoPrint() + + self.assertEqual(set(['BP_ID']), self._completed) + self.assertLen(self._update_queue, 1) + self.assertTrue(self._update_queue[0]['isFinalState']) + self.assertGreater(len(self._update_queue[0]['stackFrames']), 3) + self.assertEqual( + 'DoPrint', + self._update_queue[0]['stackFrames'][0]['function']) + + self.assertEmpty(imphook2._import_callbacks) + self._update_queue = [] + + def testBreakpointInLoadedPackageFile(self): + """Test breakpoint in a loaded package.""" + for name in ['pkg', 'pkg/pkg']: + test_dir = os.path.join(self._test_package_dir, name) + os.mkdir(test_dir) + with open(os.path.join(test_dir, '__init__.py'), 'w') as f: + f.write('def DoPrint():\n') + f.write(' print("Hello from %s")\n' % name) + + import pkg # pylint: disable=g-import-not-at-top,unused-variable + import pkg.pkg # pylint: disable=g-import-not-at-top,unused-variable + + python_breakpoint.PythonBreakpoint( + dict(self._template, + location={'path': 'pkg/pkg/__init__.py', 'line': 2}), + self, + self, + None) + + pkg.pkg.DoPrint() + + self.assertEqual(set(['BP_ID']), self._completed) + self.assertLen(self._update_queue, 1) + self.assertTrue(self._update_queue[0]['isFinalState']) + self.assertEqual(None, self._update_queue[0].get('status')) + self._update_queue = [] + + def testInternalError(self): + """Simulate internal error when setting a new breakpoint. + + Bytecode rewriting breakpoints are not supported for methods with more + than 65K constants. 
We generate such a method and try to set breakpoint in + it. + """ + + with open(os.path.join(self._test_package_dir, 'intern_err.py'), 'w') as f: + f.write('def DoSums():\n') + f.write(' x = 0\n') + for i in range(70000): + f.write(' x = x + %d\n' % i) + f.write(' print(x)\n') + + import intern_err # pylint: disable=g-import-not-at-top,unused-variable + + python_breakpoint.PythonBreakpoint( + dict(self._template, location={'path': 'intern_err.py', 'line': 100}), + self, + self, + None) + + self.assertEqual(set(['BP_ID']), self._completed) + self.assertLen(self._update_queue, 1) + self.assertEqual( + {'isError': True, + 'description': {'format': 'Internal error occurred'}}, + self._update_queue[0]['status']) + + def testInvalidCondition(self): + python_breakpoint.PythonBreakpoint( + dict(self._template, condition='2+'), + self, + self, + None) + self.assertEqual(set(['BP_ID']), self._completed) + self.assertLen(self._update_queue, 1) + self.assertTrue(self._update_queue[0]['isFinalState']) + self.assertEqual( + {'isError': True, + 'refersTo': 'BREAKPOINT_CONDITION', + 'description': { + 'format': 'Expression could not be compiled: $0', + 'parameters': ['unexpected EOF while parsing']}}, + self._update_queue[0]['status']) + + def testHit(self): + breakpoint = python_breakpoint.PythonBreakpoint( + self._template, self, self, None) + breakpoint._BreakpointEvent( + native.BREAKPOINT_EVENT_HIT, + inspect.currentframe()) + self.assertEqual(set(['BP_ID']), self._completed) + self.assertLen(self._update_queue, 1) + self.assertGreater(len(self._update_queue[0]['stackFrames']), 3) + self.assertTrue(self._update_queue[0]['isFinalState']) + + def testHitNewTimestamp(self): + # Override to use the new format (i.e., without the '.%f' sub-second part) + self._template['createTime'] = python_test_util.DateTimeToTimestampNew( + self._base_time) + + breakpoint = python_breakpoint.PythonBreakpoint( + self._template, self, self, None) + breakpoint._BreakpointEvent( + 
native.BREAKPOINT_EVENT_HIT, + inspect.currentframe()) + self.assertEqual(set(['BP_ID']), self._completed) + self.assertLen(self._update_queue, 1) + self.assertGreater(len(self._update_queue[0]['stackFrames']), 3) + self.assertTrue(self._update_queue[0]['isFinalState']) + + def testDoubleHit(self): + breakpoint = python_breakpoint.PythonBreakpoint( + self._template, self, self, None) + breakpoint._BreakpointEvent( + native.BREAKPOINT_EVENT_HIT, + inspect.currentframe()) + breakpoint._BreakpointEvent( + native.BREAKPOINT_EVENT_HIT, + inspect.currentframe()) + self.assertEqual(set(['BP_ID']), self._completed) + self.assertLen(self._update_queue, 1) + + def testEndToEndUnconditional(self): + def Trigger(): + pass # BPTAG: E2E_UNCONDITIONAL + + path, line = python_test_util.ResolveTag(type(self), 'E2E_UNCONDITIONAL') + breakpoint = python_breakpoint.PythonBreakpoint( + {'id': 'BP_ID', + 'location': {'path': path, 'line': line}}, + self, + self, + None) + self.assertEmpty(self._update_queue) + Trigger() + self.assertLen(self._update_queue, 1) + breakpoint.Clear() + + def testEndToEndConditional(self): + def Trigger(): + for i in range(2): + self.assertLen(self._update_queue, i) # BPTAG: E2E_CONDITIONAL + + path, line = python_test_util.ResolveTag(type(self), 'E2E_CONDITIONAL') + breakpoint = python_breakpoint.PythonBreakpoint( + {'id': 'BP_ID', + 'location': {'path': path, 'line': line}, + 'condition': 'i == 1'}, + self, + self, + None) + Trigger() + breakpoint.Clear() + + def testEndToEndCleared(self): + path, line = python_test_util.ResolveTag(type(self), 'E2E_CLEARED') + breakpoint = python_breakpoint.PythonBreakpoint( + {'id': 'BP_ID', + 'location': {'path': path, 'line': line}}, + self, + self, + None) + breakpoint.Clear() + self.assertEmpty(self._update_queue) # BPTAG: E2E_CLEARED + + def testBreakpointCancellationEvent(self): + events = [ + native.BREAKPOINT_EVENT_GLOBAL_CONDITION_QUOTA_EXCEEDED, + native.BREAKPOINT_EVENT_BREAKPOINT_CONDITION_QUOTA_EXCEEDED, + 
native.BREAKPOINT_EVENT_CONDITION_EXPRESSION_MUTABLE] + for event in events: + breakpoint = python_breakpoint.PythonBreakpoint( + self._template, + self, + self, + None) + breakpoint._BreakpointEvent(event, None) + self.assertLen(self._update_queue, 1) + self.assertEqual(set(['BP_ID']), self._completed) + + self._update_queue = [] + self._completed = set() + + def testExpirationTime(self): + breakpoint = python_breakpoint.PythonBreakpoint( + self._template, self, self, None) + breakpoint.Clear() + self.assertEqual( + datetime(year=2015, month=1, day=2), + breakpoint.GetExpirationTime()) + + def testExpirationTimeWithExpiresIn(self): + definition = self._template.copy() + definition['expires_in'] = { + 'seconds': 300 # 5 minutes + } + + breakpoint = python_breakpoint.PythonBreakpoint( + definition, self, self, None) + breakpoint.Clear() + self.assertEqual( + datetime(year=2015, month=1, day=2), + breakpoint.GetExpirationTime()) + + def testExpiration(self): + breakpoint = python_breakpoint.PythonBreakpoint( + self._template, self, self, None) + breakpoint.ExpireBreakpoint() + self.assertEqual(set(['BP_ID']), self._completed) + self.assertLen(self._update_queue, 1) + self.assertTrue(self._update_queue[0]['isFinalState']) + self.assertEqual( + {'isError': True, + 'refersTo': 'BREAKPOINT_AGE', + 'description': {'format': 'The snapshot has expired'}}, + self._update_queue[0]['status']) + + def testLogpointExpiration(self): + definition = self._template.copy() + definition['action'] = 'LOG' + breakpoint = python_breakpoint.PythonBreakpoint( + definition, self, self, None) + breakpoint.ExpireBreakpoint() + self.assertEqual(set(['BP_ID']), self._completed) + self.assertLen(self._update_queue, 1) + self.assertTrue(self._update_queue[0]['isFinalState']) + self.assertEqual( + {'isError': True, + 'refersTo': 'BREAKPOINT_AGE', + 'description': {'format': 'The logpoint has expired'}}, + self._update_queue[0]['status']) + + def testNormalizePath(self): + # Removes leading '/' 
character. + for path in ['/__init__.py', '//__init__.py', '////__init__.py']: + self.assertEqual('__init__.py', python_breakpoint._NormalizePath(path)) + + # Removes leading and trailing whitespace. + for path in [' __init__.py', '__init__.py ', ' __init__.py ']: + self.assertEqual('__init__.py', python_breakpoint._NormalizePath(path)) + + # Removes combination of leading/trailing whitespace and '/' character. + for path in [' /__init__.py', ' ///__init__.py', '////__init__.py']: + self.assertEqual('__init__.py', python_breakpoint._NormalizePath(path)) + + # Normalizes the relative path. + for path in [' ./__init__.py', '././__init__.py', ' .//abc/../__init__.py', + ' ///abc///..///def/..////__init__.py']: + self.assertEqual('__init__.py', python_breakpoint._NormalizePath(path)) + + # Does not remove non-leading, non-trailing space, or non-leading '/' + # characters. + self.assertEqual( + 'foo bar/baz/__init__.py', + python_breakpoint._NormalizePath('/foo bar/baz/__init__.py')) + self.assertEqual( + 'foo/bar baz/__init__.py', + python_breakpoint._NormalizePath('/foo/bar baz/__init__.py')) + self.assertEqual( + 'foo/bar/baz/__in it__.py', + python_breakpoint._NormalizePath('/foo/bar/baz/__in it__.py')) + +if __name__ == '__main__': + absltest.main() diff --git a/tests/python_test_util.py b/tests/python_test_util.py new file mode 100644 index 0000000..44e231c --- /dev/null +++ b/tests/python_test_util.py @@ -0,0 +1,186 @@ +"""Set of helper methods for Python debuglet unit and component tests.""" + +import inspect +import re + + +def GetModuleInfo(obj): + """Gets the source file path and breakpoint tags for a module. + + Breakpoint tag is a named label of a source line. The tag is marked + with "# BPTAG: XXX" comment. + + Args: + obj: any object inside the queried module. + + Returns: + (path, tags) tuple where tags is a dictionary mapping tag name to + line numbers where this tag appears. 
+ """ + return (inspect.getsourcefile(obj), GetSourceFileTags(obj)) + + +def GetSourceFileTags(source): + """Gets breakpoint tags for the specified source file. + + Breakpoint tag is a named label of a source line. The tag is marked + with "# BPTAG: XXX" comment. + + Args: + source: either path to the .py file to analyze or any code related + object (e.g. module, function, code object). + + Returns: + Dictionary mapping tag name to line numbers where this tag appears. + """ + if isinstance(source, str): + lines = open(source, 'r').read().splitlines() + start_line = 1 # line number is 1 based + else: + lines, start_line = inspect.getsourcelines(source) + if not start_line: # "getsourcelines" returns start_line of 0 for modules. + start_line = 1 + + tags = {} + regex = re.compile(r'# BPTAG: ([0-9a-zA-Z_]+)\s*$') + for n, line in enumerate(lines): + m = regex.search(line) + if m: + tag = m.group(1) + if tag in tags: + tags[tag].append(n + start_line) + else: + tags[tag] = [n + start_line] + + return tags + + +def ResolveTag(obj, tag): + """Resolves the breakpoint tag into source file path and a line number. + + Breakpoint tag is a named label of a source line. The tag is marked + with "# BPTAG: XXX" comment. + + Raises + + Args: + obj: any object inside the queried module. + tag: tag name to resolve. + + Raises: + Exception: if no line in the source file define the specified tag or if + more than one line define the tag. + + Returns: + (path, line) tuple, where line is the line number where the tag appears. + """ + path, tags = GetModuleInfo(obj) + if tag not in tags: + raise Exception('tag %s not found' % tag) + lines = tags[tag] + if len(lines) != 1: + raise Exception('tag %s is ambiguous (lines: %s)' % (tag, lines)) + return path, lines[0] + + +def DateTimeToTimestamp(t): + """Converts the specified time to Timestamp format. 
+ + Args: + t: datetime instance + + Returns: + Time in Timestamp format + """ + return t.strftime('%Y-%m-%dT%H:%M:%S.%f') + 'Z' + + +def DateTimeToTimestampNew(t): + """Converts the specified time to Timestamp format in seconds granularity. + + Args: + t: datetime instance + + Returns: + Time in Timestamp format in seconds granularity + """ + return t.strftime('%Y-%m-%dT%H:%M:%S') + 'Z' + + +def PackFrameVariable(breakpoint, name, frame=0, collection='locals'): + """Finds local variable or argument by name. + + Indirections created through varTableIndex are recursively collapsed. Fails + the test case if the named variable is not found. + + Args: + breakpoint: queried breakpoint. + name: name of the local variable or argument. + frame: stack frame index to examine. + collection: 'locals' to get local variable or 'arguments' for an argument. + + Returns: + Single dictionary of variable data. + + Raises: + AssertionError: if the named variable not found. + """ + for variable in breakpoint['stackFrames'][frame][collection]: + if variable['name'] == name: + return _Pack(variable, breakpoint) + + raise AssertionError('Variable %s not found in frame %d collection %s' % ( + name, frame, collection)) + + +def PackWatchedExpression(breakpoint, expression): + """Finds watched expression by index. + + Indirections created through varTableIndex are recursively collapsed. Fails + the test case if the named variable is not found. + + Args: + breakpoint: queried breakpoint. + expression: index of the watched expression. + + Returns: + Single dictionary of variable data. + """ + return _Pack(breakpoint['evaluatedExpressions'][expression], breakpoint) + + +def _Pack(variable, breakpoint): + """Recursively collapses indirections created through varTableIndex. + + Circular references by objects are not supported. If variable subtree + has circular references, this function will hang. + + Variable members are sorted by name. 
This helps asserting the content of + variable since Python has no guarantees over the order of keys of a + dictionary. + + Args: + variable: variable object to pack. Not modified. + breakpoint: queried breakpoint. + + Returns: + A new dictionary with packed variable object. + """ + packed = dict(variable) + + while 'varTableIndex' in packed: + ref = breakpoint['variableTable'][packed['varTableIndex']] + assert 'name' not in ref + assert 'value' not in packed + assert 'members' not in packed + assert 'status' not in ref and 'status' not in packed + del packed['varTableIndex'] + packed.update(ref) + + if 'members' in packed: + packed['members'] = sorted( + [_Pack(m, breakpoint) for m in packed['members']], + key=lambda m: m.get('name', '')) + + return packed + diff --git a/tests/uniquifier_computer_test.py b/tests/uniquifier_computer_test.py new file mode 100644 index 0000000..5157772 --- /dev/null +++ b/tests/uniquifier_computer_test.py @@ -0,0 +1,125 @@ +"""Unit test for uniquifier_computer module.""" + +import os +import sys +import tempfile + +from absl.testing import absltest + +from googleclouddebugger import uniquifier_computer + + +class UniquifierComputerTest(absltest.TestCase): + + def _Compute(self, files): + """Creates a directory structure and computes uniquifier on it. + + Args: + files: dictionary of relative path to file content. + + Returns: + Uniquifier data lines. 
+ """ + + class Hash(object): + """Fake implementation of hash to collect raw data.""" + + def __init__(self): + self.data = b'' + + def update(self, s): + self.data += s + + root = tempfile.mkdtemp('', 'fake_app_') + for relative_path, content in files.items(): + path = os.path.join(root, relative_path) + directory = os.path.split(path)[0] + if not os.path.exists(directory): + os.makedirs(directory) + with open(path, 'w') as f: + f.write(content) + + sys.path.insert(0, root) + try: + hash_obj = Hash() + uniquifier_computer.ComputeApplicationUniquifier(hash_obj) + return [ + u.decode() for u in ( + hash_obj.data.rstrip(b'\n').split(b'\n') if hash_obj.data else []) + ] + finally: + del sys.path[0] + + def testEmpty(self): + self.assertListEqual( + [], + self._Compute({})) + + def testBundle(self): + self.assertListEqual( + ['first.py:1', + 'in1/__init__.py:6', + 'in1/a.py:3', + 'in1/b.py:4', + 'in1/in2/__init__.py:7', + 'in1/in2/c.py:5', + 'second.py:2'], + self._Compute({ + 'db.app': 'abc', + 'first.py': 'a', + 'second.py': 'bb', + 'in1/a.py': 'ccc', + 'in1/b.py': 'dddd', + 'in1/in2/c.py': 'eeeee', + 'in1/__init__.py': 'ffffff', + 'in1/in2/__init__.py': 'ggggggg'})) + + def testEmptyFile(self): + self.assertListEqual( + ['empty.py:0'], + self._Compute({ + 'empty.py': ''})) + + def testNonPythonFilesIgnored(self): + self.assertListEqual( + ['real.py:1'], + self._Compute({ + 'file.p': '', + 'file.pya': '', + 'real.py': '1'})) + + def testNonPackageDirectoriesIgnored(self): + self.assertListEqual( + ['dir2/__init__.py:1'], + self._Compute({ + 'dir1/file.py': '', + 'dir2/__init__.py': 'a', + 'dir2/image.gif': ''})) + + def testDepthLimit(self): + self.assertListEqual( + [''.join(str(n) + '/' for n in range(1, m + 1)) + '__init__.py:%d' % m + for m in range(9, 0, -1)], + self._Compute({ + '1/__init__.py': '1', + '1/2/__init__.py': '2' * 2, + '1/2/3/__init__.py': '3' * 3, + '1/2/3/4/__init__.py': '4' * 4, + '1/2/3/4/5/__init__.py': '5' * 5, + '1/2/3/4/5/6/__init__.py': 
'6' * 6, + '1/2/3/4/5/6/7/__init__.py': '7' * 7, + '1/2/3/4/5/6/7/8/__init__.py': '8' * 8, + '1/2/3/4/5/6/7/8/9/__init__.py': '9' * 9, + '1/2/3/4/5/6/7/8/9/10/__init__.py': 'a' * 10, + '1/2/3/4/5/6/7/8/9/10/11/__init__.py': 'b' * 11})) + + def testPrecedence(self): + self.assertListEqual( + ['my.py:3'], + self._Compute({ + 'my.pyo': 'a', + 'my.pyc': 'aa', + 'my.py': 'aaa'})) + +if __name__ == '__main__': + absltest.main() diff --git a/tests/yaml_data_visibility_config_reader_test.py b/tests/yaml_data_visibility_config_reader_test.py new file mode 100644 index 0000000..a197f5c --- /dev/null +++ b/tests/yaml_data_visibility_config_reader_test.py @@ -0,0 +1,117 @@ +"""Tests for yaml_data_visibility_config_reader.""" + +import os +import sys +from unittest import mock + +from six import StringIO + +from absl.testing import absltest +from googleclouddebugger import yaml_data_visibility_config_reader + + +class StringIOOpen(object): + """An open for StringIO that supports "with" semantics. + + I tried using mock.mock_open, but the read logic in the yaml.load code is + incompatible with the returned mock object, leading to a test hang/timeout. + """ + + def __init__(self, data): + self.file_obj = StringIO(data) + + def __enter__(self): + return self.file_obj + + def __exit__(self, type, value, traceback): # pylint: disable=redefined-builtin + pass + + +class YamlDataVisibilityConfigReaderTest(absltest.TestCase): + + def testOpenAndReadSuccess(self): + data = """ + blacklist: + - bl1 + """ + path_prefix = 'googleclouddebugger.' + with mock.patch(path_prefix + 'yaml_data_visibility_config_reader.open', + create=True) as m: + m.return_value = StringIOOpen(data) + config = yaml_data_visibility_config_reader.OpenAndRead() + m.assert_called_with(os.path.join(sys.path[0], 'debugger-blacklist.yaml'), + 'r') + self.assertEqual(config.blacklist_patterns, ['bl1']) + + def testOpenAndReadFileNotFound(self): + path_prefix = 'googleclouddebugger.' 
+ with mock.patch(path_prefix + 'yaml_data_visibility_config_reader.open', + create=True, side_effect=IOError('IO Error')): + f = yaml_data_visibility_config_reader.OpenAndRead() + self.assertIsNone(f) + + def testReadDataSuccess(self): + data = """ + blacklist: + - bl1 + - bl2 + whitelist: + - wl1 + - wl2.* + """ + + config = yaml_data_visibility_config_reader.Read(StringIO(data)) + self.assertItemsEqual(config.blacklist_patterns, ('bl1', 'bl2')) + self.assertItemsEqual(config.whitelist_patterns, ('wl1', 'wl2.*')) + + def testYAMLLoadError(self): + class ErrorIO(object): + + def read(self, size): + del size # Unused + raise IOError('IO Error') + + with self.assertRaises(yaml_data_visibility_config_reader.YAMLLoadError): + yaml_data_visibility_config_reader.Read(ErrorIO()) + + def testBadYamlSyntax(self): + data = """ + blacklist: whitelist: + """ + + with self.assertRaises(yaml_data_visibility_config_reader.ParseError): + yaml_data_visibility_config_reader.Read(StringIO(data)) + + def testUnknownConfigKeyError(self): + data = """ + foo: + - bar + """ + + with self.assertRaises( + yaml_data_visibility_config_reader.UnknownConfigKeyError): + yaml_data_visibility_config_reader.Read(StringIO(data)) + + def testNotAListError(self): + data = """ + blacklist: + foo: + - bar + """ + + with self.assertRaises(yaml_data_visibility_config_reader.NotAListError): + yaml_data_visibility_config_reader.Read(StringIO(data)) + + def testElementNotAStringError(self): + data = """ + blacklist: + - 5 + """ + + with self.assertRaises( + yaml_data_visibility_config_reader.ElementNotAStringError): + yaml_data_visibility_config_reader.Read(StringIO(data)) + + +if __name__ == '__main__': + absltest.main() From dc02cdc507142b36d1b4fa85b66094a72f20bb86 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Mon, 13 Jun 2022 16:15:25 -0400 Subject: [PATCH 199/241] fix: correctly populate visit recorder (#40) fixes issue #38 --- src/googleclouddebugger/module_explorer.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/module_explorer.py b/src/googleclouddebugger/module_explorer.py index 75edb05..e894427 100644 --- a/src/googleclouddebugger/module_explorer.py +++ b/src/googleclouddebugger/module_explorer.py @@ -195,7 +195,7 @@ def CheckIgnoreClass(cls): code_objects = set() current = start_objects for obj in current: - visit_recorder.Record(current) + visit_recorder.Record(obj) depth = 0 while current and depth < _MAX_REFERENTS_BFS_DEPTH: From 24f1692a7cd767049df3049c9733ee52e49fd748 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Thu, 16 Jun 2022 13:57:31 -0400 Subject: [PATCH 200/241] Bump gflags and glog to newer versions (#41) --- src/build-wheels.sh | 15 +++++++++------ src/build.sh | 15 +++++++++------ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/build-wheels.sh b/src/build-wheels.sh index 8ad6c29..2d6e92d 100755 --- a/src/build-wheels.sh +++ b/src/build-wheels.sh @@ -1,7 +1,7 @@ #!/bin/bash -e -GFLAGS_URL=https://github.com/gflags/gflags/archive/v2.1.2.tar.gz -GLOG_URL=https://github.com/google/glog/archive/v0.3.4.tar.gz +GFLAGS_URL=https://github.com/gflags/gflags/archive/v2.2.2.tar.gz +GLOG_URL=https://github.com/google/glog/archive/v0.4.0.tar.gz SUPPORTED_VERSIONS=(cp36-cp36m cp37-cp37m cp38-cp38 cp39-cp39) @@ -36,9 +36,12 @@ pushd ${ROOT}/build/third_party curl -L ${GLOG_URL} -o glog.tar.gz tar xzvf glog.tar.gz cd glog-* -./configure --with-pic \ - --prefix=${ROOT}/build/third_party \ - --with-gflags=${ROOT}/build/third_party +mkdir build +cd build +cmake -DCMAKE_CXX_FLAGS=-fpic \ + -DCMAKE_PREFIX_PATH=${ROOT}/build/third_party \ + -DCMAKE_INSTALL_PREFIX:PATH=${ROOT}/build/third_party \ + .. 
make ${PARALLEL_BUILD_OPTION} make install popd @@ -54,7 +57,7 @@ verbose=1 [build_ext] include_dirs=${ROOT}/build/third_party/include -library_dirs=${ROOT}/build/third_party/lib" > ${ROOT}/setup.cfg +library_dirs=${ROOT}/build/third_party/lib:${ROOT}/build/third_party/lib64" > ${ROOT}/setup.cfg # Build the Python Cloud Debugger agent. pushd ${ROOT} diff --git a/src/build.sh b/src/build.sh index 19837d3..c8a1b8e 100755 --- a/src/build.sh +++ b/src/build.sh @@ -33,8 +33,8 @@ # Home page of glog: https://github.com/google/glog # -GFLAGS_URL=https://github.com/gflags/gflags/archive/v2.1.2.tar.gz -GLOG_URL=https://github.com/google/glog/archive/v0.3.4.tar.gz +GFLAGS_URL=https://github.com/gflags/gflags/archive/v2.2.2.tar.gz +GLOG_URL=https://github.com/google/glog/archive/v0.4.0.tar.gz ROOT=$(cd $(dirname "${BASH_SOURCE[0]}") >/dev/null; /bin/pwd -P) @@ -67,9 +67,12 @@ pushd ${ROOT}/build/third_party curl -L ${GLOG_URL} -o glog.tar.gz tar xzvf glog.tar.gz cd glog-* -./configure --with-pic \ - --prefix=${ROOT}/build/third_party \ - --with-gflags=${ROOT}/build/third_party +mkdir build +cd build +cmake -DCMAKE_CXX_FLAGS=-fpic \ + -DCMAKE_PREFIX_PATH=${ROOT}/build/third_party \ + -DCMAKE_INSTALL_PREFIX:PATH=${ROOT}/build/third_party \ + .. make ${PARALLEL_BUILD_OPTION} make install popd @@ -80,7 +83,7 @@ verbose=1 [build_ext] include_dirs=${ROOT}/build/third_party/include -library_dirs=${ROOT}/build/third_party/lib" > ${ROOT}/setup.cfg +library_dirs=${ROOT}/build/third_party/lib:${ROOT}/build/third_party/lib64" > ${ROOT}/setup.cfg # Build the Python Cloud Debugger agent. 
pushd ${ROOT} From 5dee35d68ccaf49316fe3cf64454b10075419e1a Mon Sep 17 00:00:00 2001 From: James McTavish Date: Fri, 17 Jun 2022 11:30:06 -0400 Subject: [PATCH 201/241] chore: add pylint and yapf (#42) * Add pylint and yapf for linting and formatting * Apply formatting to tests --- .pylintrc | 429 ++++++ .style.yapf | 2 + src/googleclouddebugger/__init__.py | 25 +- src/googleclouddebugger/__main__.py | 2 - .../appengine_pretty_printers.py | 1 - src/googleclouddebugger/application_info.py | 5 +- src/googleclouddebugger/backoff.py | 1 - .../breakpoints_manager.py | 15 +- src/googleclouddebugger/capture_collector.py | 155 ++- .../error_data_visibility_policy.py | 1 - src/googleclouddebugger/gcp_hub_client.py | 39 +- .../glob_data_visibility_policy.py | 3 - src/googleclouddebugger/imphook2.py | 10 +- src/googleclouddebugger/labels.py | 1 - src/googleclouddebugger/module_explorer.py | 7 +- src/googleclouddebugger/module_search2.py | 3 +- src/googleclouddebugger/module_utils2.py | 6 +- src/googleclouddebugger/python_breakpoint.py | 132 +- .../uniquifier_computer.py | 5 +- .../yaml_data_visibility_config_reader.py | 8 +- tests/application_info_test.py | 4 +- tests/breakpoints_manager_test.py | 103 +- tests/capture_collector_test.py | 1147 +++++++++++------ tests/gcp_hub_client_test.py | 144 ++- tests/glob_data_visibility_policy_test.py | 5 +- tests/imphook2_test.py | 188 ++- tests/integration_test_disabled.py | 218 +++- tests/module_explorer_test_disabled.py | 32 +- tests/module_search2_test.py | 10 +- tests/module_utils2_test.py | 46 +- tests/native_module_test.py | 74 +- tests/python_breakpoint_test_disabled.py | 346 ++--- tests/python_test_util.py | 5 +- tests/uniquifier_computer_test.py | 110 +- ...yaml_data_visibility_config_reader_test.py | 16 +- 35 files changed, 2083 insertions(+), 1215 deletions(-) create mode 100644 .pylintrc create mode 100644 .style.yapf diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..be7798e --- /dev/null +++ 
b/.pylintrc @@ -0,0 +1,429 @@ +# This Pylint rcfile contains a best-effort configuration to uphold the +# best-practices and style described in the Google Python style guide: +# https://google.github.io/styleguide/pyguide.html +# +# Its canonical open-source location is: +# https://google.github.io/styleguide/pylintrc + +[MASTER] + +# Files or directories to be skipped. They should be base names, not paths. +ignore=third_party + +# Files or directories matching the regex patterns are skipped. The regex +# matches against base names, not paths. +ignore-patterns= + +# Pickle collected data for later comparisons. +persistent=no + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. +load-plugins= + +# Use multiple processes to speed up Pylint. +jobs=4 + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED +confidence= + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +#enable= + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once).You can also use "--disable=all" to +# disable everything first and then reenable specific checks. 
For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use"--disable=all --enable=classes +# --disable=W" +disable=abstract-method, + apply-builtin, + arguments-differ, + attribute-defined-outside-init, + backtick, + bad-option-value, + basestring-builtin, + buffer-builtin, + c-extension-no-member, + consider-using-enumerate, + cmp-builtin, + cmp-method, + coerce-builtin, + coerce-method, + delslice-method, + div-method, + duplicate-code, + eq-without-hash, + execfile-builtin, + file-builtin, + filter-builtin-not-iterating, + fixme, + getslice-method, + global-statement, + hex-method, + idiv-method, + implicit-str-concat, + import-error, + import-self, + import-star-module-level, + inconsistent-return-statements, + input-builtin, + intern-builtin, + invalid-str-codec, + locally-disabled, + long-builtin, + long-suffix, + map-builtin-not-iterating, + misplaced-comparison-constant, + missing-function-docstring, + metaclass-assignment, + next-method-called, + next-method-defined, + no-absolute-import, + no-else-break, + no-else-continue, + no-else-raise, + no-else-return, + no-init, # added + no-member, + no-name-in-module, + no-self-use, + nonzero-method, + oct-method, + old-division, + old-ne-operator, + old-octal-literal, + old-raise-syntax, + parameter-unpacking, + print-statement, + raising-string, + range-builtin-not-iterating, + raw_input-builtin, + rdiv-method, + reduce-builtin, + relative-import, + reload-builtin, + round-builtin, + setslice-method, + signature-differs, + standarderror-builtin, + suppressed-message, + sys-max-int, + too-few-public-methods, + too-many-ancestors, + too-many-arguments, + too-many-boolean-expressions, + too-many-branches, + too-many-instance-attributes, + too-many-locals, + too-many-nested-blocks, + too-many-public-methods, + too-many-return-statements, + 
too-many-statements, + trailing-newlines, + unichr-builtin, + unicode-builtin, + unnecessary-pass, + unpacking-in-except, + useless-else-on-loop, + useless-object-inheritance, + useless-suppression, + using-cmp-argument, + wrong-import-order, + xrange-builtin, + zip-builtin-not-iterating, + + +[REPORTS] + +# Set the output format. Available formats are text, parseable, colorized, msvs +# (visual studio) and html. You can also give a reporter class, eg +# mypackage.mymodule.MyReporterClass. +output-format=text + +# Tells whether to display a full report or only the messages +reports=no + +# Python expression which should return a note less than 10 (10 is the highest +# note). You have access to the variables errors warning, statement which +# respectively contain the number of errors / warnings messages and the total +# number of statements analyzed. This is used by the global evaluation report +# (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details +#msg-template= + + +[BASIC] + +# Good variable names which should always be accepted, separated by a comma +good-names=main,_ + +# Bad variable names which should always be refused, separated by a comma +bad-names= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Include a hint for the correct naming format with invalid-name +include-naming-hint=no + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. 
+property-classes=abc.abstractproperty,cached_property.cached_property,cached_property.threaded_cached_property,cached_property.cached_property_with_ttl,cached_property.threaded_cached_property_with_ttl + +# Regular expression matching correct function names +function-rgx=^(?:(?P<exempt>setUp|tearDown|setUpModule|tearDownModule)|(?P<camel_case>_?[A-Z][a-zA-Z0-9]*)|(?P<snake_case>_?[a-z][a-z0-9_]*))$ + +# Regular expression matching correct variable names +variable-rgx=^[a-z][a-z0-9_]*$ + +# Regular expression matching correct constant names +const-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$ + +# Regular expression matching correct attribute names +attr-rgx=^_{0,2}[a-z][a-z0-9_]*$ + +# Regular expression matching correct argument names +argument-rgx=^[a-z][a-z0-9_]*$ + +# Regular expression matching correct class attribute names +class-attribute-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$ + +# Regular expression matching correct inline iteration names +inlinevar-rgx=^[a-z][a-z0-9_]*$ + +# Regular expression matching correct class names +class-rgx=^_?[A-Z][a-zA-Z0-9]*$ + +# Regular expression matching correct module names +module-rgx=^(_?[a-z][a-z0-9_]*|__init__)$ + +# Regular expression matching correct method names +method-rgx=(?x)^(?:(?P<exempt>_[a-z0-9_]+__|runTest|setUp|tearDown|setUpTestCase|tearDownTestCase|setupSelf|tearDownClass|setUpClass|(test|assert)_*[A-Z0-9][a-zA-Z0-9_]*|next)|(?P<camel_case>_{0,2}[A-Z][a-zA-Z0-9_]*)|(?P<snake_case>_{0,2}[a-z][a-z0-9_]*))$ + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=(__.*__|main|test.*|.*test|.*Test)$ + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=10 + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers.
+contextmanager-decorators=contextlib.contextmanager,contextlib2.contextmanager + +# Tells whether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis. It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + + +[FORMAT] + +# Maximum number of characters on a single line. +max-line-length=80 + +# TODO(https://github.com/PyCQA/pylint/issues/3352): Direct pylint to exempt +# lines made too long by directives to pytype. + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=(?x)( + ^\s*(\#\ )?<?https?://\S+>?$| + ^\s*(from\s+\S+\s+)?import\s+.+$) + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=yes + +# Maximum number of lines in a module +max-module-lines=99999 + +# String used as indentation unit. The internal Google style guide mandates 2 +# spaces. Google's externally-published style guide says 4, consistent with +# PEP 8. Here, we use 2 spaces, for conformity with many open-sourced Google +# projects (like TensorFlow). +indent-string='  ' + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# Expected format of line ending, e.g.
empty (any line ending), LF or CRLF. +expected-line-ending-format= + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=TODO + + +[STRING] + +# This flag controls whether inconsistent-quotes generates a warning when the +# character used as a quote delimiter is used inconsistently within a module. +check-quote-consistency=yes + + +[VARIABLES] + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# A regular expression matching the name of dummy variables (i.e. expectedly +# not used). +dummy-variables-rgx=^\*{0,2}(_$|unused_|dummy_) + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid to define new builtins when possible. +additional-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_,_cb + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six,six.moves,past.builtins,future.builtins,functools + + +[LOGGING] + +# Logging modules to check that the string format arguments are in logging +# function parameter format +logging-modules=logging,absl.logging,tensorflow.io.logging + + +[SIMILARITIES] + +# Minimum lines number of a similarity. +min-similarity-lines=4 + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. +ignore-imports=no + + +[SPELLING] + +# Spelling dictionary name. Available dictionaries: none. To make it working +# install python-enchant package. +spelling-dict= + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains private dictionary; one word per line. 
+spelling-private-dict-file= + +# Tells whether to store unknown words to indicated private dictionary in +# --spelling-private-dict-file option instead of raising a message. +spelling-store-unknown-words=no + + +[IMPORTS] + +# Deprecated modules which should not be used, separated by a comma +deprecated-modules=regsub, + TERMIOS, + Bastion, + rexec, + sets + +# Create a graph of every (i.e. internal and external) dependencies in the +# given file (report RP0402 must not be disabled) +import-graph= + +# Create a graph of external dependencies in the given file (report RP0402 must +# not be disabled) +ext-import-graph= + +# Create a graph of internal dependencies in the given file (report RP0402 must +# not be disabled) +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant, absl + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + + +[CLASSES] + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict, + _fields, + _replace, + _source, + _make + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls, + class_ + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. 
Defaults to +# "Exception" +overgeneral-exceptions=StandardError, + Exception, + BaseException diff --git a/.style.yapf b/.style.yapf new file mode 100644 index 0000000..fdd0723 --- /dev/null +++ b/.style.yapf @@ -0,0 +1,2 @@ +[style] +based_on_style = yapf diff --git a/src/googleclouddebugger/__init__.py b/src/googleclouddebugger/__init__.py index f9364eb..c0daec0 100644 --- a/src/googleclouddebugger/__init__.py +++ b/src/googleclouddebugger/__init__.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Main module for Python Cloud Debugger. The debugger is enabled in a very similar way to enabling pdb. @@ -49,15 +48,14 @@ def _StartDebugger(): global _breakpoints_manager cdbg_native.InitializeModule(_flags) - cdbg_native.LogInfo('Initializing Cloud Debugger Python agent version: %s' % - __version__) + cdbg_native.LogInfo( + f'Initializing Cloud Debugger Python agent version: {__version__}') _hub_client = gcp_hub_client.GcpHubClient() visibility_policy = _GetVisibilityPolicy() _breakpoints_manager = breakpoints_manager.BreakpointsManager( - _hub_client, - visibility_policy) + _hub_client, visibility_policy) # Set up loggers for logpoints. 
capture_collector.SetLogger(logging.getLogger()) @@ -69,8 +67,7 @@ def _StartDebugger(): _breakpoints_manager.SetActiveBreakpoints) _hub_client.on_idle = _breakpoints_manager.CheckBreakpointsExpiration _hub_client.SetupAuth( - _flags.get('project_id'), - _flags.get('project_number'), + _flags.get('project_id'), _flags.get('project_number'), _flags.get('service_account_json_file')) _hub_client.SetupCanaryMode( _flags.get('breakpoint_enable_canary'), @@ -85,7 +82,7 @@ def _GetVisibilityPolicy(): visibility_config = yaml_data_visibility_config_reader.OpenAndRead() except yaml_data_visibility_config_reader.Error as err: return error_data_visibility_policy.ErrorDataVisibilityPolicy( - 'Could not process debugger config: %s' % err) + f'Could not process debugger config: {err}') if visibility_config: return glob_data_visibility_policy.GlobDataVisibilityPolicy( @@ -121,16 +118,18 @@ def _DebuggerMain(): sys.path[0] = os.path.dirname(app_path) - import __main__ # pylint: disable=g-import-not-at-top + import __main__ # pylint: disable=import-outside-toplevel __main__.__dict__.clear() - __main__.__dict__.update({'__name__': '__main__', - '__file__': app_path, - '__builtins__': __builtins__}) + __main__.__dict__.update({ + '__name__': '__main__', + '__file__': app_path, + '__builtins__': __builtins__ + }) locals = globals = __main__.__dict__ # pylint: disable=redefined-builtin sys.modules['__main__'] = __main__ - with open(app_path) as f: + with open(app_path, encoding='utf-8') as f: code = compile(f.read(), app_path, 'exec') exec(code, globals, locals) # pylint: disable=exec-used diff --git a/src/googleclouddebugger/__main__.py b/src/googleclouddebugger/__main__.py index 1f55572..edfe6c0 100644 --- a/src/googleclouddebugger/__main__.py +++ b/src/googleclouddebugger/__main__.py @@ -11,10 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- """Entry point for Python Cloud Debugger.""" # pylint: disable=invalid-name if __name__ == '__main__': import googleclouddebugger googleclouddebugger._DebuggerMain() - diff --git a/src/googleclouddebugger/appengine_pretty_printers.py b/src/googleclouddebugger/appengine_pretty_printers.py index 036caad..9136d04 100644 --- a/src/googleclouddebugger/appengine_pretty_printers.py +++ b/src/googleclouddebugger/appengine_pretty_printers.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Formatters for well known objects that don't show up nicely by default.""" import six diff --git a/src/googleclouddebugger/application_info.py b/src/googleclouddebugger/application_info.py index 9909f37..045c06b 100644 --- a/src/googleclouddebugger/application_info.py +++ b/src/googleclouddebugger/application_info.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Module to fetch information regarding the current application. Some examples of the information the methods in this module fetch are platform @@ -65,8 +64,8 @@ def GetRegion(): # Otherwise try fetching it from the metadata server. try: - response = requests.get(_GCP_METADATA_REGION_URL, - headers=_GCP_METADATA_HEADER) + response = requests.get( + _GCP_METADATA_REGION_URL, headers=_GCP_METADATA_HEADER) response.raise_for_status() # Example of response text: projects/id/regions/us-central1. So we strip # everything before the last /. diff --git a/src/googleclouddebugger/backoff.py b/src/googleclouddebugger/backoff.py index edc024f..f12237d 100644 --- a/src/googleclouddebugger/backoff.py +++ b/src/googleclouddebugger/backoff.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. - """Implements exponential backoff for retry timeouts.""" diff --git a/src/googleclouddebugger/breakpoints_manager.py b/src/googleclouddebugger/breakpoints_manager.py index 07f4094..b9843d0 100644 --- a/src/googleclouddebugger/breakpoints_manager.py +++ b/src/googleclouddebugger/breakpoints_manager.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Manages lifetime of individual breakpoint objects.""" from datetime import datetime @@ -40,9 +39,7 @@ class BreakpointsManager(object): of a captured variable. May be None if no policy is available. """ - def __init__(self, - hub_client, - data_visibility_policy): + def __init__(self, hub_client, data_visibility_policy): self._hub_client = hub_client self.data_visibility_policy = data_visibility_policy @@ -78,13 +75,11 @@ def SetActiveBreakpoints(self, breakpoints_data): # Create new breakpoints. self._active.update([ (x['id'], - python_breakpoint.PythonBreakpoint( - x, - self._hub_client, - self, - self.data_visibility_policy)) + python_breakpoint.PythonBreakpoint(x, self._hub_client, self, + self.data_visibility_policy)) for x in breakpoints_data - if x['id'] in ids - six.viewkeys(self._active) - self._completed]) + if x['id'] in ids - six.viewkeys(self._active) - self._completed + ]) # Remove entries from completed_breakpoints_ that weren't listed in # breakpoints_data vector. These are confirmed to have been removed by the diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index fc79366..d57c7b4 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. - """Captures application state on a breakpoint hit.""" # TODO: rename this file to collector.py. @@ -280,8 +279,8 @@ def __init__(self, definition, data_visibility_policy): # because in the case where the user has not indicated a preference, we # don't want a single large object on the stack to use the entire max_size # quota and hide the rest of the data. - self.expression_capture_limits = _CaptureLimits(max_value_len=32768, - max_list_items=32768) + self.expression_capture_limits = _CaptureLimits( + max_value_len=32768, max_list_items=32768) def Collect(self, top_frame): """Collects call stack, local variables and objects. @@ -301,8 +300,9 @@ def Collect(self, top_frame): # Evaluate watched expressions. if 'expressions' in self.breakpoint: self.breakpoint['evaluatedExpressions'] = [ - self._CaptureExpression(top_frame, expression) for expression - in self.breakpoint['expressions']] + self._CaptureExpression(top_frame, expression) + for expression in self.breakpoint['expressions'] + ] while frame and (len(breakpoint_frames) < self.max_frames): line = top_line if frame == top_frame else frame.f_lineno @@ -332,7 +332,10 @@ def Collect(self, top_frame): 'description': { 'format': ('INTERNAL ERROR: Failed while capturing locals ' 'of frame $0: $1'), - 'parameters': [str(len(breakpoint_frames)), str(e)]}} + 'parameters': [str(len(breakpoint_frames)), + str(e)] + } + } # Number of entries in _var_table. Starts at 1 (index 0 is the 'buffer full' # status value). @@ -340,10 +343,12 @@ def Collect(self, top_frame): # Explore variables table in BFS fashion. The variables table will grow # inside CaptureVariable as we encounter new references. 
- while (num_vars < len(self._var_table)) and ( - self._total_size < self.max_size): + while (num_vars < len(self._var_table)) and (self._total_size < + self.max_size): self._var_table[num_vars] = self.CaptureVariable( - self._var_table[num_vars], 0, self.default_capture_limits, + self._var_table[num_vars], + 0, + self.default_capture_limits, can_enqueue=False) # Move on to the next entry in the variable table. @@ -367,18 +372,22 @@ def CaptureFrameLocals(self, frame): (arguments, locals) tuple. """ # Capture all local variables (including method arguments). - variables = {n: self.CaptureNamedVariable(n, v, 1, - self.default_capture_limits) - for n, v in six.viewitems(frame.f_locals)} + variables = { + n: self.CaptureNamedVariable(n, v, 1, self.default_capture_limits) + for n, v in six.viewitems(frame.f_locals) + } # Split between locals and arguments (keeping arguments in the right order). nargs = frame.f_code.co_argcount - if frame.f_code.co_flags & inspect.CO_VARARGS: nargs += 1 - if frame.f_code.co_flags & inspect.CO_VARKEYWORDS: nargs += 1 + if frame.f_code.co_flags & inspect.CO_VARARGS: + nargs += 1 + if frame.f_code.co_flags & inspect.CO_VARKEYWORDS: + nargs += 1 frame_arguments = [] for argname in frame.f_code.co_varnames[:nargs]: - if argname in variables: frame_arguments.append(variables.pop(argname)) + if argname in variables: + frame_arguments.append(variables.pop(argname)) return (frame_arguments, list(six.viewvalues(variables))) @@ -400,8 +409,9 @@ def CaptureNamedVariable(self, name, value, depth, limits): name = str(id(name)) self._total_size += len(name) - v = (self.CheckDataVisibility(value) or - self.CaptureVariable(value, depth, limits)) + v = ( + self.CheckDataVisibility(value) or + self.CaptureVariable(value, depth, limits)) v['name'] = name return v @@ -449,23 +459,30 @@ def CaptureVariablesList(self, items, depth, empty_message, limits): """ v = [] for name, value in items: - if (self._total_size >= self.max_size) or ( - len(v) >= 
limits.max_list_items): + if (self._total_size >= self.max_size) or (len(v) >= + limits.max_list_items): v.append({ 'status': { 'refersTo': 'VARIABLE_VALUE', 'description': { - 'format': - ('Only first $0 items were captured. Use in an ' - 'expression to see all items.'), - 'parameters': [str(len(v))]}}}) + 'format': ('Only first $0 items were captured. Use in an ' + 'expression to see all items.'), + 'parameters': [str(len(v))] + } + } + }) break v.append(self.CaptureNamedVariable(name, value, depth, limits)) if not v: - return [{'status': { - 'refersTo': 'VARIABLE_NAME', - 'description': {'format': empty_message}}}] + return [{ + 'status': { + 'refersTo': 'VARIABLE_NAME', + 'description': { + 'format': empty_message + } + } + }] return v @@ -508,31 +525,34 @@ def CaptureVariableInternal(self, value, depth, limits, can_enqueue=True): return {'value': 'None'} if isinstance(value, _PRIMITIVE_TYPES): - r = _TrimString(repr(value), # Primitive type, always immutable. - min(limits.max_value_len, - self.max_size - self._total_size)) + r = _TrimString( + repr(value), # Primitive type, always immutable. + min(limits.max_value_len, self.max_size - self._total_size)) self._total_size += len(r) return {'value': r, 'type': type(value).__name__} if isinstance(value, _DATE_TYPES): r = str(value) # Safe to call str(). self._total_size += len(r) - return {'value': r, 'type': 'datetime.'+ type(value).__name__} + return {'value': r, 'type': 'datetime.' + type(value).__name__} if isinstance(value, dict): # Do not use iteritems() here. If GC happens during iteration (which it # often can for dictionaries containing large variables), you will get a # RunTimeError exception. 
items = [(repr(k), v) for (k, v) in value.items()] - return {'members': - self.CaptureVariablesList(items, depth + 1, - EMPTY_DICTIONARY, limits), - 'type': 'dict'} + return { + 'members': + self.CaptureVariablesList(items, depth + 1, EMPTY_DICTIONARY, + limits), + 'type': + 'dict' + } if isinstance(value, _VECTOR_TYPES): fields = self.CaptureVariablesList( - (('[%d]' % i, x) for i, x in enumerate(value)), - depth + 1, EMPTY_COLLECTION, limits) + (('[%d]' % i, x) for i, x in enumerate(value)), depth + 1, + EMPTY_COLLECTION, limits) return {'members': fields, 'type': type(value).__name__} if isinstance(value, types.FunctionType): @@ -542,8 +562,8 @@ def CaptureVariableInternal(self, value, depth, limits, can_enqueue=True): if isinstance(value, Exception): fields = self.CaptureVariablesList( - (('[%d]' % i, x) for i, x in enumerate(value.args)), - depth + 1, EMPTY_COLLECTION, limits) + (('[%d]' % i, x) for i, x in enumerate(value.args)), depth + 1, + EMPTY_COLLECTION, limits) return {'members': fields, 'type': type(value).__name__} if can_enqueue: @@ -561,10 +581,13 @@ def CaptureVariableInternal(self, value, depth, limits, can_enqueue=True): continue fields, object_type = pretty_value - return {'members': + return { + 'members': self.CaptureVariablesList(fields, depth + 1, OBJECT_HAS_NO_FIELDS, limits), - 'type': object_type} + 'type': + object_type + } if not hasattr(value, '__dict__'): # TODO: keep "value" empty and populate the "type" field instead. @@ -580,8 +603,8 @@ def CaptureVariableInternal(self, value, depth, limits, can_enqueue=True): # Only limits.max_list_items + 1 items are copied, anything past that will # get ignored by CaptureVariablesList(). 
items = list(itertools.islice(items, limits.max_list_items + 1)) - members = self.CaptureVariablesList(items, depth + 2, - OBJECT_HAS_NO_FIELDS, limits) + members = self.CaptureVariablesList(items, depth + 2, OBJECT_HAS_NO_FIELDS, + limits) v = {'members': members} type_string = DetermineType(value) @@ -736,8 +759,12 @@ def Log(self, frame): """ # Return error if log methods were not configured globally. if not self._log_message: - return {'isError': True, - 'description': {'format': LOG_ACTION_NOT_SUPPORTED}} + return { + 'isError': True, + 'description': { + 'format': LOG_ACTION_NOT_SUPPORTED + } + } if self._quota_recovery_start_time: ms_elapsed = (time.time() - self._quota_recovery_start_time) * 1000 @@ -778,8 +805,10 @@ def _EvaluateExpressions(self, frame): Array of strings where each string corresponds to the breakpoint expression with the same index. """ - return [self._FormatExpression(frame, expression) for expression in - self._definition.get('expressions') or []] + return [ + self._FormatExpression(frame, expression) + for expression in self._definition.get('expressions') or [] + ] def _FormatExpression(self, frame, expression): """Evaluates a single watched expression and formats it into a string form. @@ -819,8 +848,7 @@ def _FormatValue(self, value, level=0): def FormatDictItem(key_value): """Formats single dictionary item.""" key, value = key_value - return (self._FormatValue(key, level + 1) + - ': ' + + return (self._FormatValue(key, level + 1) + ': ' + self._FormatValue(value, level + 1)) def LimitedEnumerate(items, formatter, level=0): @@ -840,8 +868,9 @@ def FormatList(items, formatter, level=0): return ', '.join(LimitedEnumerate(items, formatter, level=level)) if isinstance(value, _PRIMITIVE_TYPES): - return _TrimString(repr(value), # Primitive type, always immutable. - self.max_value_len) + return _TrimString( + repr(value), # Primitive type, always immutable. 
+ self.max_value_len) if isinstance(value, _DATE_TYPES): return str(value) @@ -853,8 +882,11 @@ def FormatList(items, formatter, level=0): return '{' + FormatList(six.iteritems(value), FormatDictItem) + '}' if isinstance(value, _VECTOR_TYPES): - return _ListTypeFormatString(value).format(FormatList( - value, lambda item: self._FormatValue(item, level + 1), level=level)) + return _ListTypeFormatString(value).format( + FormatList( + value, + lambda item: self._FormatValue(item, level + 1), + level=level)) if isinstance(value, types.FunctionType): return 'function ' + value.__name__ @@ -884,14 +916,18 @@ def _EvaluateExpression(frame, expression): 'refersTo': 'VARIABLE_NAME', 'description': { 'format': 'Invalid expression', - 'parameters': [str(e)]}}) + 'parameters': [str(e)] + } + }) except SyntaxError as e: return (False, { 'isError': True, 'refersTo': 'VARIABLE_NAME', 'description': { 'format': 'Expression could not be compiled: $0', - 'parameters': [e.msg]}}) + 'parameters': [e.msg] + } + }) try: return (True, native.CallImmutable(frame, code)) @@ -901,7 +937,9 @@ def _EvaluateExpression(frame, expression): 'refersTo': 'VARIABLE_VALUE', 'description': { 'format': 'Exception occurred: $0', - 'parameters': [str(e)]}}) + 'parameters': [str(e)] + } + }) def _GetFrameCodeObjectName(frame): @@ -917,8 +955,8 @@ def _GetFrameCodeObjectName(frame): # This functions under the assumption that member functions will name their # first parameter argument 'self' but has some edge-cases. if frame.f_code.co_argcount >= 1 and 'self' == frame.f_code.co_varnames[0]: - return (frame.f_locals['self'].__class__.__name__ + - '.' + frame.f_code.co_name) + return (frame.f_locals['self'].__class__.__name__ + '.' + + frame.f_code.co_name) else: return frame.f_code.co_name @@ -933,6 +971,7 @@ def _FormatMessage(template, parameters): Returns: Formatted message with parameters embedded in template placeholders. 
""" + def GetParameter(m): try: return parameters[int(m.group(0)[1:])] @@ -947,4 +986,4 @@ def _TrimString(s, max_len): """Trims the string if it exceeds max_len.""" if len(s) <= max_len: return s - return s[:max_len+1] + '...' + return s[:max_len + 1] + '...' diff --git a/src/googleclouddebugger/error_data_visibility_policy.py b/src/googleclouddebugger/error_data_visibility_policy.py index a604578..0a04c36 100644 --- a/src/googleclouddebugger/error_data_visibility_policy.py +++ b/src/googleclouddebugger/error_data_visibility_policy.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Always returns the provided error on visibility requests. Example Usage: diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index 6214479..750c8ed 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- """Communicates with Cloud Debugger backend over HTTP.""" from collections import deque @@ -28,8 +27,6 @@ import time import traceback - - import google_auth_httplib2 import googleapiclient import googleapiclient.discovery @@ -143,6 +140,7 @@ def filter(self, record): if os.path.splitext(f[1])[0] == self._my_filename: return False return True + self._log_filter = _ChildLogFilter({logging.INFO}) googleapiclient.discovery.logger.addFilter(self._log_filter) @@ -196,9 +194,11 @@ def InitializeDebuggeeLabels(self, flags): self._debuggee_labels[labels.Debuggee.VERSION] = 'unversioned' if flags: - self._debuggee_labels.update( - {name: value for (name, value) in six.iteritems(flags) - if name in _DEBUGGEE_LABELS}) + self._debuggee_labels.update({ + name: value + for (name, value) in six.iteritems(flags) + if name in _DEBUGGEE_LABELS + }) self._debuggee_labels[labels.Debuggee.PROJECT_ID] = self._project_id @@ -414,9 +414,8 @@ def _ListActiveBreakpoints(self, service): breakpoints = response.get('breakpoints') or [] if self._breakpoints != breakpoints: self._breakpoints = breakpoints - native.LogInfo( - 'Breakpoints list changed, %d active, wait token: %s' % ( - len(self._breakpoints), self._wait_token)) + native.LogInfo('Breakpoints list changed, %d active, wait token: %s' % + (len(self._breakpoints), self._wait_token)) self.on_active_breakpoints_changed(copy.deepcopy(self._breakpoints)) except BaseException: native.LogInfo('Failed to query active breakpoints: ' + @@ -461,11 +460,14 @@ def _TransmitBreakpointUpdates(self, service): try: service.debuggees().breakpoints().update( - debuggeeId=self._debuggee_id, id=breakpoint['id'], - body={'breakpoint': breakpoint}).execute() - - native.LogInfo('Breakpoint %s update transmitted successfully' % ( - breakpoint['id'])) + debuggeeId=self._debuggee_id, + id=breakpoint['id'], + body={ + 'breakpoint': breakpoint + }).execute() + + native.LogInfo('Breakpoint %s update transmitted successfully' % + (breakpoint['id'])) except 
googleapiclient.errors.HttpError as err: # Treat 400 error codes (except timeout) as application error that will # not be retried. All other errors are assumed to be transient. @@ -495,9 +497,8 @@ def _TransmitBreakpointUpdates(self, service): # Socket errors shouldn't persist like this; reconnect. reconnect = True except BaseException: - native.LogWarning( - 'Fatal error sending breakpoint %s update: %s' % ( - breakpoint['id'], traceback.format_exc())) + native.LogWarning('Fatal error sending breakpoint %s update: %s' % + (breakpoint['id'], traceback.format_exc())) reconnect = True self._transmission_queue.extend(retry_list) @@ -513,8 +514,8 @@ def _GetDebuggee(self): """Builds the debuggee structure.""" major_version = 'v' + version.__version__.split('.')[0] python_version = ''.join(platform.python_version().split('.')[:2]) - agent_version = ('google.com/python%s-gcp/%s' % (python_version, - major_version)) + agent_version = ('google.com/python%s-gcp/%s' % + (python_version, major_version)) debuggee = { 'project': self._project_number, diff --git a/src/googleclouddebugger/glob_data_visibility_policy.py b/src/googleclouddebugger/glob_data_visibility_policy.py index 00255ef..275e69a 100644 --- a/src/googleclouddebugger/glob_data_visibility_policy.py +++ b/src/googleclouddebugger/glob_data_visibility_policy.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Determines the visibility of python data and symbols. Example Usage: @@ -33,7 +32,6 @@ import fnmatch - # Possible visibility responses RESPONSES = { 'UNKNOWN_TYPE': 'could not determine type', @@ -86,4 +84,3 @@ def _Matches(path, pattern_list): """ # Note: This code does not scale to large pattern_list sizes. 
return any(fnmatch.fnmatchcase(path, pattern) for pattern in pattern_list) - diff --git a/src/googleclouddebugger/imphook2.py b/src/googleclouddebugger/imphook2.py index 1aeb89f..3dafd13 100644 --- a/src/googleclouddebugger/imphook2.py +++ b/src/googleclouddebugger/imphook2.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Support for breakpoints on modules that haven't been loaded yet. This is the new module import hook which: @@ -168,8 +167,11 @@ def _ProcessImportBySuffix(name, fromlist, globals): # pylint: disable=redefined-builtin, g-doc-args, g-doc-return-or-yield -def _ImportHookBySuffix( - name, globals=None, locals=None, fromlist=None, level=None): +def _ImportHookBySuffix(name, + globals=None, + locals=None, + fromlist=None, + level=None): """Callback when an import statement is executed by the Python interpreter. Argument names have to exactly match those of __import__. Otherwise calls @@ -272,6 +274,7 @@ def _GenerateNames(name, fromlist, globals): the execution of this import statement. The returned set may contain names that are not real modules. """ + def GetCurrentPackage(globals): """Finds the name of the package for the currently executing module.""" if not globals: @@ -375,6 +378,7 @@ def _InvokeImportCallbackBySuffix(names): to a module. The list is expected to be much smaller than the exact sys.modules so that a linear search is not as costly. """ + def GetModuleFromName(name, path): """Returns the loaded module for this name/path, or None if not found. diff --git a/src/googleclouddebugger/labels.py b/src/googleclouddebugger/labels.py index d22129a..1bca819 100644 --- a/src/googleclouddebugger/labels.py +++ b/src/googleclouddebugger/labels.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. - """Defines the keys of the well known labels used by the cloud debugger. TODO: Define these strings in a common format for all agents to diff --git a/src/googleclouddebugger/module_explorer.py b/src/googleclouddebugger/module_explorer.py index e894427..2887655 100644 --- a/src/googleclouddebugger/module_explorer.py +++ b/src/googleclouddebugger/module_explorer.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Finds all the code objects defined by a module.""" import gc @@ -154,6 +153,7 @@ def _FindCodeObjectsReferents(module, start_objects, visit_recorder): Returns: List of code objects. """ + def CheckIgnoreCodeObject(code_object): """Checks if the code object can be ignored. @@ -188,9 +188,8 @@ def CheckIgnoreClass(cls): if not cls_module: return False # We can't tell for sure, so explore this class. - return ( - cls_module is not module and - getattr(cls_module, '__file__', None) != module.__file__) + return (cls_module is not module and + getattr(cls_module, '__file__', None) != module.__file__) code_objects = set() current = start_objects diff --git a/src/googleclouddebugger/module_search2.py b/src/googleclouddebugger/module_search2.py index f7e5de8..e8d29f3 100644 --- a/src/googleclouddebugger/module_search2.py +++ b/src/googleclouddebugger/module_search2.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Inclusive search for module files.""" import os @@ -68,6 +67,7 @@ def Search(path): AssertionError: if the provided path is an absolute path, or if it does not have a .py extension. 
""" + def SearchCandidates(p): """Generates all candidates for the fuzzy search of p.""" while p: @@ -103,4 +103,3 @@ def SearchCandidates(p): # A matching file was not found in sys.path directories. return path - diff --git a/src/googleclouddebugger/module_utils2.py b/src/googleclouddebugger/module_utils2.py index 996209f..738fc8c 100644 --- a/src/googleclouddebugger/module_utils2.py +++ b/src/googleclouddebugger/module_utils2.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Provides utility functions for module path processing.""" import os @@ -29,9 +28,8 @@ def IsPathSuffix(mod_path, path): Returns: True if path is a full path suffix of mod_path. False otherwise. """ - return (mod_path.endswith(path) and - (len(mod_path) == len(path) or - mod_path[:-len(path)].endswith(os.sep))) + return (mod_path.endswith(path) and (len(mod_path) == len(path) or + mod_path[:-len(path)].endswith(os.sep))) def GetLoadedModuleBySuffix(path): diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index 6a96a34..4d86ce1 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Handles a single Python breakpoint.""" from datetime import datetime @@ -36,8 +35,7 @@ 'version of the service you are trying to debug.') ERROR_LOCATION_MULTIPLE_MODULES_1 = ( 'Multiple modules matching $0. 
Please specify the module path.') -ERROR_LOCATION_MULTIPLE_MODULES_3 = ( - 'Multiple modules matching $0 ($1, $2)') +ERROR_LOCATION_MULTIPLE_MODULES_3 = ('Multiple modules matching $0 ($1, $2)') ERROR_LOCATION_MULTIPLE_MODULES_4 = ( 'Multiple modules matching $0 ($1, $2, and $3 more)') ERROR_LOCATION_NO_CODE_FOUND_AT_LINE_2 = 'No code found at line $0 in $1' @@ -54,30 +52,40 @@ 'the snapshot to a less frequently called statement.') ERROR_CONDITION_MUTABLE_0 = ( 'Only immutable expressions can be used in snapshot conditions') -ERROR_AGE_SNAPSHOT_EXPIRED_0 = ( - 'The snapshot has expired') -ERROR_AGE_LOGPOINT_EXPIRED_0 = ( - 'The logpoint has expired') -ERROR_UNSPECIFIED_INTERNAL_ERROR = ( - 'Internal error occurred') +ERROR_AGE_SNAPSHOT_EXPIRED_0 = ('The snapshot has expired') +ERROR_AGE_LOGPOINT_EXPIRED_0 = ('The logpoint has expired') +ERROR_UNSPECIFIED_INTERNAL_ERROR = ('Internal error occurred') # Status messages for different breakpoint events (except of "hit"). -_BREAKPOINT_EVENT_STATUS = dict( - [(native.BREAKPOINT_EVENT_ERROR, - {'isError': True, - 'description': {'format': ERROR_UNSPECIFIED_INTERNAL_ERROR}}), - (native.BREAKPOINT_EVENT_GLOBAL_CONDITION_QUOTA_EXCEEDED, - {'isError': True, - 'refersTo': 'BREAKPOINT_CONDITION', - 'description': {'format': ERROR_CONDITION_GLOBAL_QUOTA_EXCEEDED_0}}), - (native.BREAKPOINT_EVENT_BREAKPOINT_CONDITION_QUOTA_EXCEEDED, - {'isError': True, - 'refersTo': 'BREAKPOINT_CONDITION', - 'description': {'format': ERROR_CONDITION_BREAKPOINT_QUOTA_EXCEEDED_0}}), - (native.BREAKPOINT_EVENT_CONDITION_EXPRESSION_MUTABLE, - {'isError': True, - 'refersTo': 'BREAKPOINT_CONDITION', - 'description': {'format': ERROR_CONDITION_MUTABLE_0}})]) +_BREAKPOINT_EVENT_STATUS = dict([ + (native.BREAKPOINT_EVENT_ERROR, { + 'isError': True, + 'description': { + 'format': ERROR_UNSPECIFIED_INTERNAL_ERROR + } + }), + (native.BREAKPOINT_EVENT_GLOBAL_CONDITION_QUOTA_EXCEEDED, { + 'isError': True, + 'refersTo': 'BREAKPOINT_CONDITION', + 'description': { 
+ 'format': ERROR_CONDITION_GLOBAL_QUOTA_EXCEEDED_0 + } + }), + (native.BREAKPOINT_EVENT_BREAKPOINT_CONDITION_QUOTA_EXCEEDED, { + 'isError': True, + 'refersTo': 'BREAKPOINT_CONDITION', + 'description': { + 'format': ERROR_CONDITION_BREAKPOINT_QUOTA_EXCEEDED_0 + } + }), + (native.BREAKPOINT_EVENT_CONDITION_EXPRESSION_MUTABLE, { + 'isError': True, + 'refersTo': 'BREAKPOINT_CONDITION', + 'description': { + 'format': ERROR_CONDITION_MUTABLE_0 + } + }) +]) # The implementation of datetime.strptime imports an undocumented module called # _strptime. If it happens at the wrong time, we can get an exception about @@ -196,7 +204,11 @@ def __init__(self, definition, hub_client, breakpoints_manager, 'status': { 'isError': True, 'refersTo': 'BREAKPOINT_SOURCE_LOCATION', - 'description': {'format': ERROR_LOCATION_FILE_EXTENSION_0}}}) + 'description': { + 'format': ERROR_LOCATION_FILE_EXTENSION_0 + } + } + }) return # A flat init file is too generic; path must include package name. @@ -207,7 +219,10 @@ def __init__(self, definition, hub_client, breakpoints_manager, 'refersTo': 'BREAKPOINT_SOURCE_LOCATION', 'description': { 'format': ERROR_LOCATION_MULTIPLE_MODULES_1, - 'parameters': [path]}}}) + 'parameters': [path] + } + } + }) return new_path = module_search2.Search(path) @@ -217,8 +232,7 @@ def __init__(self, definition, hub_client, breakpoints_manager, self._ActivateBreakpoint(new_module) else: self._import_hook_cleanup = imphook2.AddImportCallbackBySuffix( - new_path, - self._ActivateBreakpoint) + new_path, self._ActivateBreakpoint) def Clear(self): """Clears the breakpoint and releases all breakpoint resources. 
@@ -263,7 +277,11 @@ def ExpireBreakpoint(self): 'status': { 'isError': True, 'refersTo': 'BREAKPOINT_AGE', - 'description': {'format': message}}}) + 'description': { + 'format': message + } + } + }) def _ActivateBreakpoint(self, module): """Sets the breakpoint in the loaded module, or complete with error.""" @@ -300,16 +318,18 @@ def _ActivateBreakpoint(self, module): 'refersTo': 'BREAKPOINT_SOURCE_LOCATION', 'description': { 'format': fmt, - 'parameters': params}}}) + 'parameters': params + } + } + }) return # Compile the breakpoint condition. condition = None if self.definition.get('condition'): try: - condition = compile(self.definition.get('condition'), - '', - 'eval') + condition = compile( + self.definition.get('condition'), '', 'eval') except (TypeError, ValueError) as e: # condition string contains null bytes. self._CompleteBreakpoint({ @@ -318,7 +338,10 @@ def _ActivateBreakpoint(self, module): 'refersTo': 'BREAKPOINT_CONDITION', 'description': { 'format': 'Invalid expression', - 'parameters': [str(e)]}}}) + 'parameters': [str(e)] + } + } + }) return except SyntaxError as e: @@ -328,17 +351,17 @@ def _ActivateBreakpoint(self, module): 'refersTo': 'BREAKPOINT_CONDITION', 'description': { 'format': 'Expression could not be compiled: $0', - 'parameters': [e.msg]}}}) + 'parameters': [e.msg] + } + } + }) return - native.LogInfo('Creating new Python breakpoint %s in %s, line %d' % ( - self.GetBreakpointId(), codeobj, line)) + native.LogInfo('Creating new Python breakpoint %s in %s, line %d' % + (self.GetBreakpointId(), codeobj, line)) - self._cookie = native.CreateConditionalBreakpoint( - codeobj, - line, - condition, - self._BreakpointEvent) + self._cookie = native.CreateConditionalBreakpoint(codeobj, line, condition, + self._BreakpointEvent) native.ActivateConditionalBreakpoint(self._cookie) @@ -397,8 +420,8 @@ def _BreakpointEvent(self, event, frame): self._CompleteBreakpoint({'status': error_status}) return - collector = capture_collector.CaptureCollector( 
- self.definition, self.data_visibility_policy) + collector = capture_collector.CaptureCollector(self.definition, + self.data_visibility_policy) # TODO: This is a temporary try/except. All exceptions should be # caught inside Collect and converted into breakpoint error messages. @@ -406,17 +429,22 @@ def _BreakpointEvent(self, event, frame): collector.Collect(frame) except BaseException as e: # pylint: disable=broad-except native.LogInfo('Internal error during data capture: %s' % repr(e)) - error_status = {'isError': True, - 'description': { - 'format': ('Internal error while capturing data: %s' % - repr(e))}} + error_status = { + 'isError': True, + 'description': { + 'format': ('Internal error while capturing data: %s' % repr(e)) + } + } self._CompleteBreakpoint({'status': error_status}) return except: # pylint: disable=bare-except native.LogInfo('Unknown exception raised') - error_status = {'isError': True, - 'description': { - 'format': 'Unknown internal error'}} + error_status = { + 'isError': True, + 'description': { + 'format': 'Unknown internal error' + } + } self._CompleteBreakpoint({'status': error_status}) return diff --git a/src/googleclouddebugger/uniquifier_computer.py b/src/googleclouddebugger/uniquifier_computer.py index 873b110..8395f33 100644 --- a/src/googleclouddebugger/uniquifier_computer.py +++ b/src/googleclouddebugger/uniquifier_computer.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Computes a unique identifier of the deployed application. When the application runs under AppEngine, the deployment is uniquely @@ -28,7 +27,6 @@ import os import sys - # Maximum recursion depth to follow when traversing the file system. This limit # will prevent stack overflow in case of a loop created by symbolic links. 
_MAX_DEPTH = 10 @@ -93,8 +91,7 @@ def ProcessDirectory(path, relative_path, depth=1): modules.add(file_name) ProcessApplicationFile(current_path, os.path.join(relative_path, name)) elif IsPackage(current_path): - ProcessDirectory(current_path, - os.path.join(relative_path, name), + ProcessDirectory(current_path, os.path.join(relative_path, name), depth + 1) def IsPackage(path): diff --git a/src/googleclouddebugger/yaml_data_visibility_config_reader.py b/src/googleclouddebugger/yaml_data_visibility_config_reader.py index 198af80..1ebc406 100644 --- a/src/googleclouddebugger/yaml_data_visibility_config_reader.py +++ b/src/googleclouddebugger/yaml_data_visibility_config_reader.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Reads a YAML configuration file to determine visibility policy. Example Usage: @@ -114,8 +113,7 @@ def Read(f): try: return Config( - yaml_data.get('blacklist', ()), - yaml_data.get('whitelist', ('*'))) + yaml_data.get('blacklist', ()), yaml_data.get('whitelist', ('*'))) except UnicodeDecodeError as e: raise YAMLLoadError('%s' % e) @@ -125,8 +123,8 @@ def _CheckData(yaml_data): legal_keys = set(('blacklist', 'whitelist')) unknown_keys = set(yaml_data) - legal_keys if unknown_keys: - raise UnknownConfigKeyError( - 'Unknown keys in configuration: %s' % unknown_keys) + raise UnknownConfigKeyError('Unknown keys in configuration: %s' % + unknown_keys) for key, data in six.iteritems(yaml_data): _AssertDataIsList(key, data) diff --git a/tests/application_info_test.py b/tests/application_info_test.py index d247a25..51f7427 100644 --- a/tests/application_info_test.py +++ b/tests/application_info_test.py @@ -42,8 +42,7 @@ def test_get_region_gcf(self): """Returns correct region when the FUNCTION_REGION env variable is set.""" try: os.environ['FUNCTION_REGION'] = 'function-region' - 
self.assertEqual('function-region', - application_info.GetRegion()) + self.assertEqual('function-region', application_info.GetRegion()) finally: del os.environ['FUNCTION_REGION'] @@ -68,5 +67,6 @@ def test_get_region_metadata_server_fail(self, mock_requests_get): self.assertIsNone(application_info.GetRegion()) + if __name__ == '__main__': absltest.main() diff --git a/tests/breakpoints_manager_test.py b/tests/breakpoints_manager_test.py index 36ec946..269931f 100644 --- a/tests/breakpoints_manager_test.py +++ b/tests/breakpoints_manager_test.py @@ -28,21 +28,25 @@ def testEmpty(self): def testSetSingle(self): self._breakpoints_manager.SetActiveBreakpoints([{'id': 'ID1'}]) - self._mock_breakpoint.assert_has_calls([ - mock.call({'id': 'ID1'}, self, self._breakpoints_manager, None)]) + self._mock_breakpoint.assert_has_calls( + [mock.call({'id': 'ID1'}, self, self._breakpoints_manager, None)]) self.assertLen(self._breakpoints_manager._active, 1) def testSetDouble(self): self._breakpoints_manager.SetActiveBreakpoints([{'id': 'ID1'}]) - self._mock_breakpoint.assert_has_calls([ - mock.call({'id': 'ID1'}, self, self._breakpoints_manager, None)]) + self._mock_breakpoint.assert_has_calls( + [mock.call({'id': 'ID1'}, self, self._breakpoints_manager, None)]) self.assertLen(self._breakpoints_manager._active, 1) - self._breakpoints_manager.SetActiveBreakpoints( - [{'id': 'ID1'}, {'id': 'ID2'}]) + self._breakpoints_manager.SetActiveBreakpoints([{ + 'id': 'ID1' + }, { + 'id': 'ID2' + }]) self._mock_breakpoint.assert_has_calls([ mock.call({'id': 'ID1'}, self, self._breakpoints_manager, None), - mock.call({'id': 'ID2'}, self, self._breakpoints_manager, None)]) + mock.call({'id': 'ID2'}, self, self._breakpoints_manager, None) + ]) self.assertLen(self._breakpoints_manager._active, 2) def testSetRepeated(self): @@ -76,28 +80,52 @@ def testCompletedCleanup(self): self.assertEqual(1, self._mock_breakpoint.call_count) def testMultipleSetDelete(self): - 
self._breakpoints_manager.SetActiveBreakpoints( - [{'id': 'ID1'}, {'id': 'ID2'}, {'id': 'ID3'}, {'id': 'ID4'}]) + self._breakpoints_manager.SetActiveBreakpoints([{ + 'id': 'ID1' + }, { + 'id': 'ID2' + }, { + 'id': 'ID3' + }, { + 'id': 'ID4' + }]) self.assertLen(self._breakpoints_manager._active, 4) - self._breakpoints_manager.SetActiveBreakpoints( - [{'id': 'ID1'}, {'id': 'ID2'}, {'id': 'ID3'}, {'id': 'ID4'}]) + self._breakpoints_manager.SetActiveBreakpoints([{ + 'id': 'ID1' + }, { + 'id': 'ID2' + }, { + 'id': 'ID3' + }, { + 'id': 'ID4' + }]) self.assertLen(self._breakpoints_manager._active, 4) self._breakpoints_manager.SetActiveBreakpoints([]) self.assertEmpty(self._breakpoints_manager._active) def testCombination(self): - self._breakpoints_manager.SetActiveBreakpoints( - [{'id': 'ID1'}, {'id': 'ID2'}, {'id': 'ID3'}]) + self._breakpoints_manager.SetActiveBreakpoints([{ + 'id': 'ID1' + }, { + 'id': 'ID2' + }, { + 'id': 'ID3' + }]) self.assertLen(self._breakpoints_manager._active, 3) self._breakpoints_manager.CompleteBreakpoint('ID2') self.assertEqual(1, self._mock_breakpoint.return_value.Clear.call_count) self.assertLen(self._breakpoints_manager._active, 2) - self._breakpoints_manager.SetActiveBreakpoints( - [{'id': 'ID2'}, {'id': 'ID3'}, {'id': 'ID4'}]) + self._breakpoints_manager.SetActiveBreakpoints([{ + 'id': 'ID2' + }, { + 'id': 'ID3' + }, { + 'id': 'ID4' + }]) self.assertEqual(2, self._mock_breakpoint.return_value.Clear.call_count) self.assertLen(self._breakpoints_manager._active, 2) @@ -113,24 +141,28 @@ def testCheckExpirationNoBreakpoints(self): self._breakpoints_manager.CheckBreakpointsExpiration() def testCheckNotExpired(self): - self._breakpoints_manager.SetActiveBreakpoints( - [{'id': 'ID1'}, {'id': 'ID2'}]) + self._breakpoints_manager.SetActiveBreakpoints([{ + 'id': 'ID1' + }, { + 'id': 'ID2' + }]) self._mock_breakpoint.return_value.GetExpirationTime.return_value = ( datetime.utcnow() + timedelta(minutes=1)) 
self._breakpoints_manager.CheckBreakpointsExpiration() self.assertEqual( - 0, - self._mock_breakpoint.return_value.ExpireBreakpoint.call_count) + 0, self._mock_breakpoint.return_value.ExpireBreakpoint.call_count) def testCheckExpired(self): - self._breakpoints_manager.SetActiveBreakpoints( - [{'id': 'ID1'}, {'id': 'ID2'}]) + self._breakpoints_manager.SetActiveBreakpoints([{ + 'id': 'ID1' + }, { + 'id': 'ID2' + }]) self._mock_breakpoint.return_value.GetExpirationTime.return_value = ( datetime.utcnow() - timedelta(minutes=1)) self._breakpoints_manager.CheckBreakpointsExpiration() self.assertEqual( - 2, - self._mock_breakpoint.return_value.ExpireBreakpoint.call_count) + 2, self._mock_breakpoint.return_value.ExpireBreakpoint.call_count) def testCheckExpirationReset(self): self._breakpoints_manager.SetActiveBreakpoints([{'id': 'ID1'}]) @@ -138,17 +170,18 @@ def testCheckExpirationReset(self): datetime.utcnow() + timedelta(minutes=1)) self._breakpoints_manager.CheckBreakpointsExpiration() self.assertEqual( - 0, - self._mock_breakpoint.return_value.ExpireBreakpoint.call_count) + 0, self._mock_breakpoint.return_value.ExpireBreakpoint.call_count) - self._breakpoints_manager.SetActiveBreakpoints( - [{'id': 'ID1'}, {'id': 'ID2'}]) + self._breakpoints_manager.SetActiveBreakpoints([{ + 'id': 'ID1' + }, { + 'id': 'ID2' + }]) self._mock_breakpoint.return_value.GetExpirationTime.return_value = ( datetime.utcnow() - timedelta(minutes=1)) self._breakpoints_manager.CheckBreakpointsExpiration() self.assertEqual( - 2, - self._mock_breakpoint.return_value.ExpireBreakpoint.call_count) + 2, self._mock_breakpoint.return_value.ExpireBreakpoint.call_count) def testCheckExpirationCacheNegative(self): base = datetime(2015, 1, 1) @@ -163,16 +196,14 @@ def testCheckExpirationCacheNegative(self): self._breakpoints_manager.CheckBreakpointsExpiration() self.assertEqual( - 0, - self._mock_breakpoint.return_value.ExpireBreakpoint.call_count) + 0, 
self._mock_breakpoint.return_value.ExpireBreakpoint.call_count) # The nearest expiration time is cached, so this should have no effect. self._mock_breakpoint.return_value.GetExpirationTime.return_value = ( base - timedelta(minutes=1)) self._breakpoints_manager.CheckBreakpointsExpiration() self.assertEqual( - 0, - self._mock_breakpoint.return_value.ExpireBreakpoint.call_count) + 0, self._mock_breakpoint.return_value.ExpireBreakpoint.call_count) def testCheckExpirationCachePositive(self): base = datetime(2015, 1, 1) @@ -186,14 +217,12 @@ def testCheckExpirationCachePositive(self): mock_time.return_value = base self._breakpoints_manager.CheckBreakpointsExpiration() self.assertEqual( - 0, - self._mock_breakpoint.return_value.ExpireBreakpoint.call_count) + 0, self._mock_breakpoint.return_value.ExpireBreakpoint.call_count) mock_time.return_value = base + timedelta(minutes=2) self._breakpoints_manager.CheckBreakpointsExpiration() self.assertEqual( - 1, - self._mock_breakpoint.return_value.ExpireBreakpoint.call_count) + 1, self._mock_breakpoint.return_value.ExpireBreakpoint.call_count) if __name__ == '__main__': diff --git a/tests/capture_collector_test.py b/tests/capture_collector_test.py index 935173f..8733497 100644 --- a/tests/capture_collector_test.py +++ b/tests/capture_collector_test.py @@ -20,8 +20,8 @@ 'quota is restored') -def CaptureCollectorWithDefaultLocation( - definition, data_visibility_policy=None): +def CaptureCollectorWithDefaultLocation(definition, + data_visibility_policy=None): """Makes a LogCollector with a default location. 
Args: @@ -80,6 +80,7 @@ def testCallStackLimitedFrames(self): self.assertEqual(frame_below_line, frame_below['location']['line']) def testCallStackLimitedExpandedFrames(self): + def CountLocals(frame): return len(frame['arguments']) + len(frame['locals']) @@ -99,10 +100,15 @@ def testSimpleArguments(self): def Method(unused_a, unused_b): self._collector.Collect(inspect.currentframe()) top_frame = self._collector.breakpoint['stackFrames'][0] - self.assertListEqual( - [{'name': 'unused_a', 'value': '158', 'type': 'int'}, - {'name': 'unused_b', 'value': "'hello'", 'type': 'str'}], - top_frame['arguments']) + self.assertListEqual([{ + 'name': 'unused_a', + 'value': '158', + 'type': 'int' + }, { + 'name': 'unused_b', + 'value': "'hello'", + 'type': 'str' + }], top_frame['arguments']) self.assertEqual('Method', top_frame['function']) self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) @@ -114,11 +120,19 @@ def testMethodWithFirstArgumentNamedSelf(self): def Method(self, unused_a, unused_b): # pylint: disable=unused-argument this._collector.Collect(inspect.currentframe()) top_frame = this._collector.breakpoint['stackFrames'][0] - this.assertListEqual( - [{'name': 'self', 'value': "'world'", 'type': 'str'}, - {'name': 'unused_a', 'value': '158', 'type': 'int'}, - {'name': 'unused_b', 'value': "'hello'", 'type': 'str'}], - top_frame['arguments']) + this.assertListEqual([{ + 'name': 'self', + 'value': "'world'", + 'type': 'str' + }, { + 'name': 'unused_a', + 'value': '158', + 'type': 'int' + }, { + 'name': 'unused_b', + 'value': "'hello'", + 'type': 'str' + }], top_frame['arguments']) # This is the incorrect function name, but we are validating that no # exceptions are thrown here. 
this.assertEqual('str.Method', top_frame['function']) @@ -132,11 +146,19 @@ def testMethodWithArgumentNamedSelf(self): def Method(unused_a, unused_b, self): # pylint: disable=unused-argument this._collector.Collect(inspect.currentframe()) top_frame = this._collector.breakpoint['stackFrames'][0] - this.assertListEqual( - [{'name': 'unused_a', 'value': '158', 'type': 'int'}, - {'name': 'unused_b', 'value': "'hello'", 'type': 'str'}, - {'name': 'self', 'value': "'world'", 'type': 'str'}], - top_frame['arguments']) + this.assertListEqual([{ + 'name': 'unused_a', + 'value': '158', + 'type': 'int' + }, { + 'name': 'unused_b', + 'value': "'hello'", + 'type': 'str' + }, { + 'name': 'self', + 'value': "'world'", + 'type': 'str' + }], top_frame['arguments']) this.assertEqual('Method', top_frame['function']) self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) @@ -146,9 +168,10 @@ def testClassMethod(self): self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) self._collector.Collect(inspect.currentframe()) top_frame = self._collector.breakpoint['stackFrames'][0] - self.assertListEqual( - [{'name': 'self', 'varTableIndex': 1}], - top_frame['arguments']) + self.assertListEqual([{ + 'name': 'self', + 'varTableIndex': 1 + }], top_frame['arguments']) self.assertEqual('CaptureCollectorTest.testClassMethod', top_frame['function']) @@ -157,10 +180,14 @@ def testClassMethodWithOptionalArguments(self): def Method(unused_a, unused_optional='notneeded'): self._collector.Collect(inspect.currentframe()) top_frame = self._collector.breakpoint['stackFrames'][0] - self.assertListEqual( - [{'name': 'unused_a', 'varTableIndex': 1}, - {'name': 'unused_optional', 'value': "'notneeded'", 'type': 'str'}], - top_frame['arguments']) + self.assertListEqual([{ + 'name': 'unused_a', + 'varTableIndex': 1 + }, { + 'name': 'unused_optional', + 'value': "'notneeded'", + 'type': 'str' + }], top_frame['arguments']) self.assertEqual('Method', top_frame['function']) 
self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) @@ -171,11 +198,15 @@ def testClassMethodWithPositionalArguments(self): def Method(*unused_pos): self._collector.Collect(inspect.currentframe()) top_frame = self._collector.breakpoint['stackFrames'][0] - self.assertListEqual( - [{'name': 'unused_pos', - 'type': 'tuple', - 'members': [{'name': '[0]', 'value': '1', 'type': 'int'}]}], - top_frame['arguments']) + self.assertListEqual([{ + 'name': 'unused_pos', + 'type': 'tuple', + 'members': [{ + 'name': '[0]', + 'value': '1', + 'type': 'int' + }] + }], top_frame['arguments']) self.assertEqual('Method', top_frame['function']) self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) @@ -186,10 +217,15 @@ def testClassMethodWithKeywords(self): def Method(**unused_kwd): self._collector.Collect(inspect.currentframe()) top_frame = self._collector.breakpoint['stackFrames'][0] - self.assertCountEqual( - [{'name': "'first'", 'value': '1', 'type': 'int'}, - {'name': "'second'", 'value': '2', 'type': 'int'}], - top_frame['arguments'][0]['members']) + self.assertCountEqual([{ + 'name': "'first'", + 'value': '1', + 'type': 'int' + }, { + 'name': "'second'", + 'value': '2', + 'type': 'int' + }], top_frame['arguments'][0]['members']) self.assertEqual('Method', top_frame['function']) self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) @@ -204,6 +240,7 @@ def testNoLocalVariables(self): top_frame['function']) def testRuntimeError(self): + class BadDict(dict): def __init__(self, d): @@ -238,6 +275,7 @@ def __init__(self): }, var_a) def testBadDictionary(self): + class BadDict(dict): def items(self): @@ -281,12 +319,22 @@ def testLocalVariables(self): self._collector.Collect(inspect.currentframe()) top_frame = self._collector.breakpoint['stackFrames'][0] self.assertLen(top_frame['arguments'], 1) # just self. 
- self.assertCountEqual( - [{'name': 'unused_a', 'value': '8', 'type': 'int'}, - {'name': 'unused_b', 'value': 'True', 'type': 'bool'}, - {'name': 'unused_nothing', 'value': 'None'}, - {'name': 'unused_s', 'value': "'hippo'", 'type': 'str'}], - top_frame['locals']) + self.assertCountEqual([{ + 'name': 'unused_a', + 'value': '8', + 'type': 'int' + }, { + 'name': 'unused_b', + 'value': 'True', + 'type': 'bool' + }, { + 'name': 'unused_nothing', + 'value': 'None' + }, { + 'name': 'unused_s', + 'value': "'hippo'", + 'type': 'str' + }], top_frame['locals']) def testLocalVariablesWithBlacklist(self): unused_a = capture_collector.LineNoFilter() @@ -302,9 +350,8 @@ def IsDataVisible(name): mock_policy = mock.MagicMock() mock_policy.IsDataVisible.side_effect = IsDataVisible - self._collector = CaptureCollectorWithDefaultLocation( - {'id': 'BP_ID'}, - mock_policy) + self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}, + mock_policy) self._collector.Collect(inspect.currentframe()) top_frame = self._collector.breakpoint['stackFrames'][0] # Should be blocked @@ -312,22 +359,22 @@ def IsDataVisible(name): { 'name': 'unused_a', 'status': { - 'description': {'format': 'data blocked'}, + 'description': { + 'format': 'data blocked' + }, 'refersTo': 'VARIABLE_NAME', 'isError': True } - }, - top_frame['locals']) + }, top_frame['locals']) # Should not be blocked - self.assertIn( - { - 'name': 'unused_b', - 'value': '5', - 'type': 'int' - }, - top_frame['locals']) + self.assertIn({ + 'name': 'unused_b', + 'value': '5', + 'type': 'int' + }, top_frame['locals']) def testWatchedExpressionsBlacklisted(self): + class TestClass(object): def __init__(self): @@ -340,6 +387,7 @@ def IsDataVisible(name): if name == 'capture_collector_test.TestClass': return (False, 'data blocked') return (True, None) + mock_policy = mock.MagicMock() mock_policy.IsDataVisible.side_effect = IsDataVisible @@ -347,42 +395,45 @@ def IsDataVisible(name): { 'id': 'BP_ID', 'expressions': ['unused_a', 
'unused_a.a'] - }, - mock_policy) + }, mock_policy) self._collector.Collect(inspect.currentframe()) # Class should be blocked self.assertIn( { 'name': 'unused_a', 'status': { - 'description': {'format': 'data blocked'}, + 'description': { + 'format': 'data blocked' + }, 'refersTo': 'VARIABLE_NAME', 'isError': True } - }, - self._collector.breakpoint['evaluatedExpressions']) + }, self._collector.breakpoint['evaluatedExpressions']) # TODO: Explicit member SHOULD also be blocked but this is # currently not implemented. After fixing the implementation, change # the test below to assert that it's blocked too. - self.assertIn( - { - 'name': 'unused_a.a', - 'type': 'int', - 'value': '5' - }, - self._collector.breakpoint['evaluatedExpressions']) + self.assertIn({ + 'name': 'unused_a.a', + 'type': 'int', + 'value': '5' + }, self._collector.breakpoint['evaluatedExpressions']) def testLocalsNonTopFrame(self): def Method(): self._collector.Collect(inspect.currentframe()) - self.assertListEqual( - [{'name': 'self', 'varTableIndex': 1}], - self._collector.breakpoint['stackFrames'][1]['arguments']) - self.assertCountEqual( - [{'name': 'unused_a', 'value': '47', 'type': 'int'}, - {'name': 'Method', 'value': 'function Method'}], - self._collector.breakpoint['stackFrames'][1]['locals']) + self.assertListEqual([{ + 'name': 'self', + 'varTableIndex': 1 + }], self._collector.breakpoint['stackFrames'][1]['arguments']) + self.assertCountEqual([{ + 'name': 'unused_a', + 'value': '47', + 'type': 'int' + }, { + 'name': 'Method', + 'value': 'function Method' + }], self._collector.breakpoint['stackFrames'][1]['locals']) unused_a = 47 self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) @@ -399,12 +450,20 @@ def testDictionaryMaxDepth(self): self._collector.default_capture_limits.max_depth = 3 self._collector.Collect(inspect.currentframe()) self.assertDictEqual( - {'name': 'd', - 'type': 'dict', - 'members': [{'name': "'inner'", - 'type': 'dict', - 'members': [{'name': 
"'inner'", 'varTableIndex': 0}]}]}, - self._LocalByName('d')) + { + 'name': + 'd', + 'type': + 'dict', + 'members': [{ + 'name': "'inner'", + 'type': 'dict', + 'members': [{ + 'name': "'inner'", + 'varTableIndex': 0 + }] + }] + }, self._LocalByName('d')) def testVectorMaxDepth(self): l = [] @@ -417,31 +476,42 @@ def testVectorMaxDepth(self): self._collector.default_capture_limits.max_depth = 3 self._collector.Collect(inspect.currentframe()) self.assertDictEqual( - {'name': 'l', - 'type': 'list', - 'members': [{'name': '[0]', - 'type': 'list', - 'members': [{'name': '[0]', 'varTableIndex': 0}]}]}, - self._LocalByName('l')) + { + 'name': + 'l', + 'type': + 'list', + 'members': [{ + 'name': '[0]', + 'type': 'list', + 'members': [{ + 'name': '[0]', + 'varTableIndex': 0 + }] + }] + }, self._LocalByName('l')) def testStringTrimming(self): unused_s = '123456789' self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) self._collector.default_capture_limits.max_value_len = 8 self._collector.Collect(inspect.currentframe()) - self.assertListEqual( - [{'name': 'unused_s', 'value': "'12345678...", 'type': 'str'}], - self._collector.breakpoint['stackFrames'][0]['locals']) + self.assertListEqual([{ + 'name': 'unused_s', + 'value': "'12345678...", + 'type': 'str' + }], self._collector.breakpoint['stackFrames'][0]['locals']) def testBytearrayTrimming(self): unused_bytes = bytearray(range(20)) self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) self._collector.default_capture_limits.max_value_len = 20 self._collector.Collect(inspect.currentframe()) - self.assertListEqual( - [{'name': 'unused_bytes', 'value': r"bytearray(b'\x00\x01\...", - 'type': 'bytearray'}], - self._collector.breakpoint['stackFrames'][0]['locals']) + self.assertListEqual([{ + 'name': 'unused_bytes', + 'value': r"bytearray(b'\x00\x01\...", + 'type': 'bytearray' + }], self._collector.breakpoint['stackFrames'][0]['locals']) def testObject(self): @@ -458,10 +528,15 @@ def 
__init__(self): self.assertEqual( __name__ + '.MyClass', self._collector.breakpoint['variableTable'][var_index]['type']) - self.assertCountEqual( - [{'name': 'a', 'value': '1', 'type': 'int'}, - {'name': 'b', 'value': '2', 'type': 'int'}], - self._collector.breakpoint['variableTable'][var_index]['members']) + self.assertCountEqual([{ + 'name': 'a', + 'value': '1', + 'type': 'int' + }, { + 'name': 'b', + 'value': '2', + 'type': 'int' + }], self._collector.breakpoint['variableTable'][var_index]['members']) def testBufferFullLocalRef(self): @@ -514,8 +589,10 @@ def Method(): self._collector.Collect(inspect.currentframe()) # Verify that one of {d1,d2} could fit and the other didn't. - var_indexes = [self._LocalByName(n)['members'][0]['varTableIndex'] == 0 - for n in ['unused_d1', 'unused_d2']] + var_indexes = [ + self._LocalByName(n)['members'][0]['varTableIndex'] == 0 + for n in ['unused_d1', 'unused_d2'] + ] self.assertEqual(1, sum(var_indexes)) Method() @@ -538,13 +615,21 @@ class MyClass(object): var_table = self._collector.breakpoint['variableTable'] self.assertDictEqual( - {'type': __name__ + '.MyClass', - 'members': [{'name': 'other', 'varTableIndex': m1_var_index}]}, - var_table[m2_var_index]) + { + 'type': __name__ + '.MyClass', + 'members': [{ + 'name': 'other', + 'varTableIndex': m1_var_index + }] + }, var_table[m2_var_index]) self.assertDictEqual( - {'type': __name__ + '.MyClass', - 'members': [{'name': 'other', 'varTableIndex': m2_var_index}]}, - var_table[m1_var_index]) + { + 'type': __name__ + '.MyClass', + 'members': [{ + 'name': 'other', + 'varTableIndex': m2_var_index + }] + }, var_table[m1_var_index]) def testCaptureVector(self): unused_my_list = [1, 2, 3, 4, 5] @@ -554,21 +639,53 @@ def testCaptureVector(self): self._collector.Collect(inspect.currentframe()) self.assertDictEqual( - {'name': 'unused_my_list', - 'type': 'list', - 'members': [{'name': '[0]', 'value': '1', 'type': 'int'}, - {'name': '[1]', 'value': '2', 'type': 'int'}, - {'name': '[2]', 
'value': '3', 'type': 'int'}, - {'name': '[3]', 'value': '4', 'type': 'int'}, - {'name': '[4]', 'value': '5', 'type': 'int'}]}, - self._LocalByName('unused_my_list')) + { + 'name': + 'unused_my_list', + 'type': + 'list', + 'members': [{ + 'name': '[0]', + 'value': '1', + 'type': 'int' + }, { + 'name': '[1]', + 'value': '2', + 'type': 'int' + }, { + 'name': '[2]', + 'value': '3', + 'type': 'int' + }, { + 'name': '[3]', + 'value': '4', + 'type': 'int' + }, { + 'name': '[4]', + 'value': '5', + 'type': 'int' + }] + }, self._LocalByName('unused_my_list')) self.assertDictEqual( - {'name': 'unused_my_slice', - 'type': 'list', - 'members': [{'name': '[0]', 'value': '2', 'type': 'int'}, - {'name': '[1]', 'value': '3', 'type': 'int'}, - {'name': '[2]', 'value': '4', 'type': 'int'}]}, - self._LocalByName('unused_my_slice')) + { + 'name': + 'unused_my_slice', + 'type': + 'list', + 'members': [{ + 'name': '[0]', + 'value': '2', + 'type': 'int' + }, { + 'name': '[1]', + 'value': '3', + 'type': 'int' + }, { + 'name': '[2]', + 'value': '4', + 'type': 'int' + }] + }, self._LocalByName('unused_my_slice')) def testCaptureDictionary(self): unused_my_dict = { @@ -577,33 +694,73 @@ def testCaptureDictionary(self): (5, 6): 7, frozenset([5, 6]): 'frozen', 'vector': ['odin', 'dva', 'tri'], - 'inner': {1: 'one'}, - 'empty': {}} + 'inner': { + 1: 'one' + }, + 'empty': {} + } self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) self._collector.Collect(inspect.currentframe()) frozenset_name = 'frozenset({5, 6})' if six.PY3 else 'frozenset([5, 6])' - self.assertCountEqual( - [{'name': "'first'", 'value': '1', 'type': 'int'}, - {'name': '3.14', 'value': "'pi'", 'type': 'str'}, - {'name': '(5, 6)', 'value': '7', 'type': 'int'}, - {'name': frozenset_name, 'value': "'frozen'", 'type': 'str'}, - {'name': "'vector'", - 'type': 'list', - 'members': [{'name': '[0]', 'value': "'odin'", 'type': 'str'}, - {'name': '[1]', 'value': "'dva'", 'type': 'str'}, - {'name': '[2]', 'value': 
"'tri'", 'type': 'str'}]}, - {'name': "'inner'", - 'type': 'dict', - 'members': [{'name': '1', 'value': "'one'", 'type': 'str'}]}, - {'name': "'empty'", - 'type': 'dict', - 'members': [ - {'status': { - 'refersTo': 'VARIABLE_NAME', - 'description': {'format': 'Empty dictionary'}}}]}], - self._LocalByName('unused_my_dict')['members']) + self.assertCountEqual([{ + 'name': "'first'", + 'value': '1', + 'type': 'int' + }, { + 'name': '3.14', + 'value': "'pi'", + 'type': 'str' + }, { + 'name': '(5, 6)', + 'value': '7', + 'type': 'int' + }, { + 'name': frozenset_name, + 'value': "'frozen'", + 'type': 'str' + }, { + 'name': + "'vector'", + 'type': + 'list', + 'members': [{ + 'name': '[0]', + 'value': "'odin'", + 'type': 'str' + }, { + 'name': '[1]', + 'value': "'dva'", + 'type': 'str' + }, { + 'name': '[2]', + 'value': "'tri'", + 'type': 'str' + }] + }, { + 'name': "'inner'", + 'type': 'dict', + 'members': [{ + 'name': '1', + 'value': "'one'", + 'type': 'str' + }] + }, { + 'name': + "'empty'", + 'type': + 'dict', + 'members': [{ + 'status': { + 'refersTo': 'VARIABLE_NAME', + 'description': { + 'format': 'Empty dictionary' + } + } + }] + }], + self._LocalByName('unused_my_dict')['members']) def testEscapeDictionaryKey(self): unused_dict = {} @@ -617,10 +774,16 @@ def testEscapeDictionaryKey(self): unicode_name = "'\xe0'" if six.PY3 else "u'\\xe0'" unicode_value = "'\xe0'" if six.PY3 else "u'\\xe0'" - self.assertCountEqual( - [{'type': 'str', 'name': "'\\x88'", 'value': "'\\x88'"}, - {'type': unicode_type, 'name': unicode_name, 'value': unicode_value}], - self._LocalByName('unused_dict')['members']) + self.assertCountEqual([{ + 'type': 'str', + 'name': "'\\x88'", + 'value': "'\\x88'" + }, { + 'type': unicode_type, + 'name': unicode_name, + 'value': unicode_value + }], + self._LocalByName('unused_dict')['members']) def testOversizedList(self): unused_big_list = ['x'] * 10000 @@ -631,17 +794,23 @@ def testOversizedList(self): members = 
self._LocalByName('unused_big_list')['members'] self.assertLen(members, 26) - self.assertDictEqual({'name': '[7]', 'value': "'x'", 'type': 'str'}, - members[7]) + self.assertDictEqual({ + 'name': '[7]', + 'value': "'x'", + 'type': 'str' + }, members[7]) self.assertDictEqual( - {'status': { - 'refersTo': 'VARIABLE_VALUE', - 'description': { - 'format': - ('Only first $0 items were captured. Use in an expression' - ' to see all items.'), - 'parameters': ['25']}}}, - members[25]) + { + 'status': { + 'refersTo': 'VARIABLE_VALUE', + 'description': { + 'format': ( + 'Only first $0 items were captured. Use in an expression' + ' to see all items.'), + 'parameters': ['25'] + } + } + }, members[25]) def testOversizedDictionary(self): unused_big_dict = {'item' + str(i): i**2 for i in range(26)} @@ -653,14 +822,17 @@ def testOversizedDictionary(self): self.assertLen(members, 26) self.assertDictEqual( - {'status': { - 'refersTo': 'VARIABLE_VALUE', - 'description': { - 'format': - ('Only first $0 items were captured. Use in an expression' - ' to see all items.'), - 'parameters': ['25']}}}, - members[25]) + { + 'status': { + 'refersTo': 'VARIABLE_VALUE', + 'description': { + 'format': ( + 'Only first $0 items were captured. 
Use in an expression' + ' to see all items.'), + 'parameters': ['25'] + } + } + }, members[25]) def testEmptyDictionary(self): unused_empty_dict = {} @@ -669,13 +841,20 @@ def testEmptyDictionary(self): self._collector.Collect(inspect.currentframe()) self.assertEqual( - {'name': 'unused_empty_dict', - 'type': 'dict', - 'members': [{ - 'status': { - 'refersTo': 'VARIABLE_NAME', - 'description': {'format': 'Empty dictionary'}}}]}, - self._LocalByName('unused_empty_dict')) + { + 'name': + 'unused_empty_dict', + 'type': + 'dict', + 'members': [{ + 'status': { + 'refersTo': 'VARIABLE_NAME', + 'description': { + 'format': 'Empty dictionary' + } + } + }] + }, self._LocalByName('unused_empty_dict')) def testEmptyCollection(self): for unused_c, object_type in [([], 'list'), ((), 'tuple'), (set(), 'set')]: @@ -683,13 +862,20 @@ def testEmptyCollection(self): self._collector.Collect(inspect.currentframe()) self.assertEqual( - {'name': 'unused_c', - 'type': object_type, - 'members': [{ - 'status': { - 'refersTo': 'VARIABLE_NAME', - 'description': {'format': 'Empty collection'}}}]}, - self._Pack(self._LocalByName('unused_c'))) + { + 'name': + 'unused_c', + 'type': + object_type, + 'members': [{ + 'status': { + 'refersTo': 'VARIABLE_NAME', + 'description': { + 'format': 'Empty collection' + } + } + }] + }, self._Pack(self._LocalByName('unused_c'))) def testEmptyClass(self): @@ -702,13 +888,20 @@ class EmptyObject(object): self._collector.Collect(inspect.currentframe()) self.assertEqual( - {'name': 'unused_empty_object', - 'type': __name__ + '.EmptyObject', - 'members': [{ - 'status': { - 'refersTo': 'VARIABLE_NAME', - 'description': {'format': 'Object has no fields'}}}]}, - self._Pack(self._LocalByName('unused_empty_object'))) + { + 'name': + 'unused_empty_object', + 'type': + __name__ + '.EmptyObject', + 'members': [{ + 'status': { + 'refersTo': 'VARIABLE_NAME', + 'description': { + 'format': 'Object has no fields' + } + } + }] + }, 
self._Pack(self._LocalByName('unused_empty_object'))) def testWatchedExpressionsSuccess(self): unused_dummy_a = 'x' @@ -716,16 +909,32 @@ def testWatchedExpressionsSuccess(self): self._collector = CaptureCollectorWithDefaultLocation({ 'id': 'BP_ID', - 'expressions': ['1+2', 'unused_dummy_a*8', 'unused_dummy_b']}) + 'expressions': ['1+2', 'unused_dummy_a*8', 'unused_dummy_b'] + }) self._collector.Collect(inspect.currentframe()) - self.assertListEqual( - [{'name': '1+2', 'value': '3', 'type': 'int'}, - {'name': 'unused_dummy_a*8', 'value': "'xxxxxxxx'", 'type': 'str'}, - {'name': 'unused_dummy_b', - 'type': 'dict', - 'members': [{'name': '1', 'value': '2', 'type': 'int'}, - {'name': '3', 'value': "'a'", 'type': 'str'}]}], - self._collector.breakpoint['evaluatedExpressions']) + self.assertListEqual([{ + 'name': '1+2', + 'value': '3', + 'type': 'int' + }, { + 'name': 'unused_dummy_a*8', + 'value': "'xxxxxxxx'", + 'type': 'str' + }, { + 'name': + 'unused_dummy_b', + 'type': + 'dict', + 'members': [{ + 'name': '1', + 'value': '2', + 'type': 'int' + }, { + 'name': '3', + 'value': "'a'", + 'type': 'str' + }] + }], self._collector.breakpoint['evaluatedExpressions']) def testOversizedStringExpression(self): # This test checks that string expressions are collected first, up to the @@ -737,33 +946,42 @@ def testOversizedStringExpression(self): # ensure that we're not using the normal limit of 256 bytes. 
self._collector = CaptureCollectorWithDefaultLocation({ 'id': 'BP_ID', - 'expressions': ['unused_dummy_a']}) + 'expressions': ['unused_dummy_a'] + }) self._collector.max_size = 500 unused_dummy_a = '|'.join(['%04d' % i for i in range(5, 510, 5)]) self._collector.Collect(inspect.currentframe()) - self.assertListEqual( - [{'name': 'unused_dummy_a', - 'type': 'str', - 'value': "'{0}...".format(unused_dummy_a[0:-18])}], - self._collector.breakpoint['evaluatedExpressions']) + self.assertListEqual([{ + 'name': 'unused_dummy_a', + 'type': 'str', + 'value': "'{0}...".format(unused_dummy_a[0:-18]) + }], self._collector.breakpoint['evaluatedExpressions']) def testOversizedListExpression(self): self._collector = CaptureCollectorWithDefaultLocation({ 'id': 'BP_ID', - 'expressions': ['unused_dummy_a']}) + 'expressions': ['unused_dummy_a'] + }) unused_dummy_a = list(range(0, 100)) self._collector.Collect(inspect.currentframe()) # Verify that the list did not get truncated. - self.assertListEqual( - [{'name': 'unused_dummy_a', 'type': 'list', 'members': [ - {'type': 'int', 'value': str(a), 'name': '[{0}]'.format(a)} - for a in unused_dummy_a]}], - self._collector.breakpoint['evaluatedExpressions']) + self.assertListEqual([{ + 'name': + 'unused_dummy_a', + 'type': + 'list', + 'members': [{ + 'type': 'int', + 'value': str(a), + 'name': '[{0}]'.format(a) + } for a in unused_dummy_a] + }], self._collector.breakpoint['evaluatedExpressions']) def testExpressionNullBytes(self): self._collector = CaptureCollectorWithDefaultLocation({ 'id': 'BP_ID', - 'expressions': ['\0']}) + 'expressions': ['\0'] + }) self._collector.Collect(inspect.currentframe()) evaluated_expressions = self._collector.breakpoint['evaluatedExpressions'] @@ -773,36 +991,39 @@ def testExpressionNullBytes(self): def testSyntaxErrorExpression(self): self._collector = CaptureCollectorWithDefaultLocation({ 'id': 'BP_ID', - 'expressions': ['2+']}) + 'expressions': ['2+'] + }) self._collector.Collect(inspect.currentframe()) 
evaluated_expressions = self._collector.breakpoint['evaluatedExpressions'] self.assertLen(evaluated_expressions, 1) self.assertTrue(evaluated_expressions[0]['status']['isError']) - self.assertEqual( - 'VARIABLE_NAME', - evaluated_expressions[0]['status']['refersTo']) + self.assertEqual('VARIABLE_NAME', + evaluated_expressions[0]['status']['refersTo']) def testExpressionException(self): unused_dummy_a = 1 unused_dummy_b = 0 self._collector = CaptureCollectorWithDefaultLocation({ 'id': 'BP_ID', - 'expressions': ['unused_dummy_a/unused_dummy_b']}) + 'expressions': ['unused_dummy_a/unused_dummy_b'] + }) self._collector.Collect(inspect.currentframe()) zero_division_msg = ('division by zero' if six.PY3 else 'integer division or modulo by zero') - self.assertListEqual( - [{'name': 'unused_dummy_a/unused_dummy_b', - 'status': { - 'isError': True, - 'refersTo': 'VARIABLE_VALUE', - 'description': { - 'format': 'Exception occurred: $0', - 'parameters': [zero_division_msg]}}}], - self._collector.breakpoint['evaluatedExpressions']) + self.assertListEqual([{ + 'name': 'unused_dummy_a/unused_dummy_b', + 'status': { + 'isError': True, + 'refersTo': 'VARIABLE_VALUE', + 'description': { + 'format': 'Exception occurred: $0', + 'parameters': [zero_division_msg] + } + } + }], self._collector.breakpoint['evaluatedExpressions']) def testMutableExpression(self): @@ -813,20 +1034,24 @@ def ChangeA(): ChangeA() self._collector = CaptureCollectorWithDefaultLocation({ 'id': 'BP_ID', - 'expressions': ['ChangeA()']}) + 'expressions': ['ChangeA()'] + }) self._collector.Collect(inspect.currentframe()) self.assertEqual(1, self._a) - self.assertListEqual( - [{'name': 'ChangeA()', - 'status': { - 'isError': True, - 'refersTo': 'VARIABLE_VALUE', - 'description': { - 'format': 'Exception occurred: $0', - 'parameters': [('Only immutable methods can be ' - 'called from expressions')]}}}], - self._collector.breakpoint['evaluatedExpressions']) + self.assertListEqual([{ + 'name': 'ChangeA()', + 'status': { 
+ 'isError': True, + 'refersTo': 'VARIABLE_VALUE', + 'description': { + 'format': + 'Exception occurred: $0', + 'parameters': [('Only immutable methods can be ' + 'called from expressions')] + } + } + }], self._collector.breakpoint['evaluatedExpressions']) def testPrettyPrinters(self): @@ -844,7 +1069,8 @@ def PrettyPrinter2(obj): return ((('name2_%d' % i, '2_%d' % i) for i in range(3)), 'pp-type2') capture_collector.CaptureCollector.pretty_printers += [ - PrettyPrinter1, PrettyPrinter2] + PrettyPrinter1, PrettyPrinter2 + ] unused_obj1 = MyClass() unused_obj2 = MyClass() @@ -853,29 +1079,56 @@ def PrettyPrinter2(obj): self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) self._collector.Collect(inspect.currentframe()) - obj_vars = [self._Pack(self._LocalByName('unused_obj%d' % i)) - for i in range(1, 4)] - - self.assertListEqual( - [ - {'name': 'unused_obj1', - 'type': 'pp-type1', - 'members': [ - {'name': 'name1_0', 'value': "'1_0'", 'type': 'str'}, - {'name': 'name1_1', 'value': "'1_1'", 'type': 'str'}]}, - {'name': 'unused_obj2', - 'type': 'pp-type2', - 'members': [ - {'name': 'name2_0', 'value': "'2_0'", 'type': 'str'}, - {'name': 'name2_1', 'value': "'2_1'", 'type': 'str'}, - {'name': 'name2_2', 'value': "'2_2'", 'type': 'str'}]}, - {'name': 'unused_obj3', - 'type': __name__ + '.MyClass', - 'members': [ - {'status': { - 'refersTo': 'VARIABLE_NAME', - 'description': {'format': 'Object has no fields'}}}]}], - obj_vars) + obj_vars = [ + self._Pack(self._LocalByName('unused_obj%d' % i)) for i in range(1, 4) + ] + + self.assertListEqual([{ + 'name': + 'unused_obj1', + 'type': + 'pp-type1', + 'members': [{ + 'name': 'name1_0', + 'value': "'1_0'", + 'type': 'str' + }, { + 'name': 'name1_1', + 'value': "'1_1'", + 'type': 'str' + }] + }, { + 'name': + 'unused_obj2', + 'type': + 'pp-type2', + 'members': [{ + 'name': 'name2_0', + 'value': "'2_0'", + 'type': 'str' + }, { + 'name': 'name2_1', + 'value': "'2_1'", + 'type': 'str' + }, { + 'name': 'name2_2', 
+ 'value': "'2_2'", + 'type': 'str' + }] + }, { + 'name': + 'unused_obj3', + 'type': + __name__ + '.MyClass', + 'members': [{ + 'status': { + 'refersTo': 'VARIABLE_NAME', + 'description': { + 'format': 'Object has no fields' + } + } + }] + }], obj_vars) def testDateTime(self): unused_datetime = datetime.datetime(2014, 6, 11, 2, 30) @@ -887,28 +1140,32 @@ def testDateTime(self): self._collector.Collect(inspect.currentframe()) self.assertDictEqual( - {'name': 'unused_datetime', - 'type': 'datetime.datetime', - 'value': '2014-06-11 02:30:00'}, - self._Pack(self._LocalByName('unused_datetime'))) + { + 'name': 'unused_datetime', + 'type': 'datetime.datetime', + 'value': '2014-06-11 02:30:00' + }, self._Pack(self._LocalByName('unused_datetime'))) self.assertDictEqual( - {'name': 'unused_date', - 'type': 'datetime.datetime', - 'value': '1980-03-01 00:00:00'}, - self._Pack(self._LocalByName('unused_date'))) + { + 'name': 'unused_date', + 'type': 'datetime.datetime', + 'value': '1980-03-01 00:00:00' + }, self._Pack(self._LocalByName('unused_date'))) self.assertDictEqual( - {'name': 'unused_time', - 'type': 'datetime.time', - 'value': '18:43:11'}, - self._Pack(self._LocalByName('unused_time'))) + { + 'name': 'unused_time', + 'type': 'datetime.time', + 'value': '18:43:11' + }, self._Pack(self._LocalByName('unused_time'))) self.assertDictEqual( - {'name': 'unused_timedelta', - 'type': 'datetime.timedelta', - 'value': '3 days, 0:00:00.008237'}, - self._Pack(self._LocalByName('unused_timedelta'))) + { + 'name': 'unused_timedelta', + 'type': 'datetime.timedelta', + 'value': '3 days, 0:00:00.008237' + }, self._Pack(self._LocalByName('unused_timedelta'))) def testException(self): unused_exception = ValueError('arg1', 2, [3]) @@ -919,12 +1176,23 @@ def testException(self): self.assertEqual('unused_exception', obj['name']) self.assertEqual('ValueError', obj['type']) - self.assertListEqual([ - {'value': "'arg1'", 'type': 'str', 'name': '[0]'}, - {'value': '2', 'type': 'int', 'name': 
'[1]'}, - {'members': [{'value': '3', 'type': 'int', 'name': '[0]'}], - 'type': 'list', - 'name': '[2]'}], obj['members']) + self.assertListEqual([{ + 'value': "'arg1'", + 'type': 'str', + 'name': '[0]' + }, { + 'value': '2', + 'type': 'int', + 'name': '[1]' + }, { + 'members': [{ + 'value': '3', + 'type': 'int', + 'name': '[0]' + }], + 'type': 'list', + 'name': '[2]' + }], obj['members']) def testRequestLogIdCapturing(self): capture_collector.request_log_id_collector = lambda: 'test_log_id' @@ -952,12 +1220,10 @@ def testUserIdSuccess(self): self._collector.Collect(inspect.currentframe()) self.assertIn('evaluatedUserId', self._collector.breakpoint) - self.assertEqual( - { - 'kind': 'mdb_user', - 'id': 'noogler' - }, - self._collector.breakpoint['evaluatedUserId']) + self.assertEqual({ + 'kind': 'mdb_user', + 'id': 'noogler' + }, self._collector.breakpoint['evaluatedUserId']) def testUserIdIsNone(self): capture_collector.user_id_collector = lambda: (None, None) @@ -997,8 +1263,9 @@ def _Pack(self, variable): del packed_variable['varTableIndex'] if 'members' in packed_variable: - packed_variable['members'] = [self._Pack(member) for member - in packed_variable['members']] + packed_variable['members'] = [ + self._Pack(member) for member in packed_variable['members'] + ] return packed_variable @@ -1018,7 +1285,10 @@ def __init__(self): def emit(self, record): self._received_records.append(record) - def GotMessage(self, msg, level=logging.INFO, line_number=10, + def GotMessage(self, + msg, + level=logging.INFO, + line_number=10, func_name=None): """Checks that the given message was logged correctly. 
@@ -1037,8 +1307,8 @@ def GotMessage(self, msg, level=logging.INFO, line_number=10, record = self._received_records.pop(0) frame = inspect.currentframe().f_back if level != record.levelno: - logging.error('Expected log level %d, got %d (%s)', - level, record.levelno, record.levelname) + logging.error('Expected log level %d, got %d (%s)', level, + record.levelno, record.levelname) return False if msg != record.msg: logging.error('Expected msg "%s", received "%s"', msg, record.msg) @@ -1053,12 +1323,12 @@ def GotMessage(self, msg, level=logging.INFO, line_number=10, os.path.basename(pathname), record.filename) return False if func_name and func_name != record.funcName: - logging.error('Expected function "%s", received "%s"', - func_name, record.funcName) + logging.error('Expected function "%s", received "%s"', func_name, + record.funcName) return False if line_number and record.lineno != line_number: - logging.error('Expected lineno %d, received %d', - line_number, record.lineno) + logging.error('Expected lineno %d, received %d', line_number, + record.lineno) return False for attr in ['cdbg_pathname', 'cdbg_lineno']: if hasattr(record, attr): @@ -1114,20 +1384,23 @@ def testLogQuota(self): # recover so the ordering of tests ideally doesn't affect this test. 
self.ResetGlobalLogQuota() bucket_max_capacity = 250 - collector = LogCollectorWithDefaultLocation( - {'logMessageFormat': '$0', 'expressions': ['i']}) + collector = LogCollectorWithDefaultLocation({ + 'logMessageFormat': '$0', + 'expressions': ['i'] + }) for i in range(0, bucket_max_capacity * 2): self.assertIsNone(collector.Log(inspect.currentframe())) if not self._verifier.CheckMessageSafe('LOGPOINT: %s' % i): - self.assertGreaterEqual( - i, bucket_max_capacity, - 'Log quota exhausted earlier than expected') - self.assertTrue(self._verifier.CheckMessageSafe(LOGPOINT_PAUSE_MSG), - 'Quota hit message not logged') + self.assertGreaterEqual(i, bucket_max_capacity, + 'Log quota exhausted earlier than expected') + self.assertTrue( + self._verifier.CheckMessageSafe(LOGPOINT_PAUSE_MSG), + 'Quota hit message not logged') time.sleep(0.6) self.assertIsNone(collector.Log(inspect.currentframe())) - self.assertTrue(self._verifier.CheckMessageSafe('LOGPOINT: %s' % i), - 'Logging not resumed after quota recovery time') + self.assertTrue( + self._verifier.CheckMessageSafe('LOGPOINT: %s' % i), + 'Logging not resumed after quota recovery time') return self.fail('Logging was never paused when quota was exceeded') @@ -1140,15 +1413,15 @@ def testLogBytesQuota(self): # implemented, it can allow effectively twice that amount to go out in a # very short time frame. So the third 30k message should pause. 
msg = ' ' * 30000 - collector = LogCollectorWithDefaultLocation( - {'logMessageFormat': msg}) + collector = LogCollectorWithDefaultLocation({'logMessageFormat': msg}) self.assertIsNone(collector.Log(inspect.currentframe())) self.assertTrue(self._verifier.GotMessage('LOGPOINT: ' + msg)) self.assertIsNone(collector.Log(inspect.currentframe())) self.assertTrue(self._verifier.GotMessage('LOGPOINT: ' + msg)) self.assertIsNone(collector.Log(inspect.currentframe())) - self.assertTrue(self._verifier.CheckMessageSafe(LOGPOINT_PAUSE_MSG), - 'Quota hit message not logged') + self.assertTrue( + self._verifier.CheckMessageSafe(LOGPOINT_PAUSE_MSG), + 'Quota hit message not logged') time.sleep(0.6) collector._definition['logMessageFormat'] = 'hello' self.assertIsNone(collector.Log(inspect.currentframe())) @@ -1158,8 +1431,7 @@ def testLogBytesQuota(self): def testMissingLogLevel(self): # Missing is equivalent to INFO. - collector = LogCollectorWithDefaultLocation( - {'logMessageFormat': 'hello'}) + collector = LogCollectorWithDefaultLocation({'logMessageFormat': 'hello'}) self.assertIsNone(collector.Log(inspect.currentframe())) self.assertTrue(self._verifier.GotMessage('LOGPOINT: hello')) @@ -1167,13 +1439,18 @@ def testUndefinedLogLevel(self): capture_collector.log_info_message = None collector = LogCollectorWithDefaultLocation({'logLevel': 'INFO'}) self.assertDictEqual( - {'isError': True, - 'description': {'format': 'Log action on a breakpoint not supported'}}, - collector.Log(inspect.currentframe())) + { + 'isError': True, + 'description': { + 'format': 'Log action on a breakpoint not supported' + } + }, collector.Log(inspect.currentframe())) def testLogInfo(self): - collector = LogCollectorWithDefaultLocation( - {'logLevel': 'INFO', 'logMessageFormat': 'hello'}) + collector = LogCollectorWithDefaultLocation({ + 'logLevel': 'INFO', + 'logMessageFormat': 'hello' + }) collector._definition['location']['line'] = 20 self.assertIsNone(collector.Log(inspect.currentframe())) 
self.assertTrue( @@ -1183,8 +1460,10 @@ def testLogInfo(self): line_number=20)) def testLogWarning(self): - collector = LogCollectorWithDefaultLocation( - {'logLevel': 'WARNING', 'logMessageFormat': 'hello'}) + collector = LogCollectorWithDefaultLocation({ + 'logLevel': 'WARNING', + 'logMessageFormat': 'hello' + }) self.assertIsNone(collector.Log(inspect.currentframe())) self.assertTrue( self._verifier.GotMessage( @@ -1193,8 +1472,10 @@ def testLogWarning(self): func_name='LogCollectorTest.testLogWarning')) def testLogError(self): - collector = LogCollectorWithDefaultLocation( - {'logLevel': 'ERROR', 'logMessageFormat': 'hello'}) + collector = LogCollectorWithDefaultLocation({ + 'logLevel': 'ERROR', + 'logMessageFormat': 'hello' + }) self.assertIsNone(collector.Log(inspect.currentframe())) self.assertTrue( self._verifier.GotMessage( @@ -1203,15 +1484,17 @@ def testLogError(self): func_name='LogCollectorTest.testLogError')) def testBadExpression(self): - collector = LogCollectorWithDefaultLocation( - {'logLevel': 'INFO', - 'logMessageFormat': 'a=$0, b=$1', - 'expressions': ['-', '+']}) + collector = LogCollectorWithDefaultLocation({ + 'logLevel': 'INFO', + 'logMessageFormat': 'a=$0, b=$1', + 'expressions': ['-', '+'] + }) self.assertIsNone(collector.Log(inspect.currentframe())) - self.assertTrue(self._verifier.GotMessage( - 'LOGPOINT: a=, b=')) + self.assertTrue( + self._verifier.GotMessage( + 'LOGPOINT: a=, b=')) def testDollarEscape(self): unused_integer = 12345 @@ -1226,42 +1509,48 @@ def testDollarEscape(self): self.assertTrue(self._verifier.GotMessage(msg)) def testInvalidExpressionIndex(self): - collector = LogCollectorWithDefaultLocation( - {'logLevel': 'INFO', - 'logMessageFormat': 'a=$0'}) + collector = LogCollectorWithDefaultLocation({ + 'logLevel': 'INFO', + 'logMessageFormat': 'a=$0' + }) self.assertIsNone(collector.Log(inspect.currentframe())) self.assertTrue(self._verifier.GotMessage('LOGPOINT: a=')) def testException(self): - collector = 
LogCollectorWithDefaultLocation( - {'logLevel': 'INFO', - 'logMessageFormat': '$0', - 'expressions': ['[][1]']}) + collector = LogCollectorWithDefaultLocation({ + 'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['[][1]'] + }) self.assertIsNone(collector.Log(inspect.currentframe())) - self.assertTrue(self._verifier.GotMessage( - 'LOGPOINT: ')) + self.assertTrue( + self._verifier.GotMessage( + 'LOGPOINT: ')) def testMutableExpression(self): def MutableMethod(): # pylint: disable=unused-variable self.abc = None - collector = LogCollectorWithDefaultLocation( - {'logLevel': 'INFO', - 'logMessageFormat': '$0', - 'expressions': ['MutableMethod()']}) + collector = LogCollectorWithDefaultLocation({ + 'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['MutableMethod()'] + }) self.assertIsNone(collector.Log(inspect.currentframe())) - self.assertTrue(self._verifier.GotMessage( - 'LOGPOINT: ')) + self.assertTrue( + self._verifier.GotMessage( + 'LOGPOINT: ')) def testNone(self): unused_none = None - collector = LogCollectorWithDefaultLocation( - {'logLevel': 'INFO', - 'logMessageFormat': '$0', - 'expressions': ['unused_none']}) + collector = LogCollectorWithDefaultLocation({ + 'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_none'] + }) self.assertIsNone(collector.Log(inspect.currentframe())) self.assertTrue(self._verifier.GotMessage('LOGPOINT: None')) @@ -1270,20 +1559,22 @@ def testPrimitives(self): unused_integer = 12345 unused_string = 'hello' - collector = LogCollectorWithDefaultLocation( - {'logLevel': 'INFO', - 'logMessageFormat': '$0,$1,$2', - 'expressions': ['unused_boolean', 'unused_integer', 'unused_string']}) + collector = LogCollectorWithDefaultLocation({ + 'logLevel': 'INFO', + 'logMessageFormat': '$0,$1,$2', + 'expressions': ['unused_boolean', 'unused_integer', 'unused_string'] + }) self.assertIsNone(collector.Log(inspect.currentframe())) self.assertTrue(self._verifier.GotMessage("LOGPOINT: 
True,12345,'hello'")) def testLongString(self): unused_string = '1234567890' - collector = LogCollectorWithDefaultLocation( - {'logLevel': 'INFO', - 'logMessageFormat': '$0', - 'expressions': ['unused_string']}) + collector = LogCollectorWithDefaultLocation({ + 'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_string'] + }) collector.max_value_len = 9 self.assertIsNone(collector.Log(inspect.currentframe())) self.assertTrue(self._verifier.GotMessage("LOGPOINT: '123456789...")) @@ -1291,14 +1582,15 @@ def testLongString(self): def testLongBytes(self): unused_bytes = bytearray([i for i in range(20)]) - collector = LogCollectorWithDefaultLocation( - {'logLevel': 'INFO', - 'logMessageFormat': '$0', - 'expressions': ['unused_bytes']}) + collector = LogCollectorWithDefaultLocation({ + 'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_bytes'] + }) collector.max_value_len = 20 self.assertIsNone(collector.Log(inspect.currentframe())) - self.assertTrue(self._verifier.GotMessage( - r"LOGPOINT: bytearray(b'\x00\x01\...")) + self.assertTrue( + self._verifier.GotMessage(r"LOGPOINT: bytearray(b'\x00\x01\...")) def testDate(self): unused_datetime = datetime.datetime(2014, 6, 11, 2, 30) @@ -1306,53 +1598,62 @@ def testDate(self): unused_time = datetime.time(18, 43, 11) unused_timedelta = datetime.timedelta(days=3, microseconds=8237) - collector = LogCollectorWithDefaultLocation( - {'logLevel': 'INFO', - 'logMessageFormat': '$0;$1;$2;$3', - 'expressions': ['unused_datetime', 'unused_date', - 'unused_time', 'unused_timedelta']}) + collector = LogCollectorWithDefaultLocation({ + 'logLevel': + 'INFO', + 'logMessageFormat': + '$0;$1;$2;$3', + 'expressions': [ + 'unused_datetime', 'unused_date', 'unused_time', 'unused_timedelta' + ] + }) self.assertIsNone(collector.Log(inspect.currentframe())) - self.assertTrue(self._verifier.GotMessage( - 'LOGPOINT: 2014-06-11 02:30:00;1980-03-01 00:00:00;' - '18:43:11;3 days, 0:00:00.008237')) + 
self.assertTrue( + self._verifier.GotMessage( + 'LOGPOINT: 2014-06-11 02:30:00;1980-03-01 00:00:00;' + '18:43:11;3 days, 0:00:00.008237')) def testSet(self): unused_set = set(['a']) - collector = LogCollectorWithDefaultLocation( - {'logLevel': 'INFO', - 'logMessageFormat': '$0', - 'expressions': ['unused_set']}) + collector = LogCollectorWithDefaultLocation({ + 'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_set'] + }) self.assertIsNone(collector.Log(inspect.currentframe())) self.assertTrue(self._verifier.GotMessage("LOGPOINT: {'a'}")) def testTuple(self): unused_tuple = (1, 2, 3, 4, 5) - collector = LogCollectorWithDefaultLocation( - {'logLevel': 'INFO', - 'logMessageFormat': '$0', - 'expressions': ['unused_tuple']}) + collector = LogCollectorWithDefaultLocation({ + 'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_tuple'] + }) self.assertIsNone(collector.Log(inspect.currentframe())) self.assertTrue(self._verifier.GotMessage('LOGPOINT: (1, 2, 3, 4, 5)')) def testList(self): unused_list = ['a', 'b', 'c'] - collector = LogCollectorWithDefaultLocation( - {'logLevel': 'INFO', - 'logMessageFormat': '$0', - 'expressions': ['unused_list']}) + collector = LogCollectorWithDefaultLocation({ + 'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_list'] + }) self.assertIsNone(collector.Log(inspect.currentframe())) self.assertTrue(self._verifier.GotMessage("LOGPOINT: ['a', 'b', 'c']")) def testOversizedList(self): unused_list = [1, 2, 3, 4] - collector = LogCollectorWithDefaultLocation( - {'logLevel': 'INFO', - 'logMessageFormat': '$0', - 'expressions': ['unused_list']}) + collector = LogCollectorWithDefaultLocation({ + 'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_list'] + }) collector.max_list_items = 3 self.assertIsNone(collector.Log(inspect.currentframe())) self.assertTrue(self._verifier.GotMessage('LOGPOINT: [1, 2, 3, ...]')) @@ -1360,10 +1661,11 @@ def 
testOversizedList(self): def testSlice(self): unused_slice = slice(1, 10) - collector = LogCollectorWithDefaultLocation( - {'logLevel': 'INFO', - 'logMessageFormat': '$0', - 'expressions': ['unused_slice']}) + collector = LogCollectorWithDefaultLocation({ + 'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_slice'] + }) collector.max_list_items = 3 self.assertIsNone(collector.Log(inspect.currentframe())) self.assertTrue(self._verifier.GotMessage('LOGPOINT: slice(1, 10, None)')) @@ -1371,10 +1673,11 @@ def testSlice(self): def testMap(self): unused_map = {'a': 1} - collector = LogCollectorWithDefaultLocation( - {'logLevel': 'INFO', - 'logMessageFormat': '$0', - 'expressions': ['unused_map']}) + collector = LogCollectorWithDefaultLocation({ + 'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_map'] + }) self.assertIsNone(collector.Log(inspect.currentframe())) self.assertTrue(self._verifier.GotMessage("LOGPOINT: {'a': 1}")) @@ -1387,61 +1690,70 @@ def __init__(self): unused_my = MyClass() - collector = LogCollectorWithDefaultLocation( - {'logLevel': 'INFO', - 'logMessageFormat': '$0', - 'expressions': ['unused_my']}) + collector = LogCollectorWithDefaultLocation({ + 'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_my'] + }) self.assertIsNone(collector.Log(inspect.currentframe())) self.assertTrue(self._verifier.GotMessage("LOGPOINT: {'some': 'thing'}")) def testNestedBelowLimit(self): unused_list = [1, [2], [1, 2, 3], [1, [1, 2, 3]], 5] - collector = LogCollectorWithDefaultLocation( - {'logLevel': 'INFO', - 'logMessageFormat': '$0', - 'expressions': ['unused_list']}) + collector = LogCollectorWithDefaultLocation({ + 'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_list'] + }) self.assertIsNone(collector.Log(inspect.currentframe())) - self.assertTrue(self._verifier.GotMessage( - 'LOGPOINT: [1, [2], [1, 2, 3], [1, [1, 2, 3]], 5]')) + self.assertTrue( + 
self._verifier.GotMessage( + 'LOGPOINT: [1, [2], [1, 2, 3], [1, [1, 2, 3]], 5]')) def testNestedAtLimits(self): unused_list = [ - 1, [1, 2, 3, 4, 5], [[1, 2, 3, 4, 5], 2, 3, 4, 5], 4, 5, 6, 7, 8, 9] + 1, [1, 2, 3, 4, 5], [[1, 2, 3, 4, 5], 2, 3, 4, 5], 4, 5, 6, 7, 8, 9 + ] - collector = LogCollectorWithDefaultLocation( - {'logLevel': 'INFO', - 'logMessageFormat': '$0', - 'expressions': ['unused_list']}) + collector = LogCollectorWithDefaultLocation({ + 'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_list'] + }) self.assertIsNone(collector.Log(inspect.currentframe())) - self.assertTrue(self._verifier.GotMessage( - 'LOGPOINT: [1, [1, 2, 3, 4, 5], [[1, 2, 3, 4, 5], 2, 3, 4, 5], ' - '4, 5, 6, 7, 8, 9]')) + self.assertTrue( + self._verifier.GotMessage( + 'LOGPOINT: [1, [1, 2, 3, 4, 5], [[1, 2, 3, 4, 5], 2, 3, 4, 5], ' + '4, 5, 6, 7, 8, 9]')) def testNestedRecursionLimit(self): unused_list = [1, [[2, [3]], 4], 5] - collector = LogCollectorWithDefaultLocation( - {'logLevel': 'INFO', - 'logMessageFormat': '$0', - 'expressions': ['unused_list']}) + collector = LogCollectorWithDefaultLocation({ + 'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_list'] + }) self.assertIsNone(collector.Log(inspect.currentframe())) - self.assertTrue(self._verifier.GotMessage( - 'LOGPOINT: [1, [[2, %s], 4], 5]' % type([]))) + self.assertTrue( + self._verifier.GotMessage('LOGPOINT: [1, [[2, %s], 4], 5]' % type([]))) def testNestedRecursionItemLimits(self): unused_list = [1, [1, [1, [2], 3, 4], 3, 4], 3, 4] list_type = "" if six.PY3 else "" - collector = LogCollectorWithDefaultLocation( - {'logLevel': 'INFO', - 'logMessageFormat': '$0', - 'expressions': ['unused_list']}) + collector = LogCollectorWithDefaultLocation({ + 'logLevel': 'INFO', + 'logMessageFormat': '$0', + 'expressions': ['unused_list'] + }) collector.max_list_items = 3 collector.max_sublist_items = 3 self.assertIsNone(collector.Log(inspect.currentframe())) - 
self.assertTrue(self._verifier.GotMessage( - 'LOGPOINT: [1, [1, [1, %s, 3, ...], 3, ...], 3, ...]' % list_type)) + self.assertTrue( + self._verifier.GotMessage( + 'LOGPOINT: [1, [1, [1, %s, 3, ...], 3, ...], 3, ...]' % list_type)) def testDetermineType(self): builtin_prefix = 'builtins.' if six.PY3 else '__builtin__.' @@ -1450,8 +1762,7 @@ def testDetermineType(self): (builtin_prefix + 'int', 5), (builtin_prefix + 'str', 'hello'), (builtin_prefix + 'function', capture_collector.DetermineType), - (path_prefix + 'LineNoFilter', - capture_collector.LineNoFilter()), + (path_prefix + 'LineNoFilter', capture_collector.LineNoFilter()), ) for type_string, value in test_data: diff --git a/tests/gcp_hub_client_test.py b/tests/gcp_hub_client_test.py index 7fc7720..ada26bf 100644 --- a/tests/gcp_hub_client_test.py +++ b/tests/gcp_hub_client_test.py @@ -20,7 +20,6 @@ from googleclouddebugger import gcp_hub_client - TEST_DEBUGGEE_ID = 'gcp:debuggee-id' TEST_AGENT_ID = 'abc-123-d4' TEST_PROJECT_ID = 'test-project-id' @@ -51,9 +50,10 @@ def setUp(self): self._client = gcp_hub_client.GcpHubClient() - for backoff in [self._client.register_backoff, - self._client.list_backoff, - self._client.update_backoff]: + for backoff in [ + self._client.register_backoff, self._client.list_backoff, + self._client.update_backoff + ]: backoff.min_interval_sec /= 100000.0 backoff.max_interval_sec /= 100000.0 backoff._current_interval_sec /= 100000.0 @@ -252,24 +252,25 @@ def testTransmitBreakpointUpdateSocketError(self): def _TestInitializeLabels(self, module_var, version_var, minor_var): self._Start() - self._client.InitializeDebuggeeLabels( - {'module': 'my_module', - 'version': '1', - 'minorversion': '23', - 'something_else': 'irrelevant'}) - self.assertEqual( - {'projectid': 'test-project-id', - 'module': 'my_module', - 'version': '1', - 'minorversion': '23', - 'platform': 'default'}, - self._client._debuggee_labels) + self._client.InitializeDebuggeeLabels({ + 'module': 'my_module', + 'version': 
'1', + 'minorversion': '23', + 'something_else': 'irrelevant' + }) self.assertEqual( - 'test-project-id-my_module-1', - self._client._GetDebuggeeDescription()) - - uniquifier1 = self._client._ComputeUniquifier({ - 'labels': self._client._debuggee_labels}) + { + 'projectid': 'test-project-id', + 'module': 'my_module', + 'version': '1', + 'minorversion': '23', + 'platform': 'default' + }, self._client._debuggee_labels) + self.assertEqual('test-project-id-my_module-1', + self._client._GetDebuggeeDescription()) + + uniquifier1 = self._client._ComputeUniquifier( + {'labels': self._client._debuggee_labels}) self.assertTrue(uniquifier1) # Not empty string. try: @@ -278,29 +279,29 @@ def _TestInitializeLabels(self, module_var, version_var, minor_var): os.environ[minor_var] = '3476734' self._client.InitializeDebuggeeLabels(None) self.assertEqual( - {'projectid': 'test-project-id', - 'module': 'env_module', - 'version': '213', - 'minorversion': '3476734', - 'platform': 'default'}, - self._client._debuggee_labels) - self.assertEqual( - 'test-project-id-env_module-213', - self._client._GetDebuggeeDescription()) + { + 'projectid': 'test-project-id', + 'module': 'env_module', + 'version': '213', + 'minorversion': '3476734', + 'platform': 'default' + }, self._client._debuggee_labels) + self.assertEqual('test-project-id-env_module-213', + self._client._GetDebuggeeDescription()) os.environ[module_var] = 'default' os.environ[version_var] = '213' os.environ[minor_var] = '3476734' self._client.InitializeDebuggeeLabels({'minorversion': 'something else'}) self.assertEqual( - {'projectid': 'test-project-id', - 'version': '213', - 'minorversion': 'something else', - 'platform': 'default'}, - self._client._debuggee_labels) - self.assertEqual( - 'test-project-id-213', - self._client._GetDebuggeeDescription()) + { + 'projectid': 'test-project-id', + 'version': '213', + 'minorversion': 'something else', + 'platform': 'default' + }, self._client._debuggee_labels) + 
self.assertEqual('test-project-id-213', + self._client._GetDebuggeeDescription()) finally: del os.environ[module_var] @@ -308,12 +309,12 @@ def _TestInitializeLabels(self, module_var, version_var, minor_var): del os.environ[minor_var] def testInitializeLegacyDebuggeeLabels(self): - self._TestInitializeLabels( - 'GAE_MODULE_NAME', 'GAE_MODULE_VERSION', 'GAE_MINOR_VERSION') + self._TestInitializeLabels('GAE_MODULE_NAME', 'GAE_MODULE_VERSION', + 'GAE_MINOR_VERSION') def testInitializeDebuggeeLabels(self): - self._TestInitializeLabels( - 'GAE_SERVICE', 'GAE_VERSION', 'GAE_DEPLOYMENT_ID') + self._TestInitializeLabels('GAE_SERVICE', 'GAE_VERSION', + 'GAE_DEPLOYMENT_ID') def testInitializeCloudRunDebuggeeLabels(self): self._Start() @@ -322,12 +323,13 @@ def testInitializeCloudRunDebuggeeLabels(self): os.environ['K_SERVICE'] = 'env_module' os.environ['K_REVISION'] = '213' self._client.InitializeDebuggeeLabels(None) - self.assertEqual({ - 'projectid': 'test-project-id', - 'module': 'env_module', - 'version': '213', - 'platform': 'default' - }, self._client._debuggee_labels) + self.assertEqual( + { + 'projectid': 'test-project-id', + 'module': 'env_module', + 'version': '213', + 'platform': 'default' + }, self._client._debuggee_labels) self.assertEqual('test-project-id-env_module-213', self._client._GetDebuggeeDescription()) @@ -342,12 +344,13 @@ def testInitializeCloudFunctionDebuggeeLabels(self): os.environ['FUNCTION_NAME'] = 'fcn-name' os.environ['X_GOOGLE_FUNCTION_VERSION'] = '213' self._client.InitializeDebuggeeLabels(None) - self.assertEqual({ - 'projectid': 'test-project-id', - 'module': 'fcn-name', - 'version': '213', - 'platform': 'cloud_function' - }, self._client._debuggee_labels) + self.assertEqual( + { + 'projectid': 'test-project-id', + 'module': 'fcn-name', + 'version': '213', + 'platform': 'cloud_function' + }, self._client._debuggee_labels) self.assertEqual('test-project-id-fcn-name-213', self._client._GetDebuggeeDescription()) @@ -361,12 +364,13 @@ def 
testInitializeCloudFunctionUnversionedDebuggeeLabels(self): try: os.environ['FUNCTION_NAME'] = 'fcn-name' self._client.InitializeDebuggeeLabels(None) - self.assertEqual({ - 'projectid': 'test-project-id', - 'module': 'fcn-name', - 'version': 'unversioned', - 'platform': 'cloud_function' - }, self._client._debuggee_labels) + self.assertEqual( + { + 'projectid': 'test-project-id', + 'module': 'fcn-name', + 'version': 'unversioned', + 'platform': 'cloud_function' + }, self._client._debuggee_labels) self.assertEqual('test-project-id-fcn-name-unversioned', self._client._GetDebuggeeDescription()) @@ -380,13 +384,14 @@ def testInitializeCloudFunctionWithRegionDebuggeeLabels(self): os.environ['FUNCTION_NAME'] = 'fcn-name' os.environ['FUNCTION_REGION'] = 'fcn-region' self._client.InitializeDebuggeeLabels(None) - self.assertEqual({ - 'projectid': 'test-project-id', - 'module': 'fcn-name', - 'version': 'unversioned', - 'platform': 'cloud_function', - 'region': 'fcn-region' - }, self._client._debuggee_labels) + self.assertEqual( + { + 'projectid': 'test-project-id', + 'module': 'fcn-name', + 'version': 'unversioned', + 'platform': 'cloud_function', + 'region': 'fcn-region' + }, self._client._debuggee_labels) self.assertEqual('test-project-id-fcn-name-unversioned', self._client._GetDebuggeeDescription()) @@ -461,8 +466,9 @@ def testSourceContext(self): self.assertNotIn('sourceContexts', debuggee_no_source_context1) self.assertNotIn('sourceContexts', debuggee_bad_source_context) - self.assertListEqual([{'what': 'source context'}], - debuggee_with_source_context['sourceContexts']) + self.assertListEqual([{ + 'what': 'source context' + }], debuggee_with_source_context['sourceContexts']) uniquifiers = set() uniquifiers.add(debuggee_no_source_context1['uniquifier']) diff --git a/tests/glob_data_visibility_policy_test.py b/tests/glob_data_visibility_policy_test.py index acf9625..8670198 100644 --- a/tests/glob_data_visibility_policy_test.py +++ 
b/tests/glob_data_visibility_policy_test.py @@ -19,10 +19,7 @@ def testIsDataVisible(self): '*.private2', '', ) - whitelist_patterns = ( - 'wl1.*', - 'wl2.*' - ) + whitelist_patterns = ('wl1.*', 'wl2.*') policy = glob_data_visibility_policy.GlobDataVisibilityPolicy( blacklist_patterns, whitelist_patterns) diff --git a/tests/imphook2_test.py b/tests/imphook2_test.py index 38f5cce..8045e56 100644 --- a/tests/imphook2_test.py +++ b/tests/imphook2_test.py @@ -52,10 +52,8 @@ def testDoubleImport(self): self._Hook(self._CreateFile('testpkg4/__init__.py')) import testpkg4 # pylint: disable=g-import-not-at-top,unused-variable import testpkg4 # pylint: disable=g-import-not-at-top,unused-variable - self.assertEqual( - ['testpkg4/__init__.py', - 'testpkg4/__init__.py'], - sorted(self._import_callbacks_log)) + self.assertEqual(['testpkg4/__init__.py', 'testpkg4/__init__.py'], + sorted(self._import_callbacks_log)) def testRemoveCallback(self): cleanup = self._Hook(self._CreateFile('testpkg4b/__init__.py')) @@ -77,19 +75,15 @@ def testTransitiveImport(self): self._Hook(self._CreateFile('testpkg6/third.py')) import testpkg6.first # pylint: disable=g-import-not-at-top,unused-variable self.assertEqual( - ['testpkg6/first.py', - 'testpkg6/second.py', - 'testpkg6/third.py'], + ['testpkg6/first.py', 'testpkg6/second.py', 'testpkg6/third.py'], sorted(self._import_callbacks_log)) def testPackageDotModuleImport(self): self._Hook(self._CreateFile('testpkg8/__init__.py')) self._Hook(self._CreateFile('testpkg8/my.py')) import testpkg8.my # pylint: disable=g-import-not-at-top,unused-variable - self.assertEqual( - ['testpkg8/__init__.py', - 'testpkg8/my.py'], - sorted(self._import_callbacks_log)) + self.assertEqual(['testpkg8/__init__.py', 'testpkg8/my.py'], + sorted(self._import_callbacks_log)) def testNestedPackageDotModuleImport(self): self._Hook(self._CreateFile('testpkg9a/__init__.py')) @@ -97,8 +91,7 @@ def testNestedPackageDotModuleImport(self): 
self._CreateFile('testpkg9a/testpkg9b/my.py') import testpkg9a.testpkg9b.my # pylint: disable=g-import-not-at-top,unused-variable self.assertEqual( - ['testpkg9a/__init__.py', - 'testpkg9a/testpkg9b/__init__.py'], + ['testpkg9a/__init__.py', 'testpkg9a/testpkg9b/__init__.py'], sorted(self._import_callbacks_log)) def testFromImport(self): @@ -109,15 +102,12 @@ def testFromImport(self): def testTransitiveFromImport(self): self._CreateFile('testpkg7/__init__.py') - self._Hook(self._CreateFile( - 'testpkg7/first.py', - 'from testpkg7 import second')) + self._Hook( + self._CreateFile('testpkg7/first.py', 'from testpkg7 import second')) self._Hook(self._CreateFile('testpkg7/second.py')) from testpkg7 import first # pylint: disable=g-import-not-at-top,unused-variable - self.assertEqual( - ['testpkg7/first.py', - 'testpkg7/second.py'], - sorted(self._import_callbacks_log)) + self.assertEqual(['testpkg7/first.py', 'testpkg7/second.py'], + sorted(self._import_callbacks_log)) def testFromNestedPackageImportModule(self): self._Hook(self._CreateFile('testpkg11a/__init__.py')) @@ -125,12 +115,10 @@ def testFromNestedPackageImportModule(self): self._Hook(self._CreateFile('testpkg11a/testpkg11b/my.py')) self._Hook(self._CreateFile('testpkg11a/testpkg11b/your.py')) from testpkg11a.testpkg11b import my, your # pylint: disable=g-import-not-at-top,unused-variable,g-multiple-import - self.assertEqual( - ['testpkg11a/__init__.py', - 'testpkg11a/testpkg11b/__init__.py', - 'testpkg11a/testpkg11b/my.py', - 'testpkg11a/testpkg11b/your.py'], - sorted(self._import_callbacks_log)) + self.assertEqual([ + 'testpkg11a/__init__.py', 'testpkg11a/testpkg11b/__init__.py', + 'testpkg11a/testpkg11b/my.py', 'testpkg11a/testpkg11b/your.py' + ], sorted(self._import_callbacks_log)) def testDoubleNestedImport(self): self._Hook(self._CreateFile('testpkg12a/__init__.py')) @@ -138,14 +126,12 @@ def testDoubleNestedImport(self): self._Hook(self._CreateFile('testpkg12a/testpkg12b/my.py')) from 
testpkg12a.testpkg12b import my # pylint: disable=g-import-not-at-top,unused-variable,g-multiple-import from testpkg12a.testpkg12b import my # pylint: disable=g-import-not-at-top,unused-variable,g-multiple-import - self.assertEqual( - ['testpkg12a/__init__.py', - 'testpkg12a/__init__.py', - 'testpkg12a/testpkg12b/__init__.py', - 'testpkg12a/testpkg12b/__init__.py', - 'testpkg12a/testpkg12b/my.py', - 'testpkg12a/testpkg12b/my.py'], - sorted(self._import_callbacks_log)) + self.assertEqual([ + 'testpkg12a/__init__.py', 'testpkg12a/__init__.py', + 'testpkg12a/testpkg12b/__init__.py', + 'testpkg12a/testpkg12b/__init__.py', 'testpkg12a/testpkg12b/my.py', + 'testpkg12a/testpkg12b/my.py' + ], sorted(self._import_callbacks_log)) def testFromPackageImportStar(self): self._Hook(self._CreateFile('testpkg13a/__init__.py')) @@ -161,10 +147,8 @@ def testFromPackageImportStarWith__all__(self): self._Hook(self._CreateFile('testpkg14a/my1.py')) self._Hook(self._CreateFile('testpkg14a/your1.py')) exec('from testpkg14a import *') # pylint: disable=exec-used - self.assertEqual( - ['testpkg14a/__init__.py', - 'testpkg14a/my1.py'], - sorted(self._import_callbacks_log)) + self.assertEqual(['testpkg14a/__init__.py', 'testpkg14a/my1.py'], + sorted(self._import_callbacks_log)) def testImportFunction(self): self._Hook(self._CreateFile('testpkg27/__init__.py')) @@ -175,9 +159,9 @@ def testImportLib(self): self._Hook(self._CreateFile('zero.py')) self._Hook(self._CreateFile('testpkg15a/__init__.py')) self._Hook(self._CreateFile('testpkg15a/first.py')) - self._Hook(self._CreateFile( - 'testpkg15a/testpkg15b/__init__.py', - 'assert False, "unexpected import"')) + self._Hook( + self._CreateFile('testpkg15a/testpkg15b/__init__.py', + 'assert False, "unexpected import"')) self._Hook(self._CreateFile('testpkg15a/testpkg15c/__init__.py')) self._Hook(self._CreateFile('testpkg15a/testpkg15c/second.py')) @@ -193,18 +177,14 @@ def testImportLib(self): # Import package.module. 
importlib.import_module('testpkg15a.first') - self.assertEqual( - ['testpkg15a/__init__.py', - 'testpkg15a/first.py'], - sorted(self._import_callbacks_log)) + self.assertEqual(['testpkg15a/__init__.py', 'testpkg15a/first.py'], + sorted(self._import_callbacks_log)) self._import_callbacks_log = [] # Relative module import from package context. importlib.import_module('.first', 'testpkg15a') - self.assertEqual( - ['testpkg15a/__init__.py', - 'testpkg15a/first.py'], - sorted(self._import_callbacks_log)) + self.assertEqual(['testpkg15a/__init__.py', 'testpkg15a/first.py'], + sorted(self._import_callbacks_log)) self._import_callbacks_log = [] # Relative module import from package context with '..'. @@ -216,31 +196,32 @@ def testImportLib(self): else: importlib.import_module('..first', 'testpkg15a.testpkg15b') self.assertEqual( - ['testpkg15a/__init__.py', - # TODO: Importlib may or may not load testpkg15b, - # depending on the implementation. Currently on blaze, it does not - # load testpkg15b, but a similar non-blaze code on my workstation - # loads testpkg15b. We should verify this behavior. - # 'testpkg15a/testpkg15b/__init__.py', - 'testpkg15a/first.py'], + [ + 'testpkg15a/__init__.py', + # TODO: Importlib may or may not load testpkg15b, + # depending on the implementation. Currently on blaze, it does not + # load testpkg15b, but a similar non-blaze code on my workstation + # loads testpkg15b. We should verify this behavior. + # 'testpkg15a/testpkg15b/__init__.py', + 'testpkg15a/first.py' + ], sorted(self._import_callbacks_log)) self._import_callbacks_log = [] # Relative module import from nested package context. 
importlib.import_module('.second', 'testpkg15a.testpkg15c') - self.assertEqual( - ['testpkg15a/__init__.py', - 'testpkg15a/testpkg15c/__init__.py', - 'testpkg15a/testpkg15c/second.py'], - sorted(self._import_callbacks_log)) + self.assertEqual([ + 'testpkg15a/__init__.py', 'testpkg15a/testpkg15c/__init__.py', + 'testpkg15a/testpkg15c/second.py' + ], sorted(self._import_callbacks_log)) self._import_callbacks_log = [] def testRemoveImportHookFromCallback(self): + def RunCleanup(unused_mod): cleanup() - cleanup = self._Hook( - self._CreateFile('testpkg15/__init__.py'), RunCleanup) + cleanup = self._Hook(self._CreateFile('testpkg15/__init__.py'), RunCleanup) import testpkg15 # pylint: disable=g-import-not-at-top,unused-variable import testpkg15 # pylint: disable=g-import-not-at-top,unused-variable import testpkg15 # pylint: disable=g-import-not-at-top,unused-variable @@ -255,14 +236,13 @@ def CheckFullyLoaded(module): self.assertEqual(1, getattr(module, 'validate', None), 'premature call') self._Hook(self._CreateFile('testpkg16/my1.py')) - self._Hook(self._CreateFile('testpkg16/__init__.py', - 'import my1\nvalidate = 1'), CheckFullyLoaded) + self._Hook( + self._CreateFile('testpkg16/__init__.py', 'import my1\nvalidate = 1'), + CheckFullyLoaded) import testpkg16.my1 # pylint: disable=g-import-not-at-top,unused-variable - self.assertEqual( - ['testpkg16/__init__.py', - 'testpkg16/my1.py'], - sorted(self._import_callbacks_log)) + self.assertEqual(['testpkg16/__init__.py', 'testpkg16/my1.py'], + sorted(self._import_callbacks_log)) def testCircularImportNoPrematureCallback(self): # Verifies that the callback is not invoked before the first module is fully @@ -272,22 +252,16 @@ def CheckFullyLoaded(module): self._CreateFile('testpkg17/__init__.py') self._Hook( - self._CreateFile( - 'testpkg17/c1.py', - 'import testpkg17.c2\nvalidate = 1', False), - CheckFullyLoaded) + self._CreateFile('testpkg17/c1.py', 'import testpkg17.c2\nvalidate = 1', + False), CheckFullyLoaded) 
self._Hook( - self._CreateFile( - 'testpkg17/c2.py', - 'import testpkg17.c1\nvalidate = 1', False), - CheckFullyLoaded) + self._CreateFile('testpkg17/c2.py', 'import testpkg17.c1\nvalidate = 1', + False), CheckFullyLoaded) import testpkg17.c1 # pylint: disable=g-import-not-at-top,unused-variable - self.assertEqual( - ['testpkg17/c1.py', - 'testpkg17/c2.py'], - sorted(self._import_callbacks_log)) + self.assertEqual(['testpkg17/c1.py', 'testpkg17/c2.py'], + sorted(self._import_callbacks_log)) def testImportException(self): # An exception is thrown by the builtin importer during import. @@ -307,8 +281,9 @@ def testImportException(self): def testImportNestedException(self): # An import exception is thrown and caught inside a module being imported. self._CreateFile('testpkg19/__init__.py') - self._Hook(self._CreateFile('testpkg19/m19.py', - 'try: import m19b\nexcept ImportError: pass')) + self._Hook( + self._CreateFile('testpkg19/m19.py', + 'try: import m19b\nexcept ImportError: pass')) import testpkg19.m19 # pylint: disable=g-import-not-at-top,unused-variable @@ -343,13 +318,11 @@ def testModuleImportByPathSuffix(self): def testFromImportImportsFunction(self): self._CreateFile('testpkg21a/__init__.py') self._CreateFile('testpkg21a/testpkg21b/__init__.py') - self._CreateFile( - 'testpkg21a/testpkg21b/mod.py', - ('def func1():\n' - ' return 5\n' - '\n' - 'def func2():\n' - ' return 7\n')) + self._CreateFile('testpkg21a/testpkg21b/mod.py', ('def func1():\n' + ' return 5\n' + '\n' + 'def func2():\n' + ' return 7\n')) self._Hook('mod.py') from testpkg21a.testpkg21b.mod import func1, func2 # pylint: disable=g-import-not-at-top,unused-variable,g-multiple-import @@ -357,9 +330,7 @@ def testFromImportImportsFunction(self): def testImportSibling(self): self._CreateFile('testpkg22/__init__.py') - self._CreateFile( - 'testpkg22/first.py', - 'import second') + self._CreateFile('testpkg22/first.py', 'import second') self._CreateFile('testpkg22/second.py') 
self._Hook('testpkg22/second.py') @@ -376,22 +347,20 @@ def testImportSiblingSamePackage(self): self._Hook('testpkg23/testpkg23/second.py') import testpkg23.first # pylint: disable=g-import-not-at-top,unused-variable - self.assertEqual( - ['testpkg23/testpkg23/second.py'], - self._import_callbacks_log) + self.assertEqual(['testpkg23/testpkg23/second.py'], + self._import_callbacks_log) def testImportSiblingFromInit(self): self._Hook(self._CreateFile('testpkg23a/__init__.py', 'import testpkg23b')) - self._Hook(self._CreateFile( - 'testpkg23a/testpkg23b/__init__.py', - 'import testpkg23c')) + self._Hook( + self._CreateFile('testpkg23a/testpkg23b/__init__.py', + 'import testpkg23c')) self._Hook(self._CreateFile('testpkg23a/testpkg23b/testpkg23c/__init__.py')) import testpkg23a # pylint: disable=g-import-not-at-top,unused-variable - self.assertEqual( - ['testpkg23a/__init__.py', - 'testpkg23a/testpkg23b/__init__.py', - 'testpkg23a/testpkg23b/testpkg23c/__init__.py'], - sorted(self._import_callbacks_log)) + self.assertEqual([ + 'testpkg23a/__init__.py', 'testpkg23a/testpkg23b/__init__.py', + 'testpkg23a/testpkg23b/testpkg23c/__init__.py' + ], sorted(self._import_callbacks_log)) def testThreadLocalCleanup(self): self._CreateFile('testpkg24/__init__.py') @@ -411,11 +380,10 @@ def testThreadLocalCleanupWithCaughtImportError(self): self._CreateFile( 'testpkg25/foo.py', 'import bar\n' # success. - 'import baz') # success. + 'import baz') # success. self._CreateFile('testpkg25/bar.py') self._CreateFile( - 'testpkg25/baz.py', - 'try:\n' + 'testpkg25/baz.py', 'try:\n' ' import testpkg25b\n' 'except ImportError:\n' ' pass') @@ -434,7 +402,7 @@ def testThreadLocalCleanupWithUncaughtImportError(self): self._CreateFile( 'testpkg26/foo.py', 'import bar\n' # success. - 'import baz') # fail. + 'import baz') # fail. self._CreateFile('testpkg26/bar.py') # Create a hook for any arbitrary module. Doesn't need to hit. 
@@ -510,8 +478,8 @@ def RewriteImport(line): # TODO: add test for the module param in the callback. def _Hook(self, path, callback=lambda m: None): cleanup = imphook2.AddImportCallbackBySuffix( - path, - lambda mod: (self._import_callbacks_log.append(path), callback(mod))) + path, lambda mod: + (self._import_callbacks_log.append(path), callback(mod))) self.assertTrue(cleanup, path) self._callback_cleanups.append(cleanup) return cleanup diff --git a/tests/integration_test_disabled.py b/tests/integration_test_disabled.py index 434c5e2..2fcfa00 100644 --- a/tests/integration_test_disabled.py +++ b/tests/integration_test_disabled.py @@ -73,14 +73,20 @@ def __init__(self): # Simulate a time delay for calls to the mock API. def ReturnWithDelay(val): + def GetVal(): time.sleep(_REQUEST_DELAY_SECS) return val + return GetVal self._register_execute = debuggees.register.return_value.execute - self._register_execute.side_effect = ReturnWithDelay( - {'debuggee': {'id': _TEST_DEBUGGEE_ID}, 'agentId': _TEST_AGENT_ID}) + self._register_execute.side_effect = ReturnWithDelay({ + 'debuggee': { + 'id': _TEST_DEBUGGEE_ID + }, + 'agentId': _TEST_AGENT_ID + }) self._active_breakpoints = {'breakpoints': []} self._list_execute = breakpoints.list.return_value.execute @@ -142,9 +148,12 @@ def SetBreakpointAtPathLine(self, path, line, template=None): """Sets a new breakpoint at path:line.""" breakpoint = { 'id': 'BP_%d' % next(self._id_counter), - 'createTime': - python_test_util.DateTimeToTimestamp(datetime.utcnow()), - 'location': {'path': path, 'line': line}} + 'createTime': python_test_util.DateTimeToTimestamp(datetime.utcnow()), + 'location': { + 'path': path, + 'line': line + } + } breakpoint.update(template or {}) self.SetActiveBreakpoints(self.GetActiveBreakpoints() + [breakpoint]) @@ -205,8 +214,10 @@ def execute(self): # pylint: disable=invalid-name return FakeBreakpointUpdateCommand(self._incoming_breakpoint_updates) + # We only need to attach the debugger exactly once. 
The IntegrationTest class - # is created for each test case, so we need to keep this state global. +# is created for each test case, so we need to keep this state global. + _hub = FakeHub() def _FakeLog(self, message, extra=None): @@ -231,6 +242,7 @@ def testBackCompat(self): self.assertEqual(cdbg.enable, cdbg.AttachDebugger) def testBasic(self): + def Trigger(): print('Breakpoint trigger') # BPTAG: BASIC @@ -244,12 +256,16 @@ def Trigger(): # Verify that any pre existing labels present in the breakpoint are preserved # by the agent. def testExistingLabelsSurvive(self): + def Trigger(): print('Breakpoint trigger with labels') # BPTAG: EXISTING_LABELS_SURVIVE IntegrationTest._hub.SetBreakpoint( 'EXISTING_LABELS_SURVIVE', - {'labels': {'label_1': 'value_1', 'label_2': 'value_2'}}) + {'labels': { + 'label_1': 'value_1', + 'label_2': 'value_2' + }}) Trigger() result = IntegrationTest._hub.GetNextResult() self.assertIn('labels', result.keys()) @@ -261,6 +277,7 @@ def Trigger(): # Verify that any pre existing labels present in the breakpoint have priority # if they 'collide' with labels in the agent. 
def testExistingLabelsPriority(self): + def Trigger(): print('Breakpoint trigger with labels') # BPTAG: EXISTING_LABELS_PRIORITY @@ -270,7 +287,10 @@ def Trigger(): IntegrationTest._hub.SetBreakpoint( 'EXISTING_LABELS_PRIORITY', - {'labels': {'label_1': 'value_foobar', 'label_3': 'value_3'}}) + {'labels': { + 'label_1': 'value_foobar', + 'label_3': 'value_3' + }}) Trigger() @@ -288,6 +308,7 @@ def Trigger(): self.assertEqual('value_3', result['labels']['label_3']) def testRequestLogIdLabel(self): + def Trigger(): print('Breakpoint trigger req id label') # BPTAG: REQUEST_LOG_ID_LABEL @@ -305,11 +326,12 @@ def Trigger(): result = IntegrationTest._hub.GetNextResult() self.assertIn('labels', result.keys()) self.assertIn(labels.Breakpoint.REQUEST_LOG_ID, result['labels']) - self.assertEqual( - 'foo_bar_id', result['labels'][labels.Breakpoint.REQUEST_LOG_ID]) + self.assertEqual('foo_bar_id', + result['labels'][labels.Breakpoint.REQUEST_LOG_ID]) # Tests the issue in b/30876465 def testSameLine(self): + def Trigger(): print('Breakpoint trigger same line') # BPTAG: SAME_LINE @@ -325,6 +347,7 @@ def Trigger(): self.assertListEqual(lines, [line] * num_breakpoints) def testCallStack(self): + def Method1(): Method2() @@ -343,21 +366,22 @@ def Method5(): IntegrationTest._hub.SetBreakpoint('CALL_STACK') Method1() result = IntegrationTest._hub.GetNextResult() - self.assertEqual( - ['Method5', - 'Method4', - 'Method3', - 'Method2', - 'Method1', - 'IntegrationTest.testCallStack'], - [frame['function'] for frame in result['stackFrames']][:6]) + self.assertEqual([ + 'Method5', 'Method4', 'Method3', 'Method2', 'Method1', + 'IntegrationTest.testCallStack' + ], [frame['function'] for frame in result['stackFrames']][:6]) def testInnerMethod(self): + def Inner1(): + def Inner2(): + def Inner3(): print('Inner3') # BPTAG: INNER3 + Inner3() + Inner2() IntegrationTest._hub.SetBreakpoint('INNER3') @@ -390,14 +414,22 @@ def Get(self): self.assertEqual('MyClass.Get', 
result['stackFrames'][0]['function']) self.assertEqual('MyClass.Caller', result['stackFrames'][1]['function']) self.assertEqual( - {'name': 'self', - 'type': __name__ + '.MyClass', - 'members': [ - {'status': { - 'refersTo': 'VARIABLE_NAME', - 'description': {'format': 'Object has no fields'}}}]}, - python_test_util.PackFrameVariable(result, 'self', - collection='arguments')) + { + 'name': + 'self', + 'type': + __name__ + '.MyClass', + 'members': [{ + 'status': { + 'refersTo': 'VARIABLE_NAME', + 'description': { + 'format': 'Object has no fields' + } + } + }] + }, + python_test_util.PackFrameVariable( + result, 'self', collection='arguments')) def testGlobalDecorator(self): IntegrationTest._hub.SetBreakpoint('WRAPPED_GLOBAL_METHOD') @@ -407,6 +439,7 @@ def testGlobalDecorator(self): self.assertNotIn('status', result) def testNoLambdaExpression(self): + def Trigger(): cube = lambda x: x**3 # BPTAG: LAMBDA cube(18) @@ -422,6 +455,7 @@ def Trigger(): self.assertListEqual(functions, ['Trigger'] * num_breakpoints) def testNoGeneratorExpression(self): + def Trigger(): gen = (i for i in range(0, 5)) # BPTAG: GENEXPR next(gen) @@ -441,6 +475,7 @@ def Trigger(): self.assertListEqual(functions, ['Trigger'] * num_breakpoints) def testTryBlock(self): + def Method(a): try: return a * a # BPTAG: TRY_BLOCK @@ -451,55 +486,81 @@ def Method(a): Method(11) result = IntegrationTest._hub.GetNextResult() self.assertEqual('Method', result['stackFrames'][0]['function']) - self.assertEqual( - [{'name': 'a', 'value': '11', 'type': 'int'}], - result['stackFrames'][0]['arguments']) + self.assertEqual([{ + 'name': 'a', + 'value': '11', + 'type': 'int' + }], result['stackFrames'][0]['arguments']) def testFrameArguments(self): + def Method(a, b): return a + str(b) # BPTAG: FRAME_ARGUMENTS + IntegrationTest._hub.SetBreakpoint('FRAME_ARGUMENTS') Method('hello', 87) result = IntegrationTest._hub.GetNextResult() - self.assertEqual( - [{'name': 'a', 'value': "'hello'", 'type': 'str'}, - {'name': 
'b', 'value': '87', 'type': 'int'}], - result['stackFrames'][0]['arguments']) + self.assertEqual([{ + 'name': 'a', + 'value': "'hello'", + 'type': 'str' + }, { + 'name': 'b', + 'value': '87', + 'type': 'int' + }], result['stackFrames'][0]['arguments']) self.assertEqual('self', result['stackFrames'][1]['arguments'][0]['name']) def testFrameLocals(self): + class Number(object): def __init__(self): self.n = 57 def Method(a): - b = a ** 2 + b = a**2 c = str(a) * 3 return c + str(b) # BPTAG: FRAME_LOCALS + IntegrationTest._hub.SetBreakpoint('FRAME_LOCALS') x = {'a': 1, 'b': Number()} Method(8) result = IntegrationTest._hub.GetNextResult() + self.assertEqual({ + 'name': 'b', + 'value': '64', + 'type': 'int' + }, python_test_util.PackFrameVariable(result, 'b')) + self.assertEqual({ + 'name': 'c', + 'value': "'888'", + 'type': 'str' + }, python_test_util.PackFrameVariable(result, 'c')) self.assertEqual( - {'name': 'b', 'value': '64', 'type': 'int'}, - python_test_util.PackFrameVariable(result, 'b')) - self.assertEqual( - {'name': 'c', 'value': "'888'", 'type': 'str'}, - python_test_util.PackFrameVariable(result, 'c')) - self.assertEqual( - {'name': 'x', - 'type': 'dict', - 'members': [{'name': "'a'", 'value': '1', 'type': 'int'}, - {'name': "'b'", - 'type': __name__ + '.Number', - 'members': [{'name': 'n', - 'value': '57', - 'type': 'int'}]}]}, - python_test_util.PackFrameVariable(result, 'x', frame=1)) + { + 'name': + 'x', + 'type': + 'dict', + 'members': [{ + 'name': "'a'", + 'value': '1', + 'type': 'int' + }, { + 'name': "'b'", + 'type': __name__ + '.Number', + 'members': [{ + 'name': 'n', + 'value': '57', + 'type': 'int' + }] + }] + }, python_test_util.PackFrameVariable(result, 'x', frame=1)) return x def testRecursion(self): + def RecursiveMethod(i): if i == 0: return 0 # BPTAG: RECURSION @@ -510,11 +571,14 @@ def RecursiveMethod(i): result = IntegrationTest._hub.GetNextResult() for frame in range(5): - self.assertEqual( - {'name': 'i', 'value': str(frame), 'type': 
'int'}, - python_test_util.PackFrameVariable(result, 'i', frame, 'arguments')) + self.assertEqual({ + 'name': 'i', + 'value': str(frame), + 'type': 'int' + }, python_test_util.PackFrameVariable(result, 'i', frame, 'arguments')) def testWatchedExpressions(self): + def Trigger(): class MyClass(object): @@ -526,18 +590,27 @@ def __init__(self): unused_my = MyClass() print('Breakpoint trigger') # BPTAG: WATCHED_EXPRESSION - IntegrationTest._hub.SetBreakpoint( - 'WATCHED_EXPRESSION', - {'expressions': ['unused_my']}) + IntegrationTest._hub.SetBreakpoint('WATCHED_EXPRESSION', + {'expressions': ['unused_my']}) Trigger() result = IntegrationTest._hub.GetNextResult() self.assertEqual( - {'name': 'unused_my', - 'type': __name__ + '.MyClass', - 'members': [{'name': 'a', 'value': '1', 'type': 'int'}, - {'name': 'b', 'value': "'bbb'", 'type': 'str'}]}, - python_test_util.PackWatchedExpression(result, 0)) + { + 'name': + 'unused_my', + 'type': + __name__ + '.MyClass', + 'members': [{ + 'name': 'a', + 'value': '1', + 'type': 'int' + }, { + 'name': 'b', + 'value': "'bbb'", + 'type': 'str' + }] + }, python_test_util.PackWatchedExpression(result, 0)) def testBreakpointExpiration(self): # BPTAG: BREAKPOINT_EXPIRATION created_time = datetime.utcnow() - timedelta(hours=25) @@ -549,28 +622,31 @@ def testBreakpointExpiration(self): # BPTAG: BREAKPOINT_EXPIRATION self.assertTrue(result['status']['isError']) def testLogAction(self): + def Trigger(): for i in range(3): print('Log me %d' % i) # BPTAG: LOG IntegrationTest._hub.SetBreakpoint( - 'LOG', - {'action': 'LOG', - 'logLevel': 'INFO', - 'logMessageFormat': 'hello $0', - 'expressions': ['i']}) + 'LOG', { + 'action': 'LOG', + 'logLevel': 'INFO', + 'logMessageFormat': 'hello $0', + 'expressions': ['i'] + }) Trigger() - self.assertListEqual(['LOGPOINT: hello 0', 'LOGPOINT: hello 1', - 'LOGPOINT: hello 2'], self._info_log) + self.assertListEqual( + ['LOGPOINT: hello 0', 'LOGPOINT: hello 1', 'LOGPOINT: hello 2'], + self._info_log) def 
testDeferred(self): + def Trigger(): import integration_test_helper # pylint: disable=g-import-not-at-top integration_test_helper.Trigger() - IntegrationTest._hub.SetBreakpointAtFile( - 'integration_test_helper.py', - 'DEFERRED') + IntegrationTest._hub.SetBreakpointAtFile('integration_test_helper.py', + 'DEFERRED') Trigger() result = IntegrationTest._hub.GetNextResult() @@ -581,9 +657,11 @@ def Trigger(): def MyGlobalDecorator(fn): + @functools.wraps(fn) def Wrapper(*args, **kwargs): return fn(*args, **kwargs) + return Wrapper diff --git a/tests/module_explorer_test_disabled.py b/tests/module_explorer_test_disabled.py index e451a79..b05ec53 100644 --- a/tests/module_explorer_test_disabled.py +++ b/tests/module_explorer_test_disabled.py @@ -64,15 +64,24 @@ def testNonModuleClassMethod(self): def testDeepInnerMethod(self): """Verify that inner of inner of inner, etc. method is found.""" + def Inner1(): + def Inner2(): + def Inner3(): + def Inner4(): + def Inner5(): pass + return six.get_function_code(Inner5) + return Inner4() + return Inner3() + return Inner2() self.assertIn(Inner1(), self._code_objects) @@ -118,8 +127,8 @@ def testGlobalMethodWithClosureDecorator(self): self.assertEqual('GlobalMethodWithClosureDecorator', co.co_name) def testClassMethodWithClosureDecorator(self): - co = self._GetCodeObjectAtLine(self._module, - 'GLOBAL_CLASS_METHOD_WITH_CLOSURE_DECORATOR') + co = self._GetCodeObjectAtLine( + self._module, 'GLOBAL_CLASS_METHOD_WITH_CLOSURE_DECORATOR') self.assertTrue(co) self.assertEqual('FnWithClosureDecorator', co.co_name) @@ -145,16 +154,15 @@ def testSameFileName(self): def testCodeObjectAtLine(self): """Verify that query of code object at a specified source line.""" - test_cases = [ - (six.get_function_code(self.testCodeObjectAtLine), - 'TEST_CODE_OBJECT_AT_ASSERT'), - (ModuleExplorerTest._StaticMethod(), 'INNER_OF_STATIC_METHOD'), - (_GlobalMethod(), 'INNER_OF_GLOBAL_METHOD')] + test_cases = [(six.get_function_code(self.testCodeObjectAtLine), + 
'TEST_CODE_OBJECT_AT_ASSERT'), + (ModuleExplorerTest._StaticMethod(), + 'INNER_OF_STATIC_METHOD'), + (_GlobalMethod(), 'INNER_OF_GLOBAL_METHOD')] for code_object, tag in test_cases: self.assertEqual( # BPTAG: TEST_CODE_OBJECT_AT_ASSERT - code_object, - self._GetCodeObjectAtLine(code_object, tag)) + code_object, self._GetCodeObjectAtLine(code_object, tag)) def testCodeObjectWithoutModule(self): """Verify no crash/hang when module has no file name.""" @@ -164,6 +172,7 @@ def testCodeObjectWithoutModule(self): self.assertFalse( module_explorer.GetCodeObjectAtLine(self._module, 111111)[0]) + # TODO: Re-enable this test, without hardcoding a python version into it. # def testCodeExtensionMismatch(self): # """Verify module match when code object points to .py and module to .pyc.""" @@ -217,6 +226,7 @@ def testMaxReferentsBfsDepth(self): module_explorer._MAX_REFERENTS_BFS_DEPTH = default_quota def testMaxObjectReferents(self): + class A(object): pass @@ -249,6 +259,7 @@ class A(object): @staticmethod def _StaticMethod(): + def InnerMethod(): pass # BPTAG: INNER_OF_STATIC_METHOD @@ -261,6 +272,7 @@ def _GetCodeObjectAtLine(self, fn, tag): def _GlobalMethod(): + def InnerMethod(): pass # BPTAG: INNER_OF_GLOBAL_METHOD @@ -268,6 +280,7 @@ def InnerMethod(): def ClosureDecorator(handler): + def Caller(*args): return handler(*args) @@ -311,6 +324,7 @@ def _MethodWithLambdaExpression(): def _MethodWithGeneratorExpression(): return (i for i in range(0, 2)).gi_code + # Used for testMaxObjectReferents, need to be in global scope or else the module # explorer would not explore this large_dict = None diff --git a/tests/module_search2_test.py b/tests/module_search2_test.py index 51bbc99..dc47e71 100644 --- a/tests/module_search2_test.py +++ b/tests/module_search2_test.py @@ -23,8 +23,7 @@ def tearDown(self): def testSearchValidSourcePath(self): # These modules are on the sys.path. 
self.assertEndsWith( - module_search2.Search( - 'googleclouddebugger/module_search2.py'), + module_search2.Search('googleclouddebugger/module_search2.py'), '/site-packages/googleclouddebugger/module_search2.py') # inspect and dis are libraries with no real file. So, we @@ -36,9 +35,7 @@ def testSearchInvalidSourcePath(self): # This module exists, but the search input is missing the outer package # name. - self.assertEqual( - module_search2.Search('absltest.py'), - 'absltest.py') + self.assertEqual(module_search2.Search('absltest.py'), 'absltest.py') def testSearchInvalidExtension(self): # Test that the module rejects invalid extension in the input. @@ -87,8 +84,7 @@ def testSearchSymLinkInSysPath(self): # Returned result should have a successful file match and symbolic # links should be kept. self.assertEndsWith( - module_search2.Search('b/first.py'), - 'link/b/first.py') + module_search2.Search('b/first.py'), 'link/b/first.py') finally: sys.path.remove(os.path.join(self._test_package_dir, 'link')) diff --git a/tests/module_utils2_test.py b/tests/module_utils2_test.py index 82110ea..3790acc 100644 --- a/tests/module_utils2_test.py +++ b/tests/module_utils2_test.py @@ -59,13 +59,9 @@ def testSimpleLoadedModuleFromSuffix(self): # Lookup simple module. _AddSysModule('m1', '/a/b/p1/m1.pyc') for suffix in [ - 'm1.py', - 'm1.pyc', - 'm1.pyo', - 'p1/m1.py', - 'b/p1/m1.py', - 'a/b/p1/m1.py', - '/a/b/p1/m1.py']: + 'm1.py', 'm1.pyc', 'm1.pyo', 'p1/m1.py', 'b/p1/m1.py', 'a/b/p1/m1.py', + '/a/b/p1/m1.py' + ]: m1 = module_utils2.GetLoadedModuleBySuffix(suffix) self.assertTrue(m1, 'Module not found') self.assertEqual('/a/b/p1/m1.pyc', m1.__file__) @@ -73,21 +69,17 @@ def testSimpleLoadedModuleFromSuffix(self): # Lookup simple package, no ext. 
_AddSysModule('p1', '/a/b/p1/__init__.pyc') for suffix in [ - 'p1/__init__.py', - 'b/p1/__init__.py', - 'a/b/p1/__init__.py', - '/a/b/p1/__init__.py']: + 'p1/__init__.py', 'b/p1/__init__.py', 'a/b/p1/__init__.py', + '/a/b/p1/__init__.py' + ]: p1 = module_utils2.GetLoadedModuleBySuffix(suffix) self.assertTrue(p1, 'Package not found') self.assertEqual('/a/b/p1/__init__.pyc', p1.__file__) # Lookup via bad suffix. for suffix in [ - 'm2.py', - 'p2/m1.py', - 'b2/p1/m1.py', - 'a2/b/p1/m1.py', - '/a2/b/p1/m1.py']: + 'm2.py', 'p2/m1.py', 'b2/p1/m1.py', 'a2/b/p1/m1.py', '/a2/b/p1/m1.py' + ]: m1 = module_utils2.GetLoadedModuleBySuffix(suffix) self.assertFalse(m1, 'Module found unexpectedly') @@ -95,11 +87,8 @@ def testComplexLoadedModuleFromSuffix(self): # Lookup complex module. _AddSysModule('b.p1.m1', '/a/b/p1/m1.pyc') for suffix in [ - 'm1.py', - 'p1/m1.py', - 'b/p1/m1.py', - 'a/b/p1/m1.py', - '/a/b/p1/m1.py']: + 'm1.py', 'p1/m1.py', 'b/p1/m1.py', 'a/b/p1/m1.py', '/a/b/p1/m1.py' + ]: m1 = module_utils2.GetLoadedModuleBySuffix(suffix) self.assertTrue(m1, 'Module not found') self.assertEqual('/a/b/p1/m1.pyc', m1.__file__) @@ -107,10 +96,9 @@ def testComplexLoadedModuleFromSuffix(self): # Lookup complex package, no ext. _AddSysModule('a.b.p1', '/a/b/p1/__init__.pyc') for suffix in [ - 'p1/__init__.py', - 'b/p1/__init__.py', - 'a/b/p1/__init__.py', - '/a/b/p1/__init__.py']: + 'p1/__init__.py', 'b/p1/__init__.py', 'a/b/p1/__init__.py', + '/a/b/p1/__init__.py' + ]: p1 = module_utils2.GetLoadedModuleBySuffix(suffix) self.assertTrue(p1, 'Package not found') self.assertEqual('/a/b/p1/__init__.pyc', p1.__file__) @@ -144,16 +132,12 @@ def testDuplicateLoadedModuleFromSuffix(self): # Ambiguous request, multiple modules might have matched. 
m1 = module_utils2.GetLoadedModuleBySuffix('/m1/__init__.py') self.assertTrue(m1, 'Package not found') - self.assertIn( - m1.__file__, - ['/m1/__init__.pyc', '/m1/m1/m1/__init__.pyc']) + self.assertIn(m1.__file__, ['/m1/__init__.pyc', '/m1/m1/m1/__init__.pyc']) # Ambiguous request, multiple modules might have matched. m1m1 = module_utils2.GetLoadedModuleBySuffix('/m1/m1.py') self.assertTrue(m1m1, 'Module not found') - self.assertIn( - m1m1.__file__, - ['/m1/m1.pyc', '/m1/m1/m1/m1.pyc']) + self.assertIn(m1m1.__file__, ['/m1/m1.pyc', '/m1/m1/m1/m1.pyc']) # Not ambiguous. Only 1 match possible. m1m1m1 = module_utils2.GetLoadedModuleBySuffix('/m1/m1/m1/__init__.py') diff --git a/tests/native_module_test.py b/tests/native_module_test.py index d235d7f..2beaada 100644 --- a/tests/native_module_test.py +++ b/tests/native_module_test.py @@ -44,6 +44,7 @@ def tearDown(self): self._ClearAllBreakpoints() def testUnconditionalBreakpoint(self): + def Trigger(): unused_lock = threading.Lock() print('Breakpoint trigger') # BPTAG: UNCONDITIONAL_BREAKPOINT @@ -53,6 +54,7 @@ def Trigger(): self.assertEqual(1, self._breakpoint_counter) def testConditionalBreakpoint(self): + def Trigger(): d = {} for i in range(1, 10): @@ -75,6 +77,7 @@ def Trigger(): self.assertEqual(1, self._breakpoint_counter) def testMissingModule(self): + def Test(): native.CreateConditionalBreakpoint(None, 123123, None, self._BreakpointEvent) @@ -82,6 +85,7 @@ def Test(): self.assertRaises(TypeError, Test) def testBadModule(self): + def Test(): native.CreateConditionalBreakpoint('str', 123123, None, self._BreakpointEvent) @@ -89,6 +93,7 @@ def Test(): self.assertRaises(TypeError, Test) def testInvalidCondition(self): + def Test(): native.CreateConditionalBreakpoint(sys.modules[__name__], 123123, '2+2', self._BreakpointEvent) @@ -96,39 +101,43 @@ def Test(): self.assertRaises(TypeError, Test) def testMissingCallback(self): + def Test(): native.CreateConditionalBreakpoint('code.py', 123123, None, None) 
self.assertRaises(TypeError, Test) def testInvalidCallback(self): + def Test(): native.CreateConditionalBreakpoint('code.py', 123123, None, {}) self.assertRaises(TypeError, Test) def testMissingCookie(self): - self.assertRaises( - TypeError, - lambda: native.ClearConditionalBreakpoint(None)) + self.assertRaises(TypeError, + lambda: native.ClearConditionalBreakpoint(None)) def testInvalidCookie(self): native.ClearConditionalBreakpoint(387873457) def testMutableCondition(self): + def Trigger(): + def MutableMethod(): self._evil = True return True + print('MutableMethod = %s' % MutableMethod) # BPTAG: MUTABLE_CONDITION self._SetBreakpoint(Trigger, 'MUTABLE_CONDITION', 'MutableMethod()') Trigger() - self.assertEqual( - [native.BREAKPOINT_EVENT_CONDITION_EXPRESSION_MUTABLE], - self._PopBreakpointEvents()) + self.assertEqual([native.BREAKPOINT_EVENT_CONDITION_EXPRESSION_MUTABLE], + self._PopBreakpointEvents()) def testGlobalConditionQuotaExceeded(self): + def Trigger(): print('Breakpoint trigger') # BPTAG: GLOBAL_CONDITION_QUOTA @@ -144,6 +153,7 @@ def Trigger(): time.sleep(0.1) def testBreakpointConditionQuotaExceeded(self): + def Trigger(): print('Breakpoint trigger') # BPTAG: PER_BREAKPOINT_CONDITION_QUOTA @@ -153,10 +163,8 @@ def Trigger(): # increase the complexity of a condition until we hit it. 
base = 100 while True: - self._SetBreakpoint( - Trigger, - 'PER_BREAKPOINT_CONDITION_QUOTA', - '_DoHardWork(%d)' % base) + self._SetBreakpoint(Trigger, 'PER_BREAKPOINT_CONDITION_QUOTA', + '_DoHardWork(%d)' % base) Trigger() self._ClearAllBreakpoints() @@ -174,61 +182,54 @@ def Trigger(): time.sleep(0.1) def testImmutableCallSuccess(self): + def Add(a, b, c): return a + b + c def Magic(): return 'cake' - self.assertEqual( - '643535', - self._CallImmutable(inspect.currentframe(), 'str(643535)')) + self.assertEqual('643535', + self._CallImmutable(inspect.currentframe(), 'str(643535)')) self.assertEqual( 786 + 23 + 891, self._CallImmutable(inspect.currentframe(), 'Add(786, 23, 891)')) - self.assertEqual( - 'cake', - self._CallImmutable(inspect.currentframe(), 'Magic()')) + self.assertEqual('cake', + self._CallImmutable(inspect.currentframe(), 'Magic()')) return Add or Magic def testImmutableCallMutable(self): + def Change(): dictionary['bad'] = True dictionary = {} frame = inspect.currentframe() - self.assertRaises( - SystemError, - lambda: self._CallImmutable(frame, 'Change()')) + self.assertRaises(SystemError, + lambda: self._CallImmutable(frame, 'Change()')) self.assertEqual({}, dictionary) return Change def testImmutableCallExceptionPropagation(self): + def Divide(a, b): return a / b frame = inspect.currentframe() - self.assertRaises( - ZeroDivisionError, - lambda: self._CallImmutable(frame, 'Divide(1, 0)')) + self.assertRaises(ZeroDivisionError, + lambda: self._CallImmutable(frame, 'Divide(1, 0)')) return Divide def testImmutableCallInvalidFrame(self): - self.assertRaises( - TypeError, - lambda: native.CallImmutable(None, lambda: 1)) - self.assertRaises( - TypeError, - lambda: native.CallImmutable('not a frame', lambda: 1)) + self.assertRaises(TypeError, lambda: native.CallImmutable(None, lambda: 1)) + self.assertRaises(TypeError, + lambda: native.CallImmutable('not a frame', lambda: 1)) def testImmutableCallInvalidCallable(self): frame = inspect.currentframe() - 
self.assertRaises( - TypeError, - lambda: native.CallImmutable(frame, None)) - self.assertRaises( - TypeError, - lambda: native.CallImmutable(frame, 'not a callable')) + self.assertRaises(TypeError, lambda: native.CallImmutable(frame, None)) + self.assertRaises(TypeError, + lambda: native.CallImmutable(frame, 'not a callable')) def _SetBreakpoint(self, method, tag, condition=None): """Sets a breakpoint in this source file. @@ -263,9 +264,8 @@ def _ClearAllBreakpoints(self): def _CallImmutable(self, frame, expression): """Wrapper over native.ImmutableCall for callable.""" - return native.CallImmutable( - frame, - compile(expression, '', 'eval')) + return native.CallImmutable(frame, + compile(expression, '', 'eval')) def _BreakpointEvent(self, event, frame): """Callback on breakpoint event. diff --git a/tests/python_breakpoint_test_disabled.py b/tests/python_breakpoint_test_disabled.py index 2de4d61..a337d12 100644 --- a/tests/python_breakpoint_test_disabled.py +++ b/tests/python_breakpoint_test_disabled.py @@ -30,7 +30,11 @@ def setUp(self): self._template = { 'id': 'BP_ID', 'createTime': python_test_util.DateTimeToTimestamp(self._base_time), - 'location': {'path': path, 'line': line}} + 'location': { + 'path': path, + 'line': line + } + } self._completed = set() self._update_queue = [] @@ -50,23 +54,20 @@ def EnqueueBreakpointUpdate(self, breakpoint): self._update_queue.append(breakpoint) def testClear(self): - breakpoint = python_breakpoint.PythonBreakpoint( - self._template, self, self, None) + breakpoint = python_breakpoint.PythonBreakpoint(self._template, self, self, + None) breakpoint.Clear() self.assertFalse(breakpoint._cookie) def testId(self): - breakpoint = python_breakpoint.PythonBreakpoint( - self._template, self, self, None) + breakpoint = python_breakpoint.PythonBreakpoint(self._template, self, self, + None) breakpoint.Clear() self.assertEqual('BP_ID', breakpoint.GetBreakpointId()) def testNullBytesInCondition(self): python_breakpoint.PythonBreakpoint( 
- dict(self._template, condition='\0'), - self, - self, - None) + dict(self._template, condition='\0'), self, self, None) self.assertEqual(set(['BP_ID']), self._completed) self.assertLen(self._update_queue, 1) self.assertTrue(self._update_queue[0]['status']['isError']) @@ -83,10 +84,10 @@ def testDeferredBreakpoint(self): f.write(' print("Hello from deferred module")\n') python_breakpoint.PythonBreakpoint( - dict(self._template, location={'path': 'defer_print.py', 'line': 2}), - self, - self, - None) + dict(self._template, location={ + 'path': 'defer_print.py', + 'line': 2 + }), self, self, None) self.assertFalse(self._completed) self.assertEmpty(self._update_queue) @@ -97,9 +98,8 @@ def testDeferredBreakpoint(self): self.assertEqual(set(['BP_ID']), self._completed) self.assertLen(self._update_queue, 1) self.assertGreater(len(self._update_queue[0]['stackFrames']), 3) - self.assertEqual( - 'DoPrint', - self._update_queue[0]['stackFrames'][0]['function']) + self.assertEqual('DoPrint', + self._update_queue[0]['stackFrames'][0]['function']) self.assertTrue(self._update_queue[0]['isFinalState']) self.assertEmpty(imphook2._import_callbacks) @@ -124,26 +124,23 @@ def testSearchUsingSysPathOrder(self): # Search will proceed in sys.path order, and the first match in sys.path # will uniquely identify the full path of the module as inner2_2/mod2.py. 
python_breakpoint.PythonBreakpoint( - dict(self._template, location={'path': 'mod2.py', 'line': 3}), - self, - self, - None) + dict(self._template, location={ + 'path': 'mod2.py', + 'line': 3 + }), self, self, None) self.assertEqual(2, mod2.DoPrint()) self.assertEqual(set(['BP_ID']), self._completed) self.assertLen(self._update_queue, 1) self.assertGreater(len(self._update_queue[0]['stackFrames']), 3) - self.assertEqual( - 'DoPrint', - self._update_queue[0]['stackFrames'][0]['function']) + self.assertEqual('DoPrint', + self._update_queue[0]['stackFrames'][0]['function']) self.assertTrue(self._update_queue[0]['isFinalState']) self.assertEqual( - 'x', - self._update_queue[0]['stackFrames'][0]['locals'][0]['name']) + 'x', self._update_queue[0]['stackFrames'][0]['locals'][0]['name']) self.assertEqual( - '2', - self._update_queue[0]['stackFrames'][0]['locals'][0]['value']) + '2', self._update_queue[0]['stackFrames'][0]['locals'][0]['value']) self.assertEmpty(imphook2._import_callbacks) @@ -166,10 +163,10 @@ def testMultipleDeferredMatches(self): # This breakpoint will be deferred. It can match any one of the modules # created above. python_breakpoint.PythonBreakpoint( - dict(self._template, location={'path': 'defer_print3.py', 'line': 3}), - self, - self, - None) + dict(self._template, location={ + 'path': 'defer_print3.py', + 'line': 3 + }), self, self, None) # Lazy import module. Activates breakpoint on the loaded module. 
import inner3_1.defer_print3 # pylint: disable=g-import-not-at-top @@ -178,16 +175,13 @@ def testMultipleDeferredMatches(self): self.assertEqual(set(['BP_ID']), self._completed) self.assertLen(self._update_queue, 1) self.assertGreater(len(self._update_queue[0]['stackFrames']), 3) - self.assertEqual( - 'DoPrint', - self._update_queue[0]['stackFrames'][0]['function']) + self.assertEqual('DoPrint', + self._update_queue[0]['stackFrames'][0]['function']) self.assertTrue(self._update_queue[0]['isFinalState']) self.assertEqual( - 'x', - self._update_queue[0]['stackFrames'][0]['locals'][0]['name']) + 'x', self._update_queue[0]['stackFrames'][0]['locals'][0]['name']) self.assertEqual( - '1', - self._update_queue[0]['stackFrames'][0]['locals'][0]['value']) + '1', self._update_queue[0]['stackFrames'][0]['locals'][0]['value']) self.assertEmpty(imphook2._import_callbacks) @@ -195,10 +189,10 @@ def testNeverLoadedBreakpoint(self): open(os.path.join(self._test_package_dir, 'never_print.py'), 'w').close() breakpoint = python_breakpoint.PythonBreakpoint( - dict(self._template, location={'path': 'never_print.py', 'line': 99}), - self, - self, - None) + dict(self._template, location={ + 'path': 'never_print.py', + 'line': 99 + }), self, self, None) breakpoint.Clear() self.assertFalse(self._completed) @@ -208,10 +202,10 @@ def testDeferredNoCodeAtLine(self): open(os.path.join(self._test_package_dir, 'defer_empty.py'), 'w').close() python_breakpoint.PythonBreakpoint( - dict(self._template, location={'path': 'defer_empty.py', 'line': 10}), - self, - self, - None) + dict(self._template, location={ + 'path': 'defer_empty.py', + 'line': 10 + }), self, self, None) self.assertFalse(self._completed) self.assertEmpty(self._update_queue) @@ -235,10 +229,10 @@ def testDeferredBreakpointCancelled(self): open(os.path.join(self._test_package_dir, 'defer_cancel.py'), 'w').close() breakpoint = python_breakpoint.PythonBreakpoint( - dict(self._template, location={'path': 'defer_cancel.py', 'line': 
11}), - self, - self, - None) + dict(self._template, location={ + 'path': 'defer_cancel.py', + 'line': 11 + }), self, self, None) breakpoint.Clear() self.assertFalse(self._completed) @@ -256,10 +250,10 @@ def testNoCodeAtLine(self): 'NO_CODE_LINE_BELOW') python_breakpoint.PythonBreakpoint( - dict(self._template, location={'path': path, 'line': line}), - self, - self, - None) + dict(self._template, location={ + 'path': path, + 'line': line + }), self, self, None) self.assertEqual(set(['BP_ID']), self._completed) self.assertLen(self._update_queue, 1) self.assertTrue(self._update_queue[0]['isFinalState']) @@ -278,42 +272,46 @@ def testNoCodeAtLine(self): def testBadExtension(self): for path in ['unknown.so', 'unknown', 'unknown.java', 'unknown.pyc']: python_breakpoint.PythonBreakpoint( - dict(self._template, location={'path': path, 'line': 83}), - self, - self, - None) + dict(self._template, location={ + 'path': path, + 'line': 83 + }), self, self, None) self.assertEqual(set(['BP_ID']), self._completed) self.assertLen(self._update_queue, 1) self.assertTrue(self._update_queue[0]['isFinalState']) self.assertEqual( - {'isError': True, - 'refersTo': 'BREAKPOINT_SOURCE_LOCATION', - 'description': { - 'format': ('Only files with .py extension are supported')}}, - self._update_queue[0]['status']) + { + 'isError': True, + 'refersTo': 'BREAKPOINT_SOURCE_LOCATION', + 'description': { + 'format': ('Only files with .py extension are supported') + } + }, self._update_queue[0]['status']) self._update_queue = [] def testRootInitFile(self): - for path in ['__init__.py', '/__init__.py', '////__init__.py', - ' __init__.py ', ' //__init__.py']: + for path in [ + '__init__.py', '/__init__.py', '////__init__.py', ' __init__.py ', + ' //__init__.py' + ]: python_breakpoint.PythonBreakpoint( - dict(self._template, location={'path': path, 'line': 83}), - self, - self, - None) + dict(self._template, location={ + 'path': path, + 'line': 83 + }), self, self, None) 
self.assertEqual(set(['BP_ID']), self._completed) self.assertLen(self._update_queue, 1) self.assertTrue(self._update_queue[0]['isFinalState']) self.assertEqual( - {'isError': True, - 'refersTo': 'BREAKPOINT_SOURCE_LOCATION', - 'description': { - 'format': - 'Multiple modules matching $0. ' - 'Please specify the module path.', - 'parameters': ['__init__.py'] - }}, - self._update_queue[0]['status']) + { + 'isError': True, + 'refersTo': 'BREAKPOINT_SOURCE_LOCATION', + 'description': { + 'format': 'Multiple modules matching $0. ' + 'Please specify the module path.', + 'parameters': ['__init__.py'] + } + }, self._update_queue[0]['status']) self._update_queue = [] # Old module search algorithm rejects because there are too many matches. @@ -333,10 +331,10 @@ def testNonRootInitFile(self): for path in ['/a/__init__.py', 'a/__init__.py', 'a/b/__init__.py']: python_breakpoint.PythonBreakpoint( - dict(self._template, location={'path': path, 'line': 2}), - self, - self, - None) + dict(self._template, location={ + 'path': path, + 'line': 2 + }), self, self, None) inner4.DoPrint() @@ -344,9 +342,8 @@ def testNonRootInitFile(self): self.assertLen(self._update_queue, 1) self.assertTrue(self._update_queue[0]['isFinalState']) self.assertGreater(len(self._update_queue[0]['stackFrames']), 3) - self.assertEqual( - 'DoPrint', - self._update_queue[0]['stackFrames'][0]['function']) + self.assertEqual('DoPrint', + self._update_queue[0]['stackFrames'][0]['function']) self.assertEmpty(imphook2._import_callbacks) self._update_queue = [] @@ -364,11 +361,11 @@ def testBreakpointInLoadedPackageFile(self): import pkg.pkg # pylint: disable=g-import-not-at-top,unused-variable python_breakpoint.PythonBreakpoint( - dict(self._template, - location={'path': 'pkg/pkg/__init__.py', 'line': 2}), - self, - self, - None) + dict( + self._template, location={ + 'path': 'pkg/pkg/__init__.py', + 'line': 2 + }), self, self, None) pkg.pkg.DoPrint() @@ -396,41 +393,42 @@ def testInternalError(self): import 
intern_err # pylint: disable=g-import-not-at-top,unused-variable python_breakpoint.PythonBreakpoint( - dict(self._template, location={'path': 'intern_err.py', 'line': 100}), - self, - self, - None) + dict(self._template, location={ + 'path': 'intern_err.py', + 'line': 100 + }), self, self, None) self.assertEqual(set(['BP_ID']), self._completed) self.assertLen(self._update_queue, 1) self.assertEqual( - {'isError': True, - 'description': {'format': 'Internal error occurred'}}, - self._update_queue[0]['status']) + { + 'isError': True, + 'description': { + 'format': 'Internal error occurred' + } + }, self._update_queue[0]['status']) def testInvalidCondition(self): python_breakpoint.PythonBreakpoint( - dict(self._template, condition='2+'), - self, - self, - None) + dict(self._template, condition='2+'), self, self, None) self.assertEqual(set(['BP_ID']), self._completed) self.assertLen(self._update_queue, 1) self.assertTrue(self._update_queue[0]['isFinalState']) self.assertEqual( - {'isError': True, - 'refersTo': 'BREAKPOINT_CONDITION', - 'description': { - 'format': 'Expression could not be compiled: $0', - 'parameters': ['unexpected EOF while parsing']}}, - self._update_queue[0]['status']) + { + 'isError': True, + 'refersTo': 'BREAKPOINT_CONDITION', + 'description': { + 'format': 'Expression could not be compiled: $0', + 'parameters': ['unexpected EOF while parsing'] + } + }, self._update_queue[0]['status']) def testHit(self): - breakpoint = python_breakpoint.PythonBreakpoint( - self._template, self, self, None) - breakpoint._BreakpointEvent( - native.BREAKPOINT_EVENT_HIT, - inspect.currentframe()) + breakpoint = python_breakpoint.PythonBreakpoint(self._template, self, self, + None) + breakpoint._BreakpointEvent(native.BREAKPOINT_EVENT_HIT, + inspect.currentframe()) self.assertEqual(set(['BP_ID']), self._completed) self.assertLen(self._update_queue, 1) self.assertGreater(len(self._update_queue[0]['stackFrames']), 3) @@ -441,68 +439,73 @@ def testHitNewTimestamp(self): 
self._template['createTime'] = python_test_util.DateTimeToTimestampNew( self._base_time) - breakpoint = python_breakpoint.PythonBreakpoint( - self._template, self, self, None) - breakpoint._BreakpointEvent( - native.BREAKPOINT_EVENT_HIT, - inspect.currentframe()) + breakpoint = python_breakpoint.PythonBreakpoint(self._template, self, self, + None) + breakpoint._BreakpointEvent(native.BREAKPOINT_EVENT_HIT, + inspect.currentframe()) self.assertEqual(set(['BP_ID']), self._completed) self.assertLen(self._update_queue, 1) self.assertGreater(len(self._update_queue[0]['stackFrames']), 3) self.assertTrue(self._update_queue[0]['isFinalState']) def testDoubleHit(self): - breakpoint = python_breakpoint.PythonBreakpoint( - self._template, self, self, None) - breakpoint._BreakpointEvent( - native.BREAKPOINT_EVENT_HIT, - inspect.currentframe()) - breakpoint._BreakpointEvent( - native.BREAKPOINT_EVENT_HIT, - inspect.currentframe()) + breakpoint = python_breakpoint.PythonBreakpoint(self._template, self, self, + None) + breakpoint._BreakpointEvent(native.BREAKPOINT_EVENT_HIT, + inspect.currentframe()) + breakpoint._BreakpointEvent(native.BREAKPOINT_EVENT_HIT, + inspect.currentframe()) self.assertEqual(set(['BP_ID']), self._completed) self.assertLen(self._update_queue, 1) def testEndToEndUnconditional(self): + def Trigger(): pass # BPTAG: E2E_UNCONDITIONAL path, line = python_test_util.ResolveTag(type(self), 'E2E_UNCONDITIONAL') breakpoint = python_breakpoint.PythonBreakpoint( - {'id': 'BP_ID', - 'location': {'path': path, 'line': line}}, - self, - self, - None) + { + 'id': 'BP_ID', + 'location': { + 'path': path, + 'line': line + } + }, self, self, None) self.assertEmpty(self._update_queue) Trigger() self.assertLen(self._update_queue, 1) breakpoint.Clear() def testEndToEndConditional(self): + def Trigger(): for i in range(2): self.assertLen(self._update_queue, i) # BPTAG: E2E_CONDITIONAL path, line = python_test_util.ResolveTag(type(self), 'E2E_CONDITIONAL') breakpoint = 
python_breakpoint.PythonBreakpoint( - {'id': 'BP_ID', - 'location': {'path': path, 'line': line}, - 'condition': 'i == 1'}, - self, - self, - None) + { + 'id': 'BP_ID', + 'location': { + 'path': path, + 'line': line + }, + 'condition': 'i == 1' + }, self, self, None) Trigger() breakpoint.Clear() def testEndToEndCleared(self): path, line = python_test_util.ResolveTag(type(self), 'E2E_CLEARED') breakpoint = python_breakpoint.PythonBreakpoint( - {'id': 'BP_ID', - 'location': {'path': path, 'line': line}}, - self, - self, - None) + { + 'id': 'BP_ID', + 'location': { + 'path': path, + 'line': line + } + }, self, self, None) breakpoint.Clear() self.assertEmpty(self._update_queue) # BPTAG: E2E_CLEARED @@ -510,13 +513,11 @@ def testBreakpointCancellationEvent(self): events = [ native.BREAKPOINT_EVENT_GLOBAL_CONDITION_QUOTA_EXCEEDED, native.BREAKPOINT_EVENT_BREAKPOINT_CONDITION_QUOTA_EXCEEDED, - native.BREAKPOINT_EVENT_CONDITION_EXPRESSION_MUTABLE] + native.BREAKPOINT_EVENT_CONDITION_EXPRESSION_MUTABLE + ] for event in events: - breakpoint = python_breakpoint.PythonBreakpoint( - self._template, - self, - self, - None) + breakpoint = python_breakpoint.PythonBreakpoint(self._template, self, + self, None) breakpoint._BreakpointEvent(event, None) self.assertLen(self._update_queue, 1) self.assertEqual(set(['BP_ID']), self._completed) @@ -525,12 +526,11 @@ def testBreakpointCancellationEvent(self): self._completed = set() def testExpirationTime(self): - breakpoint = python_breakpoint.PythonBreakpoint( - self._template, self, self, None) + breakpoint = python_breakpoint.PythonBreakpoint(self._template, self, self, + None) breakpoint.Clear() self.assertEqual( - datetime(year=2015, month=1, day=2), - breakpoint.GetExpirationTime()) + datetime(year=2015, month=1, day=2), breakpoint.GetExpirationTime()) def testExpirationTimeWithExpiresIn(self): definition = self._template.copy() @@ -538,40 +538,45 @@ def testExpirationTimeWithExpiresIn(self): 'seconds': 300 # 5 minutes } - breakpoint 
= python_breakpoint.PythonBreakpoint( - definition, self, self, None) + breakpoint = python_breakpoint.PythonBreakpoint(definition, self, self, + None) breakpoint.Clear() self.assertEqual( - datetime(year=2015, month=1, day=2), - breakpoint.GetExpirationTime()) + datetime(year=2015, month=1, day=2), breakpoint.GetExpirationTime()) def testExpiration(self): - breakpoint = python_breakpoint.PythonBreakpoint( - self._template, self, self, None) + breakpoint = python_breakpoint.PythonBreakpoint(self._template, self, self, + None) breakpoint.ExpireBreakpoint() self.assertEqual(set(['BP_ID']), self._completed) self.assertLen(self._update_queue, 1) self.assertTrue(self._update_queue[0]['isFinalState']) self.assertEqual( - {'isError': True, - 'refersTo': 'BREAKPOINT_AGE', - 'description': {'format': 'The snapshot has expired'}}, - self._update_queue[0]['status']) + { + 'isError': True, + 'refersTo': 'BREAKPOINT_AGE', + 'description': { + 'format': 'The snapshot has expired' + } + }, self._update_queue[0]['status']) def testLogpointExpiration(self): definition = self._template.copy() definition['action'] = 'LOG' - breakpoint = python_breakpoint.PythonBreakpoint( - definition, self, self, None) + breakpoint = python_breakpoint.PythonBreakpoint(definition, self, self, + None) breakpoint.ExpireBreakpoint() self.assertEqual(set(['BP_ID']), self._completed) self.assertLen(self._update_queue, 1) self.assertTrue(self._update_queue[0]['isFinalState']) self.assertEqual( - {'isError': True, - 'refersTo': 'BREAKPOINT_AGE', - 'description': {'format': 'The logpoint has expired'}}, - self._update_queue[0]['status']) + { + 'isError': True, + 'refersTo': 'BREAKPOINT_AGE', + 'description': { + 'format': 'The logpoint has expired' + } + }, self._update_queue[0]['status']) def testNormalizePath(self): # Removes leading '/' character. @@ -587,8 +592,10 @@ def testNormalizePath(self): self.assertEqual('__init__.py', python_breakpoint._NormalizePath(path)) # Normalizes the relative path. 
- for path in [' ./__init__.py', '././__init__.py', ' .//abc/../__init__.py', - ' ///abc///..///def/..////__init__.py']: + for path in [ + ' ./__init__.py', '././__init__.py', ' .//abc/../__init__.py', + ' ///abc///..///def/..////__init__.py' + ]: self.assertEqual('__init__.py', python_breakpoint._NormalizePath(path)) # Does not remove non-leading, non-trailing space, or non-leading '/' @@ -603,5 +610,6 @@ def testNormalizePath(self): 'foo/bar/baz/__in it__.py', python_breakpoint._NormalizePath('/foo/bar/baz/__in it__.py')) + if __name__ == '__main__': absltest.main() diff --git a/tests/python_test_util.py b/tests/python_test_util.py index 44e231c..26d1aef 100644 --- a/tests/python_test_util.py +++ b/tests/python_test_util.py @@ -129,8 +129,8 @@ def PackFrameVariable(breakpoint, name, frame=0, collection='locals'): if variable['name'] == name: return _Pack(variable, breakpoint) - raise AssertionError('Variable %s not found in frame %d collection %s' % ( - name, frame, collection)) + raise AssertionError('Variable %s not found in frame %d collection %s' % + (name, frame, collection)) def PackWatchedExpression(breakpoint, expression): @@ -183,4 +183,3 @@ def _Pack(variable, breakpoint): key=lambda m: m.get('name', '')) return packed - diff --git a/tests/uniquifier_computer_test.py b/tests/uniquifier_computer_test.py index 5157772..3d382b7 100644 --- a/tests/uniquifier_computer_test.py +++ b/tests/uniquifier_computer_test.py @@ -51,75 +51,71 @@ def update(self, s): del sys.path[0] def testEmpty(self): - self.assertListEqual( - [], - self._Compute({})) + self.assertListEqual([], self._Compute({})) def testBundle(self): - self.assertListEqual( - ['first.py:1', - 'in1/__init__.py:6', - 'in1/a.py:3', - 'in1/b.py:4', - 'in1/in2/__init__.py:7', - 'in1/in2/c.py:5', - 'second.py:2'], - self._Compute({ - 'db.app': 'abc', - 'first.py': 'a', - 'second.py': 'bb', - 'in1/a.py': 'ccc', - 'in1/b.py': 'dddd', - 'in1/in2/c.py': 'eeeee', - 'in1/__init__.py': 'ffffff', - 
'in1/in2/__init__.py': 'ggggggg'})) + self.assertListEqual([ + 'first.py:1', 'in1/__init__.py:6', 'in1/a.py:3', 'in1/b.py:4', + 'in1/in2/__init__.py:7', 'in1/in2/c.py:5', 'second.py:2' + ], + self._Compute({ + 'db.app': 'abc', + 'first.py': 'a', + 'second.py': 'bb', + 'in1/a.py': 'ccc', + 'in1/b.py': 'dddd', + 'in1/in2/c.py': 'eeeee', + 'in1/__init__.py': 'ffffff', + 'in1/in2/__init__.py': 'ggggggg' + })) def testEmptyFile(self): - self.assertListEqual( - ['empty.py:0'], - self._Compute({ - 'empty.py': ''})) + self.assertListEqual(['empty.py:0'], self._Compute({'empty.py': ''})) def testNonPythonFilesIgnored(self): - self.assertListEqual( - ['real.py:1'], - self._Compute({ - 'file.p': '', - 'file.pya': '', - 'real.py': '1'})) + self.assertListEqual(['real.py:1'], + self._Compute({ + 'file.p': '', + 'file.pya': '', + 'real.py': '1' + })) def testNonPackageDirectoriesIgnored(self): - self.assertListEqual( - ['dir2/__init__.py:1'], - self._Compute({ - 'dir1/file.py': '', - 'dir2/__init__.py': 'a', - 'dir2/image.gif': ''})) + self.assertListEqual(['dir2/__init__.py:1'], + self._Compute({ + 'dir1/file.py': '', + 'dir2/__init__.py': 'a', + 'dir2/image.gif': '' + })) def testDepthLimit(self): - self.assertListEqual( - [''.join(str(n) + '/' for n in range(1, m + 1)) + '__init__.py:%d' % m - for m in range(9, 0, -1)], - self._Compute({ - '1/__init__.py': '1', - '1/2/__init__.py': '2' * 2, - '1/2/3/__init__.py': '3' * 3, - '1/2/3/4/__init__.py': '4' * 4, - '1/2/3/4/5/__init__.py': '5' * 5, - '1/2/3/4/5/6/__init__.py': '6' * 6, - '1/2/3/4/5/6/7/__init__.py': '7' * 7, - '1/2/3/4/5/6/7/8/__init__.py': '8' * 8, - '1/2/3/4/5/6/7/8/9/__init__.py': '9' * 9, - '1/2/3/4/5/6/7/8/9/10/__init__.py': 'a' * 10, - '1/2/3/4/5/6/7/8/9/10/11/__init__.py': 'b' * 11})) + self.assertListEqual([ + ''.join(str(n) + '/' + for n in range(1, m + 1)) + '__init__.py:%d' % m + for m in range(9, 0, -1) + ], + self._Compute({ + '1/__init__.py': '1', + '1/2/__init__.py': '2' * 2, + '1/2/3/__init__.py': '3' 
* 3, + '1/2/3/4/__init__.py': '4' * 4, + '1/2/3/4/5/__init__.py': '5' * 5, + '1/2/3/4/5/6/__init__.py': '6' * 6, + '1/2/3/4/5/6/7/__init__.py': '7' * 7, + '1/2/3/4/5/6/7/8/__init__.py': '8' * 8, + '1/2/3/4/5/6/7/8/9/__init__.py': '9' * 9, + '1/2/3/4/5/6/7/8/9/10/__init__.py': 'a' * 10, + '1/2/3/4/5/6/7/8/9/10/11/__init__.py': 'b' * 11 + })) def testPrecedence(self): - self.assertListEqual( - ['my.py:3'], - self._Compute({ - 'my.pyo': 'a', - 'my.pyc': 'aa', - 'my.py': 'aaa'})) + self.assertListEqual(['my.py:3'], + self._Compute({ + 'my.pyo': 'a', + 'my.pyc': 'aa', + 'my.py': 'aaa' + })) + if __name__ == '__main__': absltest.main() diff --git a/tests/yaml_data_visibility_config_reader_test.py b/tests/yaml_data_visibility_config_reader_test.py index a197f5c..a74f3e5 100644 --- a/tests/yaml_data_visibility_config_reader_test.py +++ b/tests/yaml_data_visibility_config_reader_test.py @@ -35,18 +35,21 @@ def testOpenAndReadSuccess(self): - bl1 """ path_prefix = 'googleclouddebugger.' - with mock.patch(path_prefix + 'yaml_data_visibility_config_reader.open', - create=True) as m: + with mock.patch( + path_prefix + 'yaml_data_visibility_config_reader.open', + create=True) as m: m.return_value = StringIOOpen(data) config = yaml_data_visibility_config_reader.OpenAndRead() - m.assert_called_with(os.path.join(sys.path[0], 'debugger-blacklist.yaml'), - 'r') + m.assert_called_with( + os.path.join(sys.path[0], 'debugger-blacklist.yaml'), 'r') self.assertEqual(config.blacklist_patterns, ['bl1']) def testOpenAndReadFileNotFound(self): path_prefix = 'googleclouddebugger.' 
- with mock.patch(path_prefix + 'yaml_data_visibility_config_reader.open', - create=True, side_effect=IOError('IO Error')): + with mock.patch( + path_prefix + 'yaml_data_visibility_config_reader.open', + create=True, + side_effect=IOError('IO Error')): f = yaml_data_visibility_config_reader.OpenAndRead() self.assertIsNone(f) @@ -65,6 +68,7 @@ def testReadDataSuccess(self): self.assertItemsEqual(config.whitelist_patterns, ('wl1', 'wl2.*')) def testYAMLLoadError(self): + class ErrorIO(object): def read(self, size): From f8ce0ef9c8ba8825d7a47728a228d3286b6dfb5b Mon Sep 17 00:00:00 2001 From: James McTavish Date: Mon, 20 Jun 2022 10:09:37 -0400 Subject: [PATCH 202/241] chore: remove six and code that only exists in Python 2.7 (#43) * Update build.sh to use pip to build. * Remove six from source files * Remove Python2 support from native codebase --- README.md | 26 +-- requirements.txt | 1 - src/build.sh | 2 +- .../appengine_pretty_printers.py | 4 +- .../breakpoints_manager.py | 8 +- .../bytecode_manipulator.cc | 209 +----------------- src/googleclouddebugger/capture_collector.py | 24 +- src/googleclouddebugger/gcp_hub_client.py | 5 +- .../immutability_tracer.cc | 44 ---- src/googleclouddebugger/imphook2.py | 24 +- src/googleclouddebugger/module_explorer.py | 21 +- .../yaml_data_visibility_config_reader.py | 3 +- tests/capture_collector_test.py | 17 +- tests/imphook2_test.py | 16 +- tests/integration_test_disabled.py | 2 +- tests/module_explorer_test_disabled.py | 52 ++--- tests/native_module_test.py | 8 +- ...yaml_data_visibility_config_reader_test.py | 2 +- 18 files changed, 80 insertions(+), 388 deletions(-) diff --git a/README.md b/README.md index 836a7ee..ed3f487 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Python Cloud Debugger Agent -Google [Cloud Debugger](https://cloud.google.com/debugger/) for Python 2.7, +Google [Cloud Debugger](https://cloud.google.com/debugger/) for Python 3.6, Python 3.7, Python 3.8 and Python 3.9. 
## Overview @@ -27,7 +27,7 @@ tested on Debian Linux, but it should work on other distributions as well. Cloud Debugger consists of 3 primary components: -1. The Python debugger agent (this repo implements one for CPython 2.7, 3.6, +1. The Python debugger agent (this repo implements one for CPython 3.6, 3.7, 3.8 and 3.9). 2. Cloud Debugger service storing and managing snapshots/logpoints. Explore the APIs using @@ -54,21 +54,13 @@ The easiest way to install the Python Cloud Debugger is with PyPI: pip install google-python-cloud-debugger ``` -Alternatively, download the *egg* package from -[Releases](https://github.com/GoogleCloudPlatform/cloud-debug-python/releases) -and install the debugger agent with: - -```shell -easy_install google_python_cloud_debugger-py2.7-linux-x86_64.egg -``` - You can also build the agent from source code: ```shell git clone https://github.com/GoogleCloudPlatform/cloud-debug-python.git cd cloud-debug-python/src/ ./build.sh -easy_install dist/google_python_cloud_debugger-*.egg +pip install dist/google_python_cloud_debugger-*.whl ``` Note that the build script assumes some dependencies. To install these @@ -77,17 +69,11 @@ dependencies on Debian, run this command: ```shell sudo apt-get -y -q --no-install-recommends install \ curl ca-certificates gcc build-essential cmake \ - python python-dev libpython2.7 python-setuptools + python3 python3-dev python3-pip ``` -### Python 3 - -There is support for Python 3.6, Python 3.7, Python 3.8 and Python 3.9. Python -3.0 to 3.5 are not supported, and newer versions have not been tested. - -To build for Python 3.x (x in [6-8]), the `python3.x` and `python3.x-dev` -packages are additionally needed. If Python 3.x is not the default version of -the 'python' command on your system, run the build script as `PYTHON=python3.x +If the desired target version of Python is not the default version of +the 'python3' command on your system, run the build script as `PYTHON=python3.x ./build.sh`. 
### Alpine Linux diff --git a/requirements.txt b/requirements.txt index 5bb7131..48ab4e6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ -six google-auth-httplib2 google-api-python-client google-api-core diff --git a/src/build.sh b/src/build.sh index c8a1b8e..ba9a944 100755 --- a/src/build.sh +++ b/src/build.sh @@ -88,6 +88,6 @@ library_dirs=${ROOT}/build/third_party/lib:${ROOT}/build/third_party/lib64" > ${ # Build the Python Cloud Debugger agent. pushd ${ROOT} # Use custom python command if variable is set -"${PYTHON:-python3}" setup.py bdist_wheel +"${PYTHON:-python3}" -m pip wheel . --no-deps -w dist popd diff --git a/src/googleclouddebugger/appengine_pretty_printers.py b/src/googleclouddebugger/appengine_pretty_printers.py index 9136d04..3908990 100644 --- a/src/googleclouddebugger/appengine_pretty_printers.py +++ b/src/googleclouddebugger/appengine_pretty_printers.py @@ -13,8 +13,6 @@ # limitations under the License. """Formatters for well known objects that don't show up nicely by default.""" -import six - try: from protorpc import messages # pylint: disable=g-import-not-at-top except ImportError: @@ -30,7 +28,7 @@ def PrettyPrinter(obj): """Pretty printers for AppEngine objects.""" if ndb and isinstance(obj, ndb.Model): - return six.iteritems(obj.to_dict()), 'ndb.Model(%s)' % type(obj).__name__ + return obj.to_dict().items(), 'ndb.Model(%s)' % type(obj).__name__ if messages and isinstance(obj, messages.Enum): return [('name', obj.name), ('number', obj.number)], type(obj).__name__ diff --git a/src/googleclouddebugger/breakpoints_manager.py b/src/googleclouddebugger/breakpoints_manager.py index b9843d0..e3f0421 100644 --- a/src/googleclouddebugger/breakpoints_manager.py +++ b/src/googleclouddebugger/breakpoints_manager.py @@ -16,8 +16,6 @@ from datetime import datetime from threading import RLock -import six - from . 
import python_breakpoint @@ -69,7 +67,7 @@ def SetActiveBreakpoints(self, breakpoints_data): ids = set([x['id'] for x in breakpoints_data]) # Clear breakpoints that no longer show up in active breakpoints list. - for breakpoint_id in six.viewkeys(self._active) - ids: + for breakpoint_id in self._active.keys() - ids: self._active.pop(breakpoint_id).Clear() # Create new breakpoints. @@ -78,7 +76,7 @@ def SetActiveBreakpoints(self, breakpoints_data): python_breakpoint.PythonBreakpoint(x, self._hub_client, self, self.data_visibility_policy)) for x in breakpoints_data - if x['id'] in ids - six.viewkeys(self._active) - self._completed + if x['id'] in ids - self._active.keys() - self._completed ]) # Remove entries from completed_breakpoints_ that weren't listed in @@ -114,7 +112,7 @@ def CheckBreakpointsExpiration(self): expired_breakpoints = [] self._next_expiration = datetime.max - for breakpoint in six.itervalues(self._active): + for breakpoint in self._active.values(): expiration_time = breakpoint.GetExpirationTime() if expiration_time <= current_time: expired_breakpoints.append(breakpoint) diff --git a/src/googleclouddebugger/bytecode_manipulator.cc b/src/googleclouddebugger/bytecode_manipulator.cc index 9c646e3..9ee7e27 100644 --- a/src/googleclouddebugger/bytecode_manipulator.cc +++ b/src/googleclouddebugger/bytecode_manipulator.cc @@ -36,18 +36,13 @@ enum PythonOpcodeType { // Single Python instruction. // -// In Python 2.7, there are 3 types of instructions: -// 1. Instruction without arguments (takes 1 byte). -// 2. Instruction with a single 16 bit argument (takes 3 bytes). -// 3. Instruction with a 32 bit argument (very uncommon; takes 6 bytes). -// // In Python 3.6, there are 4 types of instructions: // 1. Instructions without arguments, or a 8 bit argument (takes 2 bytes). // 2. Instructions with a 16 bit argument (takes 4 bytes). // 3. Instructions with a 24 bit argument (takes 6 bytes). // 4. Instructions with a 32 bit argument (takes 8 bytes). 
// -// To handle 32 bit arguments in Python 2, or 16-32 bit arguments in Python 3, +// To handle 16-32 bit arguments in Python 3, // a special instruction with an opcode of EXTENDED_ARG is prepended to the // actual instruction. The argument of the EXTENDED_ARG instruction is combined // with the argument of the next instruction to form the full argument. @@ -68,11 +63,7 @@ static PythonInstruction PythonInstructionNoArg(uint8_t opcode) { instruction.opcode = opcode; instruction.argument = 0; -#if PY_MAJOR_VERSION >= 3 instruction.size = 2; -#else - instruction.size = 1; -#endif return instruction; } @@ -86,7 +77,6 @@ static PythonInstruction PythonInstructionArg(uint8_t opcode, instruction.opcode = opcode; instruction.argument = argument; -#if PY_MAJOR_VERSION >= 3 if (argument <= 0xFF) { instruction.size = 2; } else if (argument <= 0xFFFF) { @@ -96,9 +86,6 @@ static PythonInstruction PythonInstructionArg(uint8_t opcode, } else { instruction.size = 8; } -#else - instruction.size = instruction.argument > 0xFFFF ? 6 : 3; -#endif return instruction; } @@ -119,9 +106,7 @@ static int GetInstructionsSize( static PythonOpcodeType GetOpcodeType(uint8_t opcode) { switch (opcode) { case YIELD_VALUE: -#if PY_MAJOR_VERSION >= 3 case YIELD_FROM: -#endif return YIELD_OPCODE; case FOR_ITER: @@ -171,23 +156,6 @@ static int GetBranchTarget(int offset, PythonInstruction instruction) { } -#if PY_MAJOR_VERSION < 3 -// Reads 16 bit value according to Python bytecode encoding. -static uint16 ReadPythonBytecodeUInt16(std::vector::const_iterator it) { - return it[0] | (static_cast(it[1]) << 8); -} - - -// Writes 16 bit value according to Python bytecode encoding. -static void WritePythonBytecodeUInt16( - std::vector::iterator it, - uint16 data) { - it[0] = static_cast(data); - it[1] = data >> 8; -} -#endif - - // Read instruction at the specified offset. Returns kInvalidInstruction // buffer underflow. 
static PythonInstruction ReadInstruction( @@ -195,7 +163,6 @@ static PythonInstruction ReadInstruction( std::vector::const_iterator it) { PythonInstruction instruction { 0, 0, 0 }; -#if PY_MAJOR_VERSION >= 3 if (bytecode.end() - it < 2) { LOG(ERROR) << "Buffer underflow"; return kInvalidInstruction; @@ -214,39 +181,6 @@ static PythonInstruction ReadInstruction( instruction.opcode = it[0]; instruction.argument = instruction.argument << 8 | it[1]; instruction.size += 2; -#else - if (it == bytecode.end()) { - LOG(ERROR) << "Buffer underflow"; - return kInvalidInstruction; - } - - instruction.opcode = it[0]; - instruction.size = 1; - - auto it_arg = it + 1; - if (instruction.opcode == EXTENDED_ARG) { - if (bytecode.end() - it < 6) { - LOG(ERROR) << "Buffer underflow"; - return kInvalidInstruction; - } - - instruction.opcode = it[3]; - - auto it_ext = it + 4; - instruction.argument = - (static_cast(ReadPythonBytecodeUInt16(it_arg)) << 16) | - ReadPythonBytecodeUInt16(it_ext); - instruction.size = 6; - } else if (HAS_ARG(instruction.opcode)) { - if (bytecode.end() - it < 3) { - LOG(ERROR) << "Buffer underflow"; - return kInvalidInstruction; - } - - instruction.argument = ReadPythonBytecodeUInt16(it_arg); - instruction.size = 3; - } -#endif return instruction; } @@ -256,7 +190,6 @@ static PythonInstruction ReadInstruction( // instruction. 
static int WriteInstruction(std::vector::iterator it, const PythonInstruction& instruction) { -#if PY_MAJOR_VERSION >= 3 uint32_t arg = instruction.argument; int size_written = 0; // Start writing backwards from the real instruction, followed by any @@ -268,29 +201,6 @@ static int WriteInstruction(std::vector::iterator it, size_written += 2; } return size_written; -#else - if (instruction.size == 6) { - it[0] = EXTENDED_ARG; - WritePythonBytecodeUInt16(it + 1, instruction.argument >> 16); - it[3] = instruction.opcode; - WritePythonBytecodeUInt16( - it + 4, - static_cast(instruction.argument)); - return 6; - } else { - it[0] = instruction.opcode; - - if (HAS_ARG(instruction.opcode)) { - DCHECK_LE(instruction.argument, 0xFFFFU); - WritePythonBytecodeUInt16( - it + 1, - static_cast(instruction.argument)); - return 3; - } - - return 1; - } -#endif } // Write set of instructions to the specified destination. @@ -367,16 +277,6 @@ bool BytecodeManipulator::InjectMethodCall( } -// Use different algorithms to insert method calls for Python 2 and 3. -// Technically the algorithm for Python 3 will work with Python 2, but because -// it is more complicated and the issue of needing to upgrade branch -// instructions to use EXTENDED_ARG is less common, we stick with the existing -// algorithm for better safety. - - -#if PY_MAJOR_VERSION >= 3 - - // Represents a branch instruction in the original bytecode that may need to // have its offsets fixed and/or upgraded to use EXTENDED_ARG. 
struct UpdatedInstruction { @@ -619,113 +519,6 @@ bool BytecodeManipulator::InsertMethodCall( } -#else - - -bool BytecodeManipulator::InsertMethodCall( - BytecodeManipulator::Data* data, - int offset, - int const_index) const { - const std::vector method_call_instructions = - BuildMethodCall(const_index); - int size = GetInstructionsSize(method_call_instructions); - - bool offset_valid = false; - for (auto it = data->bytecode.begin(); it < data->bytecode.end(); ) { - const int current_offset = it - data->bytecode.begin(); - if (current_offset == offset) { - DCHECK(!offset_valid) << "Each offset should be visited only once"; - offset_valid = true; - } - - int current_fixed_offset = current_offset; - if (current_fixed_offset >= offset) { - current_fixed_offset += size; - } - - PythonInstruction instruction = ReadInstruction(data->bytecode, it); - if (instruction.opcode == kInvalidInstruction.opcode) { - return false; - } - - // Fix targets in branch instructions. - switch (GetOpcodeType(instruction.opcode)) { - case BRANCH_DELTA_OPCODE: { - int32 delta = static_cast(instruction.argument); - int32 target = current_offset + instruction.size + delta; - - if (target > offset) { - target += size; - } - - int32 fixed_delta = target - current_fixed_offset - instruction.size; - if (delta != fixed_delta) { - PythonInstruction new_instruction = - PythonInstructionArg(instruction.opcode, fixed_delta); - if (new_instruction.size != instruction.size) { - LOG(ERROR) << "Upgrading instruction to extended not supported"; - return false; - } - - WriteInstruction(it, new_instruction); - } - break; - } - - case BRANCH_ABSOLUTE_OPCODE: - if (static_cast(instruction.argument) > offset) { - PythonInstruction new_instruction = PythonInstructionArg( - instruction.opcode, instruction.argument + size); - if (new_instruction.size != instruction.size) { - LOG(ERROR) << "Upgrading instruction to extended not supported"; - return false; - } - - WriteInstruction(it, new_instruction); - } - break; 
- - default: - break; - } - - it += instruction.size; - } - - if (!offset_valid) { - LOG(ERROR) << "Offset " << offset << " is mid instruction or out of range"; - return false; - } - - // Insert the bytecode to invoke the callable. - data->bytecode.insert(data->bytecode.begin() + offset, size, NOP); - WriteInstructions(data->bytecode.begin() + offset, method_call_instructions); - - // Insert a new entry into line table to account for the new bytecode. - if (has_lnotab_) { - int current_offset = 0; - for (auto it = data->lnotab.begin(); it != data->lnotab.end(); it += 2) { - current_offset += it[0]; - - if (current_offset >= offset) { - int remaining_size = size; - while (remaining_size > 0) { - const int current_size = std::min(remaining_size, 0xFF); - it = data->lnotab.insert(it, static_cast(current_size)) + 1; - it = data->lnotab.insert(it, 0) + 1; - remaining_size -= current_size; - } - - break; - } - } - } - - return true; -} -#endif - - // This method does not change line numbers table. The line numbers table // is monotonically growing, which is not going to work for our case. Besides // the trampoline will virtually always fit a single instruction, so we don't diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index d57c7b4..dd3c93a 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -26,8 +26,6 @@ import time import types -import six - from . import cdbg_native as native from . import labels @@ -46,9 +44,8 @@ # Externally defined function to collect the end user id. 
breakpoint_labels_collector = lambda: {} -_PRIMITIVE_TYPES = (type(None), float, complex, bool, slice, bytearray, - six.text_type, - six.binary_type) + six.integer_types + six.string_types +_PRIMITIVE_TYPES = (type(None), float, complex, bool, slice, bytearray, str, + bytes, int) _DATE_TYPES = (datetime.date, datetime.time, datetime.timedelta) _VECTOR_TYPES = (tuple, list, set) @@ -389,7 +386,7 @@ def CaptureFrameLocals(self, frame): if argname in variables: frame_arguments.append(variables.pop(argname)) - return (frame_arguments, list(six.viewvalues(variables))) + return (frame_arguments, list(variables.values())) def CaptureNamedVariable(self, name, value, depth, limits): """Appends name to the product of CaptureVariable. @@ -597,12 +594,11 @@ def CaptureVariableInternal(self, value, depth, limits, can_enqueue=True): # Add an additional depth for the object itself items = value.__dict__.items() - if six.PY3: - # Make a list of the iterator in Python 3, to avoid 'dict changed size - # during iteration' errors from GC happening in the middle. - # Only limits.max_list_items + 1 items are copied, anything past that will - # get ignored by CaptureVariablesList(). - items = list(itertools.islice(items, limits.max_list_items + 1)) + # Make a list of the iterator in Python 3, to avoid 'dict changed size + # during iteration' errors from GC happening in the middle. + # Only limits.max_list_items + 1 items are copied, anything past that will + # get ignored by CaptureVariablesList(). 
+ items = list(itertools.islice(items, limits.max_list_items + 1)) members = self.CaptureVariablesList(items, depth + 2, OBJECT_HAS_NO_FIELDS, limits) v = {'members': members} @@ -664,7 +660,7 @@ def _CaptureEnvironmentLabels(self): self.breakpoint['labels'] = {} if callable(breakpoint_labels_collector): - for (key, value) in six.iteritems(breakpoint_labels_collector()): + for (key, value) in breakpoint_labels_collector().items(): self._StoreLabel(key, value) def _CaptureRequestLogId(self): @@ -879,7 +875,7 @@ def FormatList(items, formatter, level=0): return str(type(value)) if isinstance(value, dict): - return '{' + FormatList(six.iteritems(value), FormatDictItem) + '}' + return '{' + FormatList(value.items(), FormatDictItem) + '}' if isinstance(value, _VECTOR_TYPES): return _ListTypeFormatString(value).format( diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py index 750c8ed..3b03708 100644 --- a/src/googleclouddebugger/gcp_hub_client.py +++ b/src/googleclouddebugger/gcp_hub_client.py @@ -31,7 +31,6 @@ import googleapiclient import googleapiclient.discovery import httplib2 -import six import google.auth from google.oauth2 import service_account @@ -171,7 +170,7 @@ def InitializeDebuggeeLabels(self, flags): """ self._debuggee_labels = {} - for (label, var_names) in six.iteritems(_DEBUGGEE_LABELS): + for (label, var_names) in _DEBUGGEE_LABELS.items(): # var_names is a list of possible environment variables that may contain # the label value. Find the first one that is set. 
for name in var_names: @@ -196,7 +195,7 @@ def InitializeDebuggeeLabels(self, flags): if flags: self._debuggee_labels.update({ name: value - for (name, value) in six.iteritems(flags) + for (name, value) in flags.items() if name in _DEBUGGEE_LABELS }) diff --git a/src/googleclouddebugger/immutability_tracer.cc b/src/googleclouddebugger/immutability_tracer.cc index 6cfa66c..d5f102a 100644 --- a/src/googleclouddebugger/immutability_tracer.cc +++ b/src/googleclouddebugger/immutability_tracer.cc @@ -365,7 +365,6 @@ static OpcodeMutableStatus IsOpcodeMutable(const uint8_t opcode) { case CONTINUE_LOOP: case SETUP_LOOP: #endif -#if PY_MAJOR_VERSION >= 3 case DUP_TOP_TWO: case BINARY_MATRIX_MULTIPLY: case INPLACE_MATRIX_MULTIPLY: @@ -401,25 +400,6 @@ static OpcodeMutableStatus IsOpcodeMutable(const uint8_t opcode) { #if PY_VERSION_HEX >= 0x03080000 // Added back in Python 3.8 (was in 2.7 as well) case ROT_FOUR: -#endif -#else - case ROT_FOUR: - case DUP_TOPX: - case UNARY_NOT: - case UNARY_CONVERT: - case BINARY_DIVIDE: - case BINARY_OR: - case INPLACE_DIVIDE: - case SLICE+0: - case SLICE+1: - case SLICE+2: - case SLICE+3: - case LOAD_LOCALS: - case EXEC_STMT: - case JUMP_ABSOLUTE: - case CALL_FUNCTION_VAR: - case CALL_FUNCTION_VAR_KW: - case MAKE_CLOSURE: #endif return OPCODE_NOT_MUTABLE; @@ -450,7 +430,6 @@ static OpcodeMutableStatus IsOpcodeMutable(const uint8_t opcode) { // Removed in Python 3.8. 
case SETUP_EXCEPT: #endif -#if PY_MAJOR_VERSION >= 3 case GET_AITER: case GET_ANEXT: case BEFORE_ASYNC_WITH: @@ -489,23 +468,6 @@ static OpcodeMutableStatus IsOpcodeMutable(const uint8_t opcode) { case RERAISE: case WITH_EXCEPT_START: case LOAD_ASSERTION_ERROR: -#endif -#else - case STORE_SLICE+0: - case STORE_SLICE+1: - case STORE_SLICE+2: - case STORE_SLICE+3: - case DELETE_SLICE+0: - case DELETE_SLICE+1: - case DELETE_SLICE+2: - case DELETE_SLICE+3: - case STORE_MAP: - case PRINT_ITEM_TO: - case PRINT_ITEM: - case PRINT_NEWLINE_TO: - case PRINT_NEWLINE: - case BUILD_CLASS: - case WITH_CLEANUP: #endif return OPCODE_MUTABLE; @@ -525,16 +487,11 @@ void ImmutabilityTracer::ProcessCodeRange(const uint8_t* code_start, // We don't worry about the sizes of instructions with EXTENDED_ARG. // The argument does not really matter and so EXTENDED_ARGs can be // treated as just another instruction with an opcode. -#if PY_MAJOR_VERSION >= 3 opcodes += 2; -#else - opcodes += HAS_ARG(opcode) ? 3 : 1; -#endif DCHECK_LE(opcodes, end); break; case OPCODE_MAYBE_MUTABLE: -#if PY_MAJOR_VERSION >= 3 if (opcode == JUMP_ABSOLUTE) { // Check for a jump to itself, which happens in "while True: pass". // The tracer won't call our tracing function unless there is a jump @@ -551,7 +508,6 @@ void ImmutabilityTracer::ProcessCodeRange(const uint8_t* code_start, DCHECK_LE(opcodes, end); break; } -#endif LOG(WARNING) << "Unknown opcode " << static_cast(opcode); mutable_code_detected_ = true; return; diff --git a/src/googleclouddebugger/imphook2.py b/src/googleclouddebugger/imphook2.py index 3dafd13..844beed 100644 --- a/src/googleclouddebugger/imphook2.py +++ b/src/googleclouddebugger/imphook2.py @@ -35,8 +35,7 @@ import sys # Must be imported, otherwise import hooks don't work. import threading -import six -from six.moves import builtins # pylint: disable=redefined-builtin +import builtins from . 
import module_utils2 @@ -111,14 +110,12 @@ def _InstallImportHookBySuffix(): assert _real_import builtins.__import__ = _ImportHookBySuffix - if six.PY3: - # In Python 2, importlib.import_module calls __import__ internally so - # overriding __import__ is enough. In Python 3, they are separate so it also - # needs to be overwritten. - global _real_import_module - _real_import_module = importlib.import_module - assert _real_import_module - importlib.import_module = _ImportModuleHookBySuffix + # importlib.import_module and __import__ are separate in Python 3 so both + # need to be overwritten. + global _real_import_module + _real_import_module = importlib.import_module + assert _real_import_module + importlib.import_module = _ImportModuleHookBySuffix def _IncrementNestLevel(): @@ -181,12 +178,9 @@ def _ImportHookBySuffix(name, if level is None: # A level of 0 means absolute import, positive values means relative - # imports, and -1 means to try both an absolute and relative import. - # Since imports were disambiguated in Python 3, -1 is not a valid value. - # The default values are 0 and -1 for Python 3 and 3 respectively. - # https://docs.python.org/2/library/functions.html#__import__ + # imports. # https://docs.python.org/3/library/functions.html#__import__ - level = 0 if six.PY3 else -1 + level = 0 try: # Really import modules. diff --git a/src/googleclouddebugger/module_explorer.py b/src/googleclouddebugger/module_explorer.py index 2887655..acecea9 100644 --- a/src/googleclouddebugger/module_explorer.py +++ b/src/googleclouddebugger/module_explorer.py @@ -18,8 +18,6 @@ import sys import types -import six - # Maximum traversal depth when looking for all the code objects referenced by # a module or another code object. _MAX_REFERENTS_BFS_DEPTH = 15 @@ -34,9 +32,8 @@ _MAX_OBJECT_REFERENTS = 1000 # Object types to ignore when looking for the code objects. 
-_BFS_IGNORE_TYPES = (types.ModuleType, type(None), bool, float, six.binary_type, - six.text_type, types.BuiltinFunctionType, - types.BuiltinMethodType, list) + six.integer_types +_BFS_IGNORE_TYPES = (types.ModuleType, type(None), bool, float, bytes, str, int, + types.BuiltinFunctionType, types.BuiltinMethodType, list) def GetCodeObjectAtLine(module, line): @@ -55,7 +52,7 @@ def GetCodeObjectAtLine(module, line): return (False, (None, None)) prev_line = 0 - next_line = six.MAXSIZE + next_line = sys.maxsize for code_object in _GetModuleCodeObjects(module): for co_line_number in _GetLineNumbers(code_object): @@ -68,7 +65,7 @@ def GetCodeObjectAtLine(module, line): break prev_line = None if prev_line == 0 else prev_line - next_line = None if next_line == six.MAXSIZE else next_line + next_line = None if next_line == sys.maxsize else next_line return (False, (prev_line, next_line)) @@ -84,12 +81,8 @@ def _GetLineNumbers(code_object): # Get the line number deltas, which are the odd number entries, from the # lnotab. See # https://svn.python.org/projects/python/branches/pep-0384/Objects/lnotab_notes.txt - # In Python 3, this is just a byte array. In Python 2 it is a string so the - # numerical values have to be extracted from the individual characters. - if six.PY3: - line_incrs = code_object.co_lnotab[1::2] - else: - line_incrs = (ord(c) for c in code_object.co_lnotab[1::2]) + # In Python 3, this is just a byte array. 
+ line_incrs = code_object.co_lnotab[1::2] current_line = code_object.co_firstlineno for line_incr in line_incrs: current_line += line_incr @@ -212,7 +205,7 @@ def CheckIgnoreClass(cls): if isinstance(obj, types.CodeType) and CheckIgnoreCodeObject(obj): continue - if isinstance(obj, six.class_types) and CheckIgnoreClass(obj): + if isinstance(obj, type) and CheckIgnoreClass(obj): continue if isinstance(obj, types.CodeType): diff --git a/src/googleclouddebugger/yaml_data_visibility_config_reader.py b/src/googleclouddebugger/yaml_data_visibility_config_reader.py index 1ebc406..dc75673 100644 --- a/src/googleclouddebugger/yaml_data_visibility_config_reader.py +++ b/src/googleclouddebugger/yaml_data_visibility_config_reader.py @@ -26,7 +26,6 @@ import os import sys -import six import yaml @@ -126,7 +125,7 @@ def _CheckData(yaml_data): raise UnknownConfigKeyError('Unknown keys in configuration: %s' % unknown_keys) - for key, data in six.iteritems(yaml_data): + for key, data in yaml_data.items(): _AssertDataIsList(key, data) diff --git a/tests/capture_collector_test.py b/tests/capture_collector_test.py index 8733497..5d58832 100644 --- a/tests/capture_collector_test.py +++ b/tests/capture_collector_test.py @@ -8,8 +8,6 @@ import time from unittest import mock -import six - from absl.testing import absltest from googleclouddebugger import capture_collector @@ -703,7 +701,7 @@ def testCaptureDictionary(self): self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) self._collector.Collect(inspect.currentframe()) - frozenset_name = 'frozenset({5, 6})' if six.PY3 else 'frozenset([5, 6])' + frozenset_name = 'frozenset({5, 6})' self.assertCountEqual([{ 'name': "'first'", 'value': '1', @@ -770,9 +768,9 @@ def testEscapeDictionaryKey(self): self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) self._collector.Collect(inspect.currentframe()) - unicode_type = 'str' if six.PY3 else 'unicode' - unicode_name = "'\xe0'" if six.PY3 else "u'\\xe0'" - 
unicode_value = "'\xe0'" if six.PY3 else "u'\\xe0'" + unicode_type = 'str' + unicode_name = "'\xe0'" + unicode_value = "'\xe0'" self.assertCountEqual([{ 'type': 'str', @@ -1010,8 +1008,7 @@ def testExpressionException(self): }) self._collector.Collect(inspect.currentframe()) - zero_division_msg = ('division by zero' - if six.PY3 else 'integer division or modulo by zero') + zero_division_msg = 'division by zero' self.assertListEqual([{ 'name': 'unused_dummy_a/unused_dummy_b', @@ -1742,7 +1739,7 @@ def testNestedRecursionLimit(self): def testNestedRecursionItemLimits(self): unused_list = [1, [1, [1, [2], 3, 4], 3, 4], 3, 4] - list_type = "" if six.PY3 else "" + list_type = "" collector = LogCollectorWithDefaultLocation({ 'logLevel': 'INFO', 'logMessageFormat': '$0', @@ -1756,7 +1753,7 @@ def testNestedRecursionItemLimits(self): 'LOGPOINT: [1, [1, [1, %s, 3, ...], 3, ...], 3, ...]' % list_type)) def testDetermineType(self): - builtin_prefix = 'builtins.' if six.PY3 else '__builtin__.' + builtin_prefix = 'builtins.' path_prefix = 'googleclouddebugger.capture_collector.' test_data = ( (builtin_prefix + 'int', 5), diff --git a/tests/imphook2_test.py b/tests/imphook2_test.py index 8045e56..3a7159d 100644 --- a/tests/imphook2_test.py +++ b/tests/imphook2_test.py @@ -5,7 +5,6 @@ import sys import tempfile -import six from absl.testing import absltest from googleclouddebugger import imphook2 @@ -188,13 +187,10 @@ def testImportLib(self): self._import_callbacks_log = [] # Relative module import from package context with '..'. 
- if six.PY3: - # In Python 3, the parent module has to be loaded before a relative import - importlib.import_module('testpkg15a.testpkg15c') - self._import_callbacks_log = [] - importlib.import_module('..first', 'testpkg15a.testpkg15c') - else: - importlib.import_module('..first', 'testpkg15a.testpkg15b') + # In Python 3, the parent module has to be loaded before a relative import + importlib.import_module('testpkg15a.testpkg15c') + self._import_callbacks_log = [] + importlib.import_module('..first', 'testpkg15a.testpkg15c') self.assertEqual( [ 'testpkg15a/__init__.py', @@ -439,7 +435,7 @@ def testCleanup(self): cleanup5() self.assertLen(imphook2._import_callbacks, 0) - def _CreateFile(self, path, content='', rewrite_imports_if_py3=True): + def _CreateFile(self, path, content='', rewrite_imports=True): full_path = os.path.join(self._test_package_dir, path) directory, unused_name = os.path.split(full_path) @@ -469,7 +465,7 @@ def RewriteImport(line): return indent + line with open(full_path, 'w') as writer: - if six.PY3 and rewrite_imports_if_py3: + if rewrite_imports: content = '\n'.join(RewriteImport(l) for l in content.split('\n')) writer.write(content) diff --git a/tests/integration_test_disabled.py b/tests/integration_test_disabled.py index 2fcfa00..12321c4 100644 --- a/tests/integration_test_disabled.py +++ b/tests/integration_test_disabled.py @@ -15,7 +15,7 @@ from googleapiclient import discovery import googleclouddebugger as cdbg -from six.moves import queue +import queue import google.auth from absl.testing import absltest diff --git a/tests/module_explorer_test_disabled.py b/tests/module_explorer_test_disabled.py index b05ec53..b7542e1 100644 --- a/tests/module_explorer_test_disabled.py +++ b/tests/module_explorer_test_disabled.py @@ -10,7 +10,6 @@ import sys import tempfile -import six from absl.testing import absltest from googleclouddebugger import module_explorer @@ -29,7 +28,7 @@ def setUp(self): def testGlobalMethod(self): """Verify that global 
method is found.""" - self.assertIn(six.get_function_code(_GlobalMethod), self._code_objects) + self.assertIn(_GlobalMethod.__code__, self._code_objects) def testInnerMethodOfGlobalMethod(self): """Verify that inner method defined in a global method is found.""" @@ -37,8 +36,7 @@ def testInnerMethodOfGlobalMethod(self): def testInstanceClassMethod(self): """Verify that instance class method is found.""" - self.assertIn( - six.get_function_code(self.testInstanceClassMethod), self._code_objects) + self.assertIn(self.testInstanceClassMethod.__code__, self._code_objects) def testInnerMethodOfInstanceClassMethod(self): """Verify that inner method defined in a class instance method is found.""" @@ -46,13 +44,11 @@ def testInnerMethodOfInstanceClassMethod(self): def InnerMethod(): pass - self.assertIn(six.get_function_code(InnerMethod), self._code_objects) + self.assertIn(InnerMethod.__code__, self._code_objects) def testStaticMethod(self): """Verify that static class method is found.""" - self.assertIn( - six.get_function_code(ModuleExplorerTest._StaticMethod), - self._code_objects) + self.assertIn(ModuleExplorerTest._StaticMethod.__code__, self._code_objects) def testInnerMethodOfStaticMethod(self): """Verify that static class method is found.""" @@ -60,7 +56,7 @@ def testInnerMethodOfStaticMethod(self): def testNonModuleClassMethod(self): """Verify that instance method defined in a base class is not added.""" - self.assertNotIn(six.get_function_code(self.assertTrue), self._code_objects) + self.assertNotIn(self.assertTrue.__code__, self._code_objects) def testDeepInnerMethod(self): """Verify that inner of inner of inner, etc. 
method is found.""" @@ -76,7 +72,7 @@ def Inner4(): def Inner5(): pass - return six.get_function_code(Inner5) + return Inner5.__code__ return Inner4() @@ -104,9 +100,7 @@ class InnerClass(object): def InnerClassMethod(self): pass - self.assertIn( - six.get_function_code(InnerClass().InnerClassMethod), - self._code_objects) + self.assertIn(InnerClass().InnerClassMethod.__code__, self._code_objects) def testMethodOfInnerOldStyleClass(self): """Verify that method of inner old style class is found.""" @@ -116,9 +110,7 @@ class InnerClass(): def InnerClassMethod(self): pass - self.assertIn( - six.get_function_code(InnerClass().InnerClassMethod), - self._code_objects) + self.assertIn(InnerClass().InnerClassMethod.__code__, self._code_objects) def testGlobalMethodWithClosureDecorator(self): co = self._GetCodeObjectAtLine(self._module, @@ -154,11 +146,11 @@ def testSameFileName(self): def testCodeObjectAtLine(self): """Verify that query of code object at a specified source line.""" - test_cases = [(six.get_function_code(self.testCodeObjectAtLine), - 'TEST_CODE_OBJECT_AT_ASSERT'), - (ModuleExplorerTest._StaticMethod(), - 'INNER_OF_STATIC_METHOD'), - (_GlobalMethod(), 'INNER_OF_GLOBAL_METHOD')] + test_cases = [ + (self.testCodeObjectAtLine.__code__, 'TEST_CODE_OBJECT_AT_ASSERT'), + (ModuleExplorerTest._StaticMethod(), 'INNER_OF_STATIC_METHOD'), + (_GlobalMethod(), 'INNER_OF_GLOBAL_METHOD') + ] for code_object, tag in test_cases: self.assertEqual( # BPTAG: TEST_CODE_OBJECT_AT_ASSERT @@ -184,19 +176,17 @@ def testCodeObjectWithoutModule(self): # with open(module_path, 'w') as f: # f.write('def f():\n pass') # py_compile.compile(module_path) -# if six.PY3: -# module_pyc_path = os.path.join(test_dir, '__pycache__', -# 'module.cpython-37.pyc') -# os.rename(module_pyc_path, module_path + 'c') +# module_pyc_path = os.path.join(test_dir, '__pycache__', +# 'module.cpython-37.pyc') +# os.rename(module_pyc_path, module_path + 'c') # os.remove(module_path) # # import module # pylint: 
disable=g-import-not-at-top # self.assertEqual('.py', -# os.path.splitext( -# six.get_function_code(module.f).co_filename)[1]) +# os.path.splitext(module.f.__code__.co_filename)[1]) # self.assertEqual('.pyc', os.path.splitext(module.__file__)[1]) # -# func_code = six.get_function_code(module.f) +# func_code = module.f.__code__ # self.assertEqual(func_code, # module_explorer.GetCodeObjectAtLine( # module, @@ -263,7 +253,7 @@ def _StaticMethod(): def InnerMethod(): pass # BPTAG: INNER_OF_STATIC_METHOD - return six.get_function_code(InnerMethod) + return InnerMethod.__code__ def _GetCodeObjectAtLine(self, fn, tag): """Wrapper over GetCodeObjectAtLine for tags in this module.""" @@ -276,7 +266,7 @@ def _GlobalMethod(): def InnerMethod(): pass # BPTAG: INNER_OF_GLOBAL_METHOD - return six.get_function_code(InnerMethod) + return InnerMethod.__code__ def ClosureDecorator(handler): @@ -318,7 +308,7 @@ def FnWithClassDecorator(self): def _MethodWithLambdaExpression(): - return six.get_function_code(lambda x: x**3) + return (lambda x: x**3).__code__ def _MethodWithGeneratorExpression(): diff --git a/tests/native_module_test.py b/tests/native_module_test.py index 2beaada..b3b486b 100644 --- a/tests/native_module_test.py +++ b/tests/native_module_test.py @@ -5,8 +5,6 @@ import threading import time -import six - from absl.testing import absltest from googleclouddebugger import cdbg_native as native @@ -250,9 +248,9 @@ def _SetBreakpoint(self, method, tag, condition=None): if condition is not None: compiled_condition = compile(condition, '', 'eval') - cookie = native.CreateConditionalBreakpoint( - six.get_function_code(method), line, compiled_condition, - self._BreakpointEvent) + cookie = native.CreateConditionalBreakpoint(method.__code__, line, + compiled_condition, + self._BreakpointEvent) self._cookies.append(cookie) native.ActivateConditionalBreakpoint(cookie) diff --git a/tests/yaml_data_visibility_config_reader_test.py b/tests/yaml_data_visibility_config_reader_test.py 
index a74f3e5..65d3cd0 100644 --- a/tests/yaml_data_visibility_config_reader_test.py +++ b/tests/yaml_data_visibility_config_reader_test.py @@ -4,7 +4,7 @@ import sys from unittest import mock -from six import StringIO +from io import StringIO from absl.testing import absltest from googleclouddebugger import yaml_data_visibility_config_reader From c76f05c8aae299b46011cf997c68f8443244b477 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Mon, 20 Jun 2022 17:05:37 -0400 Subject: [PATCH 203/241] fix: add missing six translation to python3 (#44) --- src/googleclouddebugger/capture_collector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/capture_collector.py index dd3c93a..6420f91 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/capture_collector.py @@ -371,7 +371,7 @@ def CaptureFrameLocals(self, frame): # Capture all local variables (including method arguments). variables = { n: self.CaptureNamedVariable(n, v, 1, self.default_capture_limits) - for n, v in six.viewitems(frame.f_locals) + for n, v in frame.f_locals.items() } # Split between locals and arguments (keeping arguments in the right order). From 7ec598ff622af19027f1490beae6b1ee75377de0 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Tue, 21 Jun 2022 09:30:31 -0400 Subject: [PATCH 204/241] fix: handle case where html is returned from metadata server (#45) --- src/googleclouddebugger/application_info.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/googleclouddebugger/application_info.py b/src/googleclouddebugger/application_info.py index 045c06b..c920cce 100644 --- a/src/googleclouddebugger/application_info.py +++ b/src/googleclouddebugger/application_info.py @@ -69,6 +69,11 @@ def GetRegion(): response.raise_for_status() # Example of response text: projects/id/regions/us-central1. So we strip # everything before the last /. 
- return response.text.split('/')[-1] + region = response.text.split('/')[-1] + if region == 'html>': + # Sometimes we get an html response! + return None + + return region except requests.exceptions.RequestException: return None From 339adabf38f7ecbb5d369ddc443a4fceafab563c Mon Sep 17 00:00:00 2001 From: James McTavish Date: Tue, 21 Jun 2022 16:02:58 -0400 Subject: [PATCH 205/241] chore: rename some files to more appropriate names (#46) * address the TODO to rename capture_collector to just collector * address the TODO to remove the 2 from a few modules --- src/googleclouddebugger/__init__.py | 6 +- .../{capture_collector.py => collector.py} | 2 - .../{imphook2.py => imphook.py} | 7 +- .../{module_search2.py => module_search.py} | 0 .../{module_utils2.py => module_utils.py} | 0 src/googleclouddebugger/python_breakpoint.py | 18 +- ...re_collector_test.py => collector_test.py} | 186 +++++++++--------- tests/{imphook2_test.py => imphook_test.py} | 34 ++-- ..._search2_test.py => module_search_test.py} | 20 +- ...le_utils2_test.py => module_utils_test.py} | 28 +-- 10 files changed, 149 insertions(+), 152 deletions(-) rename src/googleclouddebugger/{capture_collector.py => collector.py} (99%) rename src/googleclouddebugger/{imphook2.py => imphook.py} (98%) rename src/googleclouddebugger/{module_search2.py => module_search.py} (100%) rename src/googleclouddebugger/{module_utils2.py => module_utils.py} (100%) rename tests/{capture_collector_test.py => collector_test.py} (90%) rename tests/{imphook2_test.py => imphook_test.py} (95%) rename tests/{module_search2_test.py => module_search_test.py} (86%) rename tests/{module_utils2_test.py => module_utils_test.py} (84%) diff --git a/src/googleclouddebugger/__init__.py b/src/googleclouddebugger/__init__.py index c0daec0..00cd217 100644 --- a/src/googleclouddebugger/__init__.py +++ b/src/googleclouddebugger/__init__.py @@ -27,7 +27,7 @@ from . import appengine_pretty_printers from . import breakpoints_manager -from . 
import capture_collector +from . import collector from . import error_data_visibility_policy from . import gcp_hub_client from . import glob_data_visibility_policy @@ -58,9 +58,9 @@ def _StartDebugger(): _hub_client, visibility_policy) # Set up loggers for logpoints. - capture_collector.SetLogger(logging.getLogger()) + collector.SetLogger(logging.getLogger()) - capture_collector.CaptureCollector.pretty_printers.append( + collector.CaptureCollector.pretty_printers.append( appengine_pretty_printers.PrettyPrinter) _hub_client.on_active_breakpoints_changed = ( diff --git a/src/googleclouddebugger/capture_collector.py b/src/googleclouddebugger/collector.py similarity index 99% rename from src/googleclouddebugger/capture_collector.py rename to src/googleclouddebugger/collector.py index 6420f91..82916ab 100644 --- a/src/googleclouddebugger/capture_collector.py +++ b/src/googleclouddebugger/collector.py @@ -13,8 +13,6 @@ # limitations under the License. """Captures application state on a breakpoint hit.""" -# TODO: rename this file to collector.py. - import copy import datetime import inspect diff --git a/src/googleclouddebugger/imphook2.py b/src/googleclouddebugger/imphook.py similarity index 98% rename from src/googleclouddebugger/imphook2.py rename to src/googleclouddebugger/imphook.py index 844beed..2e80648 100644 --- a/src/googleclouddebugger/imphook2.py +++ b/src/googleclouddebugger/imphook.py @@ -13,7 +13,7 @@ # limitations under the License. """Support for breakpoints on modules that haven't been loaded yet. -This is the new module import hook which: +This is the module import hook which: 1. Takes a partial path of the module file excluding the file extension as input (can be as short as 'foo' or longer such as 'sys/path/pkg/foo'). 2. At each (top-level-only) import statement: @@ -26,7 +26,6 @@ b. Checks sys.modules if any of these modules have a file that matches the given path, using suffix match. -For the old module import hook, see imphook.py file. 
""" import importlib @@ -37,7 +36,7 @@ import builtins -from . import module_utils2 +from . import module_utils # Callbacks to invoke when a module is imported. _import_callbacks = {} @@ -431,7 +430,7 @@ def GetModuleFromName(name, path): if not os.path.isabs(mod_root): mod_root = os.path.join(os.curdir, mod_root) - if module_utils2.IsPathSuffix(mod_root, root): + if module_utils.IsPathSuffix(mod_root, root): for callback in callbacks.copy(): callback(module) break diff --git a/src/googleclouddebugger/module_search2.py b/src/googleclouddebugger/module_search.py similarity index 100% rename from src/googleclouddebugger/module_search2.py rename to src/googleclouddebugger/module_search.py diff --git a/src/googleclouddebugger/module_utils2.py b/src/googleclouddebugger/module_utils.py similarity index 100% rename from src/googleclouddebugger/module_utils2.py rename to src/googleclouddebugger/module_utils.py diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index 4d86ce1..7b61fd1 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -18,12 +18,12 @@ import os from threading import Lock -from . import capture_collector +from . import collector from . import cdbg_native as native -from . import imphook2 +from . import imphook from . import module_explorer -from . import module_search2 -from . import module_utils2 +from . import module_search +from . import module_utils # TODO: move to messages.py module. 
# Use the following schema to define breakpoint error message constant: @@ -194,7 +194,7 @@ def __init__(self, definition, hub_client, breakpoints_manager, self._completed = False if self.definition.get('action') == 'LOG': - self._collector = capture_collector.LogCollector(self.definition) + self._collector = collector.LogCollector(self.definition) path = _NormalizePath(self.definition['location']['path']) @@ -225,13 +225,13 @@ def __init__(self, definition, hub_client, breakpoints_manager, }) return - new_path = module_search2.Search(path) - new_module = module_utils2.GetLoadedModuleBySuffix(new_path) + new_path = module_search.Search(path) + new_module = module_utils.GetLoadedModuleBySuffix(new_path) if new_module: self._ActivateBreakpoint(new_module) else: - self._import_hook_cleanup = imphook2.AddImportCallbackBySuffix( + self._import_hook_cleanup = imphook.AddImportCallbackBySuffix( new_path, self._ActivateBreakpoint) def Clear(self): @@ -420,7 +420,7 @@ def _BreakpointEvent(self, event, frame): self._CompleteBreakpoint({'status': error_status}) return - collector = capture_collector.CaptureCollector(self.definition, + collector = collector.CaptureCollector(self.definition, self.data_visibility_policy) # TODO: This is a temporary try/except. 
All exceptions should be diff --git a/tests/capture_collector_test.py b/tests/collector_test.py similarity index 90% rename from tests/capture_collector_test.py rename to tests/collector_test.py index 5d58832..fe936ad 100644 --- a/tests/capture_collector_test.py +++ b/tests/collector_test.py @@ -1,4 +1,4 @@ -"""Unit test for capture_collector module.""" +"""Unit test for collector module.""" import copy import datetime @@ -10,7 +10,7 @@ from absl.testing import absltest -from googleclouddebugger import capture_collector +from googleclouddebugger import collector from googleclouddebugger import labels LOGPOINT_PAUSE_MSG = ( @@ -29,8 +29,8 @@ def CaptureCollectorWithDefaultLocation(definition, Returns: A LogCollector """ - definition['location'] = {'path': 'capture_collector_test.py', 'line': 10} - return capture_collector.CaptureCollector(definition, data_visibility_policy) + definition['location'] = {'path': 'collector_test.py', 'line': 10} + return collector.CaptureCollector(definition, data_visibility_policy) def LogCollectorWithDefaultLocation(definition): @@ -42,15 +42,15 @@ def LogCollectorWithDefaultLocation(definition): Returns: A LogCollector """ - definition['location'] = {'path': 'capture_collector_test.py', 'line': 10} - return capture_collector.LogCollector(definition) + definition['location'] = {'path': 'collector_test.py', 'line': 10} + return collector.LogCollector(definition) class CaptureCollectorTest(absltest.TestCase): """Unit test for capture collector.""" def tearDown(self): - capture_collector.CaptureCollector.pretty_printers = [] + collector.CaptureCollector.pretty_printers = [] def testCallStackUnlimitedFrames(self): self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) @@ -70,7 +70,7 @@ def testCallStackLimitedFrames(self): top_frame = self._collector.breakpoint['stackFrames'][0] self.assertEqual('CaptureCollectorTest.testCallStackLimitedFrames', top_frame['function']) - self.assertIn('capture_collector_test.py', 
top_frame['location']['path']) + self.assertIn('collector_test.py', top_frame['location']['path']) self.assertGreater(top_frame['location']['line'], 1) frame_below = self._collector.breakpoint['stackFrames'][1] @@ -335,12 +335,12 @@ def testLocalVariables(self): }], top_frame['locals']) def testLocalVariablesWithBlacklist(self): - unused_a = capture_collector.LineNoFilter() + unused_a = collector.LineNoFilter() unused_b = 5 # Side effect logic for the mock data visibility object def IsDataVisible(name): - path_prefix = 'googleclouddebugger.capture_collector.' + path_prefix = 'googleclouddebugger.collector.' if name == path_prefix + 'LineNoFilter': return (False, 'data blocked') return (True, None) @@ -382,7 +382,7 @@ def __init__(self): # Side effect logic for the mock data visibility object def IsDataVisible(name): - if name == 'capture_collector_test.TestClass': + if name == 'collector_test.TestClass': return (False, 'data blocked') return (True, None) @@ -1065,7 +1065,7 @@ def PrettyPrinter2(obj): return None return ((('name2_%d' % i, '2_%d' % i) for i in range(3)), 'pp-type2') - capture_collector.CaptureCollector.pretty_printers += [ + collector.CaptureCollector.pretty_printers += [ PrettyPrinter1, PrettyPrinter2 ] @@ -1192,7 +1192,7 @@ def testException(self): }], obj['members']) def testRequestLogIdCapturing(self): - capture_collector.request_log_id_collector = lambda: 'test_log_id' + collector.request_log_id_collector = lambda: 'test_log_id' self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) self._collector.Collect(inspect.currentframe()) @@ -1202,17 +1202,17 @@ def testRequestLogIdCapturing(self): self._collector.breakpoint['labels'][labels.Breakpoint.REQUEST_LOG_ID]) def testRequestLogIdCapturingNoId(self): - capture_collector.request_log_id_collector = lambda: None + collector.request_log_id_collector = lambda: None self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) self._collector.Collect(inspect.currentframe()) 
def testRequestLogIdCapturingNoCollector(self): - capture_collector.request_log_id_collector = None + collector.request_log_id_collector = None self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) self._collector.Collect(inspect.currentframe()) def testUserIdSuccess(self): - capture_collector.user_id_collector = lambda: ('mdb_user', 'noogler') + collector.user_id_collector = lambda: ('mdb_user', 'noogler') self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) self._collector.Collect(inspect.currentframe()) @@ -1223,21 +1223,21 @@ def testUserIdSuccess(self): }, self._collector.breakpoint['evaluatedUserId']) def testUserIdIsNone(self): - capture_collector.user_id_collector = lambda: (None, None) + collector.user_id_collector = lambda: (None, None) self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) self._collector.Collect(inspect.currentframe()) self.assertNotIn('evaluatedUserId', self._collector.breakpoint) def testUserIdNoKind(self): - capture_collector.user_id_collector = lambda: (None, 'noogler') + collector.user_id_collector = lambda: (None, 'noogler') self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) self._collector.Collect(inspect.currentframe()) self.assertNotIn('evaluatedUserId', self._collector.breakpoint) def testUserIdNoValue(self): - capture_collector.user_id_collector = lambda: ('mdb_user', None) + collector.user_id_collector = lambda: ('mdb_user', None) self._collector = CaptureCollectorWithDefaultLocation({'id': 'BP_ID'}) self._collector.Collect(inspect.currentframe()) @@ -1310,7 +1310,7 @@ def GotMessage(self, if msg != record.msg: logging.error('Expected msg "%s", received "%s"', msg, record.msg) return False - pathname = capture_collector.NormalizePath(frame.f_code.co_filename) + pathname = collector.NormalizePath(frame.f_code.co_filename) if pathname != record.pathname: logging.error('Expected pathname "%s", received "%s"', pathname, record.pathname) @@ -1354,7 +1354,7 @@ 
def CheckMessageSafe(self, msg): self._verifier = LogVerifier() self._logger.addHandler(self._verifier) self._logger.setLevel(logging.INFO) - capture_collector.SetLogger(self._logger) + collector.SetLogger(self._logger) # Give some time for the global quota to recover time.sleep(0.1) @@ -1381,12 +1381,12 @@ def testLogQuota(self): # recover so the ordering of tests ideally doesn't affect this test. self.ResetGlobalLogQuota() bucket_max_capacity = 250 - collector = LogCollectorWithDefaultLocation({ + log_collector = LogCollectorWithDefaultLocation({ 'logMessageFormat': '$0', 'expressions': ['i'] }) for i in range(0, bucket_max_capacity * 2): - self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertIsNone(log_collector.Log(inspect.currentframe())) if not self._verifier.CheckMessageSafe('LOGPOINT: %s' % i): self.assertGreaterEqual(i, bucket_max_capacity, 'Log quota exhausted earlier than expected') @@ -1394,7 +1394,7 @@ def testLogQuota(self): self._verifier.CheckMessageSafe(LOGPOINT_PAUSE_MSG), 'Quota hit message not logged') time.sleep(0.6) - self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertIsNone(log_collector.Log(inspect.currentframe())) self.assertTrue( self._verifier.CheckMessageSafe('LOGPOINT: %s' % i), 'Logging not resumed after quota recovery time') @@ -1410,46 +1410,46 @@ def testLogBytesQuota(self): # implemented, it can allow effectively twice that amount to go out in a # very short time frame. So the third 30k message should pause. 
msg = ' ' * 30000 - collector = LogCollectorWithDefaultLocation({'logMessageFormat': msg}) - self.assertIsNone(collector.Log(inspect.currentframe())) + log_collector = LogCollectorWithDefaultLocation({'logMessageFormat': msg}) + self.assertIsNone(log_collector.Log(inspect.currentframe())) self.assertTrue(self._verifier.GotMessage('LOGPOINT: ' + msg)) - self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertIsNone(log_collector.Log(inspect.currentframe())) self.assertTrue(self._verifier.GotMessage('LOGPOINT: ' + msg)) - self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertIsNone(log_collector.Log(inspect.currentframe())) self.assertTrue( self._verifier.CheckMessageSafe(LOGPOINT_PAUSE_MSG), 'Quota hit message not logged') time.sleep(0.6) - collector._definition['logMessageFormat'] = 'hello' - self.assertIsNone(collector.Log(inspect.currentframe())) + log_collector._definition['logMessageFormat'] = 'hello' + self.assertIsNone(log_collector.Log(inspect.currentframe())) self.assertTrue( self._verifier.GotMessage('LOGPOINT: hello'), 'Logging was not resumed after quota recovery time') def testMissingLogLevel(self): # Missing is equivalent to INFO. 
- collector = LogCollectorWithDefaultLocation({'logMessageFormat': 'hello'}) - self.assertIsNone(collector.Log(inspect.currentframe())) + log_collector = LogCollectorWithDefaultLocation({'logMessageFormat': 'hello'}) + self.assertIsNone(log_collector.Log(inspect.currentframe())) self.assertTrue(self._verifier.GotMessage('LOGPOINT: hello')) def testUndefinedLogLevel(self): - capture_collector.log_info_message = None - collector = LogCollectorWithDefaultLocation({'logLevel': 'INFO'}) + collector.log_info_message = None + log_collector = LogCollectorWithDefaultLocation({'logLevel': 'INFO'}) self.assertDictEqual( { 'isError': True, 'description': { 'format': 'Log action on a breakpoint not supported' } - }, collector.Log(inspect.currentframe())) + }, log_collector.Log(inspect.currentframe())) def testLogInfo(self): - collector = LogCollectorWithDefaultLocation({ + log_collector = LogCollectorWithDefaultLocation({ 'logLevel': 'INFO', 'logMessageFormat': 'hello' }) - collector._definition['location']['line'] = 20 - self.assertIsNone(collector.Log(inspect.currentframe())) + log_collector._definition['location']['line'] = 20 + self.assertIsNone(log_collector.Log(inspect.currentframe())) self.assertTrue( self._verifier.GotMessage( 'LOGPOINT: hello', @@ -1457,11 +1457,11 @@ def testLogInfo(self): line_number=20)) def testLogWarning(self): - collector = LogCollectorWithDefaultLocation({ + log_collector = LogCollectorWithDefaultLocation({ 'logLevel': 'WARNING', 'logMessageFormat': 'hello' }) - self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertIsNone(log_collector.Log(inspect.currentframe())) self.assertTrue( self._verifier.GotMessage( 'LOGPOINT: hello', @@ -1469,11 +1469,11 @@ def testLogWarning(self): func_name='LogCollectorTest.testLogWarning')) def testLogError(self): - collector = LogCollectorWithDefaultLocation({ + log_collector = LogCollectorWithDefaultLocation({ 'logLevel': 'ERROR', 'logMessageFormat': 'hello' }) - 
self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertIsNone(log_collector.Log(inspect.currentframe())) self.assertTrue( self._verifier.GotMessage( 'LOGPOINT: hello', @@ -1481,12 +1481,12 @@ def testLogError(self): func_name='LogCollectorTest.testLogError')) def testBadExpression(self): - collector = LogCollectorWithDefaultLocation({ + log_collector = LogCollectorWithDefaultLocation({ 'logLevel': 'INFO', 'logMessageFormat': 'a=$0, b=$1', 'expressions': ['-', '+'] }) - self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertIsNone(log_collector.Log(inspect.currentframe())) self.assertTrue( self._verifier.GotMessage( 'LOGPOINT: a=')) def testException(self): - collector = LogCollectorWithDefaultLocation({ + log_collector = LogCollectorWithDefaultLocation({ 'logLevel': 'INFO', 'logMessageFormat': '$0', 'expressions': ['[][1]'] }) - self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertIsNone(log_collector.Log(inspect.currentframe())) self.assertTrue( self._verifier.GotMessage( 'LOGPOINT: ')) @@ -1529,12 +1529,12 @@ def testMutableExpression(self): def MutableMethod(): # pylint: disable=unused-variable self.abc = None - collector = LogCollectorWithDefaultLocation({ + log_collector = LogCollectorWithDefaultLocation({ 'logLevel': 'INFO', 'logMessageFormat': '$0', 'expressions': ['MutableMethod()'] }) - self.assertIsNone(collector.Log(inspect.currentframe())) + self.assertIsNone(log_collector.Log(inspect.currentframe())) self.assertTrue( self._verifier.GotMessage( 'LOGPOINT: libraries with no real file. So, we # can no longer match them by file path. def testSearchInvalidSourcePath(self): # This is an invalid module that doesn't exist anywhere. - self.assertEqual(module_search2.Search('aaaaa.py'), 'aaaaa.py') + self.assertEqual(module_search.Search('aaaaa.py'), 'aaaaa.py') # This module exists, but the search input is missing the outer package # name. 
- self.assertEqual(module_search2.Search('absltest.py'), 'absltest.py') + self.assertEqual(module_search.Search('absltest.py'), 'absltest.py') def testSearchInvalidExtension(self): # Test that the module rejects invalid extension in the input. with self.assertRaises(AssertionError): - module_search2.Search('module_search2.x') + module_search.Search('module_search.x') def testSearchPathStartsWithSep(self): # Test that module rejects invalid leading os.sep char in the input. with self.assertRaises(AssertionError): - module_search2.Search('/module_search2') + module_search.Search('/module_search') def testSearchRelativeSysPath(self): # An entry in sys.path is in relative form, and represents the same @@ -64,7 +64,7 @@ def testSearchRelativeSysPath(self): # Returned result should have a successful file match and relative # paths should be kept as-is. - result = module_search2.Search('b/first.py') + result = module_search.Search('b/first.py') self.assertEndsWith(result, 'a/../a/b/first.py') finally: @@ -84,7 +84,7 @@ def testSearchSymLinkInSysPath(self): # Returned result should have a successful file match and symbolic # links should be kept. 
self.assertEndsWith( - module_search2.Search('b/first.py'), 'link/b/first.py') + module_search.Search('b/first.py'), 'link/b/first.py') finally: sys.path.remove(os.path.join(self._test_package_dir, 'link')) diff --git a/tests/module_utils2_test.py b/tests/module_utils_test.py similarity index 84% rename from tests/module_utils2_test.py rename to tests/module_utils_test.py index 3790acc..0ed0fd2 100644 --- a/tests/module_utils2_test.py +++ b/tests/module_utils_test.py @@ -1,4 +1,4 @@ -"""Tests for googleclouddebugger.module_utils2.""" +"""Tests for googleclouddebugger.module_utils.""" import os import sys @@ -6,7 +6,7 @@ from absl.testing import absltest -from googleclouddebugger import module_utils2 +from googleclouddebugger import module_utils class TestModule(object): @@ -62,7 +62,7 @@ def testSimpleLoadedModuleFromSuffix(self): 'm1.py', 'm1.pyc', 'm1.pyo', 'p1/m1.py', 'b/p1/m1.py', 'a/b/p1/m1.py', '/a/b/p1/m1.py' ]: - m1 = module_utils2.GetLoadedModuleBySuffix(suffix) + m1 = module_utils.GetLoadedModuleBySuffix(suffix) self.assertTrue(m1, 'Module not found') self.assertEqual('/a/b/p1/m1.pyc', m1.__file__) @@ -72,7 +72,7 @@ def testSimpleLoadedModuleFromSuffix(self): 'p1/__init__.py', 'b/p1/__init__.py', 'a/b/p1/__init__.py', '/a/b/p1/__init__.py' ]: - p1 = module_utils2.GetLoadedModuleBySuffix(suffix) + p1 = module_utils.GetLoadedModuleBySuffix(suffix) self.assertTrue(p1, 'Package not found') self.assertEqual('/a/b/p1/__init__.pyc', p1.__file__) @@ -80,7 +80,7 @@ def testSimpleLoadedModuleFromSuffix(self): for suffix in [ 'm2.py', 'p2/m1.py', 'b2/p1/m1.py', 'a2/b/p1/m1.py', '/a2/b/p1/m1.py' ]: - m1 = module_utils2.GetLoadedModuleBySuffix(suffix) + m1 = module_utils.GetLoadedModuleBySuffix(suffix) self.assertFalse(m1, 'Module found unexpectedly') def testComplexLoadedModuleFromSuffix(self): @@ -89,7 +89,7 @@ def testComplexLoadedModuleFromSuffix(self): for suffix in [ 'm1.py', 'p1/m1.py', 'b/p1/m1.py', 'a/b/p1/m1.py', '/a/b/p1/m1.py' ]: - m1 = 
module_utils2.GetLoadedModuleBySuffix(suffix) + m1 = module_utils.GetLoadedModuleBySuffix(suffix) self.assertTrue(m1, 'Module not found') self.assertEqual('/a/b/p1/m1.pyc', m1.__file__) @@ -99,7 +99,7 @@ def testComplexLoadedModuleFromSuffix(self): 'p1/__init__.py', 'b/p1/__init__.py', 'a/b/p1/__init__.py', '/a/b/p1/__init__.py' ]: - p1 = module_utils2.GetLoadedModuleBySuffix(suffix) + p1 = module_utils.GetLoadedModuleBySuffix(suffix) self.assertTrue(p1, 'Package not found') self.assertEqual('/a/b/p1/__init__.pyc', p1.__file__) @@ -110,7 +110,7 @@ def testSimilarLoadedModuleFromSuffix(self): _AddSysModule('b.p1.m1', '/a1/b/p1/m1.pyc') _AddSysModule('a.b.p1.m1', '/a/b/p1/m1.pyc') - m1 = module_utils2.GetLoadedModuleBySuffix('/a/b/p1/m1.py') + m1 = module_utils.GetLoadedModuleBySuffix('/a/b/p1/m1.py') self.assertTrue(m1, 'Module not found') self.assertEqual('/a/b/p1/m1.pyc', m1.__file__) @@ -118,7 +118,7 @@ def testSimilarLoadedModuleFromSuffix(self): _AddSysModule('p1', '/a1/b1/p1/__init__.pyc') _AddSysModule('b.p1', '/a1/b/p1/__init__.pyc') _AddSysModule('a.b.p1', '/a/b/p1/__init__.pyc') - p1 = module_utils2.GetLoadedModuleBySuffix('/a/b/p1/__init__.py') + p1 = module_utils.GetLoadedModuleBySuffix('/a/b/p1/__init__.py') self.assertTrue(p1, 'Package not found') self.assertEqual('/a/b/p1/__init__.pyc', p1.__file__) @@ -130,29 +130,29 @@ def testDuplicateLoadedModuleFromSuffix(self): _AddSysModule('m1.m1.m1.m1', '/m1/m1/m1/m1.pyc') # Ambiguous request, multiple modules might have matched. - m1 = module_utils2.GetLoadedModuleBySuffix('/m1/__init__.py') + m1 = module_utils.GetLoadedModuleBySuffix('/m1/__init__.py') self.assertTrue(m1, 'Package not found') self.assertIn(m1.__file__, ['/m1/__init__.pyc', '/m1/m1/m1/__init__.pyc']) # Ambiguous request, multiple modules might have matched. 
- m1m1 = module_utils2.GetLoadedModuleBySuffix('/m1/m1.py') + m1m1 = module_utils.GetLoadedModuleBySuffix('/m1/m1.py') self.assertTrue(m1m1, 'Module not found') self.assertIn(m1m1.__file__, ['/m1/m1.pyc', '/m1/m1/m1/m1.pyc']) # Not ambiguous. Only 1 match possible. - m1m1m1 = module_utils2.GetLoadedModuleBySuffix('/m1/m1/m1/__init__.py') + m1m1m1 = module_utils.GetLoadedModuleBySuffix('/m1/m1/m1/__init__.py') self.assertTrue(m1m1m1, 'Package not found') self.assertEqual('/m1/m1/m1/__init__.pyc', m1m1m1.__file__) # Not ambiguous. Only 1 match possible. - m1m1m1m1 = module_utils2.GetLoadedModuleBySuffix('/m1/m1/m1/m1.py') + m1m1m1m1 = module_utils.GetLoadedModuleBySuffix('/m1/m1/m1/m1.py') self.assertTrue(m1m1m1m1, 'Module not found') self.assertEqual('/m1/m1/m1/m1.pyc', m1m1m1m1.__file__) def testMainLoadedModuleFromSuffix(self): # Lookup complex module. _AddSysModule('__main__', '/a/b/p/m.pyc') - m1 = module_utils2.GetLoadedModuleBySuffix('/a/b/p/m.py') + m1 = module_utils.GetLoadedModuleBySuffix('/a/b/p/m.py') self.assertTrue(m1, 'Module not found') self.assertEqual('/a/b/p/m.pyc', m1.__file__) From 32d7135669c054c1d63b9ab483652546e1f4325b Mon Sep 17 00:00:00 2001 From: James McTavish Date: Thu, 7 Jul 2022 13:35:28 -0400 Subject: [PATCH 206/241] feat: add build and test script for easier testing (#47) --- build_and_test.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100755 build_and_test.sh diff --git a/build_and_test.sh b/build_and_test.sh new file mode 100755 index 0000000..8035cce --- /dev/null +++ b/build_and_test.sh @@ -0,0 +1,12 @@ +#!/bin/bash -e + +cd src +./build.sh +cd .. 
+ +python3 -m venv /tmp/cdbg-venv +source /tmp/cdbg-venv/bin/activate +pip3 install -r requirements_dev.txt +pip3 install src/dist/* --force-reinstall +python3 -m pytest tests +deactivate From f23731bbeaa882b8f5e0b8c8687c8740540e89a2 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Wed, 20 Jul 2022 13:08:17 -0400 Subject: [PATCH 207/241] chore: add .coverage to gitignore. (#49) --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 7064ec3..4041f03 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ /src/setup.cfg __pycache__/ *.egg-info/ +.coverage From 4f7cfa512307eec935a2d4f89acd4cc7582f5b64 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Wed, 20 Jul 2022 14:21:46 -0400 Subject: [PATCH 208/241] fix: avoid name conflict with collector (#51) --- src/googleclouddebugger/python_breakpoint.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index 7b61fd1..1339ebe 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -420,13 +420,13 @@ def _BreakpointEvent(self, event, frame): self._CompleteBreakpoint({'status': error_status}) return - collector = collector.CaptureCollector(self.definition, + capture_collector = collector.CaptureCollector(self.definition, self.data_visibility_policy) # TODO: This is a temporary try/except. All exceptions should be # caught inside Collect and converted into breakpoint error messages. 
try: - collector.Collect(frame) + capture_collector.Collect(frame) except BaseException as e: # pylint: disable=broad-except native.LogInfo('Internal error during data capture: %s' % repr(e)) error_status = { @@ -448,4 +448,4 @@ def _BreakpointEvent(self, event, frame): self._CompleteBreakpoint({'status': error_status}) return - self._CompleteBreakpoint(collector.breakpoint, is_incremental=False) + self._CompleteBreakpoint(capture_collector.breakpoint, is_incremental=False) From f8a9bd2527e0f6f0a7f216b6c4f3d2fd9b6aeb04 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Thu, 21 Jul 2022 10:05:13 -0400 Subject: [PATCH 209/241] chore: remove six and sync requirements.txt (#50) --- requirements.txt | 1 + src/setup.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 48ab4e6..dc39cd9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +google-auth>=1.0.0 google-auth-httplib2 google-api-python-client google-api-core diff --git a/src/setup.py b/src/setup.py index 64a5d46..e9e9d19 100644 --- a/src/setup.py +++ b/src/setup.py @@ -107,7 +107,6 @@ def ReadConfig(section, value, default): 'google-auth-httplib2', 'google-api-core', 'pyyaml', - 'six>=1.10.0', ], packages=['googleclouddebugger'], ext_modules=[cdbg_native_module], From e751739f899d20e774da96d2d1e65092b2955a8e Mon Sep 17 00:00:00 2001 From: James McTavish Date: Thu, 21 Jul 2022 10:59:55 -0400 Subject: [PATCH 210/241] chore: release version 2.19 (#52) --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index cb89582..413f974 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. 
Instances running debuggers with # different major versions will show up as two different debuggees. -__version__ = '2.18' +__version__ = '2.19' From 2d5c7e24990ad436be9f0f085d39a9a34da5e4ce Mon Sep 17 00:00:00 2001 From: James McTavish Date: Tue, 26 Jul 2022 15:33:18 -0400 Subject: [PATCH 211/241] feat: add Firebase RTDB backend (#48) This is a flag-controlled alternative backend for the debug agent to use. This is provided as a preview of the functionality and will not be released until additional error handling and customization have been added. This version of the debugger is intended to be used with https://github.com/GoogleCloudPlatform/snapshot-debugger. --- firebase-sample/app.py | 11 + firebase-sample/build-and-run.sh | 20 + firebase-sample/requirements.txt | 1 + requirements.txt | 1 + requirements_dev.txt | 1 + src/googleclouddebugger/__init__.py | 38 +- src/googleclouddebugger/firebase_client.py | 569 +++++++++++++++++++++ src/setup.py | 11 +- tests/firebase_client_test.py | 425 +++++++++++++++ 9 files changed, 1057 insertions(+), 20 deletions(-) create mode 100644 firebase-sample/app.py create mode 100755 firebase-sample/build-and-run.sh create mode 100644 firebase-sample/requirements.txt create mode 100644 src/googleclouddebugger/firebase_client.py create mode 100644 tests/firebase_client_test.py diff --git a/firebase-sample/app.py b/firebase-sample/app.py new file mode 100644 index 0000000..725c7ab --- /dev/null +++ b/firebase-sample/app.py @@ -0,0 +1,11 @@ +import googleclouddebugger +googleclouddebugger.enable(use_firebase= True) + +from flask import Flask + +app = Flask(__name__) + +@app.route("/") +def hello_world(): + return "

Hello World!

" + diff --git a/firebase-sample/build-and-run.sh b/firebase-sample/build-and-run.sh new file mode 100755 index 0000000..a0cc7b1 --- /dev/null +++ b/firebase-sample/build-and-run.sh @@ -0,0 +1,20 @@ +#!/bin/bash -e + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +cd "${SCRIPT_DIR}/.." + +cd src +./build.sh +cd .. + +python3 -m venv /tmp/cdbg-venv +source /tmp/cdbg-venv/bin/activate +pip3 install -r requirements.txt +pip3 install src/dist/* --force-reinstall + +cd firebase-sample +pip3 install -r requirements.txt +python3 -m flask run +cd .. + +deactivate diff --git a/firebase-sample/requirements.txt b/firebase-sample/requirements.txt new file mode 100644 index 0000000..7e10602 --- /dev/null +++ b/firebase-sample/requirements.txt @@ -0,0 +1 @@ +flask diff --git a/requirements.txt b/requirements.txt index dc39cd9..6b75f8c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ google-auth>=1.0.0 google-auth-httplib2 google-api-python-client google-api-core +firebase_admin pyyaml diff --git a/requirements_dev.txt b/requirements_dev.txt index 14662f3..89aa308 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,3 +1,4 @@ -r requirements.txt absl-py pytest +requests-mock diff --git a/src/googleclouddebugger/__init__.py b/src/googleclouddebugger/__init__.py index 00cd217..259cd88 100644 --- a/src/googleclouddebugger/__init__.py +++ b/src/googleclouddebugger/__init__.py @@ -30,6 +30,7 @@ from . import collector from . import error_data_visibility_policy from . import gcp_hub_client +from . import firebase_client from . import glob_data_visibility_policy from . import yaml_data_visibility_config_reader from . 
import cdbg_native @@ -38,24 +39,38 @@ __version__ = version.__version__ _flags = None -_hub_client = None +_backend_client = None _breakpoints_manager = None def _StartDebugger(): """Configures and starts the debugger.""" - global _hub_client + global _backend_client global _breakpoints_manager cdbg_native.InitializeModule(_flags) cdbg_native.LogInfo( f'Initializing Cloud Debugger Python agent version: {__version__}') - _hub_client = gcp_hub_client.GcpHubClient() + use_firebase = _flags.get('use_firebase') + if use_firebase: + _backend_client = firebase_client.FirebaseClient() + _backend_client.SetupAuth( + _flags.get('project_id'), _flags.get('service_account_json_file'), + _flags.get('firebase_db_url')) + else: + _backend_client = gcp_hub_client.GcpHubClient() + _backend_client.SetupAuth( + _flags.get('project_id'), _flags.get('project_number'), + _flags.get('service_account_json_file')) + _backend_client.SetupCanaryMode( + _flags.get('breakpoint_enable_canary'), + _flags.get('breakpoint_allow_canary_override')) + visibility_policy = _GetVisibilityPolicy() _breakpoints_manager = breakpoints_manager.BreakpointsManager( - _hub_client, visibility_policy) + _backend_client, visibility_policy) # Set up loggers for logpoints. 
collector.SetLogger(logging.getLogger()) @@ -63,17 +78,12 @@ def _StartDebugger(): collector.CaptureCollector.pretty_printers.append( appengine_pretty_printers.PrettyPrinter) - _hub_client.on_active_breakpoints_changed = ( + _backend_client.on_active_breakpoints_changed = ( _breakpoints_manager.SetActiveBreakpoints) - _hub_client.on_idle = _breakpoints_manager.CheckBreakpointsExpiration - _hub_client.SetupAuth( - _flags.get('project_id'), _flags.get('project_number'), - _flags.get('service_account_json_file')) - _hub_client.SetupCanaryMode( - _flags.get('breakpoint_enable_canary'), - _flags.get('breakpoint_allow_canary_override')) - _hub_client.InitializeDebuggeeLabels(_flags) - _hub_client.Start() + _backend_client.on_idle = _breakpoints_manager.CheckBreakpointsExpiration + + _backend_client.InitializeDebuggeeLabels(_flags) + _backend_client.Start() def _GetVisibilityPolicy(): diff --git a/src/googleclouddebugger/firebase_client.py b/src/googleclouddebugger/firebase_client.py new file mode 100644 index 0000000..0da2a9e --- /dev/null +++ b/src/googleclouddebugger/firebase_client.py @@ -0,0 +1,569 @@ +# Copyright 2015 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Communicates with Firebase RTDB backend.""" + +from collections import deque +import hashlib +import json +import os +import platform +import requests +import socket +import sys +import threading +import traceback + +import firebase_admin +import firebase_admin.credentials +import firebase_admin.db +import firebase_admin.exceptions + +from . import backoff +from . import cdbg_native as native +from . import labels +from . import uniquifier_computer +from . import application_info +from . import version +# This module catches all exception. This is safe because it runs in +# a daemon thread (so we are not blocking Ctrl+C). We need to catch all +# the exception because HTTP client is unpredictable as far as every +# exception it can throw. +# pylint: disable=broad-except + +# Set of all known debuggee labels (passed down as flags). The value of +# a map is optional environment variable that can be used to set the flag +# (flags still take precedence). +_DEBUGGEE_LABELS = { + labels.Debuggee.MODULE: [ + 'GAE_SERVICE', 'GAE_MODULE_NAME', 'K_SERVICE', 'FUNCTION_NAME' + ], + labels.Debuggee.VERSION: [ + 'GAE_VERSION', 'GAE_MODULE_VERSION', 'K_REVISION', + 'X_GOOGLE_FUNCTION_VERSION' + ], + labels.Debuggee.MINOR_VERSION: ['GAE_DEPLOYMENT_ID', 'GAE_MINOR_VERSION'] +} + +# Debuggee labels used to format debuggee description (ordered). The minor +# version is excluded for the sake of consistency with AppEngine UX. +_DESCRIPTION_LABELS = [ + labels.Debuggee.PROJECT_ID, labels.Debuggee.MODULE, labels.Debuggee.VERSION +] + +_METADATA_SERVER_URL = 'http://metadata.google.internal/computeMetadata/v1' + +_TRANSIENT_ERROR_CODES = ('UNKNOWN', 'INTERNAL', 'N/A', 'UNAVAILABLE', + 'DEADLINE_EXCEEDED', 'RESOURCE_EXHAUSTED', + 'UNAUTHENTICATED', 'PERMISSION_DENIED') + + +class NoProjectIdError(Exception): + """Used to indicate the project id cannot be determined.""" + + +class FirebaseClient(object): + """Firebase RTDB Backend client. 
+ + Registers the debuggee, subscribes for active breakpoints and sends breakpoint + updates to the backend. + + This class supports two types of authentication: application default + credentials or a manually provided JSON credentials file for a service + account. + + FirebaseClient creates a worker thread that communicates with the backend. The + thread can be stopped with a Stop function, but it is optional since the + worker thread is marked as daemon. + """ + + def __init__(self): + self.on_active_breakpoints_changed = lambda x: None + self.on_idle = lambda: None + self._debuggee_labels = {} + self._credentials = None + self._project_id = None + self._database_url = None + self._debuggee_id = None + self._canary_mode = None + self._breakpoints = {} + self._main_thread = None + self._transmission_thread = None + self._transmission_thread_startup_lock = threading.Lock() + self._transmission_queue = deque(maxlen=100) + self._new_updates = threading.Event() + self._breakpoint_subscription = None + + # Events for unit testing. + self.registration_complete = threading.Event() + self.subscription_complete = threading.Event() + + # + # Configuration options (constants only modified by unit test) + # + + # Delay before retrying failed request. + self.register_backoff = backoff.Backoff() # Register debuggee. + self.update_backoff = backoff.Backoff() # Update breakpoint. + + # Maximum number of times that the message is re-transmitted before it + # is assumed to be poisonous and discarded + self.max_transmit_attempts = 10 + + def InitializeDebuggeeLabels(self, flags): + """Initialize debuggee labels from environment variables and flags. + + The caller passes all the flags that the debuglet got. This function + will only use the flags used to label the debuggee. Flags take precedence + over environment variables. + + Debuggee description is formatted from available flags. + + Args: + flags: dictionary of debuglet command line flags. 
+ """ + self._debuggee_labels = {} + + for (label, var_names) in _DEBUGGEE_LABELS.items(): + # var_names is a list of possible environment variables that may contain + # the label value. Find the first one that is set. + for name in var_names: + value = os.environ.get(name) + if value: + # Special case for module. We omit the "default" module + # to stay consistent with AppEngine. + if label == labels.Debuggee.MODULE and value == 'default': + break + self._debuggee_labels[label] = value + break + + # Special case when FUNCTION_NAME is set and X_GOOGLE_FUNCTION_VERSION + # isn't set. We set the version to 'unversioned' to be consistent with other + # agents. + # TODO: Stop assigning 'unversioned' to a GCF and find the + # actual version. + if ('FUNCTION_NAME' in os.environ and + labels.Debuggee.VERSION not in self._debuggee_labels): + self._debuggee_labels[labels.Debuggee.VERSION] = 'unversioned' + + if flags: + self._debuggee_labels.update({ + name: value + for (name, value) in flags.items() + if name in _DEBUGGEE_LABELS + }) + + self._debuggee_labels[labels.Debuggee.PROJECT_ID] = self._project_id + + platform_enum = application_info.GetPlatform() + self._debuggee_labels[labels.Debuggee.PLATFORM] = platform_enum.value + + if platform_enum == application_info.PlatformType.CLOUD_FUNCTION: + region = application_info.GetRegion() + if region: + self._debuggee_labels[labels.Debuggee.REGION] = region + + def SetupAuth(self, + project_id=None, + service_account_json_file=None, + database_url=None): + """Sets up authentication with Google APIs. + + This will use the credentials from service_account_json_file if provided, + falling back to application default credentials. + See https://cloud.google.com/docs/authentication/production. + + Args: + project_id: GCP project ID (e.g. myproject). If not provided, will attempt + to retrieve it from the credentials. + service_account_json_file: JSON file to use for credentials. 
If not + provided, will default to application default credentials. + database_url: Firebase realtime database URL to be used. If not + provided, will default to https://{project_id}-cdbg.firebaseio.com + Raises: + NoProjectIdError: If the project id cannot be determined. + """ + if service_account_json_file: + self._credentials = firebase_admin.credentials.Certificate( + service_account_json_file) + if not project_id: + with open(service_account_json_file, encoding='utf-8') as f: + project_id = json.load(f).get('project_id') + else: + if not project_id: + try: + r = requests.get( + f'{_METADATA_SERVER_URL}/project/project-id', + headers={'Metadata-Flavor': 'Google'}) + project_id = r.text + except requests.exceptions.RequestException: + native.LogInfo('Metadata server not available') + + if not project_id: + raise NoProjectIdError( + 'Unable to determine the project id from the API credentials. ' + 'Please specify the project id using the --project_id flag.') + + self._project_id = project_id + + if database_url: + self._database_url = database_url + else: + self._database_url = f'https://{self._project_id}-cdbg.firebaseio.com' + + def Start(self): + """Starts the worker thread.""" + self._shutdown = False + + # Spin up the main thread which will create the other necessary threads. + self._main_thread = threading.Thread(target=self._MainThreadProc) + self._main_thread.name = 'Cloud Debugger main worker thread' + self._main_thread.daemon = True + self._main_thread.start() + + def Stop(self): + """Signals the worker threads to shut down and waits until it exits.""" + self._shutdown = True + self._new_updates.set() # Wake up the transmission thread. 
+ + if self._main_thread is not None: + self._main_thread.join() + self._main_thread = None + + if self._transmission_thread is not None: + self._transmission_thread.join() + self._transmission_thread = None + + if self._breakpoint_subscription is not None: + self._breakpoint_subscription.close() + self._breakpoint_subscription = None + + def EnqueueBreakpointUpdate(self, breakpoint_data): + """Asynchronously updates the specified breakpoint on the backend. + + This function returns immediately. The worker thread is actually doing + all the work. The worker thread is responsible to retry the transmission + in case of transient errors. + + The assumption is that the breakpoint is moving from Active to Final state. + + Args: + breakpoint: breakpoint in either final or non-final state. + """ + with self._transmission_thread_startup_lock: + if self._transmission_thread is None: + self._transmission_thread = threading.Thread( + target=self._TransmissionThreadProc) + self._transmission_thread.name = 'Cloud Debugger transmission thread' + self._transmission_thread.daemon = True + self._transmission_thread.start() + + self._transmission_queue.append((breakpoint_data, 0)) + self._new_updates.set() # Wake up the worker thread to send immediately. + + def _MainThreadProc(self): + """Entry point for the worker thread. + + This thread only serves to register and kick off the firebase subscription + which will run in its own thread. That thread will be owned by + self._breakpoint_subscription. + """ + # Note: if self._credentials is None, default app credentials will be used. + # TODO: Error handling. 
+ firebase_admin.initialize_app(self._credentials, + {'databaseURL': self._database_url}) + + self._RegisterDebuggee() + self.registration_complete.set() + self._SubscribeToBreakpoints() + self.subscription_complete.set() + + def _TransmissionThreadProc(self): + """Entry point for the transmission worker thread.""" + + while not self._shutdown: + self._new_updates.clear() + + delay = self._TransmitBreakpointUpdates() + + self._new_updates.wait(delay) + + def _RegisterDebuggee(self): + """Single attempt to register the debuggee. + + If the registration succeeds, sets self._debuggee_id to the registered + debuggee ID. + + Args: + service: client to use for API calls + + Returns: + (registration_required, delay) tuple + """ + try: + debuggee = self._GetDebuggee() + self._debuggee_id = debuggee['id'] + + try: + debuggee_path = f'cdbg/debuggees/{self._debuggee_id}' + native.LogInfo( + f'registering at {self._database_url}, path: {debuggee_path}') + firebase_admin.db.reference(debuggee_path).set(debuggee) + native.LogInfo( + f'Debuggee registered successfully, ID: {self._debuggee_id}') + self.register_backoff.Succeeded() + return (False, 0) # Proceed immediately to subscribing to breakpoints. + except BaseException: + native.LogInfo(f'Failed to register debuggee: {traceback.format_exc()}') + except BaseException: + native.LogWarning('Debuggee information not available: ' + + traceback.format_exc()) + + return (True, self.register_backoff.Failed()) + + def _SubscribeToBreakpoints(self): + # Kill any previous subscriptions first. 
+ if self._breakpoint_subscription is not None: + self._breakpoint_subscription.close() + self._breakpoint_subscription = None + + path = f'cdbg/breakpoints/{self._debuggee_id}/active' + native.LogInfo(f'Subscribing to breakpoint updates at {path}') + ref = firebase_admin.db.reference(path) + self._breakpoint_subscription = ref.listen(self._ActiveBreakpointCallback) + + def _ActiveBreakpointCallback(self, event): + if event.event_type == 'put': + if event.data is None: + # Either deleting a breakpoint or initializing with no breakpoints. + # Initializing with no breakpoints is a no-op. + # If deleting, event.path will be /{breakpointid} + if event.path != '/': + breakpoint_id = event.path[1:] + del self._breakpoints[breakpoint_id] + else: + if event.path == '/': + # New set of breakpoints. + self._breakpoints = {} + for (key, value) in event.data.items(): + self._AddBreakpoint(key, value) + else: + # New breakpoint. + breakpoint_id = event.path[1:] + self._AddBreakpoint(breakpoint_id, event.data) + + elif event.event_type == 'patch': + # New breakpoint or breakpoints. + for (key, value) in event.data.items(): + self._AddBreakpoint(key, value) + else: + native.LogWarning('Unexpected event from Firebase: ' + f'{event.event_type} {event.path} {event.data}') + return + + native.LogInfo(f'Breakpoints list changed, {len(self._breakpoints)} active') + self.on_active_breakpoints_changed(list(self._breakpoints.values())) + + def _AddBreakpoint(self, breakpoint_id, breakpoint_data): + breakpoint_data['id'] = breakpoint_id + self._breakpoints[breakpoint_id] = breakpoint_data + + def _TransmitBreakpointUpdates(self): + """Tries to send pending breakpoint updates to the backend. + + Sends all the pending breakpoint updates. In case of transient failures, + the breakpoint is inserted back to the top of the queue. Application + failures are not retried (for example updating breakpoint in a final + state). + + Each pending breakpoint maintains a retry counter. 
After repeated transient + failures the breakpoint is discarded and dropped from the queue. + + Args: + service: client to use for API calls + + Returns: + (reconnect, timeout) tuple. The first element ("reconnect") is set to + true on unexpected HTTP responses. The caller should discard the HTTP + connection and create a new one. The second element ("timeout") is + set to None if all pending breakpoints were sent successfully. Otherwise + returns time interval in seconds to stall before retrying. + """ + retry_list = [] + + # There is only one consumer, so two step pop is safe. + while self._transmission_queue: + breakpoint_data, retry_count = self._transmission_queue.popleft() + + bp_id = breakpoint_data['id'] + + try: + # Something has changed on the breakpoint. + # It should be going from active to final, but let's make sure. + if not breakpoint_data['isFinalState']: + raise BaseException( + f'Unexpected breakpoint update requested: {breakpoint_data}') + + # If action is missing, it should be set to 'CAPTURE' + is_logpoint = breakpoint_data.get('action') == 'LOG' + is_snapshot = not is_logpoint + if is_snapshot: + breakpoint_data['action'] = 'CAPTURE' + + # Set the completion time on the server side using a magic value. + breakpoint_data['finalTimeUnixMsec'] = {'.sv': 'timestamp'} + + # First, remove from the active breakpoints. + bp_ref = firebase_admin.db.reference( + f'cdbg/breakpoints/{self._debuggee_id}/active/{bp_id}') + bp_ref.delete() + + # Save snapshot data for snapshots only. + if is_snapshot: + # Note that there may not be snapshot data. + bp_ref = firebase_admin.db.reference( + f'cdbg/breakpoints/{self._debuggee_id}/snapshots/{bp_id}') + bp_ref.set(breakpoint_data) + + # Now strip potential snapshot data. + breakpoint_data.pop('evaluatedExpressions', None) + breakpoint_data.pop('stackFrames', None) + breakpoint_data.pop('variableTable', None) + + # Then add it to the list of final breakpoints. 
+ bp_ref = firebase_admin.db.reference( + f'cdbg/breakpoints/{self._debuggee_id}/final/{bp_id}') + bp_ref.set(breakpoint_data) + + native.LogInfo(f'Breakpoint {bp_id} update transmitted successfully') + + except firebase_admin.exceptions.FirebaseError as err: + if err.code in _TRANSIENT_ERROR_CODES: + if retry_count < self.max_transmit_attempts - 1: + native.LogInfo(f'Failed to send breakpoint {bp_id} update: ' + f'{traceback.format_exc()}') + retry_list.append((breakpoint_data, retry_count + 1)) + else: + native.LogWarning( + f'Breakpoint {bp_id} retry count exceeded maximum') + else: + # This is very common if multiple instances are sending final update + # simultaneously. + native.LogInfo(f'{err}, breakpoint: {bp_id}') + except socket.error as err: + if retry_count < self.max_transmit_attempts - 1: + native.LogInfo(f'Socket error {err.errno} while sending breakpoint ' + f'{bp_id} update: {traceback.format_exc()}') + retry_list.append((breakpoint_data, retry_count + 1)) + else: + native.LogWarning(f'Breakpoint {bp_id} retry count exceeded maximum') + # Socket errors shouldn't persist like this; reconnect. + #reconnect = True + except BaseException: + native.LogWarning(f'Fatal error sending breakpoint {bp_id} update: ' + f'{traceback.format_exc()}') + + self._transmission_queue.extend(retry_list) + + if not self._transmission_queue: + self.update_backoff.Succeeded() + # Nothing to send, wait until next breakpoint update. 
+ return None + else: + return self.update_backoff.Failed() + + def _GetDebuggee(self): + """Builds the debuggee structure.""" + major_version = version.__version__.split('.', maxsplit=1)[0] + python_version = ''.join(platform.python_version().split('.')[:2]) + agent_version = f'google.com/python{python_version}-gcp/v{major_version}' + + debuggee = { + 'description': self._GetDebuggeeDescription(), + 'labels': self._debuggee_labels, + 'agentVersion': agent_version, + } + + source_context = self._ReadAppJsonFile('source-context.json') + if source_context: + debuggee['sourceContexts'] = [source_context] + + debuggee['uniquifier'] = self._ComputeUniquifier(debuggee) + + debuggee['id'] = self._ComputeDebuggeeId(debuggee) + + return debuggee + + def _ComputeDebuggeeId(self, debuggee): + """Computes a debuggee ID. + + The debuggee ID has to be identical on all instances. Therefore the + ID should not include any random elements or elements that may be + different on different instances. + + Args: + debuggee: complete debuggee message (including uniquifier) + + Returns: + Debuggee ID meeting the criteria described above. + """ + fullhash = hashlib.sha1(json.dumps(debuggee, + sort_keys=True).encode()).hexdigest() + return f'd-{fullhash[:8]}' + + def _GetDebuggeeDescription(self): + """Formats debuggee description based on debuggee labels.""" + return '-'.join(self._debuggee_labels[label] + for label in _DESCRIPTION_LABELS + if label in self._debuggee_labels) + + def _ComputeUniquifier(self, debuggee): + """Computes debuggee uniquifier. + + The debuggee uniquifier has to be identical on all instances. Therefore the + uniquifier should not include any random numbers and should only be based + on inputs that are guaranteed to be the same on all instances. + + Args: + debuggee: complete debuggee message without the uniquifier + + Returns: + Hex string of SHA1 hash of project information, debuggee labels and + debuglet version. 
+ """ + uniquifier = hashlib.sha1() + + # Compute hash of application files if we don't have source context. This + # way we can still distinguish between different deployments. + if ('minorversion' not in debuggee.get('labels', []) and + 'sourceContexts' not in debuggee): + uniquifier_computer.ComputeApplicationUniquifier(uniquifier) + + return uniquifier.hexdigest() + + def _ReadAppJsonFile(self, relative_path): + """Reads JSON file from an application directory. + + Args: + relative_path: file name relative to application root directory. + + Returns: + Parsed JSON data or None if the file does not exist, can't be read or + not a valid JSON file. + """ + try: + with open( + os.path.join(sys.path[0], relative_path), 'r', encoding='utf-8') as f: + return json.load(f) + except (IOError, ValueError): + return None diff --git a/src/setup.py b/src/setup.py index e9e9d19..ef92e7c 100644 --- a/src/setup.py +++ b/src/setup.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- """Python Cloud Debugger build and packaging script.""" from configparser import ConfigParser @@ -49,8 +48,7 @@ def ReadConfig(section, value, default): 'For more details please see ' 'https://github.com/GoogleCloudPlatform/cloud-debug-python\n') -lib_dirs = ReadConfig('build_ext', - 'library_dirs', +lib_dirs = ReadConfig('build_ext', 'library_dirs', sysconfig.get_config_var('LIBDIR')).split(':') extra_compile_args = ReadConfig('cc_options', 'extra_compile_args', '').split() extra_link_args = ReadConfig('cc_options', 'extra_link_args', '').split() @@ -65,9 +63,10 @@ def ReadConfig(section, value, default): assert len(static_libs) == len(deps), (static_libs, deps, lib_dirs) cvars = sysconfig.get_config_vars() -cvars['OPT'] = str.join(' ', RemovePrefixes( - cvars.get('OPT').split(), - ['-g', '-O', '-Wstrict-prototypes'])) +cvars['OPT'] = str.join( + ' ', + RemovePrefixes( + cvars.get('OPT').split(), ['-g', '-O', '-Wstrict-prototypes'])) # Determine the current version of the package. The easiest way would be to # import "googleclouddebugger" and read its __version__ attribute. 
diff --git a/tests/firebase_client_test.py b/tests/firebase_client_test.py new file mode 100644 index 0000000..c1690b2 --- /dev/null +++ b/tests/firebase_client_test.py @@ -0,0 +1,425 @@ +"""Unit tests for firebase_client module.""" + +import errno +import os +import socket +import sys +import tempfile +from unittest import mock +from unittest.mock import MagicMock +from unittest.mock import call +from unittest.mock import patch +import requests +import requests_mock + +from googleapiclient.errors import HttpError +from googleclouddebugger import version +from googleclouddebugger import firebase_client + +from absl.testing import absltest +from absl.testing import parameterized + +import firebase_admin.credentials + +TEST_PROJECT_ID = 'test-project-id' +METADATA_PROJECT_URL = ('http://metadata.google.internal/computeMetadata/' + 'v1/project/project-id') + + +class FakeEvent: + + def __init__(self, event_type, path, data): + self.event_type = event_type + self.path = path + self.data = data + + +class FakeReference: + + def __init__(self): + self.subscriber = None + + def listen(self, callback): + self.subscriber = callback + + def update(self, event_type, path, data): + if self.subscriber: + event = FakeEvent(event_type, path, data) + self.subscriber(event) + + +class FirebaseClientTest(parameterized.TestCase): + """Simulates service account authentication.""" + + def setUp(self): + version.__version__ = 'test' + + self._client = firebase_client.FirebaseClient() + + self.breakpoints_changed_count = 0 + self.breakpoints = {} + + def tearDown(self): + self._client.Stop() + + def testSetupAuthDefault(self): + # By default, we try getting the project id from the metadata server. + # Note that actual credentials are not fetched. 
+ with requests_mock.Mocker() as m: + m.get(METADATA_PROJECT_URL, text=TEST_PROJECT_ID) + + self._client.SetupAuth() + + self.assertEqual(TEST_PROJECT_ID, self._client._project_id) + self.assertEqual(f'https://{TEST_PROJECT_ID}-cdbg.firebaseio.com', + self._client._database_url) + + def testSetupAuthOverrideProjectIdNumber(self): + # If a project id is provided, we use it. + project_id = 'project2' + self._client.SetupAuth(project_id=project_id) + + self.assertEqual(project_id, self._client._project_id) + self.assertEqual(f'https://{project_id}-cdbg.firebaseio.com', + self._client._database_url) + + def testSetupAuthServiceAccountJsonAuth(self): + # We'll load credentials from the provided file (mocked for simplicity) + with mock.patch.object(firebase_admin.credentials, + 'Certificate') as firebase_certificate: + json_file = tempfile.NamedTemporaryFile() + # And load the project id from the file as well. + with open(json_file.name, 'w', encoding='utf-8') as f: + f.write(f'{{"project_id": "{TEST_PROJECT_ID}"}}') + self._client.SetupAuth(service_account_json_file=json_file.name) + + firebase_certificate.assert_called_with(json_file.name) + self.assertEqual(TEST_PROJECT_ID, self._client._project_id) + + def testSetupAuthNoProjectId(self): + # There will be an exception raised if we try to contact the metadata + # server on a non-gcp machine. 
+ with requests_mock.Mocker() as m: + m.get(METADATA_PROJECT_URL, exc=requests.exceptions.RequestException) + + with self.assertRaises(firebase_client.NoProjectIdError): + self._client.SetupAuth() + + @patch('firebase_admin.db.reference') + @patch('firebase_admin.initialize_app') + def testStart(self, mock_initialize_app, mock_db_ref): + self._client.SetupAuth(project_id=TEST_PROJECT_ID) + self._client.Start() + self._client.subscription_complete.wait() + + debuggee_id = self._client._debuggee_id + + mock_initialize_app.assert_called_with( + None, {'databaseURL': f'https://{TEST_PROJECT_ID}-cdbg.firebaseio.com'}) + self.assertEqual([ + call(f'cdbg/debuggees/{debuggee_id}'), + call(f'cdbg/breakpoints/{debuggee_id}/active') + ], mock_db_ref.call_args_list) + + # TODO: testStartRegisterRetry + # TODO: testStartSubscribeRetry + # - Note: failures don't require retrying registration. + + @patch('firebase_admin.db.reference') + @patch('firebase_admin.initialize_app') + def testBreakpointSubscription(self, mock_initialize_app, mock_db_ref): + mock_register_ref = MagicMock() + fake_subscribe_ref = FakeReference() + mock_db_ref.side_effect = [mock_register_ref, fake_subscribe_ref] + + # This class will keep track of the breakpoint updates and will check + # them against expectations. 
+ class ResultChecker: + + def __init__(self, expected_results, test): + self._expected_results = expected_results + self._test = test + self._change_count = 0 + + def callback(self, new_breakpoints): + self._test.assertEqual(self._expected_results[self._change_count], + new_breakpoints) + self._change_count += 1 + + breakpoints = [ + { + 'id': 'breakpoint-0', + 'location': { + 'path': 'foo.py', + 'line': 18 + } + }, + { + 'id': 'breakpoint-1', + 'location': { + 'path': 'bar.py', + 'line': 23 + } + }, + { + 'id': 'breakpoint-2', + 'location': { + 'path': 'baz.py', + 'line': 45 + } + }, + ] + + expected_results = [[breakpoints[0]], [breakpoints[0], breakpoints[1]], + [breakpoints[0], breakpoints[1], breakpoints[2]], + [breakpoints[1], breakpoints[2]]] + result_checker = ResultChecker(expected_results, self) + + self._client.on_active_breakpoints_changed = result_checker.callback + + self._client.SetupAuth(project_id=TEST_PROJECT_ID) + self._client.Start() + self._client.subscription_complete.wait() + + # Send in updates to trigger the subscription callback. 
+ fake_subscribe_ref.update('put', '/', + {breakpoints[0]['id']: breakpoints[0]}) + fake_subscribe_ref.update('patch', '/', + {breakpoints[1]['id']: breakpoints[1]}) + fake_subscribe_ref.update('put', f'/{breakpoints[2]["id"]}', breakpoints[2]) + fake_subscribe_ref.update('put', f'/{breakpoints[0]["id"]}', None) + + self.assertEqual(len(expected_results), result_checker._change_count) + + def _TestInitializeLabels(self, module_var, version_var, minor_var): + self._client.SetupAuth(project_id=TEST_PROJECT_ID) + + self._client.InitializeDebuggeeLabels({ + 'module': 'my_module', + 'version': '1', + 'minorversion': '23', + 'something_else': 'irrelevant' + }) + self.assertEqual( + { + 'projectid': 'test-project-id', + 'module': 'my_module', + 'version': '1', + 'minorversion': '23', + 'platform': 'default' + }, self._client._debuggee_labels) + self.assertEqual('test-project-id-my_module-1', + self._client._GetDebuggeeDescription()) + + uniquifier1 = self._client._ComputeUniquifier( + {'labels': self._client._debuggee_labels}) + self.assertTrue(uniquifier1) # Not empty string. 
+ + try: + os.environ[module_var] = 'env_module' + os.environ[version_var] = '213' + os.environ[minor_var] = '3476734' + self._client.InitializeDebuggeeLabels(None) + self.assertEqual( + { + 'projectid': 'test-project-id', + 'module': 'env_module', + 'version': '213', + 'minorversion': '3476734', + 'platform': 'default' + }, self._client._debuggee_labels) + self.assertEqual('test-project-id-env_module-213', + self._client._GetDebuggeeDescription()) + + os.environ[module_var] = 'default' + os.environ[version_var] = '213' + os.environ[minor_var] = '3476734' + self._client.InitializeDebuggeeLabels({'minorversion': 'something else'}) + self.assertEqual( + { + 'projectid': 'test-project-id', + 'version': '213', + 'minorversion': 'something else', + 'platform': 'default' + }, self._client._debuggee_labels) + self.assertEqual('test-project-id-213', + self._client._GetDebuggeeDescription()) + + finally: + del os.environ[module_var] + del os.environ[version_var] + del os.environ[minor_var] + + def testInitializeLegacyDebuggeeLabels(self): + self._TestInitializeLabels('GAE_MODULE_NAME', 'GAE_MODULE_VERSION', + 'GAE_MINOR_VERSION') + + def testInitializeDebuggeeLabels(self): + self._TestInitializeLabels('GAE_SERVICE', 'GAE_VERSION', + 'GAE_DEPLOYMENT_ID') + + def testInitializeCloudRunDebuggeeLabels(self): + self._client.SetupAuth(project_id=TEST_PROJECT_ID) + + try: + os.environ['K_SERVICE'] = 'env_module' + os.environ['K_REVISION'] = '213' + self._client.InitializeDebuggeeLabels(None) + self.assertEqual( + { + 'projectid': 'test-project-id', + 'module': 'env_module', + 'version': '213', + 'platform': 'default' + }, self._client._debuggee_labels) + self.assertEqual('test-project-id-env_module-213', + self._client._GetDebuggeeDescription()) + + finally: + del os.environ['K_SERVICE'] + del os.environ['K_REVISION'] + + def testInitializeCloudFunctionDebuggeeLabels(self): + self._client.SetupAuth(project_id=TEST_PROJECT_ID) + + try: + os.environ['FUNCTION_NAME'] = 'fcn-name' + 
os.environ['X_GOOGLE_FUNCTION_VERSION'] = '213' + self._client.InitializeDebuggeeLabels(None) + self.assertEqual( + { + 'projectid': 'test-project-id', + 'module': 'fcn-name', + 'version': '213', + 'platform': 'cloud_function' + }, self._client._debuggee_labels) + self.assertEqual('test-project-id-fcn-name-213', + self._client._GetDebuggeeDescription()) + + finally: + del os.environ['FUNCTION_NAME'] + del os.environ['X_GOOGLE_FUNCTION_VERSION'] + + def testInitializeCloudFunctionUnversionedDebuggeeLabels(self): + self._client.SetupAuth(project_id=TEST_PROJECT_ID) + + try: + os.environ['FUNCTION_NAME'] = 'fcn-name' + self._client.InitializeDebuggeeLabels(None) + self.assertEqual( + { + 'projectid': 'test-project-id', + 'module': 'fcn-name', + 'version': 'unversioned', + 'platform': 'cloud_function' + }, self._client._debuggee_labels) + self.assertEqual('test-project-id-fcn-name-unversioned', + self._client._GetDebuggeeDescription()) + + finally: + del os.environ['FUNCTION_NAME'] + + def testInitializeCloudFunctionWithRegionDebuggeeLabels(self): + self._client.SetupAuth(project_id=TEST_PROJECT_ID) + + try: + os.environ['FUNCTION_NAME'] = 'fcn-name' + os.environ['FUNCTION_REGION'] = 'fcn-region' + self._client.InitializeDebuggeeLabels(None) + self.assertEqual( + { + 'projectid': 'test-project-id', + 'module': 'fcn-name', + 'version': 'unversioned', + 'platform': 'cloud_function', + 'region': 'fcn-region' + }, self._client._debuggee_labels) + self.assertEqual('test-project-id-fcn-name-unversioned', + self._client._GetDebuggeeDescription()) + + finally: + del os.environ['FUNCTION_NAME'] + del os.environ['FUNCTION_REGION'] + + def testAppFilesUniquifierNoMinorVersion(self): + """Verify that uniquifier_computer is used if minor version not defined.""" + self._client.SetupAuth(project_id=TEST_PROJECT_ID) + + root = tempfile.mkdtemp('', 'fake_app_') + sys.path.insert(0, root) + try: + uniquifier1 = self._client._ComputeUniquifier({}) + + with open(os.path.join(root, 
'app.py'), 'w', encoding='utf-8') as f: + f.write('hello') + uniquifier2 = self._client._ComputeUniquifier({}) + finally: + del sys.path[0] + + self.assertNotEqual(uniquifier1, uniquifier2) + + def testAppFilesUniquifierWithMinorVersion(self): + """Verify that uniquifier_computer not used if minor version is defined.""" + self._client.SetupAuth(project_id=TEST_PROJECT_ID) + + root = tempfile.mkdtemp('', 'fake_app_') + + os.environ['GAE_MINOR_VERSION'] = '12345' + sys.path.insert(0, root) + try: + self._client.InitializeDebuggeeLabels(None) + + uniquifier1 = self._client._GetDebuggee()['uniquifier'] + + with open(os.path.join(root, 'app.py'), 'w', encoding='utf-8') as f: + f.write('hello') + uniquifier2 = self._client._GetDebuggee()['uniquifier'] + finally: + del os.environ['GAE_MINOR_VERSION'] + del sys.path[0] + + self.assertEqual(uniquifier1, uniquifier2) + + def testSourceContext(self): + self._client.SetupAuth(project_id=TEST_PROJECT_ID) + + root = tempfile.mkdtemp('', 'fake_app_') + source_context_path = os.path.join(root, 'source-context.json') + + sys.path.insert(0, root) + try: + debuggee_no_source_context1 = self._client._GetDebuggee() + + with open(source_context_path, 'w', encoding='utf-8') as f: + f.write('not a valid JSON') + debuggee_bad_source_context = self._client._GetDebuggee() + + with open(os.path.join(root, 'fake_app.py'), 'w', encoding='utf-8') as f: + f.write('pretend') + debuggee_no_source_context2 = self._client._GetDebuggee() + + with open(source_context_path, 'w', encoding='utf-8') as f: + f.write('{"what": "source context"}') + debuggee_with_source_context = self._client._GetDebuggee() + + os.remove(source_context_path) + finally: + del sys.path[0] + + self.assertNotIn('sourceContexts', debuggee_no_source_context1) + self.assertNotIn('sourceContexts', debuggee_bad_source_context) + self.assertListEqual([{ + 'what': 'source context' + }], debuggee_with_source_context['sourceContexts']) + + uniquifiers = set() + 
uniquifiers.add(debuggee_no_source_context1['uniquifier']) + uniquifiers.add(debuggee_with_source_context['uniquifier']) + uniquifiers.add(debuggee_bad_source_context['uniquifier']) + self.assertLen(uniquifiers, 1) + uniquifiers.add(debuggee_no_source_context2['uniquifier']) + self.assertLen(uniquifiers, 2) + + +if __name__ == '__main__': + absltest.main() From b2ecae35ebb48a2207daf40303c207c4e090bec5 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Mon, 8 Aug 2022 11:40:03 -0400 Subject: [PATCH 212/241] feat: improve firebase error handling (#53) * add retry for registration * add unit tests for retries * bugfix: data lost when retrying snapshot updates --- src/googleclouddebugger/firebase_client.py | 89 +++--- tests/firebase_client_test.py | 327 +++++++++++++++++++-- 2 files changed, 357 insertions(+), 59 deletions(-) diff --git a/src/googleclouddebugger/firebase_client.py b/src/googleclouddebugger/firebase_client.py index 0da2a9e..4cb414a 100644 --- a/src/googleclouddebugger/firebase_client.py +++ b/src/googleclouddebugger/firebase_client.py @@ -14,14 +14,15 @@ """Communicates with Firebase RTDB backend.""" from collections import deque +import copy import hashlib import json import os import platform import requests -import socket import sys import threading +import time import traceback import firebase_admin @@ -114,6 +115,7 @@ def __init__(self): # Delay before retrying failed request. self.register_backoff = backoff.Backoff() # Register debuggee. + self.subscribe_backoff = backoff.Backoff() # Subscribe to updates. self.update_backoff = backoff.Backoff() # Update breakpoint. # Maximum number of times that the message is re-transmitted before it @@ -279,13 +281,25 @@ def _MainThreadProc(self): self._breakpoint_subscription. """ # Note: if self._credentials is None, default app credentials will be used. - # TODO: Error handling. 
- firebase_admin.initialize_app(self._credentials, - {'databaseURL': self._database_url}) + try: + firebase_admin.initialize_app(self._credentials, + {'databaseURL': self._database_url}) + except ValueError: + native.LogWarning( + f'Failed to initialize firebase: {traceback.format_exc()}') + native.LogError('Failed to start debugger agent. Giving up.') + return - self._RegisterDebuggee() + registration_required, delay = True, 0 + while registration_required: + time.sleep(delay) + registration_required, delay = self._RegisterDebuggee() self.registration_complete.set() - self._SubscribeToBreakpoints() + + subscription_required, delay = True, 0 + while subscription_required: + time.sleep(delay) + subscription_required, delay = self._SubscribeToBreakpoints() self.subscription_complete.set() def _TransmissionThreadProc(self): @@ -310,26 +324,29 @@ def _RegisterDebuggee(self): Returns: (registration_required, delay) tuple """ + debuggee = None try: debuggee = self._GetDebuggee() self._debuggee_id = debuggee['id'] - - try: - debuggee_path = f'cdbg/debuggees/{self._debuggee_id}' - native.LogInfo( - f'registering at {self._database_url}, path: {debuggee_path}') - firebase_admin.db.reference(debuggee_path).set(debuggee) - native.LogInfo( - f'Debuggee registered successfully, ID: {self._debuggee_id}') - self.register_backoff.Succeeded() - return (False, 0) # Proceed immediately to subscribing to breakpoints. 
- except BaseException: - native.LogInfo(f'Failed to register debuggee: {traceback.format_exc()}') except BaseException: - native.LogWarning('Debuggee information not available: ' + - traceback.format_exc()) + native.LogWarning( + f'Debuggee information not available: {traceback.format_exc()}') + return (True, self.register_backoff.Failed()) - return (True, self.register_backoff.Failed()) + try: + debuggee_path = f'cdbg/debuggees/{self._debuggee_id}' + native.LogInfo( + f'registering at {self._database_url}, path: {debuggee_path}') + firebase_admin.db.reference(debuggee_path).set(debuggee) + native.LogInfo( + f'Debuggee registered successfully, ID: {self._debuggee_id}') + self.register_backoff.Succeeded() + return (False, 0) # Proceed immediately to subscribing to breakpoints. + except BaseException: + # There is no significant benefit to handing different exceptions + # in different ways; we will log and retry regardless. + native.LogInfo(f'Failed to register debuggee: {traceback.format_exc()}') + return (True, self.register_backoff.Failed()) def _SubscribeToBreakpoints(self): # Kill any previous subscriptions first. @@ -340,7 +357,13 @@ def _SubscribeToBreakpoints(self): path = f'cdbg/breakpoints/{self._debuggee_id}/active' native.LogInfo(f'Subscribing to breakpoint updates at {path}') ref = firebase_admin.db.reference(path) - self._breakpoint_subscription = ref.listen(self._ActiveBreakpointCallback) + try: + self._breakpoint_subscription = ref.listen(self._ActiveBreakpointCallback) + return (False, 0) + except firebase_admin.exceptions.FirebaseError: + native.LogInfo( + f'Failed to subscribe to breakpoints: {traceback.format_exc()}') + return (True, self.subscribe_backoff.Failed()) def _ActiveBreakpointCallback(self, event): if event.event_type == 'put': @@ -410,7 +433,7 @@ def _TransmitBreakpointUpdates(self): try: # Something has changed on the breakpoint. # It should be going from active to final, but let's make sure. 
- if not breakpoint_data['isFinalState']: + if not breakpoint_data.get('isFinalState', False): raise BaseException( f'Unexpected breakpoint update requested: {breakpoint_data}') @@ -428,6 +451,7 @@ def _TransmitBreakpointUpdates(self): f'cdbg/breakpoints/{self._debuggee_id}/active/{bp_id}') bp_ref.delete() + summary_data = breakpoint_data # Save snapshot data for snapshots only. if is_snapshot: # Note that there may not be snapshot data. @@ -436,14 +460,15 @@ def _TransmitBreakpointUpdates(self): bp_ref.set(breakpoint_data) # Now strip potential snapshot data. - breakpoint_data.pop('evaluatedExpressions', None) - breakpoint_data.pop('stackFrames', None) - breakpoint_data.pop('variableTable', None) + summary_data = copy.deepcopy(breakpoint_data) + summary_data.pop('evaluatedExpressions', None) + summary_data.pop('stackFrames', None) + summary_data.pop('variableTable', None) # Then add it to the list of final breakpoints. bp_ref = firebase_admin.db.reference( f'cdbg/breakpoints/{self._debuggee_id}/final/{bp_id}') - bp_ref.set(breakpoint_data) + bp_ref.set(summary_data) native.LogInfo(f'Breakpoint {bp_id} update transmitted successfully') @@ -460,15 +485,7 @@ def _TransmitBreakpointUpdates(self): # This is very common if multiple instances are sending final update # simultaneously. native.LogInfo(f'{err}, breakpoint: {bp_id}') - except socket.error as err: - if retry_count < self.max_transmit_attempts - 1: - native.LogInfo(f'Socket error {err.errno} while sending breakpoint ' - f'{bp_id} update: {traceback.format_exc()}') - retry_list.append((breakpoint_data, retry_count + 1)) - else: - native.LogWarning(f'Breakpoint {bp_id} retry count exceeded maximum') - # Socket errors shouldn't persist like this; reconnect. 
- #reconnect = True + except BaseException: native.LogWarning(f'Fatal error sending breakpoint {bp_id} update: ' f'{traceback.format_exc()}') diff --git a/tests/firebase_client_test.py b/tests/firebase_client_test.py index c1690b2..1986a9a 100644 --- a/tests/firebase_client_test.py +++ b/tests/firebase_client_test.py @@ -1,10 +1,9 @@ """Unit tests for firebase_client module.""" -import errno import os -import socket import sys import tempfile +import time from unittest import mock from unittest.mock import MagicMock from unittest.mock import call @@ -12,7 +11,6 @@ import requests import requests_mock -from googleapiclient.errors import HttpError from googleclouddebugger import version from googleclouddebugger import firebase_client @@ -20,6 +18,7 @@ from absl.testing import parameterized import firebase_admin.credentials +from firebase_admin.exceptions import FirebaseError TEST_PROJECT_ID = 'test-project-id' METADATA_PROJECT_URL = ('http://metadata.google.internal/computeMetadata/' @@ -59,6 +58,31 @@ def setUp(self): self.breakpoints_changed_count = 0 self.breakpoints = {} + # Speed up the delays for retry loops. + for backoff in [ + self._client.register_backoff, self._client.subscribe_backoff, + self._client.update_backoff + ]: + backoff.min_interval_sec /= 100000.0 + backoff.max_interval_sec /= 100000.0 + backoff._current_interval_sec /= 100000.0 + + # Set up patchers. + patcher = patch('firebase_admin.initialize_app') + self._mock_initialize_app = patcher.start() + self.addCleanup(patcher.stop) + + patcher = patch('firebase_admin.db.reference') + self._mock_db_ref = patcher.start() + self.addCleanup(patcher.stop) + + # Set up the mocks for the database refs. 
+ self._mock_register_ref = MagicMock() + self._fake_subscribe_ref = FakeReference() + self._mock_db_ref.side_effect = [ + self._mock_register_ref, self._fake_subscribe_ref + ] + def tearDown(self): self._client.Stop() @@ -105,33 +129,58 @@ def testSetupAuthNoProjectId(self): with self.assertRaises(firebase_client.NoProjectIdError): self._client.SetupAuth() - @patch('firebase_admin.db.reference') - @patch('firebase_admin.initialize_app') - def testStart(self, mock_initialize_app, mock_db_ref): + def testStart(self): self._client.SetupAuth(project_id=TEST_PROJECT_ID) self._client.Start() self._client.subscription_complete.wait() debuggee_id = self._client._debuggee_id - mock_initialize_app.assert_called_with( + self._mock_initialize_app.assert_called_with( None, {'databaseURL': f'https://{TEST_PROJECT_ID}-cdbg.firebaseio.com'}) self.assertEqual([ call(f'cdbg/debuggees/{debuggee_id}'), call(f'cdbg/breakpoints/{debuggee_id}/active') - ], mock_db_ref.call_args_list) + ], self._mock_db_ref.call_args_list) + + # Verify that the register call has been made. + self._mock_register_ref.set.assert_called_once_with( + self._client._GetDebuggee()) + + def testStartRegisterRetry(self): + # A new db ref is fetched on each retry. + self._mock_db_ref.side_effect = [ + self._mock_register_ref, self._mock_register_ref, + self._fake_subscribe_ref + ] + + # Fail once, then succeed on retry. + self._mock_register_ref.set.side_effect = [FirebaseError(1, 'foo'), None] + + self._client.SetupAuth(project_id=TEST_PROJECT_ID) + self._client.Start() + self._client.registration_complete.wait() - # TODO: testStartRegisterRetry - # TODO: testStartSubscribeRetry - # - Note: failures don't require retrying registration. 
+ self.assertEqual(2, self._mock_register_ref.set.call_count) - @patch('firebase_admin.db.reference') - @patch('firebase_admin.initialize_app') - def testBreakpointSubscription(self, mock_initialize_app, mock_db_ref): - mock_register_ref = MagicMock() - fake_subscribe_ref = FakeReference() - mock_db_ref.side_effect = [mock_register_ref, fake_subscribe_ref] + def testStartSubscribeRetry(self): + mock_subscribe_ref = MagicMock() + mock_subscribe_ref.listen.side_effect = FirebaseError(1, 'foo') + # A new db ref is fetched on each retry. + self._mock_db_ref.side_effect = [ + self._mock_register_ref, + mock_subscribe_ref, # Fail the first time + self._fake_subscribe_ref # Succeed the second time + ] + + self._client.SetupAuth(project_id=TEST_PROJECT_ID) + self._client.Start() + self._client.subscription_complete.wait() + + self.assertEqual(3, self._mock_db_ref.call_count) + + def testBreakpointSubscription(self): # This class will keep track of the breakpoint updates and will check # them against expectations. class ResultChecker: @@ -182,15 +231,247 @@ def callback(self, new_breakpoints): self._client.subscription_complete.wait() # Send in updates to trigger the subscription callback. 
- fake_subscribe_ref.update('put', '/', - {breakpoints[0]['id']: breakpoints[0]}) - fake_subscribe_ref.update('patch', '/', - {breakpoints[1]['id']: breakpoints[1]}) - fake_subscribe_ref.update('put', f'/{breakpoints[2]["id"]}', breakpoints[2]) - fake_subscribe_ref.update('put', f'/{breakpoints[0]["id"]}', None) + self._fake_subscribe_ref.update('put', '/', + {breakpoints[0]['id']: breakpoints[0]}) + self._fake_subscribe_ref.update('patch', '/', + {breakpoints[1]['id']: breakpoints[1]}) + self._fake_subscribe_ref.update('put', f'/{breakpoints[2]["id"]}', + breakpoints[2]) + self._fake_subscribe_ref.update('put', f'/{breakpoints[0]["id"]}', None) self.assertEqual(len(expected_results), result_checker._change_count) + def testEnqueueBreakpointUpdate(self): + active_ref_mock = MagicMock() + snapshot_ref_mock = MagicMock() + final_ref_mock = MagicMock() + + self._mock_db_ref.side_effect = [ + self._mock_register_ref, self._fake_subscribe_ref, active_ref_mock, + snapshot_ref_mock, final_ref_mock + ] + + self._client.SetupAuth(project_id=TEST_PROJECT_ID) + self._client.Start() + self._client.subscription_complete.wait() + + debuggee_id = self._client._debuggee_id + breakpoint_id = 'breakpoint-0' + + input_breakpoint = { + 'id': breakpoint_id, + 'location': { + 'path': 'foo.py', + 'line': 18 + }, + 'isFinalState': True, + 'evaluatedExpressions': ['expressions go here'], + 'stackFrames': ['stuff goes here'], + 'variableTable': ['lots', 'of', 'variables'], + } + short_breakpoint = { + 'id': breakpoint_id, + 'location': { + 'path': 'foo.py', + 'line': 18 + }, + 'isFinalState': True, + 'action': 'CAPTURE', + 'finalTimeUnixMsec': { + '.sv': 'timestamp' + } + } + full_breakpoint = { + 'id': breakpoint_id, + 'location': { + 'path': 'foo.py', + 'line': 18 + }, + 'isFinalState': True, + 'action': 'CAPTURE', + 'evaluatedExpressions': ['expressions go here'], + 'stackFrames': ['stuff goes here'], + 'variableTable': ['lots', 'of', 'variables'], + 'finalTimeUnixMsec': { + '.sv': 
'timestamp' + } + } + + self._client.EnqueueBreakpointUpdate(input_breakpoint) + + # Wait for the breakpoint to be sent. + while self._client._transmission_queue: + time.sleep(0.1) + + db_ref_calls = self._mock_db_ref.call_args_list + self.assertEqual( + call(f'cdbg/breakpoints/{debuggee_id}/active/{breakpoint_id}'), + db_ref_calls[2]) + self.assertEqual( + call(f'cdbg/breakpoints/{debuggee_id}/snapshots/{breakpoint_id}'), + db_ref_calls[3]) + self.assertEqual( + call(f'cdbg/breakpoints/{debuggee_id}/final/{breakpoint_id}'), + db_ref_calls[4]) + + active_ref_mock.delete.assert_called_once() + snapshot_ref_mock.set.assert_called_once_with(full_breakpoint) + final_ref_mock.set.assert_called_once_with(short_breakpoint) + + def testEnqueueBreakpointUpdateWithLogpoint(self): + active_ref_mock = MagicMock() + final_ref_mock = MagicMock() + + self._mock_db_ref.side_effect = [ + self._mock_register_ref, self._fake_subscribe_ref, active_ref_mock, + final_ref_mock + ] + + self._client.SetupAuth(project_id=TEST_PROJECT_ID) + self._client.Start() + self._client.subscription_complete.wait() + + debuggee_id = self._client._debuggee_id + breakpoint_id = 'logpoint-0' + + input_breakpoint = { + 'id': breakpoint_id, + 'location': { + 'path': 'foo.py', + 'line': 18 + }, + 'action': 'LOG', + 'isFinalState': True, + 'status': { + 'isError': True, + 'refersTo': 'BREAKPOINT_SOURCE_LOCATION', + }, + } + output_breakpoint = { + 'id': breakpoint_id, + 'location': { + 'path': 'foo.py', + 'line': 18 + }, + 'isFinalState': True, + 'action': 'LOG', + 'status': { + 'isError': True, + 'refersTo': 'BREAKPOINT_SOURCE_LOCATION', + }, + 'finalTimeUnixMsec': { + '.sv': 'timestamp' + } + } + + self._client.EnqueueBreakpointUpdate(input_breakpoint) + + # Wait for the breakpoint to be sent. 
+ while self._client._transmission_queue: + time.sleep(0.1) + + db_ref_calls = self._mock_db_ref.call_args_list + self.assertEqual( + call(f'cdbg/breakpoints/{debuggee_id}/active/{breakpoint_id}'), + db_ref_calls[2]) + self.assertEqual( + call(f'cdbg/breakpoints/{debuggee_id}/final/{breakpoint_id}'), + db_ref_calls[3]) + + active_ref_mock.delete.assert_called_once() + final_ref_mock.set.assert_called_once_with(output_breakpoint) + + # Make sure that the snapshots node was not accessed. + self.assertTrue( + call(f'cdbg/breakpoints/{debuggee_id}/snapshots/{breakpoint_id}') not in + db_ref_calls) + + def testEnqueueBreakpointUpdateRetry(self): + active_ref_mock = MagicMock() + snapshot_ref_mock = MagicMock() + final_ref_mock = MagicMock() + + # This test will have three failures, one for each of the firebase writes. + # UNAVAILABLE errors are retryable. + active_ref_mock.delete.side_effect = [ + FirebaseError('UNAVAILABLE', 'active error'), None, None, None + ] + snapshot_ref_mock.set.side_effect = [ + FirebaseError('UNAVAILABLE', 'snapshot error'), None, None + ] + final_ref_mock.set.side_effect = [ + FirebaseError('UNAVAILABLE', 'final error'), None + ] + + self._mock_db_ref.side_effect = [ + self._mock_register_ref, + self._fake_subscribe_ref, # setup + active_ref_mock, # attempt 1 + active_ref_mock, + snapshot_ref_mock, # attempt 2 + active_ref_mock, + snapshot_ref_mock, + final_ref_mock, # attempt 3 + active_ref_mock, + snapshot_ref_mock, + final_ref_mock # attempt 4 + ] + + self._client.SetupAuth(project_id=TEST_PROJECT_ID) + self._client.Start() + self._client.subscription_complete.wait() + + breakpoint_id = 'breakpoint-0' + + input_breakpoint = { + 'id': breakpoint_id, + 'location': { + 'path': 'foo.py', + 'line': 18 + }, + 'isFinalState': True, + 'evaluatedExpressions': ['expressions go here'], + 'stackFrames': ['stuff goes here'], + 'variableTable': ['lots', 'of', 'variables'], + } + short_breakpoint = { + 'id': breakpoint_id, + 'location': { + 'path': 
'foo.py', + 'line': 18 + }, + 'isFinalState': True, + 'action': 'CAPTURE', + 'finalTimeUnixMsec': { + '.sv': 'timestamp' + } + } + full_breakpoint = { + 'id': breakpoint_id, + 'location': { + 'path': 'foo.py', + 'line': 18 + }, + 'isFinalState': True, + 'action': 'CAPTURE', + 'evaluatedExpressions': ['expressions go here'], + 'stackFrames': ['stuff goes here'], + 'variableTable': ['lots', 'of', 'variables'], + 'finalTimeUnixMsec': { + '.sv': 'timestamp' + } + } + + self._client.EnqueueBreakpointUpdate(input_breakpoint) + + # Wait for the breakpoint to be sent. Retries will have occured. + while self._client._transmission_queue: + time.sleep(0.1) + + active_ref_mock.delete.assert_has_calls([call()] * 4) + snapshot_ref_mock.set.assert_has_calls([call(full_breakpoint)] * 3) + final_ref_mock.set.assert_has_calls([call(short_breakpoint)] * 2) + def _TestInitializeLabels(self, module_var, version_var, minor_var): self._client.SetupAuth(project_id=TEST_PROJECT_ID) From 4d953f27cb71bbf9bc33b9d9d9762fefe60e5278 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Tue, 9 Aug 2022 09:54:29 -0400 Subject: [PATCH 213/241] chore: release 3.0 (#54) * Pin to stable firebase_admin version * Update README with section on firebase backend * Bump version number to 3.0 --- README.md | 37 ++++++++++++++++++++++++++++++ requirements.txt | 2 +- src/googleclouddebugger/version.py | 2 +- src/setup.py | 1 + 4 files changed, 40 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ed3f487..51adde7 100644 --- a/README.md +++ b/README.md @@ -224,6 +224,43 @@ Alternatively, you can pass the `--noreload` flag when running the Django using the `--noreload` flag disables the autoreload feature in Django, which means local changes to files will not be automatically picked up by Django. +### Experimental Firebase Realtime Database Backend + +This functionality is available for release 3.0 onward of this agent. 
+ +The agent can be configured to use Firebase Realtime Database as a backend +instead of the deprecated Cloud Debugger service. If the Firebase backend is +used, breakpoints can be viewed and set using the Snapshot Debugger CLI instead +of the Cloud Console. + +To use the Firebase backend, set the flag when enabling the agent: + +```python +try: + import googleclouddebugger + googleclouddebugger.enable(use_firebase=True) +except ImportError: + pass +``` + +Additional configuration can be provided if necessary: + +```python +try: + import googleclouddebugger + googleclouddebugger.enable( + use_firebase=True, + project_id='my-project-id', + firebase_db_url='https://my-database-url.firebaseio.com', + service_account_json_file='path/to/service_account.json', + ) +except ImportError: + pass +``` + +See https://github.com/GoogleCloudPlatform/snapshot-debugger for more details. + + ## Flag Reference The agent offers various flags to configure its behavior. Flags can be specified diff --git a/requirements.txt b/requirements.txt index 6b75f8c..4de89aa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,5 +2,5 @@ google-auth>=1.0.0 google-auth-httplib2 google-api-python-client google-api-core -firebase_admin +firebase_admin==5.2.0 pyyaml diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index 413f974..30aa735 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. 
-__version__ = '2.19' +__version__ = '3.0' diff --git a/src/setup.py b/src/setup.py index ef92e7c..61c8262 100644 --- a/src/setup.py +++ b/src/setup.py @@ -105,6 +105,7 @@ def ReadConfig(section, value, default): 'google-auth>=1.0.0', 'google-auth-httplib2', 'google-api-core', + 'firebase-admin==5.2.0', 'pyyaml', ], packages=['googleclouddebugger'], From c891b1a2542f2533ef6a43c8bcff3355d3388b41 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Tue, 9 Aug 2022 12:18:01 -0400 Subject: [PATCH 214/241] build: bump to manylinux2014 (#55) --- build-dist.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build-dist.sh b/build-dist.sh index b0fc6d3..fffe140 100755 --- a/build-dist.sh +++ b/build-dist.sh @@ -1,4 +1,4 @@ -DOCKER_IMAGE='quay.io/pypa/manylinux2010_x86_64' +DOCKER_IMAGE='quay.io/pypa/manylinux2014_x86_64' docker pull "$DOCKER_IMAGE" docker container run -t --rm -v "$(pwd)":/io "$DOCKER_IMAGE" /io/src/build-wheels.sh From d2414399fdf7d34bfd846c1d734e00c353d4eb9f Mon Sep 17 00:00:00 2001 From: James McTavish Date: Wed, 10 Aug 2022 14:28:13 -0400 Subject: [PATCH 215/241] fix: save data in /snapshot instead of /snapshots. (#56) --- src/googleclouddebugger/firebase_client.py | 2 +- tests/firebase_client_test.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/googleclouddebugger/firebase_client.py b/src/googleclouddebugger/firebase_client.py index 4cb414a..7dc363b 100644 --- a/src/googleclouddebugger/firebase_client.py +++ b/src/googleclouddebugger/firebase_client.py @@ -456,7 +456,7 @@ def _TransmitBreakpointUpdates(self): if is_snapshot: # Note that there may not be snapshot data. bp_ref = firebase_admin.db.reference( - f'cdbg/breakpoints/{self._debuggee_id}/snapshots/{bp_id}') + f'cdbg/breakpoints/{self._debuggee_id}/snapshot/{bp_id}') bp_ref.set(breakpoint_data) # Now strip potential snapshot data. 
diff --git a/tests/firebase_client_test.py b/tests/firebase_client_test.py index 1986a9a..99057f9 100644 --- a/tests/firebase_client_test.py +++ b/tests/firebase_client_test.py @@ -308,7 +308,7 @@ def testEnqueueBreakpointUpdate(self): call(f'cdbg/breakpoints/{debuggee_id}/active/{breakpoint_id}'), db_ref_calls[2]) self.assertEqual( - call(f'cdbg/breakpoints/{debuggee_id}/snapshots/{breakpoint_id}'), + call(f'cdbg/breakpoints/{debuggee_id}/snapshot/{breakpoint_id}'), db_ref_calls[3]) self.assertEqual( call(f'cdbg/breakpoints/{debuggee_id}/final/{breakpoint_id}'), @@ -381,9 +381,9 @@ def testEnqueueBreakpointUpdateWithLogpoint(self): active_ref_mock.delete.assert_called_once() final_ref_mock.set.assert_called_once_with(output_breakpoint) - # Make sure that the snapshots node was not accessed. + # Make sure that the snapshot node was not accessed. self.assertTrue( - call(f'cdbg/breakpoints/{debuggee_id}/snapshots/{breakpoint_id}') not in + call(f'cdbg/breakpoints/{debuggee_id}/snapshot/{breakpoint_id}') not in db_ref_calls) def testEnqueueBreakpointUpdateRetry(self): From 82dc9a77d42c2b4a7dee22de771e24cb65204a77 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Wed, 10 Aug 2022 14:32:31 -0400 Subject: [PATCH 216/241] chore: release version 3.1 (#57) --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index 30aa735..a52b8ed 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. 
-__version__ = '3.0' +__version__ = '3.1' From 2db6e8610b1752a4ee9d43963520d6c4148ef4ef Mon Sep 17 00:00:00 2001 From: James McTavish Date: Fri, 12 Aug 2022 14:32:23 -0400 Subject: [PATCH 217/241] fix: safely handle multiple deletions of a breakpoint (#58) --- src/googleclouddebugger/firebase_client.py | 3 ++- tests/firebase_client_test.py | 14 ++++++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/googleclouddebugger/firebase_client.py b/src/googleclouddebugger/firebase_client.py index 7dc363b..0fd3fd0 100644 --- a/src/googleclouddebugger/firebase_client.py +++ b/src/googleclouddebugger/firebase_client.py @@ -373,7 +373,8 @@ def _ActiveBreakpointCallback(self, event): # If deleting, event.path will be /{breakpointid} if event.path != '/': breakpoint_id = event.path[1:] - del self._breakpoints[breakpoint_id] + # Breakpoint may have already been deleted, so pop for possible no-op. + self._breakpoints.pop(breakpoint_id, None) else: if event.path == '/': # New set of breakpoints. diff --git a/tests/firebase_client_test.py b/tests/firebase_client_test.py index 99057f9..cf60e3e 100644 --- a/tests/firebase_client_test.py +++ b/tests/firebase_client_test.py @@ -219,9 +219,12 @@ def callback(self, new_breakpoints): }, ] - expected_results = [[breakpoints[0]], [breakpoints[0], breakpoints[1]], + expected_results = [[breakpoints[0]], + [breakpoints[0], breakpoints[1]], [breakpoints[0], breakpoints[1], breakpoints[2]], - [breakpoints[1], breakpoints[2]]] + [breakpoints[1], breakpoints[2]], + [breakpoints[1], breakpoints[2]] + ] result_checker = ResultChecker(expected_results, self) self._client.on_active_breakpoints_changed = result_checker.callback @@ -231,12 +234,19 @@ def callback(self, new_breakpoints): self._client.subscription_complete.wait() # Send in updates to trigger the subscription callback. + + # Initial state. self._fake_subscribe_ref.update('put', '/', {breakpoints[0]['id']: breakpoints[0]}) + # Add a breakpoint via patch. 
self._fake_subscribe_ref.update('patch', '/', {breakpoints[1]['id']: breakpoints[1]}) + # Add a breakpoint via put. self._fake_subscribe_ref.update('put', f'/{breakpoints[2]["id"]}', breakpoints[2]) + # Delete a breakpoint. + self._fake_subscribe_ref.update('put', f'/{breakpoints[0]["id"]}', None) + # Delete the breakpoint a second time; should handle this gracefully. self._fake_subscribe_ref.update('put', f'/{breakpoints[0]["id"]}', None) self.assertEqual(len(expected_results), result_checker._change_count) From e21c3333e3e3ecd3330a5b8a58b1317ecd0ade79 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Thu, 8 Dec 2022 15:21:53 -0500 Subject: [PATCH 218/241] docs: Add instructions for development (#61) * docs: Add instructions for development * clarify that build & release is for project owners --- README.md | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/README.md b/README.md index 51adde7..15be0b9 100644 --- a/README.md +++ b/README.md @@ -300,3 +300,52 @@ environment variable. It expects a boolean value (`True`/`False`) or a string, with `'True'` interpreted as `True` and any other string interpreted as `False`). If not provided, the breakpoint canarying will not be enabled. + +## Development + +The following instructions are intended to help with modifying the codebase. + +### Testing + +#### Unit tests + +Run the `build_and_test.sh` script from the root of the repository to build and +run the unit tests using the locally installed version of Python. + +#### Local development + +You may want to run an agent with local changes in an application in order to +validate functionality in a way that unit tests don't fully cover. To do this, +you will need to build the agent: +``` +cd src +./build.sh +cd .. +``` + +The built agent will be available in the `src/dist` directory. 
You can now +force the installation of the agent using: +``` +pip3 install src/dist/* --force-reinstall +``` + +You can now run your test application using the development build of the agent +in whatever way you desire. + +It is recommended that you do this within a +[virtual environment](https://docs.python.org/3/library/venv.html). + +### Build & Release (for project owners) + +Before performing a release, be sure to update the version number in +`src/googleclouddebugger/version.py`. Tag the commit that increments the +version number (eg. `v3.1`) and create a Github release. + +Run the `build-dist.sh` script from the root of the repository to build, +test, and generate the distribution whls. You may need to use `sudo` +depending on your system's docker setup. + +Build artifacts will be placed in `/dist` and can be pushed to pypi by running: +``` +twine upload dist/*.whl +``` From 58e483de352658dbd3731e73a494eaf261b35e87 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Thu, 8 Dec 2022 15:22:39 -0500 Subject: [PATCH 219/241] chore: bump firebase-admin version (#62) 5.4.0 deprecated support for Python 3.6 and 6.0.0 removed it, while this library still maintains support for 3.6. For now, I'm changing the restriction to firebase_admin>=5.3.0 which should address the concerns. 
--- requirements.txt | 2 +- src/setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 4de89aa..13f973e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,5 +2,5 @@ google-auth>=1.0.0 google-auth-httplib2 google-api-python-client google-api-core -firebase_admin==5.2.0 +firebase_admin>=5.3.0 pyyaml diff --git a/src/setup.py b/src/setup.py index 61c8262..0c24bad 100644 --- a/src/setup.py +++ b/src/setup.py @@ -105,7 +105,7 @@ def ReadConfig(section, value, default): 'google-auth>=1.0.0', 'google-auth-httplib2', 'google-api-core', - 'firebase-admin==5.2.0', + 'firebase-admin>=5.3.0', 'pyyaml', ], packages=['googleclouddebugger'], From d31385422dc4c1b46231a53ea4a94fe099cc8355 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Thu, 15 Dec 2022 12:36:54 -0500 Subject: [PATCH 220/241] feat: add active debuggee support (#64) * add registration and update timestamps on register * add periodic marking of active debuggee fixes #63 --- firebase-sample/app.py | 5 +- src/googleclouddebugger/firebase_client.py | 66 +++++++++++++-- tests/firebase_client_test.py | 93 +++++++++++++++++----- 3 files changed, 137 insertions(+), 27 deletions(-) diff --git a/firebase-sample/app.py b/firebase-sample/app.py index 725c7ab..0916e7c 100644 --- a/firebase-sample/app.py +++ b/firebase-sample/app.py @@ -1,11 +1,12 @@ import googleclouddebugger -googleclouddebugger.enable(use_firebase= True) + +googleclouddebugger.enable(use_firebase=True) from flask import Flask app = Flask(__name__) + @app.route("/") def hello_world(): return "

Hello World!

" - diff --git a/src/googleclouddebugger/firebase_client.py b/src/googleclouddebugger/firebase_client.py index 0fd3fd0..be59a3c 100644 --- a/src/googleclouddebugger/firebase_client.py +++ b/src/googleclouddebugger/firebase_client.py @@ -102,6 +102,8 @@ def __init__(self): self._transmission_thread = None self._transmission_thread_startup_lock = threading.Lock() self._transmission_queue = deque(maxlen=100) + self._mark_active_timer = None + self._mark_active_interval_sec = 60 * 60 # 1 hour in seconds self._new_updates = threading.Event() self._breakpoint_subscription = None @@ -206,7 +208,8 @@ def SetupAuth(self, try: r = requests.get( f'{_METADATA_SERVER_URL}/project/project-id', - headers={'Metadata-Flavor': 'Google'}) + headers={'Metadata-Flavor': 'Google'}, + timeout=1) project_id = r.text except requests.exceptions.RequestException: native.LogInfo('Metadata server not available') @@ -246,6 +249,10 @@ def Stop(self): self._transmission_thread.join() self._transmission_thread = None + if self._mark_active_timer is not None: + self._mark_active_timer.cancel() + self._mark_active_timer = None + if self._breakpoint_subscription is not None: self._breakpoint_subscription.close() self._breakpoint_subscription = None @@ -302,6 +309,8 @@ def _MainThreadProc(self): subscription_required, delay = self._SubscribeToBreakpoints() self.subscription_complete.set() + self._StartMarkActiveTimer() + def _TransmissionThreadProc(self): """Entry point for the transmission worker thread.""" @@ -312,6 +321,22 @@ def _TransmissionThreadProc(self): self._new_updates.wait(delay) + def _MarkActiveTimerFunc(self): + """Entry point for the mark active timer.""" + + try: + self._MarkDebuggeeActive() + except: + native.LogInfo( + f'Failed to mark debuggee as active: {traceback.format_exc()}') + finally: + self._StartMarkActiveTimer() + + def _StartMarkActiveTimer(self): + self._mark_active_timer = threading.Timer(self._mark_active_interval_sec, + self._MarkActiveTimerFunc) + 
self._mark_active_timer.start() + def _RegisterDebuggee(self): """Single attempt to register the debuggee. @@ -334,12 +359,21 @@ def _RegisterDebuggee(self): return (True, self.register_backoff.Failed()) try: - debuggee_path = f'cdbg/debuggees/{self._debuggee_id}' - native.LogInfo( - f'registering at {self._database_url}, path: {debuggee_path}') - firebase_admin.db.reference(debuggee_path).set(debuggee) + present = self._CheckDebuggeePresence() + if present: + self._MarkDebuggeeActive() + else: + debuggee_path = f'cdbg/debuggees/{self._debuggee_id}' + native.LogInfo( + f'registering at {self._database_url}, path: {debuggee_path}') + debuggee_data = copy.deepcopy(debuggee) + debuggee_data['registrationTimeUnixMsec'] = {'.sv': 'timestamp'} + debuggee_data['lastUpdateTimeUnixMsec'] = {'.sv': 'timestamp'} + firebase_admin.db.reference(debuggee_path).set(debuggee_data) + native.LogInfo( f'Debuggee registered successfully, ID: {self._debuggee_id}') + self.register_backoff.Succeeded() return (False, 0) # Proceed immediately to subscribing to breakpoints. except BaseException: @@ -348,6 +382,26 @@ def _RegisterDebuggee(self): native.LogInfo(f'Failed to register debuggee: {traceback.format_exc()}') return (True, self.register_backoff.Failed()) + def _CheckDebuggeePresence(self): + path = f'cdbg/debuggees/{self._debuggee_id}/registrationTimeUnixMsec' + try: + snapshot = firebase_admin.db.reference(path).get() + # The value doesn't matter; just return true if there's any value. 
+ return snapshot is not None + except BaseException: + native.LogInfo( + f'Failed to check debuggee presence: {traceback.format_exc()}') + return False + + def _MarkDebuggeeActive(self): + active_path = f'cdbg/debuggees/{self._debuggee_id}/lastUpdateTimeUnixMsec' + try: + server_time = {'.sv': 'timestamp'} + firebase_admin.db.reference(active_path).set(server_time) + except BaseException: + native.LogInfo( + f'Failed to mark debuggee active: {traceback.format_exc()}') + def _SubscribeToBreakpoints(self): # Kill any previous subscriptions first. if self._breakpoint_subscription is not None: @@ -374,7 +428,7 @@ def _ActiveBreakpointCallback(self, event): if event.path != '/': breakpoint_id = event.path[1:] # Breakpoint may have already been deleted, so pop for possible no-op. - self._breakpoints.pop(breakpoint_id, None) + self._breakpoints.pop(breakpoint_id, None) else: if event.path == '/': # New set of breakpoints. diff --git a/tests/firebase_client_test.py b/tests/firebase_client_test.py index cf60e3e..5cd8fb6 100644 --- a/tests/firebase_client_test.py +++ b/tests/firebase_client_test.py @@ -1,5 +1,6 @@ """Unit tests for firebase_client module.""" +import copy import os import sys import tempfile @@ -77,10 +78,14 @@ def setUp(self): self.addCleanup(patcher.stop) # Set up the mocks for the database refs. 
+ self._mock_presence_ref = MagicMock() + self._mock_presence_ref.get.return_value = None + self._mock_active_ref = MagicMock() self._mock_register_ref = MagicMock() self._fake_subscribe_ref = FakeReference() self._mock_db_ref.side_effect = [ - self._mock_register_ref, self._fake_subscribe_ref + self._mock_presence_ref, self._mock_register_ref, + self._fake_subscribe_ref ] def tearDown(self): @@ -139,18 +144,46 @@ def testStart(self): self._mock_initialize_app.assert_called_with( None, {'databaseURL': f'https://{TEST_PROJECT_ID}-cdbg.firebaseio.com'}) self.assertEqual([ + call(f'cdbg/debuggees/{debuggee_id}/registrationTimeUnixMsec'), call(f'cdbg/debuggees/{debuggee_id}'), call(f'cdbg/breakpoints/{debuggee_id}/active') ], self._mock_db_ref.call_args_list) # Verify that the register call has been made. - self._mock_register_ref.set.assert_called_once_with( - self._client._GetDebuggee()) + expected_data = copy.deepcopy(self._client._GetDebuggee()) + expected_data['registrationTimeUnixMsec'] = {'.sv': 'timestamp'} + expected_data['lastUpdateTimeUnixMsec'] = {'.sv': 'timestamp'} + self._mock_register_ref.set.assert_called_once_with(expected_data) + + def testStartAlreadyPresent(self): + # Create a mock for just this test that claims the debuggee is registered. + mock_presence_ref = MagicMock() + mock_presence_ref.get.return_value = 'present!' + + self._mock_db_ref.side_effect = [ + mock_presence_ref, self._mock_active_ref, self._fake_subscribe_ref + ] + + self._client.SetupAuth(project_id=TEST_PROJECT_ID) + self._client.Start() + self._client.subscription_complete.wait() + + debuggee_id = self._client._debuggee_id + + self.assertEqual([ + call(f'cdbg/debuggees/{debuggee_id}/registrationTimeUnixMsec'), + call(f'cdbg/debuggees/{debuggee_id}/lastUpdateTimeUnixMsec'), + call(f'cdbg/breakpoints/{debuggee_id}/active') + ], self._mock_db_ref.call_args_list) + + # Verify that the mark-active call has been made. 
+ self._mock_active_ref.set.assert_called_once_with({'.sv': 'timestamp'}) def testStartRegisterRetry(self): - # A new db ref is fetched on each retry. + # A new set of db refs are fetched on each retry. self._mock_db_ref.side_effect = [ - self._mock_register_ref, self._mock_register_ref, + self._mock_presence_ref, self._mock_register_ref, + self._mock_presence_ref, self._mock_register_ref, self._fake_subscribe_ref ] @@ -169,6 +202,7 @@ def testStartSubscribeRetry(self): # A new db ref is fetched on each retry. self._mock_db_ref.side_effect = [ + self._mock_presence_ref, self._mock_register_ref, mock_subscribe_ref, # Fail the first time self._fake_subscribe_ref # Succeed the second time @@ -178,7 +212,28 @@ def testStartSubscribeRetry(self): self._client.Start() self._client.subscription_complete.wait() - self.assertEqual(3, self._mock_db_ref.call_count) + self.assertEqual(4, self._mock_db_ref.call_count) + + def testMarkActiveTimer(self): + # Make sure that there are enough refs queued up. + refs = list(self._mock_db_ref.side_effect) + refs.extend([self._mock_active_ref] * 10) + self._mock_db_ref.side_effect = refs + + # Speed things WAY up rather than waiting for hours. + self._client._mark_active_interval_sec = 0.1 + + self._client.SetupAuth(project_id=TEST_PROJECT_ID) + self._client.Start() + self._client.subscription_complete.wait() + + # wait long enough for the timer to trigger a few times. 
+ time.sleep(0.5) + + print(f'Timer triggered {self._mock_active_ref.set.call_count} times') + self.assertTrue(self._mock_active_ref.set.call_count > 3) + self._mock_active_ref.set.assert_called_with({'.sv': 'timestamp'}) + def testBreakpointSubscription(self): # This class will keep track of the breakpoint updates and will check @@ -219,12 +274,10 @@ def callback(self, new_breakpoints): }, ] - expected_results = [[breakpoints[0]], - [breakpoints[0], breakpoints[1]], + expected_results = [[breakpoints[0]], [breakpoints[0], breakpoints[1]], [breakpoints[0], breakpoints[1], breakpoints[2]], [breakpoints[1], breakpoints[2]], - [breakpoints[1], breakpoints[2]] - ] + [breakpoints[1], breakpoints[2]]] result_checker = ResultChecker(expected_results, self) self._client.on_active_breakpoints_changed = result_checker.callback @@ -257,8 +310,9 @@ def testEnqueueBreakpointUpdate(self): final_ref_mock = MagicMock() self._mock_db_ref.side_effect = [ - self._mock_register_ref, self._fake_subscribe_ref, active_ref_mock, - snapshot_ref_mock, final_ref_mock + self._mock_presence_ref, self._mock_register_ref, + self._fake_subscribe_ref, active_ref_mock, snapshot_ref_mock, + final_ref_mock ] self._client.SetupAuth(project_id=TEST_PROJECT_ID) @@ -316,13 +370,13 @@ def testEnqueueBreakpointUpdate(self): db_ref_calls = self._mock_db_ref.call_args_list self.assertEqual( call(f'cdbg/breakpoints/{debuggee_id}/active/{breakpoint_id}'), - db_ref_calls[2]) + db_ref_calls[3]) self.assertEqual( call(f'cdbg/breakpoints/{debuggee_id}/snapshot/{breakpoint_id}'), - db_ref_calls[3]) + db_ref_calls[4]) self.assertEqual( call(f'cdbg/breakpoints/{debuggee_id}/final/{breakpoint_id}'), - db_ref_calls[4]) + db_ref_calls[5]) active_ref_mock.delete.assert_called_once() snapshot_ref_mock.set.assert_called_once_with(full_breakpoint) @@ -333,8 +387,8 @@ def testEnqueueBreakpointUpdateWithLogpoint(self): final_ref_mock = MagicMock() self._mock_db_ref.side_effect = [ - self._mock_register_ref, 
self._fake_subscribe_ref, active_ref_mock, - final_ref_mock + self._mock_presence_ref, self._mock_register_ref, + self._fake_subscribe_ref, active_ref_mock, final_ref_mock ] self._client.SetupAuth(project_id=TEST_PROJECT_ID) @@ -383,10 +437,10 @@ def testEnqueueBreakpointUpdateWithLogpoint(self): db_ref_calls = self._mock_db_ref.call_args_list self.assertEqual( call(f'cdbg/breakpoints/{debuggee_id}/active/{breakpoint_id}'), - db_ref_calls[2]) + db_ref_calls[3]) self.assertEqual( call(f'cdbg/breakpoints/{debuggee_id}/final/{breakpoint_id}'), - db_ref_calls[3]) + db_ref_calls[4]) active_ref_mock.delete.assert_called_once() final_ref_mock.set.assert_called_once_with(output_breakpoint) @@ -414,6 +468,7 @@ def testEnqueueBreakpointUpdateRetry(self): ] self._mock_db_ref.side_effect = [ + self._mock_presence_ref, self._mock_register_ref, self._fake_subscribe_ref, # setup active_ref_mock, # attempt 1 From 8375462e56c91705d8f79b76c524ea14de3eeca5 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Thu, 15 Dec 2022 13:07:44 -0500 Subject: [PATCH 221/241] chore: release version 3.2 (#65) --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index a52b8ed..267e47d 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. 
-__version__ = '3.1' +__version__ = '3.2' From 9af555a5be721395d3c4c12766a113230f857821 Mon Sep 17 00:00:00 2001 From: jasonborg <48138260+jasonborg@users.noreply.github.com> Date: Tue, 10 Jan 2023 10:22:01 -0500 Subject: [PATCH 222/241] docs: Update Firebase notes in README (#66) --- README.md | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 15be0b9..9e364c5 100644 --- a/README.md +++ b/README.md @@ -224,14 +224,16 @@ Alternatively, you can pass the `--noreload` flag when running the Django using the `--noreload` flag disables the autoreload feature in Django, which means local changes to files will not be automatically picked up by Django. -### Experimental Firebase Realtime Database Backend +### Snapshot Debugger - Firebase Realtime Database Backend -This functionality is available for release 3.0 onward of this agent. +This functionality is available for release 3.0 onward of this agent and +provides support for the Snapshot Debugger, which is being provided as a +replacement for the deprecated Cloud Debugger service. The agent can be configured to use Firebase Realtime Database as a backend -instead of the deprecated Cloud Debugger service. If the Firebase backend is -used, breakpoints can be viewed and set using the Snapshot Debugger CLI instead -of the Cloud Console. +instead of the Cloud Debugger service. If the Firebase backend is used, +breakpoints can be viewed and set using the Snapshot Debugger CLI instead of the +Cloud Console. To use the Firebase backend, set the flag when enabling the agent: @@ -258,7 +260,8 @@ except ImportError: pass ``` -See https://github.com/GoogleCloudPlatform/snapshot-debugger for more details. +See https://github.com/GoogleCloudPlatform/snapshot-debugger and +https://cloud.google.com/debugger/docs/deprecations for more details. 
## Flag Reference From d59817ba838e5201072e840084a172521dbc4196 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Mon, 23 Jan 2023 10:57:19 -0500 Subject: [PATCH 223/241] chore: Add first c++ unit tests. (#67) Tests are added under tests/cpp and require bazel to run --- .gitignore | 1 + BUILD | 2 + README.md | 3 + WORKSPACE | 50 ++ cc-tests/BUILD | 10 + src/googleclouddebugger/BUILD | 102 +++ tests/cpp/BUILD | 10 + tests/cpp/bytecode_manipulator_test.cc | 971 +++++++++++++++++++++++++ 8 files changed, 1149 insertions(+) create mode 100644 BUILD create mode 100644 WORKSPACE create mode 100644 cc-tests/BUILD create mode 100644 src/googleclouddebugger/BUILD create mode 100644 tests/cpp/BUILD create mode 100644 tests/cpp/bytecode_manipulator_test.cc diff --git a/.gitignore b/.gitignore index 4041f03..ab9464c 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ __pycache__/ *.egg-info/ .coverage +/bazel-* diff --git a/BUILD b/BUILD new file mode 100644 index 0000000..ae821f1 --- /dev/null +++ b/BUILD @@ -0,0 +1,2 @@ +package(default_visibility = ["//visibility:public"]) + diff --git a/README.md b/README.md index 9e364c5..95aaac8 100644 --- a/README.md +++ b/README.md @@ -315,6 +315,9 @@ The following instructions are intended to help with modifying the codebase. Run the `build_and_test.sh` script from the root of the repository to build and run the unit tests using the locally installed version of Python. +Run `bazel test tests/cpp:all` from the root of the repository to run unit +tests against the C++ portion of the codebase. 
+ #### Local development You may want to run an agent with local changes in an application in order to diff --git a/WORKSPACE b/WORKSPACE new file mode 100644 index 0000000..55013f2 --- /dev/null +++ b/WORKSPACE @@ -0,0 +1,50 @@ +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +http_archive( + name = "bazel_skylib", + sha256 = "74d544d96f4a5bb630d465ca8bbcfe231e3594e5aae57e1edbf17a6eb3ca2506", + urls = [ + "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.3.0/bazel-skylib-1.3.0.tar.gz", + "https://github.com/bazelbuild/bazel-skylib/releases/download/1.3.0/bazel-skylib-1.3.0.tar.gz", + ], +) +load("@bazel_skylib//:workspace.bzl", "bazel_skylib_workspace") +bazel_skylib_workspace() + +http_archive( + name = "com_github_gflags_gflags", + sha256 = "34af2f15cf7367513b352bdcd2493ab14ce43692d2dcd9dfc499492966c64dcf", + strip_prefix = "gflags-2.2.2", + urls = ["https://github.com/gflags/gflags/archive/v2.2.2.tar.gz"], +) + +http_archive( + name = "com_github_google_glog", + sha256 = "21bc744fb7f2fa701ee8db339ded7dce4f975d0d55837a97be7d46e8382dea5a", + strip_prefix = "glog-0.5.0", + urls = ["https://github.com/google/glog/archive/v0.5.0.zip"], +) + +# Pinning to 1.12.1, the last release that supports C++11 +http_archive( + name = "com_google_googletest", + urls = ["https://github.com/google/googletest/archive/58d77fa8070e8cec2dc1ed015d66b454c8d78850.tar.gz"], + strip_prefix = "googletest-58d77fa8070e8cec2dc1ed015d66b454c8d78850", +) + +# Used to build against Python.h +http_archive( + name = "pybind11_bazel", + strip_prefix = "pybind11_bazel-faf56fb3df11287f26dbc66fdedf60a2fc2c6631", + urls = ["https://github.com/pybind/pybind11_bazel/archive/faf56fb3df11287f26dbc66fdedf60a2fc2c6631.zip"], +) + +http_archive( + name = "pybind11", + build_file = "@pybind11_bazel//:pybind11.BUILD", + strip_prefix = "pybind11-2.9.2", + urls = ["https://github.com/pybind/pybind11/archive/v2.9.2.tar.gz"], +) 
+load("@pybind11_bazel//:python_configure.bzl", "python_configure") +python_configure(name = "local_config_python")#, python_interpreter_target = interpreter) + diff --git a/cc-tests/BUILD b/cc-tests/BUILD new file mode 100644 index 0000000..7536fe9 --- /dev/null +++ b/cc-tests/BUILD @@ -0,0 +1,10 @@ +package(default_visibility = ["//visibility:public"]) + +cc_test( + name = "bytecode_manipulator_test", + srcs = ["bytecode_manipulator_test.cc"], + deps = [ + "//src/googleclouddebugger:bytecode_manipulator", + "@com_google_googletest//:gtest_main", + ], +) diff --git a/src/googleclouddebugger/BUILD b/src/googleclouddebugger/BUILD new file mode 100644 index 0000000..dedf1aa --- /dev/null +++ b/src/googleclouddebugger/BUILD @@ -0,0 +1,102 @@ +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "common", + hdrs = ["common.h"], + deps = [ + "@com_github_google_glog//:glog", + "@local_config_python//:python_headers", + ], +) + +cc_library( + name = "nullable", + hdrs = ["nullable.h"], + deps = [ + ":common", + ], +) + +cc_library( + name = "python_util", + srcs = ["python_util.cc"], + hdrs = ["python_util.h"], + deps = [ + ":common", + ":nullable", + ], +) + + +cc_library( + name = "python_callback", + srcs = ["python_callback.cc"], + hdrs = ["python_callback.h"], + deps = [ + ":common", + ":python_util", + ], +) + +cc_library( + name = "leaky_bucket", + srcs = ["leaky_bucket.cc"], + hdrs = ["leaky_bucket.h"], + deps = [ + ":common", + ], +) + +cc_library( + name = "rate_limit", + srcs = ["rate_limit.cc"], + hdrs = ["rate_limit.h"], + deps = [ + ":common", + ":leaky_bucket", + ], +) + +cc_library( + name = "bytecode_manipulator", + srcs = ["bytecode_manipulator.cc"], + hdrs = ["bytecode_manipulator.h"], + deps = [ + ":common", + ], +) + +cc_library( + name = "bytecode_breakpoint", + srcs = ["bytecode_breakpoint.cc"], + hdrs = ["bytecode_breakpoint.h"], + deps = [ + ":bytecode_manipulator", + ":common", + ":python_callback", + ":python_util", + 
], +) + +cc_library( + name = "immutability_tracer", + srcs = ["immutability_tracer.cc"], + hdrs = ["immutability_tracer.h"], + deps = [ + ":common", + ":python_util", + ], +) + +cc_library( + name = "conditional_breakpoint", + srcs = ["conditional_breakpoint.cc"], + hdrs = ["conditional_breakpoint.h"], + deps = [ + ":common", + ":immutability_tracer", + ":python_util", + ":rate_limit", + ":leaky_bucket", + ], +) diff --git a/tests/cpp/BUILD b/tests/cpp/BUILD new file mode 100644 index 0000000..7536fe9 --- /dev/null +++ b/tests/cpp/BUILD @@ -0,0 +1,10 @@ +package(default_visibility = ["//visibility:public"]) + +cc_test( + name = "bytecode_manipulator_test", + srcs = ["bytecode_manipulator_test.cc"], + deps = [ + "//src/googleclouddebugger:bytecode_manipulator", + "@com_google_googletest//:gtest_main", + ], +) diff --git a/tests/cpp/bytecode_manipulator_test.cc b/tests/cpp/bytecode_manipulator_test.cc new file mode 100644 index 0000000..6f21ae1 --- /dev/null +++ b/tests/cpp/bytecode_manipulator_test.cc @@ -0,0 +1,971 @@ +/** + * Copyright 2023 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "src/googleclouddebugger/bytecode_manipulator.h" + +#include +#include + +namespace devtools { +namespace cdbg { + +static std::string FormatOpcode(uint8_t opcode) { + switch (opcode) { + case POP_TOP: return "POP_TOP"; + case ROT_TWO: return "ROT_TWO"; + case ROT_THREE: return "ROT_THREE"; + case DUP_TOP: return "DUP_TOP"; + case NOP: return "NOP"; + case UNARY_POSITIVE: return "UNARY_POSITIVE"; + case UNARY_NEGATIVE: return "UNARY_NEGATIVE"; + case UNARY_NOT: return "UNARY_NOT"; + case UNARY_INVERT: return "UNARY_INVERT"; + case BINARY_POWER: return "BINARY_POWER"; + case BINARY_MULTIPLY: return "BINARY_MULTIPLY"; + case BINARY_MODULO: return "BINARY_MODULO"; + case BINARY_ADD: return "BINARY_ADD"; + case BINARY_SUBTRACT: return "BINARY_SUBTRACT"; + case BINARY_SUBSCR: return "BINARY_SUBSCR"; + case BINARY_FLOOR_DIVIDE: return "BINARY_FLOOR_DIVIDE"; + case BINARY_TRUE_DIVIDE: return "BINARY_TRUE_DIVIDE"; + case INPLACE_FLOOR_DIVIDE: return "INPLACE_FLOOR_DIVIDE"; + case INPLACE_TRUE_DIVIDE: return "INPLACE_TRUE_DIVIDE"; + case INPLACE_ADD: return "INPLACE_ADD"; + case INPLACE_SUBTRACT: return "INPLACE_SUBTRACT"; + case INPLACE_MULTIPLY: return "INPLACE_MULTIPLY"; + case INPLACE_MODULO: return "INPLACE_MODULO"; + case STORE_SUBSCR: return "STORE_SUBSCR"; + case DELETE_SUBSCR: return "DELETE_SUBSCR"; + case BINARY_LSHIFT: return "BINARY_LSHIFT"; + case BINARY_RSHIFT: return "BINARY_RSHIFT"; + case BINARY_AND: return "BINARY_AND"; + case BINARY_XOR: return "BINARY_XOR"; + case BINARY_OR: return "BINARY_OR"; + case INPLACE_POWER: return "INPLACE_POWER"; + case GET_ITER: return "GET_ITER"; + case PRINT_EXPR: return "PRINT_EXPR"; + case INPLACE_LSHIFT: return "INPLACE_LSHIFT"; + case INPLACE_RSHIFT: return "INPLACE_RSHIFT"; + case INPLACE_AND: return "INPLACE_AND"; + case INPLACE_XOR: return "INPLACE_XOR"; + case INPLACE_OR: return "INPLACE_OR"; + case RETURN_VALUE: return "RETURN_VALUE"; + case IMPORT_STAR: return "IMPORT_STAR"; + case YIELD_VALUE: 
return "YIELD_VALUE"; + case POP_BLOCK: return "POP_BLOCK"; +#if PY_VERSION_HEX <= 0x03080000 + case END_FINALLY: return "END_FINALLY"; +#endif + case STORE_NAME: return "STORE_NAME"; + case DELETE_NAME: return "DELETE_NAME"; + case UNPACK_SEQUENCE: return "UNPACK_SEQUENCE"; + case FOR_ITER: return "FOR_ITER"; + case LIST_APPEND: return "LIST_APPEND"; + case STORE_ATTR: return "STORE_ATTR"; + case DELETE_ATTR: return "DELETE_ATTR"; + case STORE_GLOBAL: return "STORE_GLOBAL"; + case DELETE_GLOBAL: return "DELETE_GLOBAL"; + case LOAD_CONST: return "LOAD_CONST"; + case LOAD_NAME: return "LOAD_NAME"; + case BUILD_TUPLE: return "BUILD_TUPLE"; + case BUILD_LIST: return "BUILD_LIST"; + case BUILD_SET: return "BUILD_SET"; + case BUILD_MAP: return "BUILD_MAP"; + case LOAD_ATTR: return "LOAD_ATTR"; + case COMPARE_OP: return "COMPARE_OP"; + case IMPORT_NAME: return "IMPORT_NAME"; + case IMPORT_FROM: return "IMPORT_FROM"; + case JUMP_FORWARD: return "JUMP_FORWARD"; + case JUMP_IF_FALSE_OR_POP: return "JUMP_IF_FALSE_OR_POP"; + case JUMP_IF_TRUE_OR_POP: return "JUMP_IF_TRUE_OR_POP"; + case JUMP_ABSOLUTE: return "JUMP_ABSOLUTE"; + case POP_JUMP_IF_FALSE: return "POP_JUMP_IF_FALSE"; + case POP_JUMP_IF_TRUE: return "POP_JUMP_IF_TRUE"; + case LOAD_GLOBAL: return "LOAD_GLOBAL"; + case SETUP_FINALLY: return "SETUP_FINALLY"; + case LOAD_FAST: return "LOAD_FAST"; + case STORE_FAST: return "STORE_FAST"; + case DELETE_FAST: return "DELETE_FAST"; + case RAISE_VARARGS: return "RAISE_VARARGS"; + case CALL_FUNCTION: return "CALL_FUNCTION"; + case MAKE_FUNCTION: return "MAKE_FUNCTION"; + case BUILD_SLICE: return "BUILD_SLICE"; + case LOAD_CLOSURE: return "LOAD_CLOSURE"; + case LOAD_DEREF: return "LOAD_DEREF"; + case STORE_DEREF: return "STORE_DEREF"; + case CALL_FUNCTION_KW: return "CALL_FUNCTION_KW"; + case SETUP_WITH: return "SETUP_WITH"; + case EXTENDED_ARG: return "EXTENDED_ARG"; + case SET_ADD: return "SET_ADD"; + case MAP_ADD: return "MAP_ADD"; +#if PY_VERSION_HEX < 0x03080000 + case 
BREAK_LOOP: return "BREAK_LOOP"; + case CONTINUE_LOOP: return "CONTINUE_LOOP"; + case SETUP_LOOP: return "SETUP_LOOP"; + case SETUP_EXCEPT: return "SETUP_EXCEPT"; +#endif +#if PY_MAJOR_VERSION >= 3 + case DUP_TOP_TWO: return "DUP_TOP_TWO"; + case BINARY_MATRIX_MULTIPLY: return "BINARY_MATRIX_MULTIPLY"; + case INPLACE_MATRIX_MULTIPLY: return "INPLACE_MATRIX_MULTIPLY"; + case GET_AITER: return "GET_AITER"; + case GET_ANEXT: return "GET_ANEXT"; + case BEFORE_ASYNC_WITH: return "BEFORE_ASYNC_WITH"; + case GET_YIELD_FROM_ITER: return "GET_YIELD_FROM_ITER"; + case LOAD_BUILD_CLASS: return "LOAD_BUILD_CLASS"; + case YIELD_FROM: return "YIELD_FROM"; + case GET_AWAITABLE: return "GET_AWAITABLE"; +#if PY_VERSION_HEX <= 0x03080000 + case WITH_CLEANUP_START: return "WITH_CLEANUP_START"; + case WITH_CLEANUP_FINISH: return "WITH_CLEANUP_FINISH"; +#endif + case SETUP_ANNOTATIONS: return "SETUP_ANNOTATIONS"; + case POP_EXCEPT: return "POP_EXCEPT"; + case UNPACK_EX: return "UNPACK_EX"; +#if PY_VERSION_HEX < 0x03070000 + case STORE_ANNOTATION: return "STORE_ANNOTATION"; +#endif + case CALL_FUNCTION_EX: return "CALL_FUNCTION_EX"; + case LOAD_CLASSDEREF: return "LOAD_CLASSDEREF"; +#if PY_VERSION_HEX <= 0x03080000 + case BUILD_LIST_UNPACK: return "BUILD_LIST_UNPACK"; + case BUILD_MAP_UNPACK: return "BUILD_MAP_UNPACK"; + case BUILD_MAP_UNPACK_WITH_CALL: return "BUILD_MAP_UNPACK_WITH_CALL"; + case BUILD_TUPLE_UNPACK: return "BUILD_TUPLE_UNPACK"; + case BUILD_SET_UNPACK: return "BUILD_SET_UNPACK"; +#endif + case SETUP_ASYNC_WITH: return "SETUP_ASYNC_WITH"; + case FORMAT_VALUE: return "FORMAT_VALUE"; + case BUILD_CONST_KEY_MAP: return "BUILD_CONST_KEY_MAP"; + case BUILD_STRING: return "BUILD_STRING"; +#if PY_VERSION_HEX <= 0x03080000 + case BUILD_TUPLE_UNPACK_WITH_CALL: return "BUILD_TUPLE_UNPACK_WITH_CALL"; +#endif +#if PY_VERSION_HEX >= 0x03070000 + case LOAD_METHOD: return "LOAD_METHOD"; + case CALL_METHOD: return "CALL_METHOD"; +#endif +#if PY_VERSION_HEX >= 0x03080000 && 
PY_VERSION_HEX < 0x03090000 + case BEGIN_FINALLY: return "BEGIN_FINALLY"; + case POP_FINALLY: return "POP_FINALLY"; +#endif +#if PY_VERSION_HEX >= 0x03080000 + case ROT_FOUR: return "ROT_FOUR"; + case END_ASYNC_FOR: return "END_ASYNC_FOR"; +#endif +#if PY_VERSION_HEX >= 0x03080000 && PY_VERSION_HEX < 0x03090000 + // Added in Python 3.8 and removed in 3.9 + case CALL_FINALLY: return "CALL_FINALLY"; +#endif +#if PY_VERSION_HEX >= 0x03090000 + case RERAISE: return "RERAISE"; + case WITH_EXCEPT_START: return "WITH_EXCEPT_START"; + case LOAD_ASSERTION_ERROR: return "LOAD_ASSERTION_ERROR"; + case LIST_TO_TUPLE: return "LIST_TO_TUPLE"; + case IS_OP: return "IS_OP"; + case CONTAINS_OP: return "CONTAINS_OP"; + case JUMP_IF_NOT_EXC_MATCH: return "JUMP_IF_NOT_EXC_MATCH"; + case LIST_EXTEND: return "LIST_EXTEND"; + case SET_UPDATE: return "SET_UPDATE"; + case DICT_MERGE: return "DICT_MERGE"; + case DICT_UPDATE: return "DICT_UPDATE"; +#endif +#else + case STOP_CODE: return "STOP_CODE"; + case ROT_FOUR: return "ROT_FOUR"; + case UNARY_CONVERT: return "UNARY_CONVERT"; + case BINARY_DIVIDE: return "BINARY_DIVIDE"; + case SLICE: return "SLICE"; + case SLICE_1: return "SLICE_1"; + case SLICE_2: return "SLICE_2"; + case SLICE_3: return "SLICE_3"; + case STORE_SLICE: return "STORE_SLICE"; + case STORE_SLICE_1: return "STORE_SLICE_1"; + case STORE_SLICE_2: return "STORE_SLICE_2"; + case STORE_SLICE_3: return "STORE_SLICE_3"; + case DELETE_SLICE: return "DELETE_SLICE"; + case DELETE_SLICE_1: return "DELETE_SLICE_1"; + case DELETE_SLICE_2: return "DELETE_SLICE_2"; + case DELETE_SLICE_3: return "DELETE_SLICE_3"; + case STORE_MAP: return "STORE_MAP"; + case INPLACE_DIVIDE: return "INPLACE_DIVIDE"; + case PRINT_NEWLINE: return "PRINT_NEWLINE"; + case PRINT_ITEM: return "PRINT_ITEM"; + case PRINT_ITEM_TO: return "PRINT_ITEM_TO"; + case PRINT_NEWLINE_TO: return "PRINT_NEWLINE_TO"; + case LOAD_LOCALS: return "LOAD_LOCALS"; + case EXEC_STMT: return "EXEC_STMT"; + case BUILD_CLASS: return 
"BUILD_CLASS"; + case DUP_TOPX: return "DUP_TOPX"; + case MAKE_CLOSURE: return "MAKE_CLOSURE"; + case CALL_FUNCTION_VAR: return "CALL_FUNCTION_VAR"; + case CALL_FUNCTION_VAR_KW: return "CALL_FUNCTION_VAR_KW"; + case WITH_CLEANUP: return "WITH_CLEANUP"; +#endif + default: return std::to_string(static_cast(opcode)); + } +} + +static std::string FormatBytecode(const std::vector& bytecode, + int indent) { + std::string rc; + int remaining_argument_bytes = 0; + for (auto it = bytecode.begin(); it != bytecode.end(); ++it) { + std::string line; + if (remaining_argument_bytes == 0) { + line = FormatOpcode(*it); + remaining_argument_bytes = 1; + } else { + line = std::to_string(static_cast(*it)); + --remaining_argument_bytes; + } + + if (it < bytecode.end() - 1) { + line += ','; + } + + line.resize(20, ' '); + line += "// offset "; + line += std::to_string(it - bytecode.begin()); + line += '.'; + + rc += std::string(indent, ' '); + rc += line; + + if (it < bytecode.end() - 1) { + rc += '\n'; + } + } + + return rc; +} + +static void VerifyBytecode(const BytecodeManipulator& bytecode_manipulator, + std::vector expected_bytecode) { + EXPECT_EQ(expected_bytecode, bytecode_manipulator.bytecode()) + << "Actual bytecode:\n" + << " {\n" + << FormatBytecode(bytecode_manipulator.bytecode(), 10) << "\n" + << " }"; +} + +static void VerifyLineNumbersTable( + const BytecodeManipulator& bytecode_manipulator, + std::vector expected_lnotab) { + // Convert to integers to better logging by EXPECT_EQ. 
+ std::vector expected(expected_lnotab.begin(), expected_lnotab.end()); + std::vector actual( + bytecode_manipulator.lnotab().begin(), + bytecode_manipulator.lnotab().end()); + + EXPECT_EQ(expected, actual); +} + +TEST(BytecodeManipulatorTest, EmptyBytecode) { + BytecodeManipulator instance({}, false, {}); + EXPECT_FALSE(instance.InjectMethodCall(0, 0)); +} + + +TEST(BytecodeManipulatorTest, HasLineNumbersTable) { + BytecodeManipulator instance1({}, false, {}); + EXPECT_FALSE(instance1.has_lnotab()); + + BytecodeManipulator instance2({}, true, {}); + EXPECT_TRUE(instance2.has_lnotab()); +} + + + + +TEST(BytecodeManipulatorTest, InsertionSimple) { + BytecodeManipulator instance({ NOP, 0, RETURN_VALUE, 0 }, false, {}); + ASSERT_TRUE(instance.InjectMethodCall(2, 47)); + + VerifyBytecode( + instance, + { + NOP, // offset 0. + 0, // offset 1. + LOAD_CONST, // offset 2. + 47, // offset 3. + CALL_FUNCTION, // offset 4. + 0, // offset 5. + POP_TOP, // offset 6. + 0, // offset 7. + RETURN_VALUE, // offset 8. + 0 // offset 9. + }); +} + + +TEST(BytecodeManipulatorTest, InsertionExtended) { + BytecodeManipulator instance({ NOP, 0, RETURN_VALUE, 0 }, false, {}); + ASSERT_TRUE(instance.InjectMethodCall(2, 0x12345678)); + + VerifyBytecode( + instance, + { + NOP, // offset 0. + 0, // offset 1. + EXTENDED_ARG, // offset 2. + 0x12, // offset 3. + EXTENDED_ARG, // offset 4. + 0x34, // offset 5. + EXTENDED_ARG, // offset 6. + 0x56, // offset 7. + LOAD_CONST, // offset 8. + 0x78, // offset 9. + CALL_FUNCTION, // offset 10. + 0, // offset 11. + POP_TOP, // offset 12. + 0, // offset 13. + RETURN_VALUE, // offset 14. + 0 // offset 15. + }); +} + + +TEST(BytecodeManipulatorTest, InsertionBeginning) { + BytecodeManipulator instance({ NOP, 0, RETURN_VALUE, 0 }, false, {}); + ASSERT_TRUE(instance.InjectMethodCall(0, 47)); + + VerifyBytecode( + instance, + { + LOAD_CONST, // offset 0. + 47, // offset 1. + CALL_FUNCTION, // offset 2. + 0, // offset 3. + POP_TOP, // offset 4. + 0, // offset 5. 
+ NOP, // offset 6. + 0, // offset 7. + RETURN_VALUE, // offset 8. + 0 // offset 9. + }); +} + + +TEST(BytecodeManipulatorTest, InsertionOffsetUpdates) { + BytecodeManipulator instance( + { + JUMP_FORWARD, + 12, + NOP, + 0, + JUMP_ABSOLUTE, + 34, + }, + false, + {}); + ASSERT_TRUE(instance.InjectMethodCall(2, 47)); + + VerifyBytecode( + instance, + { + JUMP_FORWARD, // offset 0. + 12 + 6, // offset 1. + LOAD_CONST, // offset 2. + 47, // offset 3. + CALL_FUNCTION, // offset 4. + 0, // offset 5. + POP_TOP, // offset 6. + 0, // offset 7. + NOP, // offset 8. + 0, // offset 9. + JUMP_ABSOLUTE, // offset 10. + 34 + 6 // offset 11. + }); +} + + +TEST(BytecodeManipulatorTest, InsertionExtendedOffsetUpdates) { + BytecodeManipulator instance( + { + EXTENDED_ARG, + 12, + EXTENDED_ARG, + 34, + EXTENDED_ARG, + 56, + JUMP_FORWARD, + 78, + NOP, + 0, + EXTENDED_ARG, + 98, + EXTENDED_ARG, + 76, + EXTENDED_ARG, + 54, + JUMP_ABSOLUTE, + 32 + }, + false, + {}); + ASSERT_TRUE(instance.InjectMethodCall(8, 11)); + + VerifyBytecode( + instance, + { + EXTENDED_ARG, // offset 0. + 12, // offset 1. + EXTENDED_ARG, // offset 2. + 34, // offset 3. + EXTENDED_ARG, // offset 4. + 56, // offset 5. + JUMP_FORWARD, // offset 6. + 78 + 6, // offset 7. + LOAD_CONST, // offset 8. + 11, // offset 9. + CALL_FUNCTION, // offset 10. + 0, // offset 11. + POP_TOP, // offset 12. + 0, // offset 13. + NOP, // offset 14. + 0, // offset 15. + EXTENDED_ARG, // offset 16. + 98, // offset 17. + EXTENDED_ARG, // offset 18. + 76, // offset 19. + EXTENDED_ARG, // offset 20. + 54, // offset 21. + JUMP_ABSOLUTE, // offset 22. + 32 + 6 // offset 23. + }); +} + + +TEST(BytecodeManipulatorTest, InsertionDeltaOffsetNoUpdate) { + BytecodeManipulator instance( + { + JUMP_FORWARD, + 2, + NOP, + 0, + RETURN_VALUE, + 0, + JUMP_FORWARD, + 2, + }, + false, {}); + ASSERT_TRUE(instance.InjectMethodCall(4, 99)); + + VerifyBytecode( + instance, + { + JUMP_FORWARD, // offset 0. + 2, // offset 1. + NOP, // offset 2. + 0, // offset 3. 
+ LOAD_CONST, // offset 4. + 99, // offset 5. + CALL_FUNCTION, // offset 6. + 0, // offset 7. + POP_TOP, // offset 8. + 0, // offset 9. + RETURN_VALUE, // offset 10. + 0, // offset 11. + JUMP_FORWARD, // offset 12. + 2 // offset 13. + }); +} + + +TEST(BytecodeManipulatorTest, InsertionAbsoluteOffsetNoUpdate) { + BytecodeManipulator instance( + { + JUMP_ABSOLUTE, + 2, + RETURN_VALUE, + 0 + }, + false, + {}); + ASSERT_TRUE(instance.InjectMethodCall(2, 99)); + + VerifyBytecode( + instance, + { + JUMP_ABSOLUTE, // offset 0. + 2, // offset 1. + LOAD_CONST, // offset 2. + 99, // offset 3. + CALL_FUNCTION, // offset 4. + 0, // offset 5. + POP_TOP, // offset 6. + 0, // offset 7. + RETURN_VALUE, // offset 8. + 0 // offset 9. + }); +} + + +TEST(BytecodeManipulatorTest, InsertionOffsetUneededExtended) { + BytecodeManipulator instance( + { EXTENDED_ARG, 0, JUMP_FORWARD, 2, NOP, 0 }, + false, + {}); + ASSERT_TRUE(instance.InjectMethodCall(4, 11)); + + VerifyBytecode( + instance, + { + EXTENDED_ARG, // offset 0. + 0, // offset 1. + JUMP_FORWARD, // offset 2. + 8, // offset 3. + LOAD_CONST, // offset 4. + 11, // offset 5. + CALL_FUNCTION, // offset 6. + 0, // offset 7. + POP_TOP, // offset 8. + 0, // offset 9. + NOP, // offset 10. + 0 // offset 11. + }); +} + + +TEST(BytecodeManipulatorTest, InsertionOffsetUpgradeExtended) { + BytecodeManipulator instance({ JUMP_ABSOLUTE, 250 , NOP, 0 }, false, {}); + ASSERT_TRUE(instance.InjectMethodCall(2, 11)); + + VerifyBytecode( + instance, + { + EXTENDED_ARG, // offset 0. + 1, // offset 1. + JUMP_ABSOLUTE, // offset 2. + 2, // offset 3. + LOAD_CONST, // offset 4. + 11, // offset 5. + CALL_FUNCTION, // offset 6. + 0, // offset 7. + POP_TOP, // offset 8. + 0, // offset 9. + NOP, // offset 10. + 0 // offset 11. 
+ }); +} + + +TEST(BytecodeManipulatorTest, InsertionOffsetUpgradeExtendedTwice) { + BytecodeManipulator instance( + { JUMP_ABSOLUTE, 248, JUMP_ABSOLUTE, 250, NOP, 0 }, + false, + {}); + ASSERT_TRUE(instance.InjectMethodCall(4, 12)); + + VerifyBytecode( + instance, + { + EXTENDED_ARG, // offset 0. + 1, // offset 1. + JUMP_ABSOLUTE, // offset 2. + 2, // offset 3. + EXTENDED_ARG, // offset 4. + 1, // offset 5. + JUMP_ABSOLUTE, // offset 6. + 4, // offset 7. + LOAD_CONST, // offset 8. + 12, // offset 9. + CALL_FUNCTION, // offset 10. + 0, // offset 11. + POP_TOP, // offset 12. + 0, // offset 13. + NOP, // offset 14. + 0 // offset 15. + }); +} + + +TEST(BytecodeManipulatorTest, InsertionBadInstruction) { + BytecodeManipulator instance( + { NOP, 0, NOP, 0, LOAD_CONST }, + false, + {}); + EXPECT_FALSE(instance.InjectMethodCall(2, 0)); +} + + +TEST(BytecodeManipulatorTest, InsertionNegativeOffset) { + BytecodeManipulator instance({ NOP, 0, RETURN_VALUE, 0 }, false, {}); + EXPECT_FALSE(instance.InjectMethodCall(-1, 0)); +} + + +TEST(BytecodeManipulatorTest, InsertionOutOfRangeOffset) { + BytecodeManipulator instance({ NOP, 0, RETURN_VALUE, 0 }, false, {}); + EXPECT_FALSE(instance.InjectMethodCall(4, 0)); +} + + +TEST(BytecodeManipulatorTest, InsertionMidInstruction) { + BytecodeManipulator instance( + { NOP, 0, LOAD_CONST, 0, NOP, 0 }, + false, + {}); + + EXPECT_FALSE(instance.InjectMethodCall(1, 0)); + EXPECT_FALSE(instance.InjectMethodCall(3, 0)); + EXPECT_FALSE(instance.InjectMethodCall(5, 0)); +} + + +TEST(BytecodeManipulatorTest, InsertionTooManyUpgrades) { + BytecodeManipulator instance( + { + JUMP_ABSOLUTE, 250, + JUMP_ABSOLUTE, 250, + JUMP_ABSOLUTE, 250, + JUMP_ABSOLUTE, 250, + JUMP_ABSOLUTE, 250, + JUMP_ABSOLUTE, 250, + JUMP_ABSOLUTE, 250, + JUMP_ABSOLUTE, 250, + JUMP_ABSOLUTE, 250, + JUMP_ABSOLUTE, 250, + NOP, 0 + }, + false, + {}); + EXPECT_FALSE(instance.InjectMethodCall(20, 0)); +} + + +TEST(BytecodeManipulatorTest, IncompleteBytecodeInsert) { + 
BytecodeManipulator instance({ NOP, 0, LOAD_CONST }, false, {}); + EXPECT_FALSE(instance.InjectMethodCall(2, 0)); +} + + +TEST(BytecodeManipulatorTest, IncompleteBytecodeAppend) { + BytecodeManipulator instance( + { YIELD_VALUE, 0, NOP, 0, LOAD_CONST }, + false, {}); + EXPECT_FALSE(instance.InjectMethodCall(4, 0)); +} + + +TEST(BytecodeManipulatorTest, LineNumbersTableUpdateBeginning) { + BytecodeManipulator instance( + { NOP, 0, RETURN_VALUE, 0 }, + true, + { 2, 1, 2, 1 }); + ASSERT_TRUE(instance.InjectMethodCall(0, 99)); + + VerifyLineNumbersTable(instance, { 8, 1, 2, 1 }); +} + + +TEST(BytecodeManipulatorTest, LineNumbersTableUpdateLineBoundary) { + BytecodeManipulator instance( + { NOP, 0, RETURN_VALUE, 0 }, + true, + { 0, 1, 2, 1, 2, 1 }); + ASSERT_TRUE(instance.InjectMethodCall(2, 99)); + + VerifyLineNumbersTable(instance, { 0, 1, 2, 1, 8, 1 }); +} + + +TEST(BytecodeManipulatorTest, LineNumbersTableUpdateMidLine) { + BytecodeManipulator instance( + { NOP, 0, NOP, 0, RETURN_VALUE, 0 }, + true, + { 0, 1, 4, 1 }); + ASSERT_TRUE(instance.InjectMethodCall(2, 99)); + + VerifyLineNumbersTable(instance, { 0, 1, 10, 1 }); +} + + +TEST(BytecodeManipulatorTest, LineNumbersTablePastEnd) { + BytecodeManipulator instance( + { NOP, 0, NOP, 0, NOP, 0, RETURN_VALUE, 0 }, + true, + { 0, 1 }); + ASSERT_TRUE(instance.InjectMethodCall(6, 99)); + + VerifyLineNumbersTable(instance, { 0, 1 }); +} + + +TEST(BytecodeManipulatorTest, LineNumbersTableUpgradeExtended) { + BytecodeManipulator instance( + { JUMP_ABSOLUTE, 250, RETURN_VALUE, 0 }, + true, + { 2, 1, 2, 1 }); + ASSERT_TRUE(instance.InjectMethodCall(2, 99)); + + VerifyLineNumbersTable(instance, { 4, 1, 8, 1 }); +} + + +TEST(BytecodeManipulatorTest, LineNumbersTableOverflow) { + std::vector bytecode(300, 0); + BytecodeManipulator instance( + bytecode, + true, + { 254, 1 }); + ASSERT_TRUE(instance.InjectMethodCall(2, 99)); + + VerifyLineNumbersTable(instance, { 255, 0, 5, 1 }); +} + + +TEST(BytecodeManipulatorTest, SuccessAppend) 
{ + BytecodeManipulator instance( + { YIELD_VALUE, 0, LOAD_CONST, 0, NOP, 0 }, + false, + {}); + ASSERT_TRUE(instance.InjectMethodCall(2, 57)); + + VerifyBytecode( + instance, + { + YIELD_VALUE, // offset 0. + 0, // offset 1. + JUMP_ABSOLUTE, // offset 2. + 6, // offset 3. + NOP, // offset 4. + 0, // offset 5. + LOAD_CONST, // offset 6. + 57, // offset 7. + CALL_FUNCTION, // offset 8. + 0, // offset 9. + POP_TOP, // offset 10. + 0, // offset 11. + LOAD_CONST, // offset 12. + 0, // offset 13. + JUMP_ABSOLUTE, // offset 14. + 4 // offset 15. + }); +} + + +TEST(BytecodeManipulatorTest, SuccessAppendYieldFrom) { + BytecodeManipulator instance( + { YIELD_FROM, 0, LOAD_CONST, 0, NOP, 0 }, + false, + {}); + ASSERT_TRUE(instance.InjectMethodCall(2, 57)); + + VerifyBytecode( + instance, + { + YIELD_FROM, // offset 0. + 0, // offset 1. + JUMP_ABSOLUTE, // offset 2. + 6, // offset 3. + NOP, // offset 4. + 0, // offset 5. + LOAD_CONST, // offset 6. + 57, // offset 7. + CALL_FUNCTION, // offset 8. + 0, // offset 9. + POP_TOP, // offset 10. + 0, // offset 11. + LOAD_CONST, // offset 12. + 0, // offset 13. + JUMP_ABSOLUTE, // offset 14. + 4 // offset 15. + }); +} + + +TEST(BytecodeManipulatorTest, AppendExtraPadding) { + BytecodeManipulator instance( + { + YIELD_VALUE, + 0, + EXTENDED_ARG, + 15, + EXTENDED_ARG, + 16, + EXTENDED_ARG, + 17, + LOAD_CONST, + 18, + RETURN_VALUE, + 0 + }, + false, {}); + ASSERT_TRUE(instance.InjectMethodCall(2, 0x7273)); + + VerifyBytecode( + instance, + { + YIELD_VALUE, // offset 0. + 0, // offset 1. + JUMP_ABSOLUTE, // offset 2. + 12, // offset 3. + NOP, // offset 4. Args for NOP do not matter. + 9, // offset 5. + NOP, // offset 6. + 9, // offset 7. + NOP, // offset 8. + 9, // offset 9. + RETURN_VALUE, // offset 10. + 0, // offset 11. + EXTENDED_ARG, // offset 12. + 0x72, // offset 13. + LOAD_CONST, // offset 14. + 0x73, // offset 15. + CALL_FUNCTION, // offset 16. + 0, // offset 17. + POP_TOP, // offset 18. + 0, // offset 19. 
+ EXTENDED_ARG, // offset 20. + 15, // offset 21. + EXTENDED_ARG, // offset 22. + 16, // offset 23. + EXTENDED_ARG, // offset 24. + 17, // offset 25. + LOAD_CONST, // offset 26. + 18, // offset 27. + JUMP_ABSOLUTE, // offset 28. + 10 // offset 29. + }); +} + + +TEST(BytecodeManipulatorTest, AppendToEnd) { + std::vector bytecode = {YIELD_VALUE, 0}; + // Case where trampoline requires 4 bytes to write. + bytecode.resize(300); + BytecodeManipulator instance(bytecode, false, {}); + + // This scenario could be supported in theory, but it's not. The purpose of + // this test case is to verify there are no crashes or corruption. + ASSERT_FALSE(instance.InjectMethodCall(298, 0x12)); +} + + +TEST(BytecodeManipulatorTest, NoSpaceForTrampoline) { + const std::vector test_cases[] = { + {YIELD_VALUE, 0, YIELD_VALUE, 0, NOP, 0}, + {YIELD_VALUE, 0, FOR_ITER, 0, NOP, 0}, + {YIELD_VALUE, 0, JUMP_FORWARD, 0, NOP, 0}, +#if PY_VERSION_HEX < 0x03080000 + {YIELD_VALUE, 0, SETUP_LOOP, 0, NOP, 0}, +#endif + {YIELD_VALUE, 0, SETUP_FINALLY, 0, NOP, 0}, +#if PY_VERSION_HEX < 0x03080000 + {YIELD_VALUE, 0, SETUP_LOOP, 0, NOP, 0}, + {YIELD_VALUE, 0, SETUP_EXCEPT, 0, NOP, 0}, +#endif +#if PY_VERSION_HEX >= 0x03080000 && PY_VERSION_HEX < 0x03090000 + {YIELD_VALUE, 0, CALL_FINALLY, 0, NOP, 0}, +#endif + }; + + for (const auto& test_case : test_cases) { + BytecodeManipulator instance(test_case, false, {}); + EXPECT_FALSE(instance.InjectMethodCall(2, 0)) + << "Input:\n" + << FormatBytecode(test_case, 4) << "\n" + << "Unexpected output:\n" + << FormatBytecode(instance.bytecode(), 4); + } + + // Case where trampoline requires 4 bytes to write. + std::vector bytecode(300, 0); + bytecode[0] = YIELD_VALUE; + bytecode[2] = NOP; + bytecode[4] = YIELD_VALUE; + BytecodeManipulator instance(bytecode, false, {}); + ASSERT_FALSE(instance.InjectMethodCall(2, 0x12)); +} + +// Tests that we don't allow jumping into the middle of the space reserved for +// the trampoline. 
See the comments in AppendMethodCall() in +// bytecode_manipulator.cc. +TEST(BytecodeManipulatorTest, JumpMidRelocatedInstructions) { + std::vector test_cases[] = { + {YIELD_VALUE, 0, FOR_ITER, 2, LOAD_CONST, 0}, + {YIELD_VALUE, 0, JUMP_FORWARD, 2, LOAD_CONST, 0}, + {YIELD_VALUE, 0, SETUP_FINALLY, 2, LOAD_CONST, 0}, + {YIELD_VALUE, 0, SETUP_WITH, 2, LOAD_CONST, 0}, + {YIELD_VALUE, 0, SETUP_FINALLY, 2, LOAD_CONST, 0}, + {YIELD_VALUE, 0, JUMP_IF_FALSE_OR_POP, 6, LOAD_CONST, 0}, + {YIELD_VALUE, 0, JUMP_IF_TRUE_OR_POP, 6, LOAD_CONST, 0}, + {YIELD_VALUE, 0, JUMP_ABSOLUTE, 6, LOAD_CONST, 0}, + {YIELD_VALUE, 0, POP_JUMP_IF_FALSE, 6, LOAD_CONST, 0}, + {YIELD_VALUE, 0, POP_JUMP_IF_TRUE, 6, LOAD_CONST, 0}, +#if PY_VERSION_HEX < 0x03080000 + {YIELD_VALUE, 0, SETUP_LOOP, 2, LOAD_CONST, 0}, + {YIELD_VALUE, 0, CONTINUE_LOOP, 6, LOAD_CONST, 0}, +#endif + }; + + for (auto& test_case : test_cases) { + // Case where trampoline requires 4 bytes to write. + test_case.resize(300); + BytecodeManipulator instance(test_case, false, {}); + EXPECT_FALSE(instance.InjectMethodCall(4, 0)) + << "Input:\n" + << FormatBytecode(test_case, 4) << "\n" + << "Unexpected output:\n" + << FormatBytecode(instance.bytecode(), 4); + } +} + + +// Test that we allow jumping to the start of the space reserved for the +// trampoline. +TEST(BytecodeManipulatorTest, JumpStartOfRelocatedInstructions) { + const std::vector test_cases[] = { + {YIELD_VALUE, 0, FOR_ITER, 0, LOAD_CONST, 0}, + {YIELD_VALUE, 0, SETUP_WITH, 0, LOAD_CONST, 0}, + {YIELD_VALUE, 0, JUMP_ABSOLUTE, 4, LOAD_CONST, 0}}; + + for (const auto& test_case : test_cases) { + BytecodeManipulator instance(test_case, false, {}); + EXPECT_TRUE(instance.InjectMethodCall(4, 0)) + << "Input:\n" << FormatBytecode(test_case, 4); + } +} + + +// Test that we allow jumping after the space reserved for the trampoline. 
+TEST(BytecodeManipulatorTest, JumpAfterRelocatedInstructions) { + const std::vector test_cases[] = { + {YIELD_VALUE, 0, FOR_ITER, 2, LOAD_CONST, 0, NOP, 0}, + {YIELD_VALUE, 0, SETUP_WITH, 2, LOAD_CONST, 0, NOP, 0}, + {YIELD_VALUE, 0, JUMP_ABSOLUTE, 6, LOAD_CONST, 0, NOP, 0}}; + + for (const auto& test_case : test_cases) { + BytecodeManipulator instance(test_case, false, {}); + EXPECT_TRUE(instance.InjectMethodCall(4, 0)) + << "Input:\n" << FormatBytecode(test_case, 4); + } +} + + +TEST(BytecodeManipulatorTest, InsertionRevertOnFailure) { + const std::vector input{JUMP_FORWARD, 0, NOP, 0, JUMP_ABSOLUTE, 2}; + + BytecodeManipulator instance(input, false, {}); + ASSERT_FALSE(instance.InjectMethodCall(1, 47)); + + VerifyBytecode(instance, input); +} + + +} // namespace cdbg +} // namespace devtools From 32cbfbb6b0cf145bc14e67f672e405eff866de23 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Mon, 23 Jan 2023 11:53:02 -0500 Subject: [PATCH 224/241] chore: move python tests into a subdirectory (#68) --- build_and_test.sh | 2 +- src/build-wheels.sh | 2 +- tests/{ => py}/application_info_test.py | 0 tests/{ => py}/backoff_test.py | 0 tests/{ => py}/breakpoints_manager_test.py | 0 tests/{ => py}/collector_test.py | 0 tests/{ => py}/error_data_visibility_policy_test.py | 0 tests/{ => py}/firebase_client_test.py | 0 tests/{ => py}/gcp_hub_client_test.py | 0 tests/{ => py}/glob_data_visibility_policy_test.py | 0 tests/{ => py}/imphook_test.py | 0 tests/{ => py}/integration_test_disabled.py | 0 tests/{ => py}/integration_test_helper.py | 0 tests/{ => py}/labels_test.py | 0 tests/{ => py}/module_explorer_test_disabled.py | 0 tests/{ => py}/module_search_test.py | 0 tests/{ => py}/module_utils_test.py | 0 tests/{ => py}/native_module_test.py | 0 tests/{ => py}/python_breakpoint_test_disabled.py | 0 tests/{ => py}/python_test_util.py | 0 tests/{ => py}/uniquifier_computer_test.py | 0 tests/{ => py}/yaml_data_visibility_config_reader_test.py | 0 22 files changed, 2 
insertions(+), 2 deletions(-) rename tests/{ => py}/application_info_test.py (100%) rename tests/{ => py}/backoff_test.py (100%) rename tests/{ => py}/breakpoints_manager_test.py (100%) rename tests/{ => py}/collector_test.py (100%) rename tests/{ => py}/error_data_visibility_policy_test.py (100%) rename tests/{ => py}/firebase_client_test.py (100%) rename tests/{ => py}/gcp_hub_client_test.py (100%) rename tests/{ => py}/glob_data_visibility_policy_test.py (100%) rename tests/{ => py}/imphook_test.py (100%) rename tests/{ => py}/integration_test_disabled.py (100%) rename tests/{ => py}/integration_test_helper.py (100%) rename tests/{ => py}/labels_test.py (100%) rename tests/{ => py}/module_explorer_test_disabled.py (100%) rename tests/{ => py}/module_search_test.py (100%) rename tests/{ => py}/module_utils_test.py (100%) rename tests/{ => py}/native_module_test.py (100%) rename tests/{ => py}/python_breakpoint_test_disabled.py (100%) rename tests/{ => py}/python_test_util.py (100%) rename tests/{ => py}/uniquifier_computer_test.py (100%) rename tests/{ => py}/yaml_data_visibility_config_reader_test.py (100%) diff --git a/build_and_test.sh b/build_and_test.sh index 8035cce..8e742ea 100755 --- a/build_and_test.sh +++ b/build_and_test.sh @@ -8,5 +8,5 @@ python3 -m venv /tmp/cdbg-venv source /tmp/cdbg-venv/bin/activate pip3 install -r requirements_dev.txt pip3 install src/dist/* --force-reinstall -python3 -m pytest tests +python3 -m pytest tests/py deactivate diff --git a/src/build-wheels.sh b/src/build-wheels.sh index 2d6e92d..9fec334 100755 --- a/src/build-wheels.sh +++ b/src/build-wheels.sh @@ -72,7 +72,7 @@ for PY_VERSION in ${SUPPORTED_VERSIONS[@]}; do echo "Running tests" "/opt/python/${PY_VERSION}/bin/pip" install google-python-cloud-debugger --no-index -f /io/dist - "/opt/python/${PY_VERSION}/bin/pytest" /io/tests + "/opt/python/${PY_VERSION}/bin/pytest" /io/tests/py done popd diff --git a/tests/application_info_test.py b/tests/py/application_info_test.py 
similarity index 100% rename from tests/application_info_test.py rename to tests/py/application_info_test.py diff --git a/tests/backoff_test.py b/tests/py/backoff_test.py similarity index 100% rename from tests/backoff_test.py rename to tests/py/backoff_test.py diff --git a/tests/breakpoints_manager_test.py b/tests/py/breakpoints_manager_test.py similarity index 100% rename from tests/breakpoints_manager_test.py rename to tests/py/breakpoints_manager_test.py diff --git a/tests/collector_test.py b/tests/py/collector_test.py similarity index 100% rename from tests/collector_test.py rename to tests/py/collector_test.py diff --git a/tests/error_data_visibility_policy_test.py b/tests/py/error_data_visibility_policy_test.py similarity index 100% rename from tests/error_data_visibility_policy_test.py rename to tests/py/error_data_visibility_policy_test.py diff --git a/tests/firebase_client_test.py b/tests/py/firebase_client_test.py similarity index 100% rename from tests/firebase_client_test.py rename to tests/py/firebase_client_test.py diff --git a/tests/gcp_hub_client_test.py b/tests/py/gcp_hub_client_test.py similarity index 100% rename from tests/gcp_hub_client_test.py rename to tests/py/gcp_hub_client_test.py diff --git a/tests/glob_data_visibility_policy_test.py b/tests/py/glob_data_visibility_policy_test.py similarity index 100% rename from tests/glob_data_visibility_policy_test.py rename to tests/py/glob_data_visibility_policy_test.py diff --git a/tests/imphook_test.py b/tests/py/imphook_test.py similarity index 100% rename from tests/imphook_test.py rename to tests/py/imphook_test.py diff --git a/tests/integration_test_disabled.py b/tests/py/integration_test_disabled.py similarity index 100% rename from tests/integration_test_disabled.py rename to tests/py/integration_test_disabled.py diff --git a/tests/integration_test_helper.py b/tests/py/integration_test_helper.py similarity index 100% rename from tests/integration_test_helper.py rename to 
tests/py/integration_test_helper.py diff --git a/tests/labels_test.py b/tests/py/labels_test.py similarity index 100% rename from tests/labels_test.py rename to tests/py/labels_test.py diff --git a/tests/module_explorer_test_disabled.py b/tests/py/module_explorer_test_disabled.py similarity index 100% rename from tests/module_explorer_test_disabled.py rename to tests/py/module_explorer_test_disabled.py diff --git a/tests/module_search_test.py b/tests/py/module_search_test.py similarity index 100% rename from tests/module_search_test.py rename to tests/py/module_search_test.py diff --git a/tests/module_utils_test.py b/tests/py/module_utils_test.py similarity index 100% rename from tests/module_utils_test.py rename to tests/py/module_utils_test.py diff --git a/tests/native_module_test.py b/tests/py/native_module_test.py similarity index 100% rename from tests/native_module_test.py rename to tests/py/native_module_test.py diff --git a/tests/python_breakpoint_test_disabled.py b/tests/py/python_breakpoint_test_disabled.py similarity index 100% rename from tests/python_breakpoint_test_disabled.py rename to tests/py/python_breakpoint_test_disabled.py diff --git a/tests/python_test_util.py b/tests/py/python_test_util.py similarity index 100% rename from tests/python_test_util.py rename to tests/py/python_test_util.py diff --git a/tests/uniquifier_computer_test.py b/tests/py/uniquifier_computer_test.py similarity index 100% rename from tests/uniquifier_computer_test.py rename to tests/py/uniquifier_computer_test.py diff --git a/tests/yaml_data_visibility_config_reader_test.py b/tests/py/yaml_data_visibility_config_reader_test.py similarity index 100% rename from tests/yaml_data_visibility_config_reader_test.py rename to tests/py/yaml_data_visibility_config_reader_test.py From 51c0391326b7ea13708c089ee771f36a755a2a5a Mon Sep 17 00:00:00 2001 From: James McTavish Date: Fri, 27 Jan 2023 15:58:05 -0500 Subject: [PATCH 225/241] chore: clean up unused BUILD file. 
(#69) --- cc-tests/BUILD | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 cc-tests/BUILD diff --git a/cc-tests/BUILD b/cc-tests/BUILD deleted file mode 100644 index 7536fe9..0000000 --- a/cc-tests/BUILD +++ /dev/null @@ -1,10 +0,0 @@ -package(default_visibility = ["//visibility:public"]) - -cc_test( - name = "bytecode_manipulator_test", - srcs = ["bytecode_manipulator_test.cc"], - deps = [ - "//src/googleclouddebugger:bytecode_manipulator", - "@com_google_googletest//:gtest_main", - ], -) From 514836684ec8c1fa696572d3024117cad448e0e9 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Wed, 1 Feb 2023 10:31:36 -0500 Subject: [PATCH 226/241] feat: Add support for Python 3.10 (#70) This PR adds support for Python 3.10 to the debug agent. A summary of the key changes are: * Add handling for new opcodes * Add support for PEP-626 - modified API for line numbers: * Abstract handling of line numbers so that both old and new APIs can be supported * Use co_lines() in Python code for enumerating line numbers in a code object * Copy in and use the _PyLineTable functions for handling co_linetable in C++ code * Modify InsertAndUpdateLineData to update the code object's linetable data using the 3.10 format. Note that this will need to be modified again for Python 3.11 which uses a condensed version of the linetable. 
* Update tests to handle the linetable changes * Update tests to handle modified JUMP offsets * Update tests to handle modified error messages --- README.md | 4 +- src/build-wheels.sh | 2 +- src/googleclouddebugger/BUILD | 1 + .../bytecode_breakpoint.cc | 67 ++++-- src/googleclouddebugger/bytecode_breakpoint.h | 7 +- .../bytecode_manipulator.cc | 74 ++++-- .../bytecode_manipulator.h | 14 +- .../immutability_tracer.cc | 14 ++ src/googleclouddebugger/module_explorer.py | 27 ++- src/googleclouddebugger/python_util.cc | 33 ++- src/googleclouddebugger/python_util.h | 21 +- src/setup.py | 1 + src/third_party/BUILD | 7 + src/third_party/pylinetable.h | 210 ++++++++++++++++++ tests/cpp/bytecode_manipulator_test.cc | 198 ++++++++++++----- tests/py/collector_test.py | 20 +- tests/py/module_search_test.py | 3 +- 17 files changed, 566 insertions(+), 137 deletions(-) create mode 100644 src/third_party/BUILD create mode 100644 src/third_party/pylinetable.h diff --git a/README.md b/README.md index 95aaac8..2a836ba 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Python Cloud Debugger Agent Google [Cloud Debugger](https://cloud.google.com/debugger/) for -Python 3.6, Python 3.7, Python 3.8 and Python 3.9. +Python 3.6, Python 3.7, Python 3.8, Python 3.9, and Python 3.10. ## Overview @@ -28,7 +28,7 @@ tested on Debian Linux, but it should work on other distributions as well. Cloud Debugger consists of 3 primary components: 1. The Python debugger agent (this repo implements one for CPython 3.6, - 3.7, 3.8 and 3.9). + 3.7, 3.8, 3.9, and 3.10). 2. Cloud Debugger service storing and managing snapshots/logpoints. Explore the APIs using [APIs Explorer](https://cloud.google.com/debugger/api/reference/rest/). 
diff --git a/src/build-wheels.sh b/src/build-wheels.sh index 9fec334..1e4a0c6 100755 --- a/src/build-wheels.sh +++ b/src/build-wheels.sh @@ -3,7 +3,7 @@ GFLAGS_URL=https://github.com/gflags/gflags/archive/v2.2.2.tar.gz GLOG_URL=https://github.com/google/glog/archive/v0.4.0.tar.gz -SUPPORTED_VERSIONS=(cp36-cp36m cp37-cp37m cp38-cp38 cp39-cp39) +SUPPORTED_VERSIONS=(cp36-cp36m cp37-cp37m cp38-cp38 cp39-cp39 cp310-cp310) ROOT=$(cd $(dirname "${BASH_SOURCE[0]}") >/dev/null; /bin/pwd -P) diff --git a/src/googleclouddebugger/BUILD b/src/googleclouddebugger/BUILD index dedf1aa..c0d6ae7 100644 --- a/src/googleclouddebugger/BUILD +++ b/src/googleclouddebugger/BUILD @@ -24,6 +24,7 @@ cc_library( deps = [ ":common", ":nullable", + "//src/third_party:pylinetable", ], ) diff --git a/src/googleclouddebugger/bytecode_breakpoint.cc b/src/googleclouddebugger/bytecode_breakpoint.cc index 8b782d7..dd1af6e 100644 --- a/src/googleclouddebugger/bytecode_breakpoint.cc +++ b/src/googleclouddebugger/bytecode_breakpoint.cc @@ -82,7 +82,7 @@ int BytecodeBreakpoint::CreateBreakpoint( // table in case "code_object" is already patched with another breakpoint. CodeObjectLinesEnumerator lines_enumerator( code_object->co_firstlineno, - code_object_breakpoints->original_lnotab.get()); + code_object_breakpoints->original_linedata.get()); while (lines_enumerator.line_number() != line) { if (!lines_enumerator.Next()) { LOG(ERROR) << "Line " << line << " not found in " @@ -237,8 +237,14 @@ BytecodeBreakpoint::PreparePatchCodeObject( return nullptr; // Probably a built-in method or uninitialized code object. } - data->original_lnotab = + // Store the original (unmodified) line data. 
+#if PY_VERSION_HEX < 0x030A0000 + data->original_linedata = ScopedPyObject::NewReference(code_object.get()->co_lnotab); +#else + data->original_linedata = + ScopedPyObject::NewReference(code_object.get()->co_linetable); +#endif patches_[code_object] = data.get(); return data.release(); @@ -262,29 +268,38 @@ void BytecodeBreakpoint::PatchCodeObject(CodeObjectBreakpoints* code) { << " from patched " << code->zombie_refs.back().get(); Py_INCREF(code_object->co_code); + // Restore the original line data to the code object. +#if PY_VERSION_HEX < 0x030A0000 if (code_object->co_lnotab != nullptr) { code->zombie_refs.push_back(ScopedPyObject(code_object->co_lnotab)); } - code_object->co_lnotab = code->original_lnotab.get(); + code_object->co_lnotab = code->original_linedata.get(); Py_INCREF(code_object->co_lnotab); +#else + if (code_object->co_linetable != nullptr) { + code->zombie_refs.push_back(ScopedPyObject(code_object->co_linetable)); + } + code_object->co_linetable = code->original_linedata.get(); + Py_INCREF(code_object->co_linetable); +#endif return; } std::vector bytecode = PyBytesToByteArray(code->original_code.get()); - bool has_lnotab = false; - std::vector lnotab; - if (!code->original_lnotab.is_null() && - PyBytes_CheckExact(code->original_lnotab.get())) { - has_lnotab = true; - lnotab = PyBytesToByteArray(code->original_lnotab.get()); + bool has_linedata = false; + std::vector linedata; + if (!code->original_linedata.is_null() && + PyBytes_CheckExact(code->original_linedata.get())) { + has_linedata = true; + linedata = PyBytesToByteArray(code->original_linedata.get()); } BytecodeManipulator bytecode_manipulator( std::move(bytecode), - has_lnotab, - std::move(lnotab)); + has_linedata, + std::move(linedata)); // Add callbacks to code object constants and patch the bytecode. 
std::vector callbacks; @@ -306,17 +321,16 @@ void BytecodeBreakpoint::PatchCodeObject(CodeObjectBreakpoints* code) { callbacks.push_back(breakpoint.hit_callable.get()); -#if PY_MAJOR_VERSION >= 3 // In Python 3, since we allow upgrading of instructions to use // EXTENDED_ARG, the offsets for lines originally calculated might not be // accurate, so we need to recalculate them each insertion. offset_found = false; - if (bytecode_manipulator.has_lnotab()) { - ScopedPyObject lnotab(PyBytes_FromStringAndSize( - reinterpret_cast(bytecode_manipulator.lnotab().data()), - bytecode_manipulator.lnotab().size())); + if (bytecode_manipulator.has_linedata()) { + ScopedPyObject linedata(PyBytes_FromStringAndSize( + reinterpret_cast(bytecode_manipulator.linedata().data()), + bytecode_manipulator.linedata().size())); CodeObjectLinesEnumerator lines_enumerator(code_object->co_firstlineno, - lnotab.release()); + linedata.release()); while (lines_enumerator.line_number() != breakpoint.line) { if (!lines_enumerator.Next()) { break; @@ -325,7 +339,6 @@ void BytecodeBreakpoint::PatchCodeObject(CodeObjectBreakpoints* code) { } offset_found = lines_enumerator.line_number() == breakpoint.line; } -#endif if (!offset_found || !bytecode_manipulator.InjectMethodCall(offset, const_index)) { @@ -355,14 +368,26 @@ void BytecodeBreakpoint::PatchCodeObject(CodeObjectBreakpoints* code) { << " reassigned to " << code_object->co_code << ", original was " << code->original_code.get(); - if (has_lnotab) { + // Update the line data in the code object. 
+#if PY_VERSION_HEX < 0x030A0000 + if (has_linedata) { code->zombie_refs.push_back(ScopedPyObject(code_object->co_lnotab)); ScopedPyObject lnotab_string(PyBytes_FromStringAndSize( - reinterpret_cast(bytecode_manipulator.lnotab().data()), - bytecode_manipulator.lnotab().size())); + reinterpret_cast(bytecode_manipulator.linedata().data()), + bytecode_manipulator.linedata().size())); DCHECK(!lnotab_string.is_null()); code_object->co_lnotab = lnotab_string.release(); } +#else + if (has_linedata) { + code->zombie_refs.push_back(ScopedPyObject(code_object->co_linetable)); + ScopedPyObject linetable_string(PyBytes_FromStringAndSize( + reinterpret_cast(bytecode_manipulator.linedata().data()), + bytecode_manipulator.linedata().size())); + DCHECK(!linetable_string.is_null()); + code_object->co_linetable = linetable_string.release(); + } +#endif // Invoke error callback after everything else is done. The callback may // decide to remove the breakpoint, which will change "code". diff --git a/src/googleclouddebugger/bytecode_breakpoint.h b/src/googleclouddebugger/bytecode_breakpoint.h index 057766f..5eaa893 100644 --- a/src/googleclouddebugger/bytecode_breakpoint.h +++ b/src/googleclouddebugger/bytecode_breakpoint.h @@ -162,9 +162,10 @@ class BytecodeBreakpoint { // Original value of PyCodeObject::co_code before patching. ScopedPyObject original_code; - // Original value of PythonCode::co_lnotab before patching. - // "lnotab" stands for "line numbers table" in CPython lingo. - ScopedPyObject original_lnotab; + // Original value of PythonCode::co_lnotab or PythonCode::co_linetable + // before patching. This is the line numbers table in CPython <= 3.9 and + // CPython >= 3.10 respectively + ScopedPyObject original_linedata; }; // Loads code object into "patches_" if not there yet. 
Returns nullptr if diff --git a/src/googleclouddebugger/bytecode_manipulator.cc b/src/googleclouddebugger/bytecode_manipulator.cc index 9ee7e27..3c95edd 100644 --- a/src/googleclouddebugger/bytecode_manipulator.cc +++ b/src/googleclouddebugger/bytecode_manipulator.cc @@ -228,11 +228,11 @@ static std::vector BuildMethodCall(int const_index) { } BytecodeManipulator::BytecodeManipulator(std::vector bytecode, - const bool has_lnotab, - std::vector lnotab) - : has_lnotab_(has_lnotab) { + const bool has_linedata, + std::vector linedata) + : has_linedata_(has_linedata) { data_.bytecode = std::move(bytecode); - data_.lnotab = std::move(lnotab); + data_.linedata = std::move(linedata); strategy_ = STRATEGY_INSERT; // Default strategy. for (auto it = data_.bytecode.begin(); it < data_.bytecode.end(); ) { @@ -296,21 +296,13 @@ struct Insertion { // InsertAndUpdateBranchInstructions. static const int kMaxInsertionIterations = 10; - +#if PY_VERSION_HEX < 0x030A0000 // Updates the line number table for an insertion in the bytecode. -// This is different than what the Python 2 version of InsertMethodCall() does. -// It should be more accurate, but is confined to Python 3 only for safety. -// This handles the case of adding insertion for EXTENDED_ARG better. // Example for inserting 2 bytes at offset 2: -// lnotab: [{2, 1}, {4, 1}] // {offset_delta, line_delta} -// Old algorithm: [{2, 0}, {2, 1}, {4, 1}] -// New algorithm: [{2, 1}, {6, 1}] -// In the old version, trying to get the offset to insert a breakpoint right -// before line 1 would result in an offset of 2, which is inaccurate as the -// instruction before is an EXTENDED_ARG which will now be applied to the first -// instruction inserted instead of its original target. 
-static void InsertAndUpdateLnotab(int offset, int size, - std::vector* lnotab) { +// lnotab: [{2, 1}, {4, 1}] // {offset_delta, line_delta} +// updated: [{2, 1}, {6, 1}] +static void InsertAndUpdateLineData(int offset, int size, + std::vector* lnotab) { int current_offset = 0; for (auto it = lnotab->begin(); it != lnotab->end(); it += 2) { current_offset += it[0]; @@ -330,6 +322,36 @@ static void InsertAndUpdateLnotab(int offset, int size, } } } +#else +// Updates the line number table for an insertion in the bytecode. +// Example for inserting 2 bytes at offset 2: +// linetable: [{2, 1}, {4, 1}] // {address_end_delta, line_delta} +// updated: [{2, 1}, {6, 1}] +// +// For more information on the linetable format in Python 3.10, see: +// https://github.com/python/cpython/blob/main/Objects/lnotab_notes.txt +static void InsertAndUpdateLineData(int offset, int size, + std::vector* linetable) { + int current_offset = 0; + for (auto it = linetable->begin(); it != linetable->end(); it += 2) { + current_offset += it[0]; + + if (current_offset > offset) { + int remaining_size = it[0] + size; + int remaining_lines = it[1]; + it = linetable->erase(it, it + 2); + while (remaining_size > 0xFE) { // Max address delta is listed as 254. + it = linetable->insert(it, 0xFE) + 1; + it = linetable->insert(it, 0) + 1; + remaining_size -= 0xFE; + } + it = linetable->insert(it, remaining_size) + 1; + it = linetable->insert(it, remaining_lines) + 1; + return; + } + } +} +#endif // Reserves space for instructions to be inserted into the bytecode, and // calculates the new offsets and arguments of branch instructions. @@ -426,8 +448,16 @@ static bool InsertAndUpdateBranchInstructions( } if (need_to_update) { +#if PY_VERSION_HEX < 0x030A0000 + int delta = insertion.size; +#else + // Changed in version 3.10: The argument of jump, exception handling + // and loop instructions is now the instruction offset rather than the + // byte offset. 
+ int delta = insertion.size / 2; +#endif PythonInstruction new_instruction = - PythonInstructionArg(instruction.opcode, arg + insertion.size); + PythonInstructionArg(instruction.opcode, arg + delta); int size_diff = new_instruction.size - instruction.size; if (size_diff > 0) { insertions.push_back(Insertion { size_diff, it->current_offset }); @@ -490,8 +520,8 @@ bool BytecodeManipulator::InsertMethodCall( // Insert the method call. data->bytecode.insert(data->bytecode.begin() + offset, method_call_size, NOP); WriteInstructions(data->bytecode.begin() + offset, method_call_instructions); - if (has_lnotab_) { - InsertAndUpdateLnotab(offset, method_call_size, &data->lnotab); + if (has_linedata_) { + InsertAndUpdateLineData(offset, method_call_size, &data->linedata); } // Write new branch instructions. @@ -503,8 +533,8 @@ bool BytecodeManipulator::InsertMethodCall( int offset = it->current_offset; if (size_diff > 0) { data->bytecode.insert(data->bytecode.begin() + offset, size_diff, NOP); - if (has_lnotab_) { - InsertAndUpdateLnotab(it->current_offset, size_diff, &data->lnotab); + if (has_linedata_) { + InsertAndUpdateLineData(it->current_offset, size_diff, &data->linedata); } } else if (size_diff < 0) { // The Python compiler sometimes prematurely adds EXTENDED_ARG with an diff --git a/src/googleclouddebugger/bytecode_manipulator.h b/src/googleclouddebugger/bytecode_manipulator.h index d3a7de4..31a5e46 100644 --- a/src/googleclouddebugger/bytecode_manipulator.h +++ b/src/googleclouddebugger/bytecode_manipulator.h @@ -71,17 +71,17 @@ namespace cdbg { // 19 JUMP_ABSOLUTE 3 class BytecodeManipulator { public: - BytecodeManipulator(std::vector bytecode, const bool has_lnotab, - std::vector lnotab); + BytecodeManipulator(std::vector bytecode, const bool has_linedata, + std::vector linedata); // Gets the transformed method bytecode. const std::vector& bytecode() const { return data_.bytecode; } // Returns true if this class was initialized with line numbers table. 
- bool has_lnotab() const { return has_lnotab_; } + bool has_linedata() const { return has_linedata_; } // Gets the method line numbers table or empty vector if not available. - const std::vector& lnotab() const { return data_.lnotab; } + const std::vector& linedata() const { return data_.linedata; } // Rewrites the method bytecode to invoke callable at the specified offset. // Return false if the method call could not be inserted. The bytecode @@ -109,8 +109,8 @@ class BytecodeManipulator { // Bytecode of a transformed method. std::vector bytecode; - // Method line numbers table or empty vector if "has_lnotab_" is false. - std::vector lnotab; + // Method line numbers table or empty vector if "has_linedata_" is false. + std::vector linedata; }; // Insert space into the bytecode. This space is later used to add new @@ -130,7 +130,7 @@ class BytecodeManipulator { Data data_; // True if the method has line number table. - const bool has_lnotab_; + const bool has_linedata_; // Algorithm to insert breakpoint callback into method bytecode. 
Strategy strategy_; diff --git a/src/googleclouddebugger/immutability_tracer.cc b/src/googleclouddebugger/immutability_tracer.cc index d5f102a..c05d407 100644 --- a/src/googleclouddebugger/immutability_tracer.cc +++ b/src/googleclouddebugger/immutability_tracer.cc @@ -400,6 +400,16 @@ static OpcodeMutableStatus IsOpcodeMutable(const uint8_t opcode) { #if PY_VERSION_HEX >= 0x03080000 // Added back in Python 3.8 (was in 2.7 as well) case ROT_FOUR: +#endif +#if PY_VERSION_HEX >= 0x030A0000 + // Added in Python 3.10 + case COPY_DICT_WITHOUT_KEYS: + case GET_LEN: + case MATCH_MAPPING: + case MATCH_SEQUENCE: + case MATCH_KEYS: + case MATCH_CLASS: + case ROT_N: #endif return OPCODE_NOT_MUTABLE; @@ -468,6 +478,10 @@ static OpcodeMutableStatus IsOpcodeMutable(const uint8_t opcode) { case RERAISE: case WITH_EXCEPT_START: case LOAD_ASSERTION_ERROR: +#endif +#if PY_VERSION_HEX >= 0x030A0000 + // Added in Python 3.10 + case GEN_START: #endif return OPCODE_MUTABLE; diff --git a/src/googleclouddebugger/module_explorer.py b/src/googleclouddebugger/module_explorer.py index acecea9..99829df 100644 --- a/src/googleclouddebugger/module_explorer.py +++ b/src/googleclouddebugger/module_explorer.py @@ -78,15 +78,24 @@ def _GetLineNumbers(code_object): Yields: The next line number in the code object. """ - # Get the line number deltas, which are the odd number entries, from the - # lnotab. See - # https://svn.python.org/projects/python/branches/pep-0384/Objects/lnotab_notes.txt - # In Python 3, this is just a byte array. - line_incrs = code_object.co_lnotab[1::2] - current_line = code_object.co_firstlineno - for line_incr in line_incrs: - current_line += line_incr - yield current_line + + if sys.version_info.minor < 10: + # Get the line number deltas, which are the odd number entries, from the + # lnotab. See + # https://svn.python.org/projects/python/branches/pep-0384/Objects/lnotab_notes.txt + # In Python 3, prior to 3.10, this is just a byte array. 
+ line_incrs = code_object.co_lnotab[1::2] + current_line = code_object.co_firstlineno + for line_incr in line_incrs: + current_line += line_incr + yield current_line + else: + # Get the line numbers directly, which are the third entry in the tuples. + # https://peps.python.org/pep-0626/#the-new-co-lines-method-of-code-objects + line_numbers = [entry[2] for entry in code_object.co_lines()] + for line_number in line_numbers: + if line_number is not None: + yield line_number def _GetModuleCodeObjects(module): diff --git a/src/googleclouddebugger/python_util.cc b/src/googleclouddebugger/python_util.cc index 90b67ce..e28a142 100644 --- a/src/googleclouddebugger/python_util.cc +++ b/src/googleclouddebugger/python_util.cc @@ -23,6 +23,11 @@ #include +#if PY_VERSION_HEX >= 0x030A0000 +#include "../third_party/pylinetable.h" +#endif // PY_VERSION_HEX >= 0x030A0000 + + namespace devtools { namespace cdbg { @@ -32,17 +37,22 @@ static PyObject* g_debuglet_module = nullptr; CodeObjectLinesEnumerator::CodeObjectLinesEnumerator( PyCodeObject* code_object) { +#if PY_VERSION_HEX < 0x030A0000 Initialize(code_object->co_firstlineno, code_object->co_lnotab); +#else + Initialize(code_object->co_firstlineno, code_object->co_linetable); +#endif // PY_VERSION_HEX < 0x030A0000 } CodeObjectLinesEnumerator::CodeObjectLinesEnumerator( int firstlineno, - PyObject* lnotab) { - Initialize(firstlineno, lnotab); + PyObject* linedata) { + Initialize(firstlineno, linedata); } +#if PY_VERSION_HEX < 0x030A0000 void CodeObjectLinesEnumerator::Initialize( int firstlineno, PyObject* lnotab) { @@ -86,7 +96,26 @@ bool CodeObjectLinesEnumerator::Next() { } } } +#else + +void CodeObjectLinesEnumerator::Initialize( + int firstlineno, + PyObject* linetable) { + Py_ssize_t length = PyBytes_Size(linetable); + _PyLineTable_InitAddressRange(PyBytes_AsString(linetable), length, firstlineno, &range_); +} +bool CodeObjectLinesEnumerator::Next() { + while (_PyLineTable_NextAddressRange(&range_)) { + if 
(range_.ar_line >= 0) { + line_number_ = range_.ar_line; + offset_ = range_.ar_start; + return true; + } + } + return false; +} +#endif // PY_VERSION_HEX < 0x030A0000 PyObject* GetDebugletModule() { DCHECK(g_debuglet_module != nullptr); diff --git a/src/googleclouddebugger/python_util.h b/src/googleclouddebugger/python_util.h index 57b5425..10116be 100644 --- a/src/googleclouddebugger/python_util.h +++ b/src/googleclouddebugger/python_util.h @@ -178,7 +178,7 @@ class CodeObjectLinesEnumerator { explicit CodeObjectLinesEnumerator(PyCodeObject* code_object); // Uses explicitly provided line table. - CodeObjectLinesEnumerator(int firstlineno, PyObject* lnotab); + CodeObjectLinesEnumerator(int firstlineno, PyObject* linedata); // Moves over to the next entry in code object line table. bool Next(); @@ -190,24 +190,31 @@ class CodeObjectLinesEnumerator { int32_t line_number() const { return line_number_; } private: - void Initialize(int firstlineno, PyObject* lnotab); + void Initialize(int firstlineno, PyObject* linedata); private: + // Bytecode offset of the current line. + int32_t offset_; + + // Current source code line number + int32_t line_number_; + +#if PY_VERSION_HEX < 0x030A0000 // Number of remaining entries in line table. int remaining_entries_; // Pointer to the next entry of line table. const uint8_t* next_entry_; - // Bytecode offset of the current line. - int32_t offset_; - - // Current source code line number - int32_t line_number_; +#else + // Current address range in the linetable data. 
+ PyCodeAddressRange range_; +#endif DISALLOW_COPY_AND_ASSIGN(CodeObjectLinesEnumerator); }; + template bool operator== (TPointer* ref1, const ScopedPyObjectT& ref2) { return ref2 == ref1; diff --git a/src/setup.py b/src/setup.py index 0c24bad..6b380d5 100644 --- a/src/setup.py +++ b/src/setup.py @@ -117,6 +117,7 @@ def ReadConfig(section, value, default): 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', 'Development Status :: 3 - Alpha', 'Intended Audience :: Developers', ]) diff --git a/src/third_party/BUILD b/src/third_party/BUILD new file mode 100644 index 0000000..bcce1e2 --- /dev/null +++ b/src/third_party/BUILD @@ -0,0 +1,7 @@ +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "pylinetable", + hdrs = ["pylinetable.h"], +) + diff --git a/src/third_party/pylinetable.h b/src/third_party/pylinetable.h new file mode 100644 index 0000000..ea44c64 --- /dev/null +++ b/src/third_party/pylinetable.h @@ -0,0 +1,210 @@ +/** + * Copyright (c) 2001-2023 Python Software Foundation; All Rights Reserved + * + * You may obtain a copy of the PSF License at + * + * https://docs.python.org/3/license.html + */ + +#ifndef DEVTOOLS_CDBG_DEBUGLETS_PYTHON_PYLINETABLE_H_ +#define DEVTOOLS_CDBG_DEBUGLETS_PYTHON_PYLINETABLE_H_ + +/* Python Linetable helper methods. + * They are not part of the cpython api. + * This code has been extracted from: + * https://github.com/python/cpython/blob/main/Objects/codeobject.c + * + * See https://peps.python.org/pep-0626/#out-of-process-debuggers-and-profilers + * for more information about this code and its usage. + */ + +#if PY_VERSION_HEX >= 0x030B0000 +// Things are different in 3.11 than 3.10. 
+// See https://github.com/python/cpython/blob/main/Objects/locations.md + +typedef enum _PyCodeLocationInfoKind { + /* short forms are 0 to 9 */ + PY_CODE_LOCATION_INFO_SHORT0 = 0, + /* one lineforms are 10 to 12 */ + PY_CODE_LOCATION_INFO_ONE_LINE0 = 10, + PY_CODE_LOCATION_INFO_ONE_LINE1 = 11, + PY_CODE_LOCATION_INFO_ONE_LINE2 = 12, + + PY_CODE_LOCATION_INFO_NO_COLUMNS = 13, + PY_CODE_LOCATION_INFO_LONG = 14, + PY_CODE_LOCATION_INFO_NONE = 15 +} _PyCodeLocationInfoKind; + +/** Out of process API for initializing the location table. */ +extern void _PyLineTable_InitAddressRange( + const char *linetable, + Py_ssize_t length, + int firstlineno, + PyCodeAddressRange *range); + +/** API for traversing the line number table. */ +extern int _PyLineTable_NextAddressRange(PyCodeAddressRange *range); + + +void _PyLineTable_InitAddressRange(const char *linetable, Py_ssize_t length, int firstlineno, PyCodeAddressRange *range) { + range->opaque.lo_next = linetable; + range->opaque.limit = range->opaque.lo_next + length; + range->ar_start = -1; + range->ar_end = 0; + range->opaque.computed_line = firstlineno; + range->ar_line = -1; +} + +static int +scan_varint(const uint8_t *ptr) +{ + unsigned int read = *ptr++; + unsigned int val = read & 63; + unsigned int shift = 0; + while (read & 64) { + read = *ptr++; + shift += 6; + val |= (read & 63) << shift; + } + return val; +} + +static int +scan_signed_varint(const uint8_t *ptr) +{ + unsigned int uval = scan_varint(ptr); + if (uval & 1) { + return -(int)(uval >> 1); + } + else { + return uval >> 1; + } +} + +static int +get_line_delta(const uint8_t *ptr) +{ + int code = ((*ptr) >> 3) & 15; + switch (code) { + case PY_CODE_LOCATION_INFO_NONE: + return 0; + case PY_CODE_LOCATION_INFO_NO_COLUMNS: + case PY_CODE_LOCATION_INFO_LONG: + return scan_signed_varint(ptr+1); + case PY_CODE_LOCATION_INFO_ONE_LINE0: + return 0; + case PY_CODE_LOCATION_INFO_ONE_LINE1: + return 1; + case PY_CODE_LOCATION_INFO_ONE_LINE2: + return 2; + default: + 
/* Same line */ + return 0; + } +} + +static int +is_no_line_marker(uint8_t b) +{ + return (b >> 3) == 0x1f; +} + + +#define ASSERT_VALID_BOUNDS(bounds) \ + assert(bounds->opaque.lo_next <= bounds->opaque.limit && \ + (bounds->ar_line == -1 || bounds->ar_line == bounds->opaque.computed_line) && \ + (bounds->opaque.lo_next == bounds->opaque.limit || \ + (*bounds->opaque.lo_next) & 128)) + +static int +next_code_delta(PyCodeAddressRange *bounds) +{ + assert((*bounds->opaque.lo_next) & 128); + return (((*bounds->opaque.lo_next) & 7) + 1) * sizeof(_Py_CODEUNIT); +} + +static void +advance(PyCodeAddressRange *bounds) +{ + ASSERT_VALID_BOUNDS(bounds); + bounds->opaque.computed_line += get_line_delta(reinterpret_cast(bounds->opaque.lo_next)); + if (is_no_line_marker(*bounds->opaque.lo_next)) { + bounds->ar_line = -1; + } + else { + bounds->ar_line = bounds->opaque.computed_line; + } + bounds->ar_start = bounds->ar_end; + bounds->ar_end += next_code_delta(bounds); + do { + bounds->opaque.lo_next++; + } while (bounds->opaque.lo_next < bounds->opaque.limit && + ((*bounds->opaque.lo_next) & 128) == 0); + ASSERT_VALID_BOUNDS(bounds); +} + +static inline int +at_end(PyCodeAddressRange *bounds) { + return bounds->opaque.lo_next >= bounds->opaque.limit; +} + +int +_PyLineTable_NextAddressRange(PyCodeAddressRange *range) +{ + if (at_end(range)) { + return 0; + } + advance(range); + assert(range->ar_end > range->ar_start); + return 1; +} +#elif PY_VERSION_HEX >= 0x030A0000 +void +_PyLineTable_InitAddressRange(const char *linetable, Py_ssize_t length, int firstlineno, PyCodeAddressRange *range) +{ + range->opaque.lo_next = linetable; + range->opaque.limit = range->opaque.lo_next + length; + range->ar_start = -1; + range->ar_end = 0; + range->opaque.computed_line = firstlineno; + range->ar_line = -1; +} + +static void +advance(PyCodeAddressRange *bounds) +{ + bounds->ar_start = bounds->ar_end; + int delta = ((unsigned char *)bounds->opaque.lo_next)[0]; + bounds->ar_end += delta; + 
int ldelta = ((signed char *)bounds->opaque.lo_next)[1]; + bounds->opaque.lo_next += 2; + if (ldelta == -128) { + bounds->ar_line = -1; + } + else { + bounds->opaque.computed_line += ldelta; + bounds->ar_line = bounds->opaque.computed_line; + } +} + +static inline int +at_end(PyCodeAddressRange *bounds) { + return bounds->opaque.lo_next >= bounds->opaque.limit; +} + +int +_PyLineTable_NextAddressRange(PyCodeAddressRange *range) +{ + if (at_end(range)) { + return 0; + } + advance(range); + while (range->ar_start == range->ar_end) { + assert(!at_end(range)); + advance(range); + } + return 1; +} +#endif + +#endif // DEVTOOLS_CDBG_DEBUGLETS_PYTHON_PYLINETABLE_H_ diff --git a/tests/cpp/bytecode_manipulator_test.cc b/tests/cpp/bytecode_manipulator_test.cc index 6f21ae1..934dfef 100644 --- a/tests/cpp/bytecode_manipulator_test.cc +++ b/tests/cpp/bytecode_manipulator_test.cc @@ -116,7 +116,6 @@ static std::string FormatOpcode(uint8_t opcode) { case SETUP_LOOP: return "SETUP_LOOP"; case SETUP_EXCEPT: return "SETUP_EXCEPT"; #endif -#if PY_MAJOR_VERSION >= 3 case DUP_TOP_TWO: return "DUP_TOP_TWO"; case BINARY_MATRIX_MULTIPLY: return "BINARY_MATRIX_MULTIPLY"; case INPLACE_MATRIX_MULTIPLY: return "INPLACE_MATRIX_MULTIPLY"; @@ -182,38 +181,7 @@ static std::string FormatOpcode(uint8_t opcode) { case DICT_MERGE: return "DICT_MERGE"; case DICT_UPDATE: return "DICT_UPDATE"; #endif -#else - case STOP_CODE: return "STOP_CODE"; - case ROT_FOUR: return "ROT_FOUR"; - case UNARY_CONVERT: return "UNARY_CONVERT"; - case BINARY_DIVIDE: return "BINARY_DIVIDE"; - case SLICE: return "SLICE"; - case SLICE_1: return "SLICE_1"; - case SLICE_2: return "SLICE_2"; - case SLICE_3: return "SLICE_3"; - case STORE_SLICE: return "STORE_SLICE"; - case STORE_SLICE_1: return "STORE_SLICE_1"; - case STORE_SLICE_2: return "STORE_SLICE_2"; - case STORE_SLICE_3: return "STORE_SLICE_3"; - case DELETE_SLICE: return "DELETE_SLICE"; - case DELETE_SLICE_1: return "DELETE_SLICE_1"; - case DELETE_SLICE_2: return 
"DELETE_SLICE_2"; - case DELETE_SLICE_3: return "DELETE_SLICE_3"; - case STORE_MAP: return "STORE_MAP"; - case INPLACE_DIVIDE: return "INPLACE_DIVIDE"; - case PRINT_NEWLINE: return "PRINT_NEWLINE"; - case PRINT_ITEM: return "PRINT_ITEM"; - case PRINT_ITEM_TO: return "PRINT_ITEM_TO"; - case PRINT_NEWLINE_TO: return "PRINT_NEWLINE_TO"; - case LOAD_LOCALS: return "LOAD_LOCALS"; - case EXEC_STMT: return "EXEC_STMT"; - case BUILD_CLASS: return "BUILD_CLASS"; - case DUP_TOPX: return "DUP_TOPX"; - case MAKE_CLOSURE: return "MAKE_CLOSURE"; - case CALL_FUNCTION_VAR: return "CALL_FUNCTION_VAR"; - case CALL_FUNCTION_VAR_KW: return "CALL_FUNCTION_VAR_KW"; - case WITH_CLEANUP: return "WITH_CLEANUP"; -#endif + default: return std::to_string(static_cast(opcode)); } } @@ -263,12 +231,12 @@ static void VerifyBytecode(const BytecodeManipulator& bytecode_manipulator, static void VerifyLineNumbersTable( const BytecodeManipulator& bytecode_manipulator, - std::vector expected_lnotab) { + std::vector expected_linedata) { // Convert to integers to better logging by EXPECT_EQ. 
- std::vector expected(expected_lnotab.begin(), expected_lnotab.end()); + std::vector expected(expected_linedata.begin(), expected_linedata.end()); std::vector actual( - bytecode_manipulator.lnotab().begin(), - bytecode_manipulator.lnotab().end()); + bytecode_manipulator.linedata().begin(), + bytecode_manipulator.linedata().end()); EXPECT_EQ(expected, actual); } @@ -281,10 +249,10 @@ TEST(BytecodeManipulatorTest, EmptyBytecode) { TEST(BytecodeManipulatorTest, HasLineNumbersTable) { BytecodeManipulator instance1({}, false, {}); - EXPECT_FALSE(instance1.has_lnotab()); + EXPECT_FALSE(instance1.has_linedata()); BytecodeManipulator instance2({}, true, {}); - EXPECT_TRUE(instance2.has_lnotab()); + EXPECT_TRUE(instance2.has_linedata()); } @@ -373,6 +341,25 @@ TEST(BytecodeManipulatorTest, InsertionOffsetUpdates) { {}); ASSERT_TRUE(instance.InjectMethodCall(2, 47)); +#if PY_VERSION_HEX >= 0x030A0000 + // Jump offsets are instruction offsets, not byte offsets. + VerifyBytecode( + instance, + { + JUMP_FORWARD, // offset 0. + 12 + 3, // offset 1. + LOAD_CONST, // offset 2. + 47, // offset 3. + CALL_FUNCTION, // offset 4. + 0, // offset 5. + POP_TOP, // offset 6. + 0, // offset 7. + NOP, // offset 8. + 0, // offset 9. + JUMP_ABSOLUTE, // offset 10. + 34 + 3 // offset 11. + }); +#else VerifyBytecode( instance, { @@ -389,6 +376,7 @@ TEST(BytecodeManipulatorTest, InsertionOffsetUpdates) { JUMP_ABSOLUTE, // offset 10. 34 + 6 // offset 11. }); +#endif } @@ -418,6 +406,37 @@ TEST(BytecodeManipulatorTest, InsertionExtendedOffsetUpdates) { {}); ASSERT_TRUE(instance.InjectMethodCall(8, 11)); +#if PY_VERSION_HEX >= 0x030A0000 + // Jump offsets are instruction offsets, not byte offsets. + VerifyBytecode( + instance, + { + EXTENDED_ARG, // offset 0. + 12, // offset 1. + EXTENDED_ARG, // offset 2. + 34, // offset 3. + EXTENDED_ARG, // offset 4. + 56, // offset 5. + JUMP_FORWARD, // offset 6. + 78 + 3, // offset 7. + LOAD_CONST, // offset 8. + 11, // offset 9. + CALL_FUNCTION, // offset 10. 
+ 0, // offset 11. + POP_TOP, // offset 12. + 0, // offset 13. + NOP, // offset 14. + 0, // offset 15. + EXTENDED_ARG, // offset 16. + 98, // offset 17. + EXTENDED_ARG, // offset 18. + 76, // offset 19. + EXTENDED_ARG, // offset 20. + 54, // offset 21. + JUMP_ABSOLUTE, // offset 22. + 32 + 3 // offset 23. + }); +#else VerifyBytecode( instance, { @@ -446,6 +465,7 @@ TEST(BytecodeManipulatorTest, InsertionExtendedOffsetUpdates) { JUMP_ABSOLUTE, // offset 22. 32 + 6 // offset 23. }); +#endif } @@ -521,13 +541,15 @@ TEST(BytecodeManipulatorTest, InsertionOffsetUneededExtended) { {}); ASSERT_TRUE(instance.InjectMethodCall(4, 11)); +#if PY_VERSION_HEX >= 0x030A0000 + // Jump offsets are instruction offsets, not byte offsets. VerifyBytecode( instance, { EXTENDED_ARG, // offset 0. 0, // offset 1. JUMP_FORWARD, // offset 2. - 8, // offset 3. + 2 + 3, // offset 3. LOAD_CONST, // offset 4. 11, // offset 5. CALL_FUNCTION, // offset 6. @@ -537,13 +559,33 @@ TEST(BytecodeManipulatorTest, InsertionOffsetUneededExtended) { NOP, // offset 10. 0 // offset 11. }); +#else + VerifyBytecode( + instance, + { + EXTENDED_ARG, // offset 0. + 0, // offset 1. + JUMP_FORWARD, // offset 2. + 2 + 6, // offset 3. + LOAD_CONST, // offset 4. + 11, // offset 5. + CALL_FUNCTION, // offset 6. + 0, // offset 7. + POP_TOP, // offset 8. + 0, // offset 9. + NOP, // offset 10. + 0 // offset 11. + }); +#endif } TEST(BytecodeManipulatorTest, InsertionOffsetUpgradeExtended) { - BytecodeManipulator instance({ JUMP_ABSOLUTE, 250 , NOP, 0 }, false, {}); + BytecodeManipulator instance({ JUMP_ABSOLUTE, 254 , NOP, 0 }, false, {}); ASSERT_TRUE(instance.InjectMethodCall(2, 11)); +#if PY_VERSION_HEX >= 0x030A0000 + // Jump offsets are instruction offsets, not byte offsets. VerifyBytecode( instance, { @@ -560,27 +602,68 @@ TEST(BytecodeManipulatorTest, InsertionOffsetUpgradeExtended) { NOP, // offset 10. 0 // offset 11. }); +#else + VerifyBytecode( + instance, + { + EXTENDED_ARG, // offset 0. + 1, // offset 1. 
+ JUMP_ABSOLUTE, // offset 2. + 6, // offset 3. + LOAD_CONST, // offset 4. + 11, // offset 5. + CALL_FUNCTION, // offset 6. + 0, // offset 7. + POP_TOP, // offset 8. + 0, // offset 9. + NOP, // offset 10. + 0 // offset 11. + }); +#endif } TEST(BytecodeManipulatorTest, InsertionOffsetUpgradeExtendedTwice) { BytecodeManipulator instance( - { JUMP_ABSOLUTE, 248, JUMP_ABSOLUTE, 250, NOP, 0 }, + { JUMP_ABSOLUTE, 252, JUMP_ABSOLUTE, 254, NOP, 0 }, false, {}); ASSERT_TRUE(instance.InjectMethodCall(4, 12)); +#if PY_VERSION_HEX >= 0x030A0000 + // Jump offsets are instruction offsets, not byte offsets. VerifyBytecode( instance, { EXTENDED_ARG, // offset 0. 1, // offset 1. JUMP_ABSOLUTE, // offset 2. - 2, // offset 3. + 1, // offset 3. + EXTENDED_ARG, // offset 4. + 1, // offset 5. + JUMP_ABSOLUTE, // offset 6. + 3, // offset 7. + LOAD_CONST, // offset 8. + 12, // offset 9. + CALL_FUNCTION, // offset 10. + 0, // offset 11. + POP_TOP, // offset 12. + 0, // offset 13. + NOP, // offset 14. + 0 // offset 15. + }); +#else + VerifyBytecode( + instance, + { + EXTENDED_ARG, // offset 0. + 1, // offset 1. + JUMP_ABSOLUTE, // offset 2. + 6, // offset 3. EXTENDED_ARG, // offset 4. 1, // offset 5. JUMP_ABSOLUTE, // offset 6. - 4, // offset 7. + 8, // offset 7. LOAD_CONST, // offset 8. 12, // offset 9. CALL_FUNCTION, // offset 10. @@ -590,6 +673,7 @@ TEST(BytecodeManipulatorTest, InsertionOffsetUpgradeExtendedTwice) { NOP, // offset 14. 0 // offset 15. 
}); +#endif } @@ -629,16 +713,16 @@ TEST(BytecodeManipulatorTest, InsertionMidInstruction) { TEST(BytecodeManipulatorTest, InsertionTooManyUpgrades) { BytecodeManipulator instance( { - JUMP_ABSOLUTE, 250, - JUMP_ABSOLUTE, 250, - JUMP_ABSOLUTE, 250, - JUMP_ABSOLUTE, 250, - JUMP_ABSOLUTE, 250, - JUMP_ABSOLUTE, 250, - JUMP_ABSOLUTE, 250, - JUMP_ABSOLUTE, 250, - JUMP_ABSOLUTE, 250, - JUMP_ABSOLUTE, 250, + JUMP_ABSOLUTE, 254, + JUMP_ABSOLUTE, 254, + JUMP_ABSOLUTE, 254, + JUMP_ABSOLUTE, 254, + JUMP_ABSOLUTE, 254, + JUMP_ABSOLUTE, 254, + JUMP_ABSOLUTE, 254, + JUMP_ABSOLUTE, 254, + JUMP_ABSOLUTE, 254, + JUMP_ABSOLUTE, 254, NOP, 0 }, false, @@ -707,7 +791,7 @@ TEST(BytecodeManipulatorTest, LineNumbersTablePastEnd) { TEST(BytecodeManipulatorTest, LineNumbersTableUpgradeExtended) { BytecodeManipulator instance( - { JUMP_ABSOLUTE, 250, RETURN_VALUE, 0 }, + { JUMP_ABSOLUTE, 254, RETURN_VALUE, 0 }, true, { 2, 1, 2, 1 }); ASSERT_TRUE(instance.InjectMethodCall(2, 99)); @@ -724,7 +808,11 @@ TEST(BytecodeManipulatorTest, LineNumbersTableOverflow) { { 254, 1 }); ASSERT_TRUE(instance.InjectMethodCall(2, 99)); +#if PY_VERSION_HEX >= 0x030A0000 + VerifyLineNumbersTable(instance, { 254, 0, 6, 1 }); +#else VerifyLineNumbersTable(instance, { 255, 0, 5, 1 }); +#endif } diff --git a/tests/py/collector_test.py b/tests/py/collector_test.py index fe936ad..abc39b2 100644 --- a/tests/py/collector_test.py +++ b/tests/py/collector_test.py @@ -5,6 +5,7 @@ import inspect import logging import os +import sys import time from unittest import mock @@ -1428,7 +1429,8 @@ def testLogBytesQuota(self): def testMissingLogLevel(self): # Missing is equivalent to INFO. 
- log_collector = LogCollectorWithDefaultLocation({'logMessageFormat': 'hello'}) + log_collector = LogCollectorWithDefaultLocation( + {'logMessageFormat': 'hello'}) self.assertIsNone(log_collector.Log(inspect.currentframe())) self.assertTrue(self._verifier.GotMessage('LOGPOINT: hello')) @@ -1487,11 +1489,17 @@ def testBadExpression(self): 'expressions': ['-', '+'] }) self.assertIsNone(log_collector.Log(inspect.currentframe())) - self.assertTrue( - self._verifier.GotMessage( - 'LOGPOINT: a=, b=')) + if sys.version_info.minor < 10: + self.assertTrue( + self._verifier.GotMessage( + 'LOGPOINT: a=, b=')) + else: + self.assertTrue( + self._verifier.GotMessage( + 'LOGPOINT: a=, ' + 'b=')) def testDollarEscape(self): unused_integer = 12345 diff --git a/tests/py/module_search_test.py b/tests/py/module_search_test.py index 70c67b4..3a12c57 100644 --- a/tests/py/module_search_test.py +++ b/tests/py/module_search_test.py @@ -83,8 +83,7 @@ def testSearchSymLinkInSysPath(self): # Returned result should have a successful file match and symbolic # links should be kept. - self.assertEndsWith( - module_search.Search('b/first.py'), 'link/b/first.py') + self.assertEndsWith(module_search.Search('b/first.py'), 'link/b/first.py') finally: sys.path.remove(os.path.join(self._test_package_dir, 'link')) From bb3fe498e2acd53297ebfea019bf8a5d91bc8887 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Thu, 2 Feb 2023 10:35:23 -0500 Subject: [PATCH 227/241] chore: Release version 3.3 (#71) --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index 267e47d..a61798c 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. 
Instances running debuggers with # different major versions will show up as two different debuggees. -__version__ = '3.2' +__version__ = '3.3' From 3d3e066a8d2841456bde28be3c0e3d9a0f836cad Mon Sep 17 00:00:00 2001 From: James McTavish Date: Fri, 17 Feb 2023 09:31:47 -0500 Subject: [PATCH 228/241] fix: handle negative line number deltas (#73) In Python 3.6-3.9, unlike in Python 2, co_lnotab's line number offsets can be negative and are represented by a signed integer. The Debugger agent had been using an unsigned integer which prevented it from being able to set breakpoints on lines of code in a code object after the line table had a negative offset. This PR also enables most of the previously disabled tests now that they work on all supported versions of Python. A couple still remain disabled and will be fixed later on. --- src/googleclouddebugger/module_explorer.py | 5 +- src/googleclouddebugger/python_util.cc | 2 +- ...n_test_disabled.py => integration_test.py} | 55 +++++++++---------- ...st_disabled.py => module_explorer_test.py} | 2 - ..._disabled.py => python_breakpoint_test.py} | 45 +++++++++------ 5 files changed, 59 insertions(+), 50 deletions(-) rename tests/py/{integration_test_disabled.py => integration_test.py} (94%) rename tests/py/{module_explorer_test_disabled.py => module_explorer_test.py} (99%) rename tests/py/{python_breakpoint_test_disabled.py => python_breakpoint_test.py} (95%) diff --git a/src/googleclouddebugger/module_explorer.py b/src/googleclouddebugger/module_explorer.py index 99829df..ac62ce4 100644 --- a/src/googleclouddebugger/module_explorer.py +++ b/src/googleclouddebugger/module_explorer.py @@ -62,7 +62,7 @@ def GetCodeObjectAtLine(module, line): prev_line = max(prev_line, co_line_number) elif co_line_number > line: next_line = min(next_line, co_line_number) - break + # Continue because line numbers may not be sequential. 
prev_line = None if prev_line == 0 else prev_line next_line = None if next_line == sys.maxsize else next_line @@ -87,6 +87,9 @@ def _GetLineNumbers(code_object): line_incrs = code_object.co_lnotab[1::2] current_line = code_object.co_firstlineno for line_incr in line_incrs: + if line_incr >= 0x80: + # line_incrs is an array of 8-bit signed integers + line_incr -= 0x100 current_line += line_incr yield current_line else: diff --git a/src/googleclouddebugger/python_util.cc b/src/googleclouddebugger/python_util.cc index e28a142..bc03bfc 100644 --- a/src/googleclouddebugger/python_util.cc +++ b/src/googleclouddebugger/python_util.cc @@ -79,7 +79,7 @@ bool CodeObjectLinesEnumerator::Next() { while (true) { offset_ += next_entry_[0]; - line_number_ += next_entry_[1]; + line_number_ += static_cast(next_entry_[1]); bool stop = ((next_entry_[0] != 0xFF) || (next_entry_[1] != 0)) && ((next_entry_[0] != 0) || (next_entry_[1] != 0xFF)); diff --git a/tests/py/integration_test_disabled.py b/tests/py/integration_test.py similarity index 94% rename from tests/py/integration_test_disabled.py rename to tests/py/integration_test.py index 12321c4..39067b0 100644 --- a/tests/py/integration_test_disabled.py +++ b/tests/py/integration_test.py @@ -1,7 +1,5 @@ """Complete tests of the debugger mocking the backend.""" -# TODO: Get this test to work well all supported versions of python. - from datetime import datetime from datetime import timedelta import functools @@ -20,7 +18,7 @@ import google.auth from absl.testing import absltest -from googleclouddebugger import capture_collector +from googleclouddebugger import collector from googleclouddebugger import labels import python_test_util @@ -98,7 +96,7 @@ def GetVal(): cdbg.enable() # Increase the polling rate to speed up the test. 
- cdbg._hub_client.min_interval_sec = 0.001 # Poll every 1 ms + cdbg.gcp_hub_client.min_interval_sec = 0.001 # Poll every 1 ms def SetBreakpoint(self, tag, template=None): """Sets a new breakpoint in this source file. @@ -214,7 +212,6 @@ def execute(self): # pylint: disable=invalid-name return FakeBreakpointUpdateCommand(self._incoming_breakpoint_updates) - # We only need to attach the debugger exactly once. The IntegrationTest class # is created for each test case, so we need to keep this state global. @@ -226,7 +223,7 @@ def _FakeLog(self, message, extra=None): def setUp(self): self._info_log = [] - capture_collector.log_info_message = self._FakeLog + collector.log_info_message = self._FakeLog def tearDown(self): IntegrationTest._hub.SetActiveBreakpoints([]) @@ -281,8 +278,8 @@ def testExistingLabelsPriority(self): def Trigger(): print('Breakpoint trigger with labels') # BPTAG: EXISTING_LABELS_PRIORITY - current_labels_collector = capture_collector.breakpoint_labels_collector - capture_collector.breakpoint_labels_collector = \ + current_labels_collector = collector.breakpoint_labels_collector + collector.breakpoint_labels_collector = \ lambda: {'label_1': 'value_1', 'label_2': 'value_2'} IntegrationTest._hub.SetBreakpoint( @@ -294,7 +291,7 @@ def Trigger(): Trigger() - capture_collector.breakpoint_labels_collector = current_labels_collector + collector.breakpoint_labels_collector = current_labels_collector # In this case, label_1 was in both the agent and the pre existing labels, # the pre existing value of value_foobar should be preserved. 
@@ -313,14 +310,14 @@ def Trigger(): print('Breakpoint trigger req id label') # BPTAG: REQUEST_LOG_ID_LABEL current_request_log_id_collector = \ - capture_collector.request_log_id_collector - capture_collector.request_log_id_collector = lambda: 'foo_bar_id' + collector.request_log_id_collector + collector.request_log_id_collector = lambda: 'foo_bar_id' IntegrationTest._hub.SetBreakpoint('REQUEST_LOG_ID_LABEL') Trigger() - capture_collector.request_log_id_collector = \ + collector.request_log_id_collector = \ current_request_log_id_collector result = IntegrationTest._hub.GetNextResult() @@ -559,23 +556,25 @@ def Method(a): }, python_test_util.PackFrameVariable(result, 'x', frame=1)) return x - def testRecursion(self): - - def RecursiveMethod(i): - if i == 0: - return 0 # BPTAG: RECURSION - return RecursiveMethod(i - 1) - - IntegrationTest._hub.SetBreakpoint('RECURSION') - RecursiveMethod(5) - result = IntegrationTest._hub.GetNextResult() - for frame in range(5): - self.assertEqual({ - 'name': 'i', - 'value': str(frame), - 'type': 'int' - }, python_test_util.PackFrameVariable(result, 'i', frame, 'arguments')) +# FIXME: Broken in Python 3.10 +# def testRecursion(self): +# +# def RecursiveMethod(i): +# if i == 0: +# return 0 # BPTAG: RECURSION +# return RecursiveMethod(i - 1) +# +# IntegrationTest._hub.SetBreakpoint('RECURSION') +# RecursiveMethod(5) +# result = IntegrationTest._hub.GetNextResult() +# +# for frame in range(5): +# self.assertEqual({ +# 'name': 'i', +# 'value': str(frame), +# 'type': 'int' +# }, python_test_util.PackFrameVariable(result, 'i', frame, 'arguments')) def testWatchedExpressions(self): diff --git a/tests/py/module_explorer_test_disabled.py b/tests/py/module_explorer_test.py similarity index 99% rename from tests/py/module_explorer_test_disabled.py rename to tests/py/module_explorer_test.py index b7542e1..4e1a42c 100644 --- a/tests/py/module_explorer_test_disabled.py +++ b/tests/py/module_explorer_test.py @@ -1,7 +1,5 @@ """Unit test for 
module_explorer module.""" -# TODO: Get this test to run properly on all supported versions of Python - import dis import inspect import os diff --git a/tests/py/python_breakpoint_test_disabled.py b/tests/py/python_breakpoint_test.py similarity index 95% rename from tests/py/python_breakpoint_test_disabled.py rename to tests/py/python_breakpoint_test.py index a337d12..153a2ba 100644 --- a/tests/py/python_breakpoint_test_disabled.py +++ b/tests/py/python_breakpoint_test.py @@ -1,7 +1,5 @@ """Unit test for python_breakpoint module.""" -# TODO: Get this test to work with all supported versions of Python. - from datetime import datetime from datetime import timedelta import inspect @@ -12,7 +10,7 @@ from absl.testing import absltest from googleclouddebugger import cdbg_native as native -from googleclouddebugger import imphook2 +from googleclouddebugger import imphook from googleclouddebugger import python_breakpoint import python_test_util @@ -102,7 +100,7 @@ def testDeferredBreakpoint(self): self._update_queue[0]['stackFrames'][0]['function']) self.assertTrue(self._update_queue[0]['isFinalState']) - self.assertEmpty(imphook2._import_callbacks) + self.assertEmpty(imphook._import_callbacks) # Old module search algorithm rejects multiple matches. This test verifies # that the new module search algorithm searches sys.path sequentially, and @@ -142,7 +140,7 @@ def testSearchUsingSysPathOrder(self): self.assertEqual( '2', self._update_queue[0]['stackFrames'][0]['locals'][0]['value']) - self.assertEmpty(imphook2._import_callbacks) + self.assertEmpty(imphook._import_callbacks) # Old module search algorithm rejects multiple matches. 
This test verifies # that when the new module search cannot find any match in sys.path, it @@ -183,7 +181,7 @@ def testMultipleDeferredMatches(self): self.assertEqual( '1', self._update_queue[0]['stackFrames'][0]['locals'][0]['value']) - self.assertEmpty(imphook2._import_callbacks) + self.assertEmpty(imphook._import_callbacks) def testNeverLoadedBreakpoint(self): open(os.path.join(self._test_package_dir, 'never_print.py'), 'w').close() @@ -223,7 +221,7 @@ def testDeferredNoCodeAtLine(self): params = desc['parameters'] self.assertIn('defer_empty.py', params[1]) self.assertEqual(params[0], '10') - self.assertEmpty(imphook2._import_callbacks) + self.assertEmpty(imphook._import_callbacks) def testDeferredBreakpointCancelled(self): open(os.path.join(self._test_package_dir, 'defer_cancel.py'), 'w').close() @@ -236,7 +234,7 @@ def testDeferredBreakpointCancelled(self): breakpoint.Clear() self.assertFalse(self._completed) - self.assertEmpty(imphook2._import_callbacks) + self.assertEmpty(imphook._import_callbacks) unused_no_code_line_above = 0 # BPTAG: NO_CODE_LINE_ABOVE # BPTAG: NO_CODE_LINE @@ -345,7 +343,7 @@ def testNonRootInitFile(self): self.assertEqual('DoPrint', self._update_queue[0]['stackFrames'][0]['function']) - self.assertEmpty(imphook2._import_callbacks) + self.assertEmpty(imphook._import_callbacks) self._update_queue = [] def testBreakpointInLoadedPackageFile(self): @@ -414,15 +412,26 @@ def testInvalidCondition(self): self.assertEqual(set(['BP_ID']), self._completed) self.assertLen(self._update_queue, 1) self.assertTrue(self._update_queue[0]['isFinalState']) - self.assertEqual( - { - 'isError': True, - 'refersTo': 'BREAKPOINT_CONDITION', - 'description': { - 'format': 'Expression could not be compiled: $0', - 'parameters': ['unexpected EOF while parsing'] - } - }, self._update_queue[0]['status']) + if sys.version_info.minor < 10: + self.assertEqual( + { + 'isError': True, + 'refersTo': 'BREAKPOINT_CONDITION', + 'description': { + 'format': 'Expression could 
not be compiled: $0', + 'parameters': ['unexpected EOF while parsing'] + } + }, self._update_queue[0]['status']) + else: + self.assertEqual( + { + 'isError': True, + 'refersTo': 'BREAKPOINT_CONDITION', + 'description': { + 'format': 'Expression could not be compiled: $0', + 'parameters': ['invalid syntax'] + } + }, self._update_queue[0]['status']) def testHit(self): breakpoint = python_breakpoint.PythonBreakpoint(self._template, self, self, From 7d09b9ab99410d8ffe7218a21ea9386eb51d34f2 Mon Sep 17 00:00:00 2001 From: jasonborg <48138260+jasonborg@users.noreply.github.com> Date: Tue, 21 Feb 2023 19:20:15 +0000 Subject: [PATCH 229/241] fix: Breakpoint Expiry in Firebase Backend Version (#74) - The firebase backend client code now calls on_idle periodically - The breakpoint expiration code now checks for the `createTimeUnixMsec` breakpoint field Fixes #72 --- src/googleclouddebugger/firebase_client.py | 6 ++++ src/googleclouddebugger/python_breakpoint.py | 36 ++++++++++++++++---- tests/py/python_breakpoint_test.py | 28 +++++++++++++++ tests/py/python_test_util.py | 4 +++ 4 files changed, 68 insertions(+), 6 deletions(-) diff --git a/src/googleclouddebugger/firebase_client.py b/src/googleclouddebugger/firebase_client.py index be59a3c..474211a 100644 --- a/src/googleclouddebugger/firebase_client.py +++ b/src/googleclouddebugger/firebase_client.py @@ -311,6 +311,12 @@ def _MainThreadProc(self): self._StartMarkActiveTimer() + while not self._shutdown: + if self.on_idle is not None: + self.on_idle() + + time.sleep(1) + def _TransmissionThreadProc(self): """Entry point for the transmission worker thread.""" diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index 1339ebe..c65b95d 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -252,16 +252,40 @@ def GetBreakpointId(self): return self.definition['id'] def GetExpirationTime(self): - """Computes the timestamp at 
which this breakpoint will expire.""" - # TODO: Move this to a common method. - if '.' not in self.definition['createTime']: + """Computes the timestamp at which this breakpoint will expire. + + If no creation time can be found an expiration time in the past will be + used. + """ + return self.GetCreateTime() + self.expiration_period + + def GetCreateTime(self): + """Retrieves the creation time of this breakpoint. + + If no creation time can be found a creation time in the past will be used. + """ + if 'createTime' in self.definition: + return self.GetTimeFromRfc3339Str(self.definition['createTime']) + else: + return self.GetTimeFromUnixMsec( + self.definition.get('createTimeUnixMsec', 0)) + + def GetTimeFromRfc3339Str(self, rfc3339_str): + if '.' not in rfc3339_str: fmt = '%Y-%m-%dT%H:%M:%S%Z' else: fmt = '%Y-%m-%dT%H:%M:%S.%f%Z' - create_datetime = datetime.strptime( - self.definition['createTime'].replace('Z', 'UTC'), fmt) - return create_datetime + self.expiration_period + return datetime.strptime(rfc3339_str.replace('Z', 'UTC'), fmt) + + def GetTimeFromUnixMsec(self, unix_msec): + try: + return datetime.fromtimestamp(unix_msec / 1000) + except (TypeError, ValueError, OSError, OverflowError) as e: + native.LogWarning( + 'Unexpected error (%s) occured processing unix_msec %s, breakpoint: %s' + % (repr(e), str(unix_msec), self.GetBreakpointId())) + return datetime.fromtimestamp(0) def ExpireBreakpoint(self): """Expires this breakpoint.""" diff --git a/tests/py/python_breakpoint_test.py b/tests/py/python_breakpoint_test.py index 153a2ba..6aff9c4 100644 --- a/tests/py/python_breakpoint_test.py +++ b/tests/py/python_breakpoint_test.py @@ -457,6 +457,22 @@ def testHitNewTimestamp(self): self.assertGreater(len(self._update_queue[0]['stackFrames']), 3) self.assertTrue(self._update_queue[0]['isFinalState']) + def testHitTimestampUnixMsec(self): + # Using the Snapshot Debugger (Firebase backend) version of creation time + self._template.pop('createTime', None); + 
self._template[ + 'createTimeUnixMsec'] = python_test_util.DateTimeToUnixMsec( + self._base_time) + + breakpoint = python_breakpoint.PythonBreakpoint(self._template, self, self, + None) + breakpoint._BreakpointEvent(native.BREAKPOINT_EVENT_HIT, + inspect.currentframe()) + self.assertEqual(set(['BP_ID']), self._completed) + self.assertLen(self._update_queue, 1) + self.assertGreater(len(self._update_queue[0]['stackFrames']), 3) + self.assertTrue(self._update_queue[0]['isFinalState']) + def testDoubleHit(self): breakpoint = python_breakpoint.PythonBreakpoint(self._template, self, self, None) @@ -541,6 +557,18 @@ def testExpirationTime(self): self.assertEqual( datetime(year=2015, month=1, day=2), breakpoint.GetExpirationTime()) + def testExpirationTimeUnixMsec(self): + # Using the Snapshot Debugger (Firebase backend) version of creation time + self._template.pop('createTime', None); + self._template[ + 'createTimeUnixMsec'] = python_test_util.DateTimeToUnixMsec( + self._base_time) + breakpoint = python_breakpoint.PythonBreakpoint(self._template, self, self, + None) + breakpoint.Clear() + self.assertEqual( + self._base_time + timedelta(hours=24), breakpoint.GetExpirationTime()) + def testExpirationTimeWithExpiresIn(self): definition = self._template.copy() definition['expires_in'] = { diff --git a/tests/py/python_test_util.py b/tests/py/python_test_util.py index 26d1aef..ffac4da 100644 --- a/tests/py/python_test_util.py +++ b/tests/py/python_test_util.py @@ -106,6 +106,10 @@ def DateTimeToTimestampNew(t): """ return t.strftime('%Y-%m-%dT%H:%M:%S') + 'Z' +def DateTimeToUnixMsec(t): + """Returns the Unix time as in integer value in milliseconds""" + return int(t.timestamp() * 1000) + def PackFrameVariable(breakpoint, name, frame=0, collection='locals'): """Finds local variable or argument by name. 
From c4b54d448ec1ac2b6bf70ddb0dd3def379a17ff4 Mon Sep 17 00:00:00 2001 From: jasonborg <48138260+jasonborg@users.noreply.github.com> Date: Tue, 21 Feb 2023 21:19:22 +0000 Subject: [PATCH 230/241] fix: Cleanup logs when Firebase RTDB does no exist (#76) The emitted logs when the DB does not exist will now resemble: ``` I0221 20:25:54.661298 93984 firebase_client.py:402] Failed to check debuggee presence at cdbg/debuggees/d-93910f74/registrationTimeUnixMsec: NotFoundError('404 Not Found') I0221 20:25:54.661362 93984 firebase_client.py:373] registering at https://my-project-cdbg.firebaseio.com, path: cdbg/debuggees/d-93910f74 I0221 20:25:54.690697 93984 firebase_client.py:390] Failed to register debuggee: NotFoundError('404 Not Found') ``` --- src/googleclouddebugger/firebase_client.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/googleclouddebugger/firebase_client.py b/src/googleclouddebugger/firebase_client.py index 474211a..01afccd 100644 --- a/src/googleclouddebugger/firebase_client.py +++ b/src/googleclouddebugger/firebase_client.py @@ -382,10 +382,10 @@ def _RegisterDebuggee(self): self.register_backoff.Succeeded() return (False, 0) # Proceed immediately to subscribing to breakpoints. - except BaseException: + except BaseException as e: # There is no significant benefit to handing different exceptions # in different ways; we will log and retry regardless. - native.LogInfo(f'Failed to register debuggee: {traceback.format_exc()}') + native.LogInfo(f'Failed to register debuggee: {repr(e)}') return (True, self.register_backoff.Failed()) def _CheckDebuggeePresence(self): @@ -394,9 +394,8 @@ def _CheckDebuggeePresence(self): snapshot = firebase_admin.db.reference(path).get() # The value doesn't matter; just return true if there's any value. 
return snapshot is not None - except BaseException: - native.LogInfo( - f'Failed to check debuggee presence: {traceback.format_exc()}') + except BaseException as e: + native.LogInfo(f'Failed to check debuggee presence at {path}: {repr(e)}') return False def _MarkDebuggeeActive(self): From e0c48db8aad24984c3772720f542f2720d985fb4 Mon Sep 17 00:00:00 2001 From: jasonborg <48138260+jasonborg@users.noreply.github.com> Date: Tue, 28 Feb 2023 18:47:07 +0000 Subject: [PATCH 231/241] feat: Support default-rtdb instance. (#78) Fixes #77 --- src/googleclouddebugger/firebase_client.py | 99 +++++++--- tests/py/firebase_client_test.py | 208 +++++++++++++++------ 2 files changed, 232 insertions(+), 75 deletions(-) diff --git a/src/googleclouddebugger/firebase_client.py b/src/googleclouddebugger/firebase_client.py index 01afccd..8dbe30a 100644 --- a/src/googleclouddebugger/firebase_client.py +++ b/src/googleclouddebugger/firebase_client.py @@ -106,8 +106,10 @@ def __init__(self): self._mark_active_interval_sec = 60 * 60 # 1 hour in seconds self._new_updates = threading.Event() self._breakpoint_subscription = None + self._firebase_app = None # Events for unit testing. + self.connection_complete = threading.Event() self.registration_complete = threading.Event() self.subscription_complete = threading.Event() @@ -116,6 +118,7 @@ def __init__(self): # # Delay before retrying failed request. + self.connect_backoff = backoff.Backoff() # Connect to the DB. self.register_backoff = backoff.Backoff() # Register debuggee. self.subscribe_backoff = backoff.Backoff() # Subscribe to updates. self.update_backoff = backoff.Backoff() # Update breakpoint. @@ -193,7 +196,10 @@ def SetupAuth(self, service_account_json_file: JSON file to use for credentials. If not provided, will default to application default credentials. database_url: Firebase realtime database URL to be used. 
If not - provided, will default to https://{project_id}-cdbg.firebaseio.com + provided, connect attempts to the following DBs will be made, in + order: + https://{project_id}-cdbg.firebaseio.com + https://{project_id}-default-rtdb.firebaseio.com Raises: NoProjectIdError: If the project id cannot be determined. """ @@ -220,11 +226,7 @@ def SetupAuth(self, 'Please specify the project id using the --project_id flag.') self._project_id = project_id - - if database_url: - self._database_url = database_url - else: - self._database_url = f'https://{self._project_id}-cdbg.firebaseio.com' + self._database_url = database_url def Start(self): """Starts the worker thread.""" @@ -287,15 +289,11 @@ def _MainThreadProc(self): which will run in its own thread. That thread will be owned by self._breakpoint_subscription. """ - # Note: if self._credentials is None, default app credentials will be used. - try: - firebase_admin.initialize_app(self._credentials, - {'databaseURL': self._database_url}) - except ValueError: - native.LogWarning( - f'Failed to initialize firebase: {traceback.format_exc()}') - native.LogError('Failed to start debugger agent. 
Giving up.') - return + connection_required, delay = True, 0 + while connection_required: + time.sleep(delay) + connection_required, delay = self._ConnectToDb() + self.connection_complete.set() registration_required, delay = True, 0 while registration_required: @@ -343,6 +341,45 @@ def _StartMarkActiveTimer(self): self._MarkActiveTimerFunc) self._mark_active_timer.start() + def _ConnectToDb(self): + urls = [self._database_url] if self._database_url is not None else \ + [f'https://{self._project_id}-cdbg.firebaseio.com', + f'https://{self._project_id}-default-rtdb.firebaseio.com'] + + for url in urls: + native.LogInfo(f'Attempting to connect to DB with url: {url}') + + status, firebase_app = self._TryInitializeDbForUrl(url) + if status: + native.LogInfo(f'Successfully connected to DB with url: {url}') + self._database_url = url + self._firebase_app = firebase_app + self.connect_backoff.Succeeded() + return (False, 0) # Proceed immediately to registering the debuggee. + + return (True, self.connect_backoff.Failed()) + + def _TryInitializeDbForUrl(self, database_url): + # Note: if self._credentials is None, default app credentials will be used. + app = None + try: + app = firebase_admin.initialize_app( + self._credentials, {'databaseURL': database_url}, name='cdbg') + + if self._CheckSchemaVersionPresence(app): + return True, app + + except ValueError: + native.LogWarning( + f'Failed to initialize firebase: {traceback.format_exc()}') + + # This is the failure path, if we hit here we must cleanup the app handle + if app is not None: + firebase_admin.delete_app(app) + app = None + + return False, app + def _RegisterDebuggee(self): """Single attempt to register the debuggee. 
@@ -371,11 +408,12 @@ def _RegisterDebuggee(self): else: debuggee_path = f'cdbg/debuggees/{self._debuggee_id}' native.LogInfo( - f'registering at {self._database_url}, path: {debuggee_path}') + f'Registering at {self._database_url}, path: {debuggee_path}') debuggee_data = copy.deepcopy(debuggee) debuggee_data['registrationTimeUnixMsec'] = {'.sv': 'timestamp'} debuggee_data['lastUpdateTimeUnixMsec'] = {'.sv': 'timestamp'} - firebase_admin.db.reference(debuggee_path).set(debuggee_data) + firebase_admin.db.reference(debuggee_path, + self._firebase_app).set(debuggee_data) native.LogInfo( f'Debuggee registered successfully, ID: {self._debuggee_id}') @@ -388,10 +426,21 @@ def _RegisterDebuggee(self): native.LogInfo(f'Failed to register debuggee: {repr(e)}') return (True, self.register_backoff.Failed()) + def _CheckSchemaVersionPresence(self, firebase_app): + path = f'cdbg/schema_version' + try: + snapshot = firebase_admin.db.reference(path, firebase_app).get() + # The value doesn't matter; just return true if there's any value. + return snapshot is not None + except BaseException as e: + native.LogInfo( + f'Failed to check schema version presence at {path}: {repr(e)}') + return False + def _CheckDebuggeePresence(self): path = f'cdbg/debuggees/{self._debuggee_id}/registrationTimeUnixMsec' try: - snapshot = firebase_admin.db.reference(path).get() + snapshot = firebase_admin.db.reference(path, self._firebase_app).get() # The value doesn't matter; just return true if there's any value. 
return snapshot is not None except BaseException as e: @@ -402,7 +451,8 @@ def _MarkDebuggeeActive(self): active_path = f'cdbg/debuggees/{self._debuggee_id}/lastUpdateTimeUnixMsec' try: server_time = {'.sv': 'timestamp'} - firebase_admin.db.reference(active_path).set(server_time) + firebase_admin.db.reference(active_path, + self._firebase_app).set(server_time) except BaseException: native.LogInfo( f'Failed to mark debuggee active: {traceback.format_exc()}') @@ -415,7 +465,7 @@ def _SubscribeToBreakpoints(self): path = f'cdbg/breakpoints/{self._debuggee_id}/active' native.LogInfo(f'Subscribing to breakpoint updates at {path}') - ref = firebase_admin.db.reference(path) + ref = firebase_admin.db.reference(path, self._firebase_app) try: self._breakpoint_subscription = ref.listen(self._ActiveBreakpointCallback) return (False, 0) @@ -508,7 +558,8 @@ def _TransmitBreakpointUpdates(self): # First, remove from the active breakpoints. bp_ref = firebase_admin.db.reference( - f'cdbg/breakpoints/{self._debuggee_id}/active/{bp_id}') + f'cdbg/breakpoints/{self._debuggee_id}/active/{bp_id}', + self._firebase_app) bp_ref.delete() summary_data = breakpoint_data @@ -516,7 +567,8 @@ def _TransmitBreakpointUpdates(self): if is_snapshot: # Note that there may not be snapshot data. bp_ref = firebase_admin.db.reference( - f'cdbg/breakpoints/{self._debuggee_id}/snapshot/{bp_id}') + f'cdbg/breakpoints/{self._debuggee_id}/snapshot/{bp_id}', + self._firebase_app) bp_ref.set(breakpoint_data) # Now strip potential snapshot data. @@ -527,7 +579,8 @@ def _TransmitBreakpointUpdates(self): # Then add it to the list of final breakpoints. 
bp_ref = firebase_admin.db.reference( - f'cdbg/breakpoints/{self._debuggee_id}/final/{bp_id}') + f'cdbg/breakpoints/{self._debuggee_id}/final/{bp_id}', + self._firebase_app) bp_ref.set(summary_data) native.LogInfo(f'Breakpoint {bp_id} update transmitted successfully') diff --git a/tests/py/firebase_client_test.py b/tests/py/firebase_client_test.py index 5cd8fb6..d72c68c 100644 --- a/tests/py/firebase_client_test.py +++ b/tests/py/firebase_client_test.py @@ -6,6 +6,7 @@ import tempfile import time from unittest import mock +from unittest.mock import ANY from unittest.mock import MagicMock from unittest.mock import call from unittest.mock import patch @@ -20,6 +21,7 @@ import firebase_admin.credentials from firebase_admin.exceptions import FirebaseError +from firebase_admin.exceptions import NotFoundError TEST_PROJECT_ID = 'test-project-id' METADATA_PROJECT_URL = ('http://metadata.google.internal/computeMetadata/' @@ -61,8 +63,8 @@ def setUp(self): # Speed up the delays for retry loops. for backoff in [ - self._client.register_backoff, self._client.subscribe_backoff, - self._client.update_backoff + self._client.connect_backoff, self._client.register_backoff, + self._client.subscribe_backoff, self._client.update_backoff ]: backoff.min_interval_sec /= 100000.0 backoff.max_interval_sec /= 100000.0 @@ -73,19 +75,33 @@ def setUp(self): self._mock_initialize_app = patcher.start() self.addCleanup(patcher.stop) + patcher = patch('firebase_admin.delete_app') + self._mock_delete_app = patcher.start() + self.addCleanup(patcher.stop) + patcher = patch('firebase_admin.db.reference') self._mock_db_ref = patcher.start() self.addCleanup(patcher.stop) # Set up the mocks for the database refs. 
+ self._firebase_app = 'FIREBASE_APP_HANDLE' + self._mock_initialize_app.return_value = self._firebase_app + self._mock_schema_version_ref = MagicMock() + self._mock_schema_version_ref.get.return_value = "2" self._mock_presence_ref = MagicMock() self._mock_presence_ref.get.return_value = None self._mock_active_ref = MagicMock() self._mock_register_ref = MagicMock() self._fake_subscribe_ref = FakeReference() + + # Setup common happy path reference sequence: + # cdbg/schema_version + # cdbg/debuggees/{debuggee_id}/registrationTimeUnixMsec + # cdbg/debuggees/{debuggee_id} + # cdbg/breakpoints/{debuggee_id}/active self._mock_db_ref.side_effect = [ - self._mock_presence_ref, self._mock_register_ref, - self._fake_subscribe_ref + self._mock_schema_version_ref, self._mock_presence_ref, + self._mock_register_ref, self._fake_subscribe_ref ] def tearDown(self): @@ -100,8 +116,6 @@ def testSetupAuthDefault(self): self._client.SetupAuth() self.assertEqual(TEST_PROJECT_ID, self._client._project_id) - self.assertEqual(f'https://{TEST_PROJECT_ID}-cdbg.firebaseio.com', - self._client._database_url) def testSetupAuthOverrideProjectIdNumber(self): # If a project id is provided, we use it. 
@@ -109,8 +123,6 @@ def testSetupAuthOverrideProjectIdNumber(self): self._client.SetupAuth(project_id=project_id) self.assertEqual(project_id, self._client._project_id) - self.assertEqual(f'https://{project_id}-cdbg.firebaseio.com', - self._client._database_url) def testSetupAuthServiceAccountJsonAuth(self): # We'll load credentials from the provided file (mocked for simplicity) @@ -142,11 +154,14 @@ def testStart(self): debuggee_id = self._client._debuggee_id self._mock_initialize_app.assert_called_with( - None, {'databaseURL': f'https://{TEST_PROJECT_ID}-cdbg.firebaseio.com'}) + None, {'databaseURL': f'https://{TEST_PROJECT_ID}-cdbg.firebaseio.com'}, + name='cdbg') self.assertEqual([ - call(f'cdbg/debuggees/{debuggee_id}/registrationTimeUnixMsec'), - call(f'cdbg/debuggees/{debuggee_id}'), - call(f'cdbg/breakpoints/{debuggee_id}/active') + call(f'cdbg/schema_version', self._firebase_app), + call(f'cdbg/debuggees/{debuggee_id}/registrationTimeUnixMsec', + self._firebase_app), + call(f'cdbg/debuggees/{debuggee_id}', self._firebase_app), + call(f'cdbg/breakpoints/{debuggee_id}/active', self._firebase_app) ], self._mock_db_ref.call_args_list) # Verify that the register call has been made. 
@@ -155,13 +170,97 @@ def testStart(self): expected_data['lastUpdateTimeUnixMsec'] = {'.sv': 'timestamp'} self._mock_register_ref.set.assert_called_once_with(expected_data) + def testStartCustomDbUrlConfigured(self): + self._client.SetupAuth( + project_id=TEST_PROJECT_ID, + database_url='https://custom-db.firebaseio.com') + self._client.Start() + self._client.connection_complete.wait() + + debuggee_id = self._client._debuggee_id + + self._mock_initialize_app.assert_called_once_with( + None, {'databaseURL': 'https://custom-db.firebaseio.com'}, name='cdbg') + + def testStartConnectFallsBackToDefaultRtdb(self): + # A new schema_version ref will be fetched each time + self._mock_db_ref.side_effect = [ + self._mock_schema_version_ref, self._mock_schema_version_ref, + self._mock_presence_ref, self._mock_register_ref, + self._fake_subscribe_ref + ] + + # Fail on the '-cdbg' instance test, succeed on the '-default-rtdb' one. + self._mock_schema_version_ref.get.side_effect = [ + NotFoundError("Not found", http_response=404), '2' + ] + + self._client.SetupAuth(project_id=TEST_PROJECT_ID) + self._client.Start() + self._client.connection_complete.wait() + + self.assertEqual([ + call( + None, + {'databaseURL': f'https://{TEST_PROJECT_ID}-cdbg.firebaseio.com'}, + name='cdbg'), + call( + None, { + 'databaseURL': + f'https://{TEST_PROJECT_ID}-default-rtdb.firebaseio.com' + }, + name='cdbg') + ], self._mock_initialize_app.call_args_list) + + self.assertEqual(1, self._mock_delete_app.call_count) + + def testStartConnectFailsThenSucceeds(self): + # A new schema_version ref will be fetched each time + self._mock_db_ref.side_effect = [ + self._mock_schema_version_ref, self._mock_schema_version_ref, + self._mock_schema_version_ref, self._mock_presence_ref, + self._mock_register_ref, self._fake_subscribe_ref + ] + + # Completely fail on the initial attempt at reaching a DB, then succeed on + # 2nd attempt. 
One full attempt will try the '-cdbg' db instance followed by + # the '-default-rtdb' one. + self._mock_schema_version_ref.get.side_effect = [ + NotFoundError("Not found", http_response=404), + NotFoundError("Not found", http_response=404), '2' + ] + + self._client.SetupAuth(project_id=TEST_PROJECT_ID) + self._client.Start() + self._client.connection_complete.wait() + + self.assertEqual([ + call( + None, + {'databaseURL': f'https://{TEST_PROJECT_ID}-cdbg.firebaseio.com'}, + name='cdbg'), + call( + None, { + 'databaseURL': + f'https://{TEST_PROJECT_ID}-default-rtdb.firebaseio.com' + }, + name='cdbg'), + call( + None, + {'databaseURL': f'https://{TEST_PROJECT_ID}-cdbg.firebaseio.com'}, + name='cdbg') + ], self._mock_initialize_app.call_args_list) + + self.assertEqual(2, self._mock_delete_app.call_count) + def testStartAlreadyPresent(self): # Create a mock for just this test that claims the debuggee is registered. mock_presence_ref = MagicMock() mock_presence_ref.get.return_value = 'present!' self._mock_db_ref.side_effect = [ - mock_presence_ref, self._mock_active_ref, self._fake_subscribe_ref + self._mock_schema_version_ref, mock_presence_ref, self._mock_active_ref, + self._fake_subscribe_ref ] self._client.SetupAuth(project_id=TEST_PROJECT_ID) @@ -171,9 +270,12 @@ def testStartAlreadyPresent(self): debuggee_id = self._client._debuggee_id self.assertEqual([ - call(f'cdbg/debuggees/{debuggee_id}/registrationTimeUnixMsec'), - call(f'cdbg/debuggees/{debuggee_id}/lastUpdateTimeUnixMsec'), - call(f'cdbg/breakpoints/{debuggee_id}/active') + call(f'cdbg/schema_version', self._firebase_app), + call(f'cdbg/debuggees/{debuggee_id}/registrationTimeUnixMsec', + self._firebase_app), + call(f'cdbg/debuggees/{debuggee_id}/lastUpdateTimeUnixMsec', + self._firebase_app), + call(f'cdbg/breakpoints/{debuggee_id}/active', self._firebase_app) ], self._mock_db_ref.call_args_list) # Verify that the register call has been made. 
@@ -182,9 +284,9 @@ def testStartAlreadyPresent(self): def testStartRegisterRetry(self): # A new set of db refs are fetched on each retry. self._mock_db_ref.side_effect = [ - self._mock_presence_ref, self._mock_register_ref, - self._mock_presence_ref, self._mock_register_ref, - self._fake_subscribe_ref + self._mock_schema_version_ref, self._mock_presence_ref, + self._mock_register_ref, self._mock_presence_ref, + self._mock_register_ref, self._fake_subscribe_ref ] # Fail once, then succeed on retry. @@ -202,6 +304,7 @@ def testStartSubscribeRetry(self): # A new db ref is fetched on each retry. self._mock_db_ref.side_effect = [ + self._mock_schema_version_ref, self._mock_presence_ref, self._mock_register_ref, mock_subscribe_ref, # Fail the first time @@ -212,28 +315,27 @@ def testStartSubscribeRetry(self): self._client.Start() self._client.subscription_complete.wait() - self.assertEqual(4, self._mock_db_ref.call_count) + self.assertEqual(5, self._mock_db_ref.call_count) def testMarkActiveTimer(self): - # Make sure that there are enough refs queued up. - refs = list(self._mock_db_ref.side_effect) - refs.extend([self._mock_active_ref] * 10) - self._mock_db_ref.side_effect = refs - - # Speed things WAY up rather than waiting for hours. - self._client._mark_active_interval_sec = 0.1 + # Make sure that there are enough refs queued up. + refs = list(self._mock_db_ref.side_effect) + refs.extend([self._mock_active_ref] * 10) + self._mock_db_ref.side_effect = refs - self._client.SetupAuth(project_id=TEST_PROJECT_ID) - self._client.Start() - self._client.subscription_complete.wait() + # Speed things WAY up rather than waiting for hours. + self._client._mark_active_interval_sec = 0.1 - # wait long enough for the timer to trigger a few times. 
- time.sleep(0.5) + self._client.SetupAuth(project_id=TEST_PROJECT_ID) + self._client.Start() + self._client.subscription_complete.wait() - print(f'Timer triggered {self._mock_active_ref.set.call_count} times') - self.assertTrue(self._mock_active_ref.set.call_count > 3) - self._mock_active_ref.set.assert_called_with({'.sv': 'timestamp'}) + # wait long enough for the timer to trigger a few times. + time.sleep(0.5) + print(f'Timer triggered {self._mock_active_ref.set.call_count} times') + self.assertTrue(self._mock_active_ref.set.call_count > 3) + self._mock_active_ref.set.assert_called_with({'.sv': 'timestamp'}) def testBreakpointSubscription(self): # This class will keep track of the breakpoint updates and will check @@ -310,9 +412,9 @@ def testEnqueueBreakpointUpdate(self): final_ref_mock = MagicMock() self._mock_db_ref.side_effect = [ - self._mock_presence_ref, self._mock_register_ref, - self._fake_subscribe_ref, active_ref_mock, snapshot_ref_mock, - final_ref_mock + self._mock_schema_version_ref, self._mock_presence_ref, + self._mock_register_ref, self._fake_subscribe_ref, active_ref_mock, + snapshot_ref_mock, final_ref_mock ] self._client.SetupAuth(project_id=TEST_PROJECT_ID) @@ -369,14 +471,14 @@ def testEnqueueBreakpointUpdate(self): db_ref_calls = self._mock_db_ref.call_args_list self.assertEqual( - call(f'cdbg/breakpoints/{debuggee_id}/active/{breakpoint_id}'), - db_ref_calls[3]) + call(f'cdbg/breakpoints/{debuggee_id}/active/{breakpoint_id}', + self._firebase_app), db_ref_calls[4]) self.assertEqual( - call(f'cdbg/breakpoints/{debuggee_id}/snapshot/{breakpoint_id}'), - db_ref_calls[4]) + call(f'cdbg/breakpoints/{debuggee_id}/snapshot/{breakpoint_id}', + self._firebase_app), db_ref_calls[5]) self.assertEqual( - call(f'cdbg/breakpoints/{debuggee_id}/final/{breakpoint_id}'), - db_ref_calls[5]) + call(f'cdbg/breakpoints/{debuggee_id}/final/{breakpoint_id}', + self._firebase_app), db_ref_calls[6]) active_ref_mock.delete.assert_called_once() 
snapshot_ref_mock.set.assert_called_once_with(full_breakpoint) @@ -387,8 +489,9 @@ def testEnqueueBreakpointUpdateWithLogpoint(self): final_ref_mock = MagicMock() self._mock_db_ref.side_effect = [ - self._mock_presence_ref, self._mock_register_ref, - self._fake_subscribe_ref, active_ref_mock, final_ref_mock + self._mock_schema_version_ref, self._mock_presence_ref, + self._mock_register_ref, self._fake_subscribe_ref, active_ref_mock, + final_ref_mock ] self._client.SetupAuth(project_id=TEST_PROJECT_ID) @@ -436,19 +539,19 @@ def testEnqueueBreakpointUpdateWithLogpoint(self): db_ref_calls = self._mock_db_ref.call_args_list self.assertEqual( - call(f'cdbg/breakpoints/{debuggee_id}/active/{breakpoint_id}'), - db_ref_calls[3]) + call(f'cdbg/breakpoints/{debuggee_id}/active/{breakpoint_id}', + self._firebase_app), db_ref_calls[4]) self.assertEqual( - call(f'cdbg/breakpoints/{debuggee_id}/final/{breakpoint_id}'), - db_ref_calls[4]) + call(f'cdbg/breakpoints/{debuggee_id}/final/{breakpoint_id}', + self._firebase_app), db_ref_calls[5]) active_ref_mock.delete.assert_called_once() final_ref_mock.set.assert_called_once_with(output_breakpoint) # Make sure that the snapshot node was not accessed. 
self.assertTrue( - call(f'cdbg/breakpoints/{debuggee_id}/snapshot/{breakpoint_id}') not in - db_ref_calls) + call(f'cdbg/breakpoints/{debuggee_id}/snapshot/{breakpoint_id}', ANY) + not in db_ref_calls) def testEnqueueBreakpointUpdateRetry(self): active_ref_mock = MagicMock() @@ -468,6 +571,7 @@ def testEnqueueBreakpointUpdateRetry(self): ] self._mock_db_ref.side_effect = [ + self._mock_schema_version_ref, self._mock_presence_ref, self._mock_register_ref, self._fake_subscribe_ref, # setup From 42f5fc31a1d4c1d8a91eaa4d6c51a0773dae7d93 Mon Sep 17 00:00:00 2001 From: jasonborg <48138260+jasonborg@users.noreply.github.com> Date: Wed, 1 Mar 2023 18:26:52 +0000 Subject: [PATCH 232/241] fix: Module not found corner case (#80) Fixes #79 --- src/googleclouddebugger/module_utils.py | 25 ++++++++++++++++++++ src/googleclouddebugger/python_breakpoint.py | 15 +----------- tests/py/module_utils_test.py | 7 ++++++ 3 files changed, 33 insertions(+), 14 deletions(-) diff --git a/src/googleclouddebugger/module_utils.py b/src/googleclouddebugger/module_utils.py index 738fc8c..53f2e37 100644 --- a/src/googleclouddebugger/module_utils.py +++ b/src/googleclouddebugger/module_utils.py @@ -16,6 +16,26 @@ import os import sys +def NormalizePath(path): + """Normalizes a path. + + E.g. One example is it will convert "/a/b/./c" -> "/a/b/c" + """ + # TODO: Calling os.path.normpath "may change the meaning of a + # path that contains symbolic links" (e.g., "A/foo/../B" != "A/B" if foo is a + # symlink). This might cause trouble when matching against loaded module + # paths. We should try to avoid using it. + # Example: + # > import symlink.a + # > symlink.a.__file__ + # symlink/a.py + # > import target.a + # > starget.a.__file__ + # target/a.py + # Python interpreter treats these as two separate modules. So, we also need to + # handle them the same way. + return os.path.normpath(path) + def IsPathSuffix(mod_path, path): """Checks whether path is a full path suffix of mod_path. 
@@ -69,6 +89,11 @@ def GetLoadedModuleBySuffix(path): if not os.path.isabs(mod_root): mod_root = os.path.join(os.getcwd(), mod_root) + # In the following invocation 'python3 ./main.py' (using the ./), the + # mod_root variable will '/base/path/./main'. In order to correctly compare + # it with the root variable, it needs to be '/base/path/main'. + mod_root = NormalizePath(mod_root) + if IsPathSuffix(mod_root, root): return module diff --git a/src/googleclouddebugger/python_breakpoint.py b/src/googleclouddebugger/python_breakpoint.py index c65b95d..62f2512 100644 --- a/src/googleclouddebugger/python_breakpoint.py +++ b/src/googleclouddebugger/python_breakpoint.py @@ -134,20 +134,7 @@ def _MultipleModulesFoundError(path, candidates): def _NormalizePath(path): """Removes surrounding whitespace, leading separator and normalize.""" - # TODO: Calling os.path.normpath "may change the meaning of a - # path that contains symbolic links" (e.g., "A/foo/../B" != "A/B" if foo is a - # symlink). This might cause trouble when matching against loaded module - # paths. We should try to avoid using it. - # Example: - # > import symlink.a - # > symlink.a.__file__ - # symlink/a.py - # > import target.a - # > starget.a.__file__ - # target/a.py - # Python interpreter treats these as two separate modules. So, we also need to - # handle them the same way. 
- return os.path.normpath(path.strip().lstrip(os.sep)) + return module_utils.NormalizePath(path.strip().lstrip(os.sep)) class PythonBreakpoint(object): diff --git a/tests/py/module_utils_test.py b/tests/py/module_utils_test.py index 0ed0fd2..ac847ad 100644 --- a/tests/py/module_utils_test.py +++ b/tests/py/module_utils_test.py @@ -156,6 +156,13 @@ def testMainLoadedModuleFromSuffix(self): self.assertTrue(m1, 'Module not found') self.assertEqual('/a/b/p/m.pyc', m1.__file__) + def testMainWithDotSlashLoadedModuleFromSuffix(self): + # Lookup module started via 'python3 ./m.py', notice the './' + _AddSysModule('__main__', '/a/b/p/./m.pyc') + m1 = module_utils.GetLoadedModuleBySuffix('/a/b/p/m.py') + self.assertIsNotNone(m1) + self.assertTrue(m1, 'Module not found') + self.assertEqual('/a/b/p/./m.pyc', m1.__file__) if __name__ == '__main__': absltest.main() From ec24144b191c63bdf88f72d3b930823157035ca1 Mon Sep 17 00:00:00 2001 From: jasonborg <48138260+jasonborg@users.noreply.github.com> Date: Wed, 1 Mar 2023 18:53:39 +0000 Subject: [PATCH 233/241] chore: Release version 3.4 (#81) --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index a61798c..5c21bc2 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. 
-__version__ = '3.3' +__version__ = '3.4' From 8dcaf94a3bf80dc030f133e6e3c698232e97f03d Mon Sep 17 00:00:00 2001 From: jasonborg <48138260+jasonborg@users.noreply.github.com> Date: Thu, 2 Mar 2023 17:54:19 +0000 Subject: [PATCH 234/241] chore: Add extra tess cleanup to build scripts (#82) This addresses an issue where first running `./build_and_test.sh` followed by `build-dist.sh` could see some test failures while running the second script. One concrete scenario was when the local python version was 3.10.9 while version 3.10.10 was used during `build-dist.sh`. --- build_and_test.sh | 6 ++++++ src/build-wheels.sh | 17 ++++++++++++++--- src/build.sh | 11 ++++++++++- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/build_and_test.sh b/build_and_test.sh index 8e742ea..4ce82b9 100755 --- a/build_and_test.sh +++ b/build_and_test.sh @@ -1,5 +1,8 @@ #!/bin/bash -e +# Clean up any previous generated test files. +rm -rf tests/py/__pycache__ + cd src ./build.sh cd .. @@ -10,3 +13,6 @@ pip3 install -r requirements_dev.txt pip3 install src/dist/* --force-reinstall python3 -m pytest tests/py deactivate + +# Clean up any generated test files. +rm -rf tests/py/__pycache__ diff --git a/src/build-wheels.sh b/src/build-wheels.sh index 1e4a0c6..8477d84 100755 --- a/src/build-wheels.sh +++ b/src/build-wheels.sh @@ -10,8 +10,14 @@ ROOT=$(cd $(dirname "${BASH_SOURCE[0]}") >/dev/null; /bin/pwd -P) # Parallelize the build over N threads where N is the number of cores * 1.5. PARALLEL_BUILD_OPTION="-j $(($(nproc 2> /dev/null || echo 4)*3/2))" -# Clean up any previous build files. -rm -rf ${ROOT}/build ${ROOT}/dist ${ROOT}/setup.cfg +# Clean up any previous build/test files. +rm -rf \ + ${ROOT}/build \ + ${ROOT}/dist \ + ${ROOT}/setup.cfg \ + ${ROOT}/google_python_cloud_debugger.egg-info \ + /io/dist \ + /io/tests/py/__pycache__ # Create directory for third-party libraries. mkdir -p ${ROOT}/build/third_party @@ -78,6 +84,11 @@ done popd # Clean up temporary directories. 
-rm -rf ${ROOT}/build ${ROOT}/setup.cfg +rm -rf \ + ${ROOT}/build \ + ${ROOT}/setup.cfg \ + ${ROOT}/google_python_cloud_debugger.egg-info \ + /io/tests/py/__pycache__ + echo "Build artifacts are in the dist directory" diff --git a/src/build.sh b/src/build.sh index ba9a944..f61ef2f 100755 --- a/src/build.sh +++ b/src/build.sh @@ -42,7 +42,11 @@ ROOT=$(cd $(dirname "${BASH_SOURCE[0]}") >/dev/null; /bin/pwd -P) PARALLEL_BUILD_OPTION="-j $(($(nproc 2> /dev/null || echo 4)*3/2))" # Clean up any previous build files. -rm -rf ${ROOT}/build ${ROOT}/dist ${ROOT}/setup.cfg +rm -rf \ + ${ROOT}/build \ + ${ROOT}/dist \ + ${ROOT}/setup.cfg \ + ${ROOT}/google_python_cloud_debugger.egg-info # Create directory for third-party libraries. mkdir -p ${ROOT}/build/third_party @@ -91,3 +95,8 @@ pushd ${ROOT} "${PYTHON:-python3}" -m pip wheel . --no-deps -w dist popd +# Clean up temporary directories. +rm -rf \ + ${ROOT}/build \ + ${ROOT}/setup.cfg \ + ${ROOT}/google_python_cloud_debugger.egg-info From ed9d2b935a53d2dee7f0bc43540a1fdc9dd314f7 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Tue, 4 Apr 2023 09:49:27 -0400 Subject: [PATCH 235/241] fix: address some cases where jumps were not being updated (#83) Affects Python 3.10 only; in some situations jump instructions were not updated with new targets due to the targets being interpretted as memory offsets instead of instruction offsets. --- .../bytecode_manipulator.cc | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/googleclouddebugger/bytecode_manipulator.cc b/src/googleclouddebugger/bytecode_manipulator.cc index 3c95edd..44cef74 100644 --- a/src/googleclouddebugger/bytecode_manipulator.cc +++ b/src/googleclouddebugger/bytecode_manipulator.cc @@ -132,6 +132,9 @@ static PythonOpcodeType GetOpcodeType(uint8_t opcode) { #if PY_VERSION_HEX < 0x03080000 // Removed in Python 3.8. 
case CONTINUE_LOOP: +#endif +#if PY_VERSION_HEX >= 0x03090000 + case JUMP_IF_NOT_EXC_MATCH: #endif return BRANCH_ABSOLUTE_OPCODE; @@ -144,10 +147,18 @@ static PythonOpcodeType GetOpcodeType(uint8_t opcode) { static int GetBranchTarget(int offset, PythonInstruction instruction) { switch (GetOpcodeType(instruction.opcode)) { case BRANCH_DELTA_OPCODE: +#if PY_VERSION_HEX < 0x030A0000 return offset + instruction.size + instruction.argument; +#else + return offset + instruction.size + instruction.argument * 2; +#endif case BRANCH_ABSOLUTE_OPCODE: +#if PY_VERSION_HEX < 0x030A0000 return instruction.argument; +#else + return instruction.argument * 2; +#endif default: DCHECK(false) << "Not a branch instruction"; @@ -428,13 +439,21 @@ static bool InsertAndUpdateBranchInstructions( // argument of 0 even when it is not required. This needs to be taken // into account when calculating the target of a branch instruction. int inst_size = std::max(instruction.size, it->original_size); +#if PY_VERSION_HEX < 0x030A0000 int32_t target = it->current_offset + inst_size + arg; +#else + int32_t target = it->current_offset + inst_size + arg * 2; +#endif need_to_update = it->current_offset < insertion.current_offset && insertion.current_offset < target; } else if (opcode_type == BRANCH_ABSOLUTE_OPCODE) { // For absolute branches, the argument needs to be updated if the // insertion before the target. 
+#if PY_VERSION_HEX < 0x030A0000 need_to_update = insertion.current_offset < arg; +#else + need_to_update = insertion.current_offset < arg * 2; +#endif } // If we are inserting the original method call instructions, we want to From 317073840c3930e64bdc2498cd56572deac3f3ee Mon Sep 17 00:00:00 2001 From: James McTavish Date: Wed, 5 Apr 2023 09:48:14 -0400 Subject: [PATCH 236/241] chore: release version 3.5 (#84) --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index 5c21bc2..0f8f662 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. -__version__ = '3.4' +__version__ = '3.5' From 75c678b0ad5d667f69f0e14f6e85d967ae0d031e Mon Sep 17 00:00:00 2001 From: James McTavish Date: Wed, 31 May 2023 16:11:41 -0400 Subject: [PATCH 237/241] feat!: default to Firebase backend. (#86) The Cloud Debugger API is shut down, so all agents must use the Firebase backend. 
--- requirements.txt | 4 - src/googleclouddebugger/__init__.py | 19 +- src/googleclouddebugger/gcp_hub_client.py | 579 ---------- src/setup.py | 4 - tests/py/gcp_hub_client_test.py | 501 --------- tests/py/integration_test.py | 1217 ++++++++++----------- 6 files changed, 611 insertions(+), 1713 deletions(-) delete mode 100644 src/googleclouddebugger/gcp_hub_client.py delete mode 100644 tests/py/gcp_hub_client_test.py diff --git a/requirements.txt b/requirements.txt index 13f973e..784eb7e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,2 @@ -google-auth>=1.0.0 -google-auth-httplib2 -google-api-python-client -google-api-core firebase_admin>=5.3.0 pyyaml diff --git a/src/googleclouddebugger/__init__.py b/src/googleclouddebugger/__init__.py index 259cd88..378f6a7 100644 --- a/src/googleclouddebugger/__init__.py +++ b/src/googleclouddebugger/__init__.py @@ -29,7 +29,6 @@ from . import breakpoints_manager from . import collector from . import error_data_visibility_policy -from . import gcp_hub_client from . import firebase_client from . import glob_data_visibility_policy from . 
import yaml_data_visibility_config_reader @@ -52,20 +51,10 @@ def _StartDebugger(): cdbg_native.LogInfo( f'Initializing Cloud Debugger Python agent version: {__version__}') - use_firebase = _flags.get('use_firebase') - if use_firebase: - _backend_client = firebase_client.FirebaseClient() - _backend_client.SetupAuth( - _flags.get('project_id'), _flags.get('service_account_json_file'), - _flags.get('firebase_db_url')) - else: - _backend_client = gcp_hub_client.GcpHubClient() - _backend_client.SetupAuth( - _flags.get('project_id'), _flags.get('project_number'), - _flags.get('service_account_json_file')) - _backend_client.SetupCanaryMode( - _flags.get('breakpoint_enable_canary'), - _flags.get('breakpoint_allow_canary_override')) + _backend_client = firebase_client.FirebaseClient() + _backend_client.SetupAuth( + _flags.get('project_id'), _flags.get('service_account_json_file'), + _flags.get('firebase_db_url')) visibility_policy = _GetVisibilityPolicy() diff --git a/src/googleclouddebugger/gcp_hub_client.py b/src/googleclouddebugger/gcp_hub_client.py deleted file mode 100644 index 3b03708..0000000 --- a/src/googleclouddebugger/gcp_hub_client.py +++ /dev/null @@ -1,579 +0,0 @@ -# Copyright 2015 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS-IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Communicates with Cloud Debugger backend over HTTP.""" - -from collections import deque -import copy -import hashlib -import inspect -import json -import logging -import os -import platform -import socket -import sys -import threading -import time -import traceback - -import google_auth_httplib2 -import googleapiclient -import googleapiclient.discovery -import httplib2 - -import google.auth -from google.oauth2 import service_account - -from . import backoff -from . import cdbg_native as native -from . import labels -from . import uniquifier_computer -from . import application_info -from . import version -# This module catches all exception. This is safe because it runs in -# a daemon thread (so we are not blocking Ctrl+C). We need to catch all -# the exception because HTTP client is unpredictable as far as every -# exception it can throw. -# pylint: disable=broad-except - -# API scope we are requesting when service account authentication is enabled. -_CLOUD_PLATFORM_SCOPE = ['https://www.googleapis.com/auth/cloud-platform'] - -# Set of all known debuggee labels (passed down as flags). The value of -# a map is optional environment variable that can be used to set the flag -# (flags still take precedence). -_DEBUGGEE_LABELS = { - labels.Debuggee.MODULE: [ - 'GAE_SERVICE', 'GAE_MODULE_NAME', 'K_SERVICE', 'FUNCTION_NAME' - ], - labels.Debuggee.VERSION: [ - 'GAE_VERSION', 'GAE_MODULE_VERSION', 'K_REVISION', - 'X_GOOGLE_FUNCTION_VERSION' - ], - labels.Debuggee.MINOR_VERSION: ['GAE_DEPLOYMENT_ID', 'GAE_MINOR_VERSION'] -} - -# Debuggee labels used to format debuggee description (ordered). The minor -# version is excluded for the sake of consistency with AppEngine UX. -_DESCRIPTION_LABELS = [ - labels.Debuggee.PROJECT_ID, labels.Debuggee.MODULE, labels.Debuggee.VERSION -] - -# HTTP timeout when accessing the cloud debugger API. It is selected to be -# longer than the typical controller.breakpoints.list hanging get latency -# of 40 seconds. 
-_HTTP_TIMEOUT_SECONDS = 100 - -# The map from the values of flags (breakpoint_enable_canary, -# breakpoint_allow_canary_override) to canary mode. -_CANARY_MODE_MAP = { - (True, True): 'CANARY_MODE_DEFAULT_ENABLED', - (True, False): 'CANARY_MODE_ALWAYS_ENABLED', - (False, True): 'CANARY_MODE_DEFAULT_DISABLED', - (False, False): 'CANARY_MODE_ALWAYS_DISABLED', -} - - -class NoProjectIdError(Exception): - """Used to indicate the project id cannot be determined.""" - - -class GcpHubClient(object): - """Controller API client. - - Registers the debuggee, queries the active breakpoints and sends breakpoint - updates to the backend. - - This class supports two types of authentication: application default - credentials or a manually provided JSON credentials file for a service - account. - - GcpHubClient creates a worker thread that communicates with the backend. The - thread can be stopped with a Stop function, but it is optional since the - worker thread is marked as daemon. - """ - - def __init__(self): - self.on_active_breakpoints_changed = lambda x: None - self.on_idle = lambda: None - self._debuggee_labels = {} - self._service_account_auth = False - self._debuggee_id = None - self._agent_id = None - self._canary_mode = None - self._wait_token = 'init' - self._breakpoints = [] - self._main_thread = None - self._transmission_thread = None - self._transmission_thread_startup_lock = threading.Lock() - self._transmission_queue = deque(maxlen=100) - self._new_updates = threading.Event() - - # Disable logging in the discovery API to avoid excessive logging. 
- class _ChildLogFilter(logging.Filter): - """Filter to eliminate info-level logging when called from this module.""" - - def __init__(self, filter_levels=None): - super(_ChildLogFilter, self).__init__() - self._filter_levels = filter_levels or set(logging.INFO) - # Get name without extension to avoid .py vs .pyc issues - self._my_filename = os.path.splitext( - inspect.getmodule(_ChildLogFilter).__file__)[0] - - def filter(self, record): - if record.levelno not in self._filter_levels: - return True - callerframes = inspect.getouterframes(inspect.currentframe()) - for f in callerframes: - if os.path.splitext(f[1])[0] == self._my_filename: - return False - return True - - self._log_filter = _ChildLogFilter({logging.INFO}) - googleapiclient.discovery.logger.addFilter(self._log_filter) - - # - # Configuration options (constants only modified by unit test) - # - - # Delay before retrying failed request. - self.register_backoff = backoff.Backoff() # Register debuggee. - self.list_backoff = backoff.Backoff() # Query active breakpoints. - self.update_backoff = backoff.Backoff() # Update breakpoint. - - # Maximum number of times that the message is re-transmitted before it - # is assumed to be poisonous and discarded - self.max_transmit_attempts = 10 - - def InitializeDebuggeeLabels(self, flags): - """Initialize debuggee labels from environment variables and flags. - - The caller passes all the flags that the debuglet got. This function - will only use the flags used to label the debuggee. Flags take precedence - over environment variables. - - Debuggee description is formatted from available flags. - - Args: - flags: dictionary of debuglet command line flags. - """ - self._debuggee_labels = {} - - for (label, var_names) in _DEBUGGEE_LABELS.items(): - # var_names is a list of possible environment variables that may contain - # the label value. Find the first one that is set. - for name in var_names: - value = os.environ.get(name) - if value: - # Special case for module. 
We omit the "default" module - # to stay consistent with AppEngine. - if label == labels.Debuggee.MODULE and value == 'default': - break - self._debuggee_labels[label] = value - break - - # Special case when FUNCTION_NAME is set and X_GOOGLE_FUNCTION_VERSION - # isn't set. We set the version to 'unversioned' to be consistent with other - # agents. - # TODO: Stop assigning 'unversioned' to a GCF and find the - # actual version. - if ('FUNCTION_NAME' in os.environ and - labels.Debuggee.VERSION not in self._debuggee_labels): - self._debuggee_labels[labels.Debuggee.VERSION] = 'unversioned' - - if flags: - self._debuggee_labels.update({ - name: value - for (name, value) in flags.items() - if name in _DEBUGGEE_LABELS - }) - - self._debuggee_labels[labels.Debuggee.PROJECT_ID] = self._project_id - - platform_enum = application_info.GetPlatform() - self._debuggee_labels[labels.Debuggee.PLATFORM] = platform_enum.value - - if platform_enum == application_info.PlatformType.CLOUD_FUNCTION: - region = application_info.GetRegion() - if region: - self._debuggee_labels[labels.Debuggee.REGION] = region - - def SetupAuth(self, - project_id=None, - project_number=None, - service_account_json_file=None): - """Sets up authentication with Google APIs. - - This will use the credentials from service_account_json_file if provided, - falling back to application default credentials. - See https://cloud.google.com/docs/authentication/production. - - Args: - project_id: GCP project ID (e.g. myproject). If not provided, will attempt - to retrieve it from the credentials. - project_number: GCP project number (e.g. 72386324623). If not provided, - project_id will be used in its place. - service_account_json_file: JSON file to use for credentials. If not - provided, will default to application default credentials. - Raises: - NoProjectIdError: If the project id cannot be determined. 
- """ - if service_account_json_file: - self._credentials = ( - service_account.Credentials.from_service_account_file( - service_account_json_file, scopes=_CLOUD_PLATFORM_SCOPE)) - if not project_id: - with open(service_account_json_file) as f: - project_id = json.load(f).get('project_id') - else: - self._credentials, credentials_project_id = google.auth.default( - scopes=_CLOUD_PLATFORM_SCOPE) - project_id = project_id or credentials_project_id - - if not project_id: - raise NoProjectIdError( - 'Unable to determine the project id from the API credentials. ' - 'Please specify the project id using the --project_id flag.') - - self._project_id = project_id - self._project_number = project_number or project_id - - def SetupCanaryMode(self, breakpoint_enable_canary, - breakpoint_allow_canary_override): - """Sets up canaryMode for the debuggee according to input parameters. - - Args: - breakpoint_enable_canary: str or bool, whether to enable breakpoint - canary. Any string except 'True' is interpreted as False. - breakpoint_allow_canary_override: str or bool, whether to allow the - individually set breakpoint to override the canary behavior. Any - string except 'True' is interpreted as False. - """ - enable_canary = breakpoint_enable_canary in ('True', True) - allow_canary_override = breakpoint_allow_canary_override in ('True', True) - self._canary_mode = _CANARY_MODE_MAP[enable_canary, allow_canary_override] - - def Start(self): - """Starts the worker thread.""" - self._shutdown = False - - self._main_thread = threading.Thread(target=self._MainThreadProc) - self._main_thread.name = 'Cloud Debugger main worker thread' - self._main_thread.daemon = True - self._main_thread.start() - - def Stop(self): - """Signals the worker threads to shut down and waits until it exits.""" - self._shutdown = True - self._new_updates.set() # Wake up the transmission thread. 
- - if self._main_thread is not None: - self._main_thread.join() - self._main_thread = None - - if self._transmission_thread is not None: - self._transmission_thread.join() - self._transmission_thread = None - - def EnqueueBreakpointUpdate(self, breakpoint): - """Asynchronously updates the specified breakpoint on the backend. - - This function returns immediately. The worker thread is actually doing - all the work. The worker thread is responsible to retry the transmission - in case of transient errors. - - Args: - breakpoint: breakpoint in either final or non-final state. - """ - with self._transmission_thread_startup_lock: - if self._transmission_thread is None: - self._transmission_thread = threading.Thread( - target=self._TransmissionThreadProc) - self._transmission_thread.name = 'Cloud Debugger transmission thread' - self._transmission_thread.daemon = True - self._transmission_thread.start() - - self._transmission_queue.append((breakpoint, 0)) - self._new_updates.set() # Wake up the worker thread to send immediately. 
- - def _BuildService(self): - http = httplib2.Http(timeout=_HTTP_TIMEOUT_SECONDS) - http = google_auth_httplib2.AuthorizedHttp(self._credentials, http) - - api = googleapiclient.discovery.build( - 'clouddebugger', 'v2', http=http, cache_discovery=False) - return api.controller() - - def _MainThreadProc(self): - """Entry point for the worker thread.""" - registration_required = True - while not self._shutdown: - if registration_required: - service = self._BuildService() - registration_required, delay = self._RegisterDebuggee(service) - - if not registration_required: - registration_required, delay = self._ListActiveBreakpoints(service) - - if self.on_idle is not None: - self.on_idle() - - if not self._shutdown: - time.sleep(delay) - - def _TransmissionThreadProc(self): - """Entry point for the transmission worker thread.""" - reconnect = True - - while not self._shutdown: - self._new_updates.clear() - - if reconnect: - service = self._BuildService() - reconnect = False - - reconnect, delay = self._TransmitBreakpointUpdates(service) - - self._new_updates.wait(delay) - - def _RegisterDebuggee(self, service): - """Single attempt to register the debuggee. - - If the registration succeeds, sets self._debuggee_id to the registered - debuggee ID. - - Args: - service: client to use for API calls - - Returns: - (registration_required, delay) tuple - """ - try: - request = {'debuggee': self._GetDebuggee()} - - try: - response = service.debuggees().register(body=request).execute() - - # self._project_number will refer to the project id on initialization if - # the project number is not available. The project field in the debuggee - # will always refer to the project number. Update so the server will not - # have to do id->number translations in the future. 
- project_number = response['debuggee'].get('project') - self._project_number = project_number or self._project_number - - self._debuggee_id = response['debuggee']['id'] - self._agent_id = response['agentId'] - native.LogInfo( - 'Debuggee registered successfully, ID: %s, agent ID: %s, ' - 'canary mode: %s' % (self._debuggee_id, self._agent_id, - response['debuggee'].get('canaryMode'))) - self.register_backoff.Succeeded() - return (False, 0) # Proceed immediately to list active breakpoints. - except BaseException: - native.LogInfo('Failed to register debuggee: %s, %s' % - (request, traceback.format_exc())) - except BaseException: - native.LogWarning('Debuggee information not available: ' + - traceback.format_exc()) - - return (True, self.register_backoff.Failed()) - - def _ListActiveBreakpoints(self, service): - """Single attempt query the list of active breakpoints. - - Must not be called before the debuggee has been registered. If the request - fails, this function resets self._debuggee_id, which triggers repeated - debuggee registration. - - Args: - service: client to use for API calls - - Returns: - (registration_required, delay) tuple - """ - try: - response = service.debuggees().breakpoints().list( - debuggeeId=self._debuggee_id, - agentId=self._agent_id, - waitToken=self._wait_token, - successOnTimeout=True).execute() - if not response.get('waitExpired'): - self._wait_token = response.get('nextWaitToken') - breakpoints = response.get('breakpoints') or [] - if self._breakpoints != breakpoints: - self._breakpoints = breakpoints - native.LogInfo('Breakpoints list changed, %d active, wait token: %s' % - (len(self._breakpoints), self._wait_token)) - self.on_active_breakpoints_changed(copy.deepcopy(self._breakpoints)) - except BaseException: - native.LogInfo('Failed to query active breakpoints: ' + - traceback.format_exc()) - - # Forget debuggee ID to trigger repeated debuggee registration. 
Once the - # registration succeeds, the worker thread will retry this query - self._debuggee_id = None - - return (True, self.list_backoff.Failed()) - - self.list_backoff.Succeeded() - return (False, 0) - - def _TransmitBreakpointUpdates(self, service): - """Tries to send pending breakpoint updates to the backend. - - Sends all the pending breakpoint updates. In case of transient failures, - the breakpoint is inserted back to the top of the queue. Application - failures are not retried (for example updating breakpoint in a final - state). - - Each pending breakpoint maintains a retry counter. After repeated transient - failures the breakpoint is discarded and dropped from the queue. - - Args: - service: client to use for API calls - - Returns: - (reconnect, timeout) tuple. The first element ("reconnect") is set to - true on unexpected HTTP responses. The caller should discard the HTTP - connection and create a new one. The second element ("timeout") is - set to None if all pending breakpoints were sent successfully. Otherwise - returns time interval in seconds to stall before retrying. - """ - reconnect = False - retry_list = [] - - # There is only one consumer, so two step pop is safe. - while self._transmission_queue: - breakpoint, retry_count = self._transmission_queue.popleft() - - try: - service.debuggees().breakpoints().update( - debuggeeId=self._debuggee_id, - id=breakpoint['id'], - body={ - 'breakpoint': breakpoint - }).execute() - - native.LogInfo('Breakpoint %s update transmitted successfully' % - (breakpoint['id'])) - except googleapiclient.errors.HttpError as err: - # Treat 400 error codes (except timeout) as application error that will - # not be retried. All other errors are assumed to be transient. 
- status = err.resp.status - is_transient = ((status >= 500) or (status == 408)) - if is_transient: - if retry_count < self.max_transmit_attempts - 1: - native.LogInfo('Failed to send breakpoint %s update: %s' % - (breakpoint['id'], traceback.format_exc())) - retry_list.append((breakpoint, retry_count + 1)) - else: - native.LogWarning('Breakpoint %s retry count exceeded maximum' % - breakpoint['id']) - else: - # This is very common if multiple instances are sending final update - # simultaneously. - native.LogInfo('%s, breakpoint: %s' % (err, breakpoint['id'])) - except socket.error as err: - if retry_count < self.max_transmit_attempts - 1: - native.LogInfo( - 'Socket error %d while sending breakpoint %s update: %s' % - (err.errno, breakpoint['id'], traceback.format_exc())) - retry_list.append((breakpoint, retry_count + 1)) - else: - native.LogWarning('Breakpoint %s retry count exceeded maximum' % - breakpoint['id']) - # Socket errors shouldn't persist like this; reconnect. - reconnect = True - except BaseException: - native.LogWarning('Fatal error sending breakpoint %s update: %s' % - (breakpoint['id'], traceback.format_exc())) - reconnect = True - - self._transmission_queue.extend(retry_list) - - if not self._transmission_queue: - self.update_backoff.Succeeded() - # Nothing to send, wait until next breakpoint update. 
- return (reconnect, None) - else: - return (reconnect, self.update_backoff.Failed()) - - def _GetDebuggee(self): - """Builds the debuggee structure.""" - major_version = 'v' + version.__version__.split('.')[0] - python_version = ''.join(platform.python_version().split('.')[:2]) - agent_version = ('google.com/python%s-gcp/%s' % - (python_version, major_version)) - - debuggee = { - 'project': self._project_number, - 'description': self._GetDebuggeeDescription(), - 'labels': self._debuggee_labels, - 'agentVersion': agent_version, - 'canaryMode': self._canary_mode, - } - - source_context = self._ReadAppJsonFile('source-context.json') - if source_context: - debuggee['sourceContexts'] = [source_context] - - debuggee['uniquifier'] = self._ComputeUniquifier(debuggee) - - return debuggee - - def _GetDebuggeeDescription(self): - """Formats debuggee description based on debuggee labels.""" - return '-'.join(self._debuggee_labels[label] - for label in _DESCRIPTION_LABELS - if label in self._debuggee_labels) - - def _ComputeUniquifier(self, debuggee): - """Computes debuggee uniquifier. - - The debuggee uniquifier has to be identical on all instances. Therefore the - uniquifier should not include any random numbers and should only be based - on inputs that are guaranteed to be the same on all instances. - - Args: - debuggee: complete debuggee message without the uniquifier - - Returns: - Hex string of SHA1 hash of project information, debuggee labels and - debuglet version. - """ - uniquifier = hashlib.sha1() - - # Compute hash of application files if we don't have source context. This - # way we can still distinguish between different deployments. - if ('minorversion' not in debuggee.get('labels', []) and - 'sourceContexts' not in debuggee): - uniquifier_computer.ComputeApplicationUniquifier(uniquifier) - - return uniquifier.hexdigest() - - def _ReadAppJsonFile(self, relative_path): - """Reads JSON file from an application directory. 
- - Args: - relative_path: file name relative to application root directory. - - Returns: - Parsed JSON data or None if the file does not exist, can't be read or - not a valid JSON file. - """ - try: - with open(os.path.join(sys.path[0], relative_path), 'r') as f: - return json.load(f) - except (IOError, ValueError): - return None diff --git a/src/setup.py b/src/setup.py index 6b380d5..25f6095 100644 --- a/src/setup.py +++ b/src/setup.py @@ -101,10 +101,6 @@ def ReadConfig(section, value, default): author='Google Inc.', version=version, install_requires=[ - 'google-api-python-client', - 'google-auth>=1.0.0', - 'google-auth-httplib2', - 'google-api-core', 'firebase-admin>=5.3.0', 'pyyaml', ], diff --git a/tests/py/gcp_hub_client_test.py b/tests/py/gcp_hub_client_test.py deleted file mode 100644 index ada26bf..0000000 --- a/tests/py/gcp_hub_client_test.py +++ /dev/null @@ -1,501 +0,0 @@ -"""Unit test for gcp_hub_client_test module.""" - -import datetime -import errno -import os -import socket -import sys -import tempfile -import time -from unittest import mock - -from googleapiclient import discovery -from googleapiclient.errors import HttpError -from googleclouddebugger import version - -import google.auth -from google.oauth2 import service_account -from absl.testing import absltest -from absl.testing import parameterized - -from googleclouddebugger import gcp_hub_client - -TEST_DEBUGGEE_ID = 'gcp:debuggee-id' -TEST_AGENT_ID = 'abc-123-d4' -TEST_PROJECT_ID = 'test-project-id' -TEST_PROJECT_NUMBER = '123456789' -TEST_SERVICE_ACCOUNT_EMAIL = 'a@developer.gserviceaccount.com' - - -class HttpResponse(object): - - def __init__(self, status): - self.status = status - self.reason = None - - -def HttpErrorTimeout(): - return HttpError(HttpResponse(408), b'Fake timeout') - - -def HttpConnectionReset(): - return socket.error(errno.ECONNRESET, 'Fake connection reset') - - -class GcpHubClientTest(parameterized.TestCase): - """Simulates service account authentication.""" - - def 
setUp(self): - version.__version__ = 'test' - - self._client = gcp_hub_client.GcpHubClient() - - for backoff in [ - self._client.register_backoff, self._client.list_backoff, - self._client.update_backoff - ]: - backoff.min_interval_sec /= 100000.0 - backoff.max_interval_sec /= 100000.0 - backoff._current_interval_sec /= 100000.0 - - self._client.on_idle = self._OnIdle - self._client.on_active_breakpoints_changed = mock.Mock() - - patcher = mock.patch.object(google.auth, 'default') - self._default_auth_mock = patcher.start() - self._default_auth_mock.return_value = (None, TEST_PROJECT_ID) - self.addCleanup(patcher.stop) - - self._service = mock.Mock() - self._iterations = 0 - - patcher = mock.patch.object(discovery, 'build') - self._mock_build = patcher.start() - self._mock_build.return_value = self._service - self.addCleanup(patcher.stop) - - controller = self._service.controller.return_value - debuggees = controller.debuggees.return_value - breakpoints = debuggees.breakpoints.return_value - self._register_call = debuggees.register - self._register_execute = self._register_call.return_value.execute - self._list_call = breakpoints.list - self._list_execute = self._list_call.return_value.execute - self._update_execute = breakpoints.update.return_value.execute - - # Default responses for API requests. 
- self._register_execute.return_value = { - 'debuggee': { - 'id': TEST_DEBUGGEE_ID, - 'project': TEST_PROJECT_NUMBER, - }, - 'agentId': TEST_AGENT_ID, - } - self._list_execute.return_value = {} - - self._start_time = datetime.datetime.utcnow() - - def tearDown(self): - self._client.Stop() - - def testDefaultAuth(self): - self._client.SetupAuth() - - self._default_auth_mock.assert_called_with( - scopes=['https://www.googleapis.com/auth/cloud-platform']) - self.assertEqual(TEST_PROJECT_ID, self._client._project_id) - self.assertEqual(TEST_PROJECT_ID, self._client._project_number) - - def testOverrideProjectIdNumber(self): - project_id = 'project2' - project_number = '456' - self._client.SetupAuth(project_id=project_id, project_number=project_number) - - self._default_auth_mock.assert_called_with( - scopes=['https://www.googleapis.com/auth/cloud-platform']) - self.assertEqual(project_id, self._client._project_id) - self.assertEqual(project_number, self._client._project_number) - - def testServiceAccountJsonAuth(self): - with mock.patch.object( - service_account.Credentials, - 'from_service_account_file') as from_service_account_file: - json_file = tempfile.NamedTemporaryFile() - with open(json_file.name, 'w') as f: - f.write('{"project_id": "%s"}' % TEST_PROJECT_ID) - self._client.SetupAuth(service_account_json_file=json_file.name) - - self._default_auth_mock.assert_not_called() - from_service_account_file.assert_called_with( - json_file.name, - scopes=['https://www.googleapis.com/auth/cloud-platform']) - self.assertEqual(TEST_PROJECT_ID, self._client._project_id) - self.assertEqual(TEST_PROJECT_ID, self._client._project_number) - - def testNoProjectId(self): - self._default_auth_mock.return_value = (None, None) - - with self.assertRaises(gcp_hub_client.NoProjectIdError): - self._Start() - - def testContinuousSuccess(self): - self._Start() - self._SkipIterations(10) - self.assertTrue(self._mock_build.called) - self.assertEqual(TEST_PROJECT_NUMBER, 
self._client._project_number) - - def testBreakpointsChanged(self): - self._Start() - self._SkipIterations(5) - self.assertEqual(0, self._client.on_active_breakpoints_changed.call_count) - - self._list_execute.return_value = ({'breakpoints': [{'id': 'bp1'}]}) - self._SkipIterations() - self.assertEqual(1, self._client.on_active_breakpoints_changed.call_count) - - self._list_execute.return_value = ({'breakpoints': [{'id': 'bp2'}]}) - self._SkipIterations() - self.assertEqual(2, self._client.on_active_breakpoints_changed.call_count) - - self._list_execute.return_value = ({'breakpoints': [{}]}) - self._SkipIterations() - self.assertEqual(3, self._client.on_active_breakpoints_changed.call_count) - - @parameterized.named_parameters( - ('DefaultEnabled', True, True, 'CANARY_MODE_DEFAULT_ENABLED'), - ('AlwaysEnabled', True, False, 'CANARY_MODE_ALWAYS_ENABLED'), - ('DefaultDisabled', False, True, 'CANARY_MODE_DEFAULT_DISABLED'), - ('AlwaysDisabled', False, False, 'CANARY_MODE_ALWAYS_DISABLED'), - ('AlwaysEnabledWithStringFlags', 'True', - 'a-value-should-be-treated-as-false', 'CANARY_MODE_ALWAYS_ENABLED')) - def testRegisterDebuggeeCanaryMode(self, breakpoint_enable_canary, - breakpoint_allow_canary_override, - expected_canary_mode): - self._client.SetupCanaryMode(breakpoint_enable_canary, - breakpoint_allow_canary_override) - self._Start() - self._SkipIterations(5) - self.assertEqual( - expected_canary_mode, - self._register_call.call_args[1]['body']['debuggee']['canaryMode']) - - def testRegisterDebuggeeFailure(self): - self._register_execute.side_effect = HttpErrorTimeout() - self._Start() - self._SkipIterations(5) - self.assertGreaterEqual(self._register_execute.call_count, 5) - - def testListActiveBreakpointsFailure(self): - self._Start() - self._SkipIterations(5) - self.assertEqual(1, self._register_execute.call_count) - - # If the these 2 lines are executed between _ListActiveBreakpoints() and - # on_idle() in _MainThreadProc, then there will be 1 iteration 
incremented - # where _ListActiveBreakpoints is still a success and registration is not - # required, leading to only 4 _register_execute calls instead of 5. - self._list_execute.side_effect = HttpErrorTimeout() - self._SkipIterations(5) - - self.assertGreaterEqual(self._register_execute.call_count, 4) - - def testListActiveBreakpointsNoUpdate(self): - self._Start() - self._SkipIterations(5) - self.assertEqual(1, self._register_execute.call_count) - self.assertEqual(0, self._client.on_active_breakpoints_changed.call_count) - - self._list_execute.return_value = ({'breakpoints': [{'id': 'bp1'}]}) - self._SkipIterations() - self.assertEqual(1, self._client.on_active_breakpoints_changed.call_count) - - self._list_execute.return_value = ({'waitExpired': 'True'}) - self._SkipIterations(20) - self.assertEqual(1, self._register_execute.call_count) - self.assertEqual(1, self._client.on_active_breakpoints_changed.call_count) - - def testListActiveBreakpointsSendAgentId(self): - self._Start() - self._SkipIterations(5) - self.assertEqual(1, self._register_execute.call_count) - self.assertGreater(self._list_execute.call_count, 0) - self.assertEqual(TEST_AGENT_ID, self._list_call.call_args[1]['agentId']) - - def testTransmitBreakpointUpdateSuccess(self): - self._Start() - self._client.EnqueueBreakpointUpdate({'id': 'A'}) - while not self._update_execute.call_count: - self._SkipIterations() - self.assertEmpty(self._client._transmission_queue) - - def testPoisonousMessage(self): - self._update_execute.side_effect = HttpErrorTimeout() - self._Start() - self._SkipIterations(5) - self._client.EnqueueBreakpointUpdate({'id': 'A'}) - while self._update_execute.call_count < 10: - self._SkipIterations() - self._SkipIterations(10) - self.assertEmpty(self._client._transmission_queue) - - def testTransmitBreakpointUpdateSocketError(self): - # It would be nice to ensure that the retries will succeed if the error - # stops, but that would make this test setup flaky. 
- self._update_execute.side_effect = HttpConnectionReset() - self._Start() - self._client.EnqueueBreakpointUpdate({'id': 'A'}) - while self._update_execute.call_count < 10: - self._SkipIterations() - self._SkipIterations(10) - self.assertEmpty(self._client._transmission_queue) - - def _TestInitializeLabels(self, module_var, version_var, minor_var): - self._Start() - - self._client.InitializeDebuggeeLabels({ - 'module': 'my_module', - 'version': '1', - 'minorversion': '23', - 'something_else': 'irrelevant' - }) - self.assertEqual( - { - 'projectid': 'test-project-id', - 'module': 'my_module', - 'version': '1', - 'minorversion': '23', - 'platform': 'default' - }, self._client._debuggee_labels) - self.assertEqual('test-project-id-my_module-1', - self._client._GetDebuggeeDescription()) - - uniquifier1 = self._client._ComputeUniquifier( - {'labels': self._client._debuggee_labels}) - self.assertTrue(uniquifier1) # Not empty string. - - try: - os.environ[module_var] = 'env_module' - os.environ[version_var] = '213' - os.environ[minor_var] = '3476734' - self._client.InitializeDebuggeeLabels(None) - self.assertEqual( - { - 'projectid': 'test-project-id', - 'module': 'env_module', - 'version': '213', - 'minorversion': '3476734', - 'platform': 'default' - }, self._client._debuggee_labels) - self.assertEqual('test-project-id-env_module-213', - self._client._GetDebuggeeDescription()) - - os.environ[module_var] = 'default' - os.environ[version_var] = '213' - os.environ[minor_var] = '3476734' - self._client.InitializeDebuggeeLabels({'minorversion': 'something else'}) - self.assertEqual( - { - 'projectid': 'test-project-id', - 'version': '213', - 'minorversion': 'something else', - 'platform': 'default' - }, self._client._debuggee_labels) - self.assertEqual('test-project-id-213', - self._client._GetDebuggeeDescription()) - - finally: - del os.environ[module_var] - del os.environ[version_var] - del os.environ[minor_var] - - def testInitializeLegacyDebuggeeLabels(self): - 
self._TestInitializeLabels('GAE_MODULE_NAME', 'GAE_MODULE_VERSION', - 'GAE_MINOR_VERSION') - - def testInitializeDebuggeeLabels(self): - self._TestInitializeLabels('GAE_SERVICE', 'GAE_VERSION', - 'GAE_DEPLOYMENT_ID') - - def testInitializeCloudRunDebuggeeLabels(self): - self._Start() - - try: - os.environ['K_SERVICE'] = 'env_module' - os.environ['K_REVISION'] = '213' - self._client.InitializeDebuggeeLabels(None) - self.assertEqual( - { - 'projectid': 'test-project-id', - 'module': 'env_module', - 'version': '213', - 'platform': 'default' - }, self._client._debuggee_labels) - self.assertEqual('test-project-id-env_module-213', - self._client._GetDebuggeeDescription()) - - finally: - del os.environ['K_SERVICE'] - del os.environ['K_REVISION'] - - def testInitializeCloudFunctionDebuggeeLabels(self): - self._Start() - - try: - os.environ['FUNCTION_NAME'] = 'fcn-name' - os.environ['X_GOOGLE_FUNCTION_VERSION'] = '213' - self._client.InitializeDebuggeeLabels(None) - self.assertEqual( - { - 'projectid': 'test-project-id', - 'module': 'fcn-name', - 'version': '213', - 'platform': 'cloud_function' - }, self._client._debuggee_labels) - self.assertEqual('test-project-id-fcn-name-213', - self._client._GetDebuggeeDescription()) - - finally: - del os.environ['FUNCTION_NAME'] - del os.environ['X_GOOGLE_FUNCTION_VERSION'] - - def testInitializeCloudFunctionUnversionedDebuggeeLabels(self): - self._Start() - - try: - os.environ['FUNCTION_NAME'] = 'fcn-name' - self._client.InitializeDebuggeeLabels(None) - self.assertEqual( - { - 'projectid': 'test-project-id', - 'module': 'fcn-name', - 'version': 'unversioned', - 'platform': 'cloud_function' - }, self._client._debuggee_labels) - self.assertEqual('test-project-id-fcn-name-unversioned', - self._client._GetDebuggeeDescription()) - - finally: - del os.environ['FUNCTION_NAME'] - - def testInitializeCloudFunctionWithRegionDebuggeeLabels(self): - self._Start() - - try: - os.environ['FUNCTION_NAME'] = 'fcn-name' - os.environ['FUNCTION_REGION'] 
= 'fcn-region' - self._client.InitializeDebuggeeLabels(None) - self.assertEqual( - { - 'projectid': 'test-project-id', - 'module': 'fcn-name', - 'version': 'unversioned', - 'platform': 'cloud_function', - 'region': 'fcn-region' - }, self._client._debuggee_labels) - self.assertEqual('test-project-id-fcn-name-unversioned', - self._client._GetDebuggeeDescription()) - - finally: - del os.environ['FUNCTION_NAME'] - del os.environ['FUNCTION_REGION'] - - def testAppFilesUniquifierNoMinorVersion(self): - """Verify that uniquifier_computer is used if minor version not defined.""" - self._Start() - - root = tempfile.mkdtemp('', 'fake_app_') - sys.path.insert(0, root) - try: - uniquifier1 = self._client._ComputeUniquifier({}) - - with open(os.path.join(root, 'app.py'), 'w') as f: - f.write('hello') - uniquifier2 = self._client._ComputeUniquifier({}) - finally: - del sys.path[0] - - self.assertNotEqual(uniquifier1, uniquifier2) - - def testAppFilesUniquifierWithMinorVersion(self): - """Verify that uniquifier_computer not used if minor version is defined.""" - self._Start() - - root = tempfile.mkdtemp('', 'fake_app_') - - os.environ['GAE_MINOR_VERSION'] = '12345' - sys.path.insert(0, root) - try: - self._client.InitializeDebuggeeLabels(None) - - uniquifier1 = self._client._GetDebuggee()['uniquifier'] - - with open(os.path.join(root, 'app.py'), 'w') as f: - f.write('hello') - uniquifier2 = self._client._GetDebuggee()['uniquifier'] - finally: - del os.environ['GAE_MINOR_VERSION'] - del sys.path[0] - - self.assertEqual(uniquifier1, uniquifier2) - - def testSourceContext(self): - self._Start() - - root = tempfile.mkdtemp('', 'fake_app_') - source_context_path = os.path.join(root, 'source-context.json') - - sys.path.insert(0, root) - try: - debuggee_no_source_context1 = self._client._GetDebuggee() - - with open(source_context_path, 'w') as f: - f.write('not a valid JSON') - debuggee_bad_source_context = self._client._GetDebuggee() - - with open(os.path.join(root, 'fake_app.py'), 
'w') as f: - f.write('pretend') - debuggee_no_source_context2 = self._client._GetDebuggee() - - with open(source_context_path, 'w') as f: - f.write('{"what": "source context"}') - debuggee_with_source_context = self._client._GetDebuggee() - - os.remove(source_context_path) - finally: - del sys.path[0] - - self.assertNotIn('sourceContexts', debuggee_no_source_context1) - self.assertNotIn('sourceContexts', debuggee_bad_source_context) - self.assertListEqual([{ - 'what': 'source context' - }], debuggee_with_source_context['sourceContexts']) - - uniquifiers = set() - uniquifiers.add(debuggee_no_source_context1['uniquifier']) - uniquifiers.add(debuggee_with_source_context['uniquifier']) - uniquifiers.add(debuggee_bad_source_context['uniquifier']) - self.assertLen(uniquifiers, 1) - uniquifiers.add(debuggee_no_source_context2['uniquifier']) - self.assertLen(uniquifiers, 2) - - def _Start(self): - self._client.SetupAuth() - self._client.Start() - - def _OnIdle(self): - self._iterations += 1 - - def _SkipIterations(self, n=1): - target = self._iterations + n - while self._iterations < target: - self._CheckTestTimeout() - time.sleep(0.01) - - def _CheckTestTimeout(self): - elapsed_time = datetime.datetime.utcnow() - self._start_time - if elapsed_time > datetime.timedelta(seconds=15): - self.fail('Test case timed out while waiting for state transition') - - -if __name__ == '__main__': - absltest.main() diff --git a/tests/py/integration_test.py b/tests/py/integration_test.py index 39067b0..1a16f30 100644 --- a/tests/py/integration_test.py +++ b/tests/py/integration_test.py @@ -32,627 +32,624 @@ # and the mock object will use a lot of memory to record all the calls. _REQUEST_DELAY_SECS = 0.01 +# TODO: Modify to work with a mocked Firebase database instead. +# class IntegrationTest(absltest.TestCase): +# """Complete tests of the debugger mocking the backend. + +# These tests employ all the components of the debugger. 
The actual +# communication channel with the backend is mocked. This allows the test +# quickly inject breakpoints and read results. It also makes the test +# standalone and independent of the actual backend. + +# Uses the new module search algorithm (b/70226488). +# """ + +# class FakeHub(object): +# """Starts the debugger with a mocked communication channel.""" + +# def __init__(self): +# # Breakpoint updates posted by the debugger that haven't been processed +# # by the test case code. +# self._incoming_breakpoint_updates = queue.Queue() + +# # Running counter used to generate unique breakpoint IDs. +# self._id_counter = itertools.count() + +# self._service = mock.Mock() + +# patcher = mock.patch.object(discovery, 'build') +# self._mock_build = patcher.start() +# self._mock_build.return_value = self._service + +# patcher = mock.patch.object(google.auth, 'default') +# self._default_auth_mock = patcher.start() +# self._default_auth_mock.return_value = None, _TEST_PROJECT_ID + +# controller = self._service.controller.return_value +# debuggees = controller.debuggees.return_value +# breakpoints = debuggees.breakpoints.return_value + +# # Simulate a time delay for calls to the mock API. +# def ReturnWithDelay(val): + +# def GetVal(): +# time.sleep(_REQUEST_DELAY_SECS) +# return val + +# return GetVal + +# self._register_execute = debuggees.register.return_value.execute +# self._register_execute.side_effect = ReturnWithDelay({ +# 'debuggee': { +# 'id': _TEST_DEBUGGEE_ID +# }, +# 'agentId': _TEST_AGENT_ID +# }) + +# self._active_breakpoints = {'breakpoints': []} +# self._list_execute = breakpoints.list.return_value.execute +# self._list_execute.side_effect = ReturnWithDelay(self._active_breakpoints) + +# breakpoints.update = self._UpdateBreakpoint + +# # Start the debugger. +# cdbg.enable() + +# def SetBreakpoint(self, tag, template=None): +# """Sets a new breakpoint in this source file. + +# The line number is identified by tag. 
The optional template may specify +# other breakpoint parameters such as condition and watched expressions. + +# Args: +# tag: label for a source line. +# template: optional breakpoint parameters. +# """ +# path, line = python_test_util.ResolveTag(sys.modules[__name__], tag) +# self.SetBreakpointAtPathLine(path, line, template) + +# def SetBreakpointAtFile(self, filename, tag, template=None): +# """Sets a breakpoint in a file with the given filename. + +# The line number is identified by tag. The optional template may specify +# other breakpoint parameters such as condition and watched expressions. + +# Args: +# filename: the name of the file inside which the tag will be searched. +# Must be in the same directory as the current file. +# tag: label for a source line. +# template: optional breakpoint parameters. + +# Raises: +# Exception: when the given tag does not uniquely identify a line. +# """ +# # TODO: Move part of this to python_test_utils.py file. +# # Find the full path of filename, using the directory of the current file. +# module_path = inspect.getsourcefile(sys.modules[__name__]) +# directory, unused_name = os.path.split(module_path) +# path = os.path.join(directory, filename) + +# # Similar to ResolveTag(), but for a module that's not loaded yet. 
+# tags = python_test_util.GetSourceFileTags(path) +# if tag not in tags: +# raise Exception('tag %s not found' % tag) +# lines = tags[tag] +# if len(lines) != 1: +# raise Exception('tag %s is ambiguous (lines: %s)' % (tag, lines)) + +# self.SetBreakpointAtPathLine(path, lines[0], template) + +# def SetBreakpointAtPathLine(self, path, line, template=None): +# """Sets a new breakpoint at path:line.""" +# breakpoint = { +# 'id': 'BP_%d' % next(self._id_counter), +# 'createTime': python_test_util.DateTimeToTimestamp(datetime.utcnow()), +# 'location': { +# 'path': path, +# 'line': line +# } +# } +# breakpoint.update(template or {}) + +# self.SetActiveBreakpoints(self.GetActiveBreakpoints() + [breakpoint]) + +# def GetActiveBreakpoints(self): +# """Returns current list of active breakpoints.""" +# return self._active_breakpoints['breakpoints'] + +# def SetActiveBreakpoints(self, breakpoints): +# """Sets a new list of active breakpoints. + +# Args: +# breakpoints: list of breakpoints to return to the debuglet. +# """ +# self._active_breakpoints['breakpoints'] = breakpoints +# begin_count = self._list_execute.call_count +# while self._list_execute.call_count < begin_count + 2: +# time.sleep(_REQUEST_DELAY_SECS) + +# def GetNextResult(self): +# """Waits for the next breakpoint update from the debuglet. + +# Returns: +# First breakpoint update sent by the debuglet that hasn't been +# processed yet. + +# Raises: +# queue.Empty: if waiting for breakpoint update times out. +# """ +# try: +# return self._incoming_breakpoint_updates.get(True, 15) +# except queue.Empty: +# raise AssertionError('Timed out waiting for breakpoint update') + +# def TryGetNextResult(self): +# """Returns the first unprocessed breakpoint update from the debuglet. + +# Returns: +# First breakpoint update sent by the debuglet that hasn't been +# processed yet. If no updates are pending, returns None. 
+# """ +# try: +# return self._incoming_breakpoint_updates.get_nowait() +# except queue.Empty: +# return None + +# def _UpdateBreakpoint(self, **keywords): +# """Fake implementation of service.debuggees().breakpoints().update().""" + +# class FakeBreakpointUpdateCommand(object): + +# def __init__(self, q): +# self._breakpoint = keywords['body']['breakpoint'] +# self._queue = q + +# def execute(self): # pylint: disable=invalid-name +# self._queue.put(self._breakpoint) + +# return FakeBreakpointUpdateCommand(self._incoming_breakpoint_updates) + +# # We only need to attach the debugger exactly once. The IntegrationTest class +# # is created for each test case, so we need to keep this state global. + +# _hub = FakeHub() + +# def _FakeLog(self, message, extra=None): +# del extra # unused +# self._info_log.append(message) + +# def setUp(self): +# self._info_log = [] +# collector.log_info_message = self._FakeLog + +# def tearDown(self): +# IntegrationTest._hub.SetActiveBreakpoints([]) + +# while True: +# breakpoint = IntegrationTest._hub.TryGetNextResult() +# if breakpoint is None: +# break +# self.fail('Unexpected incoming breakpoint update: %s' % breakpoint) + +# def testBackCompat(self): +# # Verify that the old AttachDebugger() is the same as enable() +# self.assertEqual(cdbg.enable, cdbg.AttachDebugger) + +# def testBasic(self): + +# def Trigger(): +# print('Breakpoint trigger') # BPTAG: BASIC + +# IntegrationTest._hub.SetBreakpoint('BASIC') +# Trigger() +# result = IntegrationTest._hub.GetNextResult() +# self.assertEqual('Trigger', result['stackFrames'][0]['function']) +# self.assertEqual('IntegrationTest.testBasic', +# result['stackFrames'][1]['function']) + +# # Verify that any pre existing labels present in the breakpoint are preserved +# # by the agent. 
+# def testExistingLabelsSurvive(self): + +# def Trigger(): +# print('Breakpoint trigger with labels') # BPTAG: EXISTING_LABELS_SURVIVE + +# IntegrationTest._hub.SetBreakpoint( +# 'EXISTING_LABELS_SURVIVE', +# {'labels': { +# 'label_1': 'value_1', +# 'label_2': 'value_2' +# }}) +# Trigger() +# result = IntegrationTest._hub.GetNextResult() +# self.assertIn('labels', result.keys()) +# self.assertIn('label_1', result['labels']) +# self.assertIn('label_2', result['labels']) +# self.assertEqual('value_1', result['labels']['label_1']) +# self.assertEqual('value_2', result['labels']['label_2']) + +# # Verify that any pre existing labels present in the breakpoint have priority +# # if they 'collide' with labels in the agent. +# def testExistingLabelsPriority(self): + +# def Trigger(): +# print('Breakpoint trigger with labels') # BPTAG: EXISTING_LABELS_PRIORITY -class IntegrationTest(absltest.TestCase): - """Complete tests of the debugger mocking the backend. +# current_labels_collector = collector.breakpoint_labels_collector +# collector.breakpoint_labels_collector = \ +# lambda: {'label_1': 'value_1', 'label_2': 'value_2'} - These tests employ all the components of the debugger. The actual - communication channel with the backend is mocked. This allows the test - quickly inject breakpoints and read results. It also makes the test - standalone and independent of the actual backend. +# IntegrationTest._hub.SetBreakpoint( +# 'EXISTING_LABELS_PRIORITY', +# {'labels': { +# 'label_1': 'value_foobar', +# 'label_3': 'value_3' +# }}) - Uses the new module search algorithm (b/70226488). - """ +# Trigger() - class FakeHub(object): - """Starts the debugger with a mocked communication channel.""" +# collector.breakpoint_labels_collector = current_labels_collector - def __init__(self): - # Breakpoint updates posted by the debugger that haven't been processed - # by the test case code. 
- self._incoming_breakpoint_updates = queue.Queue() - - # Running counter used to generate unique breakpoint IDs. - self._id_counter = itertools.count() - - self._service = mock.Mock() - - patcher = mock.patch.object(discovery, 'build') - self._mock_build = patcher.start() - self._mock_build.return_value = self._service - - patcher = mock.patch.object(google.auth, 'default') - self._default_auth_mock = patcher.start() - self._default_auth_mock.return_value = None, _TEST_PROJECT_ID - - controller = self._service.controller.return_value - debuggees = controller.debuggees.return_value - breakpoints = debuggees.breakpoints.return_value - - # Simulate a time delay for calls to the mock API. - def ReturnWithDelay(val): - - def GetVal(): - time.sleep(_REQUEST_DELAY_SECS) - return val - - return GetVal - - self._register_execute = debuggees.register.return_value.execute - self._register_execute.side_effect = ReturnWithDelay({ - 'debuggee': { - 'id': _TEST_DEBUGGEE_ID - }, - 'agentId': _TEST_AGENT_ID - }) - - self._active_breakpoints = {'breakpoints': []} - self._list_execute = breakpoints.list.return_value.execute - self._list_execute.side_effect = ReturnWithDelay(self._active_breakpoints) - - breakpoints.update = self._UpdateBreakpoint - - # Start the debugger. - cdbg.enable() - - # Increase the polling rate to speed up the test. - cdbg.gcp_hub_client.min_interval_sec = 0.001 # Poll every 1 ms - - def SetBreakpoint(self, tag, template=None): - """Sets a new breakpoint in this source file. - - The line number is identified by tag. The optional template may specify - other breakpoint parameters such as condition and watched expressions. - - Args: - tag: label for a source line. - template: optional breakpoint parameters. - """ - path, line = python_test_util.ResolveTag(sys.modules[__name__], tag) - self.SetBreakpointAtPathLine(path, line, template) - - def SetBreakpointAtFile(self, filename, tag, template=None): - """Sets a breakpoint in a file with the given filename. 
- - The line number is identified by tag. The optional template may specify - other breakpoint parameters such as condition and watched expressions. - - Args: - filename: the name of the file inside which the tag will be searched. - Must be in the same directory as the current file. - tag: label for a source line. - template: optional breakpoint parameters. - - Raises: - Exception: when the given tag does not uniquely identify a line. - """ - # TODO: Move part of this to python_test_utils.py file. - # Find the full path of filename, using the directory of the current file. - module_path = inspect.getsourcefile(sys.modules[__name__]) - directory, unused_name = os.path.split(module_path) - path = os.path.join(directory, filename) - - # Similar to ResolveTag(), but for a module that's not loaded yet. - tags = python_test_util.GetSourceFileTags(path) - if tag not in tags: - raise Exception('tag %s not found' % tag) - lines = tags[tag] - if len(lines) != 1: - raise Exception('tag %s is ambiguous (lines: %s)' % (tag, lines)) - - self.SetBreakpointAtPathLine(path, lines[0], template) - - def SetBreakpointAtPathLine(self, path, line, template=None): - """Sets a new breakpoint at path:line.""" - breakpoint = { - 'id': 'BP_%d' % next(self._id_counter), - 'createTime': python_test_util.DateTimeToTimestamp(datetime.utcnow()), - 'location': { - 'path': path, - 'line': line - } - } - breakpoint.update(template or {}) - - self.SetActiveBreakpoints(self.GetActiveBreakpoints() + [breakpoint]) - - def GetActiveBreakpoints(self): - """Returns current list of active breakpoints.""" - return self._active_breakpoints['breakpoints'] - - def SetActiveBreakpoints(self, breakpoints): - """Sets a new list of active breakpoints. - - Args: - breakpoints: list of breakpoints to return to the debuglet. 
- """ - self._active_breakpoints['breakpoints'] = breakpoints - begin_count = self._list_execute.call_count - while self._list_execute.call_count < begin_count + 2: - time.sleep(_REQUEST_DELAY_SECS) - - def GetNextResult(self): - """Waits for the next breakpoint update from the debuglet. - - Returns: - First breakpoint update sent by the debuglet that hasn't been - processed yet. - - Raises: - queue.Empty: if waiting for breakpoint update times out. - """ - try: - return self._incoming_breakpoint_updates.get(True, 15) - except queue.Empty: - raise AssertionError('Timed out waiting for breakpoint update') - - def TryGetNextResult(self): - """Returns the first unprocessed breakpoint update from the debuglet. - - Returns: - First breakpoint update sent by the debuglet that hasn't been - processed yet. If no updates are pending, returns None. - """ - try: - return self._incoming_breakpoint_updates.get_nowait() - except queue.Empty: - return None - - def _UpdateBreakpoint(self, **keywords): - """Fake implementation of service.debuggees().breakpoints().update().""" - - class FakeBreakpointUpdateCommand(object): - - def __init__(self, q): - self._breakpoint = keywords['body']['breakpoint'] - self._queue = q - - def execute(self): # pylint: disable=invalid-name - self._queue.put(self._breakpoint) - - return FakeBreakpointUpdateCommand(self._incoming_breakpoint_updates) - -# We only need to attach the debugger exactly once. The IntegrationTest class -# is created for each test case, so we need to keep this state global. 
- - _hub = FakeHub() - - def _FakeLog(self, message, extra=None): - del extra # unused - self._info_log.append(message) - - def setUp(self): - self._info_log = [] - collector.log_info_message = self._FakeLog - - def tearDown(self): - IntegrationTest._hub.SetActiveBreakpoints([]) - - while True: - breakpoint = IntegrationTest._hub.TryGetNextResult() - if breakpoint is None: - break - self.fail('Unexpected incoming breakpoint update: %s' % breakpoint) - - def testBackCompat(self): - # Verify that the old AttachDebugger() is the same as enable() - self.assertEqual(cdbg.enable, cdbg.AttachDebugger) - - def testBasic(self): - - def Trigger(): - print('Breakpoint trigger') # BPTAG: BASIC - - IntegrationTest._hub.SetBreakpoint('BASIC') - Trigger() - result = IntegrationTest._hub.GetNextResult() - self.assertEqual('Trigger', result['stackFrames'][0]['function']) - self.assertEqual('IntegrationTest.testBasic', - result['stackFrames'][1]['function']) - - # Verify that any pre existing labels present in the breakpoint are preserved - # by the agent. - def testExistingLabelsSurvive(self): - - def Trigger(): - print('Breakpoint trigger with labels') # BPTAG: EXISTING_LABELS_SURVIVE - - IntegrationTest._hub.SetBreakpoint( - 'EXISTING_LABELS_SURVIVE', - {'labels': { - 'label_1': 'value_1', - 'label_2': 'value_2' - }}) - Trigger() - result = IntegrationTest._hub.GetNextResult() - self.assertIn('labels', result.keys()) - self.assertIn('label_1', result['labels']) - self.assertIn('label_2', result['labels']) - self.assertEqual('value_1', result['labels']['label_1']) - self.assertEqual('value_2', result['labels']['label_2']) - - # Verify that any pre existing labels present in the breakpoint have priority - # if they 'collide' with labels in the agent. 
- def testExistingLabelsPriority(self): - - def Trigger(): - print('Breakpoint trigger with labels') # BPTAG: EXISTING_LABELS_PRIORITY +# # In this case, label_1 was in both the agent and the pre existing labels, +# # the pre existing value of value_foobar should be preserved. +# result = IntegrationTest._hub.GetNextResult() +# self.assertIn('labels', result.keys()) +# self.assertIn('label_1', result['labels']) +# self.assertIn('label_2', result['labels']) +# self.assertIn('label_3', result['labels']) +# self.assertEqual('value_foobar', result['labels']['label_1']) +# self.assertEqual('value_2', result['labels']['label_2']) +# self.assertEqual('value_3', result['labels']['label_3']) - current_labels_collector = collector.breakpoint_labels_collector - collector.breakpoint_labels_collector = \ - lambda: {'label_1': 'value_1', 'label_2': 'value_2'} +# def testRequestLogIdLabel(self): - IntegrationTest._hub.SetBreakpoint( - 'EXISTING_LABELS_PRIORITY', - {'labels': { - 'label_1': 'value_foobar', - 'label_3': 'value_3' - }}) +# def Trigger(): +# print('Breakpoint trigger req id label') # BPTAG: REQUEST_LOG_ID_LABEL - Trigger() +# current_request_log_id_collector = \ +# collector.request_log_id_collector +# collector.request_log_id_collector = lambda: 'foo_bar_id' - collector.breakpoint_labels_collector = current_labels_collector +# IntegrationTest._hub.SetBreakpoint('REQUEST_LOG_ID_LABEL') - # In this case, label_1 was in both the agent and the pre existing labels, - # the pre existing value of value_foobar should be preserved. 
- result = IntegrationTest._hub.GetNextResult() - self.assertIn('labels', result.keys()) - self.assertIn('label_1', result['labels']) - self.assertIn('label_2', result['labels']) - self.assertIn('label_3', result['labels']) - self.assertEqual('value_foobar', result['labels']['label_1']) - self.assertEqual('value_2', result['labels']['label_2']) - self.assertEqual('value_3', result['labels']['label_3']) +# Trigger() - def testRequestLogIdLabel(self): - - def Trigger(): - print('Breakpoint trigger req id label') # BPTAG: REQUEST_LOG_ID_LABEL - - current_request_log_id_collector = \ - collector.request_log_id_collector - collector.request_log_id_collector = lambda: 'foo_bar_id' - - IntegrationTest._hub.SetBreakpoint('REQUEST_LOG_ID_LABEL') - - Trigger() - - collector.request_log_id_collector = \ - current_request_log_id_collector - - result = IntegrationTest._hub.GetNextResult() - self.assertIn('labels', result.keys()) - self.assertIn(labels.Breakpoint.REQUEST_LOG_ID, result['labels']) - self.assertEqual('foo_bar_id', - result['labels'][labels.Breakpoint.REQUEST_LOG_ID]) - - # Tests the issue in b/30876465 - def testSameLine(self): - - def Trigger(): - print('Breakpoint trigger same line') # BPTAG: SAME_LINE - - num_breakpoints = 5 - _, line = python_test_util.ResolveTag(sys.modules[__name__], 'SAME_LINE') - for _ in range(0, num_breakpoints): - IntegrationTest._hub.SetBreakpoint('SAME_LINE') - Trigger() - results = [] - for _ in range(0, num_breakpoints): - results.append(IntegrationTest._hub.GetNextResult()) - lines = [result['stackFrames'][0]['location']['line'] for result in results] - self.assertListEqual(lines, [line] * num_breakpoints) - - def testCallStack(self): - - def Method1(): - Method2() - - def Method2(): - Method3() - - def Method3(): - Method4() - - def Method4(): - Method5() - - def Method5(): - return 0 # BPTAG: CALL_STACK - - IntegrationTest._hub.SetBreakpoint('CALL_STACK') - Method1() - result = IntegrationTest._hub.GetNextResult() - 
self.assertEqual([ - 'Method5', 'Method4', 'Method3', 'Method2', 'Method1', - 'IntegrationTest.testCallStack' - ], [frame['function'] for frame in result['stackFrames']][:6]) - - def testInnerMethod(self): - - def Inner1(): - - def Inner2(): - - def Inner3(): - print('Inner3') # BPTAG: INNER3 - - Inner3() - - Inner2() - - IntegrationTest._hub.SetBreakpoint('INNER3') - Inner1() - result = IntegrationTest._hub.GetNextResult() - self.assertEqual('Inner3', result['stackFrames'][0]['function']) - - def testClassMethodWithDecorator(self): - - def MyDecorator(handler): - - def Caller(self): - return handler(self) - - return Caller - - class BaseClass(object): - pass - - class MyClass(BaseClass): - - @MyDecorator - def Get(self): - param = {} # BPTAG: METHOD_WITH_DECORATOR - return str(param) - - IntegrationTest._hub.SetBreakpoint('METHOD_WITH_DECORATOR') - self.assertEqual('{}', MyClass().Get()) - result = IntegrationTest._hub.GetNextResult() - self.assertEqual('MyClass.Get', result['stackFrames'][0]['function']) - self.assertEqual('MyClass.Caller', result['stackFrames'][1]['function']) - self.assertEqual( - { - 'name': - 'self', - 'type': - __name__ + '.MyClass', - 'members': [{ - 'status': { - 'refersTo': 'VARIABLE_NAME', - 'description': { - 'format': 'Object has no fields' - } - } - }] - }, - python_test_util.PackFrameVariable( - result, 'self', collection='arguments')) - - def testGlobalDecorator(self): - IntegrationTest._hub.SetBreakpoint('WRAPPED_GLOBAL_METHOD') - self.assertEqual('hello', WrappedGlobalMethod()) - result = IntegrationTest._hub.GetNextResult() - - self.assertNotIn('status', result) - - def testNoLambdaExpression(self): - - def Trigger(): - cube = lambda x: x**3 # BPTAG: LAMBDA - cube(18) - - num_breakpoints = 5 - for _ in range(0, num_breakpoints): - IntegrationTest._hub.SetBreakpoint('LAMBDA') - Trigger() - results = [] - for _ in range(0, num_breakpoints): - results.append(IntegrationTest._hub.GetNextResult()) - functions = 
[result['stackFrames'][0]['function'] for result in results] - self.assertListEqual(functions, ['Trigger'] * num_breakpoints) - - def testNoGeneratorExpression(self): - - def Trigger(): - gen = (i for i in range(0, 5)) # BPTAG: GENEXPR - next(gen) - next(gen) - next(gen) - next(gen) - next(gen) - - num_breakpoints = 1 - for _ in range(0, num_breakpoints): - IntegrationTest._hub.SetBreakpoint('GENEXPR') - Trigger() - results = [] - for _ in range(0, num_breakpoints): - results.append(IntegrationTest._hub.GetNextResult()) - functions = [result['stackFrames'][0]['function'] for result in results] - self.assertListEqual(functions, ['Trigger'] * num_breakpoints) - - def testTryBlock(self): - - def Method(a): - try: - return a * a # BPTAG: TRY_BLOCK - except Exception as unused_e: # pylint: disable=broad-except - return a - - IntegrationTest._hub.SetBreakpoint('TRY_BLOCK') - Method(11) - result = IntegrationTest._hub.GetNextResult() - self.assertEqual('Method', result['stackFrames'][0]['function']) - self.assertEqual([{ - 'name': 'a', - 'value': '11', - 'type': 'int' - }], result['stackFrames'][0]['arguments']) - - def testFrameArguments(self): - - def Method(a, b): - return a + str(b) # BPTAG: FRAME_ARGUMENTS - - IntegrationTest._hub.SetBreakpoint('FRAME_ARGUMENTS') - Method('hello', 87) - result = IntegrationTest._hub.GetNextResult() - self.assertEqual([{ - 'name': 'a', - 'value': "'hello'", - 'type': 'str' - }, { - 'name': 'b', - 'value': '87', - 'type': 'int' - }], result['stackFrames'][0]['arguments']) - self.assertEqual('self', result['stackFrames'][1]['arguments'][0]['name']) - - def testFrameLocals(self): - - class Number(object): - - def __init__(self): - self.n = 57 - - def Method(a): - b = a**2 - c = str(a) * 3 - return c + str(b) # BPTAG: FRAME_LOCALS - - IntegrationTest._hub.SetBreakpoint('FRAME_LOCALS') - x = {'a': 1, 'b': Number()} - Method(8) - result = IntegrationTest._hub.GetNextResult() - self.assertEqual({ - 'name': 'b', - 'value': '64', - 'type': 
'int' - }, python_test_util.PackFrameVariable(result, 'b')) - self.assertEqual({ - 'name': 'c', - 'value': "'888'", - 'type': 'str' - }, python_test_util.PackFrameVariable(result, 'c')) - self.assertEqual( - { - 'name': - 'x', - 'type': - 'dict', - 'members': [{ - 'name': "'a'", - 'value': '1', - 'type': 'int' - }, { - 'name': "'b'", - 'type': __name__ + '.Number', - 'members': [{ - 'name': 'n', - 'value': '57', - 'type': 'int' - }] - }] - }, python_test_util.PackFrameVariable(result, 'x', frame=1)) - return x - - -# FIXME: Broken in Python 3.10 -# def testRecursion(self): -# -# def RecursiveMethod(i): -# if i == 0: -# return 0 # BPTAG: RECURSION -# return RecursiveMethod(i - 1) -# -# IntegrationTest._hub.SetBreakpoint('RECURSION') -# RecursiveMethod(5) -# result = IntegrationTest._hub.GetNextResult() -# -# for frame in range(5): -# self.assertEqual({ -# 'name': 'i', -# 'value': str(frame), -# 'type': 'int' -# }, python_test_util.PackFrameVariable(result, 'i', frame, 'arguments')) - - def testWatchedExpressions(self): - - def Trigger(): - - class MyClass(object): - - def __init__(self): - self.a = 1 - self.b = 'bbb' - - unused_my = MyClass() - print('Breakpoint trigger') # BPTAG: WATCHED_EXPRESSION - - IntegrationTest._hub.SetBreakpoint('WATCHED_EXPRESSION', - {'expressions': ['unused_my']}) - Trigger() - result = IntegrationTest._hub.GetNextResult() - - self.assertEqual( - { - 'name': - 'unused_my', - 'type': - __name__ + '.MyClass', - 'members': [{ - 'name': 'a', - 'value': '1', - 'type': 'int' - }, { - 'name': 'b', - 'value': "'bbb'", - 'type': 'str' - }] - }, python_test_util.PackWatchedExpression(result, 0)) - - def testBreakpointExpiration(self): # BPTAG: BREAKPOINT_EXPIRATION - created_time = datetime.utcnow() - timedelta(hours=25) - IntegrationTest._hub.SetBreakpoint( - 'BREAKPOINT_EXPIRATION', - {'createTime': python_test_util.DateTimeToTimestamp(created_time)}) - result = IntegrationTest._hub.GetNextResult() - - 
self.assertTrue(result['status']['isError']) - - def testLogAction(self): - - def Trigger(): - for i in range(3): - print('Log me %d' % i) # BPTAG: LOG - - IntegrationTest._hub.SetBreakpoint( - 'LOG', { - 'action': 'LOG', - 'logLevel': 'INFO', - 'logMessageFormat': 'hello $0', - 'expressions': ['i'] - }) - Trigger() - self.assertListEqual( - ['LOGPOINT: hello 0', 'LOGPOINT: hello 1', 'LOGPOINT: hello 2'], - self._info_log) - - def testDeferred(self): - - def Trigger(): - import integration_test_helper # pylint: disable=g-import-not-at-top - integration_test_helper.Trigger() - - IntegrationTest._hub.SetBreakpointAtFile('integration_test_helper.py', - 'DEFERRED') - - Trigger() - result = IntegrationTest._hub.GetNextResult() - self.assertEqual('Trigger', result['stackFrames'][0]['function']) - self.assertEqual('Trigger', result['stackFrames'][1]['function']) - self.assertEqual('IntegrationTest.testDeferred', - result['stackFrames'][2]['function']) +# collector.request_log_id_collector = \ +# current_request_log_id_collector + +# result = IntegrationTest._hub.GetNextResult() +# self.assertIn('labels', result.keys()) +# self.assertIn(labels.Breakpoint.REQUEST_LOG_ID, result['labels']) +# self.assertEqual('foo_bar_id', +# result['labels'][labels.Breakpoint.REQUEST_LOG_ID]) + +# # Tests the issue in b/30876465 +# def testSameLine(self): + +# def Trigger(): +# print('Breakpoint trigger same line') # BPTAG: SAME_LINE + +# num_breakpoints = 5 +# _, line = python_test_util.ResolveTag(sys.modules[__name__], 'SAME_LINE') +# for _ in range(0, num_breakpoints): +# IntegrationTest._hub.SetBreakpoint('SAME_LINE') +# Trigger() +# results = [] +# for _ in range(0, num_breakpoints): +# results.append(IntegrationTest._hub.GetNextResult()) +# lines = [result['stackFrames'][0]['location']['line'] for result in results] +# self.assertListEqual(lines, [line] * num_breakpoints) + +# def testCallStack(self): + +# def Method1(): +# Method2() + +# def Method2(): +# Method3() + +# def 
Method3(): +# Method4() + +# def Method4(): +# Method5() + +# def Method5(): +# return 0 # BPTAG: CALL_STACK + +# IntegrationTest._hub.SetBreakpoint('CALL_STACK') +# Method1() +# result = IntegrationTest._hub.GetNextResult() +# self.assertEqual([ +# 'Method5', 'Method4', 'Method3', 'Method2', 'Method1', +# 'IntegrationTest.testCallStack' +# ], [frame['function'] for frame in result['stackFrames']][:6]) + +# def testInnerMethod(self): + +# def Inner1(): + +# def Inner2(): + +# def Inner3(): +# print('Inner3') # BPTAG: INNER3 + +# Inner3() + +# Inner2() + +# IntegrationTest._hub.SetBreakpoint('INNER3') +# Inner1() +# result = IntegrationTest._hub.GetNextResult() +# self.assertEqual('Inner3', result['stackFrames'][0]['function']) + +# def testClassMethodWithDecorator(self): + +# def MyDecorator(handler): + +# def Caller(self): +# return handler(self) + +# return Caller + +# class BaseClass(object): +# pass + +# class MyClass(BaseClass): + +# @MyDecorator +# def Get(self): +# param = {} # BPTAG: METHOD_WITH_DECORATOR +# return str(param) + +# IntegrationTest._hub.SetBreakpoint('METHOD_WITH_DECORATOR') +# self.assertEqual('{}', MyClass().Get()) +# result = IntegrationTest._hub.GetNextResult() +# self.assertEqual('MyClass.Get', result['stackFrames'][0]['function']) +# self.assertEqual('MyClass.Caller', result['stackFrames'][1]['function']) +# self.assertEqual( +# { +# 'name': +# 'self', +# 'type': +# __name__ + '.MyClass', +# 'members': [{ +# 'status': { +# 'refersTo': 'VARIABLE_NAME', +# 'description': { +# 'format': 'Object has no fields' +# } +# } +# }] +# }, +# python_test_util.PackFrameVariable( +# result, 'self', collection='arguments')) + +# def testGlobalDecorator(self): +# IntegrationTest._hub.SetBreakpoint('WRAPPED_GLOBAL_METHOD') +# self.assertEqual('hello', WrappedGlobalMethod()) +# result = IntegrationTest._hub.GetNextResult() + +# self.assertNotIn('status', result) + +# def testNoLambdaExpression(self): + +# def Trigger(): +# cube = lambda x: x**3 # BPTAG: 
LAMBDA +# cube(18) + +# num_breakpoints = 5 +# for _ in range(0, num_breakpoints): +# IntegrationTest._hub.SetBreakpoint('LAMBDA') +# Trigger() +# results = [] +# for _ in range(0, num_breakpoints): +# results.append(IntegrationTest._hub.GetNextResult()) +# functions = [result['stackFrames'][0]['function'] for result in results] +# self.assertListEqual(functions, ['Trigger'] * num_breakpoints) + +# def testNoGeneratorExpression(self): + +# def Trigger(): +# gen = (i for i in range(0, 5)) # BPTAG: GENEXPR +# next(gen) +# next(gen) +# next(gen) +# next(gen) +# next(gen) + +# num_breakpoints = 1 +# for _ in range(0, num_breakpoints): +# IntegrationTest._hub.SetBreakpoint('GENEXPR') +# Trigger() +# results = [] +# for _ in range(0, num_breakpoints): +# results.append(IntegrationTest._hub.GetNextResult()) +# functions = [result['stackFrames'][0]['function'] for result in results] +# self.assertListEqual(functions, ['Trigger'] * num_breakpoints) + +# def testTryBlock(self): + +# def Method(a): +# try: +# return a * a # BPTAG: TRY_BLOCK +# except Exception as unused_e: # pylint: disable=broad-except +# return a + +# IntegrationTest._hub.SetBreakpoint('TRY_BLOCK') +# Method(11) +# result = IntegrationTest._hub.GetNextResult() +# self.assertEqual('Method', result['stackFrames'][0]['function']) +# self.assertEqual([{ +# 'name': 'a', +# 'value': '11', +# 'type': 'int' +# }], result['stackFrames'][0]['arguments']) + +# def testFrameArguments(self): + +# def Method(a, b): +# return a + str(b) # BPTAG: FRAME_ARGUMENTS + +# IntegrationTest._hub.SetBreakpoint('FRAME_ARGUMENTS') +# Method('hello', 87) +# result = IntegrationTest._hub.GetNextResult() +# self.assertEqual([{ +# 'name': 'a', +# 'value': "'hello'", +# 'type': 'str' +# }, { +# 'name': 'b', +# 'value': '87', +# 'type': 'int' +# }], result['stackFrames'][0]['arguments']) +# self.assertEqual('self', result['stackFrames'][1]['arguments'][0]['name']) + +# def testFrameLocals(self): + +# class Number(object): + +# def 
__init__(self): +# self.n = 57 + +# def Method(a): +# b = a**2 +# c = str(a) * 3 +# return c + str(b) # BPTAG: FRAME_LOCALS + +# IntegrationTest._hub.SetBreakpoint('FRAME_LOCALS') +# x = {'a': 1, 'b': Number()} +# Method(8) +# result = IntegrationTest._hub.GetNextResult() +# self.assertEqual({ +# 'name': 'b', +# 'value': '64', +# 'type': 'int' +# }, python_test_util.PackFrameVariable(result, 'b')) +# self.assertEqual({ +# 'name': 'c', +# 'value': "'888'", +# 'type': 'str' +# }, python_test_util.PackFrameVariable(result, 'c')) +# self.assertEqual( +# { +# 'name': +# 'x', +# 'type': +# 'dict', +# 'members': [{ +# 'name': "'a'", +# 'value': '1', +# 'type': 'int' +# }, { +# 'name': "'b'", +# 'type': __name__ + '.Number', +# 'members': [{ +# 'name': 'n', +# 'value': '57', +# 'type': 'int' +# }] +# }] +# }, python_test_util.PackFrameVariable(result, 'x', frame=1)) +# return x + + +# # FIXME: Broken in Python 3.10 +# # def testRecursion(self): +# # +# # def RecursiveMethod(i): +# # if i == 0: +# # return 0 # BPTAG: RECURSION +# # return RecursiveMethod(i - 1) +# # +# # IntegrationTest._hub.SetBreakpoint('RECURSION') +# # RecursiveMethod(5) +# # result = IntegrationTest._hub.GetNextResult() +# # +# # for frame in range(5): +# # self.assertEqual({ +# # 'name': 'i', +# # 'value': str(frame), +# # 'type': 'int' +# # }, python_test_util.PackFrameVariable(result, 'i', frame, 'arguments')) + +# def testWatchedExpressions(self): + +# def Trigger(): + +# class MyClass(object): + +# def __init__(self): +# self.a = 1 +# self.b = 'bbb' + +# unused_my = MyClass() +# print('Breakpoint trigger') # BPTAG: WATCHED_EXPRESSION + +# IntegrationTest._hub.SetBreakpoint('WATCHED_EXPRESSION', +# {'expressions': ['unused_my']}) +# Trigger() +# result = IntegrationTest._hub.GetNextResult() + +# self.assertEqual( +# { +# 'name': +# 'unused_my', +# 'type': +# __name__ + '.MyClass', +# 'members': [{ +# 'name': 'a', +# 'value': '1', +# 'type': 'int' +# }, { +# 'name': 'b', +# 'value': "'bbb'", +# 
'type': 'str' +# }] +# }, python_test_util.PackWatchedExpression(result, 0)) + +# def testBreakpointExpiration(self): # BPTAG: BREAKPOINT_EXPIRATION +# created_time = datetime.utcnow() - timedelta(hours=25) +# IntegrationTest._hub.SetBreakpoint( +# 'BREAKPOINT_EXPIRATION', +# {'createTime': python_test_util.DateTimeToTimestamp(created_time)}) +# result = IntegrationTest._hub.GetNextResult() + +# self.assertTrue(result['status']['isError']) + +# def testLogAction(self): + +# def Trigger(): +# for i in range(3): +# print('Log me %d' % i) # BPTAG: LOG + +# IntegrationTest._hub.SetBreakpoint( +# 'LOG', { +# 'action': 'LOG', +# 'logLevel': 'INFO', +# 'logMessageFormat': 'hello $0', +# 'expressions': ['i'] +# }) +# Trigger() +# self.assertListEqual( +# ['LOGPOINT: hello 0', 'LOGPOINT: hello 1', 'LOGPOINT: hello 2'], +# self._info_log) + +# def testDeferred(self): + +# def Trigger(): +# import integration_test_helper # pylint: disable=g-import-not-at-top +# integration_test_helper.Trigger() + +# IntegrationTest._hub.SetBreakpointAtFile('integration_test_helper.py', +# 'DEFERRED') + +# Trigger() +# result = IntegrationTest._hub.GetNextResult() +# self.assertEqual('Trigger', result['stackFrames'][0]['function']) +# self.assertEqual('Trigger', result['stackFrames'][1]['function']) +# self.assertEqual('IntegrationTest.testDeferred', +# result['stackFrames'][2]['function']) def MyGlobalDecorator(fn): From 29b4ecd4b4dd588d210b922b3d407a8f9bdbd1b4 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Mon, 5 Jun 2023 14:37:46 -0400 Subject: [PATCH 238/241] chore: release v4.0 (#88) --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index 0f8f662..7b5c565 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. 
Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees. -__version__ = '3.5' +__version__ = '4.0' From 2774034713811c43ff87a5d6ac4abe134c25c94e Mon Sep 17 00:00:00 2001 From: James McTavish Date: Tue, 6 Jun 2023 12:07:07 -0400 Subject: [PATCH 239/241] docs: modify docs to remove deprecated functionality (#89) Cloud Debugger API has been shut down, so the docs should reflect the current state. --- README.md | 132 +++++++++++++----------------------------------------- 1 file changed, 30 insertions(+), 102 deletions(-) diff --git a/README.md b/README.md index 2a836ba..2b1286f 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,22 @@ -# Python Cloud Debugger Agent +# Python Snapshot Debugger Agent -Google [Cloud Debugger](https://cloud.google.com/debugger/) for -Python 3.6, Python 3.7, Python 3.8, Python 3.9, and Python 3.10. +[Snapshot debugger](https://github.com/GoogleCloudPlatform/snapshot-debugger/) +agent for Python 3.6, Python 3.7, Python 3.8, Python 3.9, and Python 3.10. ## Overview -Cloud Debugger (also known as Stackdriver Debugger) lets you inspect the state +Snapshot Debugger lets you inspect the state of a running cloud application, at any code location, without stopping or slowing it down. It is not your traditional process debugger but rather an always on, whole app debugger taking snapshots from any instance of the app. -Cloud Debugger is safe for use with production apps or during development. The +Snapshot Debugger is safe for use with production apps or during development. The Python debugger agent only few milliseconds to the request latency when a debug snapshot is captured. In most cases, this is not noticeable to users. Furthermore, the Python debugger agent does not allow modification of application state in any way, and has close to zero impact on the app instances. 
-Cloud Debugger attaches to all instances of the app providing the ability to +Snapshot Debugger attaches to all instances of the app providing the ability to take debug snapshots and add logpoints. A snapshot captures the call-stack and variables from any one instance that executes the snapshot location. A logpoint writes a formatted message to the application log whenever any instance of the @@ -25,26 +25,22 @@ app executes the logpoint location. The Python debugger agent is only supported on Linux at the moment. It was tested on Debian Linux, but it should work on other distributions as well. -Cloud Debugger consists of 3 primary components: +Snapshot Debugger consists of 3 primary components: 1. The Python debugger agent (this repo implements one for CPython 3.6, 3.7, 3.8, 3.9, and 3.10). -2. Cloud Debugger service storing and managing snapshots/logpoints. Explore the - APIs using - [APIs Explorer](https://cloud.google.com/debugger/api/reference/rest/). +2. A Firebase Realtime Database for storing and managing snapshots/logpoints. + Explore the + [schema](https://github.com/GoogleCloudPlatform/snapshot-debugger/blob/main/docs/SCHEMA.md). 3. User interface, including a command line interface - [`gcloud debug`](https://cloud.google.com/sdk/gcloud/reference/debug/) and a - Web interface on - [Google Cloud Console](https://console.cloud.google.com/debug/). See the - [online help](https://cloud.google.com/debugger/docs/using/snapshots) on how - to use Google Cloud Console Debug page. + [`snapshot-dbg-cli`](https://pypi.org/project/snapshot-dbg-cli/) and a + [VSCode extension](https://github.com/GoogleCloudPlatform/snapshot-debugger/tree/main/snapshot_dbg_extension) ## Getting Help +1. File an [issue](https://github.com/GoogleCloudPlatform/cloud-debug-python/issues) 1. StackOverflow: http://stackoverflow.com/questions/tagged/google-cloud-debugger -2. Send email to: [Cloud Debugger Feedback](mailto:cdbg-feedback@google.com) -3. 
Send Feedback from Google Cloud Console ## Installation @@ -86,22 +82,13 @@ minimal image with the agent installed. ### Google Cloud Platform -1. First, make sure that you created the VM with this option enabled: - - > Allow API access to all Google Cloud services in the same project. - - This option lets the Python debugger agent authenticate with the machine - account of the Virtual Machine. - - It is possible to use the Python debugger agent without it. Please see the - [next section](#outside-google-cloud-platform) for details. +1. First, make sure that the VM has the + [required scopes](https://github.com/GoogleCloudPlatform/snapshot-debugger/blob/main/docs/configuration.md#access-scopes). 2. Install the Python debugger agent as explained in the [Installation](#installation) section. -3. Enable the debugger in your application using one of the two options: - - _Option A_: add this code to the beginning of your `main()` function: +3. Enable the debugger in your application: ```python # Attach Python Cloud Debugger @@ -112,20 +99,7 @@ minimal image with the agent installed. pass ``` - _Option B_: run the debugger agent as a module: - -
-    python \
-        -m googleclouddebugger --module=[MODULE] --version=[VERSION] -- \
-        myapp.py
-    
- - **Note:** This option does not work well with tools such as - `multiprocessing` or `gunicorn`. These tools spawn workers in separate - processes, but the debugger does not get enabled on these worker processes. - Please use _Option A_ instead. - - Where, in both cases: + Where: * `[MODULE]` is the name of your app. This, along with the version, is used to identify the debug target in the UI.
@@ -160,7 +134,7 @@ account. 1. Use the Google Cloud Console Service Accounts [page](https://console.cloud.google.com/iam-admin/serviceaccounts/project) to create a credentials file for an existing or new service account. The - service account must have at least the `Stackdriver Debugger Agent` role. + service account must have at least the `roles/firebasedatabase.admin` role. 2. Once you have the service account credentials JSON file, deploy it alongside the Python debugger agent. @@ -174,8 +148,6 @@ account. Alternatively, you can provide the path to the credentials file directly to the debugger agent. - _Option A_: - ```python # Attach Python Cloud Debugger try: @@ -187,19 +159,6 @@ account. except ImportError: pass ``` - - _Option B_: - -
-    python \
-        -m googleclouddebugger \
-        --module=[MODULE] \
-        --version=[VERSION] \
-        --service_account_json_file=/path/to/credentials.json \
-        -- \
-        myapp.py
-    
- 4. Follow the rest of the steps in the [GCP](#google-cloud-platform) section. ### Django Web Framework @@ -224,45 +183,15 @@ Alternatively, you can pass the `--noreload` flag when running the Django using the `--noreload` flag disables the autoreload feature in Django, which means local changes to files will not be automatically picked up by Django. -### Snapshot Debugger - Firebase Realtime Database Backend - -This functionality is available for release 3.0 onward of this agent and -provides support for the Snapshot Debugger, which is being provided as a -replacement for the deprecated Cloud Debugger service. - -The agent can be configured to use Firebase Realtime Database as a backend -instead of the Cloud Debugger service. If the Firebase backend is used, -breakpoints can be viewed and set using the Snapshot Debugger CLI instead of the -Cloud Console. - -To use the Firebase backend, set the flag when enabling the agent: - -```python -try: - import googleclouddebugger - googleclouddebugger.enable(use_firebase=True) -except ImportError: - pass -``` - -Additional configuration can be provided if necessary: - -```python -try: - import googleclouddebugger - googleclouddebugger.enable( - use_firebase=True, - project_id='my-project-id', - firebase_db_url='https://my-database-url.firebaseio.com', - service_account_json_file='path/to/service_account.json', - ) -except ImportError: - pass -``` - -See https://github.com/GoogleCloudPlatform/snapshot-debugger and -https://cloud.google.com/debugger/docs/deprecations for more details. +## Historical note +Version 3.x of this agent supported both the now shutdown Cloud Debugger service +(by default) and the +[Snapshot Debugger](https://github.com/GoogleCloudPlatform/snapshot-debugger/) +(Firebase RTDB backend) by setting the `use_firebase` flag to true. Version 4.0 +removed support for the Cloud Debugger service, making the Snapshot Debugger the +default. 
Note that the `use_firebase` flag is now obsolete, but still present for +backward compatibility. ## Flag Reference @@ -298,11 +227,10 @@ which are automatically available on machines hosted on GCP, or can be set via `gcloud auth application-default login` or the `GOOGLE_APPLICATION_CREDENTIALS` environment variable. -`breakpoint_enable_canary`: Whether to enable the -[breakpoint canary feature](https://cloud.google.com/debugger/docs/using/snapshots#with_canarying). -It expects a boolean value (`True`/`False`) or a string, with `'True'` -interpreted as `True` and any other string interpreted as `False`). If not -provided, the breakpoint canarying will not be enabled. +`firebase_db_url`: URL pointing to a configured Firebase Realtime Database for +the agent to use to store snapshot data. +https://**PROJECT_ID**-cdbg.firebaseio.com will be used if not provided, where +**PROJECT_ID** is your project ID. ## Development From bb22c8cb791a7fcffc96934e6d97e443bebcfbb7 Mon Sep 17 00:00:00 2001 From: James McTavish Date: Tue, 6 Jun 2023 14:32:13 -0400 Subject: [PATCH 240/241] chore: Release version 4.1 (#91) --- src/googleclouddebugger/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/googleclouddebugger/version.py b/src/googleclouddebugger/version.py index 7b5c565..3b0f00f 100644 --- a/src/googleclouddebugger/version.py +++ b/src/googleclouddebugger/version.py @@ -4,4 +4,4 @@ # The major version should only change on breaking changes. Minor version # changes go between regular updates. Instances running debuggers with # different major versions will show up as two different debuggees.
-__version__ = '4.0' +__version__ = '4.1' From b70d5dbb05ca40bd66d1ceb8444d58e8b9a800fa Mon Sep 17 00:00:00 2001 From: James McTavish Date: Fri, 17 Nov 2023 09:21:41 -0500 Subject: [PATCH 241/241] docs: Document archival status (#93) --- README.md | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 2b1286f..9f171b4 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,14 @@ [Snapshot debugger](https://github.com/GoogleCloudPlatform/snapshot-debugger/) agent for Python 3.6, Python 3.7, Python 3.8, Python 3.9, and Python 3.10. + +## Project Status: Archived + +This project has been archived and is no longer supported. There will be no +further bug fixes or security patches. The repository can be forked by users +if they want to maintain it going forward. + + ## Overview Snapshot Debugger lets you inspect the state @@ -36,12 +44,6 @@ Snapshot Debugger consists of 3 primary components: [`snapshot-dbg-cli`](https://pypi.org/project/snapshot-dbg-cli/) and a [VSCode extension](https://github.com/GoogleCloudPlatform/snapshot-debugger/tree/main/snapshot_dbg_extension) -## Getting Help - -1. File an [issue](https://github.com/GoogleCloudPlatform/cloud-debug-python/issues) -1. StackOverflow: - http://stackoverflow.com/questions/tagged/google-cloud-debugger - ## Installation The easiest way to install the Python Cloud Debugger is with PyPI: