From 63505a423bf0062e98cf1b97407df78a1a693f18 Mon Sep 17 00:00:00 2001 From: Colin Nattrass Date: Wed, 26 Oct 2022 17:55:31 +0200 Subject: [PATCH 01/51] Switch Docker base image from CentOS (as it is now EOL) (close #283) PR #284 * Switch Docker base image to Debian (closes snowplow#283) * Update git clone depth --- Dockerfile | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/Dockerfile b/Dockerfile index 9d0577e8..ff25c7bd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,18 +1,16 @@ -FROM centos:8 -RUN cd /etc/yum.repos.d/ -RUN sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-* -RUN sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-* +FROM debian:bullseye-slim + +RUN apt-get update && apt-get install -y --no-install-recommends make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev \ + libsqlite3-dev wget curl llvm libncurses5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev \ + mecab-ipadic-utf8 git ca-certificates -RUN yum -y install wget -RUN yum install -y epel-release -RUN yum -y install git tar gcc make bzip2 openssl openssl-devel patch gcc-c++ libffi-devel sqlite-devel -RUN git clone https://github.com/yyuu/pyenv.git ~/.pyenv ENV HOME /root ENV PYENV_ROOT $HOME/.pyenv ENV PATH $PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH +RUN git clone --depth=1 https://github.com/pyenv/pyenv.git $PYENV_ROOT +RUN git clone --depth=1 https://github.com/pyenv/pyenv-virtualenv.git $PYENV_ROOT/plugins/pyenv-virtualenv RUN pyenv install 3.5.10 && pyenv install 3.6.14 && pyenv install 3.7.11 && pyenv install 3.8.11 && pyenv install 3.9.6 && pyenv install 3.10.1 -RUN git clone https://github.com/pyenv/pyenv-virtualenv.git ~/.pyenv/plugins/pyenv-virtualenv WORKDIR /app COPY . . 
From 6e886b7aa46074e5536a6169af43f37340a748cc Mon Sep 17 00:00:00 2001 From: Colin Nattrass Date: Wed, 26 Oct 2022 17:57:26 +0200 Subject: [PATCH 02/51] Add session id and idx to Subject (closes #282) PR #285 * Add domain_session_id and domain_session_index to Subject Class --- snowplow_tracker/subject.py | 20 +++++++++++++++++++ .../test/integration/test_integration.py | 6 +++++- snowplow_tracker/test/unit/test_subject.py | 8 ++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/snowplow_tracker/subject.py b/snowplow_tracker/subject.py index d9c10c80..3b4fb82c 100644 --- a/snowplow_tracker/subject.py +++ b/snowplow_tracker/subject.py @@ -123,6 +123,26 @@ def set_domain_user_id(self, duid: str) -> 'Subject': self.standard_nv_pairs["duid"] = duid return self + def set_domain_session_id(self, sid: str) -> 'Subject': + """ + Set the domain session ID + :param sid: Domain session ID + :type sid: string + :rtype: subject + """ + self.standard_nv_pairs["sid"] = sid + return self + + def set_domain_session_index(self, vid: int) -> 'Subject': + """ + Set the domain session Index + :param vid: Domain session Index + :type vid: int + :rtype: subject + """ + self.standard_nv_pairs["vid"] = vid + return self + def set_ip_address(self, ip: str) -> 'Subject': """ Set the domain user ID diff --git a/snowplow_tracker/test/integration/test_integration.py b/snowplow_tracker/test/integration/test_integration.py index cea52a46..2346243b 100644 --- a/snowplow_tracker/test/integration/test_integration.py +++ b/snowplow_tracker/test/integration/test_integration.py @@ -226,6 +226,8 @@ def test_integration_standard_nv_pairs(self) -> None: def test_integration_identification_methods(self) -> None: s = subject.Subject() s.set_domain_user_id("4616bfb38f872d16") + s.set_domain_session_id("59ed13b1a5724dae") + s.set_domain_session_index(1) s.set_ip_address("255.255.255.255") s.set_useragent("Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0)") 
s.set_network_user_id("fbc6c76c-bce5-43ce-8d5a-31c5") @@ -235,6 +237,8 @@ def test_integration_identification_methods(self) -> None: t.track_page_view("localhost", "local host") expected_fields = { "duid": "4616bfb38f872d16", + "sid": "59ed13b1a5724dae", + "vid": "1", "ip": "255.255.255.255", "ua": "Mozilla%2F5.0+%28compatible%3B+MSIE+9.0%3B+Windows+NT+6.0%3B+Trident%2F5.0%29", "tnuid": "fbc6c76c-bce5-43ce-8d5a-31c5" @@ -245,7 +249,7 @@ def test_integration_identification_methods(self) -> None: def test_integration_event_subject(self) -> None: s = subject.Subject() s.set_domain_user_id("4616bfb38f872d16") - s.set_ip_address("255.255.255.255") + s.set_lang("ES") t = tracker.Tracker([emitters.Emitter("localhost")], s, "cf", app_id="angry-birds-android") evSubject = subject.Subject().set_domain_user_id("1111aaa11a111a11").set_lang("EN") diff --git a/snowplow_tracker/test/unit/test_subject.py b/snowplow_tracker/test/unit/test_subject.py index 882cdb2f..93e2b278 100644 --- a/snowplow_tracker/test/unit/test_subject.py +++ b/snowplow_tracker/test/unit/test_subject.py @@ -42,6 +42,8 @@ def test_subject_0(self) -> None: s.set_timezone("PST") s.set_lang("EN") s.set_domain_user_id("domain-user-id") + s.set_domain_session_id("domain-session-id") + s.set_domain_session_index(1) s.set_ip_address("127.0.0.1") s.set_useragent("useragent-string") s.set_network_user_id("network-user-id") @@ -57,6 +59,8 @@ def test_subject_0(self) -> None: "ip": "127.0.0.1", "ua": "useragent-string", "duid": "domain-user-id", + "sid": "domain-session-id", + "vid": 1, "tnuid": "network-user-id" } self.assertDictEqual(s.standard_nv_pairs, exp) @@ -85,5 +89,9 @@ def test_subject_1(self) -> None: s.standard_nv_pairs["ua"] with pytest.raises(KeyError): s.standard_nv_pairs["duid"] + with pytest.raises(KeyError): + s.standard_nv_pairs["sid"] + with pytest.raises(KeyError): + s.standard_nv_pairs["vid"] with pytest.raises(KeyError): s.standard_nv_pairs["tnuid"] From 7feeee905bcd180ba66da71125b944b9df4337ff 
Mon Sep 17 00:00:00 2001 From: Jack-Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Mon, 31 Oct 2022 12:38:35 +0000 Subject: [PATCH 03/51] Add Python 3.11 to CI tests (close #286) PR #287 * Add Python 3.11 to CI tests --- .github/workflows/ci.yml | 2 +- Dockerfile | 2 +- run-tests.sh | 28 ++++++++++++++++++++++++++++ setup.py | 1 + 4 files changed, 31 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f92176df..e5d5fb09 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,7 +12,7 @@ jobs: strategy: matrix: - python-version: [3.6, 3.7, 3.8, 3.9, "3.10"] + python-version: [3.6, 3.7, 3.8, 3.9, "3.10", "3.11"] extras-required: [".", ".[redis]"] services: diff --git a/Dockerfile b/Dockerfile index ff25c7bd..c48e4a8a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,7 +10,7 @@ ENV PATH $PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH RUN git clone --depth=1 https://github.com/pyenv/pyenv.git $PYENV_ROOT RUN git clone --depth=1 https://github.com/pyenv/pyenv-virtualenv.git $PYENV_ROOT/plugins/pyenv-virtualenv -RUN pyenv install 3.5.10 && pyenv install 3.6.14 && pyenv install 3.7.11 && pyenv install 3.8.11 && pyenv install 3.9.6 && pyenv install 3.10.1 +RUN pyenv install 3.5.10 && pyenv install 3.6.14 && pyenv install 3.7.11 && pyenv install 3.8.11 && pyenv install 3.9.6 && pyenv install 3.10.1 && pyenv install 3.11.0 WORKDIR /app COPY . . diff --git a/run-tests.sh b/run-tests.sh index 477f3f7e..715f72dd 100755 --- a/run-tests.sh +++ b/run-tests.sh @@ -116,6 +116,23 @@ function deploy { pip install -r requirements-test.txt source deactivate fi + + # pyenv install 3.11.0 + if [ ! -e ~/.pyenv/versions/tracker311 ]; then + pyenv virtualenv 3.11.0 tracker311 + pyenv activate tracker311 + pip install . + pip install -r requirements-test.txt + source deactivate + fi + + if [ ! 
-e ~/.pyenv/versions/tracker311redis ]; then + pyenv virtualenv 3.11.0 tracker311redis + pyenv activate tracker311redis + pip install .[redis] + pip install -r requirements-test.txt + source deactivate + fi } @@ -167,6 +184,15 @@ function run_tests { pyenv activate tracker310redis pytest source deactivate + + pyenv activate tracker311 + pytest + source deactivate + + pyenv activate tracker311redis + pytest + source deactivate + } function refresh_deploy { @@ -182,6 +208,8 @@ function refresh_deploy { pyenv uninstall -f tracker39redis pyenv uninstall -f tracker310 pyenv uninstall -f tracker310redis + pyenv uninstall -f tracker311 + pyenv uninstall -f tracker311redis } diff --git a/setup.py b/setup.py index 6dd3bc73..6f359ceb 100644 --- a/setup.py +++ b/setup.py @@ -63,6 +63,7 @@ "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Operating System :: OS Independent", ], install_requires=["requests>=2.25.1,<3.0", "typing_extensions>=3.7.4"], From fed98f8f61cae0329efaddf46eb262705c9bafd2 Mon Sep 17 00:00:00 2001 From: Jack-Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Mon, 31 Oct 2022 16:01:16 +0000 Subject: [PATCH 04/51] Make HTTPS the default protocol in emitter (close #14) PR #288 * Set https as default protocol * Add unit tests Make HTTPS the default protocol in emitter (close #14) #288 * Fix endpoint check --- snowplow_tracker/emitters.py | 13 ++++++++---- snowplow_tracker/test/unit/test_emitters.py | 22 ++++++++++++++------- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/snowplow_tracker/emitters.py b/snowplow_tracker/emitters.py index 2deb0345..0f331ef5 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -51,7 +51,7 @@ class Emitter(object): def __init__( self, endpoint: str, - protocol: HttpProtocol = "http", + protocol: HttpProtocol = "https", port: Optional[int] = None, method: Method = 
"get", buffer_size: Optional[int] = None, @@ -60,9 +60,9 @@ def __init__( byte_limit: Optional[int] = None, request_timeout: Optional[Union[float, Tuple[float, float]]] = None) -> None: """ - :param endpoint: The collector URL. Don't include "http://" - this is done automatically. + :param endpoint: The collector URL. If protocol is not set in endpoint it will automatically set to "https://" - this is done automatically. :type endpoint: string - :param protocol: The protocol to use - http or https. Defaults to http. + :param protocol: The protocol to use - http or https. Defaults to https. :type protocol: protocol :param port: The collector port to connect to :type port: int | None @@ -116,7 +116,7 @@ def __init__( @staticmethod def as_collector_uri( endpoint: str, - protocol: HttpProtocol = "http", + protocol: HttpProtocol = "https", port: Optional[int] = None, method: Method = "get") -> str: """ @@ -133,6 +133,11 @@ def as_collector_uri( if len(endpoint) < 1: raise ValueError("No endpoint provided.") + if endpoint.split("://")[0] in PROTOCOLS: + endpoint_arr = endpoint.split("://") + protocol = endpoint_arr[0] + endpoint = endpoint_arr[1] + if method == "get": path = "/i" else: diff --git a/snowplow_tracker/test/unit/test_emitters.py b/snowplow_tracker/test/unit/test_emitters.py index 0167525a..00ff68c1 100644 --- a/snowplow_tracker/test/unit/test_emitters.py +++ b/snowplow_tracker/test/unit/test_emitters.py @@ -54,7 +54,7 @@ def setUp(self) -> None: def test_init(self) -> None: e = Emitter('0.0.0.0') - self.assertEqual(e.endpoint, 'http://0.0.0.0/i') + self.assertEqual(e.endpoint, 'https://0.0.0.0/i') self.assertEqual(e.method, 'get') self.assertEqual(e.buffer_size, 1) self.assertEqual(e.buffer, []) @@ -83,24 +83,32 @@ def test_init_requests_timeout(self) -> None: def test_as_collector_uri(self) -> None: uri = Emitter.as_collector_uri('0.0.0.0') - self.assertEqual(uri, 'http://0.0.0.0/i') + self.assertEqual(uri, 'https://0.0.0.0/i') def 
test_as_collector_uri_post(self) -> None: uri = Emitter.as_collector_uri('0.0.0.0', method="post") - self.assertEqual(uri, 'http://0.0.0.0/com.snowplowanalytics.snowplow/tp2') + self.assertEqual(uri, 'https://0.0.0.0/com.snowplowanalytics.snowplow/tp2') def test_as_collector_uri_port(self) -> None: uri = Emitter.as_collector_uri('0.0.0.0', port=9090, method="post") - self.assertEqual(uri, 'http://0.0.0.0:9090/com.snowplowanalytics.snowplow/tp2') + self.assertEqual(uri, 'https://0.0.0.0:9090/com.snowplowanalytics.snowplow/tp2') - def test_as_collector_uri_https(self) -> None: - uri = Emitter.as_collector_uri('0.0.0.0', protocol="https") - self.assertEqual(uri, 'https://0.0.0.0/i') + def test_as_collector_uri_http(self) -> None: + uri = Emitter.as_collector_uri('0.0.0.0', protocol="http") + self.assertEqual(uri, 'http://0.0.0.0/i') def test_as_collector_uri_empty_string(self) -> None: with self.assertRaises(ValueError): Emitter.as_collector_uri('') + def test_as_collector_uri_endpoint_protocol(self) -> None: + uri = Emitter.as_collector_uri("https://0.0.0.0") + self.assertEqual(uri, "https://0.0.0.0/i") + + def test_as_collector_uri_endpoint_protocol_http(self) -> None: + uri = Emitter.as_collector_uri("http://0.0.0.0") + self.assertEqual(uri, "http://0.0.0.0/i") + @mock.patch('snowplow_tracker.Emitter.flush') def test_input_no_flush(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush From 2cd6abbbc7b9554d8bbcfca0eeee5b91875e42ca Mon Sep 17 00:00:00 2001 From: Jack-Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Thu, 3 Nov 2022 10:06:04 +0000 Subject: [PATCH 05/51] Change default method to POST in emitter (close #289) PR #290 * Set default method to post * Fix invalid escape sequence in doc strings * Update integration tests * Update unit tests * Update example app --- examples/app.py | 3 +- snowplow_tracker/celery/celery_emitter.py | 2 +- snowplow_tracker/emitters.py | 16 +++---- snowplow_tracker/payload.py | 4 +- 
.../test/integration/test_integration.py | 48 ++++++++++--------- snowplow_tracker/test/unit/test_emitters.py | 40 ++++++++-------- snowplow_tracker/tracker.py | 8 ++-- 7 files changed, 62 insertions(+), 59 deletions(-) diff --git a/examples/app.py b/examples/app.py index 829055d0..973f5a99 100644 --- a/examples/app.py +++ b/examples/app.py @@ -19,7 +19,7 @@ def main(): t = Tracker(e, s) - print("Sending events to " + collector_url) + print("Sending events to " + e.endpoint) t.track_page_view("https://www.snowplow.io", "Homepage") t.track_page_ping("https://www.snowplow.io", "Homepage") @@ -32,6 +32,7 @@ def main(): ) ) t.track_struct_event("shop", "add-to-basket", None, "pcs", 2) + t.flush() if __name__ == "__main__": diff --git a/snowplow_tracker/celery/celery_emitter.py b/snowplow_tracker/celery/celery_emitter.py index e7a8efae..d9aafaa7 100644 --- a/snowplow_tracker/celery/celery_emitter.py +++ b/snowplow_tracker/celery/celery_emitter.py @@ -52,7 +52,7 @@ def __init__( endpoint: str, protocol: HttpProtocol = "http", port: Optional[int] = None, - method: Method = "get", + method: Method = "post", buffer_size: Optional[int] = None, byte_limit: Optional[int] = None) -> None: super(CeleryEmitter, self).__init__(endpoint, protocol, port, method, buffer_size, None, None, byte_limit) diff --git a/snowplow_tracker/emitters.py b/snowplow_tracker/emitters.py index 0f331ef5..d2549d25 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -53,7 +53,7 @@ def __init__( endpoint: str, protocol: HttpProtocol = "https", port: Optional[int] = None, - method: Method = "get", + method: Method = "post", buffer_size: Optional[int] = None, on_success: Optional[SuccessCallback] = None, on_failure: Optional[FailureCallback] = None, @@ -66,7 +66,7 @@ def __init__( :type protocol: protocol :param port: The collector port to connect to :type port: int | None - :param method: The HTTP request method + :param method: The HTTP request method. Defaults to post. 
:type method: method :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 10. :type buffer_size: int | None @@ -118,7 +118,7 @@ def as_collector_uri( endpoint: str, protocol: HttpProtocol = "https", port: Optional[int] = None, - method: Method = "get") -> str: + method: Method = "post") -> str: """ :param endpoint: The raw endpoint provided by the user :type endpoint: string @@ -153,7 +153,7 @@ def input(self, payload: PayloadDict) -> None: If the maximum size has been reached, flushes the buffer. :param payload: The name-value pairs for the event - :type payload: dict(string:\*) + :type payload: dict(string:\\*) """ with self.lock: if self.bytes_queued is not None: @@ -212,7 +212,7 @@ def http_post(self, data: str) -> bool: def http_get(self, payload: PayloadDict) -> bool: """ :param payload: The event properties - :type payload: dict(string:\*) + :type payload: dict(string:\\*) """ logger.info("Sending GET request to %s..." % self.endpoint) logger.debug("Payload: %s" % payload) @@ -247,7 +247,7 @@ def is_good_status_code(status_code: int) -> bool: def send_events(self, evts: PayloadDictList) -> None: """ :param evts: Array of events to be sent - :type evts: list(dict(string:\*)) + :type evts: list(dict(string:\\*)) """ if len(evts) > 0: logger.info("Attempting to send %s events" % len(evts)) @@ -312,7 +312,7 @@ def attach_sent_timestamp(events: PayloadDictList) -> None: as `stm` param :param events: Array of events to be sent - :type events: list(dict(string:\*)) + :type events: list(dict(string:\\*)) :rtype: None """ def update(e: PayloadDict) -> None: @@ -332,7 +332,7 @@ def __init__( endpoint: str, protocol: HttpProtocol = "http", port: Optional[int] = None, - method: Method = "get", + method: Method = "post", buffer_size: Optional[int] = None, on_success: Optional[SuccessCallback] = None, on_failure: Optional[FailureCallback] = None, diff --git a/snowplow_tracker/payload.py b/snowplow_tracker/payload.py index 
77fa6759..bb47a1d6 100644 --- a/snowplow_tracker/payload.py +++ b/snowplow_tracker/payload.py @@ -54,7 +54,7 @@ def add_dict(self, dict_: PayloadDict, base64: bool = False) -> None: Add a dict of name value pairs to the Payload object :param dict_: Dictionary to be added to the Payload - :type dict_: dict(string:\*) + :type dict_: dict(string:\\*) """ for f in dict_: self.add(f, dict_[f]) @@ -70,7 +70,7 @@ def add_json( Add an encoded or unencoded JSON to the payload :param dict_: Custom context for the event - :type dict_: dict(string:\*) | None + :type dict_: dict(string:\\*) | None :param encode_base64: If the payload is base64 encoded :type encode_base64: bool :param type_when_encoded: Name of the field when encode_base64 is set diff --git a/snowplow_tracker/test/integration/test_integration.py b/snowplow_tracker/test/integration/test_integration.py index 2346243b..c487bb70 100644 --- a/snowplow_tracker/test/integration/test_integration.py +++ b/snowplow_tracker/test/integration/test_integration.py @@ -36,9 +36,9 @@ querystrings = [""] -default_emitter = emitters.Emitter("localhost", protocol="http", port=80) +default_emitter = emitters.Emitter("localhost", protocol="http", port=80, buffer_size=1) -post_emitter = emitters.Emitter("localhost", protocol="http", port=80, method='post', buffer_size=1) +get_emitter = emitters.Emitter("localhost", protocol="http", port=80, method='get') default_subject = subject.Subject() @@ -79,7 +79,7 @@ def fail_response_content(url: str, request: Any) -> Dict[str, Any]: class IntegrationTest(unittest.TestCase): def test_integration_page_view(self) -> None: - t = tracker.Tracker([default_emitter], default_subject) + t = tracker.Tracker([get_emitter], default_subject) with HTTMock(pass_response_content): t.track_page_view("http://savethearctic.org", "Save The Arctic", "http://referrer.com") expected_fields = {"e": "pv", "page": "Save+The+Arctic", "url": "http%3A%2F%2Fsavethearctic.org", "refr": "http%3A%2F%2Freferrer.com"} @@ -87,7 
+87,7 @@ def test_integration_page_view(self) -> None: self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) def test_integration_ecommerce_transaction_item(self) -> None: - t = tracker.Tracker([default_emitter], default_subject) + t = tracker.Tracker([get_emitter], default_subject) with HTTMock(pass_response_content): t.track_ecommerce_transaction_item("12345", "pbz0025", 7.99, 2, "black-tarot", "tarot", currency="GBP") expected_fields = {"ti_ca": "tarot", "ti_id": "12345", "ti_qu": "2", "ti_sk": "pbz0025", "e": "ti", "ti_nm": "black-tarot", "ti_pr": "7.99", "ti_cu": "GBP"} @@ -95,7 +95,7 @@ def test_integration_ecommerce_transaction_item(self) -> None: self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) def test_integration_ecommerce_transaction(self) -> None: - t = tracker.Tracker([default_emitter], default_subject) + t = tracker.Tracker([get_emitter], default_subject) with HTTMock(pass_response_content): t.track_ecommerce_transaction( "6a8078be", 35, city="London", currency="GBP", @@ -126,7 +126,7 @@ def test_integration_ecommerce_transaction(self) -> None: self.assertEqual(from_querystring("ttm", querystrings[-3]), from_querystring("ttm", querystrings[-2])) def test_integration_screen_view(self) -> None: - t = tracker.Tracker([default_emitter], default_subject, encode_base64=False) + t = tracker.Tracker([get_emitter], default_subject, encode_base64=False) with HTTMock(pass_response_content): t.track_screen_view("Game HUD 2", id_="534") expected_fields = {"e": "ue"} @@ -146,7 +146,7 @@ def test_integration_screen_view(self) -> None: }) def test_integration_struct_event(self) -> None: - t = tracker.Tracker([default_emitter], default_subject) + t = tracker.Tracker([get_emitter], default_subject) with HTTMock(pass_response_content): t.track_struct_event("Ecomm", "add-to-basket", "dog-skateboarding-video", "hd", 13.99) expected_fields = {"se_ca": "Ecomm", "se_pr": "hd", "se_la": "dog-skateboarding-video", 
"se_va": "13.99", "se_ac": "add-to-basket", "e": "se"} @@ -154,7 +154,7 @@ def test_integration_struct_event(self) -> None: self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) def test_integration_unstruct_event_non_base64(self) -> None: - t = tracker.Tracker([default_emitter], default_subject, encode_base64=False) + t = tracker.Tracker([get_emitter], default_subject, encode_base64=False) with HTTMock(pass_response_content): t.track_unstruct_event(SelfDescribingJson("iglu:com.acme/viewed_product/jsonschema/2-0-2", {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000})) expected_fields = {"e": "ue"} @@ -168,7 +168,7 @@ def test_integration_unstruct_event_non_base64(self) -> None: }) def test_integration_unstruct_event_base64(self) -> None: - t = tracker.Tracker([default_emitter], default_subject, encode_base64=True) + t = tracker.Tracker([get_emitter], default_subject, encode_base64=True) with HTTMock(pass_response_content): t.track_unstruct_event(SelfDescribingJson("iglu:com.acme/viewed_product/jsonschema/2-0-2", {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000})) expected_fields = {"e": "ue"} @@ -182,7 +182,7 @@ def test_integration_unstruct_event_base64(self) -> None: }) def test_integration_context_non_base64(self) -> None: - t = tracker.Tracker([default_emitter], default_subject, encode_base64=False) + t = tracker.Tracker([get_emitter], default_subject, encode_base64=False) with HTTMock(pass_response_content): t.track_page_view("localhost", "local host", None, [SelfDescribingJson("iglu:com.example/user/jsonschema/2-0-3", {"user_type": "tester"})]) envelope_string = from_querystring("co", querystrings[-1]) @@ -193,7 +193,7 @@ def test_integration_context_non_base64(self) -> None: }) def test_integration_context_base64(self) -> None: - t = tracker.Tracker([default_emitter], default_subject, encode_base64=True) + t = tracker.Tracker([get_emitter], default_subject, encode_base64=True) with 
HTTMock(pass_response_content): t.track_page_view("localhost", "local host", None, [SelfDescribingJson("iglu:com.example/user/jsonschema/2-0-3", {"user_type": "tester"})]) envelope_string = unquote_plus(from_querystring("cx", querystrings[-1])) @@ -212,7 +212,7 @@ def test_integration_standard_nv_pairs(self) -> None: s.set_timezone("Europe London") s.set_lang("en") - t = tracker.Tracker([emitters.Emitter("localhost")], s, "cf", app_id="angry-birds-android") + t = tracker.Tracker([emitters.Emitter("localhost", method='get')], s, "cf", app_id="angry-birds-android") with HTTMock(pass_response_content): t.track_page_view("localhost", "local host") expected_fields = {"tna": "cf", "res": "100x200", @@ -232,7 +232,7 @@ def test_integration_identification_methods(self) -> None: s.set_useragent("Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0)") s.set_network_user_id("fbc6c76c-bce5-43ce-8d5a-31c5") - t = tracker.Tracker([emitters.Emitter("localhost")], s, "cf", app_id="angry-birds-android") + t = tracker.Tracker([emitters.Emitter("localhost", method='get')], s, "cf", app_id="angry-birds-android") with HTTMock(pass_response_content): t.track_page_view("localhost", "local host") expected_fields = { @@ -251,7 +251,7 @@ def test_integration_event_subject(self) -> None: s.set_domain_user_id("4616bfb38f872d16") s.set_lang("ES") - t = tracker.Tracker([emitters.Emitter("localhost")], s, "cf", app_id="angry-birds-android") + t = tracker.Tracker([emitters.Emitter("localhost", method='get')], s, "cf", app_id="angry-birds-android") evSubject = subject.Subject().set_domain_user_id("1111aaa11a111a11").set_lang("EN") with HTTMock(pass_response_content): t.track_page_view("localhost", "local host", event_subject=evSubject) @@ -293,6 +293,7 @@ def test_integration_success_callback(self) -> None: callback_failure_queue = [] callback_emitter = emitters.Emitter( "localhost", + method='get', on_success=lambda x: callback_success_queue.append(x), on_failure=lambda x, y: 
callback_failure_queue.append(x)) t = tracker.Tracker([callback_emitter], default_subject) @@ -312,6 +313,7 @@ def test_integration_failure_callback(self) -> None: callback_failure_queue = [] callback_emitter = emitters.Emitter( "localhost", + method='get', on_success=lambda x: callback_success_queue.append(x), on_failure=lambda x, y: callback_failure_queue.append(x)) t = tracker.Tracker([callback_emitter], default_subject) @@ -321,7 +323,7 @@ def test_integration_failure_callback(self) -> None: self.assertEqual(callback_failure_queue[0], 0) def test_post_page_view(self) -> None: - t = tracker.Tracker([post_emitter], default_subject) + t = tracker.Tracker([default_emitter], default_subject) with HTTMock(pass_post_response_content): t.track_page_view("localhost", "local host", None) expected_fields = {"e": "pv", "page": "local host", "url": "localhost"} @@ -331,8 +333,8 @@ def test_post_page_view(self) -> None: self.assertEqual(request["data"][0][key], expected_fields[key]) def test_post_batched(self) -> None: - post_emitter = emitters.Emitter("localhost", protocol="http", port=80, method='post', buffer_size=2) - t = tracker.Tracker(post_emitter, default_subject) + default_emitter = emitters.Emitter("localhost", protocol="http", port=80, buffer_size=2) + t = tracker.Tracker(default_emitter, default_subject) with HTTMock(pass_post_response_content): t.track_struct_event("Test", "A") t.track_struct_event("Test", "B") @@ -341,7 +343,7 @@ def test_post_batched(self) -> None: @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 def test_timestamps(self) -> None: - emitter = emitters.Emitter("localhost", protocol="http", port=80, method='post', buffer_size=3) + emitter = emitters.Emitter("localhost", protocol="http", port=80, buffer_size=3) t = tracker.Tracker([emitter], default_subject) with HTTMock(pass_post_response_content): t.track_page_view("localhost", "stamp0", None, tstamp=None) @@ -361,18 +363,18 @@ def test_timestamps(self) -> None: 
self.assertEqual(request["data"][i].get("stm"), expected_timestamps[i]["stm"]) def test_bytelimit(self) -> None: - post_emitter = emitters.Emitter("localhost", protocol="http", port=80, method='post', buffer_size=5, byte_limit=420) - t = tracker.Tracker(post_emitter, default_subject) + default_emitter = emitters.Emitter("localhost", protocol="http", port=80, buffer_size=5, byte_limit=420) + t = tracker.Tracker(default_emitter, default_subject) with HTTMock(pass_post_response_content): t.track_struct_event("Test", "A") # 140 bytes t.track_struct_event("Test", "A") # 280 bytes t.track_struct_event("Test", "A") # 420 bytes. Send t.track_struct_event("Test", "AA") # 141 self.assertEqual(len(querystrings[-1]["data"]), 3) - self.assertEqual(post_emitter.bytes_queued, 136 + len(_version.__version__)) + self.assertEqual(default_emitter.bytes_queued, 136 + len(_version.__version__)) def test_unicode_get(self) -> None: - t = tracker.Tracker([default_emitter], default_subject, encode_base64=False) + t = tracker.Tracker([get_emitter], default_subject, encode_base64=False) unicode_a = u'\u0107' unicode_b = u'test.\u0107om' test_ctx = SelfDescribingJson('iglu:a.b/c/jsonschema/1-0-0', {'test': unicode_a}) @@ -396,7 +398,7 @@ def test_unicode_get(self) -> None: self.assertEqual(actual_b, unicode_b) def test_unicode_post(self) -> None: - t = tracker.Tracker([post_emitter], default_subject, encode_base64=False) + t = tracker.Tracker([default_emitter], default_subject, encode_base64=False) unicode_a = u'\u0107' unicode_b = u'test.\u0107om' test_ctx = SelfDescribingJson('iglu:a.b/c/jsonschema/1-0-0', {'test': unicode_a}) diff --git a/snowplow_tracker/test/unit/test_emitters.py b/snowplow_tracker/test/unit/test_emitters.py index 00ff68c1..51b107d0 100644 --- a/snowplow_tracker/test/unit/test_emitters.py +++ b/snowplow_tracker/test/unit/test_emitters.py @@ -54,9 +54,9 @@ def setUp(self) -> None: def test_init(self) -> None: e = Emitter('0.0.0.0') - self.assertEqual(e.endpoint, 
'https://0.0.0.0/i') - self.assertEqual(e.method, 'get') - self.assertEqual(e.buffer_size, 1) + self.assertEqual(e.endpoint, 'https://0.0.0.0/com.snowplowanalytics.snowplow/tp2') + self.assertEqual(e.method, 'post') + self.assertEqual(e.buffer_size, 10) self.assertEqual(e.buffer, []) self.assertIsNone(e.byte_limit) self.assertIsNone(e.bytes_queued) @@ -70,7 +70,7 @@ def test_init_buffer_size(self) -> None: self.assertEqual(e.buffer_size, 10) def test_init_post(self) -> None: - e = Emitter('0.0.0.0', method="post") + e = Emitter('0.0.0.0') self.assertEqual(e.buffer_size, DEFAULT_MAX_LENGTH) def test_init_byte_limit(self) -> None: @@ -83,19 +83,19 @@ def test_init_requests_timeout(self) -> None: def test_as_collector_uri(self) -> None: uri = Emitter.as_collector_uri('0.0.0.0') - self.assertEqual(uri, 'https://0.0.0.0/i') - - def test_as_collector_uri_post(self) -> None: - uri = Emitter.as_collector_uri('0.0.0.0', method="post") self.assertEqual(uri, 'https://0.0.0.0/com.snowplowanalytics.snowplow/tp2') + def test_as_collector_uri_get(self) -> None: + uri = Emitter.as_collector_uri('0.0.0.0', method='get') + self.assertEqual(uri, 'https://0.0.0.0/i') + def test_as_collector_uri_port(self) -> None: - uri = Emitter.as_collector_uri('0.0.0.0', port=9090, method="post") + uri = Emitter.as_collector_uri('0.0.0.0', port=9090) self.assertEqual(uri, 'https://0.0.0.0:9090/com.snowplowanalytics.snowplow/tp2') def test_as_collector_uri_http(self) -> None: uri = Emitter.as_collector_uri('0.0.0.0', protocol="http") - self.assertEqual(uri, 'http://0.0.0.0/i') + self.assertEqual(uri, 'http://0.0.0.0/com.snowplowanalytics.snowplow/tp2') def test_as_collector_uri_empty_string(self) -> None: with self.assertRaises(ValueError): @@ -103,11 +103,11 @@ def test_as_collector_uri_empty_string(self) -> None: def test_as_collector_uri_endpoint_protocol(self) -> None: uri = Emitter.as_collector_uri("https://0.0.0.0") - self.assertEqual(uri, "https://0.0.0.0/i") + self.assertEqual(uri, 
"https://0.0.0.0/com.snowplowanalytics.snowplow/tp2") def test_as_collector_uri_endpoint_protocol_http(self) -> None: uri = Emitter.as_collector_uri("http://0.0.0.0") - self.assertEqual(uri, "http://0.0.0.0/i") + self.assertEqual(uri, "http://0.0.0.0/com.snowplowanalytics.snowplow/tp2") @mock.patch('snowplow_tracker.Emitter.flush') def test_input_no_flush(self, mok_flush: Any) -> None: @@ -173,7 +173,7 @@ def test_input_bytes_queued(self, mok_flush: Any) -> None: def test_input_bytes_post(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="post") + e = Emitter('0.0.0.0') nvPairs = {"testString": "test", "testNum": 2.72} e.input(nvPairs) @@ -219,7 +219,7 @@ def test_attach_sent_tstamp(self) -> None: def test_flush_timer(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="post", buffer_size=10) + e = Emitter('0.0.0.0', buffer_size=10) ev_list = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] for i in ev_list: e.input(i) @@ -261,7 +261,7 @@ def test_send_events_post_success(self, mok_http_post: Any) -> None: mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', method="post", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + e = Emitter('0.0.0.0', buffer_size=10, on_success=mok_success, on_failure=mok_failure) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) @@ -274,7 +274,7 @@ def test_send_events_post_failure(self, mok_http_post: Any) -> None: mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', method="post", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + e = Emitter('0.0.0.0', buffer_size=10, on_success=mok_success, on_failure=mok_failure) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) @@ -292,7 +292,7 @@ def 
test_http_post_connect_timeout_error(self, mok_post_request: Any) -> None: @mock.patch('snowplow_tracker.emitters.requests.post') def test_http_get_connect_timeout_error(self, mok_post_request: Any) -> None: mok_post_request.side_effect = ConnectTimeout - e = Emitter('0.0.0.0') + e = Emitter('0.0.0.0', method='get') get_succeeded = e.http_get({"a": "b"}) self.assertFalse(get_succeeded) @@ -366,7 +366,7 @@ def test_async_send_events_post_success(self, mok_http_post: Any) -> None: mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - ae = Emitter('0.0.0.0', method="post", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + ae = Emitter('0.0.0.0', buffer_size=10, on_success=mok_success, on_failure=mok_failure) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] ae.send_events(evBuffer) @@ -379,7 +379,7 @@ def test_async_send_events_post_failure(self, mok_http_post: Any) -> None: mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - ae = Emitter('0.0.0.0', method="post", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + ae = Emitter('0.0.0.0', buffer_size=10, on_success=mok_success, on_failure=mok_failure) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] ae.send_events(evBuffer) @@ -403,7 +403,7 @@ def test_input_unicode_post(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush payload = {"unicode": u'\u0107', "alsoAscii": "abc"} - ae = AsyncEmitter('0.0.0.0', method="post", buffer_size=2) + ae = AsyncEmitter('0.0.0.0', buffer_size=2) ae.input(payload) self.assertEqual(len(ae.buffer), 1) diff --git a/snowplow_tracker/tracker.py b/snowplow_tracker/tracker.py index f693e41a..16b89d8d 100644 --- a/snowplow_tracker/tracker.py +++ b/snowplow_tracker/tracker.py @@ -263,7 +263,7 @@ def track_link_click( :param element_id: ID attribute of the HTML element :type element_id: string_or_none :param element_classes: 
Classes of the HTML element - :type element_classes: list(str) | tuple(str,\*) | None + :type element_classes: list(str) | tuple(str,\\*) | None :param element_target: ID attribute of the HTML element :type element_target: string_or_none :param element_content: The content of the HTML element @@ -416,7 +416,7 @@ def track_form_change( :param type_: Type of data the element represents :type type_: non_empty_string, form_type :param element_classes: Classes of the HTML element - :type element_classes: list(str) | tuple(str,\*) | None + :type element_classes: list(str) | tuple(str,\\*) | None :param context: Custom context for the event :type context: context_array | None :param tstamp: Optional event timestamp in milliseconds @@ -456,7 +456,7 @@ def track_form_submit( :param form_id: ID attribute of the HTML form :type form_id: non_empty_string :param form_classes: Classes of the HTML form - :type form_classes: list(str) | tuple(str,\*) | None + :type form_classes: list(str) | tuple(str,\\*) | None :param elements: Classes of the HTML form :type elements: list(form_element) | None :param context: Custom context for the event @@ -611,7 +611,7 @@ def track_ecommerce_transaction( :param currency: The currency the price is expressed in :type currency: string_or_none :param items: The items in the transaction - :type items: list(dict(str:\*)) | None + :type items: list(dict(str:\\*)) | None :param context: Custom context for the event :type context: context_array | None :param tstamp: Optional event timestamp in milliseconds From 723f21f51eaa8e1b8c75fe8254a04709425a0812 Mon Sep 17 00:00:00 2001 From: Jack Keene Date: Thu, 3 Nov 2022 15:53:37 +0000 Subject: [PATCH 06/51] Prepare for release 0.12.0 --- CHANGES.txt | 8 ++++++++ docs/source/conf.py | 2 +- setup.py | 2 +- snowplow_tracker/_version.py | 2 +- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 7139b7c5..06ba1ae9 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,11 
@@ +Version 0.12.0 (2022-11-03) +--------------------------- +Adds Domain Session ID and Domain Session Index to Subject class (#282) (Thanks to @cpnat) +Add support for Python 3.11 (#286) +Change default protocol to HTTPS in the Emitter (#14) +Change default method to POST in the Emitter (#289) +Update Docker base image (#283) (Thanks to @cpnat) + Version 0.11.0 (2022-10-06) --------------------------- Update README file (#264) diff --git a/docs/source/conf.py b/docs/source/conf.py index c1bab387..d906b8be 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -28,7 +28,7 @@ author = 'Alex Dean, Paul Boocock, Matus Tomlein, Jack Keene' # The full version, including alpha/beta/rc tags -release = '0.11' +release = '0.12' # -- General configuration --------------------------------------------------- diff --git a/setup.py b/setup.py index 6f359ceb..cbb9e7cc 100644 --- a/setup.py +++ b/setup.py @@ -37,7 +37,7 @@ setup( name="snowplow-tracker", - version="0.11.0", + version="0.12.0", author=authors_str, author_email=authors_email_str, packages=[ diff --git a/snowplow_tracker/_version.py b/snowplow_tracker/_version.py index 43043c72..d77b9f6d 100644 --- a/snowplow_tracker/_version.py +++ b/snowplow_tracker/_version.py @@ -19,6 +19,6 @@ # License: Apache License Version 2.0 # """ -__version_info__ = (0, 11, 0) +__version_info__ = (0, 12, 0) __version__ = ".".join(str(x) for x in __version_info__) __build_version__ = __version__ + "" From 8a6f31342a65264fda6491c41d0046d7ab1b8af3 Mon Sep 17 00:00:00 2001 From: Jack-Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Thu, 17 Nov 2022 14:35:11 +0000 Subject: [PATCH 07/51] Add a Snowplow interface (close #295) PR #301 * Add snowplow.py interface * Add emitter configuration class * Add tracker configuration class * Add get_namespace() to tracker --- snowplow_tracker/__init__.py | 3 + snowplow_tracker/contracts.py | 21 +- snowplow_tracker/emitter_configuration.py | 129 +++ snowplow_tracker/emitters.py | 287 ++++--- 
snowplow_tracker/payload.py | 50 +- snowplow_tracker/self_describing_json.py | 6 +- snowplow_tracker/snowplow.py | 161 ++++ snowplow_tracker/subject.py | 133 +-- snowplow_tracker/tracker.py | 949 ++++++++++++---------- snowplow_tracker/tracker_configuration.py | 66 ++ snowplow_tracker/typing.py | 27 +- 11 files changed, 1146 insertions(+), 686 deletions(-) create mode 100644 snowplow_tracker/emitter_configuration.py create mode 100644 snowplow_tracker/snowplow.py create mode 100644 snowplow_tracker/tracker_configuration.py diff --git a/snowplow_tracker/__init__.py b/snowplow_tracker/__init__.py index 3d618f9f..da683ada 100644 --- a/snowplow_tracker/__init__.py +++ b/snowplow_tracker/__init__.py @@ -3,6 +3,9 @@ from snowplow_tracker.emitters import logger, Emitter, AsyncEmitter from snowplow_tracker.self_describing_json import SelfDescribingJson from snowplow_tracker.tracker import Tracker +from snowplow_tracker.emitter_configuration import EmitterConfiguration +from snowplow_tracker.tracker_configuration import TrackerConfiguration +from snowplow_tracker.snowplow import Snowplow from snowplow_tracker.contracts import disable_contracts, enable_contracts # celery extra diff --git a/snowplow_tracker/contracts.py b/snowplow_tracker/contracts.py index 2db8b449..7a9253af 100644 --- a/snowplow_tracker/contracts.py +++ b/snowplow_tracker/contracts.py @@ -45,7 +45,9 @@ def contracts_enabled() -> bool: def greater_than(value: float, compared_to: float) -> None: if contracts_enabled() and value <= compared_to: - raise ValueError("{0} must be greater than {1}.".format(_get_parameter_name(), compared_to)) + raise ValueError( + "{0} must be greater than {1}.".format(_get_parameter_name(), compared_to) + ) def non_empty(seq: Sized) -> None: @@ -78,21 +80,26 @@ def _get_parameter_name() -> str: match = _MATCH_FIRST_PARAMETER_REGEX.search(code) if not match: - return 'Unnamed parameter' + return "Unnamed parameter" return match.groups(0)[0] def _check_form_element(element: 
Dict[str, Any]) -> bool: """ - Helper method to check that dictionary conforms element - in sumbit_form and change_form schemas + Helper method to check that dictionary conforms element + in sumbit_form and change_form schemas """ - all_present = isinstance(element, dict) and 'name' in element and 'value' in element and 'nodeName' in element + all_present = ( + isinstance(element, dict) + and "name" in element + and "value" in element + and "nodeName" in element + ) try: - if element['type'] in FORM_TYPES: + if element["type"] in FORM_TYPES: type_valid = True else: type_valid = False except KeyError: type_valid = True - return all_present and element['nodeName'] in FORM_NODE_NAMES and type_valid + return all_present and element["nodeName"] in FORM_NODE_NAMES and type_valid diff --git a/snowplow_tracker/emitter_configuration.py b/snowplow_tracker/emitter_configuration.py new file mode 100644 index 00000000..6d4ef0e6 --- /dev/null +++ b/snowplow_tracker/emitter_configuration.py @@ -0,0 +1,129 @@ +# """ +# emitter_configuration.py + +# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+ +# Authors: Jack Keene, Anuj More, Alex Dean, Fred Blundun, Paul Boocock +# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd +# License: Apache License Version 2.0 +# """ + +from typing import Optional, Union, Tuple +from snowplow_tracker.typing import SuccessCallback, FailureCallback + + +class EmitterConfiguration(object): + def __init__( + self, + buffer_size: Optional[int] = None, + on_success: Optional[SuccessCallback] = None, + on_failure: Optional[FailureCallback] = None, + byte_limit: Optional[int] = None, + request_timeout: Optional[Union[float, Tuple[float, float]]] = None, + ) -> None: + """ + Configuration for the emitter that sends events to the Snowplow collector. + :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 10. + :type buffer_size: int | None + :param on_success: Callback executed after every HTTP request in a flush has status code 200 + Gets passed the number of events flushed. + :type on_success: function | None + :param on_failure: Callback executed if at least one HTTP request in a flush has status code other than 200 + Gets passed two arguments: + 1) The number of events which were successfully sent + 2) If method is "post": The unsent data in string form; + If method is "get": An array of dictionaries corresponding to the unsent events' payloads + :type on_failure: function | None + :param byte_limit: The size event list after reaching which queued events will be flushed + :type byte_limit: int | None + :param request_timeout: Timeout for the HTTP requests. 
Can be set either as single float value which + applies to both "connect" AND "read" timeout, or as tuple with two float values + which specify the "connect" and "read" timeouts separately + :type request_timeout: float | tuple | None + """ + + self.buffer_size = buffer_size + self.on_success = on_success + self.on_failure = on_failure + self.byte_limit = byte_limit + self.request_timeout = request_timeout + + @property + def buffer_size(self) -> Optional[int]: + """ + The maximum number of queued events before the buffer is flushed. Default is 10. + """ + return self._buffer_size + + @buffer_size.setter + def buffer_size(self, value: Optional[int]): + if isinstance(value, int) and value < 0: + raise ValueError("buffer_size must greater than 0") + if not isinstance(value, int) and value is not None: + raise ValueError("buffer_size must be of type int") + self._buffer_size = value + + @property + def on_success(self) -> Optional[SuccessCallback]: + """ + Callback executed after every HTTP request in a flush has status code 200. Gets passed the number of events flushed. 
+ """ + return self._on_success + + @on_success.setter + def on_success(self, value: Optional[SuccessCallback]): + self._on_success = value + + @property + def on_failure(self) -> Optional[FailureCallback]: + """ + Callback executed if at least one HTTP request in a flush has status code other than 200 + Gets passed two arguments: + 1) The number of events which were successfully sent + 2) If method is "post": The unsent data in string form; + If method is "get": An array of dictionaries corresponding to the unsent events' payloads + """ + return self._on_failure + + @on_failure.setter + def on_failure(self, value: Optional[FailureCallback]): + self._on_failure = value + + @property + def byte_limit(self) -> Optional[int]: + """ + The size event list after reaching which queued events will be flushed + """ + return self._byte_limit + + @byte_limit.setter + def byte_limit(self, value: Optional[int]): + if isinstance(value, int) and value < 0: + raise ValueError("byte_limit must greater than 0") + if not isinstance(value, int) and value is not None: + raise ValueError("byte_limit must be of type int") + self._byte_limit = value + + @property + def request_timeout(self) -> Optional[Union[float, Tuple[float, float]]]: + """ + Timeout for the HTTP requests. 
Can be set either as single float value which + applies to both "connect" AND "read" timeout, or as tuple with two float values + which specify the "connect" and "read" timeouts separately + """ + return self._request_timeout + + @request_timeout.setter + def request_timeout(self, value: Optional[Union[float, Tuple[float, float]]]): + self._request_timeout = value diff --git a/snowplow_tracker/emitters.py b/snowplow_tracker/emitters.py index d2549d25..5ddb67c8 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -28,7 +28,14 @@ from queue import Queue from snowplow_tracker.self_describing_json import SelfDescribingJson -from snowplow_tracker.typing import PayloadDict, PayloadDictList, HttpProtocol, Method, SuccessCallback, FailureCallback +from snowplow_tracker.typing import ( + PayloadDict, + PayloadDictList, + HttpProtocol, + Method, + SuccessCallback, + FailureCallback, +) from snowplow_tracker.contracts import one_of # logging @@ -37,54 +44,57 @@ logger.setLevel(logging.INFO) DEFAULT_MAX_LENGTH = 10 -PAYLOAD_DATA_SCHEMA = "iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4" +PAYLOAD_DATA_SCHEMA = ( + "iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4" +) PROTOCOLS = {"http", "https"} METHODS = {"get", "post"} class Emitter(object): """ - Synchronously send Snowplow events to a Snowplow collector - Supports both GET and POST requests + Synchronously send Snowplow events to a Snowplow collector + Supports both GET and POST requests """ def __init__( - self, - endpoint: str, - protocol: HttpProtocol = "https", - port: Optional[int] = None, - method: Method = "post", - buffer_size: Optional[int] = None, - on_success: Optional[SuccessCallback] = None, - on_failure: Optional[FailureCallback] = None, - byte_limit: Optional[int] = None, - request_timeout: Optional[Union[float, Tuple[float, float]]] = None) -> None: + self, + endpoint: str, + protocol: HttpProtocol = "https", + port: Optional[int] = None, + 
method: Method = "post", + buffer_size: Optional[int] = None, + on_success: Optional[SuccessCallback] = None, + on_failure: Optional[FailureCallback] = None, + byte_limit: Optional[int] = None, + request_timeout: Optional[Union[float, Tuple[float, float]]] = None, + ) -> None: """ - :param endpoint: The collector URL. If protocol is not set in endpoint it will automatically set to "https://" - this is done automatically. - :type endpoint: string - :param protocol: The protocol to use - http or https. Defaults to https. - :type protocol: protocol - :param port: The collector port to connect to - :type port: int | None - :param method: The HTTP request method. Defaults to post. - :type method: method - :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 10. - :type buffer_size: int | None - :param on_success: Callback executed after every HTTP request in a flush has status code 200 - Gets passed the number of events flushed. - :type on_success: function | None - :param on_failure: Callback executed if at least one HTTP request in a flush has status code other than 200 - Gets passed two arguments: - 1) The number of events which were successfully sent - 2) If method is "post": The unsent data in string form; - If method is "get": An array of dictionaries corresponding to the unsent events' payloads - :type on_failure: function | None - :param byte_limit: The size event list after reaching which queued events will be flushed - :type byte_limit: int | None - :param request_timeout: Timeout for the HTTP requests. Can be set either as single float value which - applies to both "connect" AND "read" timeout, or as tuple with two float values - which specify the "connect" and "read" timeouts separately - :type request_timeout: float | tuple | None + :param endpoint: The collector URL. If protocol is not set in endpoint it will automatically set to "https://" - this is done automatically. 
+ :type endpoint: string + :param protocol: The protocol to use - http or https. Defaults to https. + :type protocol: protocol + :param port: The collector port to connect to + :type port: int | None + :param method: The HTTP request method. Defaults to post. + :type method: method + :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 10. + :type buffer_size: int | None + :param on_success: Callback executed after every HTTP request in a flush has status code 200 + Gets passed the number of events flushed. + :type on_success: function | None + :param on_failure: Callback executed if at least one HTTP request in a flush has status code other than 200 + Gets passed two arguments: + 1) The number of events which were successfully sent + 2) If method is "post": The unsent data in string form; + If method is "get": An array of dictionaries corresponding to the unsent events' payloads + :type on_failure: function | None + :param byte_limit: The size event list after reaching which queued events will be flushed + :type byte_limit: int | None + :param request_timeout: Timeout for the HTTP requests. 
Can be set either as single float value which + applies to both "connect" AND "read" timeout, or as tuple with two float values + which specify the "connect" and "read" timeouts separately + :type request_timeout: float | tuple | None """ one_of(protocol, PROTOCOLS) one_of(method, METHODS) @@ -115,20 +125,21 @@ def __init__( @staticmethod def as_collector_uri( - endpoint: str, - protocol: HttpProtocol = "https", - port: Optional[int] = None, - method: Method = "post") -> str: + endpoint: str, + protocol: HttpProtocol = "https", + port: Optional[int] = None, + method: Method = "post", + ) -> str: """ - :param endpoint: The raw endpoint provided by the user - :type endpoint: string - :param protocol: The protocol to use - http or https - :type protocol: protocol - :param port: The collector port to connect to - :type port: int | None - :param method: Either `get` or `post` HTTP method - :type method: method - :rtype: string + :param endpoint: The raw endpoint provided by the user + :type endpoint: string + :param protocol: The protocol to use - http or https + :type protocol: protocol + :param port: The collector port to connect to + :type port: int | None + :param method: Either `get` or `post` HTTP method + :type method: method + :rtype: string """ if len(endpoint) < 1: raise ValueError("No endpoint provided.") @@ -149,11 +160,11 @@ def as_collector_uri( def input(self, payload: PayloadDict) -> None: """ - Adds an event to the buffer. - If the maximum size has been reached, flushes the buffer. + Adds an event to the buffer. + If the maximum size has been reached, flushes the buffer. 
- :param payload: The name-value pairs for the event - :type payload: dict(string:\\*) + :param payload: The name-value pairs for the event + :type payload: dict(string:\\*) """ with self.lock: if self.bytes_queued is not None: @@ -169,18 +180,20 @@ def input(self, payload: PayloadDict) -> None: def reached_limit(self) -> bool: """ - Checks if event-size or bytes limit are reached + Checks if event-size or bytes limit are reached - :rtype: bool + :rtype: bool """ if self.byte_limit is None: return len(self.buffer) >= self.buffer_size else: - return (self.bytes_queued or 0) >= self.byte_limit or len(self.buffer) >= self.buffer_size + return (self.bytes_queued or 0) >= self.byte_limit or len( + self.buffer + ) >= self.buffer_size def flush(self) -> None: """ - Sends all events in the buffer to the collector. + Sends all events in the buffer to the collector. """ with self.lock: self.send_events(self.buffer) @@ -190,8 +203,8 @@ def flush(self) -> None: def http_post(self, data: str) -> bool: """ - :param data: The array of JSONs to be sent - :type data: string + :param data: The array of JSONs to be sent + :type data: string """ logger.info("Sending POST request to %s..." 
% self.endpoint) logger.debug("Payload: %s" % data) @@ -200,10 +213,13 @@ def http_post(self, data: str) -> bool: r = requests.post( self.endpoint, data=data, - headers={'Content-Type': 'application/json; charset=utf-8'}, - timeout=self.request_timeout) + headers={"Content-Type": "application/json; charset=utf-8"}, + timeout=self.request_timeout, + ) post_succeeded = Emitter.is_good_status_code(r.status_code) - getattr(logger, "info" if post_succeeded else "warning")("POST request finished with status code: " + str(r.status_code)) + getattr(logger, "info" if post_succeeded else "warning")( + "POST request finished with status code: " + str(r.status_code) + ) except requests.RequestException as e: logger.warning(e) @@ -211,16 +227,20 @@ def http_post(self, data: str) -> bool: def http_get(self, payload: PayloadDict) -> bool: """ - :param payload: The event properties - :type payload: dict(string:\\*) + :param payload: The event properties + :type payload: dict(string:\\*) """ logger.info("Sending GET request to %s..." % self.endpoint) logger.debug("Payload: %s" % payload) get_succeeded = False try: - r = requests.get(self.endpoint, params=payload, timeout=self.request_timeout) + r = requests.get( + self.endpoint, params=payload, timeout=self.request_timeout + ) get_succeeded = Emitter.is_good_status_code(r.status_code) - getattr(logger, "info" if get_succeeded else "warning")("GET request finished with status code: " + str(r.status_code)) + getattr(logger, "info" if get_succeeded else "warning")( + "GET request finished with status code: " + str(r.status_code) + ) except requests.RequestException as e: logger.warning(e) @@ -228,8 +248,8 @@ def http_get(self, payload: PayloadDict) -> bool: def sync_flush(self) -> None: """ - Calls the flush method of the base Emitter class. - This is guaranteed to be blocking, not asynchronous. + Calls the flush method of the base Emitter class. + This is guaranteed to be blocking, not asynchronous. 
""" logger.debug("Starting synchronous flush...") Emitter.flush(self) @@ -238,16 +258,16 @@ def sync_flush(self) -> None: @staticmethod def is_good_status_code(status_code: int) -> bool: """ - :param status_code: HTTP status code - :type status_code: int - :rtype: bool + :param status_code: HTTP status code + :type status_code: int + :rtype: bool """ return 200 <= status_code < 400 def send_events(self, evts: PayloadDictList) -> None: """ - :param evts: Array of events to be sent - :type evts: list(dict(string:\\*)) + :param evts: Array of events to be sent + :type evts: list(dict(string:\\*)) """ if len(evts) > 0: logger.info("Attempting to send %s events" % len(evts)) @@ -256,7 +276,7 @@ def send_events(self, evts: PayloadDictList) -> None: success_events = [] failure_events = [] - if self.method == 'post': + if self.method == "post": data = SelfDescribingJson(PAYLOAD_DATA_SCHEMA, evts).to_string() request_succeeded = self.http_post(data) if request_succeeded: @@ -264,7 +284,7 @@ def send_events(self, evts: PayloadDictList) -> None: else: failure_events += evts - elif self.method == 'get': + elif self.method == "get": for evt in evts: request_succeeded = self.http_get(evt) if request_succeeded: @@ -282,12 +302,12 @@ def send_events(self, evts: PayloadDictList) -> None: def set_flush_timer(self, timeout: float, flush_now: bool = False) -> None: """ - Set an interval at which the buffer will be flushed + Set an interval at which the buffer will be flushed - :param timeout: interval in seconds - :type timeout: int | float - :param flush_now: immediately flush buffer - :type flush_now: bool + :param timeout: interval in seconds + :type timeout: int | float + :param flush_now: immediately flush buffer + :type flush_now: bool """ # Repeatable create new timer @@ -299,7 +319,7 @@ def set_flush_timer(self, timeout: float, flush_now: bool = False) -> None: def cancel_flush_timer(self) -> None: """ - Abort automatic async flushing + Abort automatic async flushing """ if 
self.timer is not None: @@ -307,16 +327,17 @@ def cancel_flush_timer(self) -> None: @staticmethod def attach_sent_timestamp(events: PayloadDictList) -> None: - """ - Attach (by mutating in-place) current timestamp in milliseconds - as `stm` param + """ + Attach (by mutating in-place) current timestamp in milliseconds + as `stm` param - :param events: Array of events to be sent - :type events: list(dict(string:\\*)) - :rtype: None + :param events: Array of events to be sent + :type events: list(dict(string:\\*)) + :rtype: None """ + def update(e: PayloadDict) -> None: - e.update({'stm': str(int(time.time()) * 1000)}) + e.update({"stm": str(int(time.time()) * 1000)}) for event in events: update(event) @@ -324,46 +345,56 @@ def update(e: PayloadDict) -> None: class AsyncEmitter(Emitter): """ - Uses threads to send HTTP requests asynchronously + Uses threads to send HTTP requests asynchronously """ def __init__( - self, - endpoint: str, - protocol: HttpProtocol = "http", - port: Optional[int] = None, - method: Method = "post", - buffer_size: Optional[int] = None, - on_success: Optional[SuccessCallback] = None, - on_failure: Optional[FailureCallback] = None, - thread_count: int = 1, - byte_limit: Optional[int] = None) -> None: + self, + endpoint: str, + protocol: HttpProtocol = "http", + port: Optional[int] = None, + method: Method = "post", + buffer_size: Optional[int] = None, + on_success: Optional[SuccessCallback] = None, + on_failure: Optional[FailureCallback] = None, + thread_count: int = 1, + byte_limit: Optional[int] = None, + ) -> None: """ - :param endpoint: The collector URL. Don't include "http://" - this is done automatically. - :type endpoint: string - :param protocol: The protocol to use - http or https. Defaults to http. 
- :type protocol: protocol - :param port: The collector port to connect to - :type port: int | None - :param method: The HTTP request method - :type method: method - :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 10. - :type buffer_size: int | None - :param on_success: Callback executed after every HTTP request in a flush has status code 200 - Gets passed the number of events flushed. - :type on_success: function | None - :param on_failure: Callback executed if at least one HTTP request in a flush has status code other than 200 - Gets passed two arguments: - 1) The number of events which were successfully sent - 2) If method is "post": The unsent data in string form; - If method is "get": An array of dictionaries corresponding to the unsent events' payloads - :type on_failure: function | None - :param thread_count: Number of worker threads to use for HTTP requests - :type thread_count: int - :param byte_limit: The size event list after reaching which queued events will be flushed - :type byte_limit: int | None + :param endpoint: The collector URL. Don't include "http://" - this is done automatically. + :type endpoint: string + :param protocol: The protocol to use - http or https. Defaults to http. + :type protocol: protocol + :param port: The collector port to connect to + :type port: int | None + :param method: The HTTP request method + :type method: method + :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 10. + :type buffer_size: int | None + :param on_success: Callback executed after every HTTP request in a flush has status code 200 + Gets passed the number of events flushed. 
+ :type on_success: function | None + :param on_failure: Callback executed if at least one HTTP request in a flush has status code other than 200 + Gets passed two arguments: + 1) The number of events which were successfully sent + 2) If method is "post": The unsent data in string form; + If method is "get": An array of dictionaries corresponding to the unsent events' payloads + :type on_failure: function | None + :param thread_count: Number of worker threads to use for HTTP requests + :type thread_count: int + :param byte_limit: The size event list after reaching which queued events will be flushed + :type byte_limit: int | None """ - super(AsyncEmitter, self).__init__(endpoint, protocol, port, method, buffer_size, on_success, on_failure, byte_limit) + super(AsyncEmitter, self).__init__( + endpoint, + protocol, + port, + method, + buffer_size, + on_success, + on_failure, + byte_limit, + ) self.queue = Queue() for i in range(thread_count): t = threading.Thread(target=self.consume) @@ -379,8 +410,8 @@ def sync_flush(self) -> None: def flush(self) -> None: """ - Removes all dead threads, then creates a new thread which - executes the flush method of the base Emitter class + Removes all dead threads, then creates a new thread which + executes the flush method of the base Emitter class """ with self.lock: self.queue.put(self.buffer) diff --git a/snowplow_tracker/payload.py b/snowplow_tracker/payload.py index bb47a1d6..aacc6243 100644 --- a/snowplow_tracker/payload.py +++ b/snowplow_tracker/payload.py @@ -26,10 +26,9 @@ class Payload: - def __init__(self, dict_: Optional[PayloadDict] = None) -> None: """ - Constructor + Constructor """ self.nv_pairs = {} @@ -44,41 +43,42 @@ def __init__(self, dict_: Optional[PayloadDict] = None) -> None: def add(self, name: str, value: Any) -> None: """ - Add a name value pair to the Payload object + Add a name value pair to the Payload object """ if not (value == "" or value is None): self.nv_pairs[name] = value def add_dict(self, 
dict_: PayloadDict, base64: bool = False) -> None: """ - Add a dict of name value pairs to the Payload object + Add a dict of name value pairs to the Payload object - :param dict_: Dictionary to be added to the Payload - :type dict_: dict(string:\\*) + :param dict_: Dictionary to be added to the Payload + :type dict_: dict(string:\\*) """ for f in dict_: self.add(f, dict_[f]) def add_json( - self, - dict_: Optional[PayloadDict], - encode_base64: bool, - type_when_encoded: str, - type_when_not_encoded: str, - json_encoder: Optional[JsonEncoderFunction] = None) -> None: + self, + dict_: Optional[PayloadDict], + encode_base64: bool, + type_when_encoded: str, + type_when_not_encoded: str, + json_encoder: Optional[JsonEncoderFunction] = None, + ) -> None: """ - Add an encoded or unencoded JSON to the payload - - :param dict_: Custom context for the event - :type dict_: dict(string:\\*) | None - :param encode_base64: If the payload is base64 encoded - :type encode_base64: bool - :param type_when_encoded: Name of the field when encode_base64 is set - :type type_when_encoded: string - :param type_when_not_encoded: Name of the field when encode_base64 is not set - :type type_when_not_encoded: string - :param json_encoder: Custom JSON serializer that gets called on non-serializable object - :type json_encoder: function | None + Add an encoded or unencoded JSON to the payload + + :param dict_: Custom context for the event + :type dict_: dict(string:\\*) | None + :param encode_base64: If the payload is base64 encoded + :type encode_base64: bool + :param type_when_encoded: Name of the field when encode_base64 is set + :type type_when_encoded: string + :param type_when_not_encoded: Name of the field when encode_base64 is not set + :type type_when_not_encoded: string + :param json_encoder: Custom JSON serializer that gets called on non-serializable object + :type json_encoder: function | None """ if dict_ is not None and dict_ != {}: @@ -96,6 +96,6 @@ def add_json( def get(self) 
-> PayloadDict: """ - Returns the context dictionary from the Payload object + Returns the context dictionary from the Payload object """ return self.nv_pairs diff --git a/snowplow_tracker/self_describing_json.py b/snowplow_tracker/self_describing_json.py index 84b49c94..538408ca 100644 --- a/snowplow_tracker/self_describing_json.py +++ b/snowplow_tracker/self_describing_json.py @@ -26,16 +26,12 @@ class SelfDescribingJson(object): - def __init__(self, schema: str, data: Union[PayloadDict, PayloadDictList]) -> None: self.schema = schema self.data = data def to_json(self) -> PayloadDict: - return { - "schema": self.schema, - "data": self.data - } + return {"schema": self.schema, "data": self.data} def to_string(self) -> str: return json.dumps(self.to_json()) diff --git a/snowplow_tracker/snowplow.py b/snowplow_tracker/snowplow.py new file mode 100644 index 00000000..7b08c071 --- /dev/null +++ b/snowplow_tracker/snowplow.py @@ -0,0 +1,161 @@ +# """ +# snowplow.py + +# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+ +# Authors: Jack Keene, Anuj More, Alex Dean, Fred Blundun, Paul Boocock +# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd +# License: Apache License Version 2.0 +# """ +import logging +from typing import Optional +from snowplow_tracker import ( + Tracker, + Emitter, + subject, + EmitterConfiguration, + TrackerConfiguration, +) +from snowplow_tracker.typing import Method + +# Logging +logging.basicConfig() +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +""" +Snowplow Class +""" + + +class Snowplow: + _trackers = {} + + @staticmethod + def create_tracker( + namespace: str, + endpoint: str, + method: Method = "post", + app_id: Optional[str] = None, + subject: Optional[subject.Subject] = None, + tracker_config: TrackerConfiguration = TrackerConfiguration(), + emitter_config: EmitterConfiguration = EmitterConfiguration(), + ) -> Tracker: + """ + Create a Snowplow tracker with a namespace and collector URL + + :param namespace: Name of the tracker + :type namespace: String + :param endpoint: The collector URL + :type endpoint: String + :param method: The HTTP request method. Defaults to post. 
+ :type method: method + :param appId: Application ID + :type appId: String | None + :param subject: Subject to be tracked + :type subject: Subject | None + :param tracker_config: Tracker configuration + :type tracker_config: TrackerConfiguration + :param emitter_config: Emitter configuration + :type emitter_config: EmitterConfiguration + :rtype Tracker + """ + if endpoint is None: + raise TypeError("Emitter or Collector URL must be provided") + + emitter = Emitter( + endpoint, + method=method, + buffer_size=emitter_config.buffer_size, + on_success=emitter_config.on_success, + on_failure=emitter_config.on_failure, + byte_limit=emitter_config.byte_limit, + request_timeout=emitter_config.request_timeout, + ) + + tracker = Tracker( + emitter, + namespace=namespace, + app_id=app_id, + subject=subject, + encode_base64=tracker_config.encode_base64, + json_encoder=tracker_config.json_encoder, + ) + + return Snowplow.add_tracker(tracker) + + @classmethod + def add_tracker(cls, tracker: Tracker) -> Tracker: + """ + Add a Snowplow tracker to the Snowplow object + + :param tracker: Tracker object to add to Snowplow + :type tracker: Tracker + :rtype Tracker + """ + if not isinstance(tracker, Tracker): + logger.info("Tracker not provided.") + return None + + namespace = tracker.get_namespace() + + if namespace in cls._trackers.keys(): + raise TypeError("Tracker with this namespace already exists") + + cls._trackers[namespace] = tracker + logger.info("Tracker with namespace: '" + namespace + "' added to Snowplow") + return cls._trackers[namespace] + + @classmethod + def remove_tracker(cls, tracker: Tracker): + """ + Remove a Snowplow tracker from the Snowplow object if it exists + + :param tracker: Tracker object to remove from Snowplow + :type tracker: Tracker | None + """ + namespace = tracker.get_namespace() + cls.remove_tracker_by_namespace(namespace) + + @classmethod + def remove_tracker_by_namespace(cls, namespace: str): + """ + Remove a Snowplow tracker from the Snowplow 
object using it's namespace if it exists + + :param namespace: Tracker namespace to remove from Snowplow + :type tracker: String | None + """ + if not cls._trackers.pop(namespace, False): + logger.info("Tracker with namespace: '" + namespace + "' does not exist") + return + logger.info("Tracker with namespace: '" + namespace + "' removed from Snowplow") + + @classmethod + def reset(cls): + """ + Remove all active Snowplow trackers from the Snowplow object + """ + cls._trackers = {} + + @classmethod + def get_tracker(cls, namespace: str) -> Tracker: + """ + Returns a Snowplow tracker from the Snowplow object if it exists + :param namespace: Snowplow tracker namespace + :type namespace: string + :rtype: Tracker + """ + if namespace in cls._trackers.keys(): + return cls._trackers[namespace] + return None diff --git a/snowplow_tracker/subject.py b/snowplow_tracker/subject.py index 3b4fb82c..b5f3de83 100644 --- a/snowplow_tracker/subject.py +++ b/snowplow_tracker/subject.py @@ -27,41 +27,42 @@ class Subject(object): """ - Class for an event subject, where we view events as of the form + Class for an event subject, where we view events as of the form - (Subject) -> (Verb) -> (Object) + (Subject) -> (Verb) -> (Object) """ + def __init__(self) -> None: self.standard_nv_pairs = {"p": DEFAULT_PLATFORM} - def set_platform(self, value: SupportedPlatform) -> 'Subject': + def set_platform(self, value: SupportedPlatform) -> "Subject": """ - :param value: One of ["pc", "tv", "mob", "cnsl", "iot", "web", "srv", "app"] - :type value: supported_platform - :rtype: subject + :param value: One of ["pc", "tv", "mob", "cnsl", "iot", "web", "srv", "app"] + :type value: supported_platform + :rtype: subject """ one_of(value, SUPPORTED_PLATFORMS) self.standard_nv_pairs["p"] = value return self - def set_user_id(self, user_id: str) -> 'Subject': + def set_user_id(self, user_id: str) -> "Subject": """ - :param user_id: User ID - :type user_id: string - :rtype: subject + :param user_id: User ID 
+ :type user_id: string + :rtype: subject """ self.standard_nv_pairs["uid"] = user_id return self - def set_screen_resolution(self, width: int, height: int) -> 'Subject': + def set_screen_resolution(self, width: int, height: int) -> "Subject": """ - :param width: Width of the screen - :param height: Height of the screen - :type width: int,>0 - :type height: int,>0 - :rtype: subject + :param width: Width of the screen + :param height: Height of the screen + :type width: int,>0 + :type height: int,>0 + :rtype: subject """ greater_than(width, 0) greater_than(height, 0) @@ -69,13 +70,13 @@ def set_screen_resolution(self, width: int, height: int) -> 'Subject': self.standard_nv_pairs["res"] = "".join([str(width), "x", str(height)]) return self - def set_viewport(self, width: int, height: int) -> 'Subject': + def set_viewport(self, width: int, height: int) -> "Subject": """ - :param width: Width of the viewport - :param height: Height of the viewport - :type width: int,>0 - :type height: int,>0 - :rtype: subject + :param width: Width of the viewport + :param height: Height of the viewport + :type width: int,>0 + :type height: int,>0 + :rtype: subject """ greater_than(width, 0) greater_than(height, 0) @@ -83,96 +84,96 @@ def set_viewport(self, width: int, height: int) -> 'Subject': self.standard_nv_pairs["vp"] = "".join([str(width), "x", str(height)]) return self - def set_color_depth(self, depth: int) -> 'Subject': + def set_color_depth(self, depth: int) -> "Subject": """ - :param depth: Depth of the color on the screen - :type depth: int - :rtype: subject + :param depth: Depth of the color on the screen + :type depth: int + :rtype: subject """ self.standard_nv_pairs["cd"] = depth return self - def set_timezone(self, timezone: str) -> 'Subject': + def set_timezone(self, timezone: str) -> "Subject": """ - :param timezone: Timezone as a string - :type timezone: string - :rtype: subject + :param timezone: Timezone as a string + :type timezone: string + :rtype: subject """ 
self.standard_nv_pairs["tz"] = timezone return self - def set_lang(self, lang: str) -> 'Subject': + def set_lang(self, lang: str) -> "Subject": """ - Set language. + Set language. - :param lang: Language the application is set to - :type lang: string - :rtype: subject + :param lang: Language the application is set to + :type lang: string + :rtype: subject """ self.standard_nv_pairs["lang"] = lang return self - def set_domain_user_id(self, duid: str) -> 'Subject': + def set_domain_user_id(self, duid: str) -> "Subject": """ - Set the domain user ID + Set the domain user ID - :param duid: Domain user ID - :type duid: string - :rtype: subject + :param duid: Domain user ID + :type duid: string + :rtype: subject """ self.standard_nv_pairs["duid"] = duid return self - def set_domain_session_id(self, sid: str) -> 'Subject': + def set_domain_session_id(self, sid: str) -> "Subject": """ - Set the domain session ID - :param sid: Domain session ID - :type sid: string - :rtype: subject + Set the domain session ID + :param sid: Domain session ID + :type sid: string + :rtype: subject """ self.standard_nv_pairs["sid"] = sid return self - def set_domain_session_index(self, vid: int) -> 'Subject': + def set_domain_session_index(self, vid: int) -> "Subject": """ - Set the domain session Index - :param vid: Domain session Index - :type vid: int - :rtype: subject + Set the domain session Index + :param vid: Domain session Index + :type vid: int + :rtype: subject """ self.standard_nv_pairs["vid"] = vid return self - def set_ip_address(self, ip: str) -> 'Subject': + def set_ip_address(self, ip: str) -> "Subject": """ - Set the domain user ID + Set the domain user ID - :param ip: IP address - :type ip: string - :rtype: subject + :param ip: IP address + :type ip: string + :rtype: subject """ self.standard_nv_pairs["ip"] = ip return self - def set_useragent(self, ua: str) -> 'Subject': + def set_useragent(self, ua: str) -> "Subject": """ - Set the user agent + Set the user agent - :param 
ua: User agent - :type ua: string - :rtype: subject + :param ua: User agent + :type ua: string + :rtype: subject """ self.standard_nv_pairs["ua"] = ua return self - def set_network_user_id(self, nuid: str) -> 'Subject': + def set_network_user_id(self, nuid: str) -> "Subject": """ - Set the network user ID field - This overwrites the nuid field set by the collector + Set the network user ID field + This overwrites the nuid field set by the collector - :param nuid: Network user ID - :type nuid: string - :rtype: subject + :param nuid: Network user ID + :type nuid: string + :rtype: subject """ self.standard_nv_pairs["tnuid"] = nuid return self diff --git a/snowplow_tracker/tracker.py b/snowplow_tracker/tracker.py index 16b89d8d..4c988fc8 100644 --- a/snowplow_tracker/tracker.py +++ b/snowplow_tracker/tracker.py @@ -26,8 +26,15 @@ from snowplow_tracker import payload, _version, SelfDescribingJson from snowplow_tracker import subject as _subject from snowplow_tracker.contracts import non_empty_string, one_of, non_empty, form_element -from snowplow_tracker.typing import JsonEncoderFunction, EmitterProtocol,\ - FORM_NODE_NAMES, FORM_TYPES, FormNodeName, ElementClasses, FormClasses +from snowplow_tracker.typing import ( + JsonEncoderFunction, + EmitterProtocol, + FORM_NODE_NAMES, + FORM_TYPES, + FormNodeName, + ElementClasses, + FormClasses, +) """ Constants & config @@ -47,28 +54,28 @@ class Tracker: - def __init__( - self, - emitters: Union[List[EmitterProtocol], EmitterProtocol], - subject: Optional[_subject.Subject] = None, - namespace: Optional[str] = None, - app_id: Optional[str] = None, - encode_base64: bool = DEFAULT_ENCODE_BASE64, - json_encoder: Optional[JsonEncoderFunction] = None) -> None: - """ - :param emitters: Emitters to which events will be sent - :type emitters: list[>0](emitter) | emitter - :param subject: Subject to be tracked - :type subject: subject | None - :param namespace: Identifier for the Tracker instance - :type namespace: string_or_none - 
:param app_id: Application ID - :type app_id: string_or_none - :param encode_base64: Whether JSONs in the payload should be base-64 encoded - :type encode_base64: bool - :param json_encoder: Custom JSON serializer that gets called on non-serializable object - :type json_encoder: function | None + self, + emitters: Union[List[EmitterProtocol], EmitterProtocol], + subject: Optional[_subject.Subject] = None, + namespace: Optional[str] = None, + app_id: Optional[str] = None, + encode_base64: bool = DEFAULT_ENCODE_BASE64, + json_encoder: Optional[JsonEncoderFunction] = None, + ) -> None: + """ + :param emitters: Emitters to which events will be sent + :type emitters: list[>0](emitter) | emitter + :param subject: Subject to be tracked + :type subject: subject | None + :param namespace: Identifier for the Tracker instance + :type namespace: string_or_none + :param app_id: Application ID + :type app_id: string_or_none + :param encode_base64: Whether JSONs in the payload should be base-64 encoded + :type encode_base64: bool + :param json_encoder: Custom JSON serializer that gets called on non-serializable object + :type json_encoder: function | None """ if subject is None: subject = _subject.Subject() @@ -83,31 +90,33 @@ def __init__( self.encode_base64 = encode_base64 self.json_encoder = json_encoder - self.standard_nv_pairs = { - "tv": VERSION, - "tna": namespace, - "aid": app_id - } + self.standard_nv_pairs = {"tv": VERSION, "tna": namespace, "aid": app_id} self.timer = None @staticmethod def get_uuid() -> str: """ - Set transaction ID for the payload once during the lifetime of the - event. + Set transaction ID for the payload once during the lifetime of the + event. 
- :rtype: string + :rtype: string """ return str(uuid.uuid4()) @staticmethod def get_timestamp(tstamp: Optional[float] = None) -> int: """ - :param tstamp: User-input timestamp or None - :type tstamp: int | float | None - :rtype: int - """ - if isinstance(tstamp, (int, float, )): + :param tstamp: User-input timestamp or None + :type tstamp: int | float | None + :rtype: int + """ + if isinstance( + tstamp, + ( + int, + float, + ), + ): return int(tstamp) return int(time.time() * 1000) @@ -115,37 +124,38 @@ def get_timestamp(tstamp: Optional[float] = None) -> int: Tracking methods """ - def track(self, pb: payload.Payload) -> 'Tracker': + def track(self, pb: payload.Payload) -> "Tracker": """ - Send the payload to a emitter + Send the payload to a emitter - :param pb: Payload builder - :type pb: payload - :rtype: tracker + :param pb: Payload builder + :type pb: payload + :rtype: tracker """ for emitter in self.emitters: emitter.input(pb.nv_pairs) return self def complete_payload( - self, - pb: payload.Payload, - context: Optional[List[SelfDescribingJson]], - tstamp: Optional[float], - event_subject: Optional[_subject.Subject]) -> 'Tracker': - """ - Called by all tracking events to add the standard name-value pairs - to the Payload object irrespective of the tracked event. - - :param pb: Payload builder - :type pb: payload - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + pb: payload.Payload, + context: Optional[List[SelfDescribingJson]], + tstamp: Optional[float], + event_subject: Optional[_subject.Subject], + ) -> "Tracker": + """ + Called by all tracking events to add the standard name-value pairs + to the Payload object irrespective of the tracked event. 
+ + :param pb: Payload builder + :type pb: payload + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker """ pb.add("eid", Tracker.get_uuid()) @@ -155,8 +165,12 @@ def complete_payload( if context is not None: context_jsons = list(map(lambda c: c.to_json(), context)) - context_envelope = SelfDescribingJson(CONTEXT_SCHEMA, context_jsons).to_json() - pb.add_json(context_envelope, self.encode_base64, "cx", "co", self.json_encoder) + context_envelope = SelfDescribingJson( + CONTEXT_SCHEMA, context_jsons + ).to_json() + pb.add_json( + context_envelope, self.encode_base64, "cx", "co", self.json_encoder + ) pb.add_dict(self.standard_nv_pairs) @@ -166,32 +180,33 @@ def complete_payload( return self.track(pb) def track_page_view( - self, - page_url: str, - page_title: Optional[str] = None, - referrer: Optional[str] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param page_url: URL of the viewed page - :type page_url: non_empty_string - :param page_title: Title of the viewed page - :type page_title: string_or_none - :param referrer: Referrer of the page - :type referrer: string_or_none - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + page_url: str, + page_title: Optional[str] = None, + referrer: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + 
:param page_url: URL of the viewed page + :type page_url: non_empty_string + :param page_title: Title of the viewed page + :type page_title: string_or_none + :param referrer: Referrer of the page + :type referrer: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker """ non_empty_string(page_url) pb = payload.Payload() - pb.add("e", "pv") # pv: page view + pb.add("e", "pv") # pv: page view pb.add("url", page_url) pb.add("page", page_title) pb.add("refr", referrer) @@ -199,44 +214,45 @@ def track_page_view( return self.complete_payload(pb, context, tstamp, event_subject) def track_page_ping( - self, - page_url: str, - page_title: Optional[str] = None, - referrer: Optional[str] = None, - min_x: Optional[int] = None, - max_x: Optional[int] = None, - min_y: Optional[int] = None, - max_y: Optional[int] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param page_url: URL of the viewed page - :type page_url: non_empty_string - :param page_title: Title of the viewed page - :type page_title: string_or_none - :param referrer: Referrer of the page - :type referrer: string_or_none - :param min_x: Minimum page x offset seen in the last ping period - :type min_x: int | None - :param max_x: Maximum page x offset seen in the last ping period - :type max_x: int | None - :param min_y: Minimum page y offset seen in the last ping period - :type min_y: int | None - :param max_y: Maximum page y offset seen in the last ping period - :type max_y: int | None - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | 
None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + page_url: str, + page_title: Optional[str] = None, + referrer: Optional[str] = None, + min_x: Optional[int] = None, + max_x: Optional[int] = None, + min_y: Optional[int] = None, + max_y: Optional[int] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + :param page_url: URL of the viewed page + :type page_url: non_empty_string + :param page_title: Title of the viewed page + :type page_title: string_or_none + :param referrer: Referrer of the page + :type referrer: string_or_none + :param min_x: Minimum page x offset seen in the last ping period + :type min_x: int | None + :param max_x: Maximum page x offset seen in the last ping period + :type max_x: int | None + :param min_y: Minimum page y offset seen in the last ping period + :type min_y: int | None + :param max_y: Maximum page y offset seen in the last ping period + :type max_y: int | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker """ non_empty_string(page_url) pb = payload.Payload() - pb.add("e", "pp") # pp: page ping + pb.add("e", "pp") # pp: page ping pb.add("url", page_url) pb.add("page", page_title) pb.add("refr", referrer) @@ -248,33 +264,34 @@ def track_page_ping( return self.complete_payload(pb, context, tstamp, event_subject) def track_link_click( - self, - target_url: str, - element_id: Optional[str] = None, - element_classes: Optional[ElementClasses] = None, - element_target: Optional[str] = None, - element_content: Optional[str] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, 
- event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param target_url: Target URL of the link - :type target_url: non_empty_string - :param element_id: ID attribute of the HTML element - :type element_id: string_or_none - :param element_classes: Classes of the HTML element - :type element_classes: list(str) | tuple(str,\\*) | None - :param element_target: ID attribute of the HTML element - :type element_target: string_or_none - :param element_content: The content of the HTML element - :type element_content: string_or_none - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + target_url: str, + element_id: Optional[str] = None, + element_classes: Optional[ElementClasses] = None, + element_target: Optional[str] = None, + element_content: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + :param target_url: Target URL of the link + :type target_url: non_empty_string + :param element_id: ID attribute of the HTML element + :type element_id: string_or_none + :param element_classes: Classes of the HTML element + :type element_classes: list(str) | tuple(str,\\*) | None + :param element_target: ID attribute of the HTML element + :type element_target: string_or_none + :param element_content: The content of the HTML element + :type element_content: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker """ non_empty_string(target_url) @@ 
-289,41 +306,44 @@ def track_link_click( if element_content is not None: properties["elementContent"] = element_content - event_json = SelfDescribingJson("%s/link_click/%s/1-0-1" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties) + event_json = SelfDescribingJson( + "%s/link_click/%s/1-0-1" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) return self.track_unstruct_event(event_json, context, tstamp, event_subject) def track_add_to_cart( - self, - sku: str, - quantity: int, - name: Optional[str] = None, - category: Optional[str] = None, - unit_price: Optional[float] = None, - currency: Optional[str] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param sku: Item SKU or ID - :type sku: non_empty_string - :param quantity: Number added to cart - :type quantity: int - :param name: Item's name - :type name: string_or_none - :param category: Item's category - :type category: string_or_none - :param unit_price: Item's price - :type unit_price: int | float | None - :param currency: Type of currency the price is in - :type currency: string_or_none - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + sku: str, + quantity: int, + name: Optional[str] = None, + category: Optional[str] = None, + unit_price: Optional[float] = None, + currency: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + :param sku: Item SKU or ID + :type sku: non_empty_string + :param quantity: Number added to cart + :type quantity: int + :param name: Item's name + :type name: string_or_none + :param category: Item's 
category + :type category: string_or_none + :param unit_price: Item's price + :type unit_price: int | float | None + :param currency: Type of currency the price is in + :type currency: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker """ non_empty_string(sku) @@ -339,41 +359,44 @@ def track_add_to_cart( if currency is not None: properties["currency"] = currency - event_json = SelfDescribingJson("%s/add_to_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties) + event_json = SelfDescribingJson( + "%s/add_to_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) return self.track_unstruct_event(event_json, context, tstamp, event_subject) def track_remove_from_cart( - self, - sku: str, - quantity: int, - name: Optional[str] = None, - category: Optional[str] = None, - unit_price: Optional[float] = None, - currency: Optional[str] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param sku: Item SKU or ID - :type sku: non_empty_string - :param quantity: Number added to cart - :type quantity: int - :param name: Item's name - :type name: string_or_none - :param category: Item's category - :type category: string_or_none - :param unit_price: Item's price - :type unit_price: int | float | None - :param currency: Type of currency the price is in - :type currency: string_or_none - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + sku: str, + quantity: int, + 
name: Optional[str] = None, + category: Optional[str] = None, + unit_price: Optional[float] = None, + currency: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + :param sku: Item SKU or ID + :type sku: non_empty_string + :param quantity: Number added to cart + :type quantity: int + :param name: Item's name + :type name: string_or_none + :param category: Item's category + :type category: string_or_none + :param unit_price: Item's price + :type unit_price: int | float | None + :param currency: Type of currency the price is in + :type currency: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker """ non_empty_string(sku) @@ -389,41 +412,44 @@ def track_remove_from_cart( if currency is not None: properties["currency"] = currency - event_json = SelfDescribingJson("%s/remove_from_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties) + event_json = SelfDescribingJson( + "%s/remove_from_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) return self.track_unstruct_event(event_json, context, tstamp, event_subject) def track_form_change( - self, - form_id: str, - element_id: Optional[str], - node_name: FormNodeName, - value: Optional[str], - type_: Optional[str] = None, - element_classes: Optional[ElementClasses] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param form_id: ID attribute of the HTML form - :type form_id: non_empty_string - :param element_id: ID attribute of the HTML element - :type element_id: string_or_none - :param node_name: Type of 
input element - :type node_name: form_node_name - :param value: Value of the input element - :type value: string_or_none - :param type_: Type of data the element represents - :type type_: non_empty_string, form_type - :param element_classes: Classes of the HTML element - :type element_classes: list(str) | tuple(str,\\*) | None - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + form_id: str, + element_id: Optional[str], + node_name: FormNodeName, + value: Optional[str], + type_: Optional[str] = None, + element_classes: Optional[ElementClasses] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + :param form_id: ID attribute of the HTML form + :type form_id: non_empty_string + :param element_id: ID attribute of the HTML element + :type element_id: string_or_none + :param node_name: Type of input element + :type node_name: form_node_name + :param value: Value of the input element + :type value: string_or_none + :param type_: Type of data the element represents + :type type_: non_empty_string, form_type + :param element_classes: Classes of the HTML element + :type element_classes: list(str) | tuple(str,\\*) | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker """ non_empty_string(form_id) one_of(node_name, FORM_NODE_NAMES) @@ -440,73 +466,79 @@ def track_form_change( if element_classes is not None: properties["elementClasses"] = element_classes - event_json = 
SelfDescribingJson("%s/change_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties) + event_json = SelfDescribingJson( + "%s/change_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) return self.track_unstruct_event(event_json, context, tstamp, event_subject) def track_form_submit( - self, - form_id: str, - form_classes: Optional[FormClasses] = None, - elements: Optional[List[Dict[str, Any]]] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param form_id: ID attribute of the HTML form - :type form_id: non_empty_string - :param form_classes: Classes of the HTML form - :type form_classes: list(str) | tuple(str,\\*) | None - :param elements: Classes of the HTML form - :type elements: list(form_element) | None - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + form_id: str, + form_classes: Optional[FormClasses] = None, + elements: Optional[List[Dict[str, Any]]] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + :param form_id: ID attribute of the HTML form + :type form_id: non_empty_string + :param form_classes: Classes of the HTML form + :type form_classes: list(str) | tuple(str,\\*) | None + :param elements: Classes of the HTML form + :type elements: list(form_element) | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker """ 
non_empty_string(form_id) for element in elements or []: form_element(element) properties = dict() - properties['formId'] = form_id + properties["formId"] = form_id if form_classes is not None: - properties['formClasses'] = form_classes + properties["formClasses"] = form_classes if elements is not None and len(elements) > 0: - properties['elements'] = elements + properties["elements"] = elements - event_json = SelfDescribingJson("%s/submit_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties) + event_json = SelfDescribingJson( + "%s/submit_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) return self.track_unstruct_event(event_json, context, tstamp, event_subject) def track_site_search( - self, - terms: Sequence[str], - filters: Optional[Dict[str, Union[str, bool]]] = None, - total_results: Optional[int] = None, - page_results: Optional[int] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param terms: Search terms - :type terms: seq[>=1](str) - :param filters: Filters applied to the search - :type filters: dict(str:str|bool) | None - :param total_results: Total number of results returned - :type total_results: int | None - :param page_results: Total number of pages of results - :type page_results: int | None - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + terms: Sequence[str], + filters: Optional[Dict[str, Union[str, bool]]] = None, + total_results: Optional[int] = None, + page_results: Optional[int] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + :param 
terms: Search terms + :type terms: seq[>=1](str) + :param filters: Filters applied to the search + :type filters: dict(str:str|bool) | None + :param total_results: Total number of results returned + :type total_results: int | None + :param page_results: Total number of pages of results + :type page_results: int | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker """ non_empty(terms) @@ -519,47 +551,50 @@ def track_site_search( if page_results is not None: properties["pageResults"] = page_results - event_json = SelfDescribingJson("%s/site_search/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties) + event_json = SelfDescribingJson( + "%s/site_search/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) return self.track_unstruct_event(event_json, context, tstamp, event_subject) def track_ecommerce_transaction_item( - self, - order_id: str, - sku: str, - price: float, - quantity: int, - name: Optional[str] = None, - category: Optional[str] = None, - currency: Optional[str] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - This is an internal method called by track_ecommerce_transaction. - It is not for public use. 
- - :param order_id: Order ID - :type order_id: non_empty_string - :param sku: Item SKU - :type sku: non_empty_string - :param price: Item price - :type price: int | float - :param quantity: Item quantity - :type quantity: int - :param name: Item name - :type name: string_or_none - :param category: Item category - :type category: string_or_none - :param currency: The currency the price is expressed in - :type currency: string_or_none - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + order_id: str, + sku: str, + price: float, + quantity: int, + name: Optional[str] = None, + category: Optional[str] = None, + currency: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + This is an internal method called by track_ecommerce_transaction. + It is not for public use. 
+ + :param order_id: Order ID + :type order_id: non_empty_string + :param sku: Item SKU + :type sku: non_empty_string + :param price: Item price + :type price: int | float + :param quantity: Item quantity + :type quantity: int + :param name: Item name + :type name: string_or_none + :param category: Item category + :type category: string_or_none + :param currency: The currency the price is expressed in + :type currency: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker """ non_empty_string(order_id) non_empty_string(sku) @@ -577,48 +612,49 @@ def track_ecommerce_transaction_item( return self.complete_payload(pb, context, tstamp, event_subject) def track_ecommerce_transaction( - self, - order_id: str, - total_value: float, - affiliation: Optional[str] = None, - tax_value: Optional[float] = None, - shipping: Optional[float] = None, - city: Optional[str] = None, - state: Optional[str] = None, - country: Optional[str] = None, - currency: Optional[str] = None, - items: Optional[List[Dict[str, Any]]] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param order_id: ID of the eCommerce transaction - :type order_id: non_empty_string - :param total_value: Total transaction value - :type total_value: int | float - :param affiliation: Transaction affiliation - :type affiliation: string_or_none - :param tax_value: Transaction tax value - :type tax_value: int | float | None - :param shipping: Delivery cost charged - :type shipping: int | float | None - :param city: Delivery address city - :type city: string_or_none - :param state: Delivery address state - :type state: string_or_none - :param country: Delivery 
address country - :type country: string_or_none - :param currency: The currency the price is expressed in - :type currency: string_or_none - :param items: The items in the transaction - :type items: list(dict(str:\\*)) | None - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + order_id: str, + total_value: float, + affiliation: Optional[str] = None, + tax_value: Optional[float] = None, + shipping: Optional[float] = None, + city: Optional[str] = None, + state: Optional[str] = None, + country: Optional[str] = None, + currency: Optional[str] = None, + items: Optional[List[Dict[str, Any]]] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + :param order_id: ID of the eCommerce transaction + :type order_id: non_empty_string + :param total_value: Total transaction value + :type total_value: int | float + :param affiliation: Transaction affiliation + :type affiliation: string_or_none + :param tax_value: Transaction tax value + :type tax_value: int | float | None + :param shipping: Delivery cost charged + :type shipping: int | float | None + :param city: Delivery address city + :type city: string_or_none + :param state: Delivery address state + :type state: string_or_none + :param country: Delivery address country + :type country: string_or_none + :param currency: The currency the price is expressed in + :type currency: string_or_none + :param items: The items in the transaction + :type items: list(dict(str:\\*)) | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param 
event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker """ non_empty_string(order_id) @@ -650,24 +686,25 @@ def track_ecommerce_transaction( return self def track_screen_view( - self, - name: Optional[str] = None, - id_: Optional[str] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param name: The name of the screen view event - :type name: string_or_none - :param id_: Screen view ID - :type id_: string_or_none - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + name: Optional[str] = None, + id_: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + :param name: The name of the screen view event + :type name: string_or_none + :param id_: Screen view ID + :type id_: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker """ screen_view_properties = {} if name is not None: @@ -675,40 +712,44 @@ def track_screen_view( if id_ is not None: screen_view_properties["id"] = id_ - event_json = SelfDescribingJson("%s/screen_view/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), screen_view_properties) + event_json = SelfDescribingJson( + "%s/screen_view/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), + screen_view_properties, + ) return self.track_unstruct_event(event_json, context, tstamp, event_subject) def 
track_struct_event( - self, - category: str, - action: str, - label: Optional[str] = None, - property_: Optional[str] = None, - value: Optional[float] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param category: Category of the event - :type category: non_empty_string - :param action: The event itself - :type action: non_empty_string - :param label: Refer to the object the action is - performed on - :type label: string_or_none - :param property_: Property associated with either the action - or the object - :type property_: string_or_none - :param value: A value associated with the user action - :type value: int | float | None - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + category: str, + action: str, + label: Optional[str] = None, + property_: Optional[str] = None, + value: Optional[float] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + :param category: Category of the event + :type category: non_empty_string + :param action: The event itself + :type action: non_empty_string + :param label: Refer to the object the action is + performed on + :type label: string_or_none + :param property_: Property associated with either the action + or the object + :type property_: string_or_none + :param value: A value associated with the user action + :type value: int | float | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional 
per event subject + :type event_subject: subject | None + :rtype: tracker """ non_empty_string(category) non_empty_string(action) @@ -724,26 +765,29 @@ def track_struct_event( return self.complete_payload(pb, context, tstamp, event_subject) def track_unstruct_event( - self, - event_json: SelfDescribingJson, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param event_json: The properties of the event. Has two field: - A "data" field containing the event properties and - A "schema" field identifying the schema against which the data is validated - :type event_json: self_describing_json - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker - """ - - envelope = SelfDescribingJson(UNSTRUCT_EVENT_SCHEMA, event_json.to_json()).to_json() + self, + event_json: SelfDescribingJson, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + :param event_json: The properties of the event. 
Has two field: + A "data" field containing the event properties and + A "schema" field identifying the schema against which the data is validated + :type event_json: self_describing_json + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker + """ + + envelope = SelfDescribingJson( + UNSTRUCT_EVENT_SCHEMA, event_json.to_json() + ).to_json() pb = payload.Payload() @@ -755,41 +799,44 @@ def track_unstruct_event( # Alias track_self_describing_event = track_unstruct_event - def flush(self, is_async: bool = False) -> 'Tracker': + def flush(self, is_async: bool = False) -> "Tracker": """ - Flush the emitter + Flush the emitter - :param is_async: Whether the flush is done asynchronously. Default is False - :type is_async: bool - :rtype: tracker + :param is_async: Whether the flush is done asynchronously. 
Default is False + :type is_async: bool + :rtype: tracker """ for emitter in self.emitters: if is_async: - if hasattr(emitter, 'flush'): + if hasattr(emitter, "flush"): emitter.flush() else: - if hasattr(emitter, 'sync_flush'): + if hasattr(emitter, "sync_flush"): emitter.sync_flush() return self - def set_subject(self, subject: Optional[_subject.Subject]) -> 'Tracker': + def set_subject(self, subject: Optional[_subject.Subject]) -> "Tracker": """ - Set the subject of the events fired by the tracker + Set the subject of the events fired by the tracker - :param subject: Subject to be tracked - :type subject: subject | None - :rtype: tracker + :param subject: Subject to be tracked + :type subject: subject | None + :rtype: tracker """ self.subject = subject return self - def add_emitter(self, emitter: EmitterProtocol) -> 'Tracker': + def add_emitter(self, emitter: EmitterProtocol) -> "Tracker": """ - Add a new emitter to which events should be passed + Add a new emitter to which events should be passed - :param emitter: New emitter - :type emitter: emitter - :rtype: tracker + :param emitter: New emitter + :type emitter: emitter + :rtype: tracker """ self.emitters.append(emitter) return self + + def get_namespace(self): + return self.standard_nv_pairs["tna"] diff --git a/snowplow_tracker/tracker_configuration.py b/snowplow_tracker/tracker_configuration.py new file mode 100644 index 00000000..9c717721 --- /dev/null +++ b/snowplow_tracker/tracker_configuration.py @@ -0,0 +1,66 @@ +# """ +# tracker_configuration.py + +# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. 
+ +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. + +# Authors: Jack Keene, Anuj More, Alex Dean, Fred Blundun, Paul Boocock +# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd +# License: Apache License Version 2.0 +# """ + +from typing import Optional +from snowplow_tracker.typing import JsonEncoderFunction + + +class TrackerConfiguration(object): + def __init__( + self, + encode_base64: Optional[bool] = None, + json_encoder: Optional[JsonEncoderFunction] = None, + ) -> None: + """ + Configuration for additional tracker configuration options. + :param encode_base64: Whether JSONs in the payload should be base-64 encoded. Default is True. + :type encode_base64: bool + :param json_encoder: Custom JSON serializer that gets called on non-serializable object. + :type json_encoder: function | None + """ + + self.encode_base64 = encode_base64 + self.json_encoder = json_encoder + + @property + def encode_base64(self) -> Optional[bool]: + """ + Whether JSONs in the payload should be base-64 encoded. Default is True. + """ + return self._encode_base64 + + @encode_base64.setter + def encode_base64(self, value: Optional[bool]): + if isinstance(value, bool) or value is None: + self._encode_base64 = value + else: + raise ValueError("encode_base64 must be True or False") + + @property + def json_encoder(self) -> Optional[JsonEncoderFunction]: + """ + Custom JSON serializer that gets called on non-serializable object. 
+ """ + return self._json_encoder + + @json_encoder.setter + def json_encoder(self, value: Optional[JsonEncoderFunction]): + self._json_encoder = value diff --git a/snowplow_tracker/typing.py b/snowplow_tracker/typing.py index 7800a657..db27a671 100644 --- a/snowplow_tracker/typing.py +++ b/snowplow_tracker/typing.py @@ -29,10 +29,29 @@ # tracker FORM_NODE_NAMES = {"INPUT", "TEXTAREA", "SELECT"} FORM_TYPES = { - "button", "checkbox", "color", "date", "datetime", - "datetime-local", "email", "file", "hidden", "image", "month", - "number", "password", "radio", "range", "reset", "search", - "submit", "tel", "text", "time", "url", "week" + "button", + "checkbox", + "color", + "date", + "datetime", + "datetime-local", + "email", + "file", + "hidden", + "image", + "month", + "number", + "password", + "radio", + "range", + "reset", + "search", + "submit", + "tel", + "text", + "time", + "url", + "week", } FormNodeName = Literal["INPUT", "TEXTAREA", "SELECT"] ElementClasses = Union[List[str], Tuple[str, Any]] From 578153b8e8fe88bb68a078a9fb614dbdcc9944f5 Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Fri, 18 Nov 2022 10:17:32 +0000 Subject: [PATCH 08/51] Add example app with Snowplow interface (close #302) PR #304 * Add example app with Snowplow interface --- examples/snowplow_app.py | 58 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 examples/snowplow_app.py diff --git a/examples/snowplow_app.py b/examples/snowplow_app.py new file mode 100644 index 00000000..39484450 --- /dev/null +++ b/examples/snowplow_app.py @@ -0,0 +1,58 @@ +import sys +from snowplow_tracker import ( + Snowplow, + EmitterConfiguration, + Subject, + TrackerConfiguration, + SelfDescribingJson, +) + + +def get_url_from_args(): + if len(sys.argv) != 2: + raise ValueError("Collector Endpoint is required") + return sys.argv[1] + + +def main(): + + collector_url = get_url_from_args() + # Configure Emitter + 
emitter_config = EmitterConfiguration(buffer_size=5) + + # Configure Tracker + tracker_config = TrackerConfiguration(encode_base64=True) + + # Initialise subject + subject = Subject() + subject.set_user_id("uid") + + Snowplow.create_tracker( + namespace="ns", + endpoint=collector_url, + app_id="app1", + subject=subject, + tracker_config=tracker_config, + emitter_config=emitter_config, + ) + + tracker = Snowplow.get_tracker("ns") + + tracker.track_page_view("https://www.snowplow.io", "Homepage") + tracker.track_page_ping("https://www.snowplow.io", "Homepage") + tracker.track_link_click("https://www.snowplow.io/about") + tracker.track_page_view("https://www.snowplow.io/about", "About") + + tracker.track_self_describing_event( + SelfDescribingJson( + "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1", + {"targetUrl": "example.com"}, + ) + ) + tracker.track_struct_event("shop", "add-to-basket", None, "pcs", 2) + + tracker.flush() + + +if __name__ == "__main__": + main() From 71d2bfdc1592e44a8fb56a27658e348c28219a8e Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Tue, 29 Nov 2022 15:10:52 +0000 Subject: [PATCH 09/51] Fix Collector URL with trailing '/' (close #300) PR #305 * Strip trailing slash character from endpoint --- snowplow_tracker/emitters.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/snowplow_tracker/emitters.py b/snowplow_tracker/emitters.py index 5ddb67c8..3d71b767 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -144,6 +144,8 @@ def as_collector_uri( if len(endpoint) < 1: raise ValueError("No endpoint provided.") + endpoint = endpoint.rstrip('/') + if endpoint.split("://")[0] in PROTOCOLS: endpoint_arr = endpoint.split("://") protocol = endpoint_arr[0] From d943ebcd162791510248ca99679a786f198b35ba Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Tue, 13 Dec 2022 10:53:30 +0000 Subject: [PATCH 10/51] Upgrade 
set-output in cd (close #294) PR #299 * Update deprecated set-output command --- .github/workflows/cd.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 10b24303..c028f253 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -21,8 +21,8 @@ jobs: - name: Get tag and tracker versions id: version run: | - echo ::set-output name=TAG_VERSION::${GITHUB_REF#refs/*/} - echo "##[set-output name=PYTHON_TRACKER_VERSION;]$(python setup.py --version)" + echo "TAG_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_OUTPUT + echo "PYTHON_TRACKER_VERSION=$(python setup.py --version)" >> $GITHUB_OUTPUT - name: Fail if version mismatch if: ${{ steps.version.outputs.TAG_VERSION != steps.version.outputs.PYTHON_TRACKER_VERSION }} From 39fd50a3aff98a5efdd5c5c7fb5518fe4761305b Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Fri, 6 Jan 2023 15:31:53 +0000 Subject: [PATCH 11/51] Add Retry for failed events (close #296) PR #306 * Make http requests return status code * Add retry functionality * Add failure retry tests * Add buffer capacity * Rename buffer_size to batch_size * Encapsulate timers in an FlushTimer class --- examples/snowplow_app.py | 2 +- snowplow_tracker/celery/celery_emitter.py | 4 +- snowplow_tracker/emitter_configuration.py | 40 +++- snowplow_tracker/emitters.py | 218 ++++++++++++++---- snowplow_tracker/snowplow.py | 2 +- .../test/integration/test_integration.py | 8 +- snowplow_tracker/test/unit/test_emitters.py | 141 ++++++++--- 7 files changed, 312 insertions(+), 103 deletions(-) diff --git a/examples/snowplow_app.py b/examples/snowplow_app.py index 39484450..7ce2c1d8 100644 --- a/examples/snowplow_app.py +++ b/examples/snowplow_app.py @@ -18,7 +18,7 @@ def main(): collector_url = get_url_from_args() # Configure Emitter - emitter_config = EmitterConfiguration(buffer_size=5) + emitter_config = EmitterConfiguration(batch_size=5) # 
Configure Tracker tracker_config = TrackerConfiguration(encode_base64=True) diff --git a/snowplow_tracker/celery/celery_emitter.py b/snowplow_tracker/celery/celery_emitter.py index d9aafaa7..682ee24b 100644 --- a/snowplow_tracker/celery/celery_emitter.py +++ b/snowplow_tracker/celery/celery_emitter.py @@ -53,9 +53,9 @@ def __init__( protocol: HttpProtocol = "http", port: Optional[int] = None, method: Method = "post", - buffer_size: Optional[int] = None, + batch_size: Optional[int] = None, byte_limit: Optional[int] = None) -> None: - super(CeleryEmitter, self).__init__(endpoint, protocol, port, method, buffer_size, None, None, byte_limit) + super(CeleryEmitter, self).__init__(endpoint, protocol, port, method, batch_size, None, None, byte_limit) try: # Check whether a custom Celery configuration module named "snowplow_celery_config" exists diff --git a/snowplow_tracker/emitter_configuration.py b/snowplow_tracker/emitter_configuration.py index 6d4ef0e6..c38823a1 100644 --- a/snowplow_tracker/emitter_configuration.py +++ b/snowplow_tracker/emitter_configuration.py @@ -26,16 +26,17 @@ class EmitterConfiguration(object): def __init__( self, - buffer_size: Optional[int] = None, + batch_size: Optional[int] = None, on_success: Optional[SuccessCallback] = None, on_failure: Optional[FailureCallback] = None, byte_limit: Optional[int] = None, request_timeout: Optional[Union[float, Tuple[float, float]]] = None, + buffer_capacity: Optional[int] = None ) -> None: """ Configuration for the emitter that sends events to the Snowplow collector. - :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 10. - :type buffer_size: int | None + :param batch_size: The maximum number of queued events before the buffer is flushed. Default is 10. + :type batch_size: int | None :param on_success: Callback executed after every HTTP request in a flush has status code 200 Gets passed the number of events flushed. 
:type on_success: function | None @@ -53,26 +54,27 @@ def __init__( :type request_timeout: float | tuple | None """ - self.buffer_size = buffer_size + self.batch_size = batch_size self.on_success = on_success self.on_failure = on_failure self.byte_limit = byte_limit self.request_timeout = request_timeout + self.buffer_capacity = buffer_capacity @property - def buffer_size(self) -> Optional[int]: + def batch_size(self) -> Optional[int]: """ The maximum number of queued events before the buffer is flushed. Default is 10. """ - return self._buffer_size + return self._batch_size - @buffer_size.setter - def buffer_size(self, value: Optional[int]): + @batch_size.setter + def batch_size(self, value: Optional[int]): if isinstance(value, int) and value < 0: - raise ValueError("buffer_size must greater than 0") + raise ValueError("batch_size must greater than 0") if not isinstance(value, int) and value is not None: - raise ValueError("buffer_size must be of type int") - self._buffer_size = value + raise ValueError("batch_size must be of type int") + self._batch_size = value @property def on_success(self) -> Optional[SuccessCallback]: @@ -127,3 +129,19 @@ def request_timeout(self) -> Optional[Union[float, Tuple[float, float]]]: @request_timeout.setter def request_timeout(self, value: Optional[Union[float, Tuple[float, float]]]): self._request_timeout = value + + @property + def buffer_capacity(self) -> Optional[int]: + """ + The maximum capacity of the event buffer. The default buffer capacity is 10 000 events. + When the buffer is full new events are lost. 
+ """ + return self._buffer_capacity + + @buffer_capacity.setter + def buffer_capacity(self, value: Optional[int]): + if isinstance(value, int) and value < 0: + raise ValueError("buffer_capacity must greater than 0") + if not isinstance(value, int) and value is not None: + raise ValueError("buffer_capacity must be of type int") + self._buffer_capacity = value \ No newline at end of file diff --git a/snowplow_tracker/emitters.py b/snowplow_tracker/emitters.py index 3d71b767..20acda8e 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -24,6 +24,7 @@ import time import threading import requests +import random from typing import Optional, Union, Tuple from queue import Queue @@ -63,11 +64,13 @@ def __init__( protocol: HttpProtocol = "https", port: Optional[int] = None, method: Method = "post", - buffer_size: Optional[int] = None, + batch_size: Optional[int] = None, on_success: Optional[SuccessCallback] = None, on_failure: Optional[FailureCallback] = None, byte_limit: Optional[int] = None, request_timeout: Optional[Union[float, Tuple[float, float]]] = None, + max_retry_delay_seconds: int = 60, + buffer_capacity: int = 10000, ) -> None: """ :param endpoint: The collector URL. If protocol is not set in endpoint it will automatically set to "https://" - this is done automatically. @@ -78,8 +81,8 @@ def __init__( :type port: int | None :param method: The HTTP request method. Defaults to post. :type method: method - :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 10. - :type buffer_size: int | None + :param batch_size: The maximum number of queued events before the buffer is flushed. Default is 10. + :type batch_size: int | None :param on_success: Callback executed after every HTTP request in a flush has status code 200 Gets passed the number of events flushed. 
:type on_success: function | None @@ -95,6 +98,11 @@ def __init__( applies to both "connect" AND "read" timeout, or as tuple with two float values which specify the "connect" and "read" timeouts separately :type request_timeout: float | tuple | None + :param max_retry_delay_seconds: Set the maximum time between attempts to send failed events to the collector. Default 60 seconds + :type max_retry_delay_seconds: int + :param buffer_capacity: The maximum capacity of the event buffer. The default buffer capacity is 10 000 events. + When the buffer is full new events are lost. + :type buffer_capacity: int """ one_of(protocol, PROTOCOLS) one_of(method, METHODS) @@ -103,12 +111,15 @@ def __init__( self.method = method - if buffer_size is None: + if batch_size is None: if method == "post": - buffer_size = DEFAULT_MAX_LENGTH + batch_size = DEFAULT_MAX_LENGTH else: - buffer_size = 1 - self.buffer_size = buffer_size + batch_size = 1 + + if batch_size > buffer_capacity: + batch_size = buffer_capacity + self.batch_size = batch_size self.buffer = [] self.byte_limit = byte_limit self.bytes_queued = None if byte_limit is None else 0 @@ -119,8 +130,13 @@ def __init__( self.lock = threading.RLock() - self.timer = None + self.timer = FlushTimer(emitter=self, repeating=True) + self.retry_timer = FlushTimer(emitter=self, repeating=False) + self.max_retry_delay_seconds = max_retry_delay_seconds + self.retry_delay = 0 + + self.buffer_capacity = buffer_capacity logger.info("Emitter initialized with endpoint " + self.endpoint) @staticmethod @@ -187,30 +203,33 @@ def reached_limit(self) -> bool: :rtype: bool """ if self.byte_limit is None: - return len(self.buffer) >= self.buffer_size + return len(self.buffer) >= self.batch_size else: return (self.bytes_queued or 0) >= self.byte_limit or len( self.buffer - ) >= self.buffer_size + ) >= self.batch_size def flush(self) -> None: """ Sends all events in the buffer to the collector. 
""" with self.lock: - self.send_events(self.buffer) + if self.retry_timer.is_active(): + return + + send_events = self.buffer self.buffer = [] + self.send_events(send_events) if self.bytes_queued is not None: self.bytes_queued = 0 - def http_post(self, data: str) -> bool: + def http_post(self, data: str) -> int: """ :param data: The array of JSONs to be sent :type data: string """ logger.info("Sending POST request to %s..." % self.endpoint) logger.debug("Payload: %s" % data) - post_succeeded = False try: r = requests.post( self.endpoint, @@ -218,35 +237,28 @@ def http_post(self, data: str) -> bool: headers={"Content-Type": "application/json; charset=utf-8"}, timeout=self.request_timeout, ) - post_succeeded = Emitter.is_good_status_code(r.status_code) - getattr(logger, "info" if post_succeeded else "warning")( - "POST request finished with status code: " + str(r.status_code) - ) except requests.RequestException as e: logger.warning(e) + return -1 - return post_succeeded + return r.status_code - def http_get(self, payload: PayloadDict) -> bool: + def http_get(self, payload: PayloadDict) -> int: """ :param payload: The event properties :type payload: dict(string:\\*) """ logger.info("Sending GET request to %s..." % self.endpoint) logger.debug("Payload: %s" % payload) - get_succeeded = False try: r = requests.get( self.endpoint, params=payload, timeout=self.request_timeout ) - get_succeeded = Emitter.is_good_status_code(r.status_code) - getattr(logger, "info" if get_succeeded else "warning")( - "GET request finished with status code: " + str(r.status_code) - ) except requests.RequestException as e: logger.warning(e) + return -1 - return get_succeeded + return r.status_code def sync_flush(self) -> None: """ @@ -254,7 +266,7 @@ def sync_flush(self) -> None: This is guaranteed to be blocking, not asynchronous. 
""" logger.debug("Starting synchronous flush...") - Emitter.flush(self) + self.flush() logger.info("Finished synchronous flush") @staticmethod @@ -264,7 +276,7 @@ def is_good_status_code(status_code: int) -> bool: :type status_code: int :rtype: bool """ - return 200 <= status_code < 400 + return 200 <= status_code < 300 def send_events(self, evts: PayloadDictList) -> None: """ @@ -280,7 +292,8 @@ def send_events(self, evts: PayloadDictList) -> None: if self.method == "post": data = SelfDescribingJson(PAYLOAD_DATA_SCHEMA, evts).to_string() - request_succeeded = self.http_post(data) + status_code = self.http_post(data) + request_succeeded = Emitter.is_good_status_code(status_code) if request_succeeded: success_events += evts else: @@ -288,7 +301,9 @@ def send_events(self, evts: PayloadDictList) -> None: elif self.method == "get": for evt in evts: - request_succeeded = self.http_get(evt) + status_code = self.http_get(evt) + request_succeeded = Emitter.is_good_status_code(status_code) + if request_succeeded: success_events += [evt] else: @@ -299,33 +314,36 @@ def send_events(self, evts: PayloadDictList) -> None: if self.on_failure is not None and len(failure_events) > 0: self.on_failure(len(success_events), failure_events) + if self._should_retry(status_code): + self._set_retry_delay() + self._retry_failed_events(failure_events) + else: + self._reset_retry_delay() else: logger.info("Skipping flush since buffer is empty") - def set_flush_timer(self, timeout: float, flush_now: bool = False) -> None: + def _set_retry_timer(self, timeout: float) -> None: """ - Set an interval at which the buffer will be flushed + Set an interval at which failed events will be retried :param timeout: interval in seconds :type timeout: int | float - :param flush_now: immediately flush buffer - :type flush_now: bool """ + self.retry_timer.start(timeout=timeout) - # Repeatable create new timer - if flush_now: - self.flush() - self.timer = threading.Timer(timeout, self.set_flush_timer, 
[timeout, True]) - self.timer.daemon = True - self.timer.start() + def set_flush_timer(self, timeout: float) -> None: + """ + Set an interval at which the buffer will be flushed + :param timeout: interval in seconds + :type timeout: int | float + """ + self.timer.start(timeout=timeout) def cancel_flush_timer(self) -> None: """ Abort automatic async flushing """ - - if self.timer is not None: - self.timer.cancel() + self.timer.cancel() @staticmethod def attach_sent_timestamp(events: PayloadDictList) -> None: @@ -344,6 +362,59 @@ def update(e: PayloadDict) -> None: for event in events: update(event) + def _should_retry(self, status_code: int) -> bool: + """ + Checks if a request should be retried + + :param status_code: Response status code + :type status_code: int + :rtype: bool + """ + if Emitter.is_good_status_code(status_code): + return False + + return status_code not in [400, 401, 403, 410, 422] + + def _set_retry_delay(self) -> None: + """ + Sets a delay to retry failed events + """ + random_noise = random.random() + self.retry_delay = min(self.retry_delay * 2 + random_noise, self.max_retry_delay_seconds) + + def _reset_retry_delay(self) -> None: + """ + Resets retry delay to 0 + """ + self.retry_delay = 0 + + def _retry_failed_events(self, failed_events) -> None: + """ + Adds failed events back to the buffer to retry + + :param failed_events: List of failed events + :type List + """ + for event in failed_events: + if not event in self.buffer and not self._buffer_capacity_reached(): + self.buffer.append(event) + + self._set_retry_timer(self.retry_delay) + + def _buffer_capacity_reached(self) -> bool: + """ + Returns true if buffer capacity is reached + + :rtype: bool + """ + return len(self.buffer) >= self.buffer_capacity + + def _cancel_retry_timer(self) -> None: + """ + Cancels a retry timer + """ + self.retry_timer.cancel() + class AsyncEmitter(Emitter): """ @@ -356,14 +427,16 @@ def __init__( protocol: HttpProtocol = "http", port: Optional[int] = None, 
method: Method = "post", - buffer_size: Optional[int] = None, + batch_size: Optional[int] = None, on_success: Optional[SuccessCallback] = None, on_failure: Optional[FailureCallback] = None, thread_count: int = 1, byte_limit: Optional[int] = None, + max_retry_delay_seconds: int = 60, + buffer_capacity: int = 10000, ) -> None: """ - :param endpoint: The collector URL. Don't include "http://" - this is done automatically. + :param endpoint: The collector URL. If protocol is not set in endpoint it will automatically set to "https://" - this is done automatically. :type endpoint: string :param protocol: The protocol to use - http or https. Defaults to http. :type protocol: protocol @@ -371,8 +444,8 @@ def __init__( :type port: int | None :param method: The HTTP request method :type method: method - :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 10. - :type buffer_size: int | None + :param batch_size: The maximum number of queued events before the buffer is flushed. Default is 10. + :type batch_size: int | None :param on_success: Callback executed after every HTTP request in a flush has status code 200 Gets passed the number of events flushed. :type on_success: function | None @@ -386,16 +459,23 @@ def __init__( :type thread_count: int :param byte_limit: The size event list after reaching which queued events will be flushed :type byte_limit: int | None + :param max_retry_delay_seconds: Set the maximum time between attempts to send failed events to the collector. Default 60 seconds + :type max_retry_delay_seconds: int + :param buffer_capacity: The maximum capacity of the event buffer. The default buffer capacity is 10,000 events. + When the buffer is full new events are lost. 
+ :type buffer_capacity: int """ super(AsyncEmitter, self).__init__( endpoint, protocol, port, method, - buffer_size, + batch_size, on_success, on_failure, byte_limit, + max_retry_delay_seconds, + buffer_capacity ) self.queue = Queue() for i in range(thread_count): @@ -426,3 +506,47 @@ def consume(self) -> None: evts = self.queue.get() self.send_events(evts) self.queue.task_done() + + +class FlushTimer(object): + """ + Internal class used by the Emitter to schedule flush calls for later. + """ + + def __init__(self, emitter: Emitter, repeating: bool): + self.emitter = emitter + self.repeating = repeating + self.timer: Optional[threading.Timer] = None + self.lock = threading.RLock() + + def start(self, timeout: float) -> bool: + with self.lock: + if self.timer is not None: + return False + else: + self._schedule_timer(timeout=timeout) + return True + + def cancel(self) -> None: + with self.lock: + if self.timer is not None: + self.timer.cancel() + self.timer = None + + def is_active(self) -> bool: + with self.lock: + return self.timer is not None + + def _fire(self, timeout: float) -> None: + with self.lock: + if self.repeating: + self._schedule_timer(timeout) + else: + self.timer = None + + self.emitter.flush() + + def _schedule_timer(self, timeout: float) -> None: + self.timer = threading.Timer(timeout, self._fire, [timeout]) + self.timer.daemon = True + self.timer.start() diff --git a/snowplow_tracker/snowplow.py b/snowplow_tracker/snowplow.py index 7b08c071..8dd31645 100644 --- a/snowplow_tracker/snowplow.py +++ b/snowplow_tracker/snowplow.py @@ -77,7 +77,7 @@ def create_tracker( emitter = Emitter( endpoint, method=method, - buffer_size=emitter_config.buffer_size, + batch_size=emitter_config.batch_size, on_success=emitter_config.on_success, on_failure=emitter_config.on_failure, byte_limit=emitter_config.byte_limit, diff --git a/snowplow_tracker/test/integration/test_integration.py b/snowplow_tracker/test/integration/test_integration.py index c487bb70..05e2ee96 
100644 --- a/snowplow_tracker/test/integration/test_integration.py +++ b/snowplow_tracker/test/integration/test_integration.py @@ -36,7 +36,7 @@ querystrings = [""] -default_emitter = emitters.Emitter("localhost", protocol="http", port=80, buffer_size=1) +default_emitter = emitters.Emitter("localhost", protocol="http", port=80, batch_size=1) get_emitter = emitters.Emitter("localhost", protocol="http", port=80, method='get') @@ -333,7 +333,7 @@ def test_post_page_view(self) -> None: self.assertEqual(request["data"][0][key], expected_fields[key]) def test_post_batched(self) -> None: - default_emitter = emitters.Emitter("localhost", protocol="http", port=80, buffer_size=2) + default_emitter = emitters.Emitter("localhost", protocol="http", port=80, batch_size=2) t = tracker.Tracker(default_emitter, default_subject) with HTTMock(pass_post_response_content): t.track_struct_event("Test", "A") @@ -343,7 +343,7 @@ def test_post_batched(self) -> None: @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 def test_timestamps(self) -> None: - emitter = emitters.Emitter("localhost", protocol="http", port=80, buffer_size=3) + emitter = emitters.Emitter("localhost", protocol="http", port=80, batch_size=3) t = tracker.Tracker([emitter], default_subject) with HTTMock(pass_post_response_content): t.track_page_view("localhost", "stamp0", None, tstamp=None) @@ -363,7 +363,7 @@ def test_timestamps(self) -> None: self.assertEqual(request["data"][i].get("stm"), expected_timestamps[i]["stm"]) def test_bytelimit(self) -> None: - default_emitter = emitters.Emitter("localhost", protocol="http", port=80, buffer_size=5, byte_limit=420) + default_emitter = emitters.Emitter("localhost", protocol="http", port=80, batch_size=5, byte_limit=420) t = tracker.Tracker(default_emitter, default_subject) with HTTMock(pass_post_response_content): t.track_struct_event("Test", "A") # 140 bytes diff --git a/snowplow_tracker/test/unit/test_emitters.py b/snowplow_tracker/test/unit/test_emitters.py index 
51b107d0..c8790915 100644 --- a/snowplow_tracker/test/unit/test_emitters.py +++ b/snowplow_tracker/test/unit/test_emitters.py @@ -46,6 +46,14 @@ def mocked_http_success(*args: Any) -> bool: def mocked_http_failure(*args: Any) -> bool: return False +def mocked_http_response_success(*args: Any) -> int: + return 200 + +def mocked_http_response_failure(*args: Any) -> int: + return 400 + +def mocked_http_response_failure_retry(*args: Any) -> int: + return 500 class TestEmitters(unittest.TestCase): @@ -56,22 +64,22 @@ def test_init(self) -> None: e = Emitter('0.0.0.0') self.assertEqual(e.endpoint, 'https://0.0.0.0/com.snowplowanalytics.snowplow/tp2') self.assertEqual(e.method, 'post') - self.assertEqual(e.buffer_size, 10) + self.assertEqual(e.batch_size, 10) self.assertEqual(e.buffer, []) self.assertIsNone(e.byte_limit) self.assertIsNone(e.bytes_queued) self.assertIsNone(e.on_success) self.assertIsNone(e.on_failure) - self.assertIsNone(e.timer) + self.assertFalse(e.timer.is_active()) self.assertIsNone(e.request_timeout) - def test_init_buffer_size(self) -> None: - e = Emitter('0.0.0.0', buffer_size=10) - self.assertEqual(e.buffer_size, 10) + def test_init_batch_size(self) -> None: + e = Emitter('0.0.0.0', batch_size=10) + self.assertEqual(e.batch_size, 10) def test_init_post(self) -> None: e = Emitter('0.0.0.0') - self.assertEqual(e.buffer_size, DEFAULT_MAX_LENGTH) + self.assertEqual(e.batch_size, DEFAULT_MAX_LENGTH) def test_init_byte_limit(self) -> None: e = Emitter('0.0.0.0', byte_limit=512) @@ -113,7 +121,7 @@ def test_as_collector_uri_endpoint_protocol_http(self) -> None: def test_input_no_flush(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="get", buffer_size=2) + e = Emitter('0.0.0.0', method="get", batch_size=2) nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) @@ -127,7 +135,7 @@ def test_input_no_flush(self, mok_flush: Any) -> None: def test_input_flush_byte_limit(self, mok_flush: Any) -> None: 
mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="get", buffer_size=2, byte_limit=16) + e = Emitter('0.0.0.0', method="get", batch_size=2, byte_limit=16) nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) @@ -140,7 +148,7 @@ def test_input_flush_byte_limit(self, mok_flush: Any) -> None: def test_input_flush_buffer(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="get", buffer_size=2, byte_limit=1024) + e = Emitter('0.0.0.0', method="get", batch_size=2, byte_limit=1024) nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) @@ -159,7 +167,7 @@ def test_input_flush_buffer(self, mok_flush: Any) -> None: def test_input_bytes_queued(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="get", buffer_size=2, byte_limit=1024) + e = Emitter('0.0.0.0', method="get", batch_size=2, byte_limit=1024) nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) @@ -183,7 +191,7 @@ def test_input_bytes_post(self, mok_flush: Any) -> None: def test_flush(self, mok_send_events: Any) -> None: mok_send_events.side_effect = mocked_send_events - e = Emitter('0.0.0.0', buffer_size=2, byte_limit=None) + e = Emitter('0.0.0.0', batch_size=2, byte_limit=None) nvPairs = {"n": "v"} e.input(nvPairs) e.input(nvPairs) @@ -195,7 +203,7 @@ def test_flush(self, mok_send_events: Any) -> None: def test_flush_bytes_queued(self, mok_send_events: Any) -> None: mok_send_events.side_effect = mocked_send_events - e = Emitter('0.0.0.0', buffer_size=2, byte_limit=256) + e = Emitter('0.0.0.0', batch_size=2, byte_limit=256) nvPairs = {"n": "v"} e.input(nvPairs) e.input(nvPairs) @@ -219,7 +227,7 @@ def test_attach_sent_tstamp(self) -> None: def test_flush_timer(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', buffer_size=10) + e = Emitter('0.0.0.0', batch_size=10) ev_list = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] for i in ev_list: e.input(i) @@ -227,15 +235,15 @@ 
def test_flush_timer(self, mok_flush: Any) -> None: e.set_flush_timer(3) self.assertEqual(len(e.buffer), 3) time.sleep(5) - self.assertEqual(mok_flush.call_count, 1) + self.assertGreaterEqual(mok_flush.call_count, 1) @mock.patch('snowplow_tracker.Emitter.http_get') def test_send_events_get_success(self, mok_http_get: Any) -> None: - mok_http_get.side_effect = mocked_http_success + mok_http_get.side_effect = mocked_http_response_success mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', method="get", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + e = Emitter('0.0.0.0', method="get", batch_size=10, on_success=mok_success, on_failure=mok_failure) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) @@ -244,11 +252,11 @@ def test_send_events_get_success(self, mok_http_get: Any) -> None: @mock.patch('snowplow_tracker.Emitter.http_get') def test_send_events_get_failure(self, mok_http_get: Any) -> None: - mok_http_get.side_effect = mocked_http_failure + mok_http_get.side_effect = mocked_http_response_failure mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', method="get", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + e = Emitter('0.0.0.0', method="get", batch_size=10, on_success=mok_success, on_failure=mok_failure) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) @@ -257,11 +265,11 @@ def test_send_events_get_failure(self, mok_http_get: Any) -> None: @mock.patch('snowplow_tracker.Emitter.http_post') def test_send_events_post_success(self, mok_http_post: Any) -> None: - mok_http_post.side_effect = mocked_http_success + mok_http_post.side_effect = mocked_http_response_success mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', buffer_size=10, 
on_success=mok_success, on_failure=mok_failure) + e = Emitter('0.0.0.0', batch_size=10, on_success=mok_success, on_failure=mok_failure) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) @@ -270,11 +278,11 @@ def test_send_events_post_success(self, mok_http_post: Any) -> None: @mock.patch('snowplow_tracker.Emitter.http_post') def test_send_events_post_failure(self, mok_http_post: Any) -> None: - mok_http_post.side_effect = mocked_http_failure + mok_http_post.side_effect = mocked_http_response_failure mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', buffer_size=10, on_success=mok_success, on_failure=mok_failure) + e = Emitter('0.0.0.0', batch_size=10, on_success=mok_success, on_failure=mok_failure) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) @@ -285,7 +293,8 @@ def test_send_events_post_failure(self, mok_http_post: Any) -> None: def test_http_post_connect_timeout_error(self, mok_post_request: Any) -> None: mok_post_request.side_effect = ConnectTimeout e = Emitter('0.0.0.0') - post_succeeded = e.http_post("dummy_string") + response = e.http_post("dummy_string") + post_succeeded = Emitter.is_good_status_code(response) self.assertFalse(post_succeeded) @@ -293,8 +302,8 @@ def test_http_post_connect_timeout_error(self, mok_post_request: Any) -> None: def test_http_get_connect_timeout_error(self, mok_post_request: Any) -> None: mok_post_request.side_effect = ConnectTimeout e = Emitter('0.0.0.0', method='get') - get_succeeded = e.http_get({"a": "b"}) - + response = e.http_get({"a": "b"}) + get_succeeded = Emitter.is_good_status_code(response) self.assertFalse(get_succeeded) ### @@ -304,7 +313,7 @@ def test_http_get_connect_timeout_error(self, mok_post_request: Any) -> None: def test_async_emitter_input(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - ae = AsyncEmitter('0.0.0.0', port=9090, method="get", 
buffer_size=3, thread_count=5) + ae = AsyncEmitter('0.0.0.0', port=9090, method="get", batch_size=3, thread_count=5) self.assertTrue(ae.queue.empty()) ae.input({"a": "aa"}) @@ -320,7 +329,7 @@ def test_async_emitter_input(self, mok_flush: Any) -> None: def test_async_emitter_sync_flash(self, mok_send_events: Any) -> None: mok_send_events.side_effect = mocked_send_events - ae = AsyncEmitter('0.0.0.0', port=9090, method="get", buffer_size=3, thread_count=5, byte_limit=1024) + ae = AsyncEmitter('0.0.0.0', port=9090, method="get", batch_size=3, thread_count=5, byte_limit=1024) self.assertTrue(ae.queue.empty()) ae.input({"a": "aa"}) @@ -336,11 +345,11 @@ def test_async_emitter_sync_flash(self, mok_send_events: Any) -> None: @mock.patch('snowplow_tracker.Emitter.http_get') def test_async_send_events_get_success(self, mok_http_get: Any) -> None: - mok_http_get.side_effect = mocked_http_success + mok_http_get.side_effect = mocked_http_response_success mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - ae = AsyncEmitter('0.0.0.0', method="get", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + ae = AsyncEmitter('0.0.0.0', method="get", batch_size=10, on_success=mok_success, on_failure=mok_failure) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] ae.send_events(evBuffer) @@ -349,11 +358,11 @@ def test_async_send_events_get_success(self, mok_http_get: Any) -> None: @mock.patch('snowplow_tracker.Emitter.http_get') def test_async_send_events_get_failure(self, mok_http_get: Any) -> None: - mok_http_get.side_effect = mocked_http_failure + mok_http_get.side_effect = mocked_http_response_failure mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - ae = AsyncEmitter('0.0.0.0', method="get", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + ae = AsyncEmitter('0.0.0.0', method="get", batch_size=10, on_success=mok_success, 
on_failure=mok_failure) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] ae.send_events(evBuffer) @@ -362,11 +371,11 @@ def test_async_send_events_get_failure(self, mok_http_get: Any) -> None: @mock.patch('snowplow_tracker.Emitter.http_post') def test_async_send_events_post_success(self, mok_http_post: Any) -> None: - mok_http_post.side_effect = mocked_http_success + mok_http_post.side_effect = mocked_http_response_success mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - ae = Emitter('0.0.0.0', buffer_size=10, on_success=mok_success, on_failure=mok_failure) + ae = Emitter('0.0.0.0', batch_size=10, on_success=mok_success, on_failure=mok_failure) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] ae.send_events(evBuffer) @@ -375,11 +384,11 @@ def test_async_send_events_post_success(self, mok_http_post: Any) -> None: @mock.patch('snowplow_tracker.Emitter.http_post') def test_async_send_events_post_failure(self, mok_http_post: Any) -> None: - mok_http_post.side_effect = mocked_http_failure + mok_http_post.side_effect = mocked_http_response_failure mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - ae = Emitter('0.0.0.0', buffer_size=10, on_success=mok_success, on_failure=mok_failure) + ae = Emitter('0.0.0.0', batch_size=10, on_success=mok_success, on_failure=mok_failure) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] ae.send_events(evBuffer) @@ -392,7 +401,7 @@ def test_input_unicode_get(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush payload = {"unicode": u'\u0107', "alsoAscii": "abc"} - ae = AsyncEmitter('0.0.0.0', method="get", buffer_size=2) + ae = AsyncEmitter('0.0.0.0', method="get", batch_size=2) ae.input(payload) self.assertEqual(len(ae.buffer), 1) @@ -403,8 +412,66 @@ def test_input_unicode_post(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush payload = {"unicode": u'\u0107', "alsoAscii": 
"abc"} - ae = AsyncEmitter('0.0.0.0', buffer_size=2) + ae = AsyncEmitter('0.0.0.0', batch_size=2) ae.input(payload) self.assertEqual(len(ae.buffer), 1) self.assertDictEqual(payload, ae.buffer[0]) + + @mock.patch('snowplow_tracker.Emitter.http_post') + def test_send_events_post_retry(self, mok_http_post: Any) -> None: + mok_http_post.side_effect = mocked_http_response_failure_retry + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter('0.0.0.0', batch_size=10, on_success=mok_success, on_failure=mok_failure) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_http_post.side_effect = mocked_http_response_success + time.sleep(5) + + mok_failure.assert_called_with(0, evBuffer) + mok_success.assert_called_with(evBuffer) + + @mock.patch('snowplow_tracker.Emitter.http_get') + def test_send_events_get_retry(self, mok_http_get: Any) -> None: + mok_http_get.side_effect = mocked_http_response_failure_retry + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter('0.0.0.0', method='get', batch_size=1, on_success=mok_success, on_failure=mok_failure) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_http_get.side_effect = mocked_http_response_success + time.sleep(5) + + mok_failure.assert_called_with(0, evBuffer) + mok_success.assert_called_with(evBuffer) + + @mock.patch('snowplow_tracker.Emitter.http_get') + def test_send_events_get_no_retry(self, mok_http_get: Any) -> None: + mok_http_get.side_effect = mocked_http_response_failure + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter('0.0.0.0', method='get', batch_size=1, on_success=mok_success, on_failure=mok_failure) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + 
mok_failure.assert_called_once_with(0, evBuffer) + mok_success.assert_not_called() + + @mock.patch('snowplow_tracker.Emitter.http_post') + def test_send_events_post_no_retry(self, mok_http_post: Any) -> None: + mok_http_post.side_effect = mocked_http_response_failure + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter('0.0.0.0', method='get', batch_size=1, on_success=mok_success, on_failure=mok_failure) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_failure.assert_called_once_with(0, evBuffer) + mok_success.assert_not_called() From 3335069198b5eaf7f8df451884f187887a29e848 Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Tue, 10 Jan 2023 10:31:11 +0000 Subject: [PATCH 12/51] Add customizable no-retry HTTP status codes (close #297) PR #307 * Add custom retry codes to emitter class * Add custom retry codes to Snowplow class * Add custom retry codes to emitter configuration * Add custom retry unit tests --- examples/snowplow_app.py | 3 +- snowplow_tracker/emitter_configuration.py | 47 +++++++++++++++++++-- snowplow_tracker/emitters.py | 11 ++++- snowplow_tracker/snowplow.py | 1 + snowplow_tracker/test/unit/test_emitters.py | 33 +++++++++++++++ 5 files changed, 90 insertions(+), 5 deletions(-) diff --git a/examples/snowplow_app.py b/examples/snowplow_app.py index 7ce2c1d8..9cc9f093 100644 --- a/examples/snowplow_app.py +++ b/examples/snowplow_app.py @@ -18,7 +18,8 @@ def main(): collector_url = get_url_from_args() # Configure Emitter - emitter_config = EmitterConfiguration(batch_size=5) + custom_retry_codes = {500: False, 401: True} + emitter_config = EmitterConfiguration(batch_size=5, custom_retry_codes=custom_retry_codes) # Configure Tracker tracker_config = TrackerConfiguration(encode_base64=True) diff --git a/snowplow_tracker/emitter_configuration.py b/snowplow_tracker/emitter_configuration.py index 
c38823a1..77f80bc5 100644 --- a/snowplow_tracker/emitter_configuration.py +++ b/snowplow_tracker/emitter_configuration.py @@ -19,7 +19,7 @@ # License: Apache License Version 2.0 # """ -from typing import Optional, Union, Tuple +from typing import Optional, Union, Tuple, Dict from snowplow_tracker.typing import SuccessCallback, FailureCallback @@ -31,7 +31,8 @@ def __init__( on_failure: Optional[FailureCallback] = None, byte_limit: Optional[int] = None, request_timeout: Optional[Union[float, Tuple[float, float]]] = None, - buffer_capacity: Optional[int] = None + buffer_capacity: Optional[int] = None, + custom_retry_codes: Dict[int, bool] = {} ) -> None: """ Configuration for the emitter that sends events to the Snowplow collector. @@ -52,6 +53,10 @@ def __init__( applies to both "connect" AND "read" timeout, or as tuple with two float values which specify the "connect" and "read" timeouts separately :type request_timeout: float | tuple | None + :param custom_retry_codes: Set custom retry rules for HTTP status codes received in emit responses from the Collector. + By default, retry will not occur for status codes 400, 401, 403, 410 or 422. This can be overridden here. + Note that 2xx codes will never retry as they are considered successful. 
+ :type custom_retry_codes: dict """ self.batch_size = batch_size @@ -60,6 +65,7 @@ def __init__( self.byte_limit = byte_limit self.request_timeout = request_timeout self.buffer_capacity = buffer_capacity + self.custom_retry_codes = custom_retry_codes @property def batch_size(self) -> Optional[int]: @@ -144,4 +150,39 @@ def buffer_capacity(self, value: Optional[int]): raise ValueError("buffer_capacity must greater than 0") if not isinstance(value, int) and value is not None: raise ValueError("buffer_capacity must be of type int") - self._buffer_capacity = value \ No newline at end of file + self._buffer_capacity = value + + @property + def custom_retry_codes(self) -> Dict[int, bool]: + """ + Custom retry rules for HTTP status codes received in emit responses from the Collector. + """ + return self._custom_retry_codes + + @custom_retry_codes.setter + def custom_retry_codes(self, value: Dict[int, bool]): + self._custom_retry_codes = value + def set_retry_code(self, status_code: int, retry = True) -> bool: + """ + Add a retry rule for HTTP status code received from emit responses from the Collector. + :param status_code: HTTP response code + :type status_code: int + :param retry: Set the status_code to retry (True) or not retry (False). 
Default is True + :type retry: bool + """ + if not isinstance(status_code, int): + print("status_code must be of type int") + return False + + if not isinstance(retry, bool): + print("retry must be of type bool") + return False + + if 200 <= status_code < 300: + print("custom_retry_codes should not include codes for succesful requests (2XX codes)") + return False + + self.custom_retry_codes[status_code] = retry + + return status_code in self.custom_retry_codes.keys() + diff --git a/snowplow_tracker/emitters.py b/snowplow_tracker/emitters.py index 20acda8e..3beaaecf 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -25,7 +25,7 @@ import threading import requests import random -from typing import Optional, Union, Tuple +from typing import Optional, Union, Tuple, Dict from queue import Queue from snowplow_tracker.self_describing_json import SelfDescribingJson @@ -71,6 +71,7 @@ def __init__( request_timeout: Optional[Union[float, Tuple[float, float]]] = None, max_retry_delay_seconds: int = 60, buffer_capacity: int = 10000, + custom_retry_codes: Dict[int, bool] = {} ) -> None: """ :param endpoint: The collector URL. If protocol is not set in endpoint it will automatically set to "https://" - this is done automatically. @@ -103,6 +104,10 @@ def __init__( :param buffer_capacity: The maximum capacity of the event buffer. The default buffer capacity is 10 000 events. When the buffer is full new events are lost. :type buffer_capacity: int + :param custom_retry_codes: Set custom retry rules for HTTP status codes received in emit responses from the Collector. + By default, retry will not occur for status codes 400, 401, 403, 410 or 422. This can be overridden here. + Note that 2xx codes will never retry as they are considered successful. 
+ :type custom_retry_codes: dict """ one_of(protocol, PROTOCOLS) one_of(method, METHODS) @@ -137,6 +142,7 @@ def __init__( self.retry_delay = 0 self.buffer_capacity = buffer_capacity + self.custom_retry_codes = custom_retry_codes logger.info("Emitter initialized with endpoint " + self.endpoint) @staticmethod @@ -373,6 +379,9 @@ def _should_retry(self, status_code: int) -> bool: if Emitter.is_good_status_code(status_code): return False + if status_code in self.custom_retry_codes.keys(): + return self.custom_retry_codes[status_code] + return status_code not in [400, 401, 403, 410, 422] def _set_retry_delay(self) -> None: diff --git a/snowplow_tracker/snowplow.py b/snowplow_tracker/snowplow.py index 8dd31645..946802e7 100644 --- a/snowplow_tracker/snowplow.py +++ b/snowplow_tracker/snowplow.py @@ -82,6 +82,7 @@ def create_tracker( on_failure=emitter_config.on_failure, byte_limit=emitter_config.byte_limit, request_timeout=emitter_config.request_timeout, + custom_retry_codes=emitter_config.custom_retry_codes ) tracker = Tracker( diff --git a/snowplow_tracker/test/unit/test_emitters.py b/snowplow_tracker/test/unit/test_emitters.py index c8790915..439b66a8 100644 --- a/snowplow_tracker/test/unit/test_emitters.py +++ b/snowplow_tracker/test/unit/test_emitters.py @@ -475,3 +475,36 @@ def test_send_events_post_no_retry(self, mok_http_post: Any) -> None: mok_failure.assert_called_once_with(0, evBuffer) mok_success.assert_not_called() + + @mock.patch('snowplow_tracker.Emitter.http_post') + def test_send_events_post_custom_retry(self, mok_http_post: Any) -> None: + mok_http_post.side_effect = mocked_http_response_failure + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter('0.0.0.0', batch_size=10, on_success=mok_success, on_failure=mok_failure, custom_retry_codes={400: True}) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_http_post.side_effect = 
mocked_http_response_success + time.sleep(5) + + mok_failure.assert_called_with(0, evBuffer) + mok_success.assert_called_with(evBuffer) + + @mock.patch('snowplow_tracker.Emitter.http_get') + def test_send_events_get_custom_retry(self, mok_http_get: Any) -> None: + mok_http_get.side_effect = mocked_http_response_failure + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter('0.0.0.0', method='get',batch_size=10, on_success=mok_success, on_failure=mok_failure, custom_retry_codes={400: True}) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_http_get.side_effect = mocked_http_response_success + time.sleep(5) + + mok_failure.assert_called_with(0, evBuffer) + mok_success.assert_called_with(evBuffer) + From dccef831a493641ea55c6e244050569e50d9693c Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Thu, 12 Jan 2023 09:08:38 +0000 Subject: [PATCH 13/51] Rename unstruct_event to self_describing_event (close #298) PR #308 * Update unstruct_event to self_describing_event * Add deprecation warning to track_unstruct_event --- .../test/integration/test_integration.py | 8 ++-- snowplow_tracker/test/unit/test_tracker.py | 46 +++++++++---------- snowplow_tracker/tracker.py | 40 ++++++++++++---- 3 files changed, 58 insertions(+), 36 deletions(-) diff --git a/snowplow_tracker/test/integration/test_integration.py b/snowplow_tracker/test/integration/test_integration.py index 05e2ee96..b9ec214d 100644 --- a/snowplow_tracker/test/integration/test_integration.py +++ b/snowplow_tracker/test/integration/test_integration.py @@ -153,10 +153,10 @@ def test_integration_struct_event(self) -> None: for key in expected_fields: self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) - def test_integration_unstruct_event_non_base64(self) -> None: + def test_integration_self_describing_event_non_base64(self) -> None: t = 
tracker.Tracker([get_emitter], default_subject, encode_base64=False) with HTTMock(pass_response_content): - t.track_unstruct_event(SelfDescribingJson("iglu:com.acme/viewed_product/jsonschema/2-0-2", {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000})) + t.track_self_describing_event(SelfDescribingJson("iglu:com.acme/viewed_product/jsonschema/2-0-2", {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000})) expected_fields = {"e": "ue"} for key in expected_fields: self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) @@ -167,10 +167,10 @@ def test_integration_unstruct_event_non_base64(self) -> None: "data": {"schema": "iglu:com.acme/viewed_product/jsonschema/2-0-2", "data": {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000}} }) - def test_integration_unstruct_event_base64(self) -> None: + def test_integration_self_describing_event_base64(self) -> None: t = tracker.Tracker([get_emitter], default_subject, encode_base64=True) with HTTMock(pass_response_content): - t.track_unstruct_event(SelfDescribingJson("iglu:com.acme/viewed_product/jsonschema/2-0-2", {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000})) + t.track_self_describing_event(SelfDescribingJson("iglu:com.acme/viewed_product/jsonschema/2-0-2", {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000})) expected_fields = {"e": "ue"} for key in expected_fields: self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) diff --git a/snowplow_tracker/test/unit/test_tracker.py b/snowplow_tracker/test/unit/test_tracker.py index 34964e0a..5f361ae8 100644 --- a/snowplow_tracker/test/unit/test_tracker.py +++ b/snowplow_tracker/test/unit/test_tracker.py @@ -159,7 +159,7 @@ def test_get_timestamp_3(self) -> None: self.assertEqual(tstamp, 1000) # 1970-01-01 00:00:01 in ms @mock.patch("snowplow_tracker.Tracker.track") - def test_alias_of_track_unstruct_event(self, mok_track: Any) -> None: + def 
test_alias_of_track_self_describing_event(self, mok_track: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -441,7 +441,7 @@ def test_complete_payload_event_subject( ### @mock.patch("snowplow_tracker.Tracker.complete_payload") - def test_track_unstruct_event(self, mok_complete_payload: Any) -> None: + def test_track_self_describing_event(self, mok_complete_payload: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -449,7 +449,7 @@ def test_track_unstruct_event(self, mok_complete_payload: Any) -> None: t = Tracker(e, encode_base64=False) evJson = SelfDescribingJson("test.sde.schema", {"n": "v"}) - t.track_unstruct_event(evJson) + t.track_self_describing_event(evJson) self.assertEqual(mok_complete_payload.call_count, 1) completeArgsList = mok_complete_payload.call_args_list[0][0] self.assertEqual(len(completeArgsList), 4) @@ -474,7 +474,7 @@ def test_track_unstruct_event(self, mok_complete_payload: Any) -> None: self.assertTrue(actualTstampArg is None) @mock.patch("snowplow_tracker.Tracker.complete_payload") - def test_track_unstruct_event_all_args(self, mok_complete_payload: Any) -> None: + def test_track_self_describing_event_all_args(self, mok_complete_payload: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -485,7 +485,7 @@ def test_track_unstruct_event_all_args(self, mok_complete_payload: Any) -> None: ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evContext = [ctx] evTstamp = 1399021242030 - t.track_unstruct_event(evJson, evContext, evTstamp) + t.track_self_describing_event(evJson, evContext, evTstamp) self.assertEqual(mok_complete_payload.call_count, 1) completeArgsList = mok_complete_payload.call_args_list[0][0] self.assertEqual(len(completeArgsList), 4) @@ -510,7 +510,7 @@ def test_track_unstruct_event_all_args(self, mok_complete_payload: Any) -> None: self.assertEqual(actualTstampArg, evTstamp) 
@mock.patch("snowplow_tracker.Tracker.complete_payload") - def test_track_unstruct_event_encode(self, mok_complete_payload: Any) -> None: + def test_track_self_describing_event_encode(self, mok_complete_payload: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -518,7 +518,7 @@ def test_track_unstruct_event_encode(self, mok_complete_payload: Any) -> None: t = Tracker(e, encode_base64=True) evJson = SelfDescribingJson("test.sde.schema", {"n": "v"}) - t.track_unstruct_event(evJson) + t.track_self_describing_event(evJson) self.assertEqual(mok_complete_payload.call_count, 1) completeArgsList = mok_complete_payload.call_args_list[0][0] self.assertEqual(len(completeArgsList), 4) @@ -829,7 +829,7 @@ def test_track_ecommerce_transaction_with_items( } self.assertDictEqual(secItemCallKwargs, expectedSecItemPairs) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_link_click(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -867,7 +867,7 @@ def test_track_link_click(self, mok_track_unstruct: Any) -> None: self.assertIs(callArgs[1][0], ctx) self.assertEqual(callArgs[2], evTstamp) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_link_click_optional_none(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -891,7 +891,7 @@ def test_track_link_click_optional_none(self, mok_track_unstruct: Any) -> None: self.assertTrue(callArgs[1] is None) self.assertTrue(callArgs[2] is None) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_add_to_cart(self, mok_track_unstruct: Any) -> None: mokEmitter = 
self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -931,7 +931,7 @@ def test_track_add_to_cart(self, mok_track_unstruct: Any) -> None: self.assertIs(callArgs[1][0], ctx) self.assertEqual(callArgs[2], evTstamp) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_add_to_cart_optional_none(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -953,7 +953,7 @@ def test_track_add_to_cart_optional_none(self, mok_track_unstruct: Any) -> None: self.assertTrue(callArgs[1] is None) self.assertTrue(callArgs[2] is None) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_remove_from_cart(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -993,7 +993,7 @@ def test_track_remove_from_cart(self, mok_track_unstruct: Any) -> None: self.assertIs(callArgs[1][0], ctx) self.assertEqual(callArgs[2], evTstamp) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_remove_from_cart_optional_none( self, mok_track_unstruct: Any ) -> None: @@ -1017,7 +1017,7 @@ def test_track_remove_from_cart_optional_none( self.assertTrue(callArgs[1] is None) self.assertTrue(callArgs[2] is None) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_change(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -1057,7 +1057,7 @@ def test_track_form_change(self, mok_track_unstruct: Any) -> None: self.assertIs(callArgs[1][0], ctx) self.assertEqual(callArgs[2], evTstamp) - 
@mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_change_optional_none(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -1083,7 +1083,7 @@ def test_track_form_change_optional_none(self, mok_track_unstruct: Any) -> None: self.assertTrue(callArgs[1] is None) self.assertTrue(callArgs[2] is None) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_submit(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -1125,7 +1125,7 @@ def test_track_form_submit(self, mok_track_unstruct: Any) -> None: self.assertIs(callArgs[1][0], ctx) self.assertEqual(callArgs[2], evTstamp) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_submit_invalid_element_type( self, mok_track_unstruct: Any ) -> None: @@ -1155,7 +1155,7 @@ def test_track_form_submit_invalid_element_type( tstamp=evTstamp, ) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_submit_invalid_element_type_disabled_contracts( self, mok_track_unstruct: Any ) -> None: @@ -1200,7 +1200,7 @@ def test_track_form_submit_invalid_element_type_disabled_contracts( self.assertIs(callArgs[1][0], ctx) self.assertEqual(callArgs[2], evTstamp) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_submit_optional_none(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -1218,7 +1218,7 @@ def test_track_form_submit_optional_none(self, 
mok_track_unstruct: Any) -> None: self.assertTrue(callArgs[1] is None) self.assertTrue(callArgs[2] is None) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_submit_empty_elems(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -1234,7 +1234,7 @@ def test_track_form_submit_empty_elems(self, mok_track_unstruct: Any) -> None: self.assertEqual(len(callArgs), 4) self.assertDictEqual(callArgs[0].to_json(), expected) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_site_search(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -1265,7 +1265,7 @@ def test_track_site_search(self, mok_track_unstruct: Any) -> None: self.assertIs(callArgs[1][0], ctx) self.assertEqual(callArgs[2], evTstamp) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_site_search_optional_none(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -1286,7 +1286,7 @@ def test_track_site_search_optional_none(self, mok_track_unstruct: Any) -> None: self.assertTrue(callArgs[1] is None) self.assertTrue(callArgs[2] is None) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_screen_view(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() diff --git a/snowplow_tracker/tracker.py b/snowplow_tracker/tracker.py index 4c988fc8..0215bf16 100644 --- a/snowplow_tracker/tracker.py +++ b/snowplow_tracker/tracker.py @@ -22,6 +22,7 @@ import time import uuid from typing 
import Any, Optional, Union, List, Dict, Sequence +from warnings import warn from snowplow_tracker import payload, _version, SelfDescribingJson from snowplow_tracker import subject as _subject @@ -310,7 +311,7 @@ def track_link_click( "%s/link_click/%s/1-0-1" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_unstruct_event(event_json, context, tstamp, event_subject) + return self.track_self_describing_event(event_json, context, tstamp, event_subject) def track_add_to_cart( self, @@ -363,7 +364,7 @@ def track_add_to_cart( "%s/add_to_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_unstruct_event(event_json, context, tstamp, event_subject) + return self.track_self_describing_event(event_json, context, tstamp, event_subject) def track_remove_from_cart( self, @@ -416,7 +417,7 @@ def track_remove_from_cart( "%s/remove_from_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_unstruct_event(event_json, context, tstamp, event_subject) + return self.track_self_describing_event(event_json, context, tstamp, event_subject) def track_form_change( self, @@ -470,7 +471,7 @@ def track_form_change( "%s/change_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_unstruct_event(event_json, context, tstamp, event_subject) + return self.track_self_describing_event(event_json, context, tstamp, event_subject) def track_form_submit( self, @@ -511,7 +512,7 @@ def track_form_submit( "%s/submit_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_unstruct_event(event_json, context, tstamp, event_subject) + return self.track_self_describing_event(event_json, context, tstamp, event_subject) def track_site_search( self, @@ -555,7 +556,7 @@ def track_site_search( "%s/site_search/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_unstruct_event(event_json, context, tstamp, event_subject) + return 
self.track_self_describing_event(event_json, context, tstamp, event_subject) def track_ecommerce_transaction_item( self, @@ -717,7 +718,7 @@ def track_screen_view( screen_view_properties, ) - return self.track_unstruct_event(event_json, context, tstamp, event_subject) + return self.track_self_describing_event(event_json, context, tstamp, event_subject) def track_struct_event( self, @@ -764,7 +765,7 @@ def track_struct_event( return self.complete_payload(pb, context, tstamp, event_subject) - def track_unstruct_event( + def track_self_describing_event( self, event_json: SelfDescribingJson, context: Optional[List[SelfDescribingJson]] = None, @@ -797,7 +798,28 @@ def track_unstruct_event( return self.complete_payload(pb, context, tstamp, event_subject) # Alias - track_self_describing_event = track_unstruct_event + def track_unstruct_event( + self, + event_json: SelfDescribingJson, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + :param event_json: The properties of the event. Has two field: + A "data" field containing the event properties and + A "schema" field identifying the schema against which the data is validated + :type event_json: self_describing_json + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker + """ + warn('track_unstruct_event will be deprecated in future versions. 
Please use track_self_describing_event.', DeprecationWarning, stacklevel=2) + return self.track_self_describing_event(event_json, context, tstamp, event_subject) def flush(self, is_async: bool = False) -> "Tracker": """ From bbb276e87ab32e584a4115f2ed1ce61cbb33bcea Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Wed, 18 Jan 2023 13:48:02 +0000 Subject: [PATCH 14/51] EventStore with max limit (close #309) PR #310 * Define EventStore protocol * Add default InMemoryEventStore --- snowplow_tracker/__init__.py | 1 + snowplow_tracker/emitter_configuration.py | 35 +++-- snowplow_tracker/emitters.py | 110 +++++++------- snowplow_tracker/event_store.py | 142 ++++++++++++++++++ snowplow_tracker/snowplow.py | 2 +- snowplow_tracker/test/unit/test_emitters.py | 48 +++--- .../test/unit/test_in_memory_event_store.py | 110 ++++++++++++++ snowplow_tracker/tracker.py | 40 +++-- 8 files changed, 391 insertions(+), 97 deletions(-) create mode 100644 snowplow_tracker/event_store.py create mode 100644 snowplow_tracker/test/unit/test_in_memory_event_store.py diff --git a/snowplow_tracker/__init__.py b/snowplow_tracker/__init__.py index da683ada..8726efd9 100644 --- a/snowplow_tracker/__init__.py +++ b/snowplow_tracker/__init__.py @@ -7,6 +7,7 @@ from snowplow_tracker.tracker_configuration import TrackerConfiguration from snowplow_tracker.snowplow import Snowplow from snowplow_tracker.contracts import disable_contracts, enable_contracts +from snowplow_tracker.event_store import EventStore # celery extra from .celery import CeleryEmitter diff --git a/snowplow_tracker/emitter_configuration.py b/snowplow_tracker/emitter_configuration.py index 77f80bc5..9b636d02 100644 --- a/snowplow_tracker/emitter_configuration.py +++ b/snowplow_tracker/emitter_configuration.py @@ -21,6 +21,7 @@ from typing import Optional, Union, Tuple, Dict from snowplow_tracker.typing import SuccessCallback, FailureCallback +from snowplow_tracker.event_store import 
EventStore class EmitterConfiguration(object): @@ -32,7 +33,8 @@ def __init__( byte_limit: Optional[int] = None, request_timeout: Optional[Union[float, Tuple[float, float]]] = None, buffer_capacity: Optional[int] = None, - custom_retry_codes: Dict[int, bool] = {} + custom_retry_codes: Dict[int, bool] = {}, + event_store: Optional[EventStore] = None, ) -> None: """ Configuration for the emitter that sends events to the Snowplow collector. @@ -57,6 +59,8 @@ def __init__( By default, retry will not occur for status codes 400, 401, 403, 410 or 422. This can be overridden here. Note that 2xx codes will never retry as they are considered successful. :type custom_retry_codes: dict + :param event_store: Stores the event buffer and buffer capacity. Default is an InMemoryEventStore object with buffer_capacity of 10,000 events. + :type event_store: EventStore | None """ self.batch_size = batch_size @@ -66,6 +70,7 @@ def __init__( self.request_timeout = request_timeout self.buffer_capacity = buffer_capacity self.custom_retry_codes = custom_retry_codes + self.event_store = event_store @property def batch_size(self) -> Optional[int]: @@ -155,34 +160,44 @@ def buffer_capacity(self, value: Optional[int]): @property def custom_retry_codes(self) -> Dict[int, bool]: """ - Custom retry rules for HTTP status codes received in emit responses from the Collector. + Custom retry rules for HTTP status codes received in emit responses from the Collector. """ return self._custom_retry_codes @custom_retry_codes.setter def custom_retry_codes(self, value: Dict[int, bool]): self._custom_retry_codes = value - def set_retry_code(self, status_code: int, retry = True) -> bool: + + def set_retry_code(self, status_code: int, retry=True) -> bool: """ - Add a retry rule for HTTP status code received from emit responses from the Collector. - :param status_code: HTTP response code - :type status_code: int - :param retry: Set the status_code to retry (True) or not retry (False). 
Default is True - :type retry: bool + Add a retry rule for HTTP status code received from emit responses from the Collector. + :param status_code: HTTP response code + :type status_code: int + :param retry: Set the status_code to retry (True) or not retry (False). Default is True + :type retry: bool """ if not isinstance(status_code, int): print("status_code must be of type int") return False if not isinstance(retry, bool): - print("retry must be of type bool") + print("retry must be of type bool") return False if 200 <= status_code < 300: - print("custom_retry_codes should not include codes for succesful requests (2XX codes)") + print( + "custom_retry_codes should not include codes for succesful requests (2XX codes)" + ) return False self.custom_retry_codes[status_code] = retry return status_code in self.custom_retry_codes.keys() + @property + def event_store(self) -> Optional[EventStore]: + return self._event_store + + @event_store.setter + def event_store(self, value: Optional[EventStore]): + self._event_store = value diff --git a/snowplow_tracker/emitters.py b/snowplow_tracker/emitters.py index 3beaaecf..d6ac1ef2 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -38,6 +38,7 @@ FailureCallback, ) from snowplow_tracker.contracts import one_of +from snowplow_tracker.event_store import EventStore, InMemoryEventStore # logging logging.basicConfig() @@ -70,8 +71,9 @@ def __init__( byte_limit: Optional[int] = None, request_timeout: Optional[Union[float, Tuple[float, float]]] = None, max_retry_delay_seconds: int = 60, - buffer_capacity: int = 10000, - custom_retry_codes: Dict[int, bool] = {} + buffer_capacity: Optional[int] = None, + custom_retry_codes: Dict[int, bool] = {}, + event_store: Optional[EventStore] = None, ) -> None: """ :param endpoint: The collector URL. If protocol is not set in endpoint it will automatically set to "https://" - this is done automatically. 
@@ -101,13 +103,15 @@ def __init__( :type request_timeout: float | tuple | None :param max_retry_delay_seconds: Set the maximum time between attempts to send failed events to the collector. Default 60 seconds :type max_retry_delay_seconds: int - :param buffer_capacity: The maximum capacity of the event buffer. The default buffer capacity is 10 000 events. + :param buffer_capacity: The maximum capacity of the event buffer. When the buffer is full new events are lost. - :type buffer_capacity: int + :type buffer_capacity: int :param custom_retry_codes: Set custom retry rules for HTTP status codes received in emit responses from the Collector. By default, retry will not occur for status codes 400, 401, 403, 410 or 422. This can be overridden here. Note that 2xx codes will never retry as they are considered successful. :type custom_retry_codes: dict + :param event_store: Stores the event buffer and buffer capacity. Default is an InMemoryEventStore object with buffer_capacity of 10,000 events. 
+ :type event_store: EventStore | None """ one_of(protocol, PROTOCOLS) one_of(method, METHODS) @@ -116,16 +120,26 @@ def __init__( self.method = method + if event_store is None: + if buffer_capacity is None: + event_store = InMemoryEventStore(logger=logger) + else: + event_store = InMemoryEventStore( + buffer_capacity=buffer_capacity, logger=logger + ) + + self.event_store = event_store + if batch_size is None: if method == "post": batch_size = DEFAULT_MAX_LENGTH else: batch_size = 1 - - if batch_size > buffer_capacity: + + if buffer_capacity is not None and batch_size > buffer_capacity: batch_size = buffer_capacity + self.batch_size = batch_size - self.buffer = [] self.byte_limit = byte_limit self.bytes_queued = None if byte_limit is None else 0 self.request_timeout = request_timeout @@ -141,7 +155,6 @@ def __init__( self.max_retry_delay_seconds = max_retry_delay_seconds self.retry_delay = 0 - self.buffer_capacity = buffer_capacity self.custom_retry_codes = custom_retry_codes logger.info("Emitter initialized with endpoint " + self.endpoint) @@ -166,7 +179,7 @@ def as_collector_uri( if len(endpoint) < 1: raise ValueError("No endpoint provided.") - endpoint = endpoint.rstrip('/') + endpoint = endpoint.rstrip("/") if endpoint.split("://")[0] in PROTOCOLS: endpoint_arr = endpoint.split("://") @@ -195,9 +208,9 @@ def input(self, payload: PayloadDict) -> None: self.bytes_queued += len(str(payload)) if self.method == "post": - self.buffer.append({key: str(payload[key]) for key in payload}) + self.event_store.add_event({key: str(payload[key]) for key in payload}) else: - self.buffer.append(payload) + self.event_store.add_event(payload) if self.reached_limit(): self.flush() @@ -209,11 +222,11 @@ def reached_limit(self) -> bool: :rtype: bool """ if self.byte_limit is None: - return len(self.buffer) >= self.batch_size + return self.event_store.size() >= self.batch_size else: - return (self.bytes_queued or 0) >= self.byte_limit or len( - self.buffer - ) >= self.batch_size + 
return ( + self.bytes_queued or 0 + ) >= self.byte_limit or self.event_store.size() >= self.batch_size def flush(self) -> None: """ @@ -222,9 +235,7 @@ def flush(self) -> None: with self.lock: if self.retry_timer.is_active(): return - - send_events = self.buffer - self.buffer = [] + send_events = self.event_store.get_events_batch() self.send_events(send_events) if self.bytes_queued is not None: self.bytes_queued = 0 @@ -324,6 +335,7 @@ def send_events(self, evts: PayloadDictList) -> None: self._set_retry_delay() self._retry_failed_events(failure_events) else: + self.event_store.cleanup(success_events, False) self._reset_retry_delay() else: logger.info("Skipping flush since buffer is empty") @@ -339,9 +351,9 @@ def _set_retry_timer(self, timeout: float) -> None: def set_flush_timer(self, timeout: float) -> None: """ - Set an interval at which the buffer will be flushed - :param timeout: interval in seconds - :type timeout: int | float + Set an interval at which the buffer will be flushed + :param timeout: interval in seconds + :type timeout: int | float """ self.timer.start(timeout=timeout) @@ -370,11 +382,11 @@ def update(e: PayloadDict) -> None: def _should_retry(self, status_code: int) -> bool: """ - Checks if a request should be retried - - :param status_code: Response status code - :type status_code: int - :rtype: bool + Checks if a request should be retried + + :param status_code: Response status code + :type status_code: int + :rtype: bool """ if Emitter.is_good_status_code(status_code): return False @@ -386,41 +398,32 @@ def _should_retry(self, status_code: int) -> bool: def _set_retry_delay(self) -> None: """ - Sets a delay to retry failed events + Sets a delay to retry failed events """ random_noise = random.random() - self.retry_delay = min(self.retry_delay * 2 + random_noise, self.max_retry_delay_seconds) + self.retry_delay = min( + self.retry_delay * 2 + random_noise, self.max_retry_delay_seconds + ) def _reset_retry_delay(self) -> None: """ - Resets 
retry delay to 0 + Resets retry delay to 0 """ self.retry_delay = 0 def _retry_failed_events(self, failed_events) -> None: """ - Adds failed events back to the buffer to retry + Adds failed events back to the buffer to retry - :param failed_events: List of failed events - :type List + :param failed_events: List of failed events + :type List """ - for event in failed_events: - if not event in self.buffer and not self._buffer_capacity_reached(): - self.buffer.append(event) - + self.event_store.cleanup(failed_events, True) self._set_retry_timer(self.retry_delay) - def _buffer_capacity_reached(self) -> bool: - """ - Returns true if buffer capacity is reached - - :rtype: bool - """ - return len(self.buffer) >= self.buffer_capacity - def _cancel_retry_timer(self) -> None: """ - Cancels a retry timer + Cancels a retry timer """ self.retry_timer.cancel() @@ -442,7 +445,8 @@ def __init__( thread_count: int = 1, byte_limit: Optional[int] = None, max_retry_delay_seconds: int = 60, - buffer_capacity: int = 10000, + buffer_capacity: int = None, + event_store: Optional[EventStore] = None, ) -> None: """ :param endpoint: The collector URL. If protocol is not set in endpoint it will automatically set to "https://" - this is done automatically. @@ -470,9 +474,11 @@ def __init__( :type byte_limit: int | None :param max_retry_delay_seconds: Set the maximum time between attempts to send failed events to the collector. Default 60 seconds :type max_retry_delay_seconds: int - :param buffer_capacity: The maximum capacity of the event buffer. The default buffer capacity is 10,000 events. + :param buffer_capacity: The maximum capacity of the event buffer. When the buffer is full new events are lost. - :type buffer_capacity: int + :type buffer_capacity: int + :param event_store: Stores the event buffer and buffer capacity. Default is an InMemoryEventStore object with buffer_capacity of 10,000 events. 
+ :type event_store: EventStore """ super(AsyncEmitter, self).__init__( endpoint, @@ -484,7 +490,8 @@ def __init__( on_failure, byte_limit, max_retry_delay_seconds, - buffer_capacity + buffer_capacity, + event_store, ) self.queue = Queue() for i in range(thread_count): @@ -496,7 +503,7 @@ def sync_flush(self) -> None: while True: self.flush() self.queue.join() - if len(self.buffer) < 1: + if self.event_store.size() < 1: break def flush(self) -> None: @@ -505,8 +512,7 @@ def flush(self) -> None: executes the flush method of the base Emitter class """ with self.lock: - self.queue.put(self.buffer) - self.buffer = [] + self.queue.put(self.event_store.get_events_batch()) if self.bytes_queued is not None: self.bytes_queued = 0 diff --git a/snowplow_tracker/event_store.py b/snowplow_tracker/event_store.py new file mode 100644 index 00000000..ad47ac5c --- /dev/null +++ b/snowplow_tracker/event_store.py @@ -0,0 +1,142 @@ +# """ +# event_store.py + +# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+ +# Authors: Jack Keene, Anuj More, Alex Dean, Fred Blundun, Paul Boocock +# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd +# License: Apache License Version 2.0 +# """ + +from typing_extensions import Protocol +from snowplow_tracker.typing import PayloadDict, PayloadDictList +from logging import Logger + + +class EventStore(Protocol): + """ + EventStore protocol. For buffering events in the Emitter. + """ + + def add_event(payload: PayloadDict) -> bool: + """ + Add PayloadDict to buffer. Returns True if successful. + + :param payload: The payload to add + :type payload: PayloadDict + :rtype bool + """ + ... + + def get_events_batch() -> PayloadDictList: + """ + Get a list of all the PayloadDicts in the buffer. + + :rtype PayloadDictList + """ + ... + + def cleanup(batch: PayloadDictList, need_retry: bool) -> None: + """ + Removes sent events from the event store. If events need to be retried they are re-added to the buffer. + + :param batch: The events to be removed from the buffer + :type batch: PayloadDictList + :param need_retry Whether the events should be re-sent or not + :type need_retry bool + """ + ... + + def size() -> int: + """ + Returns the number of events in the buffer + + :rtype int + """ + ... + + +class InMemoryEventStore(EventStore): + """ + Create a InMemoryEventStore object with custom buffer capacity. The default is 10,000 events. + """ + + def __init__(self, logger: Logger, buffer_capacity: int = 10000) -> None: + """ + :param logger: Logging module + :type logger: Logger + :param buffer_capacity: The maximum capacity of the event buffer. + When the buffer is full new events are lost. + :type buffer_capacity int + """ + self.event_buffer = [] + self.buffer_capacity = buffer_capacity + self.logger = logger + + def add_event(self, payload: PayloadDict) -> bool: + """ + Add PayloadDict to buffer. 
+ + :param payload: The payload to add + :type payload: PayloadDict + """ + if self._buffer_capacity_reached(): + self.logger.error("Event buffer is full, dropping event.") + return False + + self.event_buffer.append(payload) + return True + + def get_events_batch(self) -> PayloadDictList: + """ + Get a list of all the PayloadDicts in the in the buffer. + + :rtype PayloadDictList + """ + batch = self.event_buffer + self.event_buffer = [] + return batch + + def cleanup(self, batch: PayloadDictList, need_retry: bool) -> None: + """ + Removes sent events from the InMemoryEventStore buffer. If events need to be retried they are re-added to the buffer. + + :param batch: The events to be removed from the buffer + :type batch: PayloadDictList + :param need_retry Whether the events should be re-sent or not + :type need_retry bool + """ + if not need_retry: + return + + for event in batch: + if not event in self.event_buffer: + if not self.add_event(event): + return + + def size(self) -> int: + """ + Returns the number of events in the buffer + + :rtype int + """ + return len(self.event_buffer) + + def _buffer_capacity_reached(self) -> bool: + """ + Returns true if buffer capacity is reached + + :rtype: bool + """ + return self.size() >= self.buffer_capacity diff --git a/snowplow_tracker/snowplow.py b/snowplow_tracker/snowplow.py index 946802e7..88f1e878 100644 --- a/snowplow_tracker/snowplow.py +++ b/snowplow_tracker/snowplow.py @@ -82,7 +82,7 @@ def create_tracker( on_failure=emitter_config.on_failure, byte_limit=emitter_config.byte_limit, request_timeout=emitter_config.request_timeout, - custom_retry_codes=emitter_config.custom_retry_codes + custom_retry_codes=emitter_config.custom_retry_codes, ) tracker = Tracker( diff --git a/snowplow_tracker/test/unit/test_emitters.py b/snowplow_tracker/test/unit/test_emitters.py index 439b66a8..c52eb0b2 100644 --- a/snowplow_tracker/test/unit/test_emitters.py +++ b/snowplow_tracker/test/unit/test_emitters.py @@ -65,7 +65,7 @@ def 
test_init(self) -> None: self.assertEqual(e.endpoint, 'https://0.0.0.0/com.snowplowanalytics.snowplow/tp2') self.assertEqual(e.method, 'post') self.assertEqual(e.batch_size, 10) - self.assertEqual(e.buffer, []) + self.assertEqual(e.event_store.event_buffer, []) self.assertIsNone(e.byte_limit) self.assertIsNone(e.bytes_queued) self.assertIsNone(e.on_success) @@ -125,8 +125,8 @@ def test_input_no_flush(self, mok_flush: Any) -> None: nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) - self.assertEqual(len(e.buffer), 1) - self.assertDictEqual(nvPairs, e.buffer[0]) + self.assertEqual(len(e.event_store.event_buffer), 1) + self.assertDictEqual(nvPairs, e.event_store.event_buffer[0]) self.assertIsNone(e.byte_limit) self.assertFalse(e.reached_limit()) mok_flush.assert_not_called() @@ -139,8 +139,8 @@ def test_input_flush_byte_limit(self, mok_flush: Any) -> None: nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) - self.assertEqual(len(e.buffer), 1) - self.assertDictEqual(nvPairs, e.buffer[0]) + self.assertEqual(len(e.event_store.event_buffer), 1) + self.assertDictEqual(nvPairs, e.event_store.event_buffer[0]) self.assertTrue(e.reached_limit()) self.assertEqual(mok_flush.call_count, 1) @@ -152,14 +152,14 @@ def test_input_flush_buffer(self, mok_flush: Any) -> None: nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) - self.assertEqual(len(e.buffer), 1) + self.assertEqual(len(e.event_store.event_buffer), 1) self.assertFalse(e.reached_limit()) - self.assertDictEqual(nvPairs, e.buffer[0]) + self.assertDictEqual(nvPairs, e.event_store.event_buffer[0]) nextPairs = {"n0": "v0"} e.input(nextPairs) # since we mock flush, the buffer is not empty - self.assertEqual(e.buffer, [nvPairs, nextPairs]) + self.assertEqual(e.event_store.event_buffer, [nvPairs, nextPairs]) self.assertTrue(e.reached_limit()) self.assertEqual(mok_flush.call_count, 1) @@ -171,7 +171,7 @@ def test_input_bytes_queued(self, mok_flush: Any) -> None: nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) - 
self.assertEqual(len(e.buffer), 1) + self.assertEqual(len(e.event_store.event_buffer), 1) self.assertEqual(e.bytes_queued, 24) e.input(nvPairs) @@ -185,11 +185,11 @@ def test_input_bytes_post(self, mok_flush: Any) -> None: nvPairs = {"testString": "test", "testNum": 2.72} e.input(nvPairs) - self.assertEqual(e.buffer, [{"testString": "test", "testNum": "2.72"}]) + self.assertEqual(e.event_store.event_buffer, [{"testString": "test", "testNum": "2.72"}]) - @mock.patch('snowplow_tracker.Emitter.send_events') + @mock.patch('snowplow_tracker.Emitter.http_post') def test_flush(self, mok_send_events: Any) -> None: - mok_send_events.side_effect = mocked_send_events + mok_send_events.side_effect = mocked_http_response_success e = Emitter('0.0.0.0', batch_size=2, byte_limit=None) nvPairs = {"n": "v"} @@ -197,11 +197,11 @@ def test_flush(self, mok_send_events: Any) -> None: e.input(nvPairs) self.assertEqual(mok_send_events.call_count, 1) - self.assertEqual(len(e.buffer), 0) + self.assertEqual(len(e.event_store.event_buffer), 0) - @mock.patch('snowplow_tracker.Emitter.send_events') + @mock.patch('snowplow_tracker.Emitter.http_post') def test_flush_bytes_queued(self, mok_send_events: Any) -> None: - mok_send_events.side_effect = mocked_send_events + mok_send_events.side_effect = mocked_http_response_success e = Emitter('0.0.0.0', batch_size=2, byte_limit=256) nvPairs = {"n": "v"} @@ -209,7 +209,7 @@ def test_flush_bytes_queued(self, mok_send_events: Any) -> None: e.input(nvPairs) self.assertEqual(mok_send_events.call_count, 1) - self.assertEqual(len(e.buffer), 0) + self.assertEqual(len(e.event_store.event_buffer), 0) self.assertEqual(e.bytes_queued, 0) @freeze_time("2021-04-14 00:00:02") # unix: 1618358402000 @@ -233,7 +233,7 @@ def test_flush_timer(self, mok_flush: Any) -> None: e.input(i) e.set_flush_timer(3) - self.assertEqual(len(e.buffer), 3) + self.assertEqual(len(e.event_store.event_buffer), 3) time.sleep(5) self.assertGreaterEqual(mok_flush.call_count, 1) @@ -318,7 
+318,7 @@ def test_async_emitter_input(self, mok_flush: Any) -> None: ae.input({"a": "aa"}) ae.input({"b": "bb"}) - self.assertEqual(len(ae.buffer), 2) + self.assertEqual(len(ae.event_store.event_buffer), 2) self.assertTrue(ae.queue.empty()) mok_flush.assert_not_called() @@ -334,12 +334,12 @@ def test_async_emitter_sync_flash(self, mok_send_events: Any) -> None: ae.input({"a": "aa"}) ae.input({"b": "bb"}) - self.assertEqual(len(ae.buffer), 2) + self.assertEqual(len(ae.event_store.event_buffer), 2) self.assertTrue(ae.queue.empty()) mok_send_events.assert_not_called() ae.sync_flush() - self.assertEqual(len(ae.buffer), 0) + self.assertEqual(len(ae.event_store.event_buffer), 0) self.assertEqual(ae.bytes_queued, 0) self.assertEqual(mok_send_events.call_count, 1) @@ -404,8 +404,8 @@ def test_input_unicode_get(self, mok_flush: Any) -> None: ae = AsyncEmitter('0.0.0.0', method="get", batch_size=2) ae.input(payload) - self.assertEqual(len(ae.buffer), 1) - self.assertDictEqual(payload, ae.buffer[0]) + self.assertEqual(len(ae.event_store.event_buffer), 1) + self.assertDictEqual(payload, ae.event_store.event_buffer[0]) @mock.patch('snowplow_tracker.AsyncEmitter.flush') def test_input_unicode_post(self, mok_flush: Any) -> None: @@ -415,8 +415,8 @@ def test_input_unicode_post(self, mok_flush: Any) -> None: ae = AsyncEmitter('0.0.0.0', batch_size=2) ae.input(payload) - self.assertEqual(len(ae.buffer), 1) - self.assertDictEqual(payload, ae.buffer[0]) + self.assertEqual(len(ae.event_store.event_buffer), 1) + self.assertDictEqual(payload, ae.event_store.event_buffer[0]) @mock.patch('snowplow_tracker.Emitter.http_post') def test_send_events_post_retry(self, mok_http_post: Any) -> None: diff --git a/snowplow_tracker/test/unit/test_in_memory_event_store.py b/snowplow_tracker/test/unit/test_in_memory_event_store.py new file mode 100644 index 00000000..90c9e887 --- /dev/null +++ b/snowplow_tracker/test/unit/test_in_memory_event_store.py @@ -0,0 +1,110 @@ +# """ +# 
test_in_memory_event_store.py + +# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. + +# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock +# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd +# License: Apache License Version 2.0 +# """ + +import unittest +from snowplow_tracker.event_store import InMemoryEventStore +import logging + +# logging +logging.basicConfig() +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +class TestInMemoryEventStore(unittest.TestCase): + def setUp(self) -> None: + pass + + def test_init(self): + event_store = InMemoryEventStore(logger) + self.assertEqual(event_store.buffer_capacity, 10000) + self.assertEqual(event_store.event_buffer, []) + + def test_add_event(self): + event_store = InMemoryEventStore(logger) + nvPairs = {"n0": "v0", "n1": "v1"} + + event_store.add_event(nvPairs) + self.assertDictEqual(nvPairs, event_store.event_buffer[0]) + + def test_size(self): + event_store = InMemoryEventStore(logger) + nvPairs = {"n0": "v0", "n1": "v1"} + + event_store.add_event(nvPairs) + event_store.add_event(nvPairs) + event_store.add_event(nvPairs) + + self.assertEqual(event_store.size(), 3) + + def test_add_failed_events_to_buffer(self): + event_store = InMemoryEventStore(logger) + + nvPair1 = {"n0": "v0", "n1": "v1"} + nvPair2 = {"n2": "v2", "n3": "v3"} + + 
event_store.add_event(nvPair1) + event_store.add_event(nvPair2) + + payload_list = event_store.get_events_batch() + + event_store.cleanup(payload_list, True) + + self.assertEqual(event_store.event_buffer, payload_list) + + def test_remove_success_events_from_buffer(self): + event_store = InMemoryEventStore(logger) + + nvPairs = {"n0": "v0", "n1": "v1"} + + event_store.add_event(nvPairs) + event_store.add_event(nvPairs) + payload_list = event_store.get_events_batch() + event_store.cleanup(payload_list, False) + + self.assertEqual(event_store.event_buffer, []) + + def test_drop_new_events_buffer_full(self): + event_store = InMemoryEventStore(logger, buffer_capacity=2) + + nvPair1 = {"n0": "v0"} + nvPair2 = {"n1": "v1"} + nvPair3 = {"n2": "v2"} + + event_store.add_event(nvPair1) + event_store.add_event(nvPair2) + + self.assertEqual(event_store.event_buffer, [{"n0": "v0"}, {"n1": "v1"}]) + + event_store.add_event(nvPair3) + + self.assertEqual(event_store.event_buffer, [{"n0": "v0"}, {"n1": "v1"}]) + + def test_get_events(self): + event_store = InMemoryEventStore(logger, buffer_capacity=2) + + nvPairs = {"n0": "v0"} + batch = [nvPairs, nvPairs] + + event_store.add_event(nvPairs) + event_store.add_event(nvPairs) + + self.assertEqual(event_store.get_events_batch(), batch) diff --git a/snowplow_tracker/tracker.py b/snowplow_tracker/tracker.py index 0215bf16..3a6ee7cd 100644 --- a/snowplow_tracker/tracker.py +++ b/snowplow_tracker/tracker.py @@ -311,7 +311,9 @@ def track_link_click( "%s/link_click/%s/1-0-1" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_self_describing_event(event_json, context, tstamp, event_subject) + return self.track_self_describing_event( + event_json, context, tstamp, event_subject + ) def track_add_to_cart( self, @@ -364,7 +366,9 @@ def track_add_to_cart( "%s/add_to_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_self_describing_event(event_json, context, tstamp, event_subject) + return 
self.track_self_describing_event( + event_json, context, tstamp, event_subject + ) def track_remove_from_cart( self, @@ -417,7 +421,9 @@ def track_remove_from_cart( "%s/remove_from_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_self_describing_event(event_json, context, tstamp, event_subject) + return self.track_self_describing_event( + event_json, context, tstamp, event_subject + ) def track_form_change( self, @@ -471,7 +477,9 @@ def track_form_change( "%s/change_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_self_describing_event(event_json, context, tstamp, event_subject) + return self.track_self_describing_event( + event_json, context, tstamp, event_subject + ) def track_form_submit( self, @@ -512,7 +520,9 @@ def track_form_submit( "%s/submit_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_self_describing_event(event_json, context, tstamp, event_subject) + return self.track_self_describing_event( + event_json, context, tstamp, event_subject + ) def track_site_search( self, @@ -556,7 +566,9 @@ def track_site_search( "%s/site_search/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_self_describing_event(event_json, context, tstamp, event_subject) + return self.track_self_describing_event( + event_json, context, tstamp, event_subject + ) def track_ecommerce_transaction_item( self, @@ -718,7 +730,9 @@ def track_screen_view( screen_view_properties, ) - return self.track_self_describing_event(event_json, context, tstamp, event_subject) + return self.track_self_describing_event( + event_json, context, tstamp, event_subject + ) def track_struct_event( self, @@ -817,9 +831,15 @@ def track_unstruct_event( :param event_subject: Optional per event subject :type event_subject: subject | None :rtype: tracker - """ - warn('track_unstruct_event will be deprecated in future versions. 
Please use track_self_describing_event.', DeprecationWarning, stacklevel=2) - return self.track_self_describing_event(event_json, context, tstamp, event_subject) + """ + warn( + "track_unstruct_event will be deprecated in future versions. Please use track_self_describing_event.", + DeprecationWarning, + stacklevel=2, + ) + return self.track_self_describing_event( + event_json, context, tstamp, event_subject + ) def flush(self, is_async: bool = False) -> "Tracker": """ From 6cf399ae4eacff1a209d493fb20584be5657b557 Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Mon, 23 Jan 2023 09:45:40 +0000 Subject: [PATCH 15/51] Refactor ScreenView using a single schema for all the trackers (#close 303) PR #311 * Add deprecation warning to track_screen_view() * Add track_mobile_screen_view() * Add mobile screen schema --- .../test/integration/test_integration.py | 18 ++--- snowplow_tracker/test/unit/test_tracker.py | 37 +++++++++- snowplow_tracker/tracker.py | 70 +++++++++++++++++++ 3 files changed, 114 insertions(+), 11 deletions(-) diff --git a/snowplow_tracker/test/integration/test_integration.py b/snowplow_tracker/test/integration/test_integration.py index b9ec214d..7e931388 100644 --- a/snowplow_tracker/test/integration/test_integration.py +++ b/snowplow_tracker/test/integration/test_integration.py @@ -125,10 +125,10 @@ def test_integration_ecommerce_transaction(self) -> None: self.assertEqual(from_querystring("ttm", querystrings[-3]), from_querystring("ttm", querystrings[-2])) - def test_integration_screen_view(self) -> None: + def test_integration_mobile_screen_view(self) -> None: t = tracker.Tracker([get_emitter], default_subject, encode_base64=False) with HTTMock(pass_response_content): - t.track_screen_view("Game HUD 2", id_="534") + t.track_mobile_screen_view("534", "Game HUD 2") expected_fields = {"e": "ue"} for key in expected_fields: self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) @@ 
-137,10 +137,10 @@ def test_integration_screen_view(self) -> None: self.assertEqual(envelope, { "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", "data": { - "schema": "iglu:com.snowplowanalytics.snowplow/screen_view/jsonschema/1-0-0", + "schema": "iglu:com.snowplowanalytics.mobile/screen_view/jsonschema/1-0-0", "data": { - "name": "Game HUD 2", - "id": "534" + "id": "534", + "name": "Game HUD 2" } } }) @@ -380,7 +380,7 @@ def test_unicode_get(self) -> None: test_ctx = SelfDescribingJson('iglu:a.b/c/jsonschema/1-0-0', {'test': unicode_a}) with HTTMock(pass_response_content): t.track_page_view(unicode_b, context=[test_ctx]) - t.track_screen_view(unicode_b, context=[test_ctx]) + t.track_mobile_screen_view(unicode_b, context=[test_ctx]) url_string = unquote_plus(from_querystring("url", querystrings[-2])) try: @@ -394,7 +394,7 @@ def test_unicode_get(self) -> None: self.assertEqual(actual_a, unicode_a) uepr_string = unquote_plus(from_querystring("ue_pr", querystrings[-1])) - actual_b = json.loads(uepr_string)['data']['data']['name'] + actual_b = json.loads(uepr_string)['data']['data']['id'] self.assertEqual(actual_b, unicode_b) def test_unicode_post(self) -> None: @@ -404,7 +404,7 @@ def test_unicode_post(self) -> None: test_ctx = SelfDescribingJson('iglu:a.b/c/jsonschema/1-0-0', {'test': unicode_a}) with HTTMock(pass_post_response_content): t.track_page_view(unicode_b, context=[test_ctx]) - t.track_screen_view(unicode_b, context=[test_ctx]) + t.track_mobile_screen_view(unicode_b, context=[test_ctx]) pv_event = querystrings[-2] self.assertEqual(pv_event['data'][0]['url'], unicode_b) @@ -413,5 +413,5 @@ def test_unicode_post(self) -> None: self.assertEqual(in_test_ctx, unicode_a) sv_event = querystrings[-1] - in_uepr_name = json.loads(sv_event['data'][0]['ue_pr'])['data']['data']['name'] + in_uepr_name = json.loads(sv_event['data'][0]['ue_pr'])['data']['data']['id'] self.assertEqual(in_uepr_name, unicode_b) diff --git 
a/snowplow_tracker/test/unit/test_tracker.py b/snowplow_tracker/test/unit/test_tracker.py index 5f361ae8..91a2eff5 100644 --- a/snowplow_tracker/test/unit/test_tracker.py +++ b/snowplow_tracker/test/unit/test_tracker.py @@ -44,6 +44,9 @@ FORM_CHANGE_SCHEMA = "iglu:com.snowplowanalytics.snowplow/change_form/jsonschema/1-0-0" FORM_SUBMIT_SCHEMA = "iglu:com.snowplowanalytics.snowplow/submit_form/jsonschema/1-0-0" SITE_SEARCH_SCHEMA = "iglu:com.snowplowanalytics.snowplow/site_search/jsonschema/1-0-0" +MOBILE_SCREEN_VIEW_SCHEMA = ( + "iglu:com.snowplowanalytics.mobile/screen_view/jsonschema/1-0-0" +) SCREEN_VIEW_SCHEMA = "iglu:com.snowplowanalytics.snowplow/screen_view/jsonschema/1-0-0" # helpers @@ -474,7 +477,9 @@ def test_track_self_describing_event(self, mok_complete_payload: Any) -> None: self.assertTrue(actualTstampArg is None) @mock.patch("snowplow_tracker.Tracker.complete_payload") - def test_track_self_describing_event_all_args(self, mok_complete_payload: Any) -> None: + def test_track_self_describing_event_all_args( + self, mok_complete_payload: Any + ) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -510,7 +515,9 @@ def test_track_self_describing_event_all_args(self, mok_complete_payload: Any) - self.assertEqual(actualTstampArg, evTstamp) @mock.patch("snowplow_tracker.Tracker.complete_payload") - def test_track_self_describing_event_encode(self, mok_complete_payload: Any) -> None: + def test_track_self_describing_event_encode( + self, mok_complete_payload: Any + ) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -1286,6 +1293,32 @@ def test_track_site_search_optional_none(self, mok_track_unstruct: Any) -> None: self.assertTrue(callArgs[1] is None) self.assertTrue(callArgs[2] is None) + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_mobile_screen_view(self, mok_track_unstruct: Any) -> None: + mokEmitter = 
self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker(e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + evTstamp = 1399021242030 + + t.track_mobile_screen_view( + "screenId", "screenName", context=[ctx], tstamp=evTstamp + ) + + expected = { + "schema": MOBILE_SCREEN_VIEW_SCHEMA, + "data": {"name": "screenName", "id": "screenId"}, + } + + callArgs = mok_track_unstruct.call_args_list[0][0] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs[0].to_json(), expected) + self.assertIs(callArgs[1][0], ctx) + self.assertEqual(callArgs[2], evTstamp) + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_screen_view(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") diff --git a/snowplow_tracker/tracker.py b/snowplow_tracker/tracker.py index 3a6ee7cd..7791b103 100644 --- a/snowplow_tracker/tracker.py +++ b/snowplow_tracker/tracker.py @@ -44,6 +44,7 @@ VERSION = "py-%s" % _version.__version__ DEFAULT_ENCODE_BASE64 = True BASE_SCHEMA_PATH = "iglu:com.snowplowanalytics.snowplow" +MOBILE_SCHEMA_PATH = "iglu:com.snowplowanalytics.mobile" SCHEMA_TAG = "jsonschema" CONTEXT_SCHEMA = "%s/contexts/%s/1-0-1" % (BASE_SCHEMA_PATH, SCHEMA_TAG) UNSTRUCT_EVENT_SCHEMA = "%s/unstruct_event/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG) @@ -719,6 +720,11 @@ def track_screen_view( :type event_subject: subject | None :rtype: tracker """ + warn( + "track_screen_view will be deprecated in future versions. 
Please use track_mobile_screen_view.", + DeprecationWarning, + stacklevel=2, + ) screen_view_properties = {} if name is not None: screen_view_properties["name"] = name @@ -734,6 +740,70 @@ def track_screen_view( event_json, context, tstamp, event_subject ) + def track_mobile_screen_view( + self, + id_: Optional[str] = None, + name: Optional[str] = None, + type: Optional[str] = None, + previous_name: Optional[str] = None, + previous_id: Optional[str] = None, + previous_type: Optional[str] = None, + transition_type: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + :param id_: Screen view ID. This must be of type UUID. + :type id_: string | None + :param name: The name of the screen view event + :type name: string_or_none + :param type: The type of screen that was viewed e.g feed / carousel. + :type type: string | None + :param previous_name: The name of the previous screen. + :type previous_name: string | None + :param previous_id: The screenview ID of the previous screenview. + :type previous_id: string | None + :param previous_type The screen type of the previous screenview + :type previous_type string | None + :param transition_type The type of transition that led to the screen being viewed. 
+ :type transition_type string | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker + """ + screen_view_properties = {} + + if id_ is None: + id_ = self.get_uuid() + + screen_view_properties["id"] = id_ + + if name is not None: + screen_view_properties["name"] = name + if type is not None: + screen_view_properties["type"] = type + if previous_name is not None: + screen_view_properties["previousName"] = previous_name + if previous_id is not None: + screen_view_properties["previousId"] = previous_id + if previous_type is not None: + screen_view_properties["previousType"] = previous_type + if transition_type is not None: + screen_view_properties["transitionType"] = transition_type + + event_json = SelfDescribingJson( + "%s/screen_view/%s/1-0-0" % (MOBILE_SCHEMA_PATH, SCHEMA_TAG), + screen_view_properties, + ) + return self.track_self_describing_event( + event_json, context, tstamp, event_subject + ) + def track_struct_event( self, category: str, From 1771798f0046243e93886ca28e2d827d4cab9d2e Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Mon, 23 Jan 2023 10:32:59 +0000 Subject: [PATCH 16/51] Bump copyright to 2023 (close #313) PR #314 * Update copyright notices for 2023 * Remove author list from copyright notice --- LICENSE | 2 +- README.md | 2 +- docs/source/conf.py | 2 +- setup.py | 8 +- snowplow_tracker/_version.py | 6 +- snowplow_tracker/celery/celery_emitter.py | 43 +- snowplow_tracker/contracts.py | 6 +- snowplow_tracker/emitter_configuration.py | 6 +- snowplow_tracker/emitters.py | 7 +- snowplow_tracker/event_store.py | 6 +- snowplow_tracker/payload.py | 6 +- snowplow_tracker/redis/redis_emitter.py | 35 +- snowplow_tracker/redis/redis_worker.py | 33 +- 
snowplow_tracker/self_describing_json.py | 6 +- snowplow_tracker/snowplow.py | 7 +- snowplow_tracker/subject.py | 6 +- .../test/integration/test_integration.py | 423 +++++++++++++----- snowplow_tracker/test/unit/test_contracts.py | 57 +-- snowplow_tracker/test/unit/test_emitters.py | 257 +++++++---- .../test/unit/test_in_memory_event_store.py | 6 +- snowplow_tracker/test/unit/test_payload.py | 70 +-- snowplow_tracker/test/unit/test_subject.py | 15 +- snowplow_tracker/test/unit/test_tracker.py | 6 +- snowplow_tracker/tracker.py | 6 +- snowplow_tracker/tracker_configuration.py | 6 +- snowplow_tracker/typing.py | 6 +- 26 files changed, 610 insertions(+), 423 deletions(-) diff --git a/LICENSE b/LICENSE index ec02d7cc..db047f7e 100644 --- a/LICENSE +++ b/LICENSE @@ -186,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2013-2022 Snowplow Analytics Ltd. + Copyright 2013-2023 Snowplow Analytics Ltd. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/README.md b/README.md index 114aecb7..16fd5b51 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ Assuming [docker](https://www.docker.com/) is installed Copyright and license --------------------- -The Snowplow Python Tracker is copyright 2013-2022 Snowplow Analytics +The Snowplow Python Tracker is copyright 2013-2023 Snowplow Analytics Ltd. 
Licensed under the [Apache License, Version diff --git a/docs/source/conf.py b/docs/source/conf.py index d906b8be..f18cc0eb 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -24,7 +24,7 @@ # -- Project information ----------------------------------------------------- project = 'Snowplow Python Tracker' -copyright = '2022, Alex Dean, Paul Boocock, Matus Tomlein, Jack Keene' +copyright = "2023, Alex Dean, Paul Boocock, Matus Tomlein, Jack Keene" author = 'Alex Dean, Paul Boocock, Matus Tomlein, Jack Keene' # The full version, including alpha/beta/rc tags diff --git a/setup.py b/setup.py index cbb9e7cc..e12f7835 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ # # setup.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,11 +13,7 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 -# +# """ #!/usr/bin/env python # -*- coding: utf-8 -*- diff --git a/snowplow_tracker/_version.py b/snowplow_tracker/_version.py index d77b9f6d..aa7e4852 100644 --- a/snowplow_tracker/_version.py +++ b/snowplow_tracker/_version.py @@ -1,7 +1,7 @@ # """ # _version.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. 
# This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ __version_info__ = (0, 12, 0) diff --git a/snowplow_tracker/celery/celery_emitter.py b/snowplow_tracker/celery/celery_emitter.py index 682ee24b..bb1a1b96 100644 --- a/snowplow_tracker/celery/celery_emitter.py +++ b/snowplow_tracker/celery/celery_emitter.py @@ -1,7 +1,7 @@ # """ # celery_emitter.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import logging @@ -39,27 +35,32 @@ class CeleryEmitter(Emitter): """ - Uses a Celery worker to send HTTP requests asynchronously. - Works like the base Emitter class, - but on_success and on_failure callbacks cannot be set. + Uses a Celery worker to send HTTP requests asynchronously. + Works like the base Emitter class, + but on_success and on_failure callbacks cannot be set. 
""" + if _CELERY_OPT: celery_app = None def __init__( - self, - endpoint: str, - protocol: HttpProtocol = "http", - port: Optional[int] = None, - method: Method = "post", - batch_size: Optional[int] = None, - byte_limit: Optional[int] = None) -> None: - super(CeleryEmitter, self).__init__(endpoint, protocol, port, method, batch_size, None, None, byte_limit) + self, + endpoint: str, + protocol: HttpProtocol = "http", + port: Optional[int] = None, + method: Method = "post", + batch_size: Optional[int] = None, + byte_limit: Optional[int] = None, + ) -> None: + super(CeleryEmitter, self).__init__( + endpoint, protocol, port, method, batch_size, None, None, byte_limit + ) try: # Check whether a custom Celery configuration module named "snowplow_celery_config" exists import snowplow_celery_config + self.celery_app = Celery() self.celery_app.config_from_object(snowplow_celery_config) except ImportError: @@ -80,6 +81,10 @@ def async_flush(self) -> None: else: - def __new__(cls, *args: Any, **kwargs: Any) -> 'CeleryEmitter': - logger.error("CeleryEmitter is not available. Please install snowplow-tracker with celery extra dependency.") - raise RuntimeError('CeleryEmitter is not available. To use: `pip install snowplow-tracker[celery]`') + def __new__(cls, *args: Any, **kwargs: Any) -> "CeleryEmitter": + logger.error( + "CeleryEmitter is not available. Please install snowplow-tracker with celery extra dependency." + ) + raise RuntimeError( + "CeleryEmitter is not available. To use: `pip install snowplow-tracker[celery]`" + ) diff --git a/snowplow_tracker/contracts.py b/snowplow_tracker/contracts.py index 7a9253af..c54ac668 100644 --- a/snowplow_tracker/contracts.py +++ b/snowplow_tracker/contracts.py @@ -1,7 +1,7 @@ # """ # contracts.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. 
# This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock, Matus Tomlein -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import traceback diff --git a/snowplow_tracker/emitter_configuration.py b/snowplow_tracker/emitter_configuration.py index 9b636d02..87fa6c9e 100644 --- a/snowplow_tracker/emitter_configuration.py +++ b/snowplow_tracker/emitter_configuration.py @@ -1,7 +1,7 @@ # """ # emitter_configuration.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Jack Keene, Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ from typing import Optional, Union, Tuple, Dict diff --git a/snowplow_tracker/emitters.py b/snowplow_tracker/emitters.py index d6ac1ef2..760fc398 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -1,7 +1,7 @@ # """ # emitters.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. 
# This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,13 +13,8 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ - import logging import time import threading diff --git a/snowplow_tracker/event_store.py b/snowplow_tracker/event_store.py index ad47ac5c..898f92ff 100644 --- a/snowplow_tracker/event_store.py +++ b/snowplow_tracker/event_store.py @@ -1,7 +1,7 @@ # """ # event_store.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Jack Keene, Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ from typing_extensions import Protocol diff --git a/snowplow_tracker/payload.py b/snowplow_tracker/payload.py index aacc6243..26e3262c 100644 --- a/snowplow_tracker/payload.py +++ b/snowplow_tracker/payload.py @@ -1,7 +1,7 @@ # """ # payload.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. 
# This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import json diff --git a/snowplow_tracker/redis/redis_emitter.py b/snowplow_tracker/redis/redis_emitter.py index a24ee50b..bb4cf5f4 100644 --- a/snowplow_tracker/redis/redis_emitter.py +++ b/snowplow_tracker/redis/redis_emitter.py @@ -1,7 +1,7 @@ # """ # redis_emitter.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. 
- -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import json @@ -38,16 +34,19 @@ class RedisEmitter(object): """ - Sends Snowplow events to a Redis database + Sends Snowplow events to a Redis database """ + if _REDIS_OPT: - def __init__(self, rdb: Optional[RedisProtocol] = None, key: str = "snowplow") -> None: + def __init__( + self, rdb: Optional[RedisProtocol] = None, key: str = "snowplow" + ) -> None: """ - :param rdb: Optional custom Redis database - :type rdb: redis | None - :param key: The Redis key for the list of events - :type key: string + :param rdb: Optional custom Redis database + :type rdb: redis | None + :param key: The Redis key for the list of events + :type key: string """ if rdb is None: rdb = redis.StrictRedis() @@ -57,8 +56,8 @@ def __init__(self, rdb: Optional[RedisProtocol] = None, key: str = "snowplow") - def input(self, payload: PayloadDict) -> None: """ - :param payload: The event properties - :type payload: dict(string:*) + :param payload: The event properties + :type payload: dict(string:*) """ logger.debug("Pushing event to Redis queue...") self.rdb.rpush(self.key, json.dumps(payload)) @@ -72,6 +71,10 @@ def sync_flush(self) -> None: else: - def __new__(cls, *args: Any, **kwargs: Any) -> 'RedisEmitter': - logger.error("RedisEmitter is not available. Please install snowplow-tracker with redis extra dependency.") - raise RuntimeError('RedisEmitter is not available. To use: `pip install snowplow-tracker[redis]`') + def __new__(cls, *args: Any, **kwargs: Any) -> "RedisEmitter": + logger.error( + "RedisEmitter is not available. Please install snowplow-tracker with redis extra dependency." + ) + raise RuntimeError( + "RedisEmitter is not available. 
To use: `pip install snowplow-tracker[redis]`" + ) diff --git a/snowplow_tracker/redis/redis_worker.py b/snowplow_tracker/redis/redis_worker.py index 3f1e9f51..e0e9353f 100644 --- a/snowplow_tracker/redis/redis_worker.py +++ b/snowplow_tracker/redis/redis_worker.py @@ -1,7 +1,7 @@ # """ # redis_worker.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,13 +13,8 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ - import json import signal from typing import Any, Optional @@ -39,11 +34,17 @@ class RedisWorker(object): """ - Asynchronously take events from redis and send them to an emitter + Asynchronously take events from redis and send them to an emitter """ + if _REDIS_OPT: - def __init__(self, emitter: EmitterProtocol, rdb: Optional[RedisProtocol] = None, key: str = DEFAULT_KEY) -> None: + def __init__( + self, + emitter: EmitterProtocol, + rdb: Optional[RedisProtocol] = None, + key: str = DEFAULT_KEY, + ) -> None: self.emitter = emitter self.key = key if rdb is None: @@ -57,14 +58,14 @@ def __init__(self, emitter: EmitterProtocol, rdb: Optional[RedisProtocol] = None def send(self, payload: PayloadDict) -> None: """ - Send an event to an emitter + Send an event to an emitter """ self.emitter.input(payload) def pop_payload(self) -> None: """ - Get a single event from Redis and send it - If the Redis queue is empty, sleep to avoid making continual requests + Get a single event from Redis and 
send it + If the Redis queue is empty, sleep to avoid making continual requests """ payload = self.rdb.lpop(self.key) if payload: @@ -74,7 +75,7 @@ def pop_payload(self) -> None: def run(self) -> None: """ - Run indefinitely + Run indefinitely """ self._shutdown = False @@ -84,11 +85,13 @@ def run(self) -> None: def request_shutdown(self, *args: Any) -> None: """ - Halt the worker + Halt the worker """ self._shutdown = True else: - def __new__(cls, *args: Any, **kwargs: Any) -> 'RedisWorker': - raise RuntimeError('RedisWorker is not available. To use: `pip install snowplow-tracker[redis]`') + def __new__(cls, *args: Any, **kwargs: Any) -> "RedisWorker": + raise RuntimeError( + "RedisWorker is not available. To use: `pip install snowplow-tracker[redis]`" + ) diff --git a/snowplow_tracker/self_describing_json.py b/snowplow_tracker/self_describing_json.py index 538408ca..b386c039 100644 --- a/snowplow_tracker/self_describing_json.py +++ b/snowplow_tracker/self_describing_json.py @@ -1,7 +1,7 @@ # """ # self_describing_json.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. 
- -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import json diff --git a/snowplow_tracker/snowplow.py b/snowplow_tracker/snowplow.py index 88f1e878..92e96461 100644 --- a/snowplow_tracker/snowplow.py +++ b/snowplow_tracker/snowplow.py @@ -1,7 +1,7 @@ # """ # snowplow.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,11 +13,8 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Jack Keene, Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ + import logging from typing import Optional from snowplow_tracker import ( diff --git a/snowplow_tracker/subject.py b/snowplow_tracker/subject.py index b5f3de83..10bcbe2a 100644 --- a/snowplow_tracker/subject.py +++ b/snowplow_tracker/subject.py @@ -1,7 +1,7 @@ # """ # subject.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. 
- -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ from snowplow_tracker.contracts import one_of, greater_than diff --git a/snowplow_tracker/test/integration/test_integration.py b/snowplow_tracker/test/integration/test_integration.py index 7e931388..7758a266 100644 --- a/snowplow_tracker/test/integration/test_integration.py +++ b/snowplow_tracker/test/integration/test_integration.py @@ -1,7 +1,7 @@ # """ # test_integration.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. 
- -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import unittest @@ -38,7 +34,7 @@ default_emitter = emitters.Emitter("localhost", protocol="http", port=80, batch_size=1) -get_emitter = emitters.Emitter("localhost", protocol="http", port=80, method='get') +get_emitter = emitters.Emitter("localhost", protocol="http", port=80, method="get") default_subject = subject.Subject() @@ -53,77 +49,113 @@ def from_querystring(field: str, url: str) -> Optional[str]: @all_requests def pass_response_content(url: str, request: Any) -> Dict[str, Any]: querystrings.append(request.url) - return { - "url": request.url, - "status_code": 200 - } + return {"url": request.url, "status_code": 200} @all_requests def pass_post_response_content(url: str, request: Any) -> Dict[str, Any]: querystrings.append(json.loads(request.body)) - return { - "url": request.url, - "status_code": 200 - } + return {"url": request.url, "status_code": 200} @all_requests def fail_response_content(url: str, request: Any) -> Dict[str, Any]: - return { - "url": request.url, - "status_code": 501 - } + return {"url": request.url, "status_code": 501} class IntegrationTest(unittest.TestCase): - def test_integration_page_view(self) -> None: t = tracker.Tracker([get_emitter], default_subject) with HTTMock(pass_response_content): - t.track_page_view("http://savethearctic.org", "Save The Arctic", "http://referrer.com") - expected_fields = {"e": "pv", "page": "Save+The+Arctic", "url": "http%3A%2F%2Fsavethearctic.org", "refr": "http%3A%2F%2Freferrer.com"} + t.track_page_view( + "http://savethearctic.org", "Save The Arctic", "http://referrer.com" + ) + expected_fields = { + "e": "pv", + "page": "Save+The+Arctic", + "url": "http%3A%2F%2Fsavethearctic.org", + "refr": "http%3A%2F%2Freferrer.com", + } for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) + 
self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) def test_integration_ecommerce_transaction_item(self) -> None: t = tracker.Tracker([get_emitter], default_subject) with HTTMock(pass_response_content): - t.track_ecommerce_transaction_item("12345", "pbz0025", 7.99, 2, "black-tarot", "tarot", currency="GBP") - expected_fields = {"ti_ca": "tarot", "ti_id": "12345", "ti_qu": "2", "ti_sk": "pbz0025", "e": "ti", "ti_nm": "black-tarot", "ti_pr": "7.99", "ti_cu": "GBP"} + t.track_ecommerce_transaction_item( + "12345", "pbz0025", 7.99, 2, "black-tarot", "tarot", currency="GBP" + ) + expected_fields = { + "ti_ca": "tarot", + "ti_id": "12345", + "ti_qu": "2", + "ti_sk": "pbz0025", + "e": "ti", + "ti_nm": "black-tarot", + "ti_pr": "7.99", + "ti_cu": "GBP", + } for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) def test_integration_ecommerce_transaction(self) -> None: t = tracker.Tracker([get_emitter], default_subject) with HTTMock(pass_response_content): t.track_ecommerce_transaction( - "6a8078be", 35, city="London", currency="GBP", + "6a8078be", + 35, + city="London", + currency="GBP", items=[ - { - "sku": "pbz0026", - "price": 20, - "quantity": 1 - }, - { - "sku": "pbz0038", - "price": 15, - "quantity": 1 - }]) + {"sku": "pbz0026", "price": 20, "quantity": 1}, + {"sku": "pbz0038", "price": 15, "quantity": 1}, + ], + ) - expected_fields = {"e": "tr", "tr_id": "6a8078be", "tr_tt": "35", "tr_ci": "London", "tr_cu": "GBP"} + expected_fields = { + "e": "tr", + "tr_id": "6a8078be", + "tr_tt": "35", + "tr_ci": "London", + "tr_cu": "GBP", + } for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-3]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-3]), expected_fields[key] + ) - expected_fields = {"e": "ti", "ti_id": "6a8078be", "ti_sk": 
"pbz0026", "ti_pr": "20", "ti_cu": "GBP"} + expected_fields = { + "e": "ti", + "ti_id": "6a8078be", + "ti_sk": "pbz0026", + "ti_pr": "20", + "ti_cu": "GBP", + } for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-2]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-2]), expected_fields[key] + ) - expected_fields = {"e": "ti", "ti_id": "6a8078be", "ti_sk": "pbz0038", "ti_pr": "15", "ti_cu": "GBP"} + expected_fields = { + "e": "ti", + "ti_id": "6a8078be", + "ti_sk": "pbz0038", + "ti_pr": "15", + "ti_cu": "GBP", + } for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) - self.assertEqual(from_querystring("ttm", querystrings[-3]), from_querystring("ttm", querystrings[-2])) + self.assertEqual( + from_querystring("ttm", querystrings[-3]), + from_querystring("ttm", querystrings[-2]), + ) def test_integration_mobile_screen_view(self) -> None: t = tracker.Tracker([get_emitter], default_subject, encode_base64=False) @@ -131,77 +163,168 @@ def test_integration_mobile_screen_view(self) -> None: t.track_mobile_screen_view("534", "Game HUD 2") expected_fields = {"e": "ue"} for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) envelope_string = from_querystring("ue_pr", querystrings[-1]) envelope = json.loads(unquote_plus(envelope_string)) - self.assertEqual(envelope, { - "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", - "data": { - "schema": "iglu:com.snowplowanalytics.mobile/screen_view/jsonschema/1-0-0", + self.assertEqual( + envelope, + { + "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", "data": { - "id": "534", - "name": "Game HUD 2" - } - } - }) + "schema": 
"iglu:com.snowplowanalytics.mobile/screen_view/jsonschema/1-0-0", + "data": {"id": "534", "name": "Game HUD 2"}, + }, + }, + ) def test_integration_struct_event(self) -> None: t = tracker.Tracker([get_emitter], default_subject) with HTTMock(pass_response_content): - t.track_struct_event("Ecomm", "add-to-basket", "dog-skateboarding-video", "hd", 13.99) - expected_fields = {"se_ca": "Ecomm", "se_pr": "hd", "se_la": "dog-skateboarding-video", "se_va": "13.99", "se_ac": "add-to-basket", "e": "se"} + t.track_struct_event( + "Ecomm", "add-to-basket", "dog-skateboarding-video", "hd", 13.99 + ) + expected_fields = { + "se_ca": "Ecomm", + "se_pr": "hd", + "se_la": "dog-skateboarding-video", + "se_va": "13.99", + "se_ac": "add-to-basket", + "e": "se", + } for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) def test_integration_self_describing_event_non_base64(self) -> None: t = tracker.Tracker([get_emitter], default_subject, encode_base64=False) with HTTMock(pass_response_content): - t.track_self_describing_event(SelfDescribingJson("iglu:com.acme/viewed_product/jsonschema/2-0-2", {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000})) + t.track_self_describing_event( + SelfDescribingJson( + "iglu:com.acme/viewed_product/jsonschema/2-0-2", + {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000}, + ) + ) expected_fields = {"e": "ue"} for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) envelope_string = from_querystring("ue_pr", querystrings[-1]) envelope = json.loads(unquote_plus(envelope_string)) - self.assertEqual(envelope, { - "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", - "data": {"schema": 
"iglu:com.acme/viewed_product/jsonschema/2-0-2", "data": {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000}} - }) + self.assertEqual( + envelope, + { + "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data": { + "schema": "iglu:com.acme/viewed_product/jsonschema/2-0-2", + "data": { + "product_id": "ASO01043", + "price$flt": 49.95, + "walrus$tms": 1000, + }, + }, + }, + ) def test_integration_self_describing_event_base64(self) -> None: t = tracker.Tracker([get_emitter], default_subject, encode_base64=True) with HTTMock(pass_response_content): - t.track_self_describing_event(SelfDescribingJson("iglu:com.acme/viewed_product/jsonschema/2-0-2", {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000})) + t.track_self_describing_event( + SelfDescribingJson( + "iglu:com.acme/viewed_product/jsonschema/2-0-2", + {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000}, + ) + ) expected_fields = {"e": "ue"} for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) envelope_string = unquote_plus(from_querystring("ue_px", querystrings[-1])) - envelope = json.loads((base64.urlsafe_b64decode(bytearray(envelope_string, "utf-8"))).decode("utf-8")) - self.assertEqual(envelope, { - "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", - "data": {"schema": "iglu:com.acme/viewed_product/jsonschema/2-0-2", "data": {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000}} - }) + envelope = json.loads( + (base64.urlsafe_b64decode(bytearray(envelope_string, "utf-8"))).decode( + "utf-8" + ) + ) + self.assertEqual( + envelope, + { + "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data": { + "schema": "iglu:com.acme/viewed_product/jsonschema/2-0-2", + "data": { + "product_id": "ASO01043", + "price$flt": 49.95, + 
"walrus$tms": 1000, + }, + }, + }, + ) def test_integration_context_non_base64(self) -> None: t = tracker.Tracker([get_emitter], default_subject, encode_base64=False) with HTTMock(pass_response_content): - t.track_page_view("localhost", "local host", None, [SelfDescribingJson("iglu:com.example/user/jsonschema/2-0-3", {"user_type": "tester"})]) + t.track_page_view( + "localhost", + "local host", + None, + [ + SelfDescribingJson( + "iglu:com.example/user/jsonschema/2-0-3", + {"user_type": "tester"}, + ) + ], + ) envelope_string = from_querystring("co", querystrings[-1]) envelope = json.loads(unquote_plus(envelope_string)) - self.assertEqual(envelope, { - "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1", - "data": [{"schema": "iglu:com.example/user/jsonschema/2-0-3", "data": {"user_type": "tester"}}] - }) + self.assertEqual( + envelope, + { + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1", + "data": [ + { + "schema": "iglu:com.example/user/jsonschema/2-0-3", + "data": {"user_type": "tester"}, + } + ], + }, + ) def test_integration_context_base64(self) -> None: t = tracker.Tracker([get_emitter], default_subject, encode_base64=True) with HTTMock(pass_response_content): - t.track_page_view("localhost", "local host", None, [SelfDescribingJson("iglu:com.example/user/jsonschema/2-0-3", {"user_type": "tester"})]) + t.track_page_view( + "localhost", + "local host", + None, + [ + SelfDescribingJson( + "iglu:com.example/user/jsonschema/2-0-3", + {"user_type": "tester"}, + ) + ], + ) envelope_string = unquote_plus(from_querystring("cx", querystrings[-1])) - envelope = json.loads((base64.urlsafe_b64decode(bytearray(envelope_string, "utf-8"))).decode("utf-8")) - self.assertEqual(envelope, { - "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1", - "data": [{"schema": "iglu:com.example/user/jsonschema/2-0-3", "data": {"user_type": "tester"}}] - }) + envelope = json.loads( + 
(base64.urlsafe_b64decode(bytearray(envelope_string, "utf-8"))).decode( + "utf-8" + ) + ) + self.assertEqual( + envelope, + { + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1", + "data": [ + { + "schema": "iglu:com.example/user/jsonschema/2-0-3", + "data": {"user_type": "tester"}, + } + ], + }, + ) def test_integration_standard_nv_pairs(self) -> None: s = subject.Subject() @@ -212,14 +335,28 @@ def test_integration_standard_nv_pairs(self) -> None: s.set_timezone("Europe London") s.set_lang("en") - t = tracker.Tracker([emitters.Emitter("localhost", method='get')], s, "cf", app_id="angry-birds-android") + t = tracker.Tracker( + [emitters.Emitter("localhost", method="get")], + s, + "cf", + app_id="angry-birds-android", + ) with HTTMock(pass_response_content): t.track_page_view("localhost", "local host") - expected_fields = {"tna": "cf", "res": "100x200", - "lang": "en", "aid": "angry-birds-android", "cd": "24", "tz": "Europe+London", - "p": "mob", "tv": "py-" + _version.__version__} + expected_fields = { + "tna": "cf", + "res": "100x200", + "lang": "en", + "aid": "angry-birds-android", + "cd": "24", + "tz": "Europe+London", + "p": "mob", + "tv": "py-" + _version.__version__, + } for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) self.assertIsNotNone(from_querystring("eid", querystrings[-1])) self.assertIsNotNone(from_querystring("dtm", querystrings[-1])) @@ -229,10 +366,17 @@ def test_integration_identification_methods(self) -> None: s.set_domain_session_id("59ed13b1a5724dae") s.set_domain_session_index(1) s.set_ip_address("255.255.255.255") - s.set_useragent("Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0)") + s.set_useragent( + "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0)" + ) s.set_network_user_id("fbc6c76c-bce5-43ce-8d5a-31c5") - t = 
tracker.Tracker([emitters.Emitter("localhost", method='get')], s, "cf", app_id="angry-birds-android") + t = tracker.Tracker( + [emitters.Emitter("localhost", method="get")], + s, + "cf", + app_id="angry-birds-android", + ) with HTTMock(pass_response_content): t.track_page_view("localhost", "local host") expected_fields = { @@ -241,30 +385,39 @@ def test_integration_identification_methods(self) -> None: "vid": "1", "ip": "255.255.255.255", "ua": "Mozilla%2F5.0+%28compatible%3B+MSIE+9.0%3B+Windows+NT+6.0%3B+Trident%2F5.0%29", - "tnuid": "fbc6c76c-bce5-43ce-8d5a-31c5" + "tnuid": "fbc6c76c-bce5-43ce-8d5a-31c5", } for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) def test_integration_event_subject(self) -> None: s = subject.Subject() s.set_domain_user_id("4616bfb38f872d16") s.set_lang("ES") - t = tracker.Tracker([emitters.Emitter("localhost", method='get')], s, "cf", app_id="angry-birds-android") - evSubject = subject.Subject().set_domain_user_id("1111aaa11a111a11").set_lang("EN") + t = tracker.Tracker( + [emitters.Emitter("localhost", method="get")], + s, + "cf", + app_id="angry-birds-android", + ) + evSubject = ( + subject.Subject().set_domain_user_id("1111aaa11a111a11").set_lang("EN") + ) with HTTMock(pass_response_content): t.track_page_view("localhost", "local host", event_subject=evSubject) - expected_fields = { - "duid": "1111aaa11a111a11", - "lang": "EN" - } + expected_fields = {"duid": "1111aaa11a111a11", "lang": "EN"} for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) def test_integration_redis_default(self) -> None: try: import fakeredis + r = fakeredis.FakeStrictRedis() t = tracker.Tracker([redis_emitter.RedisEmitter(rdb=r)], default_subject) 
t.track_page_view("http://www.example.com") @@ -278,8 +431,11 @@ def test_integration_redis_default(self) -> None: def test_integration_redis_custom(self) -> None: try: import fakeredis + r = fakeredis.FakeStrictRedis() - t = tracker.Tracker([redis_emitter.RedisEmitter(rdb=r, key="custom_key")], default_subject) + t = tracker.Tracker( + [redis_emitter.RedisEmitter(rdb=r, key="custom_key")], default_subject + ) t.track_page_view("http://www.example.com") event_string = r.rpop("custom_key") event_dict = json.loads(event_string.decode("utf-8")) @@ -293,9 +449,10 @@ def test_integration_success_callback(self) -> None: callback_failure_queue = [] callback_emitter = emitters.Emitter( "localhost", - method='get', + method="get", on_success=lambda x: callback_success_queue.append(x), - on_failure=lambda x, y: callback_failure_queue.append(x)) + on_failure=lambda x, y: callback_failure_queue.append(x), + ) t = tracker.Tracker([callback_emitter], default_subject) with HTTMock(pass_response_content): t.track_page_view("http://www.example.com") @@ -313,9 +470,10 @@ def test_integration_failure_callback(self) -> None: callback_failure_queue = [] callback_emitter = emitters.Emitter( "localhost", - method='get', + method="get", on_success=lambda x: callback_success_queue.append(x), - on_failure=lambda x, y: callback_failure_queue.append(x)) + on_failure=lambda x, y: callback_failure_queue.append(x), + ) t = tracker.Tracker([callback_emitter], default_subject) with HTTMock(fail_response_content): t.track_page_view("http://www.example.com") @@ -328,12 +486,17 @@ def test_post_page_view(self) -> None: t.track_page_view("localhost", "local host", None) expected_fields = {"e": "pv", "page": "local host", "url": "localhost"} request = querystrings[-1] - self.assertEqual(request["schema"], "iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4") + self.assertEqual( + request["schema"], + "iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4", + ) for key in 
expected_fields: self.assertEqual(request["data"][0][key], expected_fields[key]) def test_post_batched(self) -> None: - default_emitter = emitters.Emitter("localhost", protocol="http", port=80, batch_size=2) + default_emitter = emitters.Emitter( + "localhost", protocol="http", port=80, batch_size=2 + ) t = tracker.Tracker(default_emitter, default_subject) with HTTMock(pass_post_response_content): t.track_struct_event("Test", "A") @@ -353,65 +516,77 @@ def test_timestamps(self) -> None: expected_timestamps = [ {"dtm": "1618790401000", "ttm": None, "stm": "1618790401000"}, {"dtm": "1618790401000", "ttm": "1358933694000", "stm": "1618790401000"}, - {"dtm": "1618790401000", "ttm": "1358933694000", "stm": "1618790401000"} + {"dtm": "1618790401000", "ttm": "1358933694000", "stm": "1618790401000"}, ] request = querystrings[-1] for i, event in enumerate(expected_timestamps): - self.assertEqual(request["data"][i].get("dtm"), expected_timestamps[i]["dtm"]) - self.assertEqual(request["data"][i].get("ttm"), expected_timestamps[i]["ttm"]) - self.assertEqual(request["data"][i].get("stm"), expected_timestamps[i]["stm"]) + self.assertEqual( + request["data"][i].get("dtm"), expected_timestamps[i]["dtm"] + ) + self.assertEqual( + request["data"][i].get("ttm"), expected_timestamps[i]["ttm"] + ) + self.assertEqual( + request["data"][i].get("stm"), expected_timestamps[i]["stm"] + ) def test_bytelimit(self) -> None: - default_emitter = emitters.Emitter("localhost", protocol="http", port=80, batch_size=5, byte_limit=420) + default_emitter = emitters.Emitter( + "localhost", protocol="http", port=80, batch_size=5, byte_limit=420 + ) t = tracker.Tracker(default_emitter, default_subject) with HTTMock(pass_post_response_content): - t.track_struct_event("Test", "A") # 140 bytes - t.track_struct_event("Test", "A") # 280 bytes - t.track_struct_event("Test", "A") # 420 bytes. 
Send - t.track_struct_event("Test", "AA") # 141 + t.track_struct_event("Test", "A") # 140 bytes + t.track_struct_event("Test", "A") # 280 bytes + t.track_struct_event("Test", "A") # 420 bytes. Send + t.track_struct_event("Test", "AA") # 141 self.assertEqual(len(querystrings[-1]["data"]), 3) self.assertEqual(default_emitter.bytes_queued, 136 + len(_version.__version__)) def test_unicode_get(self) -> None: t = tracker.Tracker([get_emitter], default_subject, encode_base64=False) - unicode_a = u'\u0107' - unicode_b = u'test.\u0107om' - test_ctx = SelfDescribingJson('iglu:a.b/c/jsonschema/1-0-0', {'test': unicode_a}) + unicode_a = "\u0107" + unicode_b = "test.\u0107om" + test_ctx = SelfDescribingJson( + "iglu:a.b/c/jsonschema/1-0-0", {"test": unicode_a} + ) with HTTMock(pass_response_content): t.track_page_view(unicode_b, context=[test_ctx]) t.track_mobile_screen_view(unicode_b, context=[test_ctx]) url_string = unquote_plus(from_querystring("url", querystrings[-2])) try: - self.assertEqual(url_string.decode('utf-8'), unicode_b) + self.assertEqual(url_string.decode("utf-8"), unicode_b) except AttributeError: # in python 3: str type contains unicode (so no 'decode') self.assertEqual(url_string, unicode_b) context_string = unquote_plus(from_querystring("co", querystrings[-1])) - actual_a = json.loads(context_string)['data'][0]['data']['test'] + actual_a = json.loads(context_string)["data"][0]["data"]["test"] self.assertEqual(actual_a, unicode_a) uepr_string = unquote_plus(from_querystring("ue_pr", querystrings[-1])) - actual_b = json.loads(uepr_string)['data']['data']['id'] + actual_b = json.loads(uepr_string)["data"]["data"]["id"] self.assertEqual(actual_b, unicode_b) def test_unicode_post(self) -> None: t = tracker.Tracker([default_emitter], default_subject, encode_base64=False) - unicode_a = u'\u0107' - unicode_b = u'test.\u0107om' - test_ctx = SelfDescribingJson('iglu:a.b/c/jsonschema/1-0-0', {'test': unicode_a}) + unicode_a = "\u0107" + unicode_b = "test.\u0107om" + 
test_ctx = SelfDescribingJson( + "iglu:a.b/c/jsonschema/1-0-0", {"test": unicode_a} + ) with HTTMock(pass_post_response_content): t.track_page_view(unicode_b, context=[test_ctx]) t.track_mobile_screen_view(unicode_b, context=[test_ctx]) pv_event = querystrings[-2] - self.assertEqual(pv_event['data'][0]['url'], unicode_b) + self.assertEqual(pv_event["data"][0]["url"], unicode_b) - in_test_ctx = json.loads(pv_event['data'][0]['co'])['data'][0]['data']['test'] + in_test_ctx = json.loads(pv_event["data"][0]["co"])["data"][0]["data"]["test"] self.assertEqual(in_test_ctx, unicode_a) sv_event = querystrings[-1] - in_uepr_name = json.loads(sv_event['data'][0]['ue_pr'])['data']['data']['id'] + in_uepr_name = json.loads(sv_event["data"][0]["ue_pr"])["data"]["data"]["id"] self.assertEqual(in_uepr_name, unicode_b) diff --git a/snowplow_tracker/test/unit/test_contracts.py b/snowplow_tracker/test/unit/test_contracts.py index 98c4a154..9a913bca 100644 --- a/snowplow_tracker/test/unit/test_contracts.py +++ b/snowplow_tracker/test/unit/test_contracts.py @@ -1,7 +1,7 @@ # """ # test_tracker.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,19 +13,21 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. 
- -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import unittest -from snowplow_tracker.contracts import form_element, greater_than, non_empty, non_empty_string, one_of, satisfies +from snowplow_tracker.contracts import ( + form_element, + greater_than, + non_empty, + non_empty_string, + one_of, + satisfies, +) class TestContracts(unittest.TestCase): - def setUp(self) -> None: pass @@ -37,25 +39,25 @@ def test_greater_than_fails(self) -> None: greater_than(0, 10) def test_non_empty_succeeds(self) -> None: - non_empty(['something']) + non_empty(["something"]) def test_non_empty_fails(self) -> None: with self.assertRaises(ValueError): non_empty([]) def test_non_empty_string_succeeds(self) -> None: - non_empty_string('something') + non_empty_string("something") def test_non_empty_string_fails(self) -> None: with self.assertRaises(ValueError): - non_empty_string('') + non_empty_string("") def test_one_of_succeeds(self) -> None: - one_of('something', ['something', 'something else']) + one_of("something", ["something", "something else"]) def test_one_of_fails(self) -> None: with self.assertRaises(ValueError): - one_of('something', ['something else']) + one_of("something", ["something else"]) def test_satisfies_succeeds(self) -> None: satisfies(10, lambda v: v == 10) @@ -65,11 +67,7 @@ def test_satisfies_fails(self) -> None: satisfies(0, lambda v: v == 10) def test_form_element_no_type(self) -> None: - elem = { - "name": "elemName", - "value": "elemValue", - "nodeName": "INPUT" - } + elem = {"name": "elemName", "value": "elemValue", "nodeName": "INPUT"} form_element(elem) def test_form_element_type_valid(self) -> None: @@ -77,7 +75,7 @@ def test_form_element_type_valid(self) -> None: "name": "elemName", "value": "elemValue", "nodeName": "TEXTAREA", - "type": "button" + "type": "button", } form_element(elem) @@ -86,40 +84,27 @@ def 
test_form_element_type_invalid(self) -> None: "name": "elemName", "value": "elemValue", "nodeName": "SELECT", - "type": "invalid" + "type": "invalid", } with self.assertRaises(ValueError): form_element(elem) def test_form_element_nodename_invalid(self) -> None: - elem = { - "name": "elemName", - "value": "elemValue", - "nodeName": "invalid" - } + elem = {"name": "elemName", "value": "elemValue", "nodeName": "invalid"} with self.assertRaises(ValueError): form_element(elem) def test_form_element_no_nodename(self) -> None: - elem = { - "name": "elemName", - "value": "elemValue" - } + elem = {"name": "elemName", "value": "elemValue"} with self.assertRaises(ValueError): form_element(elem) def test_form_element_no_value(self) -> None: - elem = { - "name": "elemName", - "nodeName": "INPUT" - } + elem = {"name": "elemName", "nodeName": "INPUT"} with self.assertRaises(ValueError): form_element(elem) def test_form_element_no_name(self) -> None: - elem = { - "value": "elemValue", - "nodeName": "INPUT" - } + elem = {"value": "elemValue", "nodeName": "INPUT"} with self.assertRaises(ValueError): form_element(elem) diff --git a/snowplow_tracker/test/unit/test_emitters.py b/snowplow_tracker/test/unit/test_emitters.py index c52eb0b2..f02be943 100644 --- a/snowplow_tracker/test/unit/test_emitters.py +++ b/snowplow_tracker/test/unit/test_emitters.py @@ -1,7 +1,7 @@ # """ # test_emitters.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,13 +13,8 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. 
- -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ - import time import unittest import unittest.mock as mock @@ -46,24 +41,29 @@ def mocked_http_success(*args: Any) -> bool: def mocked_http_failure(*args: Any) -> bool: return False + def mocked_http_response_success(*args: Any) -> int: return 200 + def mocked_http_response_failure(*args: Any) -> int: return 400 + def mocked_http_response_failure_retry(*args: Any) -> int: return 500 -class TestEmitters(unittest.TestCase): +class TestEmitters(unittest.TestCase): def setUp(self) -> None: pass def test_init(self) -> None: - e = Emitter('0.0.0.0') - self.assertEqual(e.endpoint, 'https://0.0.0.0/com.snowplowanalytics.snowplow/tp2') - self.assertEqual(e.method, 'post') + e = Emitter("0.0.0.0") + self.assertEqual( + e.endpoint, "https://0.0.0.0/com.snowplowanalytics.snowplow/tp2" + ) + self.assertEqual(e.method, "post") self.assertEqual(e.batch_size, 10) self.assertEqual(e.event_store.event_buffer, []) self.assertIsNone(e.byte_limit) @@ -74,40 +74,40 @@ def test_init(self) -> None: self.assertIsNone(e.request_timeout) def test_init_batch_size(self) -> None: - e = Emitter('0.0.0.0', batch_size=10) + e = Emitter("0.0.0.0", batch_size=10) self.assertEqual(e.batch_size, 10) def test_init_post(self) -> None: - e = Emitter('0.0.0.0') + e = Emitter("0.0.0.0") self.assertEqual(e.batch_size, DEFAULT_MAX_LENGTH) def test_init_byte_limit(self) -> None: - e = Emitter('0.0.0.0', byte_limit=512) + e = Emitter("0.0.0.0", byte_limit=512) self.assertEqual(e.bytes_queued, 0) def test_init_requests_timeout(self) -> None: - e = Emitter('0.0.0.0', request_timeout=(2.5, 5)) + e = Emitter("0.0.0.0", request_timeout=(2.5, 5)) self.assertEqual(e.request_timeout, (2.5, 5)) def test_as_collector_uri(self) -> None: - uri = Emitter.as_collector_uri('0.0.0.0') - self.assertEqual(uri, 
'https://0.0.0.0/com.snowplowanalytics.snowplow/tp2') + uri = Emitter.as_collector_uri("0.0.0.0") + self.assertEqual(uri, "https://0.0.0.0/com.snowplowanalytics.snowplow/tp2") def test_as_collector_uri_get(self) -> None: - uri = Emitter.as_collector_uri('0.0.0.0', method='get') - self.assertEqual(uri, 'https://0.0.0.0/i') + uri = Emitter.as_collector_uri("0.0.0.0", method="get") + self.assertEqual(uri, "https://0.0.0.0/i") def test_as_collector_uri_port(self) -> None: - uri = Emitter.as_collector_uri('0.0.0.0', port=9090) - self.assertEqual(uri, 'https://0.0.0.0:9090/com.snowplowanalytics.snowplow/tp2') + uri = Emitter.as_collector_uri("0.0.0.0", port=9090) + self.assertEqual(uri, "https://0.0.0.0:9090/com.snowplowanalytics.snowplow/tp2") def test_as_collector_uri_http(self) -> None: - uri = Emitter.as_collector_uri('0.0.0.0', protocol="http") - self.assertEqual(uri, 'http://0.0.0.0/com.snowplowanalytics.snowplow/tp2') + uri = Emitter.as_collector_uri("0.0.0.0", protocol="http") + self.assertEqual(uri, "http://0.0.0.0/com.snowplowanalytics.snowplow/tp2") def test_as_collector_uri_empty_string(self) -> None: with self.assertRaises(ValueError): - Emitter.as_collector_uri('') + Emitter.as_collector_uri("") def test_as_collector_uri_endpoint_protocol(self) -> None: uri = Emitter.as_collector_uri("https://0.0.0.0") @@ -116,12 +116,12 @@ def test_as_collector_uri_endpoint_protocol(self) -> None: def test_as_collector_uri_endpoint_protocol_http(self) -> None: uri = Emitter.as_collector_uri("http://0.0.0.0") self.assertEqual(uri, "http://0.0.0.0/com.snowplowanalytics.snowplow/tp2") - - @mock.patch('snowplow_tracker.Emitter.flush') + + @mock.patch("snowplow_tracker.Emitter.flush") def test_input_no_flush(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="get", batch_size=2) + e = Emitter("0.0.0.0", method="get", batch_size=2) nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) @@ -131,11 +131,11 @@ def 
test_input_no_flush(self, mok_flush: Any) -> None: self.assertFalse(e.reached_limit()) mok_flush.assert_not_called() - @mock.patch('snowplow_tracker.Emitter.flush') + @mock.patch("snowplow_tracker.Emitter.flush") def test_input_flush_byte_limit(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="get", batch_size=2, byte_limit=16) + e = Emitter("0.0.0.0", method="get", batch_size=2, byte_limit=16) nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) @@ -144,11 +144,11 @@ def test_input_flush_byte_limit(self, mok_flush: Any) -> None: self.assertTrue(e.reached_limit()) self.assertEqual(mok_flush.call_count, 1) - @mock.patch('snowplow_tracker.Emitter.flush') + @mock.patch("snowplow_tracker.Emitter.flush") def test_input_flush_buffer(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="get", batch_size=2, byte_limit=1024) + e = Emitter("0.0.0.0", method="get", batch_size=2, byte_limit=1024) nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) @@ -163,11 +163,11 @@ def test_input_flush_buffer(self, mok_flush: Any) -> None: self.assertTrue(e.reached_limit()) self.assertEqual(mok_flush.call_count, 1) - @mock.patch('snowplow_tracker.Emitter.flush') + @mock.patch("snowplow_tracker.Emitter.flush") def test_input_bytes_queued(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="get", batch_size=2, byte_limit=1024) + e = Emitter("0.0.0.0", method="get", batch_size=2, byte_limit=1024) nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) @@ -177,21 +177,23 @@ def test_input_bytes_queued(self, mok_flush: Any) -> None: e.input(nvPairs) self.assertEqual(e.bytes_queued, 48) - @mock.patch('snowplow_tracker.Emitter.flush') + @mock.patch("snowplow_tracker.Emitter.flush") def test_input_bytes_post(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0') + e = Emitter("0.0.0.0") nvPairs = {"testString": "test", 
"testNum": 2.72} e.input(nvPairs) - self.assertEqual(e.event_store.event_buffer, [{"testString": "test", "testNum": "2.72"}]) + self.assertEqual( + e.event_store.event_buffer, [{"testString": "test", "testNum": "2.72"}] + ) - @mock.patch('snowplow_tracker.Emitter.http_post') + @mock.patch("snowplow_tracker.Emitter.http_post") def test_flush(self, mok_send_events: Any) -> None: mok_send_events.side_effect = mocked_http_response_success - e = Emitter('0.0.0.0', batch_size=2, byte_limit=None) + e = Emitter("0.0.0.0", batch_size=2, byte_limit=None) nvPairs = {"n": "v"} e.input(nvPairs) e.input(nvPairs) @@ -199,11 +201,11 @@ def test_flush(self, mok_send_events: Any) -> None: self.assertEqual(mok_send_events.call_count, 1) self.assertEqual(len(e.event_store.event_buffer), 0) - @mock.patch('snowplow_tracker.Emitter.http_post') + @mock.patch("snowplow_tracker.Emitter.http_post") def test_flush_bytes_queued(self, mok_send_events: Any) -> None: mok_send_events.side_effect = mocked_http_response_success - e = Emitter('0.0.0.0', batch_size=2, byte_limit=256) + e = Emitter("0.0.0.0", batch_size=2, byte_limit=256) nvPairs = {"n": "v"} e.input(nvPairs) e.input(nvPairs) @@ -214,7 +216,7 @@ def test_flush_bytes_queued(self, mok_send_events: Any) -> None: @freeze_time("2021-04-14 00:00:02") # unix: 1618358402000 def test_attach_sent_tstamp(self) -> None: - e = Emitter('0.0.0.0') + e = Emitter("0.0.0.0") ev_list = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.attach_sent_timestamp(ev_list) @@ -223,11 +225,11 @@ def test_attach_sent_tstamp(self) -> None: reduced = reduced and "stm" in ev.keys() and ev["stm"] == "1618358402000" self.assertTrue(reduced) - @mock.patch('snowplow_tracker.Emitter.flush') + @mock.patch("snowplow_tracker.Emitter.flush") def test_flush_timer(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', batch_size=10) + e = Emitter("0.0.0.0", batch_size=10) ev_list = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] for i in ev_list: 
e.input(i) @@ -237,71 +239,87 @@ def test_flush_timer(self, mok_flush: Any) -> None: time.sleep(5) self.assertGreaterEqual(mok_flush.call_count, 1) - @mock.patch('snowplow_tracker.Emitter.http_get') + @mock.patch("snowplow_tracker.Emitter.http_get") def test_send_events_get_success(self, mok_http_get: Any) -> None: mok_http_get.side_effect = mocked_http_response_success mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', method="get", batch_size=10, on_success=mok_success, on_failure=mok_failure) + e = Emitter( + "0.0.0.0", + method="get", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) mok_success.assert_called_once_with(evBuffer) mok_failure.assert_not_called() - @mock.patch('snowplow_tracker.Emitter.http_get') + @mock.patch("snowplow_tracker.Emitter.http_get") def test_send_events_get_failure(self, mok_http_get: Any) -> None: mok_http_get.side_effect = mocked_http_response_failure mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', method="get", batch_size=10, on_success=mok_success, on_failure=mok_failure) + e = Emitter( + "0.0.0.0", + method="get", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) mok_success.assert_not_called() mok_failure.assert_called_once_with(0, evBuffer) - @mock.patch('snowplow_tracker.Emitter.http_post') + @mock.patch("snowplow_tracker.Emitter.http_post") def test_send_events_post_success(self, mok_http_post: Any) -> None: mok_http_post.side_effect = mocked_http_response_success mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', batch_size=10, on_success=mok_success, on_failure=mok_failure) + e = 
Emitter( + "0.0.0.0", batch_size=10, on_success=mok_success, on_failure=mok_failure + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) mok_success.assert_called_once_with(evBuffer) mok_failure.assert_not_called() - @mock.patch('snowplow_tracker.Emitter.http_post') + @mock.patch("snowplow_tracker.Emitter.http_post") def test_send_events_post_failure(self, mok_http_post: Any) -> None: mok_http_post.side_effect = mocked_http_response_failure mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', batch_size=10, on_success=mok_success, on_failure=mok_failure) + e = Emitter( + "0.0.0.0", batch_size=10, on_success=mok_success, on_failure=mok_failure + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) mok_success.assert_not_called() mok_failure.assert_called_with(0, evBuffer) - @mock.patch('snowplow_tracker.emitters.requests.post') + @mock.patch("snowplow_tracker.emitters.requests.post") def test_http_post_connect_timeout_error(self, mok_post_request: Any) -> None: mok_post_request.side_effect = ConnectTimeout - e = Emitter('0.0.0.0') + e = Emitter("0.0.0.0") response = e.http_post("dummy_string") post_succeeded = Emitter.is_good_status_code(response) self.assertFalse(post_succeeded) - @mock.patch('snowplow_tracker.emitters.requests.post') + @mock.patch("snowplow_tracker.emitters.requests.post") def test_http_get_connect_timeout_error(self, mok_post_request: Any) -> None: mok_post_request.side_effect = ConnectTimeout - e = Emitter('0.0.0.0', method='get') + e = Emitter("0.0.0.0", method="get") response = e.http_get({"a": "b"}) get_succeeded = Emitter.is_good_status_code(response) self.assertFalse(get_succeeded) @@ -309,11 +327,13 @@ def test_http_get_connect_timeout_error(self, mok_post_request: Any) -> None: ### # AsyncEmitter ### - @mock.patch('snowplow_tracker.AsyncEmitter.flush') + @mock.patch("snowplow_tracker.AsyncEmitter.flush") def 
test_async_emitter_input(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - ae = AsyncEmitter('0.0.0.0', port=9090, method="get", batch_size=3, thread_count=5) + ae = AsyncEmitter( + "0.0.0.0", port=9090, method="get", batch_size=3, thread_count=5 + ) self.assertTrue(ae.queue.empty()) ae.input({"a": "aa"}) @@ -325,11 +345,18 @@ def test_async_emitter_input(self, mok_flush: Any) -> None: ae.input({"c": "cc"}) # meet buffer size self.assertEqual(mok_flush.call_count, 1) - @mock.patch('snowplow_tracker.AsyncEmitter.send_events') + @mock.patch("snowplow_tracker.AsyncEmitter.send_events") def test_async_emitter_sync_flash(self, mok_send_events: Any) -> None: mok_send_events.side_effect = mocked_send_events - ae = AsyncEmitter('0.0.0.0', port=9090, method="get", batch_size=3, thread_count=5, byte_limit=1024) + ae = AsyncEmitter( + "0.0.0.0", + port=9090, + method="get", + batch_size=3, + thread_count=5, + byte_limit=1024, + ) self.assertTrue(ae.queue.empty()) ae.input({"a": "aa"}) @@ -343,52 +370,68 @@ def test_async_emitter_sync_flash(self, mok_send_events: Any) -> None: self.assertEqual(ae.bytes_queued, 0) self.assertEqual(mok_send_events.call_count, 1) - @mock.patch('snowplow_tracker.Emitter.http_get') + @mock.patch("snowplow_tracker.Emitter.http_get") def test_async_send_events_get_success(self, mok_http_get: Any) -> None: mok_http_get.side_effect = mocked_http_response_success mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - ae = AsyncEmitter('0.0.0.0', method="get", batch_size=10, on_success=mok_success, on_failure=mok_failure) + ae = AsyncEmitter( + "0.0.0.0", + method="get", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] ae.send_events(evBuffer) mok_success.assert_called_once_with(evBuffer) mok_failure.assert_not_called() - @mock.patch('snowplow_tracker.Emitter.http_get') + 
@mock.patch("snowplow_tracker.Emitter.http_get") def test_async_send_events_get_failure(self, mok_http_get: Any) -> None: mok_http_get.side_effect = mocked_http_response_failure mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - ae = AsyncEmitter('0.0.0.0', method="get", batch_size=10, on_success=mok_success, on_failure=mok_failure) + ae = AsyncEmitter( + "0.0.0.0", + method="get", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] ae.send_events(evBuffer) mok_success.assert_not_called() mok_failure.assert_called_once_with(0, evBuffer) - @mock.patch('snowplow_tracker.Emitter.http_post') + @mock.patch("snowplow_tracker.Emitter.http_post") def test_async_send_events_post_success(self, mok_http_post: Any) -> None: mok_http_post.side_effect = mocked_http_response_success mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - ae = Emitter('0.0.0.0', batch_size=10, on_success=mok_success, on_failure=mok_failure) + ae = Emitter( + "0.0.0.0", batch_size=10, on_success=mok_success, on_failure=mok_failure + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] ae.send_events(evBuffer) mok_success.assert_called_once_with(evBuffer) mok_failure.assert_not_called() - @mock.patch('snowplow_tracker.Emitter.http_post') + @mock.patch("snowplow_tracker.Emitter.http_post") def test_async_send_events_post_failure(self, mok_http_post: Any) -> None: mok_http_post.side_effect = mocked_http_response_failure mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - ae = Emitter('0.0.0.0', batch_size=10, on_success=mok_success, on_failure=mok_failure) + ae = Emitter( + "0.0.0.0", batch_size=10, on_success=mok_success, on_failure=mok_failure + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] ae.send_events(evBuffer) @@ -396,115 +439,147 @@ def 
test_async_send_events_post_failure(self, mok_http_post: Any) -> None: mok_failure.assert_called_with(0, evBuffer) # Unicode - @mock.patch('snowplow_tracker.AsyncEmitter.flush') + @mock.patch("snowplow_tracker.AsyncEmitter.flush") def test_input_unicode_get(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - payload = {"unicode": u'\u0107', "alsoAscii": "abc"} - ae = AsyncEmitter('0.0.0.0', method="get", batch_size=2) + payload = {"unicode": "\u0107", "alsoAscii": "abc"} + ae = AsyncEmitter("0.0.0.0", method="get", batch_size=2) ae.input(payload) self.assertEqual(len(ae.event_store.event_buffer), 1) self.assertDictEqual(payload, ae.event_store.event_buffer[0]) - @mock.patch('snowplow_tracker.AsyncEmitter.flush') + @mock.patch("snowplow_tracker.AsyncEmitter.flush") def test_input_unicode_post(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - payload = {"unicode": u'\u0107', "alsoAscii": "abc"} - ae = AsyncEmitter('0.0.0.0', batch_size=2) + payload = {"unicode": "\u0107", "alsoAscii": "abc"} + ae = AsyncEmitter("0.0.0.0", batch_size=2) ae.input(payload) self.assertEqual(len(ae.event_store.event_buffer), 1) self.assertDictEqual(payload, ae.event_store.event_buffer[0]) - @mock.patch('snowplow_tracker.Emitter.http_post') + @mock.patch("snowplow_tracker.Emitter.http_post") def test_send_events_post_retry(self, mok_http_post: Any) -> None: mok_http_post.side_effect = mocked_http_response_failure_retry mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', batch_size=10, on_success=mok_success, on_failure=mok_failure) + e = Emitter( + "0.0.0.0", batch_size=10, on_success=mok_success, on_failure=mok_failure + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) - + mok_http_post.side_effect = mocked_http_response_success time.sleep(5) mok_failure.assert_called_with(0, evBuffer) mok_success.assert_called_with(evBuffer) - 
@mock.patch('snowplow_tracker.Emitter.http_get') + @mock.patch("snowplow_tracker.Emitter.http_get") def test_send_events_get_retry(self, mok_http_get: Any) -> None: mok_http_get.side_effect = mocked_http_response_failure_retry mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', method='get', batch_size=1, on_success=mok_success, on_failure=mok_failure) + e = Emitter( + "0.0.0.0", + method="get", + batch_size=1, + on_success=mok_success, + on_failure=mok_failure, + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) - + mok_http_get.side_effect = mocked_http_response_success time.sleep(5) mok_failure.assert_called_with(0, evBuffer) mok_success.assert_called_with(evBuffer) - @mock.patch('snowplow_tracker.Emitter.http_get') + @mock.patch("snowplow_tracker.Emitter.http_get") def test_send_events_get_no_retry(self, mok_http_get: Any) -> None: mok_http_get.side_effect = mocked_http_response_failure mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', method='get', batch_size=1, on_success=mok_success, on_failure=mok_failure) + e = Emitter( + "0.0.0.0", + method="get", + batch_size=1, + on_success=mok_success, + on_failure=mok_failure, + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) - + mok_failure.assert_called_once_with(0, evBuffer) mok_success.assert_not_called() - @mock.patch('snowplow_tracker.Emitter.http_post') + @mock.patch("snowplow_tracker.Emitter.http_post") def test_send_events_post_no_retry(self, mok_http_post: Any) -> None: mok_http_post.side_effect = mocked_http_response_failure mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', method='get', batch_size=1, on_success=mok_success, on_failure=mok_failure) + e = Emitter( + "0.0.0.0", + method="get", + 
batch_size=1, + on_success=mok_success, + on_failure=mok_failure, + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) - + mok_failure.assert_called_once_with(0, evBuffer) mok_success.assert_not_called() - @mock.patch('snowplow_tracker.Emitter.http_post') + @mock.patch("snowplow_tracker.Emitter.http_post") def test_send_events_post_custom_retry(self, mok_http_post: Any) -> None: mok_http_post.side_effect = mocked_http_response_failure mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', batch_size=10, on_success=mok_success, on_failure=mok_failure, custom_retry_codes={400: True}) + e = Emitter( + "0.0.0.0", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + custom_retry_codes={400: True}, + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) - + mok_http_post.side_effect = mocked_http_response_success time.sleep(5) mok_failure.assert_called_with(0, evBuffer) mok_success.assert_called_with(evBuffer) - @mock.patch('snowplow_tracker.Emitter.http_get') + @mock.patch("snowplow_tracker.Emitter.http_get") def test_send_events_get_custom_retry(self, mok_http_get: Any) -> None: mok_http_get.side_effect = mocked_http_response_failure mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', method='get',batch_size=10, on_success=mok_success, on_failure=mok_failure, custom_retry_codes={400: True}) + e = Emitter( + "0.0.0.0", + method="get", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + custom_retry_codes={400: True}, + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) - + mok_http_get.side_effect = mocked_http_response_success time.sleep(5) mok_failure.assert_called_with(0, evBuffer) mok_success.assert_called_with(evBuffer) - diff --git a/snowplow_tracker/test/unit/test_in_memory_event_store.py 
b/snowplow_tracker/test/unit/test_in_memory_event_store.py index 90c9e887..93a0c8b2 100644 --- a/snowplow_tracker/test/unit/test_in_memory_event_store.py +++ b/snowplow_tracker/test/unit/test_in_memory_event_store.py @@ -1,7 +1,7 @@ # """ # test_in_memory_event_store.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import unittest diff --git a/snowplow_tracker/test/unit/test_payload.py b/snowplow_tracker/test/unit/test_payload.py index 6d89caa1..c174e8f4 100644 --- a/snowplow_tracker/test/unit/test_payload.py +++ b/snowplow_tracker/test/unit/test_payload.py @@ -1,7 +1,7 @@ # """ # test_payload.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. 
- -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import json @@ -55,7 +51,6 @@ def date_encoder(o: Any) -> str: class TestPayload(unittest.TestCase): - def setUp(self) -> None: pass @@ -64,7 +59,12 @@ def test_object_generation(self) -> None: self.assertDictEqual({}, p.nv_pairs) def test_object_generation_2(self) -> None: - p = payload.Payload({"test1": "result1", "test2": "result2", }) + p = payload.Payload( + { + "test1": "result1", + "test2": "result2", + } + ) output = {"test1": "result1", "test2": "result2"} self.assertDictEqual(output, p.nv_pairs) @@ -72,7 +72,10 @@ def test_add(self) -> None: p = payload.Payload() p.add("name1", "value1") p.add("name2", "value2") - output = {"name1": "value1", "name2": "value2", } + output = { + "name1": "value1", + "name2": "value2", + } self.assertDictEqual(output, p.nv_pairs) def test_add_empty_val(self) -> None: @@ -88,51 +91,58 @@ def test_add_none(self) -> None: self.assertDictEqual(output, p.nv_pairs) def test_add_dict(self) -> None: - p = payload.Payload({"n1": "v1", "n2": "v2", }) - p.add_dict({"name4": 4, "name3": 3}) # Order doesn't matter + p = payload.Payload( + { + "n1": "v1", + "n2": "v2", + } + ) + p.add_dict({"name4": 4, "name3": 3}) # Order doesn't matter output = {"n1": "v1", "n2": "v2", "name3": 3, "name4": 4} self.assertDictEqual(output, p.nv_pairs) def test_add_json_empty(self) -> None: - p = payload.Payload({'name': 'value'}) + p = payload.Payload({"name": "value"}) input = {} - p.add_json(input, False, 'ue_px', 'ue_pr') - output = {'name': 'value'} + p.add_json(input, False, "ue_px", "ue_pr") + output = {"name": "value"} self.assertDictEqual(output, p.nv_pairs) def test_add_json_none(self) -> None: - p = payload.Payload({'name': 'value'}) + p = payload.Payload({"name": "value"}) input = None - p.add_json(input, False, 'ue_px', 'ue_pr') - output = {'name': 'value'} + p.add_json(input, 
False, "ue_px", "ue_pr") + output = {"name": "value"} self.assertDictEqual(output, p.nv_pairs) def test_add_json_encode_false(self) -> None: p = payload.Payload() - input = {'a': 1} - p.add_json(input, False, 'ue_px', 'ue_pr') - self.assertTrue('ue_pr' in p.nv_pairs.keys()) - self.assertFalse('ue_px' in p.nv_pairs.keys()) + input = {"a": 1} + p.add_json(input, False, "ue_px", "ue_pr") + self.assertTrue("ue_pr" in p.nv_pairs.keys()) + self.assertFalse("ue_px" in p.nv_pairs.keys()) def test_add_json_encode_true(self) -> None: p = payload.Payload() - input = {'a': 1} - p.add_json(input, True, 'ue_px', 'ue_pr') - self.assertFalse('ue_pr' in p.nv_pairs.keys()) - self.assertTrue('ue_px' in p.nv_pairs.keys()) + input = {"a": 1} + p.add_json(input, True, "ue_px", "ue_pr") + self.assertFalse("ue_pr" in p.nv_pairs.keys()) + self.assertTrue("ue_px" in p.nv_pairs.keys()) def test_add_json_unicode_encode_false(self) -> None: p = payload.Payload() - input = {'a': u'\u0107', u'\u0107': 'b'} - p.add_json(input, False, 'ue_px', 'ue_pr') + input = {"a": "\u0107", "\u0107": "b"} + p.add_json(input, False, "ue_px", "ue_pr") ue_pr = json.loads(p.nv_pairs["ue_pr"]) self.assertDictEqual(input, ue_pr) def test_add_json_unicode_encode_true(self) -> None: p = payload.Payload() - input = {'a': '\u0107', '\u0107': 'b'} - p.add_json(input, True, 'ue_px', 'ue_pr') - ue_px = json.loads(base64.urlsafe_b64decode(p.nv_pairs["ue_px"]).decode('utf-8')) + input = {"a": "\u0107", "\u0107": "b"} + p.add_json(input, True, "ue_px", "ue_pr") + ue_px = json.loads( + base64.urlsafe_b64decode(p.nv_pairs["ue_px"]).decode("utf-8") + ) self.assertDictEqual(input, ue_px) def test_add_json_with_custom_enc(self) -> None: @@ -148,5 +158,5 @@ def test_add_json_with_custom_enc(self) -> None: self.assertTrue(is_subset({"key1": "2020-02-01"}, results)) def test_subject_get(self) -> None: - p = payload.Payload({'name1': 'val1'}) + p = payload.Payload({"name1": "val1"}) self.assertDictEqual(p.get(), p.nv_pairs) diff --git 
a/snowplow_tracker/test/unit/test_subject.py b/snowplow_tracker/test/unit/test_subject.py index 93e2b278..7645781a 100644 --- a/snowplow_tracker/test/unit/test_subject.py +++ b/snowplow_tracker/test/unit/test_subject.py @@ -1,7 +1,7 @@ # """ # test_subject.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import unittest @@ -26,7 +22,6 @@ class TestSubject(unittest.TestCase): - def setUp(self) -> None: pass @@ -61,18 +56,14 @@ def test_subject_0(self) -> None: "duid": "domain-user-id", "sid": "domain-session-id", "vid": 1, - "tnuid": "network-user-id" + "tnuid": "network-user-id", } self.assertDictEqual(s.standard_nv_pairs, exp) def test_subject_1(self) -> None: s = _subject.Subject().set_platform("srv").set_user_id("1234").set_lang("EN") - exp = { - "p": "srv", - "uid": "1234", - "lang": "EN" - } + exp = {"p": "srv", "uid": "1234", "lang": "EN"} self.assertDictEqual(s.standard_nv_pairs, exp) with pytest.raises(KeyError): diff --git a/snowplow_tracker/test/unit/test_tracker.py b/snowplow_tracker/test/unit/test_tracker.py index 91a2eff5..9db98395 100644 --- a/snowplow_tracker/test/unit/test_tracker.py +++ b/snowplow_tracker/test/unit/test_tracker.py @@ -1,7 +1,7 @@ # """ # test_tracker.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. 
# This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import re diff --git a/snowplow_tracker/tracker.py b/snowplow_tracker/tracker.py index 7791b103..4f9d9047 100644 --- a/snowplow_tracker/tracker.py +++ b/snowplow_tracker/tracker.py @@ -1,7 +1,7 @@ # """ # tracker.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import time diff --git a/snowplow_tracker/tracker_configuration.py b/snowplow_tracker/tracker_configuration.py index 9c717721..af2a4b9d 100644 --- a/snowplow_tracker/tracker_configuration.py +++ b/snowplow_tracker/tracker_configuration.py @@ -1,7 +1,7 @@ # """ # tracker_configuration.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. 
# This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Jack Keene, Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ from typing import Optional diff --git a/snowplow_tracker/typing.py b/snowplow_tracker/typing.py index db27a671..52b68664 100644 --- a/snowplow_tracker/typing.py +++ b/snowplow_tracker/typing.py @@ -1,7 +1,7 @@ # """ # typing.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. 
- -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock, Matus Tomlein -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ from typing import Dict, List, Callable, Any, Optional, Union, Tuple From 65539bd80f57a77335fc61890c70e0e7ed7a1c0d Mon Sep 17 00:00:00 2001 From: Jack-Keene Date: Mon, 23 Jan 2023 16:51:44 +0000 Subject: [PATCH 17/51] Prepare for release 0.13.0 --- CHANGES.txt | 11 +++++++++++ docs/source/conf.py | 2 +- setup.py | 2 +- snowplow_tracker/_version.py | 2 +- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 06ba1ae9..d6e2f6be 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,14 @@ +Version 0.13.0 (2023-01-24) +--------------------------- +Adds Snowplow Interface (#295) +Adds retry for failed events (#296) +Adds customisable retry codes (#297) +Adds EventStore with max limit (#309) +Adds Snowplow Example App (#302) +Fix Collector URL with trailing '/' (#300) +Rename unstruct_event to self_describing_event (#298) +Upgrade `set-output` in cd (#294) + Version 0.12.0 (2022-11-03) --------------------------- Adds Domain Session ID and Domain Session Index to Subject class (#282) (Thanks to @cpnat) diff --git a/docs/source/conf.py b/docs/source/conf.py index f18cc0eb..9edb36f8 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -28,7 +28,7 @@ author = 'Alex Dean, Paul Boocock, Matus Tomlein, Jack Keene' # The full version, including alpha/beta/rc tags -release = '0.12' +release = "0.13" # -- General configuration --------------------------------------------------- diff --git a/setup.py b/setup.py index e12f7835..2ae76007 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ setup( name="snowplow-tracker", - version="0.12.0", + version="0.13.0", author=authors_str, author_email=authors_email_str, packages=[ diff --git a/snowplow_tracker/_version.py b/snowplow_tracker/_version.py index aa7e4852..aacd565c 100644 --- 
a/snowplow_tracker/_version.py +++ b/snowplow_tracker/_version.py @@ -15,6 +15,6 @@ # language governing permissions and limitations there under. # """ -__version_info__ = (0, 12, 0) +__version_info__ = (0, 13, 0) __version__ = ".".join(str(x) for x in __version_info__) __build_version__ = __version__ + "" From 139f12cc13eddc7d162db381542a8f445c4015c3 Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Thu, 16 Feb 2023 10:20:22 +0000 Subject: [PATCH 18/51] Add deprecation warnings for V1 changes (close #315) PR #316 * Add Celery deprecation warning * Add Redis deprecation warning * Add ecommerce event deprecation warnings --- snowplow_tracker/celery/celery_emitter.py | 7 ++++++- snowplow_tracker/redis/redis_emitter.py | 6 ++++++ snowplow_tracker/tracker.py | 20 ++++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/snowplow_tracker/celery/celery_emitter.py b/snowplow_tracker/celery/celery_emitter.py index bb1a1b96..c92712e3 100644 --- a/snowplow_tracker/celery/celery_emitter.py +++ b/snowplow_tracker/celery/celery_emitter.py @@ -17,6 +17,7 @@ import logging from typing import Any, Optional +from warnings import warn from snowplow_tracker.emitters import Emitter from snowplow_tracker.typing import HttpProtocol, Method @@ -41,7 +42,6 @@ class CeleryEmitter(Emitter): """ if _CELERY_OPT: - celery_app = None def __init__( @@ -53,6 +53,11 @@ def __init__( batch_size: Optional[int] = None, byte_limit: Optional[int] = None, ) -> None: + warn( + "The Celery Emitter will be deprecated in future versions.", + DeprecationWarning, + stacklevel=2, + ) super(CeleryEmitter, self).__init__( endpoint, protocol, port, method, batch_size, None, None, byte_limit ) diff --git a/snowplow_tracker/redis/redis_emitter.py b/snowplow_tracker/redis/redis_emitter.py index bb4cf5f4..86eee6b9 100644 --- a/snowplow_tracker/redis/redis_emitter.py +++ b/snowplow_tracker/redis/redis_emitter.py @@ -18,6 +18,7 @@ import json import 
logging from typing import Any, Optional +from warnings import warn from snowplow_tracker.typing import PayloadDict, RedisProtocol _REDIS_OPT = True @@ -48,6 +49,11 @@ def __init__( :param key: The Redis key for the list of events :type key: string """ + warn( + "The Redis Emitter will be deprecated in future versions.", + DeprecationWarning, + stacklevel=2, + ) if rdb is None: rdb = redis.StrictRedis() diff --git a/snowplow_tracker/tracker.py b/snowplow_tracker/tracker.py index 4f9d9047..20882a4f 100644 --- a/snowplow_tracker/tracker.py +++ b/snowplow_tracker/tracker.py @@ -345,6 +345,11 @@ def track_add_to_cart( :type event_subject: subject | None :rtype: tracker """ + warn( + "track_add_to_cart will be deprecated in future versions.", + DeprecationWarning, + stacklevel=2, + ) non_empty_string(sku) properties = {} @@ -400,6 +405,11 @@ def track_remove_from_cart( :type event_subject: subject | None :rtype: tracker """ + warn( + "track_remove_from_cart will be deprecated in future versions.", + DeprecationWarning, + stacklevel=2, + ) non_empty_string(sku) properties = {} @@ -606,6 +616,11 @@ def track_ecommerce_transaction_item( :type event_subject: subject | None :rtype: tracker """ + warn( + "track_ecommerce_transaction_item will be deprecated in future versions.", + DeprecationWarning, + stacklevel=2, + ) non_empty_string(order_id) non_empty_string(sku) @@ -666,6 +681,11 @@ def track_ecommerce_transaction( :type event_subject: subject | None :rtype: tracker """ + warn( + "track_ecommerce_transaction will be deprecated in future versions.", + DeprecationWarning, + stacklevel=2, + ) non_empty_string(order_id) pb = payload.Payload() From fb7aa1cde3d91d017a3a684d903054dfadad7a2b Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Mon, 27 Feb 2023 14:14:55 +0000 Subject: [PATCH 19/51] Update GH actions to use Node16 (close #317) PR #318 * Update GH actions to Node16 --- .github/workflows/cd.yml | 18 +++++++++--------- 
.github/workflows/ci.yml | 4 ++-- .github/workflows/documentation.yml | 2 +- .github/workflows/snyk.yml | 4 ++-- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index c028f253..5e95d341 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -13,10 +13,10 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 - name: Get tag and tracker versions id: version @@ -40,10 +40,10 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} @@ -57,7 +57,7 @@ jobs: python setup.py sdist bdist_wheel - name: Upload artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: distfiles_${{ github.run_id }} path: dist @@ -68,15 +68,15 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: '3.x' - name: Download artifacts - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: distfiles_${{ github.run_id }} path: ${{ github.workspace }}/dist @@ -100,7 +100,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Release uses: softprops/action-gh-release@v1 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e5d5fb09..346c62f8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,10 +28,10 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ 
matrix.python-version }} diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index eefd7831..c50ac5e2 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -8,7 +8,7 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v3 - uses: ammaraskar/sphinx-action@master with: docs-folder: "docs/" diff --git a/.github/workflows/snyk.yml b/.github/workflows/snyk.yml index 7e86a6ba..b2e36c27 100644 --- a/.github/workflows/snyk.yml +++ b/.github/workflows/snyk.yml @@ -9,11 +9,11 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: snyk/actions/setup@master - name: Set up Python 3.8 - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: 3.8 - name: Install dependencies From b2993c784fd0b77bc417d5c9684f15a0446b538c Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Tue, 7 Mar 2023 10:42:08 +0000 Subject: [PATCH 20/51] Add event store to snowplow interface (close #320) (#321) - Add event store to snowplow interface --- snowplow_tracker/snowplow.py | 1 + 1 file changed, 1 insertion(+) diff --git a/snowplow_tracker/snowplow.py b/snowplow_tracker/snowplow.py index 92e96461..b967cdec 100644 --- a/snowplow_tracker/snowplow.py +++ b/snowplow_tracker/snowplow.py @@ -80,6 +80,7 @@ def create_tracker( byte_limit=emitter_config.byte_limit, request_timeout=emitter_config.request_timeout, custom_retry_codes=emitter_config.custom_retry_codes, + event_store=emitter_config.event_store, ) tracker = Tracker( From c5a916687f4c43ad811973af2a8cc77020c9164e Mon Sep 17 00:00:00 2001 From: Jack-Keene Date: Tue, 21 Mar 2023 15:53:11 +0000 Subject: [PATCH 21/51] Add missing parameters to async emitter (close #323) --- snowplow_tracker/emitters.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git 
a/snowplow_tracker/emitters.py b/snowplow_tracker/emitters.py index 760fc398..19151885 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -439,8 +439,10 @@ def __init__( on_failure: Optional[FailureCallback] = None, thread_count: int = 1, byte_limit: Optional[int] = None, + request_timeout: Optional[Union[float, Tuple[float, float]]] = None, max_retry_delay_seconds: int = 60, buffer_capacity: int = None, + custom_retry_codes: Dict[int, bool] = {}, event_store: Optional[EventStore] = None, ) -> None: """ @@ -476,17 +478,19 @@ def __init__( :type event_store: EventStore """ super(AsyncEmitter, self).__init__( - endpoint, - protocol, - port, - method, - batch_size, - on_success, - on_failure, - byte_limit, - max_retry_delay_seconds, - buffer_capacity, - event_store, + endpoint=endpoint, + protocol=protocol, + port=port, + method=method, + batch_size=batch_size, + on_success=on_success, + on_failure=on_failure, + byte_limit=byte_limit, + request_timeout=request_timeout, + max_retry_delay_seconds=max_retry_delay_seconds, + buffer_capacity=buffer_capacity, + custom_retry_codes=custom_retry_codes, + event_store=event_store, ) self.queue = Queue() for i in range(thread_count): From b04e35a301900464cc2246e886a4a58c2eea7f6e Mon Sep 17 00:00:00 2001 From: Jack-Keene Date: Tue, 21 Mar 2023 16:17:53 +0000 Subject: [PATCH 22/51] Prepare for release 0.14.0 --- CHANGES.txt | 7 +++++++ docs/source/conf.py | 2 +- setup.py | 2 +- snowplow_tracker/_version.py | 2 +- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index d6e2f6be..9a9b82b9 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,10 @@ +Version 0.14.0 (2023-03-21) +--------------------------- +Adds deprecation warnings for V1 changes (#315) +Update GH actions to use Node16 (#317) +Adds event store parameter to Snowplow interface (#320) +Adds missing parameters to async emitter (#323) + Version 0.13.0 (2023-01-24) --------------------------- Adds Snowplow 
Interface (#295) diff --git a/docs/source/conf.py b/docs/source/conf.py index 9edb36f8..59bfe9df 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -28,7 +28,7 @@ author = 'Alex Dean, Paul Boocock, Matus Tomlein, Jack Keene' # The full version, including alpha/beta/rc tags -release = "0.13" +release = "0.14" # -- General configuration --------------------------------------------------- diff --git a/setup.py b/setup.py index 2ae76007..b7968c9a 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ setup( name="snowplow-tracker", - version="0.13.0", + version="0.14.0", author=authors_str, author_email=authors_email_str, packages=[ diff --git a/snowplow_tracker/_version.py b/snowplow_tracker/_version.py index aacd565c..d9f39c84 100644 --- a/snowplow_tracker/_version.py +++ b/snowplow_tracker/_version.py @@ -15,6 +15,6 @@ # language governing permissions and limitations there under. # """ -__version_info__ = (0, 13, 0) +__version_info__ = (0, 14, 0) __version__ = ".".join(str(x) for x in __version_info__) __build_version__ = __version__ + "" From 771ec8057d89e09207451892294269123b73cd0e Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Tue, 18 Apr 2023 12:00:49 +0100 Subject: [PATCH 23/51] Use Requests session for sending events (close #221) PR #332 * Add requests.Session to emitter * Add optional session requests * Add session to emitter configuration * Add session to Snowplow api * Add request_method property * Update request method assignment * Add session to async emitter --- snowplow_tracker/emitter_configuration.py | 16 ++++++++++++++++ snowplow_tracker/emitters.py | 13 +++++++++++-- snowplow_tracker/snowplow.py | 1 + 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/snowplow_tracker/emitter_configuration.py b/snowplow_tracker/emitter_configuration.py index 87fa6c9e..1cf90238 100644 --- a/snowplow_tracker/emitter_configuration.py +++ b/snowplow_tracker/emitter_configuration.py @@ -18,6 +18,7 @@ 
from typing import Optional, Union, Tuple, Dict from snowplow_tracker.typing import SuccessCallback, FailureCallback from snowplow_tracker.event_store import EventStore +import requests class EmitterConfiguration(object): @@ -31,6 +32,7 @@ def __init__( buffer_capacity: Optional[int] = None, custom_retry_codes: Dict[int, bool] = {}, event_store: Optional[EventStore] = None, + session: Optional[requests.Session] = None, ) -> None: """ Configuration for the emitter that sends events to the Snowplow collector. @@ -57,6 +59,8 @@ def __init__( :type custom_retry_codes: dict :param event_store: Stores the event buffer and buffer capacity. Default is an InMemoryEventStore object with buffer_capacity of 10,000 events. :type event_store: EventStore | None + :param session: Persist parameters across requests by using a session object + :type session: request.Session | None """ self.batch_size = batch_size @@ -67,6 +71,7 @@ def __init__( self.buffer_capacity = buffer_capacity self.custom_retry_codes = custom_retry_codes self.event_store = event_store + self.session = session @property def batch_size(self) -> Optional[int]: @@ -197,3 +202,14 @@ def event_store(self) -> Optional[EventStore]: @event_store.setter def event_store(self, value: Optional[EventStore]): self._event_store = value + + @property + def session(self) -> Optional[requests.Session]: + """ + Persist parameters across requests using a requests.Session object + """ + return self._session + + @session.setter + def session(self, value: Optional[requests.Session]): + self._session = value diff --git a/snowplow_tracker/emitters.py b/snowplow_tracker/emitters.py index 19151885..af233566 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -69,6 +69,7 @@ def __init__( buffer_capacity: Optional[int] = None, custom_retry_codes: Dict[int, bool] = {}, event_store: Optional[EventStore] = None, + session: Optional[requests.Session] = None, ) -> None: """ :param endpoint: The collector URL. 
If protocol is not set in endpoint it will automatically set to "https://" - this is done automatically. @@ -107,6 +108,8 @@ def __init__( :type custom_retry_codes: dict :param event_store: Stores the event buffer and buffer capacity. Default is an InMemoryEventStore object with buffer_capacity of 10,000 events. :type event_store: EventStore | None + :param session: Persist parameters across requests by using a session object + :type session: requests.Session | None """ one_of(protocol, PROTOCOLS) one_of(method, METHODS) @@ -153,6 +156,8 @@ def __init__( self.custom_retry_codes = custom_retry_codes logger.info("Emitter initialized with endpoint " + self.endpoint) + self.request_method = requests if session is None else session + @staticmethod def as_collector_uri( endpoint: str, @@ -243,7 +248,7 @@ def http_post(self, data: str) -> int: logger.info("Sending POST request to %s..." % self.endpoint) logger.debug("Payload: %s" % data) try: - r = requests.post( + r = self.request_method.post( self.endpoint, data=data, headers={"Content-Type": "application/json; charset=utf-8"}, @@ -263,7 +268,7 @@ def http_get(self, payload: PayloadDict) -> int: logger.info("Sending GET request to %s..." % self.endpoint) logger.debug("Payload: %s" % payload) try: - r = requests.get( + r = self.request_method.get( self.endpoint, params=payload, timeout=self.request_timeout ) except requests.RequestException as e: @@ -444,6 +449,7 @@ def __init__( buffer_capacity: int = None, custom_retry_codes: Dict[int, bool] = {}, event_store: Optional[EventStore] = None, + session: Optional[requests.Session] = None, ) -> None: """ :param endpoint: The collector URL. If protocol is not set in endpoint it will automatically set to "https://" - this is done automatically. @@ -476,6 +482,8 @@ def __init__( :type buffer_capacity: int :param event_store: Stores the event buffer and buffer capacity. Default is an InMemoryEventStore object with buffer_capacity of 10,000 events. 
:type event_store: EventStore + :param session: Persist parameters across requests by using a session object + :type session: requests.Session | None """ super(AsyncEmitter, self).__init__( endpoint=endpoint, @@ -491,6 +499,7 @@ def __init__( buffer_capacity=buffer_capacity, custom_retry_codes=custom_retry_codes, event_store=event_store, + session=session, ) self.queue = Queue() for i in range(thread_count): diff --git a/snowplow_tracker/snowplow.py b/snowplow_tracker/snowplow.py index b967cdec..953c1587 100644 --- a/snowplow_tracker/snowplow.py +++ b/snowplow_tracker/snowplow.py @@ -81,6 +81,7 @@ def create_tracker( request_timeout=emitter_config.request_timeout, custom_retry_codes=emitter_config.custom_retry_codes, event_store=emitter_config.event_store, + session=emitter_config.session, ) tracker = Tracker( From c41a966bb37cbcedfc513c9c286c5f927667671b Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Tue, 18 Apr 2023 12:46:11 +0100 Subject: [PATCH 24/51] Add Redis example app (close #322) PR #331 * Add redis example app * Add redis worker * Add example app subfolders * Add snowplow API readme * Add tracker API readme --- .github/workflows/ci.yml | 1 + examples/redis_example/README.md | 26 +++++++ examples/redis_example/redis_app.py | 60 +++++++++++++++ examples/redis_example/redis_worker.py | 74 +++++++++++++++++++ examples/redis_example/requirements.txt | 2 + examples/snowplow_api_example/README.md | 18 +++++ .../snowplow_app.py | 0 examples/tracker_api_example/README.md | 18 +++++ examples/{ => tracker_api_example}/app.py | 7 +- 9 files changed, 205 insertions(+), 1 deletion(-) create mode 100644 examples/redis_example/README.md create mode 100644 examples/redis_example/redis_app.py create mode 100644 examples/redis_example/redis_worker.py create mode 100644 examples/redis_example/requirements.txt create mode 100644 examples/snowplow_api_example/README.md rename examples/{ => snowplow_api_example}/snowplow_app.py 
(100%) create mode 100644 examples/tracker_api_example/README.md rename examples/{ => tracker_api_example}/app.py (90%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 346c62f8..36488543 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -56,6 +56,7 @@ jobs: - name: Demo run: | cd examples + cd tracker_api_example python app.py "localhost:9090" - name: Coveralls diff --git a/examples/redis_example/README.md b/examples/redis_example/README.md new file mode 100644 index 00000000..59b1aeb7 --- /dev/null +++ b/examples/redis_example/README.md @@ -0,0 +1,26 @@ +# Redis Example App + +This example shows how to set up the Python tracker with a Redis database and a Redis worker to forward events to a Snowplow pipeline. + +#### Installation +- Install the Python tracker from the root folder of the project. + +`python setup.py install` + +- Install redis for your machine. More information can be found [here](https://redis.io/docs/getting-started/installation/) + +`brew install redis` + +- Run `redis-server` to check your redis installation, to stop the server enter `ctrl+c`. + +#### Usage +Navigate to the example folder. + +`cd examples/redis_example` + +This example has two programmes, `redis_app.py` tracks events and sends them to a redis database, `redis_worker.py` then forwards these events onto a Snowplow pipeline. + +To send events to your pipeline, run `redis-server`, followed by the `redis_worker.py {{your_collector_endpoint}}` and finally `redis_app.py`. You should see 3 events in your pipleine. 
+ + + diff --git a/examples/redis_example/redis_app.py b/examples/redis_example/redis_app.py new file mode 100644 index 00000000..db7ffc32 --- /dev/null +++ b/examples/redis_example/redis_app.py @@ -0,0 +1,60 @@ +from snowplow_tracker import Tracker +from snowplow_tracker.typing import PayloadDict +import json +import redis +import logging + +# logging +logging.basicConfig() +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +class RedisEmitter(object): + """ + Sends Snowplow events to a Redis database + """ + + def __init__(self, rdb=None, key: str = "redis_key") -> None: + """ + :param rdb: Optional custom Redis database + :type rdb: redis | None + :param key: The Redis key for the list of events + :type key: string + """ + + if rdb is None: + rdb = redis.StrictRedis() + + self.rdb = rdb + self.key = key + + def input(self, payload: PayloadDict) -> None: + """ + :param payload: The event properties + :type payload: dict(string:*) + """ + logger.info("Pushing event to Redis queue...") + self.rdb.rpush(self.key, json.dumps(payload)) + logger.info("Finished sending event to Redis.") + + def flush(self) -> None: + logger.warning("The RedisEmitter class does not need to be flushed") + return + + def sync_flush(self) -> None: + self.flush() + + +def main(): + emitter = RedisEmitter() + + t = Tracker(emitter) + + t.track_page_view("https://www.snowplow.io", "Homepage") + t.track_page_ping("https://www.snowplow.io", "Homepage") + t.track_link_click("https://www.snowplow.io") + + +if __name__ == "__main__": + main() diff --git a/examples/redis_example/redis_worker.py b/examples/redis_example/redis_worker.py new file mode 100644 index 00000000..6a190683 --- /dev/null +++ b/examples/redis_example/redis_worker.py @@ -0,0 +1,74 @@ +import sys +from snowplow_tracker import Emitter +from typing import Any +from snowplow_tracker.typing import PayloadDict +import json +import redis +import signal +import gevent +from gevent.pool import Pool + + +def 
get_url_from_args(): + if len(sys.argv) != 2: + raise ValueError("Collector Endpoint is required") + return sys.argv[1] + + +class RedisWorker: + def __init__(self, emitter: Emitter, key) -> None: + self.pool = Pool(5) + self.emitter = emitter + self.rdb = redis.StrictRedis() + self.key = key + + signal.signal(signal.SIGTERM, self.request_shutdown) + signal.signal(signal.SIGINT, self.request_shutdown) + signal.signal(signal.SIGQUIT, self.request_shutdown) + + def send(self, payload: PayloadDict) -> None: + """ + Send an event to an emitter + """ + self.emitter.input(payload) + + def pop_payload(self) -> None: + """ + Get a single event from Redis and send it + If the Redis queue is empty, sleep to avoid making continual requests + """ + payload = self.rdb.lpop(self.key) + if payload: + self.pool.spawn(self.send, json.loads(payload.decode("utf-8"))) + else: + gevent.sleep(5) + + def run(self) -> None: + """ + Run indefinitely + """ + self._shutdown = False + while not self._shutdown: + self.pop_payload() + self.pool.join(timeout=20) + + def request_shutdown(self, *args: Any) -> None: + """ + Halt the worker + """ + self._shutdown = True + + +def main(): + collector_url = get_url_from_args() + + # Configure Emitter + emitter = Emitter(collector_url, batch_size=1) + + # Setup worker + worker = RedisWorker(emitter=emitter, key="redis_key") + worker.run() + + +if __name__ == "__main__": + main() diff --git a/examples/redis_example/requirements.txt b/examples/redis_example/requirements.txt new file mode 100644 index 00000000..ac10dd44 --- /dev/null +++ b/examples/redis_example/requirements.txt @@ -0,0 +1,2 @@ +redis~=4.5 +gevent~=22.10 \ No newline at end of file diff --git a/examples/snowplow_api_example/README.md b/examples/snowplow_api_example/README.md new file mode 100644 index 00000000..6819757b --- /dev/null +++ b/examples/snowplow_api_example/README.md @@ -0,0 +1,18 @@ +# Snowplow API Example App + +This example shows how to set up the Python tracker with the 
Snowplow API to send events to a Snowplow pipeline. + +#### Installation +- Install the Python tracker from the root folder of the project. + +`python setup.py install` + +#### Usage +Navigate to the example folder. + +`cd examples/snowplow_api_example` + +To send events to your pipeline, run `snowplow_app.py {{your_collector_endpoint}}`. You should see 6 events in your pipleine. + + + diff --git a/examples/snowplow_app.py b/examples/snowplow_api_example/snowplow_app.py similarity index 100% rename from examples/snowplow_app.py rename to examples/snowplow_api_example/snowplow_app.py diff --git a/examples/tracker_api_example/README.md b/examples/tracker_api_example/README.md new file mode 100644 index 00000000..10392b17 --- /dev/null +++ b/examples/tracker_api_example/README.md @@ -0,0 +1,18 @@ +# Example App + +This example shows how to set up the Python tracker with the tracker API to send events to a Snowplow pipeline. + +#### Installation +- Install the Python tracker from the root folder of the project. + +`python setup.py install` + +#### Usage +Navigate to the example folder. + +`cd examples/tracker_api_example` + +To send events to your pipeline, run `app.py {{your_collector_endpoint}}`. You should see 5 events in your pipleine. 
+ + + diff --git a/examples/app.py b/examples/tracker_api_example/app.py similarity index 90% rename from examples/app.py rename to examples/tracker_api_example/app.py index 973f5a99..cee66b47 100644 --- a/examples/app.py +++ b/examples/tracker_api_example/app.py @@ -1,5 +1,10 @@ from distutils.log import error -from snowplow_tracker import Tracker, Emitter, Subject, SelfDescribingJson +from snowplow_tracker import ( + Tracker, + Emitter, + Subject, + SelfDescribingJson, +) import sys From c894d4d0ac740a9a9e67a04c45b54040640a19eb Mon Sep 17 00:00:00 2001 From: Jack-Keene Date: Wed, 19 Apr 2023 13:04:38 +0100 Subject: [PATCH 25/51] Prepare for release 0.15.0 --- CHANGES.txt | 5 +++++ docs/source/conf.py | 2 +- setup.py | 2 +- snowplow_tracker/_version.py | 2 +- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 9a9b82b9..f4ab14eb 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,8 @@ +Version 0.15.0 (2023-04-19) +--------------------------- +Use Requests Session for sending eventss (#221) +Add Redis example app (#322) + Version 0.14.0 (2023-03-21) --------------------------- Adds deprecation warnings for V1 changes (#315) diff --git a/docs/source/conf.py b/docs/source/conf.py index 59bfe9df..d2b51972 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -28,7 +28,7 @@ author = 'Alex Dean, Paul Boocock, Matus Tomlein, Jack Keene' # The full version, including alpha/beta/rc tags -release = "0.14" +release = "0.15" # -- General configuration --------------------------------------------------- diff --git a/setup.py b/setup.py index b7968c9a..81776ce5 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ setup( name="snowplow-tracker", - version="0.14.0", + version="0.15.0", author=authors_str, author_email=authors_email_str, packages=[ diff --git a/snowplow_tracker/_version.py b/snowplow_tracker/_version.py index d9f39c84..69a6ff65 100644 --- a/snowplow_tracker/_version.py +++ b/snowplow_tracker/_version.py @@ 
-15,6 +15,6 @@ # language governing permissions and limitations there under. # """ -__version_info__ = (0, 14, 0) +__version_info__ = (0, 15, 0) __version__ = ".".join(str(x) for x in __version_info__) __build_version__ = __version__ + "" From e1dd11e2973fbf44059972e4d6d4f62c8826ff13 Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Thu, 20 Apr 2023 13:48:35 +0100 Subject: [PATCH 26/51] Remove Redis and Celery Emitters (close #335) PR #336 * Delete Celery Emitter * Delete Redis Emitter * Remove Redis integration tests --- .github/workflows/ci.yml | 14 +-- requirements-test.txt | 1 - run-tests.sh | 91 ----------------- setup.py | 13 --- snowplow_tracker/__init__.py | 6 -- snowplow_tracker/celery/__init__.py | 1 - snowplow_tracker/celery/celery_emitter.py | 95 ------------------ snowplow_tracker/redis/__init__.py | 2 - snowplow_tracker/redis/redis_emitter.py | 86 ---------------- snowplow_tracker/redis/redis_worker.py | 97 ------------------- .../test/integration/test_integration.py | 31 ------ snowplow_tracker/typing.py | 8 -- 12 files changed, 1 insertion(+), 444 deletions(-) delete mode 100644 snowplow_tracker/celery/__init__.py delete mode 100644 snowplow_tracker/celery/celery_emitter.py delete mode 100644 snowplow_tracker/redis/__init__.py delete mode 100644 snowplow_tracker/redis/redis_emitter.py delete mode 100644 snowplow_tracker/redis/redis_worker.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 36488543..106a2585 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,18 +13,6 @@ jobs: strategy: matrix: python-version: [3.6, 3.7, 3.8, 3.9, "3.10", "3.11"] - extras-required: [".", ".[redis]"] - - services: - redis: - image: redis - options: >- - --health-cmd "redis-cli ping" - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - - 6379:6379 steps: - name: Checkout @@ -42,7 +30,7 @@ jobs: - name: Install dependencies run: | python -m pip install 
--upgrade "${{ env.pip_v }}" setuptools wheel - python -m pip install -e ${{ matrix.extras-required }} + python -m pip install -e . python -m pip install -r requirements-test.txt - name: Build diff --git a/requirements-test.txt b/requirements-test.txt index 668dae79..55463d8c 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -5,4 +5,3 @@ httmock==1.4.0 freezegun==1.1.0 pytest-cov coveralls==3.3.1 -fakeredis==1.7.0 diff --git a/run-tests.sh b/run-tests.sh index 715f72dd..8a6489a0 100755 --- a/run-tests.sh +++ b/run-tests.sh @@ -24,14 +24,6 @@ function deploy { source deactivate fi - if [ ! -e ~/.pyenv/versions/tracker35redis ]; then - pyenv virtualenv 3.5.10 tracker35redis - pyenv activate tracker35redis - pip install .[redis] - pip install -r requirements-test.txt - source deactivate - fi - # pyenv install 3.6.14 if [ ! -e ~/.pyenv/versions/tracker36 ]; then pyenv virtualenv 3.6.14 tracker36 @@ -41,14 +33,6 @@ function deploy { source deactivate fi - if [ ! -e ~/.pyenv/versions/tracker36redis ]; then - pyenv virtualenv 3.6.14 tracker36redis - pyenv activate tracker36redis - pip install .[redis] - pip install -r requirements-test.txt - source deactivate - fi - # pyenv install 3.7.11 if [ ! -e ~/.pyenv/versions/tracker37 ]; then pyenv virtualenv 3.7.11 tracker37 @@ -58,14 +42,6 @@ function deploy { source deactivate fi - if [ ! -e ~/.pyenv/versions/tracker37redis ]; then - pyenv virtualenv 3.7.11 tracker37redis - pyenv activate tracker37redis - pip install .[redis] - pip install -r requirements-test.txt - source deactivate - fi - # pyenv install 3.8.11 if [ ! -e ~/.pyenv/versions/tracker38 ]; then pyenv virtualenv 3.8.11 tracker38 @@ -75,14 +51,6 @@ function deploy { source deactivate fi - if [ ! -e ~/.pyenv/versions/tracker38redis ]; then - pyenv virtualenv 3.8.11 tracker38redis - pyenv activate tracker38redis - pip install .[redis] - pip install -r requirements-test.txt - source deactivate - fi - # pyenv install 3.9.6 if [ ! 
-e ~/.pyenv/versions/tracker39 ]; then pyenv virtualenv 3.9.6 tracker39 @@ -92,14 +60,6 @@ function deploy { source deactivate fi - if [ ! -e ~/.pyenv/versions/tracker39redis ]; then - pyenv virtualenv 3.9.6 tracker39redis - pyenv activate tracker39redis - pip install .[redis] - pip install -r requirements-test.txt - source deactivate - fi - # pyenv install 3.10.1 if [ ! -e ~/.pyenv/versions/tracker310 ]; then pyenv virtualenv 3.10.1 tracker310 @@ -109,14 +69,6 @@ function deploy { source deactivate fi - if [ ! -e ~/.pyenv/versions/tracker310redis ]; then - pyenv virtualenv 3.10.1 tracker310redis - pyenv activate tracker310redis - pip install .[redis] - pip install -r requirements-test.txt - source deactivate - fi - # pyenv install 3.11.0 if [ ! -e ~/.pyenv/versions/tracker311 ]; then pyenv virtualenv 3.11.0 tracker311 @@ -126,13 +78,6 @@ function deploy { source deactivate fi - if [ ! -e ~/.pyenv/versions/tracker311redis ]; then - pyenv virtualenv 3.11.0 tracker311redis - pyenv activate tracker311redis - pip install .[redis] - pip install -r requirements-test.txt - source deactivate - fi } @@ -141,75 +86,39 @@ function run_tests { pytest source deactivate - pyenv activate tracker35redis - pytest - source deactivate - pyenv activate tracker36 pytest source deactivate - pyenv activate tracker36redis - pytest - source deactivate - pyenv activate tracker37 pytest source deactivate - pyenv activate tracker37redis - pytest - source deactivate - pyenv activate tracker38 pytest source deactivate - pyenv activate tracker38redis - pytest - source deactivate - pyenv activate tracker39 pytest source deactivate - pyenv activate tracker39redis - pytest - source deactivate - pyenv activate tracker310 pytest source deactivate - pyenv activate tracker310redis - pytest - source deactivate - pyenv activate tracker311 pytest source deactivate - - pyenv activate tracker311redis - pytest - source deactivate - } function refresh_deploy { pyenv uninstall -f tracker35 - pyenv uninstall -f 
tracker35redis pyenv uninstall -f tracker36 - pyenv uninstall -f tracker36redis pyenv uninstall -f tracker37 - pyenv uninstall -f tracker37redis pyenv uninstall -f tracker38 - pyenv uninstall -f tracker38redis pyenv uninstall -f tracker39 - pyenv uninstall -f tracker39redis pyenv uninstall -f tracker310 - pyenv uninstall -f tracker310redis pyenv uninstall -f tracker311 - pyenv uninstall -f tracker311redis } diff --git a/setup.py b/setup.py index 81776ce5..2510efa9 100644 --- a/setup.py +++ b/setup.py @@ -39,8 +39,6 @@ packages=[ "snowplow_tracker", "snowplow_tracker.test", - "snowplow_tracker.redis", - "snowplow_tracker.celery", ], url="http://snowplow.io", license="Apache License 2.0", @@ -63,15 +61,4 @@ "Operating System :: OS Independent", ], install_requires=["requests>=2.25.1,<3.0", "typing_extensions>=3.7.4"], - extras_require={ - "celery": [ - "celery>=4.0,<5.0;python_version<'3.0'", - "celery>=4.0;python_version>='3.0'", - ], - "redis": [ - "redis>=2.9.1,<4.0;python_version<'3.0'", - "redis>=2.9.1;python_version>='3.0'", - "gevent>=21.1.2", - ], - }, ) diff --git a/snowplow_tracker/__init__.py b/snowplow_tracker/__init__.py index 8726efd9..8745b370 100644 --- a/snowplow_tracker/__init__.py +++ b/snowplow_tracker/__init__.py @@ -8,9 +8,3 @@ from snowplow_tracker.snowplow import Snowplow from snowplow_tracker.contracts import disable_contracts, enable_contracts from snowplow_tracker.event_store import EventStore - -# celery extra -from .celery import CeleryEmitter - -# redis extra -from .redis import RedisEmitter, RedisWorker diff --git a/snowplow_tracker/celery/__init__.py b/snowplow_tracker/celery/__init__.py deleted file mode 100644 index 2a4d905a..00000000 --- a/snowplow_tracker/celery/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .celery_emitter import CeleryEmitter diff --git a/snowplow_tracker/celery/celery_emitter.py b/snowplow_tracker/celery/celery_emitter.py deleted file mode 100644 index c92712e3..00000000 --- 
a/snowplow_tracker/celery/celery_emitter.py +++ /dev/null @@ -1,95 +0,0 @@ -# """ -# celery_emitter.py - -# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. - -# This program is licensed to you under the Apache License Version 2.0, -# and you may not use this file except in compliance with the Apache License -# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at -# http://www.apache.org/licenses/LICENSE-2.0. - -# Unless required by applicable law or agreed to in writing, -# software distributed under the Apache License Version 2.0 is distributed on -# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the Apache License Version 2.0 for the specific -# language governing permissions and limitations there under. -# """ - -import logging -from typing import Any, Optional -from warnings import warn - -from snowplow_tracker.emitters import Emitter -from snowplow_tracker.typing import HttpProtocol, Method - -_CELERY_OPT = True -try: - from celery import Celery -except ImportError: - _CELERY_OPT = False - -# logging -logging.basicConfig() -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -class CeleryEmitter(Emitter): - """ - Uses a Celery worker to send HTTP requests asynchronously. - Works like the base Emitter class, - but on_success and on_failure callbacks cannot be set. 
- """ - - if _CELERY_OPT: - celery_app = None - - def __init__( - self, - endpoint: str, - protocol: HttpProtocol = "http", - port: Optional[int] = None, - method: Method = "post", - batch_size: Optional[int] = None, - byte_limit: Optional[int] = None, - ) -> None: - warn( - "The Celery Emitter will be deprecated in future versions.", - DeprecationWarning, - stacklevel=2, - ) - super(CeleryEmitter, self).__init__( - endpoint, protocol, port, method, batch_size, None, None, byte_limit - ) - - try: - # Check whether a custom Celery configuration module named "snowplow_celery_config" exists - import snowplow_celery_config - - self.celery_app = Celery() - self.celery_app.config_from_object(snowplow_celery_config) - except ImportError: - # Otherwise configure Celery with default settings - self.celery_app = Celery("Snowplow", broker="redis://guest@localhost//") - - self.async_flush = self.celery_app.task(self.async_flush) - - def flush(self) -> None: - """ - Schedules a flush task - """ - self.async_flush.delay() - logger.info("Scheduled a Celery task to flush the event queue") - - def async_flush(self) -> None: - super(CeleryEmitter, self).flush() - - else: - - def __new__(cls, *args: Any, **kwargs: Any) -> "CeleryEmitter": - logger.error( - "CeleryEmitter is not available. Please install snowplow-tracker with celery extra dependency." - ) - raise RuntimeError( - "CeleryEmitter is not available. 
To use: `pip install snowplow-tracker[celery]`" - ) diff --git a/snowplow_tracker/redis/__init__.py b/snowplow_tracker/redis/__init__.py deleted file mode 100644 index 794bcdd3..00000000 --- a/snowplow_tracker/redis/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .redis_emitter import RedisEmitter -from .redis_worker import RedisWorker diff --git a/snowplow_tracker/redis/redis_emitter.py b/snowplow_tracker/redis/redis_emitter.py deleted file mode 100644 index 86eee6b9..00000000 --- a/snowplow_tracker/redis/redis_emitter.py +++ /dev/null @@ -1,86 +0,0 @@ -# """ -# redis_emitter.py - -# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. - -# This program is licensed to you under the Apache License Version 2.0, -# and you may not use this file except in compliance with the Apache License -# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at -# http://www.apache.org/licenses/LICENSE-2.0. - -# Unless required by applicable law or agreed to in writing, -# software distributed under the Apache License Version 2.0 is distributed on -# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the Apache License Version 2.0 for the specific -# language governing permissions and limitations there under. 
-# """ - -import json -import logging -from typing import Any, Optional -from warnings import warn -from snowplow_tracker.typing import PayloadDict, RedisProtocol - -_REDIS_OPT = True -try: - import redis -except ImportError: - _REDIS_OPT = False - -# logging -logging.basicConfig() -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -class RedisEmitter(object): - """ - Sends Snowplow events to a Redis database - """ - - if _REDIS_OPT: - - def __init__( - self, rdb: Optional[RedisProtocol] = None, key: str = "snowplow" - ) -> None: - """ - :param rdb: Optional custom Redis database - :type rdb: redis | None - :param key: The Redis key for the list of events - :type key: string - """ - warn( - "The Redis Emitter will be deprecated in future versions.", - DeprecationWarning, - stacklevel=2, - ) - if rdb is None: - rdb = redis.StrictRedis() - - self.rdb = rdb - self.key = key - - def input(self, payload: PayloadDict) -> None: - """ - :param payload: The event properties - :type payload: dict(string:*) - """ - logger.debug("Pushing event to Redis queue...") - self.rdb.rpush(self.key, json.dumps(payload)) - logger.info("Finished sending event to Redis.") - - def flush(self) -> None: - logger.warning("The RedisEmitter class does not need to be flushed") - - def sync_flush(self) -> None: - self.flush() - - else: - - def __new__(cls, *args: Any, **kwargs: Any) -> "RedisEmitter": - logger.error( - "RedisEmitter is not available. Please install snowplow-tracker with redis extra dependency." - ) - raise RuntimeError( - "RedisEmitter is not available. To use: `pip install snowplow-tracker[redis]`" - ) diff --git a/snowplow_tracker/redis/redis_worker.py b/snowplow_tracker/redis/redis_worker.py deleted file mode 100644 index e0e9353f..00000000 --- a/snowplow_tracker/redis/redis_worker.py +++ /dev/null @@ -1,97 +0,0 @@ -# """ -# redis_worker.py - -# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. 
- -# This program is licensed to you under the Apache License Version 2.0, -# and you may not use this file except in compliance with the Apache License -# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at -# http://www.apache.org/licenses/LICENSE-2.0. - -# Unless required by applicable law or agreed to in writing, -# software distributed under the Apache License Version 2.0 is distributed on -# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the Apache License Version 2.0 for the specific -# language governing permissions and limitations there under. -# """ - -import json -import signal -from typing import Any, Optional - -from snowplow_tracker.typing import EmitterProtocol, PayloadDict, RedisProtocol - -_REDIS_OPT = True -try: - import redis - import gevent - from gevent.pool import Pool -except ImportError: - _REDIS_OPT = False - -DEFAULT_KEY = "snowplow" - - -class RedisWorker(object): - """ - Asynchronously take events from redis and send them to an emitter - """ - - if _REDIS_OPT: - - def __init__( - self, - emitter: EmitterProtocol, - rdb: Optional[RedisProtocol] = None, - key: str = DEFAULT_KEY, - ) -> None: - self.emitter = emitter - self.key = key - if rdb is None: - rdb = redis.StrictRedis() - self.rdb = rdb - self.pool = Pool(5) - - signal.signal(signal.SIGTERM, self.request_shutdown) - signal.signal(signal.SIGINT, self.request_shutdown) - signal.signal(signal.SIGQUIT, self.request_shutdown) - - def send(self, payload: PayloadDict) -> None: - """ - Send an event to an emitter - """ - self.emitter.input(payload) - - def pop_payload(self) -> None: - """ - Get a single event from Redis and send it - If the Redis queue is empty, sleep to avoid making continual requests - """ - payload = self.rdb.lpop(self.key) - if payload: - self.pool.spawn(self.send, json.loads(payload.decode("utf-8"))) - else: - gevent.sleep(5) - - def run(self) -> None: - """ - Run indefinitely - """ - 
self._shutdown = False - - while not self._shutdown: - self.pop_payload() - self.pool.join(timeout=20) - - def request_shutdown(self, *args: Any) -> None: - """ - Halt the worker - """ - self._shutdown = True - - else: - - def __new__(cls, *args: Any, **kwargs: Any) -> "RedisWorker": - raise RuntimeError( - "RedisWorker is not available. To use: `pip install snowplow-tracker[redis]`" - ) diff --git a/snowplow_tracker/test/integration/test_integration.py b/snowplow_tracker/test/integration/test_integration.py index 7758a266..6b444b8f 100644 --- a/snowplow_tracker/test/integration/test_integration.py +++ b/snowplow_tracker/test/integration/test_integration.py @@ -27,7 +27,6 @@ from snowplow_tracker import tracker, _version, emitters, subject from snowplow_tracker.self_describing_json import SelfDescribingJson -from snowplow_tracker.redis import redis_emitter querystrings = [""] @@ -414,36 +413,6 @@ def test_integration_event_subject(self) -> None: from_querystring(key, querystrings[-1]), expected_fields[key] ) - def test_integration_redis_default(self) -> None: - try: - import fakeredis - - r = fakeredis.FakeStrictRedis() - t = tracker.Tracker([redis_emitter.RedisEmitter(rdb=r)], default_subject) - t.track_page_view("http://www.example.com") - event_string = r.rpop("snowplow") - event_dict = json.loads(event_string.decode("utf-8")) - self.assertEqual(event_dict["e"], "pv") - except ImportError: - with pytest.raises(RuntimeError): - redis_emitter.RedisEmitter() - - def test_integration_redis_custom(self) -> None: - try: - import fakeredis - - r = fakeredis.FakeStrictRedis() - t = tracker.Tracker( - [redis_emitter.RedisEmitter(rdb=r, key="custom_key")], default_subject - ) - t.track_page_view("http://www.example.com") - event_string = r.rpop("custom_key") - event_dict = json.loads(event_string.decode("utf-8")) - self.assertEqual(event_dict["e"], "pv") - except ImportError: - with pytest.raises(RuntimeError): - redis_emitter.RedisEmitter("arg", key="kwarg") - def 
test_integration_success_callback(self) -> None: callback_success_queue = [] callback_failure_queue = [] diff --git a/snowplow_tracker/typing.py b/snowplow_tracker/typing.py index 52b68664..5bbc477b 100644 --- a/snowplow_tracker/typing.py +++ b/snowplow_tracker/typing.py @@ -67,11 +67,3 @@ class EmitterProtocol(Protocol): def input(self, payload: PayloadDict) -> None: ... - - -class RedisProtocol(Protocol): - def rpush(self, name: Any, *values: Any) -> int: - ... - - def lpop(self, name: Any, count: Optional[int] = ...) -> Any: - ... From f2c6b3642ebc4a21606dbed01a7f714fa35c97b1 Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Fri, 21 Apr 2023 12:02:38 +0100 Subject: [PATCH 27/51] Make tracker namespace mandatory (close #337) PR #339 * Make tracker namespace mandatory * Update unit tests with mandatory namespace * Update integration tests with mandatory tracker namespace * Update tracker examples with mandatory tracker namespace * Add return type to get_namespace() --- examples/redis_example/redis_app.py | 2 +- examples/tracker_api_example/app.py | 2 +- .../test/integration/test_integration.py | 66 ++++++++----- snowplow_tracker/test/unit/test_tracker.py | 94 ++++++++++--------- snowplow_tracker/tracker.py | 8 +- 5 files changed, 95 insertions(+), 77 deletions(-) diff --git a/examples/redis_example/redis_app.py b/examples/redis_example/redis_app.py index db7ffc32..a4ac1e77 100644 --- a/examples/redis_example/redis_app.py +++ b/examples/redis_example/redis_app.py @@ -49,7 +49,7 @@ def sync_flush(self) -> None: def main(): emitter = RedisEmitter() - t = Tracker(emitter) + t = Tracker(namespace="snowplow_tracker", emitters=emitter) t.track_page_view("https://www.snowplow.io", "Homepage") t.track_page_ping("https://www.snowplow.io", "Homepage") diff --git a/examples/tracker_api_example/app.py b/examples/tracker_api_example/app.py index cee66b47..2483d509 100644 --- a/examples/tracker_api_example/app.py +++ 
b/examples/tracker_api_example/app.py @@ -22,7 +22,7 @@ def main(): s = Subject().set_platform("pc") s.set_lang("en").set_user_id("test_user") - t = Tracker(e, s) + t = Tracker(namespace="snowplow_tracker", emitters=e, subject=s) print("Sending events to " + e.endpoint) diff --git a/snowplow_tracker/test/integration/test_integration.py b/snowplow_tracker/test/integration/test_integration.py index 6b444b8f..f9323a1c 100644 --- a/snowplow_tracker/test/integration/test_integration.py +++ b/snowplow_tracker/test/integration/test_integration.py @@ -64,7 +64,7 @@ def fail_response_content(url: str, request: Any) -> Dict[str, Any]: class IntegrationTest(unittest.TestCase): def test_integration_page_view(self) -> None: - t = tracker.Tracker([get_emitter], default_subject) + t = tracker.Tracker("namespace", [get_emitter], default_subject) with HTTMock(pass_response_content): t.track_page_view( "http://savethearctic.org", "Save The Arctic", "http://referrer.com" @@ -81,7 +81,7 @@ def test_integration_page_view(self) -> None: ) def test_integration_ecommerce_transaction_item(self) -> None: - t = tracker.Tracker([get_emitter], default_subject) + t = tracker.Tracker("namespace", [get_emitter], default_subject) with HTTMock(pass_response_content): t.track_ecommerce_transaction_item( "12345", "pbz0025", 7.99, 2, "black-tarot", "tarot", currency="GBP" @@ -102,7 +102,7 @@ def test_integration_ecommerce_transaction_item(self) -> None: ) def test_integration_ecommerce_transaction(self) -> None: - t = tracker.Tracker([get_emitter], default_subject) + t = tracker.Tracker("namespace", [get_emitter], default_subject) with HTTMock(pass_response_content): t.track_ecommerce_transaction( "6a8078be", @@ -157,7 +157,9 @@ def test_integration_ecommerce_transaction(self) -> None: ) def test_integration_mobile_screen_view(self) -> None: - t = tracker.Tracker([get_emitter], default_subject, encode_base64=False) + t = tracker.Tracker( + "namespace", [get_emitter], default_subject, 
encode_base64=False + ) with HTTMock(pass_response_content): t.track_mobile_screen_view("534", "Game HUD 2") expected_fields = {"e": "ue"} @@ -179,7 +181,7 @@ def test_integration_mobile_screen_view(self) -> None: ) def test_integration_struct_event(self) -> None: - t = tracker.Tracker([get_emitter], default_subject) + t = tracker.Tracker("namespace", [get_emitter], default_subject) with HTTMock(pass_response_content): t.track_struct_event( "Ecomm", "add-to-basket", "dog-skateboarding-video", "hd", 13.99 @@ -198,7 +200,9 @@ def test_integration_struct_event(self) -> None: ) def test_integration_self_describing_event_non_base64(self) -> None: - t = tracker.Tracker([get_emitter], default_subject, encode_base64=False) + t = tracker.Tracker( + "namespace", [get_emitter], default_subject, encode_base64=False + ) with HTTMock(pass_response_content): t.track_self_describing_event( SelfDescribingJson( @@ -229,7 +233,9 @@ def test_integration_self_describing_event_non_base64(self) -> None: ) def test_integration_self_describing_event_base64(self) -> None: - t = tracker.Tracker([get_emitter], default_subject, encode_base64=True) + t = tracker.Tracker( + "namespace", [get_emitter], default_subject, encode_base64=True + ) with HTTMock(pass_response_content): t.track_self_describing_event( SelfDescribingJson( @@ -264,7 +270,9 @@ def test_integration_self_describing_event_base64(self) -> None: ) def test_integration_context_non_base64(self) -> None: - t = tracker.Tracker([get_emitter], default_subject, encode_base64=False) + t = tracker.Tracker( + "namespace", [get_emitter], default_subject, encode_base64=False + ) with HTTMock(pass_response_content): t.track_page_view( "localhost", @@ -293,7 +301,9 @@ def test_integration_context_non_base64(self) -> None: ) def test_integration_context_base64(self) -> None: - t = tracker.Tracker([get_emitter], default_subject, encode_base64=True) + t = tracker.Tracker( + "namespace", [get_emitter], default_subject, encode_base64=True + ) with 
HTTMock(pass_response_content): t.track_page_view( "localhost", @@ -335,9 +345,9 @@ def test_integration_standard_nv_pairs(self) -> None: s.set_lang("en") t = tracker.Tracker( + "cf", [emitters.Emitter("localhost", method="get")], s, - "cf", app_id="angry-birds-android", ) with HTTMock(pass_response_content): @@ -371,9 +381,9 @@ def test_integration_identification_methods(self) -> None: s.set_network_user_id("fbc6c76c-bce5-43ce-8d5a-31c5") t = tracker.Tracker( + "cf", [emitters.Emitter("localhost", method="get")], s, - "cf", app_id="angry-birds-android", ) with HTTMock(pass_response_content): @@ -397,9 +407,9 @@ def test_integration_event_subject(self) -> None: s.set_lang("ES") t = tracker.Tracker( + "namespace", [emitters.Emitter("localhost", method="get")], s, - "cf", app_id="angry-birds-android", ) evSubject = ( @@ -422,7 +432,7 @@ def test_integration_success_callback(self) -> None: on_success=lambda x: callback_success_queue.append(x), on_failure=lambda x, y: callback_failure_queue.append(x), ) - t = tracker.Tracker([callback_emitter], default_subject) + t = tracker.Tracker("namespace", [callback_emitter], default_subject) with HTTMock(pass_response_content): t.track_page_view("http://www.example.com") expected = { @@ -443,14 +453,14 @@ def test_integration_failure_callback(self) -> None: on_success=lambda x: callback_success_queue.append(x), on_failure=lambda x, y: callback_failure_queue.append(x), ) - t = tracker.Tracker([callback_emitter], default_subject) + t = tracker.Tracker("namespace", [callback_emitter], default_subject) with HTTMock(fail_response_content): t.track_page_view("http://www.example.com") self.assertEqual(callback_success_queue, []) self.assertEqual(callback_failure_queue[0], 0) def test_post_page_view(self) -> None: - t = tracker.Tracker([default_emitter], default_subject) + t = tracker.Tracker("namespace", [default_emitter], default_subject) with HTTMock(pass_post_response_content): t.track_page_view("localhost", "local host", None) 
expected_fields = {"e": "pv", "page": "local host", "url": "localhost"} @@ -466,7 +476,7 @@ def test_post_batched(self) -> None: default_emitter = emitters.Emitter( "localhost", protocol="http", port=80, batch_size=2 ) - t = tracker.Tracker(default_emitter, default_subject) + t = tracker.Tracker("namespace", default_emitter, default_subject) with HTTMock(pass_post_response_content): t.track_struct_event("Test", "A") t.track_struct_event("Test", "B") @@ -476,7 +486,7 @@ def test_post_batched(self) -> None: @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 def test_timestamps(self) -> None: emitter = emitters.Emitter("localhost", protocol="http", port=80, batch_size=3) - t = tracker.Tracker([emitter], default_subject) + t = tracker.Tracker("namespace", [emitter], default_subject) with HTTMock(pass_post_response_content): t.track_page_view("localhost", "stamp0", None, tstamp=None) t.track_page_view("localhost", "stamp1", None, tstamp=1358933694000) @@ -502,19 +512,21 @@ def test_timestamps(self) -> None: def test_bytelimit(self) -> None: default_emitter = emitters.Emitter( - "localhost", protocol="http", port=80, batch_size=5, byte_limit=420 + "localhost", protocol="http", port=80, batch_size=5, byte_limit=483 ) - t = tracker.Tracker(default_emitter, default_subject) + t = tracker.Tracker("namespace", default_emitter, default_subject) with HTTMock(pass_post_response_content): - t.track_struct_event("Test", "A") # 140 bytes - t.track_struct_event("Test", "A") # 280 bytes - t.track_struct_event("Test", "A") # 420 bytes. Send - t.track_struct_event("Test", "AA") # 141 + t.track_struct_event("Test", "A") # 161 bytes + t.track_struct_event("Test", "A") # 322 bytes + t.track_struct_event("Test", "A") # 483 bytes. 
Send + t.track_struct_event("Test", "AA") # 162 self.assertEqual(len(querystrings[-1]["data"]), 3) - self.assertEqual(default_emitter.bytes_queued, 136 + len(_version.__version__)) + self.assertEqual(default_emitter.bytes_queued, 156 + len(_version.__version__)) def test_unicode_get(self) -> None: - t = tracker.Tracker([get_emitter], default_subject, encode_base64=False) + t = tracker.Tracker( + "namespace", [get_emitter], default_subject, encode_base64=False + ) unicode_a = "\u0107" unicode_b = "test.\u0107om" test_ctx = SelfDescribingJson( @@ -540,7 +552,9 @@ def test_unicode_get(self) -> None: self.assertEqual(actual_b, unicode_b) def test_unicode_post(self) -> None: - t = tracker.Tracker([default_emitter], default_subject, encode_base64=False) + t = tracker.Tracker( + "namespace", [default_emitter], default_subject, encode_base64=False + ) unicode_a = "\u0107" unicode_b = "test.\u0107om" test_ctx = SelfDescribingJson( diff --git a/snowplow_tracker/test/unit/test_tracker.py b/snowplow_tracker/test/unit/test_tracker.py index 9db98395..0862fce5 100644 --- a/snowplow_tracker/test/unit/test_tracker.py +++ b/snowplow_tracker/test/unit/test_tracker.py @@ -96,7 +96,7 @@ def test_initialisation(self) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() - t = Tracker([e], namespace="cloudfront", encode_base64=False, app_id="AF003") + t = Tracker("cloudfront", [e], encode_base64=False, app_id="AF003") self.assertEqual(t.standard_nv_pairs["tna"], "cloudfront") self.assertEqual(t.standard_nv_pairs["aid"], "AF003") self.assertEqual(t.encode_base64, False) @@ -105,9 +105,8 @@ def test_initialisation_default_optional(self) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() - t = Tracker(e) + t = Tracker("namespace", e) self.assertEqual(t.emitters, [e]) - self.assertTrue(t.standard_nv_pairs["tna"] is None) self.assertTrue(t.standard_nv_pairs["aid"] is None) self.assertEqual(t.encode_base64, True) @@ -116,19 
+115,19 @@ def test_initialisation_emitter_list(self) -> None: e1 = mokEmitter() e2 = mokEmitter() - t = Tracker([e1, e2]) + t = Tracker("namespace", [e1, e2]) self.assertEqual(t.emitters, [e1, e2]) def test_initialisation_error(self) -> None: with self.assertRaises(ValueError): - Tracker([]) + Tracker("namespace", []) def test_initialization_with_subject(self) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() s = Subject() - t = Tracker(e, subject=s) + t = Tracker("namespace", e, subject=s) self.assertIs(t.subject, s) def test_get_uuid(self) -> None: @@ -163,7 +162,7 @@ def test_alias_of_track_self_describing_event(self, mok_track: Any) -> None: e = mokEmitter() mok_track.side_effect = mocked_track - t = Tracker(e) + t = Tracker("namespace", e) evJson = SelfDescribingJson("test.schema", {"n": "v"}) # call the alias t.track_self_describing_event(evJson) @@ -174,7 +173,7 @@ def test_flush(self) -> None: e1 = mokEmitter() e2 = mokEmitter() - t = Tracker([e1, e2]) + t = Tracker("namespace", [e1, e2]) t.flush() e1.flush.assert_not_called() self.assertEqual(e1.sync_flush.call_count, 1) @@ -186,7 +185,7 @@ def test_flush_async(self) -> None: e1 = mokEmitter() e2 = mokEmitter() - t = Tracker([e1, e2]) + t = Tracker("namespace", [e1, e2]) t.flush(is_async=True) self.assertEqual(e1.flush.call_count, 1) e1.sync_flush.assert_not_called() @@ -197,7 +196,7 @@ def test_set_subject(self) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() - t = Tracker(e) + t = Tracker("namespace", e) new_subject = Subject() self.assertIsNot(t.subject, new_subject) t.set_subject(new_subject) @@ -208,7 +207,7 @@ def test_add_emitter(self) -> None: e1 = mokEmitter() e2 = mokEmitter() - t = Tracker(e1) + t = Tracker("namespace", e1) t.add_emitter(e2) self.assertEqual(t.emitters, [e1, e2]) @@ -222,7 +221,7 @@ def test_track(self) -> None: e2 = mokEmitter() e3 = mokEmitter() - t = Tracker([e1, e2, e3]) + t = Tracker("namespace", [e1, 
e2, e3]) p = Payload({"test": "track"}) t.track(p) @@ -241,7 +240,7 @@ def test_complete_payload(self, mok_uuid: Any, mok_track: Any) -> None: mok_uuid.side_effect = mocked_uuid mok_track.side_effect = mocked_track - t = Tracker(e) + t = Tracker("namespace", e) p = Payload() t.complete_payload(p, None, None, None) @@ -255,6 +254,7 @@ def test_complete_payload(self, mok_uuid: Any, mok_track: Any) -> None: "dtm": 1618790401000, "tv": TRACKER_VERSION, "p": "pc", + "tna": "namespace", } self.assertDictEqual(passed_nv_pairs, expected) @@ -268,7 +268,7 @@ def test_complete_payload_tstamp_int(self, mok_uuid: Any, mok_track: Any) -> Non mok_uuid.side_effect = mocked_uuid mok_track.side_effect = mocked_track - t = Tracker(e) + t = Tracker("namespace", e) p = Payload() time_in_millis = 100010001000 t.complete_payload(p, None, time_in_millis, None) @@ -279,6 +279,7 @@ def test_complete_payload_tstamp_int(self, mok_uuid: Any, mok_track: Any) -> Non passed_nv_pairs = trackArgsTuple[0].nv_pairs expected = { + "tna": "namespace", "eid": _TEST_UUID, "dtm": 1618790401000, "ttm": time_in_millis, @@ -297,7 +298,7 @@ def test_complete_payload_tstamp_dtm(self, mok_uuid: Any, mok_track: Any) -> Non mok_uuid.side_effect = mocked_uuid mok_track.side_effect = mocked_track - t = Tracker(e) + t = Tracker("namespace", e) p = Payload() time_in_millis = 100010001000 t.complete_payload(p, None, time_in_millis, None) @@ -308,6 +309,7 @@ def test_complete_payload_tstamp_dtm(self, mok_uuid: Any, mok_track: Any) -> Non passed_nv_pairs = trackArgsTuple[0].nv_pairs expected = { + "tna": "namespace", "eid": _TEST_UUID, "dtm": 1618790401000, "ttm": time_in_millis, @@ -326,7 +328,7 @@ def test_complete_payload_tstamp_ttm(self, mok_uuid: Any, mok_track: Any) -> Non mok_uuid.side_effect = mocked_uuid mok_track.side_effect = mocked_track - t = Tracker(e) + t = Tracker("namespace", e) p = Payload() time_in_millis = 100010001000 t.complete_payload(p, None, time_in_millis, None) @@ -337,6 +339,7 @@ def 
test_complete_payload_tstamp_ttm(self, mok_uuid: Any, mok_track: Any) -> Non passed_nv_pairs = trackArgsTuple[0].nv_pairs expected = { + "tna": "namespace", "eid": _TEST_UUID, "dtm": 1618790401000, "ttm": time_in_millis, @@ -355,7 +358,7 @@ def test_complete_payload_co(self, mok_uuid: Any, mok_track: Any) -> None: mok_uuid.side_effect = mocked_uuid mok_track.side_effect = mocked_track - t = Tracker(e, encode_base64=False) + t = Tracker("namespace", e, encode_base64=False) p = Payload() geo_ctx = SelfDescribingJson(geoSchema, geoData) @@ -388,7 +391,7 @@ def test_complete_payload_cx(self, mok_uuid: Any, mok_track: Any) -> None: mok_uuid.side_effect = mocked_uuid mok_track.side_effect = mocked_track - t = Tracker(e, encode_base64=True) + t = Tracker("namespace", e, encode_base64=True) p = Payload() geo_ctx = SelfDescribingJson(geoSchema, geoData) @@ -415,7 +418,7 @@ def test_complete_payload_event_subject( mok_uuid.side_effect = mocked_uuid mok_track.side_effect = mocked_track - t = Tracker(e) + t = Tracker("namespace", e) p = Payload() evSubject = Subject().set_lang("EN").set_user_id("tester") t.complete_payload(p, None, None, evSubject) @@ -426,6 +429,7 @@ def test_complete_payload_event_subject( passed_nv_pairs = trackArgsTuple[0].nv_pairs expected = { + "tna": "namespace", "eid": _TEST_UUID, "dtm": 1618790401000, "tv": TRACKER_VERSION, @@ -446,7 +450,7 @@ def test_track_self_describing_event(self, mok_complete_payload: Any) -> None: mok_complete_payload.side_effect = mocked_complete_payload - t = Tracker(e, encode_base64=False) + t = Tracker("namespace", e, encode_base64=False) evJson = SelfDescribingJson("test.sde.schema", {"n": "v"}) t.track_self_describing_event(evJson) self.assertEqual(mok_complete_payload.call_count, 1) @@ -481,7 +485,7 @@ def test_track_self_describing_event_all_args( mok_complete_payload.side_effect = mocked_complete_payload - t = Tracker(e, encode_base64=False) + t = Tracker("namespace", e, encode_base64=False) evJson = 
SelfDescribingJson("test.schema", {"n": "v"}) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evContext = [ctx] @@ -519,7 +523,7 @@ def test_track_self_describing_event_encode( mok_complete_payload.side_effect = mocked_complete_payload - t = Tracker(e, encode_base64=True) + t = Tracker("namespace", e, encode_base64=True) evJson = SelfDescribingJson("test.sde.schema", {"n": "v"}) t.track_self_describing_event(evJson) self.assertEqual(mok_complete_payload.call_count, 1) @@ -537,7 +541,7 @@ def test_track_struct_event(self, mok_complete_payload: Any) -> None: mok_complete_payload.side_effect = mocked_complete_payload - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evTstamp = 1399021242030 t.track_struct_event( @@ -577,7 +581,7 @@ def test_track_page_view(self, mok_complete_payload: Any) -> None: mok_complete_payload.side_effect = mocked_complete_payload - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evTstamp = 1399021242030 t.track_page_view( @@ -609,7 +613,7 @@ def test_track_page_ping(self, mok_complete_payload: Any) -> None: mok_complete_payload.side_effect = mocked_complete_payload - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evTstamp = 1399021242030 t.track_page_ping( @@ -653,7 +657,7 @@ def test_track_ecommerce_transaction_item(self, mok_complete_payload: Any) -> No mok_complete_payload.side_effect = mocked_complete_payload - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evTstamp = 1399021242030 t.track_ecommerce_transaction_item( @@ -699,7 +703,7 @@ def test_track_ecommerce_transaction_no_items( mok_complete_payload.side_effect = mocked_complete_payload - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evTstamp 
= 1399021242030 t.track_ecommerce_transaction( @@ -750,7 +754,7 @@ def test_track_ecommerce_transaction_with_items( mok_complete_payload.side_effect = mocked_complete_payload mok_track_trans_item.side_effect = mocked_track_trans_item - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evTstamp = 1399021242030 transItems = [ @@ -839,7 +843,7 @@ def test_track_link_click(self, mok_track_unstruct: Any) -> None: mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evTstamp = 1399021242030 @@ -877,7 +881,7 @@ def test_track_link_click_optional_none(self, mok_track_unstruct: Any) -> None: mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) t.track_link_click("example.com") @@ -901,7 +905,7 @@ def test_track_add_to_cart(self, mok_track_unstruct: Any) -> None: mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evTstamp = 1399021242030 @@ -941,7 +945,7 @@ def test_track_add_to_cart_optional_none(self, mok_track_unstruct: Any) -> None: mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) t.track_add_to_cart("sku1234", 1) @@ -963,7 +967,7 @@ def test_track_remove_from_cart(self, mok_track_unstruct: Any) -> None: mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evTstamp = 1399021242030 @@ -1005,7 +1009,7 @@ def test_track_remove_from_cart_optional_none( mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) t.track_remove_from_cart("sku1234", 1) @@ -1027,7 +1031,7 @@ def test_track_form_change(self, mok_track_unstruct: 
Any) -> None: mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evTstamp = 1399021242030 @@ -1067,7 +1071,7 @@ def test_track_form_change_optional_none(self, mok_track_unstruct: Any) -> None: mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) t.track_form_change("testFormId", "testElemId", "INPUT", "testValue") expected = { @@ -1093,7 +1097,7 @@ def test_track_form_submit(self, mok_track_unstruct: Any) -> None: mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evTstamp = 1399021242030 elems = [ @@ -1137,7 +1141,7 @@ def test_track_form_submit_invalid_element_type( mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evTstamp = 1399021242030 elems = [ @@ -1167,7 +1171,7 @@ def test_track_form_submit_invalid_element_type_disabled_contracts( mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evTstamp = 1399021242030 elems = [ @@ -1210,7 +1214,7 @@ def test_track_form_submit_optional_none(self, mok_track_unstruct: Any) -> None: mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) t.track_form_submit("testFormId") expected = {"schema": FORM_SUBMIT_SCHEMA, "data": {"formId": "testFormId"}} @@ -1228,7 +1232,7 @@ def test_track_form_submit_empty_elems(self, mok_track_unstruct: Any) -> None: mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) t.track_form_submit("testFormId", elements=[]) expected = {"schema": FORM_SUBMIT_SCHEMA, "data": {"formId": 
"testFormId"}} @@ -1244,7 +1248,7 @@ def test_track_site_search(self, mok_track_unstruct: Any) -> None: mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evTstamp = 1399021242030 @@ -1275,7 +1279,7 @@ def test_track_site_search_optional_none(self, mok_track_unstruct: Any) -> None: mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) t.track_site_search(["track", "search"]) expected = { @@ -1296,7 +1300,7 @@ def test_track_mobile_screen_view(self, mok_track_unstruct: Any) -> None: mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evTstamp = 1399021242030 @@ -1322,7 +1326,7 @@ def test_track_screen_view(self, mok_track_unstruct: Any) -> None: mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evTstamp = 1399021242030 diff --git a/snowplow_tracker/tracker.py b/snowplow_tracker/tracker.py index 20882a4f..95e79120 100644 --- a/snowplow_tracker/tracker.py +++ b/snowplow_tracker/tracker.py @@ -54,20 +54,20 @@ class Tracker: def __init__( self, + namespace: str, emitters: Union[List[EmitterProtocol], EmitterProtocol], subject: Optional[_subject.Subject] = None, - namespace: Optional[str] = None, app_id: Optional[str] = None, encode_base64: bool = DEFAULT_ENCODE_BASE64, json_encoder: Optional[JsonEncoderFunction] = None, ) -> None: """ + :param namespace: Identifier for the Tracker instance + :type namespace: string :param emitters: Emitters to which events will be sent :type emitters: list[>0](emitter) | emitter :param subject: Subject to be tracked :type subject: subject | None - :param namespace: Identifier for the Tracker instance - :type namespace: string_or_none 
:param app_id: Application ID :type app_id: string_or_none :param encode_base64: Whether JSONs in the payload should be base-64 encoded @@ -966,5 +966,5 @@ def add_emitter(self, emitter: EmitterProtocol) -> "Tracker": self.emitters.append(emitter) return self - def get_namespace(self): + def get_namespace(self) -> str: return self.standard_nv_pairs["tna"] From e4096d93b6a66a0a3fa0979d0ae618be21f85f20 Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Tue, 25 Apr 2023 15:17:28 +0100 Subject: [PATCH 28/51] Track function to return event_id (close #338) PR #340 * track() returns event id * track_xxx()functions return tracker * Update tracker unit tests --- snowplow_tracker/test/unit/test_tracker.py | 12 ++- snowplow_tracker/tracker.py | 104 ++++++++++----------- 2 files changed, 58 insertions(+), 58 deletions(-) diff --git a/snowplow_tracker/test/unit/test_tracker.py b/snowplow_tracker/test/unit/test_tracker.py index 0862fce5..8e6cb6bf 100644 --- a/snowplow_tracker/test/unit/test_tracker.py +++ b/snowplow_tracker/test/unit/test_tracker.py @@ -223,12 +223,14 @@ def test_track(self) -> None: t = Tracker("namespace", [e1, e2, e3]) - p = Payload({"test": "track"}) - t.track(p) + p = Payload({"eid": "event_id"}) + event_id = t.track(p) - e1.input.assert_called_once_with({"test": "track"}) - e2.input.assert_called_once_with({"test": "track"}) - e3.input.assert_called_once_with({"test": "track"}) + e1.input.assert_called_once_with({"eid": "event_id"}) + e2.input.assert_called_once_with({"eid": "event_id"}) + e3.input.assert_called_once_with({"eid": "event_id"}) + + self.assertEqual(event_id, "event_id") @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 @mock.patch("snowplow_tracker.Tracker.track") diff --git a/snowplow_tracker/tracker.py b/snowplow_tracker/tracker.py index 95e79120..68315085 100644 --- a/snowplow_tracker/tracker.py +++ b/snowplow_tracker/tracker.py @@ -122,17 +122,19 @@ def get_timestamp(tstamp: 
Optional[float] = None) -> int: Tracking methods """ - def track(self, pb: payload.Payload) -> "Tracker": + def track(self, pb: payload.Payload) -> Optional[str]: """ - Send the payload to a emitter + Send the payload to a emitter. Returns the tracked event ID. :param pb: Payload builder :type pb: payload - :rtype: tracker + :rtype: String """ for emitter in self.emitters: emitter.input(pb.nv_pairs) - return self + + if "eid" in pb.nv_pairs.keys(): + return pb.nv_pairs["eid"] def complete_payload( self, @@ -140,7 +142,7 @@ def complete_payload( context: Optional[List[SelfDescribingJson]], tstamp: Optional[float], event_subject: Optional[_subject.Subject], - ) -> "Tracker": + ) -> Optional[str]: """ Called by all tracking events to add the standard name-value pairs to the Payload object irrespective of the tracked event. @@ -153,7 +155,7 @@ def complete_payload( :type tstamp: int | float | None :param event_subject: Optional per event subject :type event_subject: subject | None - :rtype: tracker + :rtype: String """ pb.add("eid", Tracker.get_uuid()) @@ -199,7 +201,7 @@ def track_page_view( :type tstamp: int | float | None :param event_subject: Optional per event subject :type event_subject: subject | None - :rtype: tracker + :rtype: Tracker """ non_empty_string(page_url) @@ -209,7 +211,8 @@ def track_page_view( pb.add("page", page_title) pb.add("refr", referrer) - return self.complete_payload(pb, context, tstamp, event_subject) + self.complete_payload(pb, context, tstamp, event_subject) + return self def track_page_ping( self, @@ -245,7 +248,7 @@ def track_page_ping( :type tstamp: int | float | None :param event_subject: Optional per event subject :type event_subject: subject | None - :rtype: tracker + :rtype: Tracker """ non_empty_string(page_url) @@ -259,7 +262,8 @@ def track_page_ping( pb.add("pp_miy", min_y) pb.add("pp_may", max_y) - return self.complete_payload(pb, context, tstamp, event_subject) + self.complete_payload(pb, context, tstamp, event_subject) + 
return self def track_link_click( self, @@ -289,7 +293,7 @@ def track_link_click( :type tstamp: int | float | None :param event_subject: Optional per event subject :type event_subject: subject | None - :rtype: tracker + :rtype: Tracker """ non_empty_string(target_url) @@ -308,9 +312,8 @@ def track_link_click( "%s/link_click/%s/1-0-1" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_self_describing_event( - event_json, context, tstamp, event_subject - ) + self.track_self_describing_event(event_json, context, tstamp, event_subject) + return self def track_add_to_cart( self, @@ -343,7 +346,7 @@ def track_add_to_cart( :type tstamp: int | float | None :param event_subject: Optional per event subject :type event_subject: subject | None - :rtype: tracker + :rtype: Tracker """ warn( "track_add_to_cart will be deprecated in future versions.", @@ -368,9 +371,8 @@ def track_add_to_cart( "%s/add_to_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_self_describing_event( - event_json, context, tstamp, event_subject - ) + self.track_self_describing_event(event_json, context, tstamp, event_subject) + return self def track_remove_from_cart( self, @@ -403,7 +405,7 @@ def track_remove_from_cart( :type tstamp: int | float | None :param event_subject: Optional per event subject :type event_subject: subject | None - :rtype: tracker + :rtype: Tracker """ warn( "track_remove_from_cart will be deprecated in future versions.", @@ -428,9 +430,8 @@ def track_remove_from_cart( "%s/remove_from_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_self_describing_event( - event_json, context, tstamp, event_subject - ) + self.track_self_describing_event(event_json, context, tstamp, event_subject) + return self def track_form_change( self, @@ -463,7 +464,7 @@ def track_form_change( :type tstamp: int | float | None :param event_subject: Optional per event subject :type event_subject: subject | None - :rtype: tracker + 
:rtype: Tracker """ non_empty_string(form_id) one_of(node_name, FORM_NODE_NAMES) @@ -484,9 +485,8 @@ def track_form_change( "%s/change_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_self_describing_event( - event_json, context, tstamp, event_subject - ) + self.track_self_describing_event(event_json, context, tstamp, event_subject) + return self def track_form_submit( self, @@ -510,7 +510,7 @@ def track_form_submit( :type tstamp: int | float | None :param event_subject: Optional per event subject :type event_subject: subject | None - :rtype: tracker + :rtype: Tracker """ non_empty_string(form_id) for element in elements or []: @@ -527,9 +527,8 @@ def track_form_submit( "%s/submit_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_self_describing_event( - event_json, context, tstamp, event_subject - ) + self.track_self_describing_event(event_json, context, tstamp, event_subject) + return self def track_site_search( self, @@ -556,7 +555,7 @@ def track_site_search( :type tstamp: int | float | None :param event_subject: Optional per event subject :type event_subject: subject | None - :rtype: tracker + :rtype: Tracker """ non_empty(terms) @@ -573,9 +572,8 @@ def track_site_search( "%s/site_search/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_self_describing_event( - event_json, context, tstamp, event_subject - ) + self.track_self_describing_event(event_json, context, tstamp, event_subject) + return self def track_ecommerce_transaction_item( self, @@ -614,7 +612,7 @@ def track_ecommerce_transaction_item( :type tstamp: int | float | None :param event_subject: Optional per event subject :type event_subject: subject | None - :rtype: tracker + :rtype: Tracker """ warn( "track_ecommerce_transaction_item will be deprecated in future versions.", @@ -634,7 +632,8 @@ def track_ecommerce_transaction_item( pb.add("ti_qu", quantity) pb.add("ti_cu", currency) - return 
self.complete_payload(pb, context, tstamp, event_subject) + self.complete_payload(pb, context, tstamp, event_subject) + return self def track_ecommerce_transaction( self, @@ -679,7 +678,7 @@ def track_ecommerce_transaction( :type tstamp: int | float | None :param event_subject: Optional per event subject :type event_subject: subject | None - :rtype: tracker + :rtype: Tracker """ warn( "track_ecommerce_transaction will be deprecated in future versions.", @@ -734,7 +733,7 @@ def track_screen_view( :type tstamp: int | float | None :param event_subject: Optional per event subject :type event_subject: subject | None - :rtype: tracker + :rtype: Tracker """ warn( "track_screen_view will be deprecated in future versions. Please use track_mobile_screen_view.", @@ -752,9 +751,8 @@ def track_screen_view( screen_view_properties, ) - return self.track_self_describing_event( - event_json, context, tstamp, event_subject - ) + self.track_self_describing_event(event_json, context, tstamp, event_subject) + return self def track_mobile_screen_view( self, @@ -790,7 +788,7 @@ def track_mobile_screen_view( :type tstamp: int | float | None :param event_subject: Optional per event subject :type event_subject: subject | None - :rtype: tracker + :rtype: Tracker """ screen_view_properties = {} @@ -816,9 +814,8 @@ def track_mobile_screen_view( "%s/screen_view/%s/1-0-0" % (MOBILE_SCHEMA_PATH, SCHEMA_TAG), screen_view_properties, ) - return self.track_self_describing_event( - event_json, context, tstamp, event_subject - ) + self.track_self_describing_event(event_json, context, tstamp, event_subject) + return self def track_struct_event( self, @@ -850,7 +847,7 @@ def track_struct_event( :type tstamp: int | float | None :param event_subject: Optional per event subject :type event_subject: subject | None - :rtype: tracker + :rtype: Tracker """ non_empty_string(category) non_empty_string(action) @@ -863,7 +860,8 @@ def track_struct_event( pb.add("se_pr", property_) pb.add("se_va", value) - return 
self.complete_payload(pb, context, tstamp, event_subject) + self.complete_payload(pb, context, tstamp, event_subject) + return self def track_self_describing_event( self, @@ -883,7 +881,7 @@ def track_self_describing_event( :type tstamp: int | float | None :param event_subject: Optional per event subject :type event_subject: subject | None - :rtype: tracker + :rtype: Tracker """ envelope = SelfDescribingJson( @@ -895,7 +893,8 @@ def track_self_describing_event( pb.add("e", "ue") pb.add_json(envelope, self.encode_base64, "ue_px", "ue_pr", self.json_encoder) - return self.complete_payload(pb, context, tstamp, event_subject) + self.complete_payload(pb, context, tstamp, event_subject) + return self # Alias def track_unstruct_event( @@ -916,16 +915,15 @@ def track_unstruct_event( :type tstamp: int | float | None :param event_subject: Optional per event subject :type event_subject: subject | None - :rtype: tracker + :rtype: Tracker """ warn( "track_unstruct_event will be deprecated in future versions. 
Please use track_self_describing_event.", DeprecationWarning, stacklevel=2, ) - return self.track_self_describing_event( - event_json, context, tstamp, event_subject - ) + self.track_self_describing_event(event_json, context, tstamp, event_subject) + return self def flush(self, is_async: bool = False) -> "Tracker": """ From 6af6add43bda1917c8eb2a85147230cc5f5b7379 Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Fri, 28 Apr 2023 09:44:05 +0100 Subject: [PATCH 29/51] Fix namespace assignment in Snowplow API (close #341) PR #342 * Add named variable in tracker initialisation * Add Snowplow demo app to CI --- .github/workflows/ci.yml | 6 ++++++ snowplow_tracker/snowplow.py | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 106a2585..5062c1ca 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -47,6 +47,12 @@ jobs: cd tracker_api_example python app.py "localhost:9090" + - name: Snowplow Demo + run: | + cd examples + cd snowplow_api_example + python snowplow_app.py "localhost:9090" + - name: Coveralls uses: AndreMiras/coveralls-python-action@develop with: diff --git a/snowplow_tracker/snowplow.py b/snowplow_tracker/snowplow.py index 953c1587..d824ed26 100644 --- a/snowplow_tracker/snowplow.py +++ b/snowplow_tracker/snowplow.py @@ -72,7 +72,7 @@ def create_tracker( raise TypeError("Emitter or Collector URL must be provided") emitter = Emitter( - endpoint, + endpoint=endpoint, method=method, batch_size=emitter_config.batch_size, on_success=emitter_config.on_success, @@ -85,8 +85,8 @@ def create_tracker( ) tracker = Tracker( - emitter, namespace=namespace, + emitters=emitter, app_id=app_id, subject=subject, encode_base64=tracker_config.encode_base64, From b55c30205399b415e8dae7cb748d66ddb96c2ac7 Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Thu, 8 Jun 2023 15:21:24 +0100 Subject: 
[PATCH 30/51] Refactor track_xxx() methods (close #343) PR #346 * Add Event class * Add PagePing Event * Add Pageview Event * Add SelfDescribing Event * Add StructEvent * Add ScreenView event * Add Events unit tests * Update example apps with new API * Add deprecation warnings to track_xxx functions --- examples/redis_example/redis_app.py | 35 +- examples/snowplow_api_example/snowplow_app.py | 33 +- examples/tracker_api_example/app.py | 43 +- setup.py | 5 +- snowplow_tracker/__init__.py | 8 + snowplow_tracker/constants.py | 27 + snowplow_tracker/events/__init__.py | 22 + snowplow_tracker/events/event.py | 134 ++++ snowplow_tracker/events/page_ping.py | 155 +++++ snowplow_tracker/events/page_view.py | 95 +++ snowplow_tracker/events/screen_view.py | 199 ++++++ snowplow_tracker/events/self_describing.py | 98 +++ snowplow_tracker/events/structured_event.py | 134 ++++ snowplow_tracker/self_describing_json.py | 10 + .../test/integration/test_integration.py | 12 +- snowplow_tracker/test/unit/test_event.py | 72 ++ snowplow_tracker/test/unit/test_tracker.py | 625 ++++++++---------- snowplow_tracker/tracker.py | 518 ++++++++------- 18 files changed, 1612 insertions(+), 613 deletions(-) create mode 100644 snowplow_tracker/constants.py create mode 100644 snowplow_tracker/events/__init__.py create mode 100644 snowplow_tracker/events/event.py create mode 100644 snowplow_tracker/events/page_ping.py create mode 100644 snowplow_tracker/events/page_view.py create mode 100644 snowplow_tracker/events/screen_view.py create mode 100644 snowplow_tracker/events/self_describing.py create mode 100644 snowplow_tracker/events/structured_event.py create mode 100644 snowplow_tracker/test/unit/test_event.py diff --git a/examples/redis_example/redis_app.py b/examples/redis_example/redis_app.py index a4ac1e77..553a547f 100644 --- a/examples/redis_example/redis_app.py +++ b/examples/redis_example/redis_app.py @@ -1,4 +1,12 @@ -from snowplow_tracker import Tracker +from snowplow_tracker import ( + 
Tracker, + ScreenView, + PagePing, + PageView, + SelfDescribing, + StructuredEvent, + SelfDescribingJson, +) from snowplow_tracker.typing import PayloadDict import json import redis @@ -51,9 +59,28 @@ def main(): t = Tracker(namespace="snowplow_tracker", emitters=emitter) - t.track_page_view("https://www.snowplow.io", "Homepage") - t.track_page_ping("https://www.snowplow.io", "Homepage") - t.track_link_click("https://www.snowplow.io") + page_view = PageView(page_url="https://www.snowplow.io", page_title="Homepage") + t.track(page_view) + + page_ping = PagePing(page_url="https://www.snowplow.io", page_title="Homepage") + t.track(page_ping) + + link_click = SelfDescribing( + SelfDescribingJson( + "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1", + {"targetUrl": "https://www.snowplow.io"}, + ) + ) + t.track(link_click) + + id = t.get_uuid() + screen_view = ScreenView(id_=id, name="name") + t.track(screen_view) + + struct_event = StructuredEvent( + category="shop", action="add-to-basket", property_="pcs", value=2 + ) + t.track(struct_event) if __name__ == "__main__": diff --git a/examples/snowplow_api_example/snowplow_app.py b/examples/snowplow_api_example/snowplow_app.py index 9cc9f093..1bbd21c8 100644 --- a/examples/snowplow_api_example/snowplow_app.py +++ b/examples/snowplow_api_example/snowplow_app.py @@ -5,6 +5,11 @@ Subject, TrackerConfiguration, SelfDescribingJson, + PagePing, + PageView, + ScreenView, + SelfDescribing, + StructuredEvent, ) @@ -15,11 +20,12 @@ def get_url_from_args(): def main(): - collector_url = get_url_from_args() # Configure Emitter custom_retry_codes = {500: False, 401: True} - emitter_config = EmitterConfiguration(batch_size=5, custom_retry_codes=custom_retry_codes) + emitter_config = EmitterConfiguration( + batch_size=5, custom_retry_codes=custom_retry_codes + ) # Configure Tracker tracker_config = TrackerConfiguration(encode_base64=True) @@ -39,19 +45,28 @@ def main(): tracker = Snowplow.get_tracker("ns") - 
tracker.track_page_view("https://www.snowplow.io", "Homepage") - tracker.track_page_ping("https://www.snowplow.io", "Homepage") - tracker.track_link_click("https://www.snowplow.io/about") - tracker.track_page_view("https://www.snowplow.io/about", "About") + page_view = PageView(page_url="https://www.snowplow.io", page_title="Homepage") + tracker.track(page_view) + + page_ping = PagePing(page_url="https://www.snowplow.io", page_title="Homepage") + tracker.track(page_ping) - tracker.track_self_describing_event( + link_click = SelfDescribing( SelfDescribingJson( "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1", - {"targetUrl": "example.com"}, + {"targetUrl": "https://www.snowplow.io"}, ) ) - tracker.track_struct_event("shop", "add-to-basket", None, "pcs", 2) + tracker.track(link_click) + id = tracker.get_uuid() + screen_view = ScreenView(id_=id, name="name") + tracker.track(screen_view) + + struct_event = StructuredEvent( + category="shop", action="add-to-basket", property_="pcs", value=2 + ) + tracker.track(struct_event) tracker.flush() diff --git a/examples/tracker_api_example/app.py b/examples/tracker_api_example/app.py index 2483d509..3777a59c 100644 --- a/examples/tracker_api_example/app.py +++ b/examples/tracker_api_example/app.py @@ -4,6 +4,11 @@ Emitter, Subject, SelfDescribingJson, + PageView, + PagePing, + SelfDescribing, + ScreenView, + StructuredEvent, ) import sys @@ -26,17 +31,41 @@ def main(): print("Sending events to " + e.endpoint) - t.track_page_view("https://www.snowplow.io", "Homepage") - t.track_page_ping("https://www.snowplow.io", "Homepage") - t.track_link_click("https://www.snowplow.io") + page_view = PageView( + page_url="https://www.snowplow.io", + page_title="Homepage", + event_subject=t.subject, + ) + t.track(page_view) + + page_ping = PagePing( + page_url="https://www.snowplow.io", + page_title="Homepage", + event_subject=t.subject, + ) + t.track(page_ping) - t.track_self_describing_event( + link_click = SelfDescribing( 
SelfDescribingJson( "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1", - {"targetUrl": "example.com"}, - ) + {"targetUrl": "https://www.snowplow.io"}, + ), + event_subject=t.subject, + ) + t.track(link_click) + + id = t.get_uuid() + screen_view = ScreenView(id_=id, name="name", event_subject=t.subject) + t.track(screen_view) + + struct_event = StructuredEvent( + category="shop", + action="add-to-basket", + property_="pcs", + value=2, + event_subject=t.subject, ) - t.track_struct_event("shop", "add-to-basket", None, "pcs", 2) + t.track(struct_event) t.flush() diff --git a/setup.py b/setup.py index 2510efa9..b67c8f3d 100644 --- a/setup.py +++ b/setup.py @@ -36,10 +36,7 @@ version="0.15.0", author=authors_str, author_email=authors_email_str, - packages=[ - "snowplow_tracker", - "snowplow_tracker.test", - ], + packages=["snowplow_tracker", "snowplow_tracker.test", "snowplow_tracker.events"], url="http://snowplow.io", license="Apache License 2.0", description="Snowplow event tracker for Python. Add analytics to your Python and Django apps, webapps and games", diff --git a/snowplow_tracker/__init__.py b/snowplow_tracker/__init__.py index 8745b370..689b2539 100644 --- a/snowplow_tracker/__init__.py +++ b/snowplow_tracker/__init__.py @@ -8,3 +8,11 @@ from snowplow_tracker.snowplow import Snowplow from snowplow_tracker.contracts import disable_contracts, enable_contracts from snowplow_tracker.event_store import EventStore +from snowplow_tracker.events import ( + Event, + PageView, + PagePing, + SelfDescribing, + StructuredEvent, + ScreenView, +) diff --git a/snowplow_tracker/constants.py b/snowplow_tracker/constants.py new file mode 100644 index 00000000..579ff86e --- /dev/null +++ b/snowplow_tracker/constants.py @@ -0,0 +1,27 @@ +# """ +# constants.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. 
+ +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ +from typing import List +from snowplow_tracker import _version, SelfDescribingJson + +VERSION = "py-%s" % _version.__version__ +DEFAULT_ENCODE_BASE64 = True +BASE_SCHEMA_PATH = "iglu:com.snowplowanalytics.snowplow" +MOBILE_SCHEMA_PATH = "iglu:com.snowplowanalytics.mobile" +SCHEMA_TAG = "jsonschema" +CONTEXT_SCHEMA = "%s/contexts/%s/1-0-1" % (BASE_SCHEMA_PATH, SCHEMA_TAG) +UNSTRUCT_EVENT_SCHEMA = "%s/unstruct_event/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG) +ContextArray = List[SelfDescribingJson] diff --git a/snowplow_tracker/events/__init__.py b/snowplow_tracker/events/__init__.py new file mode 100644 index 00000000..0f75c84f --- /dev/null +++ b/snowplow_tracker/events/__init__.py @@ -0,0 +1,22 @@ +# """ +# __init__.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. 
See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ +from snowplow_tracker.events.event import Event +from snowplow_tracker.events.page_ping import PagePing +from snowplow_tracker.events.page_view import PageView +from snowplow_tracker.events.self_describing import SelfDescribing +from snowplow_tracker.events.structured_event import StructuredEvent +from snowplow_tracker.events.screen_view import ScreenView diff --git a/snowplow_tracker/events/event.py b/snowplow_tracker/events/event.py new file mode 100644 index 00000000..7f510ba3 --- /dev/null +++ b/snowplow_tracker/events/event.py @@ -0,0 +1,134 @@ +# """ +# event.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ + +from typing import Optional, List +from snowplow_tracker import payload +from snowplow_tracker.subject import Subject + +from snowplow_tracker.self_describing_json import SelfDescribingJson + +from snowplow_tracker.constants import CONTEXT_SCHEMA +from snowplow_tracker.typing import JsonEncoderFunction, PayloadDict + + +class Event(object): + """ + Event class which contains + elements that can be set in all events. These are context, trueTimestamp, and Subject. + + Context is a list of custom SelfDescribingJson entities. + TrueTimestamp is a user-defined timestamp. 
+ Subject is an event-specific Subject. Its fields will override those of the + Tracker-associated Subject, if present. + + """ + + def __init__( + self, + dict_: Optional[PayloadDict] = None, + event_subject: Optional[Subject] = None, + context: Optional[List[SelfDescribingJson]] = None, + true_timestamp: Optional[float] = None, + ) -> None: + """ + Constructor + :param dict_: Optional Dictionary to be added to the Events Payload + :type dict_: dict(string:\\*) | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :param context: Custom context for the event + :type context: context_array | None + :param true_timestamp: Optional event timestamp in milliseconds + :type true_timestamp: int | float | None + + """ + self.payload = payload.Payload(dict_=dict_) + self.event_subject = event_subject + self.context = context or [] + self.true_timestamp = true_timestamp + + def build_payload( + self, + encode_base64: bool, + json_encoder: Optional[JsonEncoderFunction], + subject: Optional[Subject] = None, + ) -> "payload.Payload": + """ + :param encode_base64: Whether JSONs in the payload should be base-64 encoded + :type encode_base64: bool + :param json_encoder: Custom JSON serializer that gets called on non-serializable object + :type json_encoder: function | None + :param subject: Optional per event subject + :type subject: subject | None + :rtype: payload.Payload + """ + if len(self.context) > 0: + context_jsons = list(map(lambda c: c.to_json(), self.context)) + context_envelope = SelfDescribingJson( + CONTEXT_SCHEMA, context_jsons + ).to_json() + self.payload.add_json( + context_envelope, encode_base64, "cx", "co", json_encoder + ) + + if isinstance( + self.true_timestamp, + ( + int, + float, + ), + ): + self.payload.add("ttm", int(self.true_timestamp)) + + fin_subject = self.event_subject if self.event_subject is not None else subject + + if fin_subject is not None: + self.payload.add_dict(fin_subject.standard_nv_pairs) + 
return self.payload + + @property + def event_subject(self) -> Optional[Subject]: + """ + Optional per event subject + """ + return self._event_subject + + @event_subject.setter + def event_subject(self, value: Optional[Subject]): + self._event_subject = value + + @property + def context(self) -> List[SelfDescribingJson]: + """ + Custom context for the event + """ + return self._context + + @context.setter + def context(self, value: List[SelfDescribingJson]): + self._context = value + + @property + def true_timestamp(self) -> Optional[float]: + """ + Optional event timestamp in milliseconds + """ + return self._true_timestamp + + @true_timestamp.setter + def true_timestamp(self, value: Optional[float]): + self._true_timestamp = value diff --git a/snowplow_tracker/events/page_ping.py b/snowplow_tracker/events/page_ping.py new file mode 100644 index 00000000..b0084797 --- /dev/null +++ b/snowplow_tracker/events/page_ping.py @@ -0,0 +1,155 @@ +# """ +# page_ping.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ +from snowplow_tracker.events.event import Event +from typing import Optional, List +from snowplow_tracker.self_describing_json import SelfDescribingJson +from snowplow_tracker.subject import Subject +from snowplow_tracker.contracts import non_empty_string + + +class PagePing(Event): + """ + Constructs a PagePing event object. + + When tracked, generates a "pp" or "page_ping" event. + + """ + + def __init__( + self, + page_url: str, + page_title: Optional[str] = None, + referrer: Optional[str] = None, + min_x: Optional[int] = None, + max_x: Optional[int] = None, + min_y: Optional[int] = None, + max_y: Optional[int] = None, + event_subject: Optional[Subject] = None, + context: Optional[List[SelfDescribingJson]] = None, + true_timestamp: Optional[float] = None, + ) -> None: + """ + :param page_url: URL of the viewed page + :type page_url: non_empty_string + :param page_title: Title of the viewed page + :type page_title: string_or_none + :param referrer: Referrer of the page + :type referrer: string_or_none + :param min_x: Minimum page x offset seen in the last ping period + :type min_x: int | None + :param max_x: Maximum page x offset seen in the last ping period + :type max_x: int | None + :param min_y: Minimum page y offset seen in the last ping period + :type min_y: int | None + :param max_y: Maximum page y offset seen in the last ping period + :type max_y: int | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :param context: Custom context for the event + :type context: context_array | None + :param true_timestamp: Optional event timestamp in milliseconds + :type true_timestamp: int | float | None + """ + super(PagePing, self).__init__( + event_subject=event_subject, context=context, true_timestamp=true_timestamp + ) + self.payload.add("e", "pp") + self.page_url = page_url + self.page_title = page_title + self.referrer = referrer + self.min_x = min_x + self.max_x = max_x + self.min_y = min_y + self.max_y = 
max_y + + @property + def page_url(self) -> str: + """ + URL of the viewed page + """ + return self.payload.get("url") + + @page_url.setter + def page_url(self, value: str): + non_empty_string(value) + self.payload.add("url", value) + + @property + def page_title(self) -> Optional[str]: + """ + Title of the viewed page + """ + return self.payload.get("page") + + @page_title.setter + def page_title(self, value: Optional[str]): + self.payload.add("page", value) + + @property + def referrer(self) -> Optional[str]: + """ + The referrer of the page + """ + return self.payload.get("refr") + + @referrer.setter + def referrer(self, value: Optional[str]): + self.payload.add("refr", value) + + @property + def min_x(self) -> Optional[int]: + """ + Minimum page x offset seen in the last ping period + """ + return self.payload.get("pp_mix") + + @min_x.setter + def min_x(self, value: Optional[int]): + self.payload.add("pp_mix", value) + + @property + def max_x(self) -> Optional[int]: + """ + Maximum page x offset seen in the last ping period + """ + return self.payload.get("pp_max") + + @max_x.setter + def max_x(self, value: Optional[int]): + self.payload.add("pp_max", value) + + @property + def min_y(self) -> Optional[int]: + """ + Minimum page y offset seen in the last ping period + """ + return self.payload.get("pp_miy") + + @min_y.setter + def min_y(self, value: Optional[int]): + self.payload.add("pp_miy", value) + + @property + def max_y(self) -> Optional[int]: + """ + Maximum page y offset seen in the last ping period + """ + return self.payload.get("pp_may") + + @max_y.setter + def max_y(self, value: Optional[int]): + self.payload.add("pp_may", value) diff --git a/snowplow_tracker/events/page_view.py b/snowplow_tracker/events/page_view.py new file mode 100644 index 00000000..dc9ca51f --- /dev/null +++ b/snowplow_tracker/events/page_view.py @@ -0,0 +1,95 @@ +# """ +# page_view.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. 
+ +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ +from snowplow_tracker.events.event import Event +from typing import Optional, List +from snowplow_tracker.subject import Subject +from snowplow_tracker.self_describing_json import SelfDescribingJson +from snowplow_tracker.contracts import non_empty_string + + +class PageView(Event): + """ + Constructs a PageView event object. + + When tracked, generates a "pv" or "page_view" event. 
+ + """ + + def __init__( + self, + page_url: str, + page_title: Optional[str] = None, + referrer: Optional[str] = None, + event_subject: Optional[Subject] = None, + context: Optional[List[SelfDescribingJson]] = None, + true_timestamp: Optional[float] = None, + ) -> None: + """ + :param page_url: URL of the viewed page + :type page_url: non_empty_string + :param page_title: Title of the viewed page + :type page_title: string_or_none + :param referrer: Referrer of the page + :type referrer: string_or_none + :param event_subject: Optional per event subject + :type event_subject: subject | None + :param context: Custom context for the event + :type context: context_array | None + :param true_timestamp: Optional event timestamp in milliseconds + :type true_timestamp: int | float | None + """ + super(PageView, self).__init__( + event_subject=event_subject, context=context, true_timestamp=true_timestamp + ) + self.payload.add("e", "pv") + self.page_url = page_url + self.page_title = page_title + self.referrer = referrer + + @property + def page_url(self) -> str: + """ + URL of the viewed page + """ + return self.payload.get("url") + + @page_url.setter + def page_url(self, value: str): + non_empty_string(value) + self.payload.add("url", value) + + @property + def page_title(self) -> Optional[str]: + """ + Title of the viewed page + """ + return self.payload.get("page") + + @page_title.setter + def page_title(self, value: Optional[str]): + self.payload.add("page", value) + + @property + def referrer(self) -> Optional[str]: + """ + The referrer of the page + """ + return self.payload.get("refr") + + @referrer.setter + def referrer(self, value: Optional[str]): + self.payload.add("refr", value) diff --git a/snowplow_tracker/events/screen_view.py b/snowplow_tracker/events/screen_view.py new file mode 100644 index 00000000..d0cea5d0 --- /dev/null +++ b/snowplow_tracker/events/screen_view.py @@ -0,0 +1,199 @@ +# """ +# screen_view.py + +# Copyright (c) 2013-2023 Snowplow 
Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ + +from typing import Optional, List +from snowplow_tracker.typing import JsonEncoderFunction +from snowplow_tracker.events.event import Event +from snowplow_tracker.events.self_describing import SelfDescribing +from snowplow_tracker import SelfDescribingJson +from snowplow_tracker.constants import ( + MOBILE_SCHEMA_PATH, + SCHEMA_TAG, +) +from snowplow_tracker import payload +from snowplow_tracker.subject import Subject +from snowplow_tracker.contracts import non_empty_string + + +class ScreenView(Event): + """ + Constructs a ScreenView event object. + + When tracked, generates a SelfDescribing event (event type "ue"). + + Schema: `iglu:com.snowplowanalytics.mobile/screen_view/jsonschema/1-0-0` + """ + + def __init__( + self, + id_: str, + name: str, + type: Optional[str] = None, + previous_name: Optional[str] = None, + previous_id: Optional[str] = None, + previous_type: Optional[str] = None, + transition_type: Optional[str] = None, + event_subject: Optional[Subject] = None, + context: Optional[List[SelfDescribingJson]] = None, + true_timestamp: Optional[float] = None, + ) -> None: + """ + :param id_: Screen view ID. This must be of type UUID. + :type id_: string + :param name: The name of the screen view event + :type name: string + :param type: The type of screen that was viewed e.g feed / carousel. 
+ :type type: string | None + :param previous_name: The name of the previous screen. + :type previous_name: string | None + :param previous_id: The screenview ID of the previous screenview. + :type previous_id: string | None + :param previous_type: The screen type of the previous screenview + :type previous_type: string | None + :param transition_type: The type of transition that led to the screen being viewed. + :type transition_type: string | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :param context: Custom context for the event + :type context: context_array | None + :param true_timestamp: Optional event timestamp in milliseconds + :type true_timestamp: int | float | None + """ + super(ScreenView, self).__init__( + event_subject=event_subject, context=context, true_timestamp=true_timestamp + ) + self.screen_view_properties = {} + self.id_ = id_ + self.name = name + self.type = type + self.previous_name = previous_name + self.previous_id = previous_id + self.previous_type = previous_type + self.transition_type = transition_type + + @property + def id_(self) -> str: + """ + Screen view ID. This must be of type UUID. + """ + return self.screen_view_properties["id"] + + @id_.setter + def id_(self, value: str): + non_empty_string(value) + self.screen_view_properties["id"] = value + + @property + def name(self) -> str: + """ + The name of the screen view event + """ + return self.screen_view_properties["name"] + + @name.setter + def name(self, value: str): + non_empty_string(value) + self.screen_view_properties["name"] = value + + @property + def type(self) -> Optional[str]: + """ + The type of screen that was viewed e.g. feed / carousel + """ + return self.screen_view_properties["type"] + + @type.setter + def type(self, value: Optional[str]): + if value is not None: + self.screen_view_properties["type"] = value + + @property + def previous_name(self) -> Optional[str]: + """ + The name of the previous screen. 
+ """ + return self.screen_view_properties["previousName"] + + @previous_name.setter + def previous_name(self, value: Optional[str]): + if value is not None: + self.screen_view_properties["previousName"] = value + + @property + def previous_id(self) -> Optional[str]: + """ + The screenview ID of the previous screenview. + """ + return self.screen_view_properties["previousId"] + + @previous_id.setter + def previous_id(self, value: Optional[str]): + if value is not None: + self.screen_view_properties["previousId"] = value + + @property + def previous_type(self) -> Optional[str]: + """ + The screen type of the previous screenview + """ + return self.screen_view_properties["previousType"] + + @previous_type.setter + def previous_type(self, value: Optional[str]): + if value is not None: + self.screen_view_properties["previousType"] = value + + @property + def transition_type(self) -> Optional[str]: + """ + The type of transition that led to the screen being viewed + """ + return self.screen_view_properties["transitionType"] + + @transition_type.setter + def transition_type(self, value: Optional[str]): + if value is not None: + self.screen_view_properties["transitionType"] = value + + def build_payload( + self, + encode_base64: bool, + json_encoder: Optional[JsonEncoderFunction], + subject: Optional[Subject] = None, + ) -> "payload.Payload": + """ + :param encode_base64: Whether JSONs in the payload should be base-64 encoded + :type encode_base64: bool + :param json_encoder: Custom JSON serializer that gets called on non-serializable object + :type json_encoder: function | None + :param subject: Optional per event subject + :type subject: subject | None + :rtype: payload.Payload + """ + event_json = SelfDescribingJson( + "%s/screen_view/%s/1-0-0" % (MOBILE_SCHEMA_PATH, SCHEMA_TAG), + self.screen_view_properties, + ) + self_describing = SelfDescribing( + event_json=event_json, + event_subject=self.event_subject, + context=self.context, + 
true_timestamp=self.true_timestamp, + ) + return self_describing.build_payload( + encode_base64, json_encoder, subject=subject + ) diff --git a/snowplow_tracker/events/self_describing.py b/snowplow_tracker/events/self_describing.py new file mode 100644 index 00000000..e560eb72 --- /dev/null +++ b/snowplow_tracker/events/self_describing.py @@ -0,0 +1,98 @@ +# """ +# self_describing.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ +from typing import Optional, List +from snowplow_tracker.typing import JsonEncoderFunction +from snowplow_tracker.events.event import Event +from snowplow_tracker import SelfDescribingJson +from snowplow_tracker.constants import UNSTRUCT_EVENT_SCHEMA +from snowplow_tracker import payload +from snowplow_tracker.subject import Subject +from snowplow_tracker.contracts import non_empty + + +class SelfDescribing(Event): + """ + Constructs a SelfDescribing event object. + + This is a customisable event type which allows you to track anything describable + by a JsonSchema. + + When tracked, generates a self-describing event (event type "ue"). 
+ """ + + def __init__( + self, + event_json: SelfDescribingJson, + event_subject: Optional[Subject] = None, + context: Optional[List[SelfDescribingJson]] = None, + true_timestamp: Optional[float] = None, + ) -> None: + """ + :param event_json: The properties of the event. Has two fields: + A "data" field containing the event properties and + A "schema" field identifying the schema against which the data is validated + :type event_json: self_describing_json + :param event_subject: Optional per event subject + :type event_subject: subject | None + :param context: Custom context for the event + :type context: context_array | None + :param true_timestamp: Optional event timestamp in milliseconds + :type true_timestamp: int | float | None + """ + super(SelfDescribing, self).__init__( + event_subject=event_subject, context=context, true_timestamp=true_timestamp + ) + self.payload.add("e", "ue") + self.event_json = event_json + + @property + def event_json(self) -> SelfDescribingJson: + """ + The properties of the event.
Has two fields: + A "data" field containing the event properties and + A "schema" field identifying the schema against which the data is validated + """ + return self._event_json + + @event_json.setter + def event_json(self, value: SelfDescribingJson): + self._event_json = value + + def build_payload( + self, + encode_base64: bool, + json_encoder: Optional[JsonEncoderFunction], + subject: Optional[Subject] = None, + ) -> "payload.Payload": + """ + :param encode_base64: Whether JSONs in the payload should be base-64 encoded + :type encode_base64: bool + :param json_encoder: Custom JSON serializer that gets called on non-serializable object + :type json_encoder: function | None + :param subject: Optional per event subject + :type subject: subject | None + :rtype: payload.Payload + """ + + envelope = SelfDescribingJson( + UNSTRUCT_EVENT_SCHEMA, self.event_json.to_json() + ).to_json() + self.payload.add_json(envelope, encode_base64, "ue_px", "ue_pr", json_encoder) + + return super(SelfDescribing, self).build_payload( + encode_base64=encode_base64, json_encoder=json_encoder, subject=subject + ) diff --git a/snowplow_tracker/events/structured_event.py b/snowplow_tracker/events/structured_event.py new file mode 100644 index 00000000..a92acc0f --- /dev/null +++ b/snowplow_tracker/events/structured_event.py @@ -0,0 +1,134 @@ +# """ +# structured_event.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied.
See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ +from snowplow_tracker.events.event import Event +from typing import Optional, List +from snowplow_tracker.subject import Subject +from snowplow_tracker.self_describing_json import SelfDescribingJson +from snowplow_tracker.contracts import non_empty_string + + +class StructuredEvent(Event): + """ + Constructs a Structured event object. + + This event type is provided to be roughly equivalent to Google Analytics-style events. + Note that it is not automatically clear what data should be placed in what field. + To aid data quality and modeling, agree on business-wide definitions when designing + your tracking strategy. + + We recommend using SelfDescribing - fully custom - events instead. + + When tracked, generates a "struct" or "se" event. + """ + + def __init__( + self, + category: str, + action: str, + label: Optional[str] = None, + property_: Optional[str] = None, + value: Optional[int] = None, + event_subject: Optional[Subject] = None, + context: Optional[List[SelfDescribingJson]] = None, + true_timestamp: Optional[float] = None, + ) -> None: + """ + :param category: Category of the event + :type category: non_empty_string + :param action: The event itself + :type action: non_empty_string + :param label: Refer to the object the action is + performed on + :type label: string_or_none + :param property_: Property associated with either the action + or the object + :type property_: string_or_none + :param value: A value associated with the user action + :type value: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :param context: Custom context for the event + :type context: context_array | None + :param true_timestamp: Optional event timestamp in milliseconds + :type true_timestamp: int | float | None + """ + super(StructuredEvent, self).__init__( + event_subject=event_subject, 
context=context, true_timestamp=true_timestamp + ) + self.payload.add("e", "se") + self.category = category + self.action = action + self.label = label + self.property_ = property_ + self.value = value + + @property + def category(self) -> Optional[str]: + """ + Category of the event + """ + return self.payload.get("se_ca") + + @category.setter + def category(self, value: Optional[str]): + non_empty_string(value) + self.payload.add("se_ca", value) + + @property + def action(self) -> Optional[str]: + """ + The event itself + """ + return self.payload.get("se_ac") + + @action.setter + def action(self, value: Optional[str]): + non_empty_string(value) + self.payload.add("se_ac", value) + + @property + def label(self) -> Optional[str]: + """ + Refer to the object the action is performed on + """ + return self.payload.get("se_la") + + @label.setter + def label(self, value: Optional[str]): + self.payload.add("se_la", value) + + @property + def property_(self) -> Optional[str]: + """ + Property associated with either the action or the object + """ + return self.payload.get("se_pr") + + @property_.setter + def property_(self, value: Optional[str]): + self.payload.add("se_pr", value) + + @property + def value(self) -> Optional[int]: + """ + A value associated with the user action + """ + return self.payload.get("se_va") + + @value.setter + def value(self, value: Optional[int]): + self.payload.add("se_va", value) diff --git a/snowplow_tracker/self_describing_json.py b/snowplow_tracker/self_describing_json.py index b386c039..8f7b65ea 100644 --- a/snowplow_tracker/self_describing_json.py +++ b/snowplow_tracker/self_describing_json.py @@ -19,6 +19,7 @@ from typing import Union from snowplow_tracker.typing import PayloadDict, PayloadDictList +from snowplow_tracker.contracts import non_empty_string class SelfDescribingJson(object): @@ -26,6 +27,15 @@ def __init__(self, schema: str, data: Union[PayloadDict, PayloadDictList]) -> No self.schema = schema self.data = data + @property + 
def schema(self) -> str: + return self._schema + + @schema.setter + def schema(self, value: str): + non_empty_string(value) + self._schema = value + def to_json(self) -> PayloadDict: return {"schema": self.schema, "data": self.data} diff --git a/snowplow_tracker/test/integration/test_integration.py b/snowplow_tracker/test/integration/test_integration.py index f9323a1c..a95e10a8 100644 --- a/snowplow_tracker/test/integration/test_integration.py +++ b/snowplow_tracker/test/integration/test_integration.py @@ -105,14 +105,15 @@ def test_integration_ecommerce_transaction(self) -> None: t = tracker.Tracker("namespace", [get_emitter], default_subject) with HTTMock(pass_response_content): t.track_ecommerce_transaction( - "6a8078be", - 35, + order_id="6a8078be", + total_value=35, city="London", currency="GBP", items=[ {"sku": "pbz0026", "price": 20, "quantity": 1}, {"sku": "pbz0038", "price": 15, "quantity": 1}, ], + tstamp=1399021242240, ) expected_fields = { @@ -161,7 +162,7 @@ def test_integration_mobile_screen_view(self) -> None: "namespace", [get_emitter], default_subject, encode_base64=False ) with HTTMock(pass_response_content): - t.track_mobile_screen_view("534", "Game HUD 2") + t.track_mobile_screen_view(id_="534", name="Game HUD 2") expected_fields = {"e": "ue"} for key in expected_fields: self.assertEqual( @@ -520,6 +521,7 @@ def test_bytelimit(self) -> None: t.track_struct_event("Test", "A") # 322 bytes t.track_struct_event("Test", "A") # 483 bytes. 
Send t.track_struct_event("Test", "AA") # 162 + self.assertEqual(len(querystrings[-1]["data"]), 3) self.assertEqual(default_emitter.bytes_queued, 156 + len(_version.__version__)) @@ -548,7 +550,7 @@ def test_unicode_get(self) -> None: self.assertEqual(actual_a, unicode_a) uepr_string = unquote_plus(from_querystring("ue_pr", querystrings[-1])) - actual_b = json.loads(uepr_string)["data"]["data"]["id"] + actual_b = json.loads(uepr_string)["data"]["data"]["name"] self.assertEqual(actual_b, unicode_b) def test_unicode_post(self) -> None: @@ -571,5 +573,5 @@ def test_unicode_post(self) -> None: self.assertEqual(in_test_ctx, unicode_a) sv_event = querystrings[-1] - in_uepr_name = json.loads(sv_event["data"][0]["ue_pr"])["data"]["data"]["id"] + in_uepr_name = json.loads(sv_event["data"][0]["ue_pr"])["data"]["data"]["name"] self.assertEqual(in_uepr_name, unicode_b) diff --git a/snowplow_tracker/test/unit/test_event.py b/snowplow_tracker/test/unit/test_event.py new file mode 100644 index 00000000..e50da98d --- /dev/null +++ b/snowplow_tracker/test/unit/test_event.py @@ -0,0 +1,72 @@ +# """ +# test_event.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ + +import json +import unittest +from snowplow_tracker.events import Event +from snowplow_tracker.subject import Subject +from snowplow_tracker.self_describing_json import SelfDescribingJson + +CONTEXT_SCHEMA = "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1" + + +class TestEvent(unittest.TestCase): + def setUp(self) -> None: + pass + + def test_init(self): + event = Event() + self.assertEqual(event.payload.nv_pairs, {}) + + def test_build_payload(self): + event_subject = Subject() + event = Event(event_subject=event_subject) + payload = event.build_payload(encode_base64=None, json_encoder=None) + + self.assertEqual(payload.nv_pairs, {"p": "pc"}) + + def test_build_payload_tstamp(self): + event_subject = Subject() + tstamp = 1399021242030 + + event = Event(event_subject=event_subject, true_timestamp=tstamp) + + payload = event.build_payload( + json_encoder=None, + encode_base64=None, + ) + + self.assertEqual(payload.nv_pairs, {"p": "pc", "ttm": 1399021242030}) + + def test_build_payload_context(self): + event_subject = Subject() + context = SelfDescribingJson("test.context.schema", {"user": "tester"}) + event_context = [context] + event = Event(event_subject=event_subject, context=event_context) + + payload = event.build_payload( + json_encoder=None, + encode_base64=False, + ) + + expected_context = { + "schema": CONTEXT_SCHEMA, + "data": [{"schema": "test.context.schema", "data": {"user": "tester"}}], + } + actual_context = json.loads(payload.nv_pairs["co"]) + + self.assertDictEqual(actual_context, expected_context) diff --git a/snowplow_tracker/test/unit/test_tracker.py b/snowplow_tracker/test/unit/test_tracker.py index 8e6cb6bf..8eaf9894 100644 --- a/snowplow_tracker/test/unit/test_tracker.py +++ b/snowplow_tracker/test/unit/test_tracker.py @@ -21,7 +21,7 @@ import unittest.mock as mock from freezegun import freeze_time -from typing import Any +from typing import Any, Optional from snowplow_tracker.contracts import disable_contracts, 
enable_contracts from snowplow_tracker.tracker import Tracker @@ -29,6 +29,7 @@ from snowplow_tracker.subject import Subject from snowplow_tracker.payload import Payload from snowplow_tracker.self_describing_json import SelfDescribingJson +from snowplow_tracker.events import Event, SelfDescribing, ScreenView UNSTRUCT_SCHEMA = "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0" CONTEXT_SCHEMA = "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1" @@ -57,11 +58,21 @@ def mocked_uuid() -> str: return _TEST_UUID -def mocked_track(pb: Any) -> None: +def mocked_track( + event: Any, + context: Optional[Any] = None, + tstamp: Optional[Any] = None, + event_subject: Optional[Any] = None, +) -> None: pass -def mocked_complete_payload(*args: Any, **kwargs: Any) -> None: +def mocked_complete_payload( + event: Any, + event_subject: Optional[Any], + context: Optional[Any], + tstamp: Optional[Any], +) -> Payload: pass @@ -223,122 +234,49 @@ def test_track(self) -> None: t = Tracker("namespace", [e1, e2, e3]) - p = Payload({"eid": "event_id"}) - event_id = t.track(p) + mok_event = self.create_patch("snowplow_tracker.events.Event") + t.track(mok_event) + mok_payload = mok_event.build_payload().nv_pairs - e1.input.assert_called_once_with({"eid": "event_id"}) - e2.input.assert_called_once_with({"eid": "event_id"}) - e3.input.assert_called_once_with({"eid": "event_id"}) - - self.assertEqual(event_id, "event_id") + e1.input.assert_called_once_with(mok_payload) + e2.input.assert_called_once_with(mok_payload) + e3.input.assert_called_once_with(mok_payload) @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 - @mock.patch("snowplow_tracker.Tracker.track") @mock.patch("snowplow_tracker.Tracker.get_uuid") - def test_complete_payload(self, mok_uuid: Any, mok_track: Any) -> None: - mokEmitter = self.create_patch("snowplow_tracker.Emitter") - e = mokEmitter() - + def test_complete_payload(self, mok_uuid: Any) -> None: mok_uuid.side_effect = mocked_uuid - 
mok_track.side_effect = mocked_track - - t = Tracker("namespace", e) - p = Payload() - t.complete_payload(p, None, None, None) - - self.assertEqual(mok_track.call_count, 1) - trackArgsTuple = mok_track.call_args_list[0][0] - self.assertEqual(len(trackArgsTuple), 1) - passed_nv_pairs = trackArgsTuple[0].nv_pairs - - expected = { - "eid": _TEST_UUID, - "dtm": 1618790401000, - "tv": TRACKER_VERSION, - "p": "pc", - "tna": "namespace", - } - self.assertDictEqual(passed_nv_pairs, expected) - @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 - @mock.patch("snowplow_tracker.Tracker.track") - @mock.patch("snowplow_tracker.Tracker.get_uuid") - def test_complete_payload_tstamp_int(self, mok_uuid: Any, mok_track: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() - mok_uuid.side_effect = mocked_uuid - mok_track.side_effect = mocked_track - t = Tracker("namespace", e) - p = Payload() - time_in_millis = 100010001000 - t.complete_payload(p, None, time_in_millis, None) - - self.assertEqual(mok_track.call_count, 1) - trackArgsTuple = mok_track.call_args_list[0][0] - self.assertEqual(len(trackArgsTuple), 1) - passed_nv_pairs = trackArgsTuple[0].nv_pairs + s = Subject() + event = Event(event_subject=s) + payload = t.complete_payload(event).nv_pairs expected = { - "tna": "namespace", "eid": _TEST_UUID, "dtm": 1618790401000, - "ttm": time_in_millis, "tv": TRACKER_VERSION, "p": "pc", - } - self.assertDictEqual(passed_nv_pairs, expected) - - @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 - @mock.patch("snowplow_tracker.Tracker.track") - @mock.patch("snowplow_tracker.Tracker.get_uuid") - def test_complete_payload_tstamp_dtm(self, mok_uuid: Any, mok_track: Any) -> None: - mokEmitter = self.create_patch("snowplow_tracker.Emitter") - e = mokEmitter() - - mok_uuid.side_effect = mocked_uuid - mok_track.side_effect = mocked_track - - t = Tracker("namespace", e) - p = Payload() - time_in_millis = 100010001000 - t.complete_payload(p, None, 
time_in_millis, None) - - self.assertEqual(mok_track.call_count, 1) - trackArgsTuple = mok_track.call_args_list[0][0] - self.assertEqual(len(trackArgsTuple), 1) - passed_nv_pairs = trackArgsTuple[0].nv_pairs - - expected = { "tna": "namespace", - "eid": _TEST_UUID, - "dtm": 1618790401000, - "ttm": time_in_millis, - "tv": TRACKER_VERSION, - "p": "pc", } - self.assertDictEqual(passed_nv_pairs, expected) + self.assertDictEqual(payload, expected) @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 - @mock.patch("snowplow_tracker.Tracker.track") @mock.patch("snowplow_tracker.Tracker.get_uuid") - def test_complete_payload_tstamp_ttm(self, mok_uuid: Any, mok_track: Any) -> None: + def test_complete_payload_tstamp(self, mok_uuid: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() mok_uuid.side_effect = mocked_uuid - mok_track.side_effect = mocked_track - t = Tracker("namespace", e) - p = Payload() + s = Subject() time_in_millis = 100010001000 - t.complete_payload(p, None, time_in_millis, None) + event = Event(true_timestamp=time_in_millis, event_subject=s) - self.assertEqual(mok_track.call_count, 1) - trackArgsTuple = mok_track.call_args_list[0][0] - self.assertEqual(len(trackArgsTuple), 1) - passed_nv_pairs = trackArgsTuple[0].nv_pairs + payload = t.complete_payload(event=event).nv_pairs expected = { "tna": "namespace", @@ -348,30 +286,23 @@ def test_complete_payload_tstamp_ttm(self, mok_uuid: Any, mok_track: Any) -> Non "tv": TRACKER_VERSION, "p": "pc", } - self.assertDictEqual(passed_nv_pairs, expected) + self.assertDictEqual(payload, expected) @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 - @mock.patch("snowplow_tracker.Tracker.track") @mock.patch("snowplow_tracker.Tracker.get_uuid") - def test_complete_payload_co(self, mok_uuid: Any, mok_track: Any) -> None: + def test_complete_payload_co(self, mok_uuid: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() 
mok_uuid.side_effect = mocked_uuid - mok_track.side_effect = mocked_track t = Tracker("namespace", e, encode_base64=False) - p = Payload() geo_ctx = SelfDescribingJson(geoSchema, geoData) mov_ctx = SelfDescribingJson(movSchema, movData) ctx_array = [geo_ctx, mov_ctx] - t.complete_payload(p, ctx_array, None, None) - - self.assertEqual(mok_track.call_count, 1) - trackArgsTuple = mok_track.call_args_list[0][0] - self.assertEqual(len(trackArgsTuple), 1) - passed_nv_pairs = trackArgsTuple[0].nv_pairs + event = Event(context=ctx_array) + payload = t.complete_payload(event=event).nv_pairs expected_co = { "schema": CONTEXT_SCHEMA, @@ -380,55 +311,39 @@ def test_complete_payload_co(self, mok_uuid: Any, mok_track: Any) -> None: {"schema": movSchema, "data": movData}, ], } - self.assertIn("co", passed_nv_pairs) - self.assertDictEqual(json.loads(passed_nv_pairs["co"]), expected_co) + self.assertIn("co", payload) + self.assertDictEqual(json.loads(payload["co"]), expected_co) @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 - @mock.patch("snowplow_tracker.Tracker.track") @mock.patch("snowplow_tracker.Tracker.get_uuid") - def test_complete_payload_cx(self, mok_uuid: Any, mok_track: Any) -> None: + def test_complete_payload_cx(self, mok_uuid: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() mok_uuid.side_effect = mocked_uuid - mok_track.side_effect = mocked_track t = Tracker("namespace", e, encode_base64=True) - p = Payload() geo_ctx = SelfDescribingJson(geoSchema, geoData) mov_ctx = SelfDescribingJson(movSchema, movData) ctx_array = [geo_ctx, mov_ctx] - t.complete_payload(p, ctx_array, None, None) - - self.assertEqual(mok_track.call_count, 1) - trackArgsTuple = mok_track.call_args_list[0][0] - self.assertEqual(len(trackArgsTuple), 1) - passed_nv_pairs = trackArgsTuple[0].nv_pairs + event = Event(context=ctx_array) + payload = t.complete_payload(event=event).nv_pairs - self.assertIn("cx", passed_nv_pairs) + self.assertIn("cx", 
payload) @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 - @mock.patch("snowplow_tracker.Tracker.track") @mock.patch("snowplow_tracker.Tracker.get_uuid") - def test_complete_payload_event_subject( - self, mok_uuid: Any, mok_track: Any - ) -> None: + def test_complete_payload_event_subject(self, mok_uuid: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() mok_uuid.side_effect = mocked_uuid - mok_track.side_effect = mocked_track t = Tracker("namespace", e) - p = Payload() - evSubject = Subject().set_lang("EN").set_user_id("tester") - t.complete_payload(p, None, None, evSubject) - - self.assertEqual(mok_track.call_count, 1) - trackArgsTuple = mok_track.call_args_list[0][0] - self.assertEqual(len(trackArgsTuple), 1) - passed_nv_pairs = trackArgsTuple[0].nv_pairs + event_subject = Subject().set_lang("EN").set_user_id("tester") + event = Event(event_subject=event_subject) + payload = t.complete_payload(event=event).nv_pairs expected = { "tna": "namespace", @@ -439,72 +354,70 @@ def test_complete_payload_event_subject( "lang": "EN", "uid": "tester", } - self.assertDictEqual(passed_nv_pairs, expected) + self.assertDictEqual(payload, expected) ### # test track_x methods ### - @mock.patch("snowplow_tracker.Tracker.complete_payload") - def test_track_self_describing_event(self, mok_complete_payload: Any) -> None: + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_self_describing_event(self, mok_track: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() - mok_complete_payload.side_effect = mocked_complete_payload + mok_track.side_effect = mocked_track t = Tracker("namespace", e, encode_base64=False) - evJson = SelfDescribingJson("test.sde.schema", {"n": "v"}) - t.track_self_describing_event(evJson) - self.assertEqual(mok_complete_payload.call_count, 1) - completeArgsList = mok_complete_payload.call_args_list[0][0] - self.assertEqual(len(completeArgsList), 4) + event_json = 
SelfDescribingJson("test.sde.schema", {"n": "v"}) + event = SelfDescribing(event_json=event_json) + actual_pairs = event.build_payload( + encode_base64=t.encode_base64, + json_encoder=t.json_encoder, + ).nv_pairs + + t.track_self_describing_event(event_json) + self.assertEqual(mok_track.call_count, 1) + + complete_args_dict = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_dict), 1) # payload - actualPayloadArg = completeArgsList[0] - actualPairs = actualPayloadArg.nv_pairs - actualUePr = json.loads(actualPairs["ue_pr"]) - # context - actualContextArg = completeArgsList[1] - # tstamp - actualTstampArg = completeArgsList[2] + actual_ue_pr = json.loads(actual_pairs["ue_pr"]) expectedUePr = { "data": {"data": {"n": "v"}, "schema": "test.sde.schema"}, "schema": UNSTRUCT_SCHEMA, } - self.assertDictEqual(actualUePr, expectedUePr) - self.assertEqual(actualPairs["e"], "ue") - self.assertTrue(actualContextArg is None) - self.assertTrue(actualTstampArg is None) + self.assertDictEqual(actual_ue_pr, expectedUePr) + self.assertEqual(actual_pairs["e"], "ue") - @mock.patch("snowplow_tracker.Tracker.complete_payload") - def test_track_self_describing_event_all_args( - self, mok_complete_payload: Any - ) -> None: + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_self_describing_event_all_args(self, mok_track: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() - mok_complete_payload.side_effect = mocked_complete_payload + mok_track.side_effect = mocked_track t = Tracker("namespace", e, encode_base64=False) - evJson = SelfDescribingJson("test.schema", {"n": "v"}) + event_json = SelfDescribingJson("test.schema", {"n": "v"}) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) - evContext = [ctx] - evTstamp = 1399021242030 - t.track_self_describing_event(evJson, evContext, evTstamp) - self.assertEqual(mok_complete_payload.call_count, 1) - completeArgsList = mok_complete_payload.call_args_list[0][0] 
- self.assertEqual(len(completeArgsList), 4) + event_context = [ctx] + event_tstamp = 1399021242030 + + event = SelfDescribing(event_json=event_json) + actual_pairs = event.build_payload( + encode_base64=t.encode_base64, + json_encoder=t.json_encoder, + ).nv_pairs + + t.track_self_describing_event(event_json, event_context, event_tstamp) + self.assertEqual(mok_track.call_count, 1) + complete_args_dict = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_dict), 1) # payload - actualPayloadArg = completeArgsList[0] - actualPairs = actualPayloadArg.nv_pairs - actualUePr = json.loads(actualPairs["ue_pr"]) - # context - actualContextArg = completeArgsList[1] - # tstamp - actualTstampArg = completeArgsList[2] + actualUePr = json.loads(actual_pairs["ue_pr"]) expectedUePr = { "data": {"data": {"n": "v"}, "schema": "test.schema"}, @@ -512,40 +425,40 @@ def test_track_self_describing_event_all_args( } self.assertDictEqual(actualUePr, expectedUePr) - self.assertEqual(actualPairs["e"], "ue") - self.assertIs(actualContextArg[0], ctx) - self.assertEqual(actualTstampArg, evTstamp) + self.assertEqual(actual_pairs["e"], "ue") - @mock.patch("snowplow_tracker.Tracker.complete_payload") - def test_track_self_describing_event_encode( - self, mok_complete_payload: Any - ) -> None: + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_self_describing_event_encode(self, mok_track: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() - mok_complete_payload.side_effect = mocked_complete_payload + mok_track.side_effect = mocked_track t = Tracker("namespace", e, encode_base64=True) - evJson = SelfDescribingJson("test.sde.schema", {"n": "v"}) - t.track_self_describing_event(evJson) - self.assertEqual(mok_complete_payload.call_count, 1) - completeArgsList = mok_complete_payload.call_args_list[0][0] - self.assertEqual(len(completeArgsList), 4) + event_json = SelfDescribingJson("test.sde.schema", {"n": "v"}) - actualPayloadArg = 
completeArgsList[0] - actualPairs = actualPayloadArg.nv_pairs - self.assertTrue("ue_px" in actualPairs.keys()) + event = SelfDescribing(event_json=event_json) + actual_pairs = event.build_payload( + encode_base64=t.encode_base64, + json_encoder=t.json_encoder, + ).nv_pairs - @mock.patch("snowplow_tracker.Tracker.complete_payload") - def test_track_struct_event(self, mok_complete_payload: Any) -> None: + t.track_self_describing_event(event_json) + self.assertEqual(mok_track.call_count, 1) + complete_args_dict = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_dict), 1) + self.assertTrue("ue_px" in actual_pairs.keys()) + + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_struct_event(self, mok_track: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() - mok_complete_payload.side_effect = mocked_complete_payload + mok_track.side_effect = mocked_track t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) - evTstamp = 1399021242030 + ev_tstamp = 1399021242030 t.track_struct_event( "Mixes", "Play", @@ -553,18 +466,16 @@ def test_track_struct_event(self, mok_complete_payload: Any) -> None: "TestProp", value=3.14, context=[ctx], - tstamp=evTstamp, + tstamp=ev_tstamp, ) - self.assertEqual(mok_complete_payload.call_count, 1) - completeArgsList = mok_complete_payload.call_args_list[0][0] - self.assertEqual(len(completeArgsList), 4) + self.assertEqual(mok_track.call_count, 1) + complete_args_dict = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_dict), 1) - actualPayloadArg = completeArgsList[0] - actualContextArg = completeArgsList[1] - actualTstampArg = completeArgsList[2] - actualPairs = actualPayloadArg.nv_pairs + actual_payload_arg = complete_args_dict["event"].payload + actual_pairs = actual_payload_arg.nv_pairs - expectedPairs = { + expected_pairs = { "e": "se", "se_ca": "Mixes", "se_ac": "Play", @@ -572,31 +483,31 @@ def 
test_track_struct_event(self, mok_complete_payload: Any) -> None: "se_pr": "TestProp", "se_va": 3.14, } - self.assertDictEqual(actualPairs, expectedPairs) - self.assertIs(actualContextArg[0], ctx) - self.assertEqual(actualTstampArg, evTstamp) + self.assertDictEqual(actual_pairs, expected_pairs) - @mock.patch("snowplow_tracker.Tracker.complete_payload") - def test_track_page_view(self, mok_complete_payload: Any) -> None: + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_page_view(self, mok_track: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() - mok_complete_payload.side_effect = mocked_complete_payload + mok_track.side_effect = mocked_track t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) - evTstamp = 1399021242030 + ev_tstamp = 1399021242030 t.track_page_view( - "example.com", "Example", "docs.snowplow.io", context=[ctx], tstamp=evTstamp + "example.com", + "Example", + "docs.snowplow.io", + context=[ctx], + tstamp=ev_tstamp, ) - self.assertEqual(mok_complete_payload.call_count, 1) - completeArgsList = mok_complete_payload.call_args_list[0][0] - self.assertEqual(len(completeArgsList), 4) + self.assertEqual(mok_track.call_count, 1) + complete_args_dict = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_dict), 1) - actualPayloadArg = completeArgsList[0] - actualContextArg = completeArgsList[1] - actualTstampArg = completeArgsList[2] - actualPairs = actualPayloadArg.nv_pairs + actual_payload_arg = complete_args_dict["event"].payload + actualPairs = actual_payload_arg.nv_pairs expectedPairs = { "e": "pv", @@ -605,19 +516,17 @@ def test_track_page_view(self, mok_complete_payload: Any) -> None: "refr": "docs.snowplow.io", } self.assertDictEqual(actualPairs, expectedPairs) - self.assertIs(actualContextArg[0], ctx) - self.assertEqual(actualTstampArg, evTstamp) - @mock.patch("snowplow_tracker.Tracker.complete_payload") - def test_track_page_ping(self, 
mok_complete_payload: Any) -> None: + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_page_ping(self, mok_track: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() - mok_complete_payload.side_effect = mocked_complete_payload + mok_track.side_effect = mocked_track t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) - evTstamp = 1399021242030 + ev_tstamp = 1399021242030 t.track_page_ping( "example.com", "Example", @@ -627,16 +536,14 @@ def test_track_page_ping(self, mok_complete_payload: Any) -> None: 2, 3, context=[ctx], - tstamp=evTstamp, + tstamp=ev_tstamp, ) - self.assertEqual(mok_complete_payload.call_count, 1) - completeArgsList = mok_complete_payload.call_args_list[0][0] - self.assertEqual(len(completeArgsList), 4) + self.assertEqual(mok_track.call_count, 1) + complete_args_dict = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_dict), 1) - actualPayloadArg = completeArgsList[0] - actualContextArg = completeArgsList[1] - actualTstampArg = completeArgsList[2] - actualPairs = actualPayloadArg.nv_pairs + actual_payload_arg = complete_args_dict["event"].payload + actual_pairs = actual_payload_arg.nv_pairs expectedPairs = { "e": "pp", @@ -648,39 +555,35 @@ def test_track_page_ping(self, mok_complete_payload: Any) -> None: "pp_miy": 2, "pp_may": 3, } - self.assertDictEqual(actualPairs, expectedPairs) - self.assertIs(actualContextArg[0], ctx) - self.assertEqual(actualTstampArg, evTstamp) + self.assertDictEqual(actual_pairs, expectedPairs) - @mock.patch("snowplow_tracker.Tracker.complete_payload") - def test_track_ecommerce_transaction_item(self, mok_complete_payload: Any) -> None: + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_ecommerce_transaction_item(self, mok_track: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() - mok_complete_payload.side_effect = mocked_complete_payload + 
mok_track.side_effect = mocked_track t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) - evTstamp = 1399021242030 + ev_tstamp = 1399021242030 t.track_ecommerce_transaction_item( - "1234", - "sku1234", - 3.14, - 1, - "itemName", - "itemCategory", - "itemCurrency", + order_id="1234", + sku="sku1234", + price=3.14, + quantity=1, + name="itemName", + category="itemCategory", + currency="itemCurrency", context=[ctx], - tstamp=evTstamp, + tstamp=ev_tstamp, ) - self.assertEqual(mok_complete_payload.call_count, 1) - completeArgsList = mok_complete_payload.call_args_list[0][0] - self.assertEqual(len(completeArgsList), 4) + self.assertEqual(mok_track.call_count, 1) + complete_args_list = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_list), 1) - actualPayloadArg = completeArgsList[0] - actualContextArg = completeArgsList[1] - actualTstampArg = completeArgsList[2] - actualPairs = actualPayloadArg.nv_pairs + actual_payload_arg = complete_args_list["event"].payload + actual_pairs = actual_payload_arg.nv_pairs expectedPairs = { "e": "ti", @@ -692,18 +595,14 @@ def test_track_ecommerce_transaction_item(self, mok_complete_payload: Any) -> No "ti_qu": 1, "ti_cu": "itemCurrency", } - self.assertDictEqual(actualPairs, expectedPairs) - self.assertIs(actualContextArg[0], ctx) - self.assertEqual(actualTstampArg, evTstamp) + self.assertDictEqual(actual_pairs, expectedPairs) - @mock.patch("snowplow_tracker.Tracker.complete_payload") - def test_track_ecommerce_transaction_no_items( - self, mok_complete_payload: Any - ) -> None: + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_ecommerce_transaction_no_items(self, mok_track: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() - mok_complete_payload.side_effect = mocked_complete_payload + mok_track.side_effect = mocked_track t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) @@ 
-721,12 +620,11 @@ def test_track_ecommerce_transaction_no_items( context=[ctx], tstamp=evTstamp, ) - self.assertEqual(mok_complete_payload.call_count, 1) - completeArgsList = mok_complete_payload.call_args_list[0][0] - self.assertEqual(len(completeArgsList), 4) - actualPayloadArg = completeArgsList[0] - actualContextArg = completeArgsList[1] - actualTstampArg = completeArgsList[2] + self.assertEqual(mok_track.call_count, 1) + completeArgsList = mok_track.call_args_list[0][1] + self.assertEqual(len(completeArgsList), 1) + + actualPayloadArg = completeArgsList["event"].payload actualPairs = actualPayloadArg.nv_pairs expectedPairs = { @@ -742,18 +640,16 @@ def test_track_ecommerce_transaction_no_items( "tr_cu": "transCurrency", } self.assertDictEqual(actualPairs, expectedPairs) - self.assertIs(actualContextArg[0], ctx) - self.assertEqual(actualTstampArg, evTstamp) @mock.patch("snowplow_tracker.Tracker.track_ecommerce_transaction_item") - @mock.patch("snowplow_tracker.Tracker.complete_payload") + @mock.patch("snowplow_tracker.Tracker.track") def test_track_ecommerce_transaction_with_items( - self, mok_complete_payload: Any, mok_track_trans_item: Any + self, mok_track: Any, mok_track_trans_item: Any ) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() - mok_complete_payload.side_effect = mocked_complete_payload + mok_track.side_effect = mocked_track mok_track_trans_item.side_effect = mocked_track_trans_item t = Tracker("namespace", e) @@ -764,27 +660,26 @@ def test_track_ecommerce_transaction_with_items( {"sku": "sku5678", "quantity": 1, "price": 2.72}, ] t.track_ecommerce_transaction( - "1234", - 10, - "transAffiliation", - 2.5, - 1.5, - "transCity", - "transState", - "transCountry", - "transCurrency", + order_id="1234", + total_value=10, + affiliation="transAffiliation", + tax_value=2.5, + shipping=1.5, + city="transCity", + state="transState", + country="transCountry", + currency="transCurrency", items=transItems, context=[ctx], 
tstamp=evTstamp, ) # Transaction - callCompleteArgsList = mok_complete_payload.call_args_list - firstCallArgsList = callCompleteArgsList[0][0] - self.assertEqual(len(firstCallArgsList), 4) - actualPayloadArg = firstCallArgsList[0] - actualContextArg = firstCallArgsList[1] - actualTstampArg = firstCallArgsList[2] + callCompleteArgsList = mok_track.call_args_list + firstCallArgsList = callCompleteArgsList[0][1] + self.assertEqual(len(firstCallArgsList), 1) + + actualPayloadArg = firstCallArgsList["event"].payload actualPairs = actualPayloadArg.nv_pairs expectedTransPairs = { @@ -800,8 +695,6 @@ def test_track_ecommerce_transaction_with_items( "tr_cu": "transCurrency", } self.assertDictEqual(actualPairs, expectedTransPairs) - self.assertIs(actualContextArg[0], ctx) - self.assertEqual(actualTstampArg, evTstamp) # Items calls_to_track_trans_item = mok_track_trans_item.call_count @@ -813,14 +706,16 @@ def test_track_ecommerce_transaction_with_items( firstItemCallKwargs = callTrackItemsArgsList[0][1] expectedFirstItemPairs = { - "tstamp": evTstamp, - "order_id": "1234", - "currency": "transCurrency", "sku": "sku1234", "quantity": 3, "price": 3.14, + "order_id": "1234", + "currency": "transCurrency", + "tstamp": evTstamp, "event_subject": None, + "context": [ctx], } + self.assertDictEqual(firstItemCallKwargs, expectedFirstItemPairs) # 2nd item secItemCallArgs = callTrackItemsArgsList[1][0] @@ -828,14 +723,16 @@ def test_track_ecommerce_transaction_with_items( secItemCallKwargs = callTrackItemsArgsList[1][1] expectedSecItemPairs = { - "tstamp": evTstamp, - "order_id": "1234", - "currency": "transCurrency", "sku": "sku5678", "quantity": 1, "price": 2.72, + "order_id": "1234", + "currency": "transCurrency", + "tstamp": evTstamp, "event_subject": None, + "context": [ctx], } + self.assertDictEqual(secItemCallKwargs, expectedSecItemPairs) @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") @@ -870,11 +767,11 @@ def test_track_link_click(self, mok_track_unstruct: 
Any) -> None: }, } - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), expected) - self.assertIs(callArgs[1][0], ctx) - self.assertEqual(callArgs[2], evTstamp) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["true_timestamp"], evTstamp) @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_link_click_optional_none(self, mok_track_unstruct: Any) -> None: @@ -894,11 +791,11 @@ def test_track_link_click_optional_none(self, mok_track_unstruct: Any) -> None: }, } - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), expected) - self.assertTrue(callArgs[1] is None) - self.assertTrue(callArgs[2] is None) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertTrue(callArgs["context"] is None) + self.assertTrue(callArgs["true_timestamp"] is None) @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_add_to_cart(self, mok_track_unstruct: Any) -> None: @@ -934,11 +831,11 @@ def test_track_add_to_cart(self, mok_track_unstruct: Any) -> None: }, } - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), expected) - self.assertIs(callArgs[1][0], ctx) - self.assertEqual(callArgs[2], evTstamp) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["true_timestamp"], evTstamp) @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_add_to_cart_optional_none(self, mok_track_unstruct: Any) -> None: @@ -956,11 
+853,11 @@ def test_track_add_to_cart_optional_none(self, mok_track_unstruct: Any) -> None: "data": {"sku": "sku1234", "quantity": 1}, } - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), expected) - self.assertTrue(callArgs[1] is None) - self.assertTrue(callArgs[2] is None) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertTrue(callArgs["context"] is None) + self.assertTrue(callArgs["true_timestamp"] is None) @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_remove_from_cart(self, mok_track_unstruct: Any) -> None: @@ -996,11 +893,11 @@ def test_track_remove_from_cart(self, mok_track_unstruct: Any) -> None: }, } - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), expected) - self.assertIs(callArgs[1][0], ctx) - self.assertEqual(callArgs[2], evTstamp) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["true_timestamp"], evTstamp) @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_remove_from_cart_optional_none( @@ -1020,11 +917,11 @@ def test_track_remove_from_cart_optional_none( "data": {"sku": "sku1234", "quantity": 1}, } - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), expected) - self.assertTrue(callArgs[1] is None) - self.assertTrue(callArgs[2] is None) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertTrue(callArgs["context"] is None) + self.assertTrue(callArgs["true_timestamp"] is None) 
@mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_change(self, mok_track_unstruct: Any) -> None: @@ -1060,11 +957,11 @@ def test_track_form_change(self, mok_track_unstruct: Any) -> None: }, } - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), expected) - self.assertIs(callArgs[1][0], ctx) - self.assertEqual(callArgs[2], evTstamp) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["true_timestamp"], evTstamp) @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_change_optional_none(self, mok_track_unstruct: Any) -> None: @@ -1086,11 +983,11 @@ def test_track_form_change_optional_none(self, mok_track_unstruct: Any) -> None: }, } - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), expected) - self.assertTrue(callArgs[1] is None) - self.assertTrue(callArgs[2] is None) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertTrue(callArgs["context"] is None) + self.assertTrue(callArgs["true_timestamp"] is None) @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_submit(self, mok_track_unstruct: Any) -> None: @@ -1128,11 +1025,11 @@ def test_track_form_submit(self, mok_track_unstruct: Any) -> None: }, } - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), expected) - self.assertIs(callArgs[1][0], ctx) - self.assertEqual(callArgs[2], evTstamp) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + 
self.assertEqual(callArgs["true_timestamp"], evTstamp) @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_submit_invalid_element_type( @@ -1203,11 +1100,11 @@ def test_track_form_submit_invalid_element_type_disabled_contracts( }, } - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), expected) - self.assertIs(callArgs[1][0], ctx) - self.assertEqual(callArgs[2], evTstamp) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["true_timestamp"], evTstamp) @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_submit_optional_none(self, mok_track_unstruct: Any) -> None: @@ -1221,11 +1118,11 @@ def test_track_form_submit_optional_none(self, mok_track_unstruct: Any) -> None: expected = {"schema": FORM_SUBMIT_SCHEMA, "data": {"formId": "testFormId"}} - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), expected) - self.assertTrue(callArgs[1] is None) - self.assertTrue(callArgs[2] is None) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertTrue(callArgs["context"] is None) + self.assertTrue(callArgs["true_timestamp"] is None) @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_submit_empty_elems(self, mok_track_unstruct: Any) -> None: @@ -1239,9 +1136,9 @@ def test_track_form_submit_empty_elems(self, mok_track_unstruct: Any) -> None: expected = {"schema": FORM_SUBMIT_SCHEMA, "data": {"formId": "testFormId"}} - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), 
expected) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_site_search(self, mok_track_unstruct: Any) -> None: @@ -1268,11 +1165,12 @@ def test_track_site_search(self, mok_track_unstruct: Any) -> None: }, } - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), expected) - self.assertIs(callArgs[1][0], ctx) - self.assertEqual(callArgs[2], evTstamp) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["true_timestamp"], evTstamp) @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_site_search_optional_none(self, mok_track_unstruct: Any) -> None: @@ -1289,37 +1187,42 @@ def test_track_site_search_optional_none(self, mok_track_unstruct: Any) -> None: "data": {"terms": ["track", "search"]}, } - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), expected) - self.assertTrue(callArgs[1] is None) - self.assertTrue(callArgs[2] is None) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertTrue(callArgs["context"] is None) + self.assertTrue(callArgs["true_timestamp"] is None) - @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") - def test_track_mobile_screen_view(self, mok_track_unstruct: Any) -> None: + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_mobile_screen_view(self, mok_track: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() - mok_track_unstruct.side_effect = mocked_track_unstruct + mok_track.side_effect = mocked_track t = Tracker("namespace", e) - ctx = 
SelfDescribingJson("test.context.schema", {"user": "tester"}) - evTstamp = 1399021242030 - t.track_mobile_screen_view( - "screenId", "screenName", context=[ctx], tstamp=evTstamp - ) + screen_view = ScreenView(name="screenName", id_="screenId") + actual_pairs = screen_view.build_payload( + encode_base64=False, + json_encoder=t.json_encoder, + ).nv_pairs + + t.track(screen_view) + + self.assertEqual(mok_track.call_count, 1) + complete_args_dict = mok_track.call_args_list[0][0] + self.assertEqual(len(complete_args_dict), 1) + actual_ue_pr = json.loads(actual_pairs["ue_pr"]) expected = { "schema": MOBILE_SCREEN_VIEW_SCHEMA, - "data": {"name": "screenName", "id": "screenId"}, + "data": {"id": "screenId", "name": "screenName"}, } - callArgs = mok_track_unstruct.call_args_list[0][0] - self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), expected) - self.assertIs(callArgs[1][0], ctx) - self.assertEqual(callArgs[2], evTstamp) + complete_args_dict = mok_track.call_args_list[0][1] + complete_args_dict = mok_track.call_args_list[0][1] + self.assertDictEqual(actual_ue_pr["data"], expected) @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_screen_view(self, mok_track_unstruct: Any) -> None: @@ -1339,8 +1242,8 @@ def test_track_screen_view(self, mok_track_unstruct: Any) -> None: "data": {"name": "screenName", "id": "screenId"}, } - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), expected) - self.assertIs(callArgs[1][0], ctx) - self.assertEqual(callArgs[2], evTstamp) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["true_timestamp"], evTstamp) diff --git a/snowplow_tracker/tracker.py b/snowplow_tracker/tracker.py index 68315085..f0e92fdf 100644 --- a/snowplow_tracker/tracker.py +++ 
b/snowplow_tracker/tracker.py @@ -20,9 +20,24 @@ from typing import Any, Optional, Union, List, Dict, Sequence from warnings import warn -from snowplow_tracker import payload, _version, SelfDescribingJson -from snowplow_tracker import subject as _subject +from snowplow_tracker import payload, SelfDescribingJson +from snowplow_tracker.subject import Subject from snowplow_tracker.contracts import non_empty_string, one_of, non_empty, form_element +from snowplow_tracker.constants import ( + VERSION, + DEFAULT_ENCODE_BASE64, + BASE_SCHEMA_PATH, + SCHEMA_TAG, +) + +from snowplow_tracker.events import ( + Event, + PagePing, + PageView, + SelfDescribing, + StructuredEvent, + ScreenView, +) from snowplow_tracker.typing import ( JsonEncoderFunction, EmitterProtocol, @@ -33,19 +48,6 @@ FormClasses, ) -""" -Constants & config -""" - -VERSION = "py-%s" % _version.__version__ -DEFAULT_ENCODE_BASE64 = True -BASE_SCHEMA_PATH = "iglu:com.snowplowanalytics.snowplow" -MOBILE_SCHEMA_PATH = "iglu:com.snowplowanalytics.mobile" -SCHEMA_TAG = "jsonschema" -CONTEXT_SCHEMA = "%s/contexts/%s/1-0-1" % (BASE_SCHEMA_PATH, SCHEMA_TAG) -UNSTRUCT_EVENT_SCHEMA = "%s/unstruct_event/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG) -ContextArray = List[SelfDescribingJson] - """ Tracker class """ @@ -56,7 +58,7 @@ def __init__( self, namespace: str, emitters: Union[List[EmitterProtocol], EmitterProtocol], - subject: Optional[_subject.Subject] = None, + subject: Optional[Subject] = None, app_id: Optional[str] = None, encode_base64: bool = DEFAULT_ENCODE_BASE64, json_encoder: Optional[JsonEncoderFunction] = None, @@ -76,7 +78,7 @@ def __init__( :type json_encoder: function | None """ if subject is None: - subject = _subject.Subject() + subject = Subject() if type(emitters) is list: non_empty(emitters) @@ -122,62 +124,42 @@ def get_timestamp(tstamp: Optional[float] = None) -> int: Tracking methods """ - def track(self, pb: payload.Payload) -> Optional[str]: - """ - Send the payload to a emitter. 
Returns the tracked event ID. - - :param pb: Payload builder - :type pb: payload - :rtype: String - """ - for emitter in self.emitters: - emitter.input(pb.nv_pairs) - - if "eid" in pb.nv_pairs.keys(): - return pb.nv_pairs["eid"] - - def complete_payload( + def track( self, - pb: payload.Payload, - context: Optional[List[SelfDescribingJson]], - tstamp: Optional[float], - event_subject: Optional[_subject.Subject], + event: Event, ) -> Optional[str]: """ - Called by all tracking events to add the standard name-value pairs - to the Payload object irrespective of the tracked event. - - :param pb: Payload builder - :type pb: payload - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None + Send the event payload to a emitter. Returns the tracked event ID. + :param event: Event + :type event: events.Event :rtype: String """ - pb.add("eid", Tracker.get_uuid()) - pb.add("dtm", Tracker.get_timestamp()) - if tstamp is not None: - pb.add("ttm", Tracker.get_timestamp(tstamp)) + payload = self.complete_payload( + event=event, + ) - if context is not None: - context_jsons = list(map(lambda c: c.to_json(), context)) - context_envelope = SelfDescribingJson( - CONTEXT_SCHEMA, context_jsons - ).to_json() - pb.add_json( - context_envelope, self.encode_base64, "cx", "co", self.json_encoder - ) + for emitter in self.emitters: + emitter.input(payload.nv_pairs) - pb.add_dict(self.standard_nv_pairs) + if "eid" in payload.nv_pairs.keys(): + return payload.nv_pairs["eid"] - fin_subject = event_subject if event_subject is not None else self.subject - pb.add_dict(fin_subject.standard_nv_pairs) + def complete_payload( + self, + event: Event, + ) -> payload.Payload: + payload = event.build_payload( + encode_base64=self.encode_base64, + json_encoder=self.json_encoder, + 
subject=self.subject, + ) + + payload.add("eid", Tracker.get_uuid()) + payload.add("dtm", Tracker.get_timestamp()) + payload.add_dict(self.standard_nv_pairs) - return self.track(pb) + return payload def track_page_view( self, @@ -186,7 +168,7 @@ def track_page_view( referrer: Optional[str] = None, context: Optional[List[SelfDescribingJson]] = None, tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None, + event_subject: Optional[Subject] = None, ) -> "Tracker": """ :param page_url: URL of the viewed page @@ -203,15 +185,22 @@ def track_page_view( :type event_subject: subject | None :rtype: Tracker """ - non_empty_string(page_url) + warn( + "track_page_view will be removed in future versions. Please use the new PageView class to track the event.", + DeprecationWarning, + stacklevel=2, + ) - pb = payload.Payload() - pb.add("e", "pv") # pv: page view - pb.add("url", page_url) - pb.add("page", page_title) - pb.add("refr", referrer) + pv = PageView( + page_url=page_url, + page_title=page_title, + referrer=referrer, + event_subject=event_subject, + context=context, + true_timestamp=tstamp, + ) - self.complete_payload(pb, context, tstamp, event_subject) + self.track(event=pv) return self def track_page_ping( @@ -225,7 +214,7 @@ def track_page_ping( max_y: Optional[int] = None, context: Optional[List[SelfDescribingJson]] = None, tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None, + event_subject: Optional[Subject] = None, ) -> "Tracker": """ :param page_url: URL of the viewed page @@ -250,19 +239,26 @@ def track_page_ping( :type event_subject: subject | None :rtype: Tracker """ - non_empty_string(page_url) + warn( + "track_page_ping will be removed in future versions. 
Please use the new PagePing class to track the event.", + DeprecationWarning, + stacklevel=2, + ) - pb = payload.Payload() - pb.add("e", "pp") # pp: page ping - pb.add("url", page_url) - pb.add("page", page_title) - pb.add("refr", referrer) - pb.add("pp_mix", min_x) - pb.add("pp_max", max_x) - pb.add("pp_miy", min_y) - pb.add("pp_may", max_y) + pp = PagePing( + page_url=page_url, + page_title=page_title, + referrer=referrer, + min_x=min_x, + max_x=max_x, + min_y=min_y, + max_y=max_y, + context=context, + true_timestamp=tstamp, + event_subject=event_subject, + ) - self.complete_payload(pb, context, tstamp, event_subject) + self.track(event=pp) return self def track_link_click( @@ -274,27 +270,32 @@ def track_link_click( element_content: Optional[str] = None, context: Optional[List[SelfDescribingJson]] = None, tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None, + event_subject: Optional[Subject] = None, ) -> "Tracker": """ - :param target_url: Target URL of the link - :type target_url: non_empty_string - :param element_id: ID attribute of the HTML element - :type element_id: string_or_none + :param target_url: Target URL of the link + :type target_url: non_empty_string + :param element_id: ID attribute of the HTML element + :type element_id: string_or_none :param element_classes: Classes of the HTML element :type element_classes: list(str) | tuple(str,\\*) | None :param element_target: ID attribute of the HTML element :type element_target: string_or_none :param element_content: The content of the HTML element :type element_content: string_or_none - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: Tracker + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional 
event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker """ + warn( + "track_link_click will be removed in future versions. Please use the new SelfDescribing class to track the event.", + DeprecationWarning, + stacklevel=2, + ) non_empty_string(target_url) properties = {} @@ -312,7 +313,12 @@ def track_link_click( "%s/link_click/%s/1-0-1" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - self.track_self_describing_event(event_json, context, tstamp, event_subject) + self.track_self_describing_event( + event_json=event_json, + context=context, + true_timestamp=tstamp, + event_subject=event_subject, + ) return self def track_add_to_cart( @@ -325,7 +331,7 @@ def track_add_to_cart( currency: Optional[str] = None, context: Optional[List[SelfDescribingJson]] = None, tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None, + event_subject: Optional[Subject] = None, ) -> "Tracker": """ :param sku: Item SKU or ID @@ -371,7 +377,12 @@ def track_add_to_cart( "%s/add_to_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - self.track_self_describing_event(event_json, context, tstamp, event_subject) + self.track_self_describing_event( + event_json=event_json, + context=context, + true_timestamp=tstamp, + event_subject=event_subject, + ) return self def track_remove_from_cart( @@ -384,7 +395,7 @@ def track_remove_from_cart( currency: Optional[str] = None, context: Optional[List[SelfDescribingJson]] = None, tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None, + event_subject: Optional[Subject] = None, ) -> "Tracker": """ :param sku: Item SKU or ID @@ -430,7 +441,12 @@ def track_remove_from_cart( "%s/remove_from_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - self.track_self_describing_event(event_json, context, tstamp, event_subject) + self.track_self_describing_event( + 
event_json=event_json, + context=context, + true_timestamp=tstamp, + event_subject=event_subject, + ) return self def track_form_change( @@ -443,29 +459,35 @@ def track_form_change( element_classes: Optional[ElementClasses] = None, context: Optional[List[SelfDescribingJson]] = None, tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None, + event_subject: Optional[Subject] = None, ) -> "Tracker": """ - :param form_id: ID attribute of the HTML form - :type form_id: non_empty_string - :param element_id: ID attribute of the HTML element - :type element_id: string_or_none - :param node_name: Type of input element - :type node_name: form_node_name - :param value: Value of the input element - :type value: string_or_none - :param type_: Type of data the element represents - :type type_: non_empty_string, form_type + :param form_id: ID attribute of the HTML form + :type form_id: non_empty_string + :param element_id: ID attribute of the HTML element + :type element_id: string_or_none + :param node_name: Type of input element + :type node_name: form_node_name + :param value: Value of the input element + :type value: string_or_none + :param type_: Type of data the element represents + :type type_: non_empty_string, form_type :param element_classes: Classes of the HTML element :type element_classes: list(str) | tuple(str,\\*) | None - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: Tracker + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker """ + warn( + "track_form_change will be removed in future 
versions. Please use the new SelfDescribing class to track the event.", + DeprecationWarning, + stacklevel=2, + ) + non_empty_string(form_id) one_of(node_name, FORM_NODE_NAMES) if type_ is not None: @@ -485,7 +507,12 @@ def track_form_change( "%s/change_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - self.track_self_describing_event(event_json, context, tstamp, event_subject) + self.track_self_describing_event( + event_json=event_json, + context=context, + true_timestamp=tstamp, + event_subject=event_subject, + ) return self def track_form_submit( @@ -495,7 +522,7 @@ def track_form_submit( elements: Optional[List[Dict[str, Any]]] = None, context: Optional[List[SelfDescribingJson]] = None, tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None, + event_subject: Optional[Subject] = None, ) -> "Tracker": """ :param form_id: ID attribute of the HTML form @@ -512,7 +539,13 @@ def track_form_submit( :type event_subject: subject | None :rtype: Tracker """ + warn( + "track_form_submit will be removed in future versions. 
Please use the new SelfDescribing class to track the event.", + DeprecationWarning, + stacklevel=2, + ) non_empty_string(form_id) + for element in elements or []: form_element(element) @@ -527,7 +560,12 @@ def track_form_submit( "%s/submit_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - self.track_self_describing_event(event_json, context, tstamp, event_subject) + self.track_self_describing_event( + event_json=event_json, + context=context, + true_timestamp=tstamp, + event_subject=event_subject, + ) return self def track_site_search( @@ -538,7 +576,7 @@ def track_site_search( page_results: Optional[int] = None, context: Optional[List[SelfDescribingJson]] = None, tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None, + event_subject: Optional[Subject] = None, ) -> "Tracker": """ :param terms: Search terms @@ -557,6 +595,11 @@ def track_site_search( :type event_subject: subject | None :rtype: Tracker """ + warn( + "track_site_search will be removed in future versions. Please use the new SelfDescribing class to track the event.", + DeprecationWarning, + stacklevel=2, + ) non_empty(terms) properties = {} @@ -572,7 +615,12 @@ def track_site_search( "%s/site_search/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - self.track_self_describing_event(event_json, context, tstamp, event_subject) + self.track_self_describing_event( + event_json=event_json, + context=context, + true_timestamp=tstamp, + event_subject=event_subject, + ) return self def track_ecommerce_transaction_item( @@ -586,33 +634,33 @@ def track_ecommerce_transaction_item( currency: Optional[str] = None, context: Optional[List[SelfDescribingJson]] = None, tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None, + event_subject: Optional[Subject] = None, ) -> "Tracker": """ This is an internal method called by track_ecommerce_transaction. It is not for public use. 
- :param order_id: Order ID - :type order_id: non_empty_string - :param sku: Item SKU - :type sku: non_empty_string - :param price: Item price - :type price: int | float - :param quantity: Item quantity - :type quantity: int - :param name: Item name - :type name: string_or_none - :param category: Item category - :type category: string_or_none - :param currency: The currency the price is expressed in - :type currency: string_or_none - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None + :param order_id: Order ID + :type order_id: non_empty_string + :param sku: Item SKU + :type sku: non_empty_string + :param price: Item price + :type price: int | float + :param quantity: Item quantity + :type quantity: int + :param name: Item name + :type name: string_or_none + :param category: Item category + :type category: string_or_none + :param currency: The currency the price is expressed in + :type currency: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None :param event_subject: Optional per event subject :type event_subject: subject | None - :rtype: Tracker + :rtype: Tracker """ warn( "track_ecommerce_transaction_item will be deprecated in future versions.", @@ -622,17 +670,19 @@ def track_ecommerce_transaction_item( non_empty_string(order_id) non_empty_string(sku) - pb = payload.Payload() - pb.add("e", "ti") - pb.add("ti_id", order_id) - pb.add("ti_sk", sku) - pb.add("ti_nm", name) - pb.add("ti_ca", category) - pb.add("ti_pr", price) - pb.add("ti_qu", quantity) - pb.add("ti_cu", currency) - - self.complete_payload(pb, context, tstamp, event_subject) + event = Event( + event_subject=event_subject, context=context, true_timestamp=tstamp + ) + event.payload.add("e", "ti") + event.payload.add("ti_id", order_id) + 
event.payload.add("ti_sk", sku) + event.payload.add("ti_nm", name) + event.payload.add("ti_ca", category) + event.payload.add("ti_pr", price) + event.payload.add("ti_qu", quantity) + event.payload.add("ti_cu", currency) + + self.track(event=event) return self def track_ecommerce_transaction( @@ -649,7 +699,7 @@ def track_ecommerce_transaction( items: Optional[List[Dict[str, Any]]] = None, context: Optional[List[SelfDescribingJson]] = None, tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None, + event_subject: Optional[Subject] = None, ) -> "Tracker": """ :param order_id: ID of the eCommerce transaction @@ -687,29 +737,32 @@ def track_ecommerce_transaction( ) non_empty_string(order_id) - pb = payload.Payload() - pb.add("e", "tr") - pb.add("tr_id", order_id) - pb.add("tr_tt", total_value) - pb.add("tr_af", affiliation) - pb.add("tr_tx", tax_value) - pb.add("tr_sh", shipping) - pb.add("tr_ci", city) - pb.add("tr_st", state) - pb.add("tr_co", country) - pb.add("tr_cu", currency) + event = Event( + event_subject=event_subject, context=context, true_timestamp=tstamp + ) + event.payload.add("e", "tr") + event.payload.add("tr_id", order_id) + event.payload.add("tr_tt", total_value) + event.payload.add("tr_af", affiliation) + event.payload.add("tr_tx", tax_value) + event.payload.add("tr_sh", shipping) + event.payload.add("tr_ci", city) + event.payload.add("tr_st", state) + event.payload.add("tr_co", country) + event.payload.add("tr_cu", currency) tstamp = Tracker.get_timestamp(tstamp) - self.complete_payload(pb, context, tstamp, event_subject) + self.track(event=event) if items is None: items = [] for item in items: - item["tstamp"] = tstamp - item["event_subject"] = event_subject item["order_id"] = order_id item["currency"] = currency + item["tstamp"] = tstamp + item["event_subject"] = event_subject + item["context"] = context self.track_ecommerce_transaction_item(**item) return self @@ -720,7 +773,7 @@ def track_screen_view( id_: 
Optional[str] = None, context: Optional[List[SelfDescribingJson]] = None, tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None, + event_subject: Optional[Subject] = None, ) -> "Tracker": """ :param name: The name of the screen view event @@ -736,7 +789,7 @@ def track_screen_view( :rtype: Tracker """ warn( - "track_screen_view will be deprecated in future versions. Please use track_mobile_screen_view.", + "track_screen_view will be removed in future versions. Please use the new ScreenView class to track the event.", DeprecationWarning, stacklevel=2, ) @@ -751,13 +804,18 @@ def track_screen_view( screen_view_properties, ) - self.track_self_describing_event(event_json, context, tstamp, event_subject) + self.track_self_describing_event( + event_json=event_json, + context=context, + true_timestamp=tstamp, + event_subject=event_subject, + ) return self def track_mobile_screen_view( self, + name: str, id_: Optional[str] = None, - name: Optional[str] = None, type: Optional[str] = None, previous_name: Optional[str] = None, previous_id: Optional[str] = None, @@ -765,13 +823,13 @@ def track_mobile_screen_view( transition_type: Optional[str] = None, context: Optional[List[SelfDescribingJson]] = None, tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None, + event_subject: Optional[Subject] = None, ) -> "Tracker": """ - :param id_: Screen view ID. This must be of type UUID. - :type id_: string | None :param name: The name of the screen view event :type name: string_or_none + :param id_: Screen view ID. This must be of type UUID. + :type id_: string | None :param type: The type of screen that was viewed e.g feed / carousel. :type type: string | None :param previous_name: The name of the previous screen. @@ -790,31 +848,28 @@ def track_mobile_screen_view( :type event_subject: subject | None :rtype: Tracker """ - screen_view_properties = {} - + warn( + "track_mobile_screen_view will be removed in future versions. 
Please use the new ScreenView class to track the event.", + DeprecationWarning, + stacklevel=2, + ) if id_ is None: id_ = self.get_uuid() - screen_view_properties["id"] = id_ - - if name is not None: - screen_view_properties["name"] = name - if type is not None: - screen_view_properties["type"] = type - if previous_name is not None: - screen_view_properties["previousName"] = previous_name - if previous_id is not None: - screen_view_properties["previousId"] = previous_id - if previous_type is not None: - screen_view_properties["previousType"] = previous_type - if transition_type is not None: - screen_view_properties["transitionType"] = transition_type - - event_json = SelfDescribingJson( - "%s/screen_view/%s/1-0-0" % (MOBILE_SCHEMA_PATH, SCHEMA_TAG), - screen_view_properties, + sv = ScreenView( + name=name, + id_=id_, + type=type, + previous_name=previous_name, + previous_id=previous_id, + previous_type=previous_type, + transition_type=transition_type, + event_subject=event_subject, + context=context, + true_timestamp=tstamp, ) - self.track_self_describing_event(event_json, context, tstamp, event_subject) + + self.track(event=sv) return self def track_struct_event( @@ -826,7 +881,7 @@ def track_struct_event( value: Optional[float] = None, context: Optional[List[SelfDescribingJson]] = None, tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None, + event_subject: Optional[Subject] = None, ) -> "Tracker": """ :param category: Category of the event @@ -849,18 +904,25 @@ def track_struct_event( :type event_subject: subject | None :rtype: Tracker """ - non_empty_string(category) - non_empty_string(action) - - pb = payload.Payload() - pb.add("e", "se") - pb.add("se_ca", category) - pb.add("se_ac", action) - pb.add("se_la", label) - pb.add("se_pr", property_) - pb.add("se_va", value) + warn( + "track_struct_event will be removed in future versions. 
Please use the new Structured class to track the event.", + DeprecationWarning, + stacklevel=2, + ) + se = StructuredEvent( + category=category, + action=action, + label=label, + property_=property_, + value=value, + context=context, + true_timestamp=tstamp, + event_subject=event_subject, + ) - self.complete_payload(pb, context, tstamp, event_subject) + self.track( + event=se, + ) return self def track_self_describing_event( @@ -868,7 +930,7 @@ def track_self_describing_event( event_json: SelfDescribingJson, context: Optional[List[SelfDescribingJson]] = None, tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None, + event_subject: Optional[Subject] = None, ) -> "Tracker": """ :param event_json: The properties of the event. Has two field: @@ -883,17 +945,21 @@ def track_self_describing_event( :type event_subject: subject | None :rtype: Tracker """ + warn( + "track_self_describing_event will be removed in future versions. Please use the new SelfDescribing class to track the event.", + DeprecationWarning, + stacklevel=2, + ) - envelope = SelfDescribingJson( - UNSTRUCT_EVENT_SCHEMA, event_json.to_json() - ).to_json() - - pb = payload.Payload() - - pb.add("e", "ue") - pb.add_json(envelope, self.encode_base64, "ue_px", "ue_pr", self.json_encoder) - - self.complete_payload(pb, context, tstamp, event_subject) + sd = SelfDescribing( + event_json=event_json, + context=context, + true_timestamp=tstamp, + event_subject=event_subject, + ) + self.track( + event=sd, + ) return self # Alias @@ -902,7 +968,7 @@ def track_unstruct_event( event_json: SelfDescribingJson, context: Optional[List[SelfDescribingJson]] = None, tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None, + event_subject: Optional[Subject] = None, ) -> "Tracker": """ :param event_json: The properties of the event. 
Has two field: @@ -922,7 +988,13 @@ def track_unstruct_event( DeprecationWarning, stacklevel=2, ) - self.track_self_describing_event(event_json, context, tstamp, event_subject) + + self.track_self_describing_event( + event_json=event_json, + context=context, + true_timestamp=tstamp, + event_subject=event_subject, + ) return self def flush(self, is_async: bool = False) -> "Tracker": @@ -942,7 +1014,7 @@ def flush(self, is_async: bool = False) -> "Tracker": emitter.sync_flush() return self - def set_subject(self, subject: Optional[_subject.Subject]) -> "Tracker": + def set_subject(self, subject: Optional[Subject]) -> "Tracker": """ Set the subject of the events fired by the tracker From 2b7a3dd07c89bd671ab4a8afc8021a105d4c49bd Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Wed, 14 Jun 2023 10:43:11 +0100 Subject: [PATCH 31/51] Update payload builder to combine event subjects (close #347) PR #348 * Add combine_subject function to Subject * Combine subjects in payload builder --- examples/tracker_api_example/app.py | 5 +++- snowplow_tracker/events/event.py | 9 ++++--- snowplow_tracker/subject.py | 17 +++++++++++-- snowplow_tracker/test/unit/test_subject.py | 28 ++++++++++++++++++++++ 4 files changed, 53 insertions(+), 6 deletions(-) diff --git a/examples/tracker_api_example/app.py b/examples/tracker_api_example/app.py index 3777a59c..41f520ce 100644 --- a/examples/tracker_api_example/app.py +++ b/examples/tracker_api_example/app.py @@ -31,10 +31,13 @@ def main(): print("Sending events to " + e.endpoint) + event_subject = Subject() + event_subject.set_color_depth(10) + page_view = PageView( page_url="https://www.snowplow.io", page_title="Homepage", - event_subject=t.subject, + event_subject=event_subject, ) t.track(page_view) diff --git a/snowplow_tracker/events/event.py b/snowplow_tracker/events/event.py index 7f510ba3..c9d9b82e 100644 --- a/snowplow_tracker/events/event.py +++ b/snowplow_tracker/events/event.py @@ -94,10 
+94,13 @@ def build_payload( ): self.payload.add("ttm", int(self.true_timestamp)) - fin_subject = self.event_subject if self.event_subject is not None else subject + if self.event_subject is not None: + fin_payload_dict = self.event_subject.combine_subject(subject) + else: + fin_payload_dict = None if subject is None else subject.standard_nv_pairs - if fin_subject is not None: - self.payload.add_dict(fin_subject.standard_nv_pairs) + if fin_payload_dict is not None: + self.payload.add_dict(fin_payload_dict) return self.payload @property diff --git a/snowplow_tracker/subject.py b/snowplow_tracker/subject.py index 10bcbe2a..c3165d34 100644 --- a/snowplow_tracker/subject.py +++ b/snowplow_tracker/subject.py @@ -15,8 +15,9 @@ # language governing permissions and limitations there under. # """ +from typing import Optional from snowplow_tracker.contracts import one_of, greater_than -from snowplow_tracker.typing import SupportedPlatform, SUPPORTED_PLATFORMS +from snowplow_tracker.typing import SupportedPlatform, SUPPORTED_PLATFORMS, PayloadDict DEFAULT_PLATFORM = "pc" @@ -29,7 +30,6 @@ class Subject(object): """ def __init__(self) -> None: - self.standard_nv_pairs = {"p": DEFAULT_PLATFORM} def set_platform(self, value: SupportedPlatform) -> "Subject": @@ -173,3 +173,16 @@ def set_network_user_id(self, nuid: str) -> "Subject": """ self.standard_nv_pairs["tnuid"] = nuid return self + + def combine_subject(self, subject: Optional["Subject"]) -> PayloadDict: + """ + Merges another instance of Subject, with self taking priority + :param subject Subject to update + :type subject subject + :rtype PayloadDict + + """ + if subject is not None: + return {**subject.standard_nv_pairs, **self.standard_nv_pairs} + + return self.standard_nv_pairs diff --git a/snowplow_tracker/test/unit/test_subject.py b/snowplow_tracker/test/unit/test_subject.py index 7645781a..953a0a74 100644 --- a/snowplow_tracker/test/unit/test_subject.py +++ b/snowplow_tracker/test/unit/test_subject.py @@ -86,3 
+86,31 @@ def test_subject_1(self) -> None: s.standard_nv_pairs["vid"] with pytest.raises(KeyError): s.standard_nv_pairs["tnuid"] + + def test_combine_subject(self) -> None: + s = _subject.Subject() + s.set_color_depth(10) + s.set_domain_session_id("domain_session_id") + + s2 = _subject.Subject() + s2.set_domain_user_id("domain_user_id") + s2.set_lang("en") + + fin_payload_dict = s.combine_subject(s2) + + expected_fin_payload_dict = { + "p": "pc", + "cd": 10, + "sid": "domain_session_id", + "duid": "domain_user_id", + "lang": "en", + } + + expected_subject = { + "p": "pc", + "cd": 10, + "sid": "domain_session_id", + } + + self.assertDictEqual(fin_payload_dict, expected_fin_payload_dict) + self.assertDictEqual(s.standard_nv_pairs, expected_subject) From d4fd76b3f1c6dbde2e37f1db6508d1d7a3bb151d Mon Sep 17 00:00:00 2001 From: Jack-Keene Date: Fri, 16 Jun 2023 14:18:43 +0100 Subject: [PATCH 32/51] Prepare for 1.0.0 release --- CHANGES.txt | 9 +++++++++ docs/source/conf.py | 2 +- setup.py | 11 +++++++++-- snowplow_tracker/_version.py | 2 +- snowplow_tracker/test/integration/test_integration.py | 10 +++++----- 5 files changed, 25 insertions(+), 9 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index f4ab14eb..8b8f7b6a 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,12 @@ +Version 1.0.0 (2023-06-16) +-------------------------- +Remove Redis and Celery Emitters (#335) +Make tracker namespace mandatory (#337) +Track function to return event_id (#338) +Fix namespace assignment in Snowplow API (#341) +Refactor track_xxx() methods (#343) +Update payload builder to combine event subjects (#347) + Version 0.15.0 (2023-04-19) --------------------------- Use Requests Session for sending eventss (#221) diff --git a/docs/source/conf.py b/docs/source/conf.py index d2b51972..187e0da3 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -28,7 +28,7 @@ author = 'Alex Dean, Paul Boocock, Matus Tomlein, Jack Keene' # The full version, including alpha/beta/rc tags 
-release = "0.15" +release = "1.0.0" # -- General configuration --------------------------------------------------- diff --git a/setup.py b/setup.py index b67c8f3d..253b231c 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,14 @@ except ImportError: from distutils.core import setup -authors_list = ["Anuj More", "Alexander Dean", "Fred Blundun", "Paul Boocock"] +authors_list = [ + "Anuj More", + "Alexander Dean", + "Fred Blundun", + "Paul Boocock", + "Matus Tomlein", + "Jack Keene", +] authors_str = ", ".join(authors_list) authors_email_list = [ @@ -33,7 +40,7 @@ setup( name="snowplow-tracker", - version="0.15.0", + version="1.0.0", author=authors_str, author_email=authors_email_str, packages=["snowplow_tracker", "snowplow_tracker.test", "snowplow_tracker.events"], diff --git a/snowplow_tracker/_version.py b/snowplow_tracker/_version.py index 69a6ff65..60a0bd19 100644 --- a/snowplow_tracker/_version.py +++ b/snowplow_tracker/_version.py @@ -15,6 +15,6 @@ # language governing permissions and limitations there under. # """ -__version_info__ = (0, 15, 0) +__version_info__ = (1, 0, 0) __version__ = ".".join(str(x) for x in __version_info__) __build_version__ = __version__ + "" diff --git a/snowplow_tracker/test/integration/test_integration.py b/snowplow_tracker/test/integration/test_integration.py index a95e10a8..57b1a58c 100644 --- a/snowplow_tracker/test/integration/test_integration.py +++ b/snowplow_tracker/test/integration/test_integration.py @@ -513,14 +513,14 @@ def test_timestamps(self) -> None: def test_bytelimit(self) -> None: default_emitter = emitters.Emitter( - "localhost", protocol="http", port=80, batch_size=5, byte_limit=483 + "localhost", protocol="http", port=80, batch_size=5, byte_limit=459 ) t = tracker.Tracker("namespace", default_emitter, default_subject) with HTTMock(pass_post_response_content): - t.track_struct_event("Test", "A") # 161 bytes - t.track_struct_event("Test", "A") # 322 bytes - t.track_struct_event("Test", "A") # 483 bytes. 
Send - t.track_struct_event("Test", "AA") # 162 + t.track_struct_event("Test", "A") # 153 bytes + t.track_struct_event("Test", "A") # 306 bytes + t.track_struct_event("Test", "A") # 459 bytes. Send + t.track_struct_event("Test", "AA") # 154 self.assertEqual(len(querystrings[-1]["data"]), 3) self.assertEqual(default_emitter.bytes_queued, 156 + len(_version.__version__)) From d236424297412501e1ed69c95701b247992cafe2 Mon Sep 17 00:00:00 2001 From: Jack-Keene Date: Wed, 12 Jul 2023 12:31:29 +0100 Subject: [PATCH 33/51] Fix tstamp parameter in track_self_describing_event (close #350) --- snowplow_tracker/test/unit/test_tracker.py | 28 +++++++++++----------- snowplow_tracker/tracker.py | 16 ++++++------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/snowplow_tracker/test/unit/test_tracker.py b/snowplow_tracker/test/unit/test_tracker.py index 8eaf9894..3009790a 100644 --- a/snowplow_tracker/test/unit/test_tracker.py +++ b/snowplow_tracker/test/unit/test_tracker.py @@ -771,7 +771,7 @@ def test_track_link_click(self, mok_track_unstruct: Any) -> None: self.assertEqual(len(callArgs), 4) self.assertDictEqual(callArgs["event_json"].to_json(), expected) self.assertIs(callArgs["context"][0], ctx) - self.assertEqual(callArgs["true_timestamp"], evTstamp) + self.assertEqual(callArgs["tstamp"], evTstamp) @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_link_click_optional_none(self, mok_track_unstruct: Any) -> None: @@ -795,7 +795,7 @@ def test_track_link_click_optional_none(self, mok_track_unstruct: Any) -> None: self.assertEqual(len(callArgs), 4) self.assertDictEqual(callArgs["event_json"].to_json(), expected) self.assertTrue(callArgs["context"] is None) - self.assertTrue(callArgs["true_timestamp"] is None) + self.assertTrue(callArgs["tstamp"] is None) @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_add_to_cart(self, mok_track_unstruct: Any) -> None: @@ -835,7 +835,7 @@ def 
test_track_add_to_cart(self, mok_track_unstruct: Any) -> None: self.assertEqual(len(callArgs), 4) self.assertDictEqual(callArgs["event_json"].to_json(), expected) self.assertIs(callArgs["context"][0], ctx) - self.assertEqual(callArgs["true_timestamp"], evTstamp) + self.assertEqual(callArgs["tstamp"], evTstamp) @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_add_to_cart_optional_none(self, mok_track_unstruct: Any) -> None: @@ -857,7 +857,7 @@ def test_track_add_to_cart_optional_none(self, mok_track_unstruct: Any) -> None: self.assertEqual(len(callArgs), 4) self.assertDictEqual(callArgs["event_json"].to_json(), expected) self.assertTrue(callArgs["context"] is None) - self.assertTrue(callArgs["true_timestamp"] is None) + self.assertTrue(callArgs["tstamp"] is None) @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_remove_from_cart(self, mok_track_unstruct: Any) -> None: @@ -897,7 +897,7 @@ def test_track_remove_from_cart(self, mok_track_unstruct: Any) -> None: self.assertEqual(len(callArgs), 4) self.assertDictEqual(callArgs["event_json"].to_json(), expected) self.assertIs(callArgs["context"][0], ctx) - self.assertEqual(callArgs["true_timestamp"], evTstamp) + self.assertEqual(callArgs["tstamp"], evTstamp) @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_remove_from_cart_optional_none( @@ -921,7 +921,7 @@ def test_track_remove_from_cart_optional_none( self.assertEqual(len(callArgs), 4) self.assertDictEqual(callArgs["event_json"].to_json(), expected) self.assertTrue(callArgs["context"] is None) - self.assertTrue(callArgs["true_timestamp"] is None) + self.assertTrue(callArgs["tstamp"] is None) @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_change(self, mok_track_unstruct: Any) -> None: @@ -961,7 +961,7 @@ def test_track_form_change(self, mok_track_unstruct: Any) -> None: self.assertEqual(len(callArgs), 4) 
self.assertDictEqual(callArgs["event_json"].to_json(), expected) self.assertIs(callArgs["context"][0], ctx) - self.assertEqual(callArgs["true_timestamp"], evTstamp) + self.assertEqual(callArgs["tstamp"], evTstamp) @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_change_optional_none(self, mok_track_unstruct: Any) -> None: @@ -987,7 +987,7 @@ def test_track_form_change_optional_none(self, mok_track_unstruct: Any) -> None: self.assertEqual(len(callArgs), 4) self.assertDictEqual(callArgs["event_json"].to_json(), expected) self.assertTrue(callArgs["context"] is None) - self.assertTrue(callArgs["true_timestamp"] is None) + self.assertTrue(callArgs["tstamp"] is None) @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_submit(self, mok_track_unstruct: Any) -> None: @@ -1029,7 +1029,7 @@ def test_track_form_submit(self, mok_track_unstruct: Any) -> None: self.assertEqual(len(callArgs), 4) self.assertDictEqual(callArgs["event_json"].to_json(), expected) self.assertIs(callArgs["context"][0], ctx) - self.assertEqual(callArgs["true_timestamp"], evTstamp) + self.assertEqual(callArgs["tstamp"], evTstamp) @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_submit_invalid_element_type( @@ -1104,7 +1104,7 @@ def test_track_form_submit_invalid_element_type_disabled_contracts( self.assertEqual(len(callArgs), 4) self.assertDictEqual(callArgs["event_json"].to_json(), expected) self.assertIs(callArgs["context"][0], ctx) - self.assertEqual(callArgs["true_timestamp"], evTstamp) + self.assertEqual(callArgs["tstamp"], evTstamp) @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_submit_optional_none(self, mok_track_unstruct: Any) -> None: @@ -1122,7 +1122,7 @@ def test_track_form_submit_optional_none(self, mok_track_unstruct: Any) -> None: self.assertEqual(len(callArgs), 4) self.assertDictEqual(callArgs["event_json"].to_json(), expected) 
self.assertTrue(callArgs["context"] is None) - self.assertTrue(callArgs["true_timestamp"] is None) + self.assertTrue(callArgs["tstamp"] is None) @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_submit_empty_elems(self, mok_track_unstruct: Any) -> None: @@ -1170,7 +1170,7 @@ def test_track_site_search(self, mok_track_unstruct: Any) -> None: self.assertEqual(len(callArgs), 4) self.assertDictEqual(callArgs["event_json"].to_json(), expected) self.assertIs(callArgs["context"][0], ctx) - self.assertEqual(callArgs["true_timestamp"], evTstamp) + self.assertEqual(callArgs["tstamp"], evTstamp) @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_site_search_optional_none(self, mok_track_unstruct: Any) -> None: @@ -1191,7 +1191,7 @@ def test_track_site_search_optional_none(self, mok_track_unstruct: Any) -> None: self.assertEqual(len(callArgs), 4) self.assertDictEqual(callArgs["event_json"].to_json(), expected) self.assertTrue(callArgs["context"] is None) - self.assertTrue(callArgs["true_timestamp"] is None) + self.assertTrue(callArgs["tstamp"] is None) @mock.patch("snowplow_tracker.Tracker.track") def test_track_mobile_screen_view(self, mok_track: Any) -> None: @@ -1246,4 +1246,4 @@ def test_track_screen_view(self, mok_track_unstruct: Any) -> None: self.assertEqual(len(callArgs), 4) self.assertDictEqual(callArgs["event_json"].to_json(), expected) self.assertIs(callArgs["context"][0], ctx) - self.assertEqual(callArgs["true_timestamp"], evTstamp) + self.assertEqual(callArgs["tstamp"], evTstamp) diff --git a/snowplow_tracker/tracker.py b/snowplow_tracker/tracker.py index f0e92fdf..2effe835 100644 --- a/snowplow_tracker/tracker.py +++ b/snowplow_tracker/tracker.py @@ -316,7 +316,7 @@ def track_link_click( self.track_self_describing_event( event_json=event_json, context=context, - true_timestamp=tstamp, + tstamp=tstamp, event_subject=event_subject, ) return self @@ -380,7 +380,7 @@ def track_add_to_cart( 
self.track_self_describing_event( event_json=event_json, context=context, - true_timestamp=tstamp, + tstamp=tstamp, event_subject=event_subject, ) return self @@ -444,7 +444,7 @@ def track_remove_from_cart( self.track_self_describing_event( event_json=event_json, context=context, - true_timestamp=tstamp, + tstamp=tstamp, event_subject=event_subject, ) return self @@ -510,7 +510,7 @@ def track_form_change( self.track_self_describing_event( event_json=event_json, context=context, - true_timestamp=tstamp, + tstamp=tstamp, event_subject=event_subject, ) return self @@ -563,7 +563,7 @@ def track_form_submit( self.track_self_describing_event( event_json=event_json, context=context, - true_timestamp=tstamp, + tstamp=tstamp, event_subject=event_subject, ) return self @@ -618,7 +618,7 @@ def track_site_search( self.track_self_describing_event( event_json=event_json, context=context, - true_timestamp=tstamp, + tstamp=tstamp, event_subject=event_subject, ) return self @@ -807,7 +807,7 @@ def track_screen_view( self.track_self_describing_event( event_json=event_json, context=context, - true_timestamp=tstamp, + tstamp=tstamp, event_subject=event_subject, ) return self @@ -992,7 +992,7 @@ def track_unstruct_event( self.track_self_describing_event( event_json=event_json, context=context, - true_timestamp=tstamp, + tstamp=tstamp, event_subject=event_subject, ) return self From fdc2abc778bc6f776e4ff9e43fda41bf15e55778 Mon Sep 17 00:00:00 2001 From: Jack-Keene Date: Wed, 12 Jul 2023 14:44:17 +0100 Subject: [PATCH 34/51] Prepare for 1.0.1 release --- CHANGES.txt | 4 ++++ docs/source/conf.py | 2 +- setup.py | 2 +- snowplow_tracker/_version.py | 2 +- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 8b8f7b6a..76b9bc20 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,7 @@ +Version 1.0.1 (2023-07-12) +-------------------------- +Fix tstamp parameter in track_self_describing_event (#350) (Thanks to @andehen) + Version 1.0.0 (2023-06-16) 
-------------------------- Remove Redis and Celery Emitters (#335) diff --git a/docs/source/conf.py b/docs/source/conf.py index 187e0da3..8594b737 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -28,7 +28,7 @@ author = 'Alex Dean, Paul Boocock, Matus Tomlein, Jack Keene' # The full version, including alpha/beta/rc tags -release = "1.0.0" +release = "1.0.1" # -- General configuration --------------------------------------------------- diff --git a/setup.py b/setup.py index 253b231c..3b124a32 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,7 @@ setup( name="snowplow-tracker", - version="1.0.0", + version="1.0.1", author=authors_str, author_email=authors_email_str, packages=["snowplow_tracker", "snowplow_tracker.test", "snowplow_tracker.events"], diff --git a/snowplow_tracker/_version.py b/snowplow_tracker/_version.py index 60a0bd19..2289a3a8 100644 --- a/snowplow_tracker/_version.py +++ b/snowplow_tracker/_version.py @@ -15,6 +15,6 @@ # language governing permissions and limitations there under. 
# """ -__version_info__ = (1, 0, 0) +__version_info__ = (1, 0, 1) __version__ = ".".join(str(x) for x in __version_info__) __build_version__ = __version__ + "" From db531d3a10510d971662d4431eb49cbbcbe7c7b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= <16805946+edgarrmondragon@users.noreply.github.com> Date: Mon, 26 Feb 2024 03:53:21 -0600 Subject: [PATCH 35/51] Add Python 3.12 to CI tests (close #356) --- .github/workflows/ci.yml | 2 +- Dockerfile | 2 +- run-tests.sh | 13 +++++++++++++ setup.py | 1 + 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5062c1ca..85929119 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,7 +12,7 @@ jobs: strategy: matrix: - python-version: [3.6, 3.7, 3.8, 3.9, "3.10", "3.11"] + python-version: [3.6, 3.7, 3.8, 3.9, "3.10", "3.11", "3.12"] steps: - name: Checkout diff --git a/Dockerfile b/Dockerfile index c48e4a8a..ba2948f6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,7 +10,7 @@ ENV PATH $PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH RUN git clone --depth=1 https://github.com/pyenv/pyenv.git $PYENV_ROOT RUN git clone --depth=1 https://github.com/pyenv/pyenv-virtualenv.git $PYENV_ROOT/plugins/pyenv-virtualenv -RUN pyenv install 3.5.10 && pyenv install 3.6.14 && pyenv install 3.7.11 && pyenv install 3.8.11 && pyenv install 3.9.6 && pyenv install 3.10.1 && pyenv install 3.11.0 +RUN pyenv install 3.5.10 && pyenv install 3.6.14 && pyenv install 3.7.11 && pyenv install 3.8.11 && pyenv install 3.9.6 && pyenv install 3.10.1 && pyenv install 3.11.0 && pyenv install 3.12.1 WORKDIR /app COPY . . diff --git a/run-tests.sh b/run-tests.sh index 8a6489a0..23b13a73 100755 --- a/run-tests.sh +++ b/run-tests.sh @@ -78,6 +78,14 @@ function deploy { source deactivate fi + # pyenv install 3.12.0 + if [ ! -e ~/.pyenv/versions/tracker312 ]; then + pyenv virtualenv 3.12.0 tracker312 + pyenv activate tracker312 + pip install . 
+ pip install -r requirements-test.txt + source deactivate + fi } @@ -109,6 +117,10 @@ function run_tests { pyenv activate tracker311 pytest source deactivate + + pyenv activate tracker312 + pytest + source deactivate } function refresh_deploy { @@ -119,6 +131,7 @@ function refresh_deploy { pyenv uninstall -f tracker39 pyenv uninstall -f tracker310 pyenv uninstall -f tracker311 + pyenv uninstall -f tracker312 } diff --git a/setup.py b/setup.py index 3b124a32..02efed94 100644 --- a/setup.py +++ b/setup.py @@ -62,6 +62,7 @@ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Operating System :: OS Independent", ], install_requires=["requests>=2.25.1,<3.0", "typing_extensions>=3.7.4"], From 70712924c80603b35676e0f3c9b35201bbbc0d79 Mon Sep 17 00:00:00 2001 From: Jack-Keene Date: Mon, 26 Feb 2024 11:40:27 +0000 Subject: [PATCH 36/51] Prepare for 1.0.2 release --- CHANGES.txt | 4 ++++ docs/source/conf.py | 2 +- setup.py | 2 +- snowplow_tracker/_version.py | 2 +- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 76b9bc20..4a5c0c29 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,7 @@ +Version 1.0.2 (2024-02-26) +-------------------------- +Add Python 3.12 to CI tests (#356) (Thanks to @edgarrmondragon) + Version 1.0.1 (2023-07-12) -------------------------- Fix tstamp parameter in track_self_describing_event (#350) (Thanks to @andehen) diff --git a/docs/source/conf.py b/docs/source/conf.py index 8594b737..e2762fe0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -28,7 +28,7 @@ author = 'Alex Dean, Paul Boocock, Matus Tomlein, Jack Keene' # The full version, including alpha/beta/rc tags -release = "1.0.1" +release = "1.0.2" # -- General configuration --------------------------------------------------- diff --git a/setup.py b/setup.py index 02efed94..d0ef7f04 100644 --- a/setup.py +++ b/setup.py 
@@ -40,7 +40,7 @@ setup( name="snowplow-tracker", - version="1.0.1", + version="1.0.2", author=authors_str, author_email=authors_email_str, packages=["snowplow_tracker", "snowplow_tracker.test", "snowplow_tracker.events"], diff --git a/snowplow_tracker/_version.py b/snowplow_tracker/_version.py index 2289a3a8..38be1911 100644 --- a/snowplow_tracker/_version.py +++ b/snowplow_tracker/_version.py @@ -15,6 +15,6 @@ # language governing permissions and limitations there under. # """ -__version_info__ = (1, 0, 1) +__version_info__ = (1, 0, 2) __version__ = ".".join(str(x) for x in __version_info__) __build_version__ = __version__ + "" From 6d8d588c6e2a270fefd290541331f866a9f8b4cd Mon Sep 17 00:00:00 2001 From: Greg Leonard <45019882+greg-el@users.noreply.github.com> Date: Tue, 20 Aug 2024 13:39:33 +0100 Subject: [PATCH 37/51] Fix `PagePing`, `PageView`, and `StructuredEvent` property getters (close #361) --- snowplow_tracker/events/page_ping.py | 14 +++---- snowplow_tracker/events/page_view.py | 6 +-- snowplow_tracker/events/structured_event.py | 10 ++--- snowplow_tracker/test/unit/test_page_ping.py | 38 +++++++++++++++++++ snowplow_tracker/test/unit/test_page_view.py | 27 +++++++++++++ .../test/unit/test_structured_event.py | 24 ++++++++++++ 6 files changed, 104 insertions(+), 15 deletions(-) create mode 100644 snowplow_tracker/test/unit/test_page_ping.py create mode 100644 snowplow_tracker/test/unit/test_page_view.py create mode 100644 snowplow_tracker/test/unit/test_structured_event.py diff --git a/snowplow_tracker/events/page_ping.py b/snowplow_tracker/events/page_ping.py index b0084797..43bbb210 100644 --- a/snowplow_tracker/events/page_ping.py +++ b/snowplow_tracker/events/page_ping.py @@ -81,7 +81,7 @@ def page_url(self) -> str: """ URL of the viewed page """ - return self.payload.get("url") + return self.payload.nv_pairs["url"] @page_url.setter def page_url(self, value: str): @@ -93,7 +93,7 @@ def page_title(self) -> Optional[str]: """ URL of the viewed page """ 
- return self.payload.get("page") + return self.payload.nv_pairs.get("page") @page_title.setter def page_title(self, value: Optional[str]): @@ -104,7 +104,7 @@ def referrer(self) -> Optional[str]: """ The referrer of the page """ - return self.payload.get("refr") + return self.payload.nv_pairs.get("refr") @referrer.setter def referrer(self, value: Optional[str]): @@ -115,7 +115,7 @@ def min_x(self) -> Optional[int]: """ Minimum page x offset seen in the last ping period """ - return self.payload.get("pp_mix") + return self.payload.nv_pairs.get("pp_mix") @min_x.setter def min_x(self, value: Optional[int]): @@ -126,7 +126,7 @@ def max_x(self) -> Optional[int]: """ Maximum page x offset seen in the last ping period """ - return self.payload.get("pp_max") + return self.payload.nv_pairs.get("pp_max") @max_x.setter def max_x(self, value: Optional[int]): @@ -137,7 +137,7 @@ def min_y(self) -> Optional[int]: """ Minimum page y offset seen in the last ping period """ - return self.payload.get("pp_miy") + return self.payload.nv_pairs.get("pp_miy") @min_y.setter def min_y(self, value: Optional[int]): @@ -148,7 +148,7 @@ def max_y(self) -> Optional[int]: """ Maximum page y offset seen in the last ping period """ - return self.payload.get("pp_may") + return self.payload.nv_pairs.get("pp_may") @max_y.setter def max_y(self, value: Optional[int]): diff --git a/snowplow_tracker/events/page_view.py b/snowplow_tracker/events/page_view.py index dc9ca51f..53e44bb6 100644 --- a/snowplow_tracker/events/page_view.py +++ b/snowplow_tracker/events/page_view.py @@ -65,7 +65,7 @@ def page_url(self) -> str: """ URL of the viewed page """ - return self.payload.get("url") + return self.payload.nv_pairs["url"] @page_url.setter def page_url(self, value: str): @@ -77,7 +77,7 @@ def page_title(self) -> Optional[str]: """ Title of the viewed page """ - return self.payload.get("page") + return self.payload.nv_pairs.get("page") @page_title.setter def page_title(self, value: Optional[str]): @@ -88,7 
+88,7 @@ def referrer(self) -> Optional[str]: """ The referrer of the page """ - return self.payload.get("refr") + return self.payload.nv_pairs.get("refr") @referrer.setter def referrer(self, value: Optional[str]): diff --git a/snowplow_tracker/events/structured_event.py b/snowplow_tracker/events/structured_event.py index a92acc0f..00658e9f 100644 --- a/snowplow_tracker/events/structured_event.py +++ b/snowplow_tracker/events/structured_event.py @@ -81,7 +81,7 @@ def category(self) -> Optional[str]: """ Category of the event """ - return self.payload.get("se_ca") + return self.payload.nv_pairs.get("se_ca") @category.setter def category(self, value: Optional[str]): @@ -93,7 +93,7 @@ def action(self) -> Optional[str]: """ The event itself """ - return self.payload.get("se_ac") + return self.payload.nv_pairs.get("se_ac") @action.setter def action(self, value: Optional[str]): @@ -105,7 +105,7 @@ def label(self) -> Optional[str]: """ Refer to the object the action is performed on """ - return self.payload.get("se_la") + return self.payload.nv_pairs.get("se_la") @label.setter def label(self, value: Optional[str]): @@ -116,7 +116,7 @@ def property_(self) -> Optional[str]: """ Property associated with either the action or the object """ - return self.payload.get("se_pr") + return self.payload.nv_pairs.get("se_pr") @property_.setter def property_(self, value: Optional[str]): @@ -127,7 +127,7 @@ def value(self) -> Optional[int]: """ A value associated with the user action """ - return self.payload.get("se_va") + return self.payload.nv_pairs.get("se_va") @value.setter def value(self, value: Optional[int]): diff --git a/snowplow_tracker/test/unit/test_page_ping.py b/snowplow_tracker/test/unit/test_page_ping.py new file mode 100644 index 00000000..7539ce43 --- /dev/null +++ b/snowplow_tracker/test/unit/test_page_ping.py @@ -0,0 +1,38 @@ +import pytest + +from snowplow_tracker.events.page_ping import PagePing + + +class TestPagePing: + def test_getters(self): + pp = 
PagePing("url", "title", "referrer", 1, 2, 3, 4) + assert pp.page_url == "url" + assert pp.page_title == "title" + assert pp.referrer == "referrer" + assert pp.min_x == 1 + assert pp.max_x == 2 + assert pp.min_y == 3 + assert pp.max_y == 4 + + def test_setters(self): + pp = PagePing("url") + pp.page_title = "title" + pp.referrer = "referrer" + pp.min_x = 1 + pp.max_x = 2 + pp.min_y = 3 + pp.max_y = 4 + assert pp.page_title == "title" + assert pp.referrer == "referrer" + assert pp.min_x == 1 + assert pp.max_x == 2 + assert pp.min_y == 3 + assert pp.max_y == 4 + assert pp.page_url == "url" + + def test_page_url_non_empty_string(self): + pp = PagePing("url") + pp.page_url = "new_url" + assert pp.page_url == "new_url" + with pytest.raises(ValueError): + pp.page_url = "" diff --git a/snowplow_tracker/test/unit/test_page_view.py b/snowplow_tracker/test/unit/test_page_view.py new file mode 100644 index 00000000..3736710c --- /dev/null +++ b/snowplow_tracker/test/unit/test_page_view.py @@ -0,0 +1,27 @@ +import pytest + +from snowplow_tracker.events.page_view import PageView + + +class TestPageView: + def test_getters(self): + pv = PageView("url", "title", "referrer") + assert pv.page_url == "url" + assert pv.page_title == "title" + assert pv.referrer == "referrer" + + def test_setters(self): + pv = PageView("url", "title", "referrer") + pv.page_url = "new_url" + pv.page_title = "new_title" + pv.referrer = "new_referrer" + assert pv.page_url == "new_url" + assert pv.page_title == "new_title" + assert pv.referrer == "new_referrer" + + def test_page_url_non_empty_string(self): + pv = PageView("url") + pv.page_url = "new_url" + assert pv.page_url == "new_url" + with pytest.raises(ValueError): + pv.page_url = "" diff --git a/snowplow_tracker/test/unit/test_structured_event.py b/snowplow_tracker/test/unit/test_structured_event.py new file mode 100644 index 00000000..fdf00014 --- /dev/null +++ b/snowplow_tracker/test/unit/test_structured_event.py @@ -0,0 +1,24 @@ +from 
snowplow_tracker.events.structured_event import StructuredEvent + + +class TestStructuredEvent: + def test_getters(self): + se = StructuredEvent("category", "action", "label", "property", 1) + assert se.category == "category" + assert se.action == "action" + assert se.label == "label" + assert se.property_ == "property" + assert se.value == 1 + + def test_setters(self): + se = StructuredEvent("category", "action") + se.category = "new_category" + se.action = "new_action" + se.label = "new_label" + se.property_ = "new_property" + se.value = 2 + assert se.category == "new_category" + assert se.action == "new_action" + assert se.label == "new_label" + assert se.property_ == "new_property" + assert se.value == 2 From f58d845969ec9daf368f7b3dab1732fa764c0818 Mon Sep 17 00:00:00 2001 From: Greg Leonard <45019882+greg-el@users.noreply.github.com> Date: Wed, 21 Aug 2024 12:52:33 +0100 Subject: [PATCH 38/51] Update typing --- .github/workflows/ci.yml | 5 +++ setup.py | 6 +++- snowplow_tracker/constants.py | 2 +- snowplow_tracker/contracts.py | 2 +- snowplow_tracker/emitters.py | 35 ++++++++++++++++----- snowplow_tracker/event_store.py | 11 ++++--- snowplow_tracker/events/event.py | 5 ++- snowplow_tracker/events/screen_view.py | 4 +-- snowplow_tracker/events/structured_event.py | 12 +++---- snowplow_tracker/payload.py | 5 ++- snowplow_tracker/snowplow.py | 6 ++-- snowplow_tracker/subject.py | 4 +-- snowplow_tracker/tracker.py | 31 ++++++++++++------ snowplow_tracker/tracker_configuration.py | 8 ++--- snowplow_tracker/typing.py | 9 ++++-- 15 files changed, 94 insertions(+), 51 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 85929119..a285a2d9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -40,6 +40,11 @@ jobs: - name: Tests run: | pytest --cov=snowplow_tracker --cov-report=xml + + - name: MyPy + run: | + python -m pip install mypy + mypy snowplow_tracker --exclude '/test' - name: Demo run: | diff --git a/setup.py 
b/setup.py index d0ef7f04..e1b2aa33 100644 --- a/setup.py +++ b/setup.py @@ -65,5 +65,9 @@ "Programming Language :: Python :: 3.12", "Operating System :: OS Independent", ], - install_requires=["requests>=2.25.1,<3.0", "typing_extensions>=3.7.4"], + install_requires=[ + "requests>=2.25.1,<3.0", + "types-requests>=2.25.1,<3.0", + "typing_extensions>=3.7.4", + ], ) diff --git a/snowplow_tracker/constants.py b/snowplow_tracker/constants.py index 579ff86e..53ecc151 100644 --- a/snowplow_tracker/constants.py +++ b/snowplow_tracker/constants.py @@ -18,7 +18,7 @@ from snowplow_tracker import _version, SelfDescribingJson VERSION = "py-%s" % _version.__version__ -DEFAULT_ENCODE_BASE64 = True +DEFAULT_ENCODE_BASE64: bool = True # Type hint required for Python 3.6 MyPy check BASE_SCHEMA_PATH = "iglu:com.snowplowanalytics.snowplow" MOBILE_SCHEMA_PATH = "iglu:com.snowplowanalytics.mobile" SCHEMA_TAG = "jsonschema" diff --git a/snowplow_tracker/contracts.py b/snowplow_tracker/contracts.py index c54ac668..3b17e1a3 100644 --- a/snowplow_tracker/contracts.py +++ b/snowplow_tracker/contracts.py @@ -77,7 +77,7 @@ def _get_parameter_name() -> str: match = _MATCH_FIRST_PARAMETER_REGEX.search(code) if not match: return "Unnamed parameter" - return match.groups(0)[0] + return str(match.groups(0)[0]) def _check_form_element(element: Dict[str, Any]) -> bool: diff --git a/snowplow_tracker/emitters.py b/snowplow_tracker/emitters.py index af233566..6a138f0f 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -20,7 +20,7 @@ import threading import requests import random -from typing import Optional, Union, Tuple, Dict +from typing import Optional, Union, Tuple, Dict, cast, Callable from queue import Queue from snowplow_tracker.self_describing_json import SelfDescribingJson @@ -31,6 +31,7 @@ Method, SuccessCallback, FailureCallback, + EmitterProtocol, ) from snowplow_tracker.contracts import one_of from snowplow_tracker.event_store import EventStore, 
InMemoryEventStore @@ -48,7 +49,20 @@ METHODS = {"get", "post"} -class Emitter(object): +# Unifes the two request methods under one interface +class Requester: + post: Callable + get: Callable + + def __init__(self, post: Callable, get: Callable): + # 3.6 MyPy compatibility: + # error: Cannot assign to a method + # https://github.com/python/mypy/issues/2427 + setattr(self, "post", post) + setattr(self, "get", get) + + +class Emitter(EmitterProtocol): """ Synchronously send Snowplow events to a Snowplow collector Supports both GET and POST requests @@ -151,12 +165,15 @@ def __init__( self.retry_timer = FlushTimer(emitter=self, repeating=False) self.max_retry_delay_seconds = max_retry_delay_seconds - self.retry_delay = 0 + self.retry_delay: Union[int, float] = 0 self.custom_retry_codes = custom_retry_codes logger.info("Emitter initialized with endpoint " + self.endpoint) - self.request_method = requests if session is None else session + if session is None: + self.request_method = Requester(post=requests.post, get=requests.get) + else: + self.request_method = Requester(post=session.post, get=session.get) @staticmethod def as_collector_uri( @@ -183,7 +200,7 @@ def as_collector_uri( if endpoint.split("://")[0] in PROTOCOLS: endpoint_arr = endpoint.split("://") - protocol = endpoint_arr[0] + protocol = cast(HttpProtocol, endpoint_arr[0]) endpoint = endpoint_arr[1] if method == "get": @@ -427,6 +444,10 @@ def _cancel_retry_timer(self) -> None: """ self.retry_timer.cancel() + # This is only here to satisfy the `EmitterProtocol` interface + def async_flush(self) -> None: + return + class AsyncEmitter(Emitter): """ @@ -446,7 +467,7 @@ def __init__( byte_limit: Optional[int] = None, request_timeout: Optional[Union[float, Tuple[float, float]]] = None, max_retry_delay_seconds: int = 60, - buffer_capacity: int = None, + buffer_capacity: Optional[int] = None, custom_retry_codes: Dict[int, bool] = {}, event_store: Optional[EventStore] = None, session: Optional[requests.Session] = 
None, @@ -501,7 +522,7 @@ def __init__( event_store=event_store, session=session, ) - self.queue = Queue() + self.queue: Queue = Queue() for i in range(thread_count): t = threading.Thread(target=self.consume) t.daemon = True diff --git a/snowplow_tracker/event_store.py b/snowplow_tracker/event_store.py index 898f92ff..b8d13028 100644 --- a/snowplow_tracker/event_store.py +++ b/snowplow_tracker/event_store.py @@ -15,6 +15,7 @@ # language governing permissions and limitations there under. # """ +from typing import List from typing_extensions import Protocol from snowplow_tracker.typing import PayloadDict, PayloadDictList from logging import Logger @@ -25,7 +26,7 @@ class EventStore(Protocol): EventStore protocol. For buffering events in the Emitter. """ - def add_event(payload: PayloadDict) -> bool: + def add_event(self, payload: PayloadDict) -> bool: """ Add PayloadDict to buffer. Returns True if successful. @@ -35,7 +36,7 @@ def add_event(payload: PayloadDict) -> bool: """ ... - def get_events_batch() -> PayloadDictList: + def get_events_batch(self) -> PayloadDictList: """ Get a list of all the PayloadDicts in the buffer. @@ -43,7 +44,7 @@ def get_events_batch() -> PayloadDictList: """ ... - def cleanup(batch: PayloadDictList, need_retry: bool) -> None: + def cleanup(self, batch: PayloadDictList, need_retry: bool) -> None: """ Removes sent events from the event store. If events need to be retried they are re-added to the buffer. @@ -54,7 +55,7 @@ def cleanup(batch: PayloadDictList, need_retry: bool) -> None: """ ... - def size() -> int: + def size(self) -> int: """ Returns the number of events in the buffer @@ -76,7 +77,7 @@ def __init__(self, logger: Logger, buffer_capacity: int = 10000) -> None: When the buffer is full new events are lost. 
:type buffer_capacity int """ - self.event_buffer = [] + self.event_buffer: List[PayloadDict] = [] self.buffer_capacity = buffer_capacity self.logger = logger diff --git a/snowplow_tracker/events/event.py b/snowplow_tracker/events/event.py index c9d9b82e..fb300b87 100644 --- a/snowplow_tracker/events/event.py +++ b/snowplow_tracker/events/event.py @@ -97,10 +97,9 @@ def build_payload( if self.event_subject is not None: fin_payload_dict = self.event_subject.combine_subject(subject) else: - fin_payload_dict = None if subject is None else subject.standard_nv_pairs + fin_payload_dict = {} if subject is None else subject.standard_nv_pairs - if fin_payload_dict is not None: - self.payload.add_dict(fin_payload_dict) + self.payload.add_dict(fin_payload_dict) return self.payload @property diff --git a/snowplow_tracker/events/screen_view.py b/snowplow_tracker/events/screen_view.py index d0cea5d0..6b4af927 100644 --- a/snowplow_tracker/events/screen_view.py +++ b/snowplow_tracker/events/screen_view.py @@ -15,7 +15,7 @@ # language governing permissions and limitations there under. # """ -from typing import Optional, List +from typing import Dict, Optional, List from snowplow_tracker.typing import JsonEncoderFunction from snowplow_tracker.events.event import Event from snowplow_tracker.events.self_describing import SelfDescribing @@ -76,7 +76,7 @@ def __init__( super(ScreenView, self).__init__( event_subject=event_subject, context=context, true_timestamp=true_timestamp ) - self.screen_view_properties = {} + self.screen_view_properties: Dict[str, str] = {} self.id_ = id_ self.name = name self.type = type diff --git a/snowplow_tracker/events/structured_event.py b/snowplow_tracker/events/structured_event.py index 00658e9f..23abafa8 100644 --- a/snowplow_tracker/events/structured_event.py +++ b/snowplow_tracker/events/structured_event.py @@ -15,7 +15,7 @@ # language governing permissions and limitations there under. 
# """ from snowplow_tracker.events.event import Event -from typing import Optional, List +from typing import Optional, List, Union from snowplow_tracker.subject import Subject from snowplow_tracker.self_describing_json import SelfDescribingJson from snowplow_tracker.contracts import non_empty_string @@ -41,7 +41,7 @@ def __init__( action: str, label: Optional[str] = None, property_: Optional[str] = None, - value: Optional[int] = None, + value: Optional[Union[int, float]] = None, event_subject: Optional[Subject] = None, context: Optional[List[SelfDescribingJson]] = None, true_timestamp: Optional[float] = None, @@ -84,7 +84,7 @@ def category(self) -> Optional[str]: return self.payload.nv_pairs.get("se_ca") @category.setter - def category(self, value: Optional[str]): + def category(self, value: str): non_empty_string(value) self.payload.add("se_ca", value) @@ -96,7 +96,7 @@ def action(self) -> Optional[str]: return self.payload.nv_pairs.get("se_ac") @action.setter - def action(self, value: Optional[str]): + def action(self, value: str): non_empty_string(value) self.payload.add("se_ac", value) @@ -123,12 +123,12 @@ def property_(self, value: Optional[str]): self.payload.add("se_pr", value) @property - def value(self) -> Optional[int]: + def value(self) -> Optional[Union[int, float]]: """ A value associated with the user action """ return self.payload.nv_pairs.get("se_va") @value.setter - def value(self, value: Optional[int]): + def value(self, value: Optional[Union[int, float]]): self.payload.add("se_va", value) diff --git a/snowplow_tracker/payload.py b/snowplow_tracker/payload.py index 26e3262c..18d1bf4d 100644 --- a/snowplow_tracker/payload.py +++ b/snowplow_tracker/payload.py @@ -83,9 +83,8 @@ def add_json( if encode_base64: encoded_dict = base64.urlsafe_b64encode(json_dict.encode("utf-8")) - if not isinstance(encoded_dict, str): - encoded_dict = encoded_dict.decode("utf-8") - self.add(type_when_encoded, encoded_dict) + encoded_dict_str = 
encoded_dict.decode("utf-8") + self.add(type_when_encoded, encoded_dict_str) else: self.add(type_when_not_encoded, json_dict) diff --git a/snowplow_tracker/snowplow.py b/snowplow_tracker/snowplow.py index d824ed26..daa1434b 100644 --- a/snowplow_tracker/snowplow.py +++ b/snowplow_tracker/snowplow.py @@ -16,7 +16,7 @@ # """ import logging -from typing import Optional +from typing import Dict, Optional from snowplow_tracker import ( Tracker, Emitter, @@ -37,7 +37,7 @@ class Snowplow: - _trackers = {} + _trackers: Dict[str, Tracker] = {} @staticmethod def create_tracker( @@ -149,7 +149,7 @@ def reset(cls): cls._trackers = {} @classmethod - def get_tracker(cls, namespace: str) -> Tracker: + def get_tracker(cls, namespace: str) -> Optional[Tracker]: """ Returns a Snowplow tracker from the Snowplow object if it exists :param namespace: Snowplow tracker namespace diff --git a/snowplow_tracker/subject.py b/snowplow_tracker/subject.py index c3165d34..cbf29aa8 100644 --- a/snowplow_tracker/subject.py +++ b/snowplow_tracker/subject.py @@ -15,7 +15,7 @@ # language governing permissions and limitations there under. 
# """ -from typing import Optional +from typing import Dict, Optional, Union from snowplow_tracker.contracts import one_of, greater_than from snowplow_tracker.typing import SupportedPlatform, SUPPORTED_PLATFORMS, PayloadDict @@ -30,7 +30,7 @@ class Subject(object): """ def __init__(self) -> None: - self.standard_nv_pairs = {"p": DEFAULT_PLATFORM} + self.standard_nv_pairs: Dict[str, Union[str, int]] = {"p": DEFAULT_PLATFORM} def set_platform(self, value: SupportedPlatform) -> "Subject": """ diff --git a/snowplow_tracker/tracker.py b/snowplow_tracker/tracker.py index 2effe835..4dc489dc 100644 --- a/snowplow_tracker/tracker.py +++ b/snowplow_tracker/tracker.py @@ -80,13 +80,13 @@ def __init__( if subject is None: subject = Subject() - if type(emitters) is list: + if isinstance(emitters, list): non_empty(emitters) self.emitters = emitters else: self.emitters = [emitters] - self.subject = subject + self.subject: Optional[Subject] = subject self.encode_base64 = encode_base64 self.json_encoder = json_encoder @@ -145,6 +145,8 @@ def track( if "eid" in payload.nv_pairs.keys(): return payload.nv_pairs["eid"] + return None + def complete_payload( self, event: Event, @@ -298,7 +300,7 @@ def track_link_click( ) non_empty_string(target_url) - properties = {} + properties: Dict[str, Union[str, ElementClasses]] = {} properties["targetUrl"] = target_url if element_id is not None: properties["elementId"] = element_id @@ -361,7 +363,7 @@ def track_add_to_cart( ) non_empty_string(sku) - properties = {} + properties: Union[Dict[str, Union[str, float, int]]] = {} properties["sku"] = sku properties["quantity"] = quantity if name is not None: @@ -425,7 +427,7 @@ def track_remove_from_cart( ) non_empty_string(sku) - properties = {} + properties: Dict[str, Union[str, float, int]] = {} properties["sku"] = sku properties["quantity"] = quantity if name is not None: @@ -493,7 +495,7 @@ def track_form_change( if type_ is not None: one_of(type_.lower(), FORM_TYPES) - properties = dict() + 
properties: Dict[str, Union[Optional[str], ElementClasses]] = dict() properties["formId"] = form_id properties["elementId"] = element_id properties["nodeName"] = node_name @@ -549,7 +551,9 @@ def track_form_submit( for element in elements or []: form_element(element) - properties = dict() + properties: Dict[ + str, Union[str, ElementClasses, FormClasses, List[Dict[str, Any]]] + ] = dict() properties["formId"] = form_id if form_classes is not None: properties["formClasses"] = form_classes @@ -602,7 +606,9 @@ def track_site_search( ) non_empty(terms) - properties = {} + properties: Dict[ + str, Union[Sequence[str], Dict[str, Union[str, bool]], int] + ] = {} properties["terms"] = terms if filters is not None: properties["filters"] = filters @@ -878,7 +884,7 @@ def track_struct_event( action: str, label: Optional[str] = None, property_: Optional[str] = None, - value: Optional[float] = None, + value: Optional[Union[int, float]] = None, context: Optional[List[SelfDescribingJson]] = None, tstamp: Optional[float] = None, event_subject: Optional[Subject] = None, @@ -1037,4 +1043,9 @@ def add_emitter(self, emitter: EmitterProtocol) -> "Tracker": return self def get_namespace(self) -> str: - return self.standard_nv_pairs["tna"] + # As app_id is added to the standard_nv_pairs dict above with a type of Optional[str], the type for + # the whole standard_nv_pairs dict is inferred to be dict[str, Optional[str]]. + # But, we know that "tna" should always be present in the dict, since namespace is a required argument. 
+ # + # This ignores MyPy saying Incompatible return value type (got "str | None", expected "str") + return self.standard_nv_pairs["tna"] # type: ignore diff --git a/snowplow_tracker/tracker_configuration.py b/snowplow_tracker/tracker_configuration.py index af2a4b9d..6a574dc2 100644 --- a/snowplow_tracker/tracker_configuration.py +++ b/snowplow_tracker/tracker_configuration.py @@ -22,7 +22,7 @@ class TrackerConfiguration(object): def __init__( self, - encode_base64: Optional[bool] = None, + encode_base64: bool = True, json_encoder: Optional[JsonEncoderFunction] = None, ) -> None: """ @@ -37,18 +37,16 @@ def __init__( self.json_encoder = json_encoder @property - def encode_base64(self) -> Optional[bool]: + def encode_base64(self) -> bool: """ Whether JSONs in the payload should be base-64 encoded. Default is True. """ return self._encode_base64 @encode_base64.setter - def encode_base64(self, value: Optional[bool]): + def encode_base64(self, value: bool): if isinstance(value, bool) or value is None: self._encode_base64 = value - else: - raise ValueError("encode_base64 must be True or False") @property def json_encoder(self) -> Optional[JsonEncoderFunction]: diff --git a/snowplow_tracker/typing.py b/snowplow_tracker/typing.py index 5bbc477b..3e973562 100644 --- a/snowplow_tracker/typing.py +++ b/snowplow_tracker/typing.py @@ -65,5 +65,10 @@ class EmitterProtocol(Protocol): - def input(self, payload: PayloadDict) -> None: - ... + def input(self, payload: PayloadDict) -> None: ... + + def flush(self) -> None: ... + + def async_flush(self) -> None: ... + + def sync_flush(self) -> None: ... 
From 65bdd7b777b6cd153f405e11c78342f4b1b461ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez-Mondrag=C3=B3n?= Date: Sun, 28 Jul 2024 13:05:21 -0600 Subject: [PATCH 39/51] Add py.typed to package (close #360) --- setup.py | 1 + snowplow_tracker/py.typed | 0 2 files changed, 1 insertion(+) create mode 100644 snowplow_tracker/py.typed diff --git a/setup.py b/setup.py index e1b2aa33..1cdd729f 100644 --- a/setup.py +++ b/setup.py @@ -44,6 +44,7 @@ author=authors_str, author_email=authors_email_str, packages=["snowplow_tracker", "snowplow_tracker.test", "snowplow_tracker.events"], + package_data={"snowplow_tracker": ["py.typed"]}, url="http://snowplow.io", license="Apache License 2.0", description="Snowplow event tracker for Python. Add analytics to your Python and Django apps, webapps and games", diff --git a/snowplow_tracker/py.typed b/snowplow_tracker/py.typed new file mode 100644 index 00000000..e69de29b From ea634ec6e0b29fea82ca1637c5dc553f76f5686e Mon Sep 17 00:00:00 2001 From: Jack Keene Date: Fri, 23 Aug 2024 11:51:21 +0100 Subject: [PATCH 40/51] Update `on_success` docstring (close #358) PR #366 * Update `on_success` docstring --- snowplow_tracker/emitter_configuration.py | 8 +++----- snowplow_tracker/emitters.py | 10 ++++------ 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/snowplow_tracker/emitter_configuration.py b/snowplow_tracker/emitter_configuration.py index 1cf90238..82626fa4 100644 --- a/snowplow_tracker/emitter_configuration.py +++ b/snowplow_tracker/emitter_configuration.py @@ -39,13 +39,12 @@ def __init__( :param batch_size: The maximum number of queued events before the buffer is flushed. Default is 10. :type batch_size: int | None :param on_success: Callback executed after every HTTP request in a flush has status code 200 - Gets passed the number of events flushed. 
+ Gets passed one argument, an array of dictionaries corresponding to the sent events' payloads :type on_success: function | None :param on_failure: Callback executed if at least one HTTP request in a flush has status code other than 200 Gets passed two arguments: 1) The number of events which were successfully sent - 2) If method is "post": The unsent data in string form; - If method is "get": An array of dictionaries corresponding to the unsent events' payloads + 2) An array of dictionaries corresponding to the unsent events' payloads :type on_failure: function | None :param byte_limit: The size event list after reaching which queued events will be flushed :type byte_limit: int | None @@ -105,8 +104,7 @@ def on_failure(self) -> Optional[FailureCallback]: Callback executed if at least one HTTP request in a flush has status code other than 200 Gets passed two arguments: 1) The number of events which were successfully sent - 2) If method is "post": The unsent data in string form; - If method is "get": An array of dictionaries corresponding to the unsent events' payloads + 2) An array of dictionaries corresponding to the unsent events' payloads """ return self._on_failure diff --git a/snowplow_tracker/emitters.py b/snowplow_tracker/emitters.py index 6a138f0f..72f451bf 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -97,13 +97,12 @@ def __init__( :param batch_size: The maximum number of queued events before the buffer is flushed. Default is 10. :type batch_size: int | None :param on_success: Callback executed after every HTTP request in a flush has status code 200 - Gets passed the number of events flushed. 
+ Gets passed one argument, an array of dictionaries corresponding to the sent events' payloads :type on_success: function | None :param on_failure: Callback executed if at least one HTTP request in a flush has status code other than 200 Gets passed two arguments: 1) The number of events which were successfully sent - 2) If method is "post": The unsent data in string form; - If method is "get": An array of dictionaries corresponding to the unsent events' payloads + 2) An array of dictionaries corresponding to the unsent events' payloads :type on_failure: function | None :param byte_limit: The size event list after reaching which queued events will be flushed :type byte_limit: int | None @@ -484,13 +483,12 @@ def __init__( :param batch_size: The maximum number of queued events before the buffer is flushed. Default is 10. :type batch_size: int | None :param on_success: Callback executed after every HTTP request in a flush has status code 200 - Gets passed the number of events flushed. + Gets passed one argument, an array of dictionaries corresponding to the sent events' payloads :type on_success: function | None :param on_failure: Callback executed if at least one HTTP request in a flush has status code other than 200 Gets passed two arguments: 1) The number of events which were successfully sent - 2) If method is "post": The unsent data in string form; - If method is "get": An array of dictionaries corresponding to the unsent events' payloads + 2) An array of dictionaries corresponding to the unsent events' payloads :type on_failure: function | None :param thread_count: Number of worker threads to use for HTTP requests :type thread_count: int From 9554954092bf00fca6db786f39d6db525c5a4fc9 Mon Sep 17 00:00:00 2001 From: Greg Leonard <45019882+greg-el@users.noreply.github.com> Date: Fri, 23 Aug 2024 12:02:15 +0100 Subject: [PATCH 41/51] Fix docs action (close #367) --- docs/requirements.txt | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) 
diff --git a/docs/requirements.txt b/docs/requirements.txt index 229d0ff5..36f69b0c 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,9 +1,15 @@ -sphinx -sphinx_rtd_theme -sphinx_copybutton -sphinx_minipres -sphinx_tabs -sphinx_togglebutton>=0.2.0 -sphinx-autobuild + +sphinx==7.1.2 +sphinx_rtd_theme==2.0.0 +sphinx_copybutton==0.5.2 +sphinx_minipres==0.2.1 +sphinx_tabs==3.4.5 + +sphinx_togglebutton==0.3.2 +# Transitive dependency of togglebutton causing: +# https://security.snyk.io/vuln/SNYK-PYTHON-SETUPTOOLS-7448482 +setuptools==70.0.0 + +sphinx-autobuild==2021.3.14 myst_nb>0.8.3 -sphinx_rtd_theme_ext_color_contrast \ No newline at end of file +sphinx_rtd_theme_ext_color_contrast==0.3.2 From b75934afdcd40dd8804dfcc8174a59c1868ebd1c Mon Sep 17 00:00:00 2001 From: Greg Leonard <45019882+greg-el@users.noreply.github.com> Date: Tue, 27 Aug 2024 11:42:28 +0100 Subject: [PATCH 42/51] Prepare for 1.0.3 release --- CHANGES.txt | 8 ++++++++ docs/source/conf.py | 2 +- setup.py | 2 +- snowplow_tracker/_version.py | 2 +- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 4a5c0c29..861f8987 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,11 @@ +Version 1.0.3 (2024-08-27) +-------------------------- +Fix docs action (close #367) +Update `on_success` docstring (close #358) +Add py.typed to package (close #360) (Thanks to @edgarrmondragon) +Update typing +Fix `PagePing`, `PageView`, and `StructuredEvent` property getters (close #361) + Version 1.0.2 (2024-02-26) -------------------------- Add Python 3.12 to CI tests (#356) (Thanks to @edgarrmondragon) diff --git a/docs/source/conf.py b/docs/source/conf.py index e2762fe0..647cf119 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -28,7 +28,7 @@ author = 'Alex Dean, Paul Boocock, Matus Tomlein, Jack Keene' # The full version, including alpha/beta/rc tags -release = "1.0.2" +release = "1.0.3" # -- General configuration 
--------------------------------------------------- diff --git a/setup.py b/setup.py index 1cdd729f..4b180da6 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,7 @@ setup( name="snowplow-tracker", - version="1.0.2", + version="1.0.3", author=authors_str, author_email=authors_email_str, packages=["snowplow_tracker", "snowplow_tracker.test", "snowplow_tracker.events"], diff --git a/snowplow_tracker/_version.py b/snowplow_tracker/_version.py index 38be1911..b71b2e2c 100644 --- a/snowplow_tracker/_version.py +++ b/snowplow_tracker/_version.py @@ -15,6 +15,6 @@ # language governing permissions and limitations there under. # """ -__version_info__ = (1, 0, 2) +__version_info__ = (1, 0, 3) __version__ = ".".join(str(x) for x in __version_info__) __build_version__ = __version__ + "" From 92fb34ae9e5b9acfc0d3f552357ab298f63b561e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 3 Sep 2024 22:20:22 +0000 Subject: [PATCH 43/51] Bump actions/download-artifact from 3 to 4.1.7 in /.github/workflows Bumps [actions/download-artifact](https://github.com/actions/download-artifact) from 3 to 4.1.7. - [Release notes](https://github.com/actions/download-artifact/releases) - [Commits](https://github.com/actions/download-artifact/compare/v3...v4.1.7) --- updated-dependencies: - dependency-name: actions/download-artifact dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- .github/workflows/cd.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 5e95d341..b809ea1a 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -76,7 +76,7 @@ jobs: python-version: '3.x' - name: Download artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4.1.7 with: name: distfiles_${{ github.run_id }} path: ${{ github.workspace }}/dist From 74c9377598b58b5fa6a0c07988f411104993789b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= <16805946+edgarrmondragon@users.noreply.github.com> Date: Tue, 19 Nov 2024 05:10:37 -0600 Subject: [PATCH 44/51] Test with Python 3.13 (#365) --- .github/workflows/ci.yml | 15 +++++++++++--- Dockerfile | 2 +- requirements-test.txt | 5 +++-- run-tests.sh | 42 ++++++++++++++++++++++++++-------------- setup.py | 1 + 5 files changed, 45 insertions(+), 20 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a285a2d9..2bb32b7b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,15 +11,24 @@ jobs: runs-on: ubuntu-20.04 strategy: + fail-fast: false matrix: - python-version: [3.6, 3.7, 3.8, 3.9, "3.10", "3.11", "3.12"] + python-version: + - 3.6 + - 3.7 + - 3.8 + - 3.9 + - "3.10" + - "3.11" + - "3.12" + - "3.13" steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} diff --git a/Dockerfile b/Dockerfile index ba2948f6..d79a72cd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,7 +10,7 @@ ENV PATH $PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH RUN git clone --depth=1 https://github.com/pyenv/pyenv.git $PYENV_ROOT RUN git clone --depth=1 https://github.com/pyenv/pyenv-virtualenv.git $PYENV_ROOT/plugins/pyenv-virtualenv -RUN pyenv 
install 3.5.10 && pyenv install 3.6.14 && pyenv install 3.7.11 && pyenv install 3.8.11 && pyenv install 3.9.6 && pyenv install 3.10.1 && pyenv install 3.11.0 && pyenv install 3.12.1 +RUN pyenv install 3.5.10 && pyenv install 3.6.15 && pyenv install 3.7.17 && pyenv install 3.8.20 && pyenv install 3.9.20 && pyenv install 3.10.15 && pyenv install 3.11.10 && pyenv install 3.12.7 && pyenv install 3.13.0 WORKDIR /app COPY . . diff --git a/requirements-test.txt b/requirements-test.txt index 55463d8c..cde305f6 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,7 +1,8 @@ pytest==4.6.11; python_version < '3.10.0' -pytest==6.2.5; python_version >= '3.10.0' +pytest==8.3.2; python_version >= '3.10.0' attrs==21.2.0 httmock==1.4.0 -freezegun==1.1.0 +freezegun==1.1.0; python_version < '3.13' +freezegun==1.5.1; python_version >= '3.13' pytest-cov coveralls==3.3.1 diff --git a/run-tests.sh b/run-tests.sh index 23b13a73..bb938e85 100755 --- a/run-tests.sh +++ b/run-tests.sh @@ -24,68 +24,77 @@ function deploy { source deactivate fi - # pyenv install 3.6.14 + # pyenv install 3.6.15 if [ ! -e ~/.pyenv/versions/tracker36 ]; then - pyenv virtualenv 3.6.14 tracker36 + pyenv virtualenv 3.6.15 tracker36 pyenv activate tracker36 pip install . pip install -r requirements-test.txt source deactivate fi - # pyenv install 3.7.11 + # pyenv install 3.7.17 if [ ! -e ~/.pyenv/versions/tracker37 ]; then - pyenv virtualenv 3.7.11 tracker37 + pyenv virtualenv 3.7.17 tracker37 pyenv activate tracker37 pip install . pip install -r requirements-test.txt source deactivate fi - # pyenv install 3.8.11 + # pyenv install 3.8.20 if [ ! -e ~/.pyenv/versions/tracker38 ]; then - pyenv virtualenv 3.8.11 tracker38 + pyenv virtualenv 3.8.20 tracker38 pyenv activate tracker38 pip install . pip install -r requirements-test.txt source deactivate fi - # pyenv install 3.9.6 + # pyenv install 3.9.20 if [ ! 
-e ~/.pyenv/versions/tracker39 ]; then - pyenv virtualenv 3.9.6 tracker39 + pyenv virtualenv 3.9.20 tracker39 pyenv activate tracker39 pip install . pip install -r requirements-test.txt source deactivate fi - # pyenv install 3.10.1 + # pyenv install 3.10.15 if [ ! -e ~/.pyenv/versions/tracker310 ]; then - pyenv virtualenv 3.10.1 tracker310 + pyenv virtualenv 3.10.15 tracker310 pyenv activate tracker310 pip install . pip install -r requirements-test.txt source deactivate fi - # pyenv install 3.11.0 + # pyenv install 3.11.10 if [ ! -e ~/.pyenv/versions/tracker311 ]; then - pyenv virtualenv 3.11.0 tracker311 + pyenv virtualenv 3.11.10 tracker311 pyenv activate tracker311 pip install . pip install -r requirements-test.txt source deactivate fi - # pyenv install 3.12.0 + # pyenv install 3.12.7 if [ ! -e ~/.pyenv/versions/tracker312 ]; then - pyenv virtualenv 3.12.0 tracker312 + pyenv virtualenv 3.12.7 tracker312 pyenv activate tracker312 pip install . pip install -r requirements-test.txt source deactivate fi + + # pyenv install 3.13.0 + if [ ! -e ~/.pyenv/versions/tracker313 ]; then + pyenv virtualenv 3.13.0 tracker313 + pyenv activate tracker313 + pip install . 
+ pip install -r requirements-test.txt + source deactivate + fi } @@ -121,6 +130,10 @@ function run_tests { pyenv activate tracker312 pytest source deactivate + + pyenv activate tracker313 + pytest + source deactivate } function refresh_deploy { @@ -132,6 +145,7 @@ function refresh_deploy { pyenv uninstall -f tracker310 pyenv uninstall -f tracker311 pyenv uninstall -f tracker312 + pyenv uninstall -f tracker313 } diff --git a/setup.py b/setup.py index 4b180da6..26137437 100644 --- a/setup.py +++ b/setup.py @@ -64,6 +64,7 @@ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Operating System :: OS Independent", ], install_requires=[ From 0341cb19d5fac344095dbb5e11bc0ebae3c4e880 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matu=CC=81s=CC=8C=20Tomlein?= Date: Tue, 19 Nov 2024 12:13:28 +0100 Subject: [PATCH 45/51] Shorten automatic github release title (close #352) --- .github/workflows/cd.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index b809ea1a..cfb160bb 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -108,6 +108,6 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: tag_name: ${{ github.ref }} - name: Snowplow Python Tracker v${{ needs.version_check.outputs.v_tracker }} + name: Version ${{ needs.version_check.outputs.v_tracker }} draft: false prerelease: ${{ contains(needs.version_check.outputs.v_tracker, 'rc') }} From 9a70fb37146e83e1ab31b96ab4c8e0ac9bb9d696 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matu=CC=81s=CC=8C=20Tomlein?= Date: Tue, 19 Nov 2024 12:18:36 +0100 Subject: [PATCH 46/51] Prepare for 1.0.4 release --- CHANGES.txt | 5 +++++ docs/source/conf.py | 4 ++-- setup.py | 2 +- snowplow_tracker/_version.py | 2 +- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 861f8987..581f8e6a 100644 --- 
a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,8 @@ +Version 1.0.4 (2024-11-19) +-------------------------- +Test with Python 3.13 (#365) (Thanks to @edgarrmondragon) +Shorten automatic github release title (#352) + Version 1.0.3 (2024-08-27) -------------------------- Fix docs action (close #367) diff --git a/docs/source/conf.py b/docs/source/conf.py index 647cf119..46a8fb74 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -28,7 +28,7 @@ author = 'Alex Dean, Paul Boocock, Matus Tomlein, Jack Keene' # The full version, including alpha/beta/rc tags -release = "1.0.3" +release = "1.0.4" # -- General configuration --------------------------------------------------- @@ -60,4 +60,4 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] \ No newline at end of file +html_static_path = ['_static'] diff --git a/setup.py b/setup.py index 26137437..6d386306 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,7 @@ setup( name="snowplow-tracker", - version="1.0.3", + version="1.0.4", author=authors_str, author_email=authors_email_str, packages=["snowplow_tracker", "snowplow_tracker.test", "snowplow_tracker.events"], diff --git a/snowplow_tracker/_version.py b/snowplow_tracker/_version.py index b71b2e2c..b6a9d713 100644 --- a/snowplow_tracker/_version.py +++ b/snowplow_tracker/_version.py @@ -15,6 +15,6 @@ # language governing permissions and limitations there under. 
# """ -__version_info__ = (1, 0, 3) +__version_info__ = (1, 0, 4) __version__ = ".".join(str(x) for x in __version_info__) __build_version__ = __version__ + "" From 30717fbc517f7bbac75075287fd3fd286fb6dd4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matu=CC=81s=CC=8C=20Tomlein?= Date: Wed, 20 Nov 2024 12:49:54 +0100 Subject: [PATCH 47/51] Use the same version for the upload-artifact and download-artifact action --- .github/workflows/cd.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index cfb160bb..935d1c35 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -57,7 +57,7 @@ jobs: python setup.py sdist bdist_wheel - name: Upload artifacts - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: distfiles_${{ github.run_id }} path: dist @@ -76,7 +76,7 @@ jobs: python-version: '3.x' - name: Download artifacts - uses: actions/download-artifact@v4.1.7 + uses: actions/download-artifact@v4 with: name: distfiles_${{ github.run_id }} path: ${{ github.workspace }}/dist From a10e3e4b525a2649d0df6fcc1ee695e34545f409 Mon Sep 17 00:00:00 2001 From: Jack Keene Date: Thu, 20 Feb 2025 13:49:12 +0000 Subject: [PATCH 48/51] Bump Ubuntu Version in GH Actions (close #374) PR #375 * bump ubuntu version in CI/CD * drop EoL python versions --- .github/workflows/cd.yml | 8 ++++---- .github/workflows/ci.yml | 6 ++---- README.md | 1 + setup.py | 3 --- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 935d1c35..2a0fcff1 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -7,7 +7,7 @@ on: jobs: version_check: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest outputs: v_tracker: ${{ steps.version.outputs.PYTHON_TRACKER_VERSION}} @@ -32,7 +32,7 @@ jobs: build: needs: ["version_check"] - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest strategy: matrix: @@ -64,7 +64,7 @@ jobs: publish: needs: 
["build"] - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest steps: - name: Checkout code @@ -96,7 +96,7 @@ jobs: release: needs: ["publish", "version_check"] - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest steps: - name: Checkout code diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2bb32b7b..09b5de9e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,14 +8,12 @@ on: jobs: build: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest strategy: fail-fast: false matrix: python-version: - - 3.6 - - 3.7 - 3.8 - 3.9 - "3.10" @@ -74,7 +72,7 @@ jobs: coveralls_finish: needs: ["build"] - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest steps: - name: Coveralls finished diff --git a/README.md b/README.md index 16fd5b51..694d3ce9 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ Python Support | Python version | snowplow-tracker version | | :----: | :----: | +| \>=3.8 | > 1.1.0 | | \>=3.5 | > 0.10.0 | | 2.7 | > 0.9.1 | diff --git a/setup.py b/setup.py index 6d386306..91bc2c65 100644 --- a/setup.py +++ b/setup.py @@ -56,9 +56,6 @@ "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.5", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", From c74108d203184b40ba2df89f6b124f82bfd47bc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez-Mondrag=C3=B3n?= Date: Thu, 29 Aug 2024 02:25:43 -0600 Subject: [PATCH 49/51] Avoid install `types-requests` at run-time --- .github/workflows/ci.yml | 2 +- setup.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 09b5de9e..9c14d2fe 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -50,7 +50,7 @@ jobs: - name: MyPy run: | 
- python -m pip install mypy + python -m pip install -e .[typing] mypy snowplow_tracker --exclude '/test' - name: Demo diff --git a/setup.py b/setup.py index 91bc2c65..f3a89bfe 100644 --- a/setup.py +++ b/setup.py @@ -66,7 +66,12 @@ ], install_requires=[ "requests>=2.25.1,<3.0", - "types-requests>=2.25.1,<3.0", "typing_extensions>=3.7.4", ], + extras_require={ + "typing": [ + "mypy>=0.971", + "types-requests>=2.25.1,<3.0", + ], + }, ) From 197fa9cd1307f12ffb3e12b61dcf13009525b875 Mon Sep 17 00:00:00 2001 From: Jack-Keene Date: Thu, 20 Feb 2025 14:14:40 +0000 Subject: [PATCH 50/51] Prepare for 1.1.0 release --- CHANGES.txt | 5 +++++ docs/source/conf.py | 2 +- setup.py | 2 +- snowplow_tracker/_version.py | 2 +- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 581f8e6a..6a56dedb 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,8 @@ +Version 1.1.0 (2025-02-20) +-------------------------- +Bump Ubuntu Version in GH Actions (#375) +Avoid installing types-requests at run-time (#370) (Thanks to @edgarrmondragon) + Version 1.0.4 (2024-11-19) -------------------------- Test with Python 3.13 (#365) (Thanks to @edgarrmondragon) diff --git a/docs/source/conf.py b/docs/source/conf.py index 46a8fb74..88d210c3 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -28,7 +28,7 @@ author = 'Alex Dean, Paul Boocock, Matus Tomlein, Jack Keene' # The full version, including alpha/beta/rc tags -release = "1.0.4" +release = "1.1.0" # -- General configuration --------------------------------------------------- diff --git a/setup.py b/setup.py index f3a89bfe..efaf6536 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,7 @@ setup( name="snowplow-tracker", - version="1.0.4", + version="1.1.0", author=authors_str, author_email=authors_email_str, packages=["snowplow_tracker", "snowplow_tracker.test", "snowplow_tracker.events"], diff --git a/snowplow_tracker/_version.py b/snowplow_tracker/_version.py index b6a9d713..f4ff17a0 100644 --- 
a/snowplow_tracker/_version.py +++ b/snowplow_tracker/_version.py @@ -15,6 +15,6 @@ # language governing permissions and limitations there under. # """ -__version_info__ = (1, 0, 4) +__version_info__ = (1, 1, 0) __version__ = ".".join(str(x) for x in __version_info__) __build_version__ = __version__ + "" From e8f2629e78334a974794efa1f20442d923c14e39 Mon Sep 17 00:00:00 2001 From: Patricio Date: Mon, 1 Sep 2025 12:42:22 +0200 Subject: [PATCH 51/51] claude mds instrumentation --- CLAUDE.md | 370 ++++++++++++++++++++++++++++++ snowplow_tracker/events/CLAUDE.md | 284 +++++++++++++++++++++++ snowplow_tracker/test/CLAUDE.md | 365 +++++++++++++++++++++++++++++ 3 files changed, 1019 insertions(+) create mode 100644 CLAUDE.md create mode 100644 snowplow_tracker/events/CLAUDE.md create mode 100644 snowplow_tracker/test/CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..dd4a535e --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,370 @@ +# Snowplow Python Tracker - CLAUDE.md + +## Project Overview + +The Snowplow Python Tracker is a public Python library for sending analytics events to Snowplow collectors. It enables developers to integrate Snowplow analytics into Python applications, games, and web servers. The library provides a robust event tracking system with support for various event types, custom contexts, and reliable event delivery through configurable emitters. 
+ +**Key Technologies:** +- Python 3.8+ (supported versions: 3.8-3.13) +- requests library for HTTP communication +- typing_extensions for enhanced type hints +- Event-driven architecture with schema validation +- Asynchronous and synchronous event emission + +## Development Commands + +```bash +# Install dependencies +pip install -r requirements-test.txt + +# Run tests +./run-tests.sh + +# Run specific test module +python -m pytest snowplow_tracker/test/unit/test_tracker.py + +# Run integration tests +python -m pytest snowplow_tracker/test/integration/ + +# Install package in development mode +pip install -e . + +# Build Docker image for testing +docker build -t snowplow-python-tracker . +docker run snowplow-python-tracker +``` + +## Architecture + +The tracker follows a layered architecture with clear separation of concerns: + +``` +snowplow_tracker/ +├── Core Components +│ ├── tracker.py # Main Tracker class orchestrating events +│ ├── snowplow.py # High-level API for tracker management +│ └── subject.py # User/device context management +├── Event Layer (events/) +│ ├── event.py # Base Event class +│ ├── page_view.py # PageView event +│ ├── structured_event.py # Structured events +│ └── self_describing.py # Custom schema events +├── Emission Layer +│ ├── emitters.py # Sync/Async event transmission +│ ├── event_store.py # Event buffering and persistence +│ └── payload.py # Event payload construction +├── Configuration +│ ├── tracker_configuration.py +│ └── emitter_configuration.py +└── Validation + ├── contracts.py # Runtime validation + └── typing.py # Type definitions +``` + +## Core Architectural Principles + +1. **Schema-First Design**: All events conform to Iglu schemas for consistency +2. **Separation of Concerns**: Event creation, validation, and emission are separate +3. **Configuration Objects**: Use dedicated configuration classes, not raw dictionaries +4. **Type Safety**: Extensive use of type hints and Protocol classes +5. 
**Fail-Safe Delivery**: Events are buffered and retried on failure +6. **Immutability**: Event objects are largely immutable after creation + +## Layer Organization & Responsibilities + +### Application Layer (snowplow.py) +- Singleton pattern for global tracker management +- Factory methods for tracker creation +- Namespace-based tracker registry + +### Domain Layer (tracker.py, events/) +- Event creation and validation +- Subject (user/device) context management +- Event enrichment with standard fields + +### Infrastructure Layer (emitters.py, event_store.py) +- HTTP communication with collectors +- Event buffering and retry logic +- Async/sync emission strategies + +### Cross-Cutting (contracts.py, typing.py) +- Runtime validation with togglable contracts +- Shared type definitions and protocols + +## Critical Import Patterns + +```python +# ✅ Import from package root for public API +from snowplow_tracker import Snowplow, Tracker, Subject +from snowplow_tracker import EmitterConfiguration, TrackerConfiguration + +# ✅ Import specific event classes +from snowplow_tracker.events import PageView, StructuredEvent + +# ❌ Don't import from internal modules +from snowplow_tracker.emitters import Requester # Internal class + +# ✅ Use typing module for type hints +from snowplow_tracker.typing import PayloadDict, Method +``` + +## Essential Library Patterns + +### Tracker Initialization Pattern +```python +# ✅ Use Snowplow factory with configuration objects +tracker = Snowplow.create_tracker( + namespace="my_app", + endpoint="https://collector.example.com", + tracker_config=TrackerConfiguration(encode_base64=True), + emitter_config=EmitterConfiguration(batch_size=10) +) + +# ❌ Don't instantiate Tracker directly without Snowplow +tracker = Tracker("namespace", emitter) # Missing registration +``` + +### Event Creation Pattern +```python +# ✅ Use event classes with named parameters +page_view = PageView( + page_url="https://example.com", + page_title="Homepage" +) + +# ✅ Add 
contexts to events +event.context = [SelfDescribingJson(schema, data)] + +# ❌ Don't modify event payload directly +event.payload.add("custom", "value") # Breaks schema validation +``` + +### Subject Management Pattern +```python +# ✅ Set subject at tracker or event level +subject = Subject() +subject.set_user_id("user123") +tracker = Snowplow.create_tracker(..., subject=subject) + +# ✅ Override subject per event +event = PageView(..., event_subject=Subject()) + +# ❌ Don't modify subject after tracker creation +tracker.subject.set_user_id("new_id") # Not thread-safe +``` + +### Emitter Configuration Pattern +```python +# ✅ Configure retry and buffering behavior +config = EmitterConfiguration( + batch_size=50, + buffer_capacity=10000, + custom_retry_codes={429: True, 500: True} +) + +# ❌ Don't use magic numbers +emitter = Emitter(endpoint, 443, "post", 100) # Use config object +``` + +## Model Organization Pattern + +### Event Hierarchy +```python +Event (base class) +├── PageView # Web page views +├── PagePing # Page engagement tracking +├── ScreenView # Mobile screen views +├── StructuredEvent # Category/action/label/property/value events +└── SelfDescribing # Custom schema events +``` + +### Data Structures +```python +# SelfDescribingJson for custom contexts +context = SelfDescribingJson( + "iglu:com.example/context/jsonschema/1-0-0", + {"key": "value"} +) + +# Payload for event data assembly +payload = Payload() +payload.add("e", "pv") # Event type +payload.add_dict({"aid": "app_id"}) +``` + +## Common Pitfalls & Solutions + +### Contract Validation +```python +# ❌ Passing invalid parameters silently fails in production +tracker.track_page_view("") # Empty URL + +# ✅ Enable contracts during development +from snowplow_tracker import enable_contracts +enable_contracts() +``` + +### Event Buffering +```python +# ❌ Not flushing events before shutdown +tracker.track(event) +sys.exit() # Events lost! 
+ +# ✅ Always flush before exit +tracker.track(event) +tracker.flush() +``` + +### Thread Safety +```python +# ❌ Sharing emitter across threads +emitter = Emitter(endpoint) +# Multiple threads using same emitter + +# ✅ Use AsyncEmitter for concurrent scenarios +emitter = AsyncEmitter(endpoint, thread_count=2) +``` + +### Schema Validation +```python +# ❌ Hardcoding schema strings +schema = "iglu:com.snowplow/event/1-0-0" + +# ✅ Use constants for schemas +from snowplow_tracker.constants import CONTEXT_SCHEMA +``` + +## File Structure Template + +``` +project/ +├── tracker_app.py # Application entry point +├── config/ +│ └── tracker_config.py # Tracker configuration +├── events/ +│ ├── __init__.py +│ └── custom_events.py # Custom event definitions +├── contexts/ +│ └── custom_contexts.py # Custom context schemas +└── tests/ + ├── unit/ + │ └── test_events.py + └── integration/ + └── test_emission.py +``` + +## Testing Patterns + +### Unit Testing +```python +# ✅ Mock emitters for unit tests +@mock.patch('snowplow_tracker.emitters.Emitter') +def test_track_event(mock_emitter): + tracker = Tracker("test", mock_emitter) + tracker.track(PageView(...)) + mock_emitter.input.assert_called_once() +``` + +### Contract Testing +```python +# ✅ Use ContractsDisabled context manager +with ContractsDisabled(): + # Test invalid inputs without raising + tracker.track_page_view(None) +``` + +### Integration Testing +```python +# ✅ Test against mock collector +def test_event_delivery(): + with requests_mock.Mocker() as m: + m.post("https://collector.test/com.snowplow/tp2") + # Track and verify delivery +``` + +## Configuration Best Practices + +### Environment-Based Configuration +```python +# ✅ Use environment variables +import os +endpoint = os.getenv("SNOWPLOW_COLLECTOR_URL") +namespace = os.getenv("SNOWPLOW_NAMESPACE", "default") +``` + +### Retry Configuration +```python +# ✅ Configure intelligent retry behavior +EmitterConfiguration( + max_retry_delay_seconds=120, + 
custom_retry_codes={ + 429: True, # Retry rate limits + 500: True, # Retry server errors + 400: False # Don't retry bad requests + } +) +``` + +## Quick Reference + +### Import Checklist +- [ ] Import from `snowplow_tracker` package root +- [ ] Use `EmitterConfiguration` and `TrackerConfiguration` +- [ ] Import specific event classes from `snowplow_tracker.events` +- [ ] Use type hints from `snowplow_tracker.typing` + +### Event Tracking Checklist +- [ ] Create tracker with `Snowplow.create_tracker()` +- [ ] Configure emitter with appropriate batch size +- [ ] Set subject context if tracking users +- [ ] Use appropriate event class for the use case +- [ ] Add custom contexts as `SelfDescribingJson` +- [ ] Call `flush()` before application shutdown +- [ ] Handle failures with callbacks + +### Common Event Types +- `PageView`: Web page views +- `ScreenView`: Mobile app screens +- `StructuredEvent`: Generic events with 5 parameters +- `SelfDescribing`: Custom schema events +- `PagePing`: Engagement tracking + +## Contributing to CLAUDE.md + +When adding or updating content in this document, please follow these guidelines: + +### File Size Limit +- **CLAUDE.md must not exceed 40KB** (currently ~19KB) +- Check file size after updates: `wc -c CLAUDE.md` +- Remove outdated content if approaching the limit + +### Code Examples +- Keep all code examples **4 lines or fewer** +- Focus on the essential pattern, not complete implementations +- Use `# ❌` and `# ✅` to clearly show wrong vs right approaches + +### Content Organization +- Add new patterns to existing sections when possible +- Create new sections sparingly to maintain structure +- Update the architectural principles section for major changes +- Ensure examples follow current codebase conventions + +### Quality Standards +- Test any new patterns in actual code before documenting +- Verify imports and syntax are correct for the codebase +- Keep language concise and actionable +- Focus on "what" and "how", minimize
"why" explanations + +### Multiple CLAUDE.md Files +- **Directory-specific CLAUDE.md files** can be created for specialized modules +- Follow the same structure and guidelines as this root CLAUDE.md +- Keep them focused on directory-specific patterns and conventions +- Maximum 20KB per directory-specific CLAUDE.md file + +### Instructions for LLMs +When editing files in this repository, **always check for CLAUDE.md guidance**: + +1. **Look for CLAUDE.md in the same directory** as the file being edited +2. **If not found, check parent directories** recursively up to project root +3. **Follow the patterns and conventions** described in the applicable CLAUDE.md +4. **Prioritize directory-specific guidance** over root-level guidance when conflicts exist \ No newline at end of file diff --git a/snowplow_tracker/events/CLAUDE.md b/snowplow_tracker/events/CLAUDE.md new file mode 100644 index 00000000..efc0f5ab --- /dev/null +++ b/snowplow_tracker/events/CLAUDE.md @@ -0,0 +1,284 @@ +# Snowplow Event Types - CLAUDE.md + +## Directory Overview + +The `events/` directory contains all event type implementations for the Snowplow Python Tracker. Each event class represents a specific type of analytics event that can be sent to Snowplow collectors. All events inherit from the base `Event` class and follow a consistent pattern for construction, validation, and payload generation. 
+ +## Event Class Hierarchy + +``` +Event (base class) +├── PageView # Web page view tracking +├── PagePing # Page engagement/heartbeat +├── ScreenView # Mobile/app screen views +├── StructuredEvent # Generic 5-parameter events +└── SelfDescribing # Custom schema events +``` + +## Core Event Patterns + +### Event Construction Pattern +```python +# ✅ Use keyword arguments for clarity +event = PageView( + page_url="https://example.com", + page_title="Homepage", + referrer="https://google.com" +) + +# ❌ Don't use positional arguments +event = PageView("https://example.com", "Homepage") +``` + +### Event Context Pattern +```python +# ✅ Add contexts as SelfDescribingJson list +geo_context = SelfDescribingJson( + "iglu:com.acme/geolocation/jsonschema/1-0-0", + {"latitude": 40.0, "longitude": -73.0} +) +event = PageView(page_url="...", context=[geo_context]) + +# ❌ Don't use raw dictionaries for context +event.context = [{"latitude": 40.0}] # Missing schema! +``` + +### Event Subject Override Pattern +```python +# ✅ Override tracker subject for specific event +special_subject = Subject() +special_subject.set_user_id("anonymous_user") +event = StructuredEvent( + category="shop", + action="view", + event_subject=special_subject +) + +# ❌ Don't modify shared subject +tracker.subject.set_user_id("temp") # Affects all events +``` + +### True Timestamp Pattern +```python +# ✅ Use milliseconds for true_timestamp +import time +timestamp_ms = time.time() * 1000 +event = PageView( + page_url="...", + true_timestamp=timestamp_ms +) + +# ❌ Don't use seconds +event = PageView(true_timestamp=time.time()) +``` + +## Event-Specific Patterns + +### PageView Events +```python +# ✅ Complete PageView with all fields +page_view = PageView( + page_url="https://example.com/products", + page_title="Products", + referrer="https://example.com/home" +) + +# ❌ Missing required page_url +page_view = PageView(page_title="Products") +``` + +### StructuredEvent Pattern +```python +# ✅ Use descriptive 
category/action pairs +event = StructuredEvent( + category="ecommerce", + action="add-to-cart", + label="SKU-123", + property_="size:XL", + value=29.99 +) + +# ❌ Generic naming loses meaning +event = StructuredEvent("event", "click") +``` + +### SelfDescribing Events +```python +# ✅ Custom events with Iglu schemas +purchase_event = SelfDescribing( + SelfDescribingJson( + "iglu:com.acme/purchase/jsonschema/2-0-0", + { + "orderId": "ORD-123", + "total": 99.99, + "currency": "USD" + } + ) +) + +# ❌ Missing schema version +event = SelfDescribing( + SelfDescribingJson("iglu:com.acme/purchase", {...}) +) +``` + +### ScreenView Pattern (Mobile) +```python +# ✅ Mobile screen tracking with ID +screen = ScreenView( + name="ProductDetailScreen", + id_="screen-456", + previous_name="ProductListScreen" +) + +# ❌ Using PageView for mobile apps +page = PageView(page_url="app://product-detail") +``` + +## Event Validation Rules + +### Required Fields by Event Type +- **PageView**: `page_url` (required), `page_title`, `referrer` +- **StructuredEvent**: `category`, `action` (required), `label`, `property_`, `value` +- **SelfDescribing**: `event_json` (SelfDescribingJson required) +- **ScreenView**: `name` or `id_` (at least one required) +- **PagePing**: `page_url` (required) + +### Schema Validation Pattern +```python +# ✅ Validate schema format +SCHEMA_PATTERN = r"^iglu:[a-zA-Z0-9-_.]+/[a-zA-Z0-9-_]+/" +SCHEMA_PATTERN += r"[a-zA-Z0-9-_]+/[0-9]+-[0-9]+-[0-9]+$" + +# ❌ Invalid schema formats +"iglu:com.acme/event" # Missing version +"com.acme/event/1-0-0" # Missing iglu: prefix +``` + +## Payload Building Pattern + +### Internal Payload Construction +```python +# ✅ Event classes handle payload internally +def build_payload(self, encode_base64, json_encoder, subject): + # Add event-specific fields + self.payload.add("e", "pv") # Page view type + self.payload.add("url", self.page_url) + + # Let base class handle common fields + return super().build_payload(encode_base64, json_encoder, 
subject) + +# ❌ Don't expose payload building to users +event.payload = Payload() +event.payload.add("custom", "field") +``` + +## Testing Event Classes + +### Unit Test Pattern +```python +# ✅ Test event construction and validation +def test_page_view_required_fields(): + with self.assertRaises(TypeError): + PageView() # Missing required page_url + + event = PageView(page_url="https://test.com") + assert event.page_url == "https://test.com" + +# ✅ Test payload generation +def test_event_payload(): + event = PageView(page_url="https://test.com") + payload = event.build_payload(False, None, None) + assert payload.get()["url"] == "https://test.com" +``` + +### Context Testing Pattern +```python +# ✅ Test context attachment +def test_event_context(): + context = SelfDescribingJson(schema, data) + event = PageView(page_url="...", context=[context]) + + payload = event.build_payload(True, None, None) + assert "cx" in payload.get() # Base64 context +``` + +## Common Event Pitfalls + +### Timestamp Confusion +```python +# ❌ Mixing timestamp types +event.true_timestamp = "2024-01-01" # String not allowed +event.true_timestamp = datetime.now() # Use milliseconds + +# ✅ Consistent millisecond timestamps +event.true_timestamp = int(time.time() * 1000) +``` + +### Context Array Management +```python +# ❌ Modifying context after creation +event.context.append(new_context) # Unexpected behavior + +# ✅ Set complete context at creation +all_contexts = [context1, context2] +event = PageView(page_url="...", context=all_contexts) +``` + +### Schema Version Control +```python +# ❌ Hardcoding schema versions +schema = "iglu:com.acme/event/jsonschema/1-0-0" + +# ✅ Centralize schema definitions +PURCHASE_SCHEMA = "iglu:com.acme/purchase/jsonschema/2-1-0" +event = SelfDescribing(SelfDescribingJson(PURCHASE_SCHEMA, data)) +``` + +## Event Migration Guide + +### Upgrading Event Schemas +```python +# From version 1-0-0 to 2-0-0 +# ✅ Handle backward compatibility +def 
create_purchase_event(data): + if "items" in data: # New schema + schema = "iglu:.../purchase/jsonschema/2-0-0" + else: # Old schema + schema = "iglu:.../purchase/jsonschema/1-0-0" + + return SelfDescribing(SelfDescribingJson(schema, data)) +``` + +## Quick Reference + +### Event Type Selection +- **PageView**: Traditional web page tracking +- **ScreenView**: Mobile app screen tracking +- **StructuredEvent**: Generic business events +- **SelfDescribing**: Complex custom events +- **PagePing**: Engagement/time-on-page tracking + +### Event Field Checklist +- [ ] Required fields provided +- [ ] Timestamps in milliseconds +- [ ] Contexts as SelfDescribingJson array +- [ ] Valid Iglu schema format +- [ ] Event-specific subject if needed + +### Common Event Methods +- `build_payload()`: Internal payload generation +- `event_subject`: Per-event user context +- `context`: Custom context array +- `true_timestamp`: User-defined timestamp + +## Contributing to events/CLAUDE.md + +When modifying event implementations or adding new event types: + +1. **Follow the Event base class pattern** - All events must inherit from Event +2. **Implement required abstract methods** - Ensure payload building works correctly +3. **Document required fields** - Update this file with new event requirements +4. **Add comprehensive tests** - Test construction, validation, and payload generation +5. **Maintain backward compatibility** - Don't break existing event APIs +6. **Update schema constants** - Add new schemas to constants.py if needed \ No newline at end of file diff --git a/snowplow_tracker/test/CLAUDE.md b/snowplow_tracker/test/CLAUDE.md new file mode 100644 index 00000000..08d0b042 --- /dev/null +++ b/snowplow_tracker/test/CLAUDE.md @@ -0,0 +1,365 @@ +# Snowplow Python Tracker Tests - CLAUDE.md + +## Directory Overview + +The `test/` directory contains comprehensive test suites for the Snowplow Python Tracker. 
Tests are organized into unit tests (isolated component testing) and integration tests (end-to-end collector communication). The test suite uses pytest and unittest.mock for mocking, with freezegun for time-based testing. + +## Test Organization + +``` +test/ +├── unit/ # Isolated component tests +│ ├── test_tracker.py # Tracker class tests +│ ├── test_emitters.py # Emitter functionality +│ ├── test_event.py # Base event class +│ ├── test_payload.py # Payload construction +│ ├── test_contracts.py # Validation logic +│ └── test_*.py # Other component tests +└── integration/ # End-to-end tests + └── test_integration.py # Collector communication +``` + +## Core Testing Patterns + +### Mock Pattern for Emitters +```python +# ✅ Mock emitter for isolated tracker testing +@mock.patch('snowplow_tracker.emitters.Emitter') +def test_tracker_tracks_event(mock_emitter): + tracker = Tracker("test", mock_emitter) + tracker.track(PageView(page_url="test.com")) + mock_emitter.input.assert_called_once() + +# ❌ Don't test with real network calls in unit tests +def test_tracker(): + emitter = Emitter("https://real-collector.com") +``` + +### Contract Testing Pattern +```python +# ✅ Use ContractsDisabled context manager +class ContractsDisabled: + def __enter__(self): + disable_contracts() + def __exit__(self, type, value, traceback): + enable_contracts() + +with ContractsDisabled(): + # Test invalid inputs without raising + tracker.track_page_view(None) + +# ❌ Don't disable contracts globally +disable_contracts() +# ... 
rest of test file +``` + +### Time-Based Testing Pattern +```python +# ✅ Use freezegun for deterministic timestamps +from freezegun import freeze_time + +@freeze_time("2024-01-01 12:00:00") +def test_event_timestamp(): + event = PageView(page_url="test.com") + # Timestamp will be consistent + +# ❌ Don't use actual system time +import time +timestamp = time.time() # Non-deterministic +``` + +### UUID Mocking Pattern +```python +# ✅ Mock UUID generation for predictable IDs +@mock.patch('snowplow_tracker.tracker.Tracker.get_uuid') +def test_event_id(mock_uuid): + mock_uuid.return_value = "test-uuid-123" + tracker.track(event) + assert payload["eid"] == "test-uuid-123" + +# ❌ Don't rely on random UUIDs +event_id = tracker.get_uuid() # Different each run +``` + +## Unit Test Patterns + +### Payload Testing +```python +# ✅ Test payload field presence and values +def test_payload_construction(): + payload = Payload() + payload.add("e", "pv") + payload.add("url", "https://test.com") + + result = payload.get() + assert result["e"] == "pv" + assert result["url"] == "https://test.com" + +# ✅ Test JSON encoding +def test_payload_json_encoding(): + payload.add_json({"key": "value"}, True, "cx", "co") + assert "cx" in payload.get() # Base64 encoded +``` + +### Event Testing +```python +# ✅ Test event construction with all parameters +def test_page_view_complete(): + context = SelfDescribingJson(schema, data) + subject = Subject() + + event = PageView( + page_url="https://test.com", + page_title="Test", + context=[context], + event_subject=subject, + true_timestamp=1234567890 + ) + + assert event.page_url == "https://test.com" + assert len(event.context) == 1 + +# ❌ Don't test internal implementation details +def test_private_methods(): + event._internal_method() # Testing private methods +``` + +### Emitter Testing +```python +# ✅ Mock HTTP requests for emitter tests +@mock.patch('requests.post') +def test_emitter_sends_events(mock_post): + mock_post.return_value.status_code = 
200 + + emitter = Emitter("https://collector.test") + emitter.input({"e": "pv"}) + emitter.flush() + + mock_post.assert_called_once() + +# ✅ Test retry logic +def test_emitter_retry_on_failure(mock_post): + mock_post.return_value.status_code = 500 + emitter.custom_retry_codes = {500: True} + # Verify retry behavior +``` + +### Contract Validation Testing +```python +# ✅ Test validation rules +def test_non_empty_string_validation(): + with self.assertRaises(ValueError): + non_empty_string("") + + non_empty_string("valid") # Should not raise + +# ✅ Test form element validation +def test_form_element_contract(): + valid_element = { + "name": "field1", + "value": "test", + "nodeName": "INPUT", + "type": "text" + } + form_element(valid_element) # Should not raise +``` + +## Integration Test Patterns + +### Mock Collector Pattern +```python +# ✅ Use micro mock collector for integration tests +from http.server import HTTPServer, BaseHTTPRequestHandler + +class MockCollector(BaseHTTPRequestHandler): + def do_POST(self): + # Capture and validate payload + content_length = int(self.headers['Content-Length']) + post_data = self.rfile.read(content_length) + # Store for assertions + self.send_response(200) + +# Start mock collector in test +server = HTTPServer(('localhost', 9090), MockCollector) +``` + +### End-to-End Testing +```python +# ✅ Test complete tracking flow +def test_end_to_end_tracking(): + tracker = Snowplow.create_tracker( + namespace="test", + endpoint="http://localhost:9090" + ) + + # Track multiple events + tracker.track(PageView(page_url="test1.com")) + tracker.track(StructuredEvent("cat", "act")) + tracker.flush() + + # Verify collector received both events + assert len(received_events) == 2 +``` + +## Testing Best Practices + +### Test Isolation +```python +# ✅ Clean up after each test +def setUp(self): + Snowplow.reset() # Clear all trackers + +def tearDown(self): + # Clean up any test artifacts + if hasattr(self, 'server'): + self.server.shutdown() + +# ❌ 
Don't leave state between tests +class TestSuite: + shared_tracker = Tracker(...) # Shared state! +``` + +### Assertion Patterns +```python +# ✅ Use specific assertions +assert event.page_url == "https://expected.com" +assert "e" in payload.get() +mock_func.assert_called_with(expected_arg) + +# ❌ Avoid generic assertions +assert event # Too vague +assert payload.get() # What are we checking? +``` + +### Mock Management +```python +# ✅ Use patch decorators or context managers +@mock.patch('snowplow_tracker.tracker.uuid.uuid4') +def test_with_mock(mock_uuid): + mock_uuid.return_value = "test-id" + +# ✅ Clean up patches +def create_patch(self, name): + patcher = mock.patch(name) + thing = patcher.start() + self.addCleanup(patcher.stop) + return thing +``` + +## Common Test Scenarios + +### Testing Event Contexts +```python +# ✅ Test context encoding and attachment +def test_event_with_multiple_contexts(): + contexts = [ + SelfDescribingJson(schema1, data1), + SelfDescribingJson(schema2, data2) + ] + event = PageView(page_url="test", context=contexts) + + payload = event.build_payload(True, None, None) + cx_data = json.loads(base64.b64decode(payload.get()["cx"])) + assert len(cx_data["data"]) == 2 +``` + +### Testing Failure Scenarios +```python +# ✅ Test failure callbacks +def test_emitter_failure_callback(): + failed_events = [] + + def on_failure(count, events): + failed_events.extend(events) + + emitter = Emitter( + "https://invalid.collector", + on_failure=on_failure + ) + # Trigger failure and verify callback +``` + +### Testing Async Behavior +```python +# ✅ Test async emitter threading +def test_async_emitter(): + emitter = AsyncEmitter("https://collector.test") + + # Track events + for i in range(100): + emitter.input({"e": "pv", "url": f"test{i}.com"}) + + # Wait for flush + emitter.flush() + time.sleep(1) # Allow async processing + + # Verify all events sent +``` + +## Test Utilities + +### Helper Functions +```python +# ✅ Create reusable test helpers +def 
create_test_tracker(namespace="test"): + emitter = mock.MagicMock() + return Tracker(namespace, emitter) + +def create_test_event(): + return PageView(page_url="https://test.com") + +# ❌ Don't duplicate test setup +def test_one(): + emitter = mock.MagicMock() + tracker = Tracker("test", emitter) + # ... repeated in every test +``` + +## Performance Testing + +### Load Testing Pattern +```python +# ✅ Test tracker under load +def test_high_volume_tracking(): + tracker = create_test_tracker() + + start = time.time() + for i in range(10000): + tracker.track(PageView(page_url=f"test{i}.com")) + + duration = time.time() - start + assert duration < 5.0 # Performance threshold +``` + +## Quick Reference + +### Test File Naming +- Unit tests: `test_<component>.py` +- Integration tests: `test_integration_<feature>.py` +- Test classes: `Test<Component>` +- Test methods: `test_<behavior>` + +### Essential Test Imports +```python +import unittest +import unittest.mock as mock +from freezegun import freeze_time +from snowplow_tracker.contracts import ContractsDisabled +``` + +### Common Mock Targets +- `snowplow_tracker.tracker.Tracker.get_uuid` +- `requests.post` / `requests.get` +- `time.time` +- `snowplow_tracker.emitters.Emitter.sync_flush` + +## Contributing to test/CLAUDE.md + +When adding or modifying tests: + +1. **Maintain test isolation** - Each test should be independent +2. **Mock external dependencies** - No real network calls in unit tests +3. **Use descriptive test names** - Clear what is being tested +4. **Test both success and failure paths** - Include edge cases +5. **Keep tests fast** - Mock time-consuming operations +6. **Document complex test scenarios** - Add comments for clarity \ No newline at end of file