From 1ea728e88eb93a5fef78008a47689d4d40a8e4f4 Mon Sep 17 00:00:00 2001 From: Jack-Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Mon, 31 Oct 2022 16:01:16 +0000 Subject: [PATCH 01/18] Make HTTPS the default protocol in emitter (close #14) PR #288 * Set https as default protocol * Add unit tests --- snowplow_tracker/emitters.py | 13 ++++++++---- snowplow_tracker/test/unit/test_emitters.py | 22 ++++++++++++++------- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/snowplow_tracker/emitters.py b/snowplow_tracker/emitters.py index 2deb0345..ea028862 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -51,7 +51,7 @@ class Emitter(object): def __init__( self, endpoint: str, - protocol: HttpProtocol = "http", + protocol: HttpProtocol = "https", port: Optional[int] = None, method: Method = "get", buffer_size: Optional[int] = None, @@ -60,9 +60,9 @@ def __init__( byte_limit: Optional[int] = None, request_timeout: Optional[Union[float, Tuple[float, float]]] = None) -> None: """ - :param endpoint: The collector URL. Don't include "http://" - this is done automatically. + :param endpoint: The collector URL. If the endpoint does not include a protocol, the value of the protocol parameter ("https" by default) is used. :type endpoint: string - :param protocol: The protocol to use - http or https. Defaults to http. + :param protocol: The protocol to use - http or https. Defaults to https. 
:type protocol: protocol :param port: The collector port to connect to :type port: int | None @@ -116,7 +116,7 @@ def __init__( @staticmethod def as_collector_uri( endpoint: str, - protocol: HttpProtocol = "http", + protocol: HttpProtocol = "https", port: Optional[int] = None, method: Method = "get") -> str: """ @@ -133,6 +133,11 @@ def as_collector_uri( if len(endpoint) < 1: raise ValueError("No endpoint provided.") + if bool(PROTOCOLS & set(endpoint.split("://"))): + endpoint_arr = endpoint.split("://") + protocol = endpoint_arr[0] + endpoint = endpoint_arr[1] + if method == "get": path = "/i" else: diff --git a/snowplow_tracker/test/unit/test_emitters.py b/snowplow_tracker/test/unit/test_emitters.py index 0167525a..00ff68c1 100644 --- a/snowplow_tracker/test/unit/test_emitters.py +++ b/snowplow_tracker/test/unit/test_emitters.py @@ -54,7 +54,7 @@ def setUp(self) -> None: def test_init(self) -> None: e = Emitter('0.0.0.0') - self.assertEqual(e.endpoint, 'http://0.0.0.0/i') + self.assertEqual(e.endpoint, 'https://0.0.0.0/i') self.assertEqual(e.method, 'get') self.assertEqual(e.buffer_size, 1) self.assertEqual(e.buffer, []) @@ -83,24 +83,32 @@ def test_init_requests_timeout(self) -> None: def test_as_collector_uri(self) -> None: uri = Emitter.as_collector_uri('0.0.0.0') - self.assertEqual(uri, 'http://0.0.0.0/i') + self.assertEqual(uri, 'https://0.0.0.0/i') def test_as_collector_uri_post(self) -> None: uri = Emitter.as_collector_uri('0.0.0.0', method="post") - self.assertEqual(uri, 'http://0.0.0.0/com.snowplowanalytics.snowplow/tp2') + self.assertEqual(uri, 'https://0.0.0.0/com.snowplowanalytics.snowplow/tp2') def test_as_collector_uri_port(self) -> None: uri = Emitter.as_collector_uri('0.0.0.0', port=9090, method="post") - self.assertEqual(uri, 'http://0.0.0.0:9090/com.snowplowanalytics.snowplow/tp2') + self.assertEqual(uri, 'https://0.0.0.0:9090/com.snowplowanalytics.snowplow/tp2') - def test_as_collector_uri_https(self) -> None: - uri = 
Emitter.as_collector_uri('0.0.0.0', protocol="https") - self.assertEqual(uri, 'https://0.0.0.0/i') + def test_as_collector_uri_http(self) -> None: + uri = Emitter.as_collector_uri('0.0.0.0', protocol="http") + self.assertEqual(uri, 'http://0.0.0.0/i') def test_as_collector_uri_empty_string(self) -> None: with self.assertRaises(ValueError): Emitter.as_collector_uri('') + def test_as_collector_uri_endpoint_protocol(self) -> None: + uri = Emitter.as_collector_uri("https://0.0.0.0") + self.assertEqual(uri, "https://0.0.0.0/i") + + def test_as_collector_uri_endpoint_protocol_http(self) -> None: + uri = Emitter.as_collector_uri("http://0.0.0.0") + self.assertEqual(uri, "http://0.0.0.0/i") + @mock.patch('snowplow_tracker.Emitter.flush') def test_input_no_flush(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush From 18f7a11ae2824ecc1714e8b22f6c89b4a7f2ad7c Mon Sep 17 00:00:00 2001 From: Jack Keene Date: Tue, 1 Nov 2022 12:17:59 +0000 Subject: [PATCH 02/18] Fix invalid escape sequence --- snowplow_tracker/payload.py | 4 ++-- snowplow_tracker/tracker.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/snowplow_tracker/payload.py b/snowplow_tracker/payload.py index 77fa6759..bb47a1d6 100644 --- a/snowplow_tracker/payload.py +++ b/snowplow_tracker/payload.py @@ -54,7 +54,7 @@ def add_dict(self, dict_: PayloadDict, base64: bool = False) -> None: Add a dict of name value pairs to the Payload object :param dict_: Dictionary to be added to the Payload - :type dict_: dict(string:\*) + :type dict_: dict(string:\\*) """ for f in dict_: self.add(f, dict_[f]) @@ -70,7 +70,7 @@ def add_json( Add an encoded or unencoded JSON to the payload :param dict_: Custom context for the event - :type dict_: dict(string:\*) | None + :type dict_: dict(string:\\*) | None :param encode_base64: If the payload is base64 encoded :type encode_base64: bool :param type_when_encoded: Name of the field when encode_base64 is set diff --git a/snowplow_tracker/tracker.py 
b/snowplow_tracker/tracker.py index f693e41a..16b89d8d 100644 --- a/snowplow_tracker/tracker.py +++ b/snowplow_tracker/tracker.py @@ -263,7 +263,7 @@ def track_link_click( :param element_id: ID attribute of the HTML element :type element_id: string_or_none :param element_classes: Classes of the HTML element - :type element_classes: list(str) | tuple(str,\*) | None + :type element_classes: list(str) | tuple(str,\\*) | None :param element_target: ID attribute of the HTML element :type element_target: string_or_none :param element_content: The content of the HTML element @@ -416,7 +416,7 @@ def track_form_change( :param type_: Type of data the element represents :type type_: non_empty_string, form_type :param element_classes: Classes of the HTML element - :type element_classes: list(str) | tuple(str,\*) | None + :type element_classes: list(str) | tuple(str,\\*) | None :param context: Custom context for the event :type context: context_array | None :param tstamp: Optional event timestamp in milliseconds @@ -456,7 +456,7 @@ def track_form_submit( :param form_id: ID attribute of the HTML form :type form_id: non_empty_string :param form_classes: Classes of the HTML form - :type form_classes: list(str) | tuple(str,\*) | None + :type form_classes: list(str) | tuple(str,\\*) | None :param elements: Classes of the HTML form :type elements: list(form_element) | None :param context: Custom context for the event @@ -611,7 +611,7 @@ def track_ecommerce_transaction( :param currency: The currency the price is expressed in :type currency: string_or_none :param items: The items in the transaction - :type items: list(dict(str:\*)) | None + :type items: list(dict(str:\\*)) | None :param context: Custom context for the event :type context: context_array | None :param tstamp: Optional event timestamp in milliseconds From 5682e149bd31dfaa9daf598c352dc7894c49c885 Mon Sep 17 00:00:00 2001 From: Jack Keene Date: Tue, 1 Nov 2022 12:18:39 +0000 Subject: [PATCH 03/18] Set default method to post 
--- snowplow_tracker/emitters.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/snowplow_tracker/emitters.py b/snowplow_tracker/emitters.py index ea028862..00d09ebe 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -36,7 +36,7 @@ logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -DEFAULT_MAX_LENGTH = 10 +DEFAULT_MAX_LENGTH = 1 PAYLOAD_DATA_SCHEMA = "iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4" PROTOCOLS = {"http", "https"} METHODS = {"get", "post"} @@ -53,7 +53,7 @@ def __init__( endpoint: str, protocol: HttpProtocol = "https", port: Optional[int] = None, - method: Method = "get", + method: Method = "post", buffer_size: Optional[int] = None, on_success: Optional[SuccessCallback] = None, on_failure: Optional[FailureCallback] = None, @@ -66,9 +66,9 @@ def __init__( :type protocol: protocol :param port: The collector port to connect to :type port: int | None - :param method: The HTTP request method + :param method: The HTTP request method. Defaults to post. :type method: method - :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 10. + :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 1. :type buffer_size: int | None :param on_success: Callback executed after every HTTP request in a flush has status code 200 Gets passed the number of events flushed. 
@@ -94,10 +94,8 @@ def __init__( self.method = method if buffer_size is None: - if method == "post": - buffer_size = DEFAULT_MAX_LENGTH - else: - buffer_size = 1 + buffer_size = DEFAULT_MAX_LENGTH + self.buffer_size = buffer_size self.buffer = [] self.byte_limit = byte_limit @@ -118,7 +116,7 @@ def as_collector_uri( endpoint: str, protocol: HttpProtocol = "https", port: Optional[int] = None, - method: Method = "get") -> str: + method: Method = "post") -> str: """ :param endpoint: The raw endpoint provided by the user :type endpoint: string @@ -153,7 +151,7 @@ def input(self, payload: PayloadDict) -> None: If the maximum size has been reached, flushes the buffer. :param payload: The name-value pairs for the event - :type payload: dict(string:\*) + :type payload: dict(string:\\*) """ with self.lock: if self.bytes_queued is not None: @@ -212,7 +210,7 @@ def http_post(self, data: str) -> bool: def http_get(self, payload: PayloadDict) -> bool: """ :param payload: The event properties - :type payload: dict(string:\*) + :type payload: dict(string:\\*) """ logger.info("Sending GET request to %s..." 
% self.endpoint) logger.debug("Payload: %s" % payload) @@ -247,7 +245,7 @@ def is_good_status_code(status_code: int) -> bool: def send_events(self, evts: PayloadDictList) -> None: """ :param evts: Array of events to be sent - :type evts: list(dict(string:\*)) + :type evts: list(dict(string:\\*)) """ if len(evts) > 0: logger.info("Attempting to send %s events" % len(evts)) @@ -312,7 +310,7 @@ def attach_sent_timestamp(events: PayloadDictList) -> None: as `stm` param :param events: Array of events to be sent - :type events: list(dict(string:\*)) + :type events: list(dict(string:\\*)) :rtype: None """ def update(e: PayloadDict) -> None: @@ -332,7 +330,7 @@ def __init__( endpoint: str, protocol: HttpProtocol = "http", port: Optional[int] = None, - method: Method = "get", + method: Method = "post", buffer_size: Optional[int] = None, on_success: Optional[SuccessCallback] = None, on_failure: Optional[FailureCallback] = None, From 96d35c2dae02e40c97caf2a56074b91c455bd23c Mon Sep 17 00:00:00 2001 From: Jack Keene Date: Tue, 1 Nov 2022 12:21:58 +0000 Subject: [PATCH 04/18] Update integration tests --- .../test/integration/test_integration.py | 45 ++++++++++--------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/snowplow_tracker/test/integration/test_integration.py b/snowplow_tracker/test/integration/test_integration.py index 2346243b..7d73378c 100644 --- a/snowplow_tracker/test/integration/test_integration.py +++ b/snowplow_tracker/test/integration/test_integration.py @@ -38,7 +38,7 @@ default_emitter = emitters.Emitter("localhost", protocol="http", port=80) -post_emitter = emitters.Emitter("localhost", protocol="http", port=80, method='post', buffer_size=1) +get_emitter = emitters.Emitter("localhost", protocol="http", port=80, method='get', buffer_size=1) default_subject = subject.Subject() @@ -79,7 +79,7 @@ def fail_response_content(url: str, request: Any) -> Dict[str, Any]: class IntegrationTest(unittest.TestCase): def test_integration_page_view(self) -> 
None: - t = tracker.Tracker([default_emitter], default_subject) + t = tracker.Tracker([get_emitter], default_subject) with HTTMock(pass_response_content): t.track_page_view("http://savethearctic.org", "Save The Arctic", "http://referrer.com") expected_fields = {"e": "pv", "page": "Save+The+Arctic", "url": "http%3A%2F%2Fsavethearctic.org", "refr": "http%3A%2F%2Freferrer.com"} @@ -87,7 +87,7 @@ def test_integration_page_view(self) -> None: self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) def test_integration_ecommerce_transaction_item(self) -> None: - t = tracker.Tracker([default_emitter], default_subject) + t = tracker.Tracker([get_emitter], default_subject) with HTTMock(pass_response_content): t.track_ecommerce_transaction_item("12345", "pbz0025", 7.99, 2, "black-tarot", "tarot", currency="GBP") expected_fields = {"ti_ca": "tarot", "ti_id": "12345", "ti_qu": "2", "ti_sk": "pbz0025", "e": "ti", "ti_nm": "black-tarot", "ti_pr": "7.99", "ti_cu": "GBP"} @@ -95,7 +95,7 @@ def test_integration_ecommerce_transaction_item(self) -> None: self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) def test_integration_ecommerce_transaction(self) -> None: - t = tracker.Tracker([default_emitter], default_subject) + t = tracker.Tracker([get_emitter], default_subject) with HTTMock(pass_response_content): t.track_ecommerce_transaction( "6a8078be", 35, city="London", currency="GBP", @@ -126,7 +126,7 @@ def test_integration_ecommerce_transaction(self) -> None: self.assertEqual(from_querystring("ttm", querystrings[-3]), from_querystring("ttm", querystrings[-2])) def test_integration_screen_view(self) -> None: - t = tracker.Tracker([default_emitter], default_subject, encode_base64=False) + t = tracker.Tracker([get_emitter], default_subject, encode_base64=False) with HTTMock(pass_response_content): t.track_screen_view("Game HUD 2", id_="534") expected_fields = {"e": "ue"} @@ -146,7 +146,7 @@ def test_integration_screen_view(self) -> 
None: }) def test_integration_struct_event(self) -> None: - t = tracker.Tracker([default_emitter], default_subject) + t = tracker.Tracker([get_emitter], default_subject) with HTTMock(pass_response_content): t.track_struct_event("Ecomm", "add-to-basket", "dog-skateboarding-video", "hd", 13.99) expected_fields = {"se_ca": "Ecomm", "se_pr": "hd", "se_la": "dog-skateboarding-video", "se_va": "13.99", "se_ac": "add-to-basket", "e": "se"} @@ -154,7 +154,7 @@ def test_integration_struct_event(self) -> None: self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) def test_integration_unstruct_event_non_base64(self) -> None: - t = tracker.Tracker([default_emitter], default_subject, encode_base64=False) + t = tracker.Tracker([get_emitter], default_subject, encode_base64=False) with HTTMock(pass_response_content): t.track_unstruct_event(SelfDescribingJson("iglu:com.acme/viewed_product/jsonschema/2-0-2", {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000})) expected_fields = {"e": "ue"} @@ -168,7 +168,7 @@ def test_integration_unstruct_event_non_base64(self) -> None: }) def test_integration_unstruct_event_base64(self) -> None: - t = tracker.Tracker([default_emitter], default_subject, encode_base64=True) + t = tracker.Tracker([get_emitter], default_subject, encode_base64=True) with HTTMock(pass_response_content): t.track_unstruct_event(SelfDescribingJson("iglu:com.acme/viewed_product/jsonschema/2-0-2", {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000})) expected_fields = {"e": "ue"} @@ -182,7 +182,7 @@ def test_integration_unstruct_event_base64(self) -> None: }) def test_integration_context_non_base64(self) -> None: - t = tracker.Tracker([default_emitter], default_subject, encode_base64=False) + t = tracker.Tracker([get_emitter], default_subject, encode_base64=False) with HTTMock(pass_response_content): t.track_page_view("localhost", "local host", None, [SelfDescribingJson("iglu:com.example/user/jsonschema/2-0-3", 
{"user_type": "tester"})]) envelope_string = from_querystring("co", querystrings[-1]) @@ -193,7 +193,7 @@ def test_integration_context_non_base64(self) -> None: }) def test_integration_context_base64(self) -> None: - t = tracker.Tracker([default_emitter], default_subject, encode_base64=True) + t = tracker.Tracker([get_emitter], default_subject, encode_base64=True) with HTTMock(pass_response_content): t.track_page_view("localhost", "local host", None, [SelfDescribingJson("iglu:com.example/user/jsonschema/2-0-3", {"user_type": "tester"})]) envelope_string = unquote_plus(from_querystring("cx", querystrings[-1])) @@ -212,7 +212,7 @@ def test_integration_standard_nv_pairs(self) -> None: s.set_timezone("Europe London") s.set_lang("en") - t = tracker.Tracker([emitters.Emitter("localhost")], s, "cf", app_id="angry-birds-android") + t = tracker.Tracker([emitters.Emitter("localhost", method='get')], s, "cf", app_id="angry-birds-android") with HTTMock(pass_response_content): t.track_page_view("localhost", "local host") expected_fields = {"tna": "cf", "res": "100x200", @@ -232,7 +232,7 @@ def test_integration_identification_methods(self) -> None: s.set_useragent("Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0)") s.set_network_user_id("fbc6c76c-bce5-43ce-8d5a-31c5") - t = tracker.Tracker([emitters.Emitter("localhost")], s, "cf", app_id="angry-birds-android") + t = tracker.Tracker([emitters.Emitter("localhost", method='get')], s, "cf", app_id="angry-birds-android") with HTTMock(pass_response_content): t.track_page_view("localhost", "local host") expected_fields = { @@ -251,7 +251,7 @@ def test_integration_event_subject(self) -> None: s.set_domain_user_id("4616bfb38f872d16") s.set_lang("ES") - t = tracker.Tracker([emitters.Emitter("localhost")], s, "cf", app_id="angry-birds-android") + t = tracker.Tracker([emitters.Emitter("localhost", method='get')], s, "cf", app_id="angry-birds-android") evSubject = 
subject.Subject().set_domain_user_id("1111aaa11a111a11").set_lang("EN") with HTTMock(pass_response_content): t.track_page_view("localhost", "local host", event_subject=evSubject) @@ -293,6 +293,7 @@ def test_integration_success_callback(self) -> None: callback_failure_queue = [] callback_emitter = emitters.Emitter( "localhost", + method='get', on_success=lambda x: callback_success_queue.append(x), on_failure=lambda x, y: callback_failure_queue.append(x)) t = tracker.Tracker([callback_emitter], default_subject) @@ -321,7 +322,7 @@ def test_integration_failure_callback(self) -> None: self.assertEqual(callback_failure_queue[0], 0) def test_post_page_view(self) -> None: - t = tracker.Tracker([post_emitter], default_subject) + t = tracker.Tracker([default_emitter], default_subject) with HTTMock(pass_post_response_content): t.track_page_view("localhost", "local host", None) expected_fields = {"e": "pv", "page": "local host", "url": "localhost"} @@ -331,8 +332,8 @@ def test_post_page_view(self) -> None: self.assertEqual(request["data"][0][key], expected_fields[key]) def test_post_batched(self) -> None: - post_emitter = emitters.Emitter("localhost", protocol="http", port=80, method='post', buffer_size=2) - t = tracker.Tracker(post_emitter, default_subject) + default_emitter = emitters.Emitter("localhost", protocol="http", port=80, buffer_size=2) + t = tracker.Tracker(default_emitter, default_subject) with HTTMock(pass_post_response_content): t.track_struct_event("Test", "A") t.track_struct_event("Test", "B") @@ -341,7 +342,7 @@ def test_post_batched(self) -> None: @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 def test_timestamps(self) -> None: - emitter = emitters.Emitter("localhost", protocol="http", port=80, method='post', buffer_size=3) + emitter = emitters.Emitter("localhost", protocol="http", port=80, buffer_size=3) t = tracker.Tracker([emitter], default_subject) with HTTMock(pass_post_response_content): t.track_page_view("localhost", "stamp0", None, 
tstamp=None) @@ -361,18 +362,18 @@ def test_timestamps(self) -> None: self.assertEqual(request["data"][i].get("stm"), expected_timestamps[i]["stm"]) def test_bytelimit(self) -> None: - post_emitter = emitters.Emitter("localhost", protocol="http", port=80, method='post', buffer_size=5, byte_limit=420) - t = tracker.Tracker(post_emitter, default_subject) + default_emitter = emitters.Emitter("localhost", protocol="http", port=80, buffer_size=5, byte_limit=420) + t = tracker.Tracker(default_emitter, default_subject) with HTTMock(pass_post_response_content): t.track_struct_event("Test", "A") # 140 bytes t.track_struct_event("Test", "A") # 280 bytes t.track_struct_event("Test", "A") # 420 bytes. Send t.track_struct_event("Test", "AA") # 141 self.assertEqual(len(querystrings[-1]["data"]), 3) - self.assertEqual(post_emitter.bytes_queued, 136 + len(_version.__version__)) + self.assertEqual(default_emitter.bytes_queued, 136 + len(_version.__version__)) def test_unicode_get(self) -> None: - t = tracker.Tracker([default_emitter], default_subject, encode_base64=False) + t = tracker.Tracker([get_emitter], default_subject, encode_base64=False) unicode_a = u'\u0107' unicode_b = u'test.\u0107om' test_ctx = SelfDescribingJson('iglu:a.b/c/jsonschema/1-0-0', {'test': unicode_a}) @@ -396,7 +397,7 @@ def test_unicode_get(self) -> None: self.assertEqual(actual_b, unicode_b) def test_unicode_post(self) -> None: - t = tracker.Tracker([post_emitter], default_subject, encode_base64=False) + t = tracker.Tracker([default_emitter], default_subject, encode_base64=False) unicode_a = u'\u0107' unicode_b = u'test.\u0107om' test_ctx = SelfDescribingJson('iglu:a.b/c/jsonschema/1-0-0', {'test': unicode_a}) From f0ac5394658e123528aca813d185ced8fe40d1fd Mon Sep 17 00:00:00 2001 From: Jack Keene Date: Tue, 1 Nov 2022 12:22:20 +0000 Subject: [PATCH 05/18] Update unit tests --- snowplow_tracker/test/unit/test_emitters.py | 38 ++++++++++----------- 1 file changed, 19 insertions(+), 19 deletions(-) diff 
--git a/snowplow_tracker/test/unit/test_emitters.py b/snowplow_tracker/test/unit/test_emitters.py index 00ff68c1..94ae1f62 100644 --- a/snowplow_tracker/test/unit/test_emitters.py +++ b/snowplow_tracker/test/unit/test_emitters.py @@ -54,8 +54,8 @@ def setUp(self) -> None: def test_init(self) -> None: e = Emitter('0.0.0.0') - self.assertEqual(e.endpoint, 'https://0.0.0.0/i') - self.assertEqual(e.method, 'get') + self.assertEqual(e.endpoint, 'https://0.0.0.0/com.snowplowanalytics.snowplow/tp2') + self.assertEqual(e.method, 'post') self.assertEqual(e.buffer_size, 1) self.assertEqual(e.buffer, []) self.assertIsNone(e.byte_limit) @@ -70,7 +70,7 @@ def test_init_buffer_size(self) -> None: self.assertEqual(e.buffer_size, 10) def test_init_post(self) -> None: - e = Emitter('0.0.0.0', method="post") + e = Emitter('0.0.0.0') self.assertEqual(e.buffer_size, DEFAULT_MAX_LENGTH) def test_init_byte_limit(self) -> None: @@ -83,19 +83,19 @@ def test_init_requests_timeout(self) -> None: def test_as_collector_uri(self) -> None: uri = Emitter.as_collector_uri('0.0.0.0') - self.assertEqual(uri, 'https://0.0.0.0/i') - - def test_as_collector_uri_post(self) -> None: - uri = Emitter.as_collector_uri('0.0.0.0', method="post") self.assertEqual(uri, 'https://0.0.0.0/com.snowplowanalytics.snowplow/tp2') + def test_as_collector_uri_get(self) -> None: + uri = Emitter.as_collector_uri('0.0.0.0', method='get') + self.assertEqual(uri, 'https://0.0.0.0/i') + def test_as_collector_uri_port(self) -> None: - uri = Emitter.as_collector_uri('0.0.0.0', port=9090, method="post") + uri = Emitter.as_collector_uri('0.0.0.0', port=9090) self.assertEqual(uri, 'https://0.0.0.0:9090/com.snowplowanalytics.snowplow/tp2') def test_as_collector_uri_http(self) -> None: uri = Emitter.as_collector_uri('0.0.0.0', protocol="http") - self.assertEqual(uri, 'http://0.0.0.0/i') + self.assertEqual(uri, 'http://0.0.0.0/com.snowplowanalytics.snowplow/tp2') def test_as_collector_uri_empty_string(self) -> None: with 
self.assertRaises(ValueError): @@ -103,11 +103,11 @@ def test_as_collector_uri_empty_string(self) -> None: def test_as_collector_uri_endpoint_protocol(self) -> None: uri = Emitter.as_collector_uri("https://0.0.0.0") - self.assertEqual(uri, "https://0.0.0.0/i") + self.assertEqual(uri, "https://0.0.0.0/com.snowplowanalytics.snowplow/tp2") def test_as_collector_uri_endpoint_protocol_http(self) -> None: uri = Emitter.as_collector_uri("http://0.0.0.0") - self.assertEqual(uri, "http://0.0.0.0/i") + self.assertEqual(uri, "http://0.0.0.0/com.snowplowanalytics.snowplow/tp2") @mock.patch('snowplow_tracker.Emitter.flush') def test_input_no_flush(self, mok_flush: Any) -> None: @@ -173,7 +173,7 @@ def test_input_bytes_queued(self, mok_flush: Any) -> None: def test_input_bytes_post(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="post") + e = Emitter('0.0.0.0') nvPairs = {"testString": "test", "testNum": 2.72} e.input(nvPairs) @@ -219,7 +219,7 @@ def test_attach_sent_tstamp(self) -> None: def test_flush_timer(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="post", buffer_size=10) + e = Emitter('0.0.0.0', buffer_size=10) ev_list = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] for i in ev_list: e.input(i) @@ -261,7 +261,7 @@ def test_send_events_post_success(self, mok_http_post: Any) -> None: mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', method="post", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + e = Emitter('0.0.0.0', buffer_size=10, on_success=mok_success, on_failure=mok_failure) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) @@ -274,7 +274,7 @@ def test_send_events_post_failure(self, mok_http_post: Any) -> None: mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', 
method="post", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + e = Emitter('0.0.0.0', buffer_size=10, on_success=mok_success, on_failure=mok_failure) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) @@ -292,7 +292,7 @@ def test_http_post_connect_timeout_error(self, mok_post_request: Any) -> None: @mock.patch('snowplow_tracker.emitters.requests.post') def test_http_get_connect_timeout_error(self, mok_post_request: Any) -> None: mok_post_request.side_effect = ConnectTimeout - e = Emitter('0.0.0.0') + e = Emitter('0.0.0.0', method='get') get_succeeded = e.http_get({"a": "b"}) self.assertFalse(get_succeeded) @@ -366,7 +366,7 @@ def test_async_send_events_post_success(self, mok_http_post: Any) -> None: mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - ae = Emitter('0.0.0.0', method="post", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + ae = Emitter('0.0.0.0', buffer_size=10, on_success=mok_success, on_failure=mok_failure) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] ae.send_events(evBuffer) @@ -379,7 +379,7 @@ def test_async_send_events_post_failure(self, mok_http_post: Any) -> None: mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - ae = Emitter('0.0.0.0', method="post", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + ae = Emitter('0.0.0.0', buffer_size=10, on_success=mok_success, on_failure=mok_failure) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] ae.send_events(evBuffer) @@ -403,7 +403,7 @@ def test_input_unicode_post(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush payload = {"unicode": u'\u0107', "alsoAscii": "abc"} - ae = AsyncEmitter('0.0.0.0', method="post", buffer_size=2) + ae = AsyncEmitter('0.0.0.0', buffer_size=2) ae.input(payload) self.assertEqual(len(ae.buffer), 1) From b6859614f59484cf1186215bdecbd1fd839db107 Mon Sep 17 
00:00:00 2001 From: Jack Keene Date: Tue, 1 Nov 2022 12:17:59 +0000 Subject: [PATCH 06/18] Fix invalid escape sequence --- snowplow_tracker/payload.py | 4 ++-- snowplow_tracker/tracker.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/snowplow_tracker/payload.py b/snowplow_tracker/payload.py index 77fa6759..bb47a1d6 100644 --- a/snowplow_tracker/payload.py +++ b/snowplow_tracker/payload.py @@ -54,7 +54,7 @@ def add_dict(self, dict_: PayloadDict, base64: bool = False) -> None: Add a dict of name value pairs to the Payload object :param dict_: Dictionary to be added to the Payload - :type dict_: dict(string:\*) + :type dict_: dict(string:\\*) """ for f in dict_: self.add(f, dict_[f]) @@ -70,7 +70,7 @@ def add_json( Add an encoded or unencoded JSON to the payload :param dict_: Custom context for the event - :type dict_: dict(string:\*) | None + :type dict_: dict(string:\\*) | None :param encode_base64: If the payload is base64 encoded :type encode_base64: bool :param type_when_encoded: Name of the field when encode_base64 is set diff --git a/snowplow_tracker/tracker.py b/snowplow_tracker/tracker.py index f693e41a..16b89d8d 100644 --- a/snowplow_tracker/tracker.py +++ b/snowplow_tracker/tracker.py @@ -263,7 +263,7 @@ def track_link_click( :param element_id: ID attribute of the HTML element :type element_id: string_or_none :param element_classes: Classes of the HTML element - :type element_classes: list(str) | tuple(str,\*) | None + :type element_classes: list(str) | tuple(str,\\*) | None :param element_target: ID attribute of the HTML element :type element_target: string_or_none :param element_content: The content of the HTML element @@ -416,7 +416,7 @@ def track_form_change( :param type_: Type of data the element represents :type type_: non_empty_string, form_type :param element_classes: Classes of the HTML element - :type element_classes: list(str) | tuple(str,\*) | None + :type element_classes: list(str) | tuple(str,\\*) | None :param 
context: Custom context for the event :type context: context_array | None :param tstamp: Optional event timestamp in milliseconds @@ -456,7 +456,7 @@ def track_form_submit( :param form_id: ID attribute of the HTML form :type form_id: non_empty_string :param form_classes: Classes of the HTML form - :type form_classes: list(str) | tuple(str,\*) | None + :type form_classes: list(str) | tuple(str,\\*) | None :param elements: Classes of the HTML form :type elements: list(form_element) | None :param context: Custom context for the event @@ -611,7 +611,7 @@ def track_ecommerce_transaction( :param currency: The currency the price is expressed in :type currency: string_or_none :param items: The items in the transaction - :type items: list(dict(str:\*)) | None + :type items: list(dict(str:\\*)) | None :param context: Custom context for the event :type context: context_array | None :param tstamp: Optional event timestamp in milliseconds From 800b13a8260d6729713479a4ab7a12ee8eed5c70 Mon Sep 17 00:00:00 2001 From: Jack Keene Date: Tue, 1 Nov 2022 12:18:39 +0000 Subject: [PATCH 07/18] Set default method to post --- snowplow_tracker/emitters.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/snowplow_tracker/emitters.py b/snowplow_tracker/emitters.py index 0f331ef5..9e9867d8 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -36,7 +36,7 @@ logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -DEFAULT_MAX_LENGTH = 10 +DEFAULT_MAX_LENGTH = 1 PAYLOAD_DATA_SCHEMA = "iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4" PROTOCOLS = {"http", "https"} METHODS = {"get", "post"} @@ -53,7 +53,7 @@ def __init__( endpoint: str, protocol: HttpProtocol = "https", port: Optional[int] = None, - method: Method = "get", + method: Method = "post", buffer_size: Optional[int] = None, on_success: Optional[SuccessCallback] = None, on_failure: Optional[FailureCallback] = None, @@ -66,9 +66,9 @@ def 
__init__( :type protocol: protocol :param port: The collector port to connect to :type port: int | None - :param method: The HTTP request method + :param method: The HTTP request method. Defaults to post. :type method: method - :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 10. + :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 1. :type buffer_size: int | None :param on_success: Callback executed after every HTTP request in a flush has status code 200 Gets passed the number of events flushed. @@ -94,10 +94,8 @@ def __init__( self.method = method if buffer_size is None: - if method == "post": - buffer_size = DEFAULT_MAX_LENGTH - else: - buffer_size = 1 + buffer_size = DEFAULT_MAX_LENGTH + self.buffer_size = buffer_size self.buffer = [] self.byte_limit = byte_limit @@ -118,7 +116,7 @@ def as_collector_uri( endpoint: str, protocol: HttpProtocol = "https", port: Optional[int] = None, - method: Method = "get") -> str: + method: Method = "post") -> str: """ :param endpoint: The raw endpoint provided by the user :type endpoint: string @@ -153,7 +151,7 @@ def input(self, payload: PayloadDict) -> None: If the maximum size has been reached, flushes the buffer. :param payload: The name-value pairs for the event - :type payload: dict(string:\*) + :type payload: dict(string:\\*) """ with self.lock: if self.bytes_queued is not None: @@ -212,7 +210,7 @@ def http_post(self, data: str) -> bool: def http_get(self, payload: PayloadDict) -> bool: """ :param payload: The event properties - :type payload: dict(string:\*) + :type payload: dict(string:\\*) """ logger.info("Sending GET request to %s..." 
% self.endpoint) logger.debug("Payload: %s" % payload) @@ -247,7 +245,7 @@ def is_good_status_code(status_code: int) -> bool: def send_events(self, evts: PayloadDictList) -> None: """ :param evts: Array of events to be sent - :type evts: list(dict(string:\*)) + :type evts: list(dict(string:\\*)) """ if len(evts) > 0: logger.info("Attempting to send %s events" % len(evts)) @@ -312,7 +310,7 @@ def attach_sent_timestamp(events: PayloadDictList) -> None: as `stm` param :param events: Array of events to be sent - :type events: list(dict(string:\*)) + :type events: list(dict(string:\\*)) :rtype: None """ def update(e: PayloadDict) -> None: @@ -332,7 +330,7 @@ def __init__( endpoint: str, protocol: HttpProtocol = "http", port: Optional[int] = None, - method: Method = "get", + method: Method = "post", buffer_size: Optional[int] = None, on_success: Optional[SuccessCallback] = None, on_failure: Optional[FailureCallback] = None, From db735b93ad4dfd470797b44bfc3851b7a8187593 Mon Sep 17 00:00:00 2001 From: Jack Keene Date: Tue, 1 Nov 2022 12:21:58 +0000 Subject: [PATCH 08/18] Update integration tests --- .../test/integration/test_integration.py | 45 ++++++++++--------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/snowplow_tracker/test/integration/test_integration.py b/snowplow_tracker/test/integration/test_integration.py index 2346243b..7d73378c 100644 --- a/snowplow_tracker/test/integration/test_integration.py +++ b/snowplow_tracker/test/integration/test_integration.py @@ -38,7 +38,7 @@ default_emitter = emitters.Emitter("localhost", protocol="http", port=80) -post_emitter = emitters.Emitter("localhost", protocol="http", port=80, method='post', buffer_size=1) +get_emitter = emitters.Emitter("localhost", protocol="http", port=80, method='get', buffer_size=1) default_subject = subject.Subject() @@ -79,7 +79,7 @@ def fail_response_content(url: str, request: Any) -> Dict[str, Any]: class IntegrationTest(unittest.TestCase): def test_integration_page_view(self) -> 
None: - t = tracker.Tracker([default_emitter], default_subject) + t = tracker.Tracker([get_emitter], default_subject) with HTTMock(pass_response_content): t.track_page_view("http://savethearctic.org", "Save The Arctic", "http://referrer.com") expected_fields = {"e": "pv", "page": "Save+The+Arctic", "url": "http%3A%2F%2Fsavethearctic.org", "refr": "http%3A%2F%2Freferrer.com"} @@ -87,7 +87,7 @@ def test_integration_page_view(self) -> None: self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) def test_integration_ecommerce_transaction_item(self) -> None: - t = tracker.Tracker([default_emitter], default_subject) + t = tracker.Tracker([get_emitter], default_subject) with HTTMock(pass_response_content): t.track_ecommerce_transaction_item("12345", "pbz0025", 7.99, 2, "black-tarot", "tarot", currency="GBP") expected_fields = {"ti_ca": "tarot", "ti_id": "12345", "ti_qu": "2", "ti_sk": "pbz0025", "e": "ti", "ti_nm": "black-tarot", "ti_pr": "7.99", "ti_cu": "GBP"} @@ -95,7 +95,7 @@ def test_integration_ecommerce_transaction_item(self) -> None: self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) def test_integration_ecommerce_transaction(self) -> None: - t = tracker.Tracker([default_emitter], default_subject) + t = tracker.Tracker([get_emitter], default_subject) with HTTMock(pass_response_content): t.track_ecommerce_transaction( "6a8078be", 35, city="London", currency="GBP", @@ -126,7 +126,7 @@ def test_integration_ecommerce_transaction(self) -> None: self.assertEqual(from_querystring("ttm", querystrings[-3]), from_querystring("ttm", querystrings[-2])) def test_integration_screen_view(self) -> None: - t = tracker.Tracker([default_emitter], default_subject, encode_base64=False) + t = tracker.Tracker([get_emitter], default_subject, encode_base64=False) with HTTMock(pass_response_content): t.track_screen_view("Game HUD 2", id_="534") expected_fields = {"e": "ue"} @@ -146,7 +146,7 @@ def test_integration_screen_view(self) -> 
None: }) def test_integration_struct_event(self) -> None: - t = tracker.Tracker([default_emitter], default_subject) + t = tracker.Tracker([get_emitter], default_subject) with HTTMock(pass_response_content): t.track_struct_event("Ecomm", "add-to-basket", "dog-skateboarding-video", "hd", 13.99) expected_fields = {"se_ca": "Ecomm", "se_pr": "hd", "se_la": "dog-skateboarding-video", "se_va": "13.99", "se_ac": "add-to-basket", "e": "se"} @@ -154,7 +154,7 @@ def test_integration_struct_event(self) -> None: self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) def test_integration_unstruct_event_non_base64(self) -> None: - t = tracker.Tracker([default_emitter], default_subject, encode_base64=False) + t = tracker.Tracker([get_emitter], default_subject, encode_base64=False) with HTTMock(pass_response_content): t.track_unstruct_event(SelfDescribingJson("iglu:com.acme/viewed_product/jsonschema/2-0-2", {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000})) expected_fields = {"e": "ue"} @@ -168,7 +168,7 @@ def test_integration_unstruct_event_non_base64(self) -> None: }) def test_integration_unstruct_event_base64(self) -> None: - t = tracker.Tracker([default_emitter], default_subject, encode_base64=True) + t = tracker.Tracker([get_emitter], default_subject, encode_base64=True) with HTTMock(pass_response_content): t.track_unstruct_event(SelfDescribingJson("iglu:com.acme/viewed_product/jsonschema/2-0-2", {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000})) expected_fields = {"e": "ue"} @@ -182,7 +182,7 @@ def test_integration_unstruct_event_base64(self) -> None: }) def test_integration_context_non_base64(self) -> None: - t = tracker.Tracker([default_emitter], default_subject, encode_base64=False) + t = tracker.Tracker([get_emitter], default_subject, encode_base64=False) with HTTMock(pass_response_content): t.track_page_view("localhost", "local host", None, [SelfDescribingJson("iglu:com.example/user/jsonschema/2-0-3", 
{"user_type": "tester"})]) envelope_string = from_querystring("co", querystrings[-1]) @@ -193,7 +193,7 @@ def test_integration_context_non_base64(self) -> None: }) def test_integration_context_base64(self) -> None: - t = tracker.Tracker([default_emitter], default_subject, encode_base64=True) + t = tracker.Tracker([get_emitter], default_subject, encode_base64=True) with HTTMock(pass_response_content): t.track_page_view("localhost", "local host", None, [SelfDescribingJson("iglu:com.example/user/jsonschema/2-0-3", {"user_type": "tester"})]) envelope_string = unquote_plus(from_querystring("cx", querystrings[-1])) @@ -212,7 +212,7 @@ def test_integration_standard_nv_pairs(self) -> None: s.set_timezone("Europe London") s.set_lang("en") - t = tracker.Tracker([emitters.Emitter("localhost")], s, "cf", app_id="angry-birds-android") + t = tracker.Tracker([emitters.Emitter("localhost", method='get')], s, "cf", app_id="angry-birds-android") with HTTMock(pass_response_content): t.track_page_view("localhost", "local host") expected_fields = {"tna": "cf", "res": "100x200", @@ -232,7 +232,7 @@ def test_integration_identification_methods(self) -> None: s.set_useragent("Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0)") s.set_network_user_id("fbc6c76c-bce5-43ce-8d5a-31c5") - t = tracker.Tracker([emitters.Emitter("localhost")], s, "cf", app_id="angry-birds-android") + t = tracker.Tracker([emitters.Emitter("localhost", method='get')], s, "cf", app_id="angry-birds-android") with HTTMock(pass_response_content): t.track_page_view("localhost", "local host") expected_fields = { @@ -251,7 +251,7 @@ def test_integration_event_subject(self) -> None: s.set_domain_user_id("4616bfb38f872d16") s.set_lang("ES") - t = tracker.Tracker([emitters.Emitter("localhost")], s, "cf", app_id="angry-birds-android") + t = tracker.Tracker([emitters.Emitter("localhost", method='get')], s, "cf", app_id="angry-birds-android") evSubject = 
subject.Subject().set_domain_user_id("1111aaa11a111a11").set_lang("EN") with HTTMock(pass_response_content): t.track_page_view("localhost", "local host", event_subject=evSubject) @@ -293,6 +293,7 @@ def test_integration_success_callback(self) -> None: callback_failure_queue = [] callback_emitter = emitters.Emitter( "localhost", + method='get', on_success=lambda x: callback_success_queue.append(x), on_failure=lambda x, y: callback_failure_queue.append(x)) t = tracker.Tracker([callback_emitter], default_subject) @@ -321,7 +322,7 @@ def test_integration_failure_callback(self) -> None: self.assertEqual(callback_failure_queue[0], 0) def test_post_page_view(self) -> None: - t = tracker.Tracker([post_emitter], default_subject) + t = tracker.Tracker([default_emitter], default_subject) with HTTMock(pass_post_response_content): t.track_page_view("localhost", "local host", None) expected_fields = {"e": "pv", "page": "local host", "url": "localhost"} @@ -331,8 +332,8 @@ def test_post_page_view(self) -> None: self.assertEqual(request["data"][0][key], expected_fields[key]) def test_post_batched(self) -> None: - post_emitter = emitters.Emitter("localhost", protocol="http", port=80, method='post', buffer_size=2) - t = tracker.Tracker(post_emitter, default_subject) + default_emitter = emitters.Emitter("localhost", protocol="http", port=80, buffer_size=2) + t = tracker.Tracker(default_emitter, default_subject) with HTTMock(pass_post_response_content): t.track_struct_event("Test", "A") t.track_struct_event("Test", "B") @@ -341,7 +342,7 @@ def test_post_batched(self) -> None: @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 def test_timestamps(self) -> None: - emitter = emitters.Emitter("localhost", protocol="http", port=80, method='post', buffer_size=3) + emitter = emitters.Emitter("localhost", protocol="http", port=80, buffer_size=3) t = tracker.Tracker([emitter], default_subject) with HTTMock(pass_post_response_content): t.track_page_view("localhost", "stamp0", None, 
tstamp=None) @@ -361,18 +362,18 @@ def test_timestamps(self) -> None: self.assertEqual(request["data"][i].get("stm"), expected_timestamps[i]["stm"]) def test_bytelimit(self) -> None: - post_emitter = emitters.Emitter("localhost", protocol="http", port=80, method='post', buffer_size=5, byte_limit=420) - t = tracker.Tracker(post_emitter, default_subject) + default_emitter = emitters.Emitter("localhost", protocol="http", port=80, buffer_size=5, byte_limit=420) + t = tracker.Tracker(default_emitter, default_subject) with HTTMock(pass_post_response_content): t.track_struct_event("Test", "A") # 140 bytes t.track_struct_event("Test", "A") # 280 bytes t.track_struct_event("Test", "A") # 420 bytes. Send t.track_struct_event("Test", "AA") # 141 self.assertEqual(len(querystrings[-1]["data"]), 3) - self.assertEqual(post_emitter.bytes_queued, 136 + len(_version.__version__)) + self.assertEqual(default_emitter.bytes_queued, 136 + len(_version.__version__)) def test_unicode_get(self) -> None: - t = tracker.Tracker([default_emitter], default_subject, encode_base64=False) + t = tracker.Tracker([get_emitter], default_subject, encode_base64=False) unicode_a = u'\u0107' unicode_b = u'test.\u0107om' test_ctx = SelfDescribingJson('iglu:a.b/c/jsonschema/1-0-0', {'test': unicode_a}) @@ -396,7 +397,7 @@ def test_unicode_get(self) -> None: self.assertEqual(actual_b, unicode_b) def test_unicode_post(self) -> None: - t = tracker.Tracker([post_emitter], default_subject, encode_base64=False) + t = tracker.Tracker([default_emitter], default_subject, encode_base64=False) unicode_a = u'\u0107' unicode_b = u'test.\u0107om' test_ctx = SelfDescribingJson('iglu:a.b/c/jsonschema/1-0-0', {'test': unicode_a}) From 09ef976f41a4777452edc75301dbb47f7569ce55 Mon Sep 17 00:00:00 2001 From: Jack Keene Date: Tue, 1 Nov 2022 12:22:20 +0000 Subject: [PATCH 09/18] Update unit tests --- snowplow_tracker/test/unit/test_emitters.py | 38 ++++++++++----------- 1 file changed, 19 insertions(+), 19 deletions(-) diff 
--git a/snowplow_tracker/test/unit/test_emitters.py b/snowplow_tracker/test/unit/test_emitters.py index 00ff68c1..94ae1f62 100644 --- a/snowplow_tracker/test/unit/test_emitters.py +++ b/snowplow_tracker/test/unit/test_emitters.py @@ -54,8 +54,8 @@ def setUp(self) -> None: def test_init(self) -> None: e = Emitter('0.0.0.0') - self.assertEqual(e.endpoint, 'https://0.0.0.0/i') - self.assertEqual(e.method, 'get') + self.assertEqual(e.endpoint, 'https://0.0.0.0/com.snowplowanalytics.snowplow/tp2') + self.assertEqual(e.method, 'post') self.assertEqual(e.buffer_size, 1) self.assertEqual(e.buffer, []) self.assertIsNone(e.byte_limit) @@ -70,7 +70,7 @@ def test_init_buffer_size(self) -> None: self.assertEqual(e.buffer_size, 10) def test_init_post(self) -> None: - e = Emitter('0.0.0.0', method="post") + e = Emitter('0.0.0.0') self.assertEqual(e.buffer_size, DEFAULT_MAX_LENGTH) def test_init_byte_limit(self) -> None: @@ -83,19 +83,19 @@ def test_init_requests_timeout(self) -> None: def test_as_collector_uri(self) -> None: uri = Emitter.as_collector_uri('0.0.0.0') - self.assertEqual(uri, 'https://0.0.0.0/i') - - def test_as_collector_uri_post(self) -> None: - uri = Emitter.as_collector_uri('0.0.0.0', method="post") self.assertEqual(uri, 'https://0.0.0.0/com.snowplowanalytics.snowplow/tp2') + def test_as_collector_uri_get(self) -> None: + uri = Emitter.as_collector_uri('0.0.0.0', method='get') + self.assertEqual(uri, 'https://0.0.0.0/i') + def test_as_collector_uri_port(self) -> None: - uri = Emitter.as_collector_uri('0.0.0.0', port=9090, method="post") + uri = Emitter.as_collector_uri('0.0.0.0', port=9090) self.assertEqual(uri, 'https://0.0.0.0:9090/com.snowplowanalytics.snowplow/tp2') def test_as_collector_uri_http(self) -> None: uri = Emitter.as_collector_uri('0.0.0.0', protocol="http") - self.assertEqual(uri, 'http://0.0.0.0/i') + self.assertEqual(uri, 'http://0.0.0.0/com.snowplowanalytics.snowplow/tp2') def test_as_collector_uri_empty_string(self) -> None: with 
self.assertRaises(ValueError): @@ -103,11 +103,11 @@ def test_as_collector_uri_empty_string(self) -> None: def test_as_collector_uri_endpoint_protocol(self) -> None: uri = Emitter.as_collector_uri("https://0.0.0.0") - self.assertEqual(uri, "https://0.0.0.0/i") + self.assertEqual(uri, "https://0.0.0.0/com.snowplowanalytics.snowplow/tp2") def test_as_collector_uri_endpoint_protocol_http(self) -> None: uri = Emitter.as_collector_uri("http://0.0.0.0") - self.assertEqual(uri, "http://0.0.0.0/i") + self.assertEqual(uri, "http://0.0.0.0/com.snowplowanalytics.snowplow/tp2") @mock.patch('snowplow_tracker.Emitter.flush') def test_input_no_flush(self, mok_flush: Any) -> None: @@ -173,7 +173,7 @@ def test_input_bytes_queued(self, mok_flush: Any) -> None: def test_input_bytes_post(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="post") + e = Emitter('0.0.0.0') nvPairs = {"testString": "test", "testNum": 2.72} e.input(nvPairs) @@ -219,7 +219,7 @@ def test_attach_sent_tstamp(self) -> None: def test_flush_timer(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="post", buffer_size=10) + e = Emitter('0.0.0.0', buffer_size=10) ev_list = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] for i in ev_list: e.input(i) @@ -261,7 +261,7 @@ def test_send_events_post_success(self, mok_http_post: Any) -> None: mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', method="post", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + e = Emitter('0.0.0.0', buffer_size=10, on_success=mok_success, on_failure=mok_failure) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) @@ -274,7 +274,7 @@ def test_send_events_post_failure(self, mok_http_post: Any) -> None: mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', 
method="post", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + e = Emitter('0.0.0.0', buffer_size=10, on_success=mok_success, on_failure=mok_failure) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) @@ -292,7 +292,7 @@ def test_http_post_connect_timeout_error(self, mok_post_request: Any) -> None: @mock.patch('snowplow_tracker.emitters.requests.post') def test_http_get_connect_timeout_error(self, mok_post_request: Any) -> None: mok_post_request.side_effect = ConnectTimeout - e = Emitter('0.0.0.0') + e = Emitter('0.0.0.0', method='get') get_succeeded = e.http_get({"a": "b"}) self.assertFalse(get_succeeded) @@ -366,7 +366,7 @@ def test_async_send_events_post_success(self, mok_http_post: Any) -> None: mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - ae = Emitter('0.0.0.0', method="post", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + ae = Emitter('0.0.0.0', buffer_size=10, on_success=mok_success, on_failure=mok_failure) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] ae.send_events(evBuffer) @@ -379,7 +379,7 @@ def test_async_send_events_post_failure(self, mok_http_post: Any) -> None: mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - ae = Emitter('0.0.0.0', method="post", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + ae = Emitter('0.0.0.0', buffer_size=10, on_success=mok_success, on_failure=mok_failure) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] ae.send_events(evBuffer) @@ -403,7 +403,7 @@ def test_input_unicode_post(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush payload = {"unicode": u'\u0107', "alsoAscii": "abc"} - ae = AsyncEmitter('0.0.0.0', method="post", buffer_size=2) + ae = AsyncEmitter('0.0.0.0', buffer_size=2) ae.input(payload) self.assertEqual(len(ae.buffer), 1) From f1b4e4cdda5e0fce64ec03d7b7e97baec240b500 Mon Sep 17 
00:00:00 2001 From: Jack Keene Date: Wed, 2 Nov 2022 11:12:33 +0000 Subject: [PATCH 10/18] Change default method to post in Celery emitter --- snowplow_tracker/celery/celery_emitter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snowplow_tracker/celery/celery_emitter.py b/snowplow_tracker/celery/celery_emitter.py index e7a8efae..d9aafaa7 100644 --- a/snowplow_tracker/celery/celery_emitter.py +++ b/snowplow_tracker/celery/celery_emitter.py @@ -52,7 +52,7 @@ def __init__( endpoint: str, protocol: HttpProtocol = "http", port: Optional[int] = None, - method: Method = "get", + method: Method = "post", buffer_size: Optional[int] = None, byte_limit: Optional[int] = None) -> None: super(CeleryEmitter, self).__init__(endpoint, protocol, port, method, buffer_size, None, None, byte_limit) From 3369947b46d9b9d983dcd75c525afac3be346f03 Mon Sep 17 00:00:00 2001 From: Jack Keene Date: Wed, 2 Nov 2022 12:43:33 +0000 Subject: [PATCH 11/18] Update doc strings --- snowplow_tracker/emitters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snowplow_tracker/emitters.py b/snowplow_tracker/emitters.py index 9e9867d8..3d6f2fc1 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -345,7 +345,7 @@ def __init__( :type port: int | None :param method: The HTTP request method :type method: method - :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 10. + :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 1. :type buffer_size: int | None :param on_success: Callback executed after every HTTP request in a flush has status code 200 Gets passed the number of events flushed. 
From 8ef2aca95ed42eaadf5e421d9e1001760aed3d10 Mon Sep 17 00:00:00 2001 From: Jack Keene Date: Wed, 2 Nov 2022 14:14:05 +0000 Subject: [PATCH 12/18] Update default buffer size --- snowplow_tracker/emitters.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/snowplow_tracker/emitters.py b/snowplow_tracker/emitters.py index 3d6f2fc1..96d69604 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -36,7 +36,7 @@ logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -DEFAULT_MAX_LENGTH = 1 +DEFAULT_MAX_LENGTH = 10 PAYLOAD_DATA_SCHEMA = "iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4" PROTOCOLS = {"http", "https"} METHODS = {"get", "post"} @@ -68,7 +68,7 @@ def __init__( :type port: int | None :param method: The HTTP request method. Defaults to post. :type method: method - :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 1. + :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 10. :type buffer_size: int | None :param on_success: Callback executed after every HTTP request in a flush has status code 200 Gets passed the number of events flushed. @@ -94,7 +94,10 @@ def __init__( self.method = method if buffer_size is None: - buffer_size = DEFAULT_MAX_LENGTH + if method == 'post': + buffer_size = DEFAULT_MAX_LENGTH + else: + buffer_size = 1 self.buffer_size = buffer_size self.buffer = [] @@ -345,7 +348,7 @@ def __init__( :type port: int | None :param method: The HTTP request method :type method: method - :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 1. + :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 10. :type buffer_size: int | None :param on_success: Callback executed after every HTTP request in a flush has status code 200 Gets passed the number of events flushed. 
From d852e363939f84508c29703d033f45db837dd727 Mon Sep 17 00:00:00 2001 From: Jack Keene Date: Wed, 2 Nov 2022 15:21:09 +0000 Subject: [PATCH 13/18] Update integration tests --- snowplow_tracker/test/integration/test_integration.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/snowplow_tracker/test/integration/test_integration.py b/snowplow_tracker/test/integration/test_integration.py index 7d73378c..c487bb70 100644 --- a/snowplow_tracker/test/integration/test_integration.py +++ b/snowplow_tracker/test/integration/test_integration.py @@ -36,9 +36,9 @@ querystrings = [""] -default_emitter = emitters.Emitter("localhost", protocol="http", port=80) +default_emitter = emitters.Emitter("localhost", protocol="http", port=80, buffer_size=1) -get_emitter = emitters.Emitter("localhost", protocol="http", port=80, method='get', buffer_size=1) +get_emitter = emitters.Emitter("localhost", protocol="http", port=80, method='get') default_subject = subject.Subject() @@ -313,6 +313,7 @@ def test_integration_failure_callback(self) -> None: callback_failure_queue = [] callback_emitter = emitters.Emitter( "localhost", + method='get', on_success=lambda x: callback_success_queue.append(x), on_failure=lambda x, y: callback_failure_queue.append(x)) t = tracker.Tracker([callback_emitter], default_subject) From bc4ce1fef350b9c41e726d56495810f3cd390b56 Mon Sep 17 00:00:00 2001 From: Jack Keene Date: Wed, 2 Nov 2022 15:21:24 +0000 Subject: [PATCH 14/18] Update unit tests --- snowplow_tracker/test/unit/test_emitters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snowplow_tracker/test/unit/test_emitters.py b/snowplow_tracker/test/unit/test_emitters.py index 94ae1f62..51b107d0 100644 --- a/snowplow_tracker/test/unit/test_emitters.py +++ b/snowplow_tracker/test/unit/test_emitters.py @@ -56,7 +56,7 @@ def test_init(self) -> None: e = Emitter('0.0.0.0') self.assertEqual(e.endpoint, 'https://0.0.0.0/com.snowplowanalytics.snowplow/tp2') 
self.assertEqual(e.method, 'post') - self.assertEqual(e.buffer_size, 1) + self.assertEqual(e.buffer_size, 10) self.assertEqual(e.buffer, []) self.assertIsNone(e.byte_limit) self.assertIsNone(e.bytes_queued) From 36b1a170cfc510b539520ffc09b3607408722663 Mon Sep 17 00:00:00 2001 From: Jack Keene Date: Wed, 2 Nov 2022 15:43:06 +0000 Subject: [PATCH 15/18] Fix formatting --- snowplow_tracker/emitters.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/snowplow_tracker/emitters.py b/snowplow_tracker/emitters.py index 96d69604..d2549d25 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -94,11 +94,10 @@ def __init__( self.method = method if buffer_size is None: - if method == 'post': + if method == "post": buffer_size = DEFAULT_MAX_LENGTH else: buffer_size = 1 - self.buffer_size = buffer_size self.buffer = [] self.byte_limit = byte_limit From 4b6b95609f68db39c9769fb444af04e67c954d9b Mon Sep 17 00:00:00 2001 From: Jack Keene Date: Wed, 2 Nov 2022 16:42:16 +0000 Subject: [PATCH 16/18] Update example app --- examples/app.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/app.py b/examples/app.py index 829055d0..973f5a99 100644 --- a/examples/app.py +++ b/examples/app.py @@ -19,7 +19,7 @@ def main(): t = Tracker(e, s) - print("Sending events to " + collector_url) + print("Sending events to " + e.endpoint) t.track_page_view("https://www.snowplow.io", "Homepage") t.track_page_ping("https://www.snowplow.io", "Homepage") @@ -32,6 +32,7 @@ def main(): ) ) t.track_struct_event("shop", "add-to-basket", None, "pcs", 2) + t.flush() if __name__ == "__main__": From f07c5e7dcb7bbd8f4e88fef19b1194d2d2e03d72 Mon Sep 17 00:00:00 2001 From: Jack Keene Date: Wed, 2 Nov 2022 16:44:02 +0000 Subject: [PATCH 17/18] Update example app --- examples/app.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/app.py b/examples/app.py index 973f5a99..829055d0 100644 --- a/examples/app.py +++
b/examples/app.py @@ -19,7 +19,7 @@ def main(): t = Tracker(e, s) - print("Sending events to " + e.endpoint) + print("Sending events to " + collector_url) t.track_page_view("https://www.snowplow.io", "Homepage") t.track_page_ping("https://www.snowplow.io", "Homepage") @@ -32,7 +32,6 @@ def main(): ) ) t.track_struct_event("shop", "add-to-basket", None, "pcs", 2) - t.flush() if __name__ == "__main__": From d695e6a826f4c9c650f0ed8fcb4c967f37e5cf16 Mon Sep 17 00:00:00 2001 From: Jack Keene Date: Wed, 2 Nov 2022 16:48:27 +0000 Subject: [PATCH 18/18] Update example app --- examples/app.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/app.py b/examples/app.py index 829055d0..973f5a99 100644 --- a/examples/app.py +++ b/examples/app.py @@ -19,7 +19,7 @@ def main(): t = Tracker(e, s) - print("Sending events to " + collector_url) + print("Sending events to " + e.endpoint) t.track_page_view("https://www.snowplow.io", "Homepage") t.track_page_ping("https://www.snowplow.io", "Homepage") @@ -32,6 +32,7 @@ def main(): ) ) t.track_struct_event("shop", "add-to-basket", None, "pcs", 2) + t.flush() if __name__ == "__main__":