From 6d8d588c6e2a270fefd290541331f866a9f8b4cd Mon Sep 17 00:00:00 2001 From: Greg Leonard <45019882+greg-el@users.noreply.github.com> Date: Tue, 20 Aug 2024 13:39:33 +0100 Subject: [PATCH 01/15] Fix `PagePing`, `PageView`, and `StructuredEvent` property getters (close #361) --- snowplow_tracker/events/page_ping.py | 14 +++---- snowplow_tracker/events/page_view.py | 6 +-- snowplow_tracker/events/structured_event.py | 10 ++--- snowplow_tracker/test/unit/test_page_ping.py | 38 +++++++++++++++++++ snowplow_tracker/test/unit/test_page_view.py | 27 +++++++++++++ .../test/unit/test_structured_event.py | 24 ++++++++++++ 6 files changed, 104 insertions(+), 15 deletions(-) create mode 100644 snowplow_tracker/test/unit/test_page_ping.py create mode 100644 snowplow_tracker/test/unit/test_page_view.py create mode 100644 snowplow_tracker/test/unit/test_structured_event.py diff --git a/snowplow_tracker/events/page_ping.py b/snowplow_tracker/events/page_ping.py index b0084797..43bbb210 100644 --- a/snowplow_tracker/events/page_ping.py +++ b/snowplow_tracker/events/page_ping.py @@ -81,7 +81,7 @@ def page_url(self) -> str: """ URL of the viewed page """ - return self.payload.get("url") + return self.payload.nv_pairs["url"] @page_url.setter def page_url(self, value: str): @@ -93,7 +93,7 @@ def page_title(self) -> Optional[str]: """ URL of the viewed page """ - return self.payload.get("page") + return self.payload.nv_pairs.get("page") @page_title.setter def page_title(self, value: Optional[str]): @@ -104,7 +104,7 @@ def referrer(self) -> Optional[str]: """ The referrer of the page """ - return self.payload.get("refr") + return self.payload.nv_pairs.get("refr") @referrer.setter def referrer(self, value: Optional[str]): @@ -115,7 +115,7 @@ def min_x(self) -> Optional[int]: """ Minimum page x offset seen in the last ping period """ - return self.payload.get("pp_mix") + return self.payload.nv_pairs.get("pp_mix") @min_x.setter def min_x(self, value: Optional[int]): @@ -126,7 +126,7 @@ def 
max_x(self) -> Optional[int]: """ Maximum page x offset seen in the last ping period """ - return self.payload.get("pp_max") + return self.payload.nv_pairs.get("pp_max") @max_x.setter def max_x(self, value: Optional[int]): @@ -137,7 +137,7 @@ def min_y(self) -> Optional[int]: """ Minimum page y offset seen in the last ping period """ - return self.payload.get("pp_miy") + return self.payload.nv_pairs.get("pp_miy") @min_y.setter def min_y(self, value: Optional[int]): @@ -148,7 +148,7 @@ def max_y(self) -> Optional[int]: """ Maximum page y offset seen in the last ping period """ - return self.payload.get("pp_may") + return self.payload.nv_pairs.get("pp_may") @max_y.setter def max_y(self, value: Optional[int]): diff --git a/snowplow_tracker/events/page_view.py b/snowplow_tracker/events/page_view.py index dc9ca51f..53e44bb6 100644 --- a/snowplow_tracker/events/page_view.py +++ b/snowplow_tracker/events/page_view.py @@ -65,7 +65,7 @@ def page_url(self) -> str: """ URL of the viewed page """ - return self.payload.get("url") + return self.payload.nv_pairs["url"] @page_url.setter def page_url(self, value: str): @@ -77,7 +77,7 @@ def page_title(self) -> Optional[str]: """ Title of the viewed page """ - return self.payload.get("page") + return self.payload.nv_pairs.get("page") @page_title.setter def page_title(self, value: Optional[str]): @@ -88,7 +88,7 @@ def referrer(self) -> Optional[str]: """ The referrer of the page """ - return self.payload.get("refr") + return self.payload.nv_pairs.get("refr") @referrer.setter def referrer(self, value: Optional[str]): diff --git a/snowplow_tracker/events/structured_event.py b/snowplow_tracker/events/structured_event.py index a92acc0f..00658e9f 100644 --- a/snowplow_tracker/events/structured_event.py +++ b/snowplow_tracker/events/structured_event.py @@ -81,7 +81,7 @@ def category(self) -> Optional[str]: """ Category of the event """ - return self.payload.get("se_ca") + return self.payload.nv_pairs.get("se_ca") @category.setter def 
category(self, value: Optional[str]): @@ -93,7 +93,7 @@ def action(self) -> Optional[str]: """ The event itself """ - return self.payload.get("se_ac") + return self.payload.nv_pairs.get("se_ac") @action.setter def action(self, value: Optional[str]): @@ -105,7 +105,7 @@ def label(self) -> Optional[str]: """ Refer to the object the action is performed on """ - return self.payload.get("se_la") + return self.payload.nv_pairs.get("se_la") @label.setter def label(self, value: Optional[str]): @@ -116,7 +116,7 @@ def property_(self) -> Optional[str]: """ Property associated with either the action or the object """ - return self.payload.get("se_pr") + return self.payload.nv_pairs.get("se_pr") @property_.setter def property_(self, value: Optional[str]): @@ -127,7 +127,7 @@ def value(self) -> Optional[int]: """ A value associated with the user action """ - return self.payload.get("se_va") + return self.payload.nv_pairs.get("se_va") @value.setter def value(self, value: Optional[int]): diff --git a/snowplow_tracker/test/unit/test_page_ping.py b/snowplow_tracker/test/unit/test_page_ping.py new file mode 100644 index 00000000..7539ce43 --- /dev/null +++ b/snowplow_tracker/test/unit/test_page_ping.py @@ -0,0 +1,38 @@ +import pytest + +from snowplow_tracker.events.page_ping import PagePing + + +class TestPagePing: + def test_getters(self): + pp = PagePing("url", "title", "referrer", 1, 2, 3, 4) + assert pp.page_url == "url" + assert pp.page_title == "title" + assert pp.referrer == "referrer" + assert pp.min_x == 1 + assert pp.max_x == 2 + assert pp.min_y == 3 + assert pp.max_y == 4 + + def test_setters(self): + pp = PagePing("url") + pp.page_title = "title" + pp.referrer = "referrer" + pp.min_x = 1 + pp.max_x = 2 + pp.min_y = 3 + pp.max_y = 4 + assert pp.page_title == "title" + assert pp.referrer == "referrer" + assert pp.min_x == 1 + assert pp.max_x == 2 + assert pp.min_y == 3 + assert pp.max_y == 4 + assert pp.page_url == "url" + + def test_page_url_non_empty_string(self): + pp = 
PagePing("url") + pp.page_url = "new_url" + assert pp.page_url == "new_url" + with pytest.raises(ValueError): + pp.page_url = "" diff --git a/snowplow_tracker/test/unit/test_page_view.py b/snowplow_tracker/test/unit/test_page_view.py new file mode 100644 index 00000000..3736710c --- /dev/null +++ b/snowplow_tracker/test/unit/test_page_view.py @@ -0,0 +1,27 @@ +import pytest + +from snowplow_tracker.events.page_view import PageView + + +class TestPageView: + def test_getters(self): + pv = PageView("url", "title", "referrer") + assert pv.page_url == "url" + assert pv.page_title == "title" + assert pv.referrer == "referrer" + + def test_setters(self): + pv = PageView("url", "title", "referrer") + pv.page_url = "new_url" + pv.page_title = "new_title" + pv.referrer = "new_referrer" + assert pv.page_url == "new_url" + assert pv.page_title == "new_title" + assert pv.referrer == "new_referrer" + + def test_page_url_non_empty_string(self): + pv = PageView("url") + pv.page_url = "new_url" + assert pv.page_url == "new_url" + with pytest.raises(ValueError): + pv.page_url = "" diff --git a/snowplow_tracker/test/unit/test_structured_event.py b/snowplow_tracker/test/unit/test_structured_event.py new file mode 100644 index 00000000..fdf00014 --- /dev/null +++ b/snowplow_tracker/test/unit/test_structured_event.py @@ -0,0 +1,24 @@ +from snowplow_tracker.events.structured_event import StructuredEvent + + +class TestStructuredEvent: + def test_getters(self): + se = StructuredEvent("category", "action", "label", "property", 1) + assert se.category == "category" + assert se.action == "action" + assert se.label == "label" + assert se.property_ == "property" + assert se.value == 1 + + def test_setters(self): + se = StructuredEvent("category", "action") + se.category = "new_category" + se.action = "new_action" + se.label = "new_label" + se.property_ = "new_property" + se.value = 2 + assert se.category == "new_category" + assert se.action == "new_action" + assert se.label == "new_label" + 
assert se.property_ == "new_property" + assert se.value == 2 From f58d845969ec9daf368f7b3dab1732fa764c0818 Mon Sep 17 00:00:00 2001 From: Greg Leonard <45019882+greg-el@users.noreply.github.com> Date: Wed, 21 Aug 2024 12:52:33 +0100 Subject: [PATCH 02/15] Update typing --- .github/workflows/ci.yml | 5 +++ setup.py | 6 +++- snowplow_tracker/constants.py | 2 +- snowplow_tracker/contracts.py | 2 +- snowplow_tracker/emitters.py | 35 ++++++++++++++++----- snowplow_tracker/event_store.py | 11 ++++--- snowplow_tracker/events/event.py | 5 ++- snowplow_tracker/events/screen_view.py | 4 +-- snowplow_tracker/events/structured_event.py | 12 +++---- snowplow_tracker/payload.py | 5 ++- snowplow_tracker/snowplow.py | 6 ++-- snowplow_tracker/subject.py | 4 +-- snowplow_tracker/tracker.py | 31 ++++++++++++------ snowplow_tracker/tracker_configuration.py | 8 ++--- snowplow_tracker/typing.py | 9 ++++-- 15 files changed, 94 insertions(+), 51 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 85929119..a285a2d9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -40,6 +40,11 @@ jobs: - name: Tests run: | pytest --cov=snowplow_tracker --cov-report=xml + + - name: MyPy + run: | + python -m pip install mypy + mypy snowplow_tracker --exclude '/test' - name: Demo run: | diff --git a/setup.py b/setup.py index d0ef7f04..e1b2aa33 100644 --- a/setup.py +++ b/setup.py @@ -65,5 +65,9 @@ "Programming Language :: Python :: 3.12", "Operating System :: OS Independent", ], - install_requires=["requests>=2.25.1,<3.0", "typing_extensions>=3.7.4"], + install_requires=[ + "requests>=2.25.1,<3.0", + "types-requests>=2.25.1,<3.0", + "typing_extensions>=3.7.4", + ], ) diff --git a/snowplow_tracker/constants.py b/snowplow_tracker/constants.py index 579ff86e..53ecc151 100644 --- a/snowplow_tracker/constants.py +++ b/snowplow_tracker/constants.py @@ -18,7 +18,7 @@ from snowplow_tracker import _version, SelfDescribingJson VERSION = "py-%s" % 
_version.__version__ -DEFAULT_ENCODE_BASE64 = True +DEFAULT_ENCODE_BASE64: bool = True # Type hint required for Python 3.6 MyPy check BASE_SCHEMA_PATH = "iglu:com.snowplowanalytics.snowplow" MOBILE_SCHEMA_PATH = "iglu:com.snowplowanalytics.mobile" SCHEMA_TAG = "jsonschema" diff --git a/snowplow_tracker/contracts.py b/snowplow_tracker/contracts.py index c54ac668..3b17e1a3 100644 --- a/snowplow_tracker/contracts.py +++ b/snowplow_tracker/contracts.py @@ -77,7 +77,7 @@ def _get_parameter_name() -> str: match = _MATCH_FIRST_PARAMETER_REGEX.search(code) if not match: return "Unnamed parameter" - return match.groups(0)[0] + return str(match.groups(0)[0]) def _check_form_element(element: Dict[str, Any]) -> bool: diff --git a/snowplow_tracker/emitters.py b/snowplow_tracker/emitters.py index af233566..6a138f0f 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -20,7 +20,7 @@ import threading import requests import random -from typing import Optional, Union, Tuple, Dict +from typing import Optional, Union, Tuple, Dict, cast, Callable from queue import Queue from snowplow_tracker.self_describing_json import SelfDescribingJson @@ -31,6 +31,7 @@ Method, SuccessCallback, FailureCallback, + EmitterProtocol, ) from snowplow_tracker.contracts import one_of from snowplow_tracker.event_store import EventStore, InMemoryEventStore @@ -48,7 +49,20 @@ METHODS = {"get", "post"} -class Emitter(object): +# Unifies the two request methods under one interface +class Requester: + post: Callable + get: Callable + + def __init__(self, post: Callable, get: Callable): + # 3.6 MyPy compatibility: + # error: Cannot assign to a method + # https://github.com/python/mypy/issues/2427 + setattr(self, "post", post) + setattr(self, "get", get) + + +class Emitter(EmitterProtocol): """ Synchronously send Snowplow events to a Snowplow collector Supports both GET and POST requests @@ -151,12 +165,15 @@ def __init__( self.retry_timer = FlushTimer(emitter=self, repeating=False) 
self.max_retry_delay_seconds = max_retry_delay_seconds - self.retry_delay = 0 + self.retry_delay: Union[int, float] = 0 self.custom_retry_codes = custom_retry_codes logger.info("Emitter initialized with endpoint " + self.endpoint) - self.request_method = requests if session is None else session + if session is None: + self.request_method = Requester(post=requests.post, get=requests.get) + else: + self.request_method = Requester(post=session.post, get=session.get) @staticmethod def as_collector_uri( @@ -183,7 +200,7 @@ def as_collector_uri( if endpoint.split("://")[0] in PROTOCOLS: endpoint_arr = endpoint.split("://") - protocol = endpoint_arr[0] + protocol = cast(HttpProtocol, endpoint_arr[0]) endpoint = endpoint_arr[1] if method == "get": @@ -427,6 +444,10 @@ def _cancel_retry_timer(self) -> None: """ self.retry_timer.cancel() + # This is only here to satisfy the `EmitterProtocol` interface + def async_flush(self) -> None: + return + class AsyncEmitter(Emitter): """ @@ -446,7 +467,7 @@ def __init__( byte_limit: Optional[int] = None, request_timeout: Optional[Union[float, Tuple[float, float]]] = None, max_retry_delay_seconds: int = 60, - buffer_capacity: int = None, + buffer_capacity: Optional[int] = None, custom_retry_codes: Dict[int, bool] = {}, event_store: Optional[EventStore] = None, session: Optional[requests.Session] = None, @@ -501,7 +522,7 @@ def __init__( event_store=event_store, session=session, ) - self.queue = Queue() + self.queue: Queue = Queue() for i in range(thread_count): t = threading.Thread(target=self.consume) t.daemon = True diff --git a/snowplow_tracker/event_store.py b/snowplow_tracker/event_store.py index 898f92ff..b8d13028 100644 --- a/snowplow_tracker/event_store.py +++ b/snowplow_tracker/event_store.py @@ -15,6 +15,7 @@ # language governing permissions and limitations there under. 
# """ +from typing import List from typing_extensions import Protocol from snowplow_tracker.typing import PayloadDict, PayloadDictList from logging import Logger @@ -25,7 +26,7 @@ class EventStore(Protocol): EventStore protocol. For buffering events in the Emitter. """ - def add_event(payload: PayloadDict) -> bool: + def add_event(self, payload: PayloadDict) -> bool: """ Add PayloadDict to buffer. Returns True if successful. @@ -35,7 +36,7 @@ def add_event(payload: PayloadDict) -> bool: """ ... - def get_events_batch() -> PayloadDictList: + def get_events_batch(self) -> PayloadDictList: """ Get a list of all the PayloadDicts in the buffer. @@ -43,7 +44,7 @@ def get_events_batch() -> PayloadDictList: """ ... - def cleanup(batch: PayloadDictList, need_retry: bool) -> None: + def cleanup(self, batch: PayloadDictList, need_retry: bool) -> None: """ Removes sent events from the event store. If events need to be retried they are re-added to the buffer. @@ -54,7 +55,7 @@ def cleanup(batch: PayloadDictList, need_retry: bool) -> None: """ ... - def size() -> int: + def size(self) -> int: """ Returns the number of events in the buffer @@ -76,7 +77,7 @@ def __init__(self, logger: Logger, buffer_capacity: int = 10000) -> None: When the buffer is full new events are lost. 
:type buffer_capacity int """ - self.event_buffer = [] + self.event_buffer: List[PayloadDict] = [] self.buffer_capacity = buffer_capacity self.logger = logger diff --git a/snowplow_tracker/events/event.py b/snowplow_tracker/events/event.py index c9d9b82e..fb300b87 100644 --- a/snowplow_tracker/events/event.py +++ b/snowplow_tracker/events/event.py @@ -97,10 +97,9 @@ def build_payload( if self.event_subject is not None: fin_payload_dict = self.event_subject.combine_subject(subject) else: - fin_payload_dict = None if subject is None else subject.standard_nv_pairs + fin_payload_dict = {} if subject is None else subject.standard_nv_pairs - if fin_payload_dict is not None: - self.payload.add_dict(fin_payload_dict) + self.payload.add_dict(fin_payload_dict) return self.payload @property diff --git a/snowplow_tracker/events/screen_view.py b/snowplow_tracker/events/screen_view.py index d0cea5d0..6b4af927 100644 --- a/snowplow_tracker/events/screen_view.py +++ b/snowplow_tracker/events/screen_view.py @@ -15,7 +15,7 @@ # language governing permissions and limitations there under. # """ -from typing import Optional, List +from typing import Dict, Optional, List from snowplow_tracker.typing import JsonEncoderFunction from snowplow_tracker.events.event import Event from snowplow_tracker.events.self_describing import SelfDescribing @@ -76,7 +76,7 @@ def __init__( super(ScreenView, self).__init__( event_subject=event_subject, context=context, true_timestamp=true_timestamp ) - self.screen_view_properties = {} + self.screen_view_properties: Dict[str, str] = {} self.id_ = id_ self.name = name self.type = type diff --git a/snowplow_tracker/events/structured_event.py b/snowplow_tracker/events/structured_event.py index 00658e9f..23abafa8 100644 --- a/snowplow_tracker/events/structured_event.py +++ b/snowplow_tracker/events/structured_event.py @@ -15,7 +15,7 @@ # language governing permissions and limitations there under. 
# """ from snowplow_tracker.events.event import Event -from typing import Optional, List +from typing import Optional, List, Union from snowplow_tracker.subject import Subject from snowplow_tracker.self_describing_json import SelfDescribingJson from snowplow_tracker.contracts import non_empty_string @@ -41,7 +41,7 @@ def __init__( action: str, label: Optional[str] = None, property_: Optional[str] = None, - value: Optional[int] = None, + value: Optional[Union[int, float]] = None, event_subject: Optional[Subject] = None, context: Optional[List[SelfDescribingJson]] = None, true_timestamp: Optional[float] = None, @@ -84,7 +84,7 @@ def category(self) -> Optional[str]: return self.payload.nv_pairs.get("se_ca") @category.setter - def category(self, value: Optional[str]): + def category(self, value: str): non_empty_string(value) self.payload.add("se_ca", value) @@ -96,7 +96,7 @@ def action(self) -> Optional[str]: return self.payload.nv_pairs.get("se_ac") @action.setter - def action(self, value: Optional[str]): + def action(self, value: str): non_empty_string(value) self.payload.add("se_ac", value) @@ -123,12 +123,12 @@ def property_(self, value: Optional[str]): self.payload.add("se_pr", value) @property - def value(self) -> Optional[int]: + def value(self) -> Optional[Union[int, float]]: """ A value associated with the user action """ return self.payload.nv_pairs.get("se_va") @value.setter - def value(self, value: Optional[int]): + def value(self, value: Optional[Union[int, float]]): self.payload.add("se_va", value) diff --git a/snowplow_tracker/payload.py b/snowplow_tracker/payload.py index 26e3262c..18d1bf4d 100644 --- a/snowplow_tracker/payload.py +++ b/snowplow_tracker/payload.py @@ -83,9 +83,8 @@ def add_json( if encode_base64: encoded_dict = base64.urlsafe_b64encode(json_dict.encode("utf-8")) - if not isinstance(encoded_dict, str): - encoded_dict = encoded_dict.decode("utf-8") - self.add(type_when_encoded, encoded_dict) + encoded_dict_str = 
encoded_dict.decode("utf-8") + self.add(type_when_encoded, encoded_dict_str) else: self.add(type_when_not_encoded, json_dict) diff --git a/snowplow_tracker/snowplow.py b/snowplow_tracker/snowplow.py index d824ed26..daa1434b 100644 --- a/snowplow_tracker/snowplow.py +++ b/snowplow_tracker/snowplow.py @@ -16,7 +16,7 @@ # """ import logging -from typing import Optional +from typing import Dict, Optional from snowplow_tracker import ( Tracker, Emitter, @@ -37,7 +37,7 @@ class Snowplow: - _trackers = {} + _trackers: Dict[str, Tracker] = {} @staticmethod def create_tracker( @@ -149,7 +149,7 @@ def reset(cls): cls._trackers = {} @classmethod - def get_tracker(cls, namespace: str) -> Tracker: + def get_tracker(cls, namespace: str) -> Optional[Tracker]: """ Returns a Snowplow tracker from the Snowplow object if it exists :param namespace: Snowplow tracker namespace diff --git a/snowplow_tracker/subject.py b/snowplow_tracker/subject.py index c3165d34..cbf29aa8 100644 --- a/snowplow_tracker/subject.py +++ b/snowplow_tracker/subject.py @@ -15,7 +15,7 @@ # language governing permissions and limitations there under. 
# """ -from typing import Optional +from typing import Dict, Optional, Union from snowplow_tracker.contracts import one_of, greater_than from snowplow_tracker.typing import SupportedPlatform, SUPPORTED_PLATFORMS, PayloadDict @@ -30,7 +30,7 @@ class Subject(object): """ def __init__(self) -> None: - self.standard_nv_pairs = {"p": DEFAULT_PLATFORM} + self.standard_nv_pairs: Dict[str, Union[str, int]] = {"p": DEFAULT_PLATFORM} def set_platform(self, value: SupportedPlatform) -> "Subject": """ diff --git a/snowplow_tracker/tracker.py b/snowplow_tracker/tracker.py index 2effe835..4dc489dc 100644 --- a/snowplow_tracker/tracker.py +++ b/snowplow_tracker/tracker.py @@ -80,13 +80,13 @@ def __init__( if subject is None: subject = Subject() - if type(emitters) is list: + if isinstance(emitters, list): non_empty(emitters) self.emitters = emitters else: self.emitters = [emitters] - self.subject = subject + self.subject: Optional[Subject] = subject self.encode_base64 = encode_base64 self.json_encoder = json_encoder @@ -145,6 +145,8 @@ def track( if "eid" in payload.nv_pairs.keys(): return payload.nv_pairs["eid"] + return None + def complete_payload( self, event: Event, @@ -298,7 +300,7 @@ def track_link_click( ) non_empty_string(target_url) - properties = {} + properties: Dict[str, Union[str, ElementClasses]] = {} properties["targetUrl"] = target_url if element_id is not None: properties["elementId"] = element_id @@ -361,7 +363,7 @@ def track_add_to_cart( ) non_empty_string(sku) - properties = {} + properties: Union[Dict[str, Union[str, float, int]]] = {} properties["sku"] = sku properties["quantity"] = quantity if name is not None: @@ -425,7 +427,7 @@ def track_remove_from_cart( ) non_empty_string(sku) - properties = {} + properties: Dict[str, Union[str, float, int]] = {} properties["sku"] = sku properties["quantity"] = quantity if name is not None: @@ -493,7 +495,7 @@ def track_form_change( if type_ is not None: one_of(type_.lower(), FORM_TYPES) - properties = dict() + 
properties: Dict[str, Union[Optional[str], ElementClasses]] = dict() properties["formId"] = form_id properties["elementId"] = element_id properties["nodeName"] = node_name @@ -549,7 +551,9 @@ def track_form_submit( for element in elements or []: form_element(element) - properties = dict() + properties: Dict[ + str, Union[str, ElementClasses, FormClasses, List[Dict[str, Any]]] + ] = dict() properties["formId"] = form_id if form_classes is not None: properties["formClasses"] = form_classes @@ -602,7 +606,9 @@ def track_site_search( ) non_empty(terms) - properties = {} + properties: Dict[ + str, Union[Sequence[str], Dict[str, Union[str, bool]], int] + ] = {} properties["terms"] = terms if filters is not None: properties["filters"] = filters @@ -878,7 +884,7 @@ def track_struct_event( action: str, label: Optional[str] = None, property_: Optional[str] = None, - value: Optional[float] = None, + value: Optional[Union[int, float]] = None, context: Optional[List[SelfDescribingJson]] = None, tstamp: Optional[float] = None, event_subject: Optional[Subject] = None, @@ -1037,4 +1043,9 @@ def add_emitter(self, emitter: EmitterProtocol) -> "Tracker": return self def get_namespace(self) -> str: - return self.standard_nv_pairs["tna"] + # As app_id is added to the standard_nv_pairs dict above with a type of Optional[str], the type for + # the whole standard_nv_pairs dict is inferred to be dict[str, Optional[str]]. + # But, we know that "tna" should always be present in the dict, since namespace is a required argument. 
+ # + # This ignores MyPy saying Incompatible return value type (got "str | None", expected "str") + return self.standard_nv_pairs["tna"] # type: ignore diff --git a/snowplow_tracker/tracker_configuration.py b/snowplow_tracker/tracker_configuration.py index af2a4b9d..6a574dc2 100644 --- a/snowplow_tracker/tracker_configuration.py +++ b/snowplow_tracker/tracker_configuration.py @@ -22,7 +22,7 @@ class TrackerConfiguration(object): def __init__( self, - encode_base64: Optional[bool] = None, + encode_base64: bool = True, json_encoder: Optional[JsonEncoderFunction] = None, ) -> None: """ @@ -37,18 +37,16 @@ def __init__( self.json_encoder = json_encoder @property - def encode_base64(self) -> Optional[bool]: + def encode_base64(self) -> bool: """ Whether JSONs in the payload should be base-64 encoded. Default is True. """ return self._encode_base64 @encode_base64.setter - def encode_base64(self, value: Optional[bool]): + def encode_base64(self, value: bool): if isinstance(value, bool) or value is None: self._encode_base64 = value - else: - raise ValueError("encode_base64 must be True or False") @property def json_encoder(self) -> Optional[JsonEncoderFunction]: diff --git a/snowplow_tracker/typing.py b/snowplow_tracker/typing.py index 5bbc477b..3e973562 100644 --- a/snowplow_tracker/typing.py +++ b/snowplow_tracker/typing.py @@ -65,5 +65,10 @@ class EmitterProtocol(Protocol): - def input(self, payload: PayloadDict) -> None: - ... + def input(self, payload: PayloadDict) -> None: ... + + def flush(self) -> None: ... + + def async_flush(self) -> None: ... + + def sync_flush(self) -> None: ... 
From 65bdd7b777b6cd153f405e11c78342f4b1b461ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez-Mondrag=C3=B3n?= Date: Sun, 28 Jul 2024 13:05:21 -0600 Subject: [PATCH 03/15] Add py.typed to package (close #360) --- setup.py | 1 + snowplow_tracker/py.typed | 0 2 files changed, 1 insertion(+) create mode 100644 snowplow_tracker/py.typed diff --git a/setup.py b/setup.py index e1b2aa33..1cdd729f 100644 --- a/setup.py +++ b/setup.py @@ -44,6 +44,7 @@ author=authors_str, author_email=authors_email_str, packages=["snowplow_tracker", "snowplow_tracker.test", "snowplow_tracker.events"], + package_data={"snowplow_tracker": ["py.typed"]}, url="http://snowplow.io", license="Apache License 2.0", description="Snowplow event tracker for Python. Add analytics to your Python and Django apps, webapps and games", diff --git a/snowplow_tracker/py.typed b/snowplow_tracker/py.typed new file mode 100644 index 00000000..e69de29b From ea634ec6e0b29fea82ca1637c5dc553f76f5686e Mon Sep 17 00:00:00 2001 From: Jack Keene Date: Fri, 23 Aug 2024 11:51:21 +0100 Subject: [PATCH 04/15] Update `on_success` docstring (close #358) PR #366 * Update `on_success` docstring --- snowplow_tracker/emitter_configuration.py | 8 +++----- snowplow_tracker/emitters.py | 10 ++++------ 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/snowplow_tracker/emitter_configuration.py b/snowplow_tracker/emitter_configuration.py index 1cf90238..82626fa4 100644 --- a/snowplow_tracker/emitter_configuration.py +++ b/snowplow_tracker/emitter_configuration.py @@ -39,13 +39,12 @@ def __init__( :param batch_size: The maximum number of queued events before the buffer is flushed. Default is 10. :type batch_size: int | None :param on_success: Callback executed after every HTTP request in a flush has status code 200 - Gets passed the number of events flushed. 
+ Gets passed one argument, an array of dictionaries corresponding to the sent events' payloads :type on_success: function | None :param on_failure: Callback executed if at least one HTTP request in a flush has status code other than 200 Gets passed two arguments: 1) The number of events which were successfully sent - 2) If method is "post": The unsent data in string form; - If method is "get": An array of dictionaries corresponding to the unsent events' payloads + 2) An array of dictionaries corresponding to the unsent events' payloads :type on_failure: function | None :param byte_limit: The size event list after reaching which queued events will be flushed :type byte_limit: int | None @@ -105,8 +104,7 @@ def on_failure(self) -> Optional[FailureCallback]: Callback executed if at least one HTTP request in a flush has status code other than 200 Gets passed two arguments: 1) The number of events which were successfully sent - 2) If method is "post": The unsent data in string form; - If method is "get": An array of dictionaries corresponding to the unsent events' payloads + 2) An array of dictionaries corresponding to the unsent events' payloads """ return self._on_failure diff --git a/snowplow_tracker/emitters.py b/snowplow_tracker/emitters.py index 6a138f0f..72f451bf 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -97,13 +97,12 @@ def __init__( :param batch_size: The maximum number of queued events before the buffer is flushed. Default is 10. :type batch_size: int | None :param on_success: Callback executed after every HTTP request in a flush has status code 200 - Gets passed the number of events flushed. 
+ Gets passed one argument, an array of dictionaries corresponding to the sent events' payloads :type on_success: function | None :param on_failure: Callback executed if at least one HTTP request in a flush has status code other than 200 Gets passed two arguments: 1) The number of events which were successfully sent - 2) If method is "post": The unsent data in string form; - If method is "get": An array of dictionaries corresponding to the unsent events' payloads + 2) An array of dictionaries corresponding to the unsent events' payloads :type on_failure: function | None :param byte_limit: The size event list after reaching which queued events will be flushed :type byte_limit: int | None @@ -484,13 +483,12 @@ def __init__( :param batch_size: The maximum number of queued events before the buffer is flushed. Default is 10. :type batch_size: int | None :param on_success: Callback executed after every HTTP request in a flush has status code 200 - Gets passed the number of events flushed. + Gets passed one argument, an array of dictionaries corresponding to the sent events' payloads :type on_success: function | None :param on_failure: Callback executed if at least one HTTP request in a flush has status code other than 200 Gets passed two arguments: 1) The number of events which were successfully sent - 2) If method is "post": The unsent data in string form; - If method is "get": An array of dictionaries corresponding to the unsent events' payloads + 2) An array of dictionaries corresponding to the unsent events' payloads :type on_failure: function | None :param thread_count: Number of worker threads to use for HTTP requests :type thread_count: int From 9554954092bf00fca6db786f39d6db525c5a4fc9 Mon Sep 17 00:00:00 2001 From: Greg Leonard <45019882+greg-el@users.noreply.github.com> Date: Fri, 23 Aug 2024 12:02:15 +0100 Subject: [PATCH 05/15] Fix docs action (close #367) --- docs/requirements.txt | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) 
diff --git a/docs/requirements.txt b/docs/requirements.txt index 229d0ff5..36f69b0c 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,9 +1,15 @@ -sphinx -sphinx_rtd_theme -sphinx_copybutton -sphinx_minipres -sphinx_tabs -sphinx_togglebutton>=0.2.0 -sphinx-autobuild + +sphinx==7.1.2 +sphinx_rtd_theme==2.0.0 +sphinx_copybutton==0.5.2 +sphinx_minipres==0.2.1 +sphinx_tabs==3.4.5 + +sphinx_togglebutton==0.3.2 +# Transitive dependency of togglebutton causing: +# https://security.snyk.io/vuln/SNYK-PYTHON-SETUPTOOLS-7448482 +setuptools==70.0.0 + +sphinx-autobuild==2021.3.14 myst_nb>0.8.3 -sphinx_rtd_theme_ext_color_contrast \ No newline at end of file +sphinx_rtd_theme_ext_color_contrast==0.3.2 From b75934afdcd40dd8804dfcc8174a59c1868ebd1c Mon Sep 17 00:00:00 2001 From: Greg Leonard <45019882+greg-el@users.noreply.github.com> Date: Tue, 27 Aug 2024 11:42:28 +0100 Subject: [PATCH 06/15] Prepare for 1.0.3 release --- CHANGES.txt | 8 ++++++++ docs/source/conf.py | 2 +- setup.py | 2 +- snowplow_tracker/_version.py | 2 +- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 4a5c0c29..861f8987 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,11 @@ +Version 1.0.3 (2024-08-27) +-------------------------- +Fix docs action (close #367) +Update `on_success` docstring (close #358) +Add py.typed to package (close #360) (Thanks to @edgarrmondragon) +Update typing +Fix `PagePing`, `PageView`, and `StructuredEvent` property getters (close #361) + Version 1.0.2 (2024-02-26) -------------------------- Add Python 3.12 to CI tests (#356) (Thanks to @edgarrmondragon) diff --git a/docs/source/conf.py b/docs/source/conf.py index e2762fe0..647cf119 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -28,7 +28,7 @@ author = 'Alex Dean, Paul Boocock, Matus Tomlein, Jack Keene' # The full version, including alpha/beta/rc tags -release = "1.0.2" +release = "1.0.3" # -- General configuration 
--------------------------------------------------- diff --git a/setup.py b/setup.py index 1cdd729f..4b180da6 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,7 @@ setup( name="snowplow-tracker", - version="1.0.2", + version="1.0.3", author=authors_str, author_email=authors_email_str, packages=["snowplow_tracker", "snowplow_tracker.test", "snowplow_tracker.events"], diff --git a/snowplow_tracker/_version.py b/snowplow_tracker/_version.py index 38be1911..b71b2e2c 100644 --- a/snowplow_tracker/_version.py +++ b/snowplow_tracker/_version.py @@ -15,6 +15,6 @@ # language governing permissions and limitations there under. # """ -__version_info__ = (1, 0, 2) +__version_info__ = (1, 0, 3) __version__ = ".".join(str(x) for x in __version_info__) __build_version__ = __version__ + "" From 92fb34ae9e5b9acfc0d3f552357ab298f63b561e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 3 Sep 2024 22:20:22 +0000 Subject: [PATCH 07/15] Bump actions/download-artifact from 3 to 4.1.7 in /.github/workflows Bumps [actions/download-artifact](https://github.com/actions/download-artifact) from 3 to 4.1.7. - [Release notes](https://github.com/actions/download-artifact/releases) - [Commits](https://github.com/actions/download-artifact/compare/v3...v4.1.7) --- updated-dependencies: - dependency-name: actions/download-artifact dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- .github/workflows/cd.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 5e95d341..b809ea1a 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -76,7 +76,7 @@ jobs: python-version: '3.x' - name: Download artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4.1.7 with: name: distfiles_${{ github.run_id }} path: ${{ github.workspace }}/dist From 74c9377598b58b5fa6a0c07988f411104993789b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= <16805946+edgarrmondragon@users.noreply.github.com> Date: Tue, 19 Nov 2024 05:10:37 -0600 Subject: [PATCH 08/15] Test with Python 3.13 (#365) --- .github/workflows/ci.yml | 15 +++++++++++--- Dockerfile | 2 +- requirements-test.txt | 5 +++-- run-tests.sh | 42 ++++++++++++++++++++++++++-------------- setup.py | 1 + 5 files changed, 45 insertions(+), 20 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a285a2d9..2bb32b7b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,15 +11,24 @@ jobs: runs-on: ubuntu-20.04 strategy: + fail-fast: false matrix: - python-version: [3.6, 3.7, 3.8, 3.9, "3.10", "3.11", "3.12"] + python-version: + - 3.6 + - 3.7 + - 3.8 + - 3.9 + - "3.10" + - "3.11" + - "3.12" + - "3.13" steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} diff --git a/Dockerfile b/Dockerfile index ba2948f6..d79a72cd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,7 +10,7 @@ ENV PATH $PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH RUN git clone --depth=1 https://github.com/pyenv/pyenv.git $PYENV_ROOT RUN git clone --depth=1 https://github.com/pyenv/pyenv-virtualenv.git $PYENV_ROOT/plugins/pyenv-virtualenv -RUN pyenv 
install 3.5.10 && pyenv install 3.6.14 && pyenv install 3.7.11 && pyenv install 3.8.11 && pyenv install 3.9.6 && pyenv install 3.10.1 && pyenv install 3.11.0 && pyenv install 3.12.1 +RUN pyenv install 3.5.10 && pyenv install 3.6.15 && pyenv install 3.7.17 && pyenv install 3.8.20 && pyenv install 3.9.20 && pyenv install 3.10.15 && pyenv install 3.11.10 && pyenv install 3.12.7 && pyenv install 3.13.0 WORKDIR /app COPY . . diff --git a/requirements-test.txt b/requirements-test.txt index 55463d8c..cde305f6 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,7 +1,8 @@ pytest==4.6.11; python_version < '3.10.0' -pytest==6.2.5; python_version >= '3.10.0' +pytest==8.3.2; python_version >= '3.10.0' attrs==21.2.0 httmock==1.4.0 -freezegun==1.1.0 +freezegun==1.1.0; python_version < '3.13' +freezegun==1.5.1; python_version >= '3.13' pytest-cov coveralls==3.3.1 diff --git a/run-tests.sh b/run-tests.sh index 23b13a73..bb938e85 100755 --- a/run-tests.sh +++ b/run-tests.sh @@ -24,68 +24,77 @@ function deploy { source deactivate fi - # pyenv install 3.6.14 + # pyenv install 3.6.15 if [ ! -e ~/.pyenv/versions/tracker36 ]; then - pyenv virtualenv 3.6.14 tracker36 + pyenv virtualenv 3.6.15 tracker36 pyenv activate tracker36 pip install . pip install -r requirements-test.txt source deactivate fi - # pyenv install 3.7.11 + # pyenv install 3.7.17 if [ ! -e ~/.pyenv/versions/tracker37 ]; then - pyenv virtualenv 3.7.11 tracker37 + pyenv virtualenv 3.7.17 tracker37 pyenv activate tracker37 pip install . pip install -r requirements-test.txt source deactivate fi - # pyenv install 3.8.11 + # pyenv install 3.8.20 if [ ! -e ~/.pyenv/versions/tracker38 ]; then - pyenv virtualenv 3.8.11 tracker38 + pyenv virtualenv 3.8.20 tracker38 pyenv activate tracker38 pip install . pip install -r requirements-test.txt source deactivate fi - # pyenv install 3.9.6 + # pyenv install 3.9.20 if [ ! 
-e ~/.pyenv/versions/tracker39 ]; then - pyenv virtualenv 3.9.6 tracker39 + pyenv virtualenv 3.9.20 tracker39 pyenv activate tracker39 pip install . pip install -r requirements-test.txt source deactivate fi - # pyenv install 3.10.1 + # pyenv install 3.10.15 if [ ! -e ~/.pyenv/versions/tracker310 ]; then - pyenv virtualenv 3.10.1 tracker310 + pyenv virtualenv 3.10.15 tracker310 pyenv activate tracker310 pip install . pip install -r requirements-test.txt source deactivate fi - # pyenv install 3.11.0 + # pyenv install 3.11.10 if [ ! -e ~/.pyenv/versions/tracker311 ]; then - pyenv virtualenv 3.11.0 tracker311 + pyenv virtualenv 3.11.10 tracker311 pyenv activate tracker311 pip install . pip install -r requirements-test.txt source deactivate fi - # pyenv install 3.12.0 + # pyenv install 3.12.7 if [ ! -e ~/.pyenv/versions/tracker312 ]; then - pyenv virtualenv 3.12.0 tracker312 + pyenv virtualenv 3.12.7 tracker312 pyenv activate tracker312 pip install . pip install -r requirements-test.txt source deactivate fi + + # pyenv install 3.13.0 + if [ ! -e ~/.pyenv/versions/tracker313 ]; then + pyenv virtualenv 3.13.0 tracker313 + pyenv activate tracker313 + pip install . 
+ pip install -r requirements-test.txt + source deactivate + fi } @@ -121,6 +130,10 @@ function run_tests { pyenv activate tracker312 pytest source deactivate + + pyenv activate tracker313 + pytest + source deactivate } function refresh_deploy { @@ -132,6 +145,7 @@ function refresh_deploy { pyenv uninstall -f tracker310 pyenv uninstall -f tracker311 pyenv uninstall -f tracker312 + pyenv uninstall -f tracker313 } diff --git a/setup.py b/setup.py index 4b180da6..26137437 100644 --- a/setup.py +++ b/setup.py @@ -64,6 +64,7 @@ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Operating System :: OS Independent", ], install_requires=[ From 0341cb19d5fac344095dbb5e11bc0ebae3c4e880 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matu=CC=81s=CC=8C=20Tomlein?= Date: Tue, 19 Nov 2024 12:13:28 +0100 Subject: [PATCH 09/15] Shorten automatic github release title (close #352) --- .github/workflows/cd.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index b809ea1a..cfb160bb 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -108,6 +108,6 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: tag_name: ${{ github.ref }} - name: Snowplow Python Tracker v${{ needs.version_check.outputs.v_tracker }} + name: Version ${{ needs.version_check.outputs.v_tracker }} draft: false prerelease: ${{ contains(needs.version_check.outputs.v_tracker, 'rc') }} From 9a70fb37146e83e1ab31b96ab4c8e0ac9bb9d696 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matu=CC=81s=CC=8C=20Tomlein?= Date: Tue, 19 Nov 2024 12:18:36 +0100 Subject: [PATCH 10/15] Prepare for 1.0.4 release --- CHANGES.txt | 5 +++++ docs/source/conf.py | 4 ++-- setup.py | 2 +- snowplow_tracker/_version.py | 2 +- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 861f8987..581f8e6a 100644 --- 
a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,8 @@ +Version 1.0.4 (2024-11-19) +-------------------------- +Test with Python 3.13 (#365) (Thanks to @edgarrmondragon) +Shorten automatic github release title (#352) + Version 1.0.3 (2024-08-27) -------------------------- Fix docs action (close #367) diff --git a/docs/source/conf.py b/docs/source/conf.py index 647cf119..46a8fb74 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -28,7 +28,7 @@ author = 'Alex Dean, Paul Boocock, Matus Tomlein, Jack Keene' # The full version, including alpha/beta/rc tags -release = "1.0.3" +release = "1.0.4" # -- General configuration --------------------------------------------------- @@ -60,4 +60,4 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] \ No newline at end of file +html_static_path = ['_static'] diff --git a/setup.py b/setup.py index 26137437..6d386306 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,7 @@ setup( name="snowplow-tracker", - version="1.0.3", + version="1.0.4", author=authors_str, author_email=authors_email_str, packages=["snowplow_tracker", "snowplow_tracker.test", "snowplow_tracker.events"], diff --git a/snowplow_tracker/_version.py b/snowplow_tracker/_version.py index b71b2e2c..b6a9d713 100644 --- a/snowplow_tracker/_version.py +++ b/snowplow_tracker/_version.py @@ -15,6 +15,6 @@ # language governing permissions and limitations there under. 
# """ -__version_info__ = (1, 0, 3) +__version_info__ = (1, 0, 4) __version__ = ".".join(str(x) for x in __version_info__) __build_version__ = __version__ + "" From 30717fbc517f7bbac75075287fd3fd286fb6dd4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matu=CC=81s=CC=8C=20Tomlein?= Date: Wed, 20 Nov 2024 12:49:54 +0100 Subject: [PATCH 11/15] Use the same version for the upload-artifact and download-artifact action --- .github/workflows/cd.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index cfb160bb..935d1c35 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -57,7 +57,7 @@ jobs: python setup.py sdist bdist_wheel - name: Upload artifacts - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: distfiles_${{ github.run_id }} path: dist @@ -76,7 +76,7 @@ jobs: python-version: '3.x' - name: Download artifacts - uses: actions/download-artifact@v4.1.7 + uses: actions/download-artifact@v4 with: name: distfiles_${{ github.run_id }} path: ${{ github.workspace }}/dist From a10e3e4b525a2649d0df6fcc1ee695e34545f409 Mon Sep 17 00:00:00 2001 From: Jack Keene Date: Thu, 20 Feb 2025 13:49:12 +0000 Subject: [PATCH 12/15] Bump Ubuntu Version in GH Actions (close #374) PR #375 * bump ubuntu version in CI/CD * drop EoL python versions --- .github/workflows/cd.yml | 8 ++++---- .github/workflows/ci.yml | 6 ++---- README.md | 1 + setup.py | 3 --- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 935d1c35..2a0fcff1 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -7,7 +7,7 @@ on: jobs: version_check: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest outputs: v_tracker: ${{ steps.version.outputs.PYTHON_TRACKER_VERSION}} @@ -32,7 +32,7 @@ jobs: build: needs: ["version_check"] - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest strategy: matrix: @@ -64,7 +64,7 @@ jobs: publish: needs: 
["build"] - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest steps: - name: Checkout code @@ -96,7 +96,7 @@ jobs: release: needs: ["publish", "version_check"] - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest steps: - name: Checkout code diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2bb32b7b..09b5de9e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,14 +8,12 @@ on: jobs: build: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest strategy: fail-fast: false matrix: python-version: - - 3.6 - - 3.7 - 3.8 - 3.9 - "3.10" @@ -74,7 +72,7 @@ jobs: coveralls_finish: needs: ["build"] - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest steps: - name: Coveralls finished diff --git a/README.md b/README.md index 16fd5b51..694d3ce9 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ Python Support | Python version | snowplow-tracker version | | :----: | :----: | +| \>=3.8 | > 1.1.0 | | \>=3.5 | > 0.10.0 | | 2.7 | > 0.9.1 | diff --git a/setup.py b/setup.py index 6d386306..91bc2c65 100644 --- a/setup.py +++ b/setup.py @@ -56,9 +56,6 @@ "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.5", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", From c74108d203184b40ba2df89f6b124f82bfd47bc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez-Mondrag=C3=B3n?= Date: Thu, 29 Aug 2024 02:25:43 -0600 Subject: [PATCH 13/15] Avoid install `types-requests` at run-time --- .github/workflows/ci.yml | 2 +- setup.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 09b5de9e..9c14d2fe 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -50,7 +50,7 @@ jobs: - name: MyPy run: | 
- python -m pip install mypy + python -m pip install -e .[typing] mypy snowplow_tracker --exclude '/test' - name: Demo diff --git a/setup.py b/setup.py index 91bc2c65..f3a89bfe 100644 --- a/setup.py +++ b/setup.py @@ -66,7 +66,12 @@ ], install_requires=[ "requests>=2.25.1,<3.0", - "types-requests>=2.25.1,<3.0", "typing_extensions>=3.7.4", ], + extras_require={ + "typing": [ + "mypy>=0.971", + "types-requests>=2.25.1,<3.0", + ], + }, ) From 197fa9cd1307f12ffb3e12b61dcf13009525b875 Mon Sep 17 00:00:00 2001 From: Jack-Keene Date: Thu, 20 Feb 2025 14:14:40 +0000 Subject: [PATCH 14/15] Prepare for 1.1.0 release --- CHANGES.txt | 5 +++++ docs/source/conf.py | 2 +- setup.py | 2 +- snowplow_tracker/_version.py | 2 +- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 581f8e6a..6a56dedb 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,8 @@ +Version 1.1.0 (2025-02-20) +-------------------------- +Bump Ubuntu Version in GH Actions (#375) +Avoid installing types-requests at run-time (#370) (Thanks to @edgarrmondragon) + Version 1.0.4 (2024-11-19) -------------------------- Test with Python 3.13 (#365) (Thanks to @edgarrmondragon) diff --git a/docs/source/conf.py b/docs/source/conf.py index 46a8fb74..88d210c3 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -28,7 +28,7 @@ author = 'Alex Dean, Paul Boocock, Matus Tomlein, Jack Keene' # The full version, including alpha/beta/rc tags -release = "1.0.4" +release = "1.1.0" # -- General configuration --------------------------------------------------- diff --git a/setup.py b/setup.py index f3a89bfe..efaf6536 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,7 @@ setup( name="snowplow-tracker", - version="1.0.4", + version="1.1.0", author=authors_str, author_email=authors_email_str, packages=["snowplow_tracker", "snowplow_tracker.test", "snowplow_tracker.events"], diff --git a/snowplow_tracker/_version.py b/snowplow_tracker/_version.py index b6a9d713..f4ff17a0 100644 --- 
a/snowplow_tracker/_version.py +++ b/snowplow_tracker/_version.py @@ -15,6 +15,6 @@ # language governing permissions and limitations there under. # """ -__version_info__ = (1, 0, 4) +__version_info__ = (1, 1, 0) __version__ = ".".join(str(x) for x in __version_info__) __build_version__ = __version__ + "" From e8f2629e78334a974794efa1f20442d923c14e39 Mon Sep 17 00:00:00 2001 From: Patricio Date: Mon, 1 Sep 2025 12:42:22 +0200 Subject: [PATCH 15/15] claude mds instrumentation --- CLAUDE.md | 370 ++++++++++++++++++++++++++++++ snowplow_tracker/events/CLAUDE.md | 284 +++++++++++++++++++++++ snowplow_tracker/test/CLAUDE.md | 365 +++++++++++++++++++++++++++++ 3 files changed, 1019 insertions(+) create mode 100644 CLAUDE.md create mode 100644 snowplow_tracker/events/CLAUDE.md create mode 100644 snowplow_tracker/test/CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..dd4a535e --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,370 @@ +# Snowplow Python Tracker - CLAUDE.md + +## Project Overview + +The Snowplow Python Tracker is a public Python library for sending analytics events to Snowplow collectors. It enables developers to integrate Snowplow analytics into Python applications, games, and web servers. The library provides a robust event tracking system with support for various event types, custom contexts, and reliable event delivery through configurable emitters. 
+ +**Key Technologies:** +- Python 3.8+ (supported versions: 3.8-3.13) +- requests library for HTTP communication +- typing_extensions for enhanced type hints +- Event-driven architecture with schema validation +- Asynchronous and synchronous event emission + +## Development Commands + +```bash +# Install dependencies +pip install -r requirements-test.txt + +# Run tests +./run-tests.sh + +# Run specific test module +python -m pytest snowplow_tracker/test/unit/test_tracker.py + +# Run integration tests +python -m pytest snowplow_tracker/test/integration/ + +# Install package in development mode +pip install -e . + +# Build Docker image for testing +docker build -t snowplow-python-tracker . +docker run snowplow-python-tracker +``` + +## Architecture + +The tracker follows a layered architecture with clear separation of concerns: + +``` +snowplow_tracker/ +├── Core Components +│ ├── tracker.py # Main Tracker class orchestrating events +│ ├── snowplow.py # High-level API for tracker management +│ └── subject.py # User/device context management +├── Event Layer (events/) +│ ├── event.py # Base Event class +│ ├── page_view.py # PageView event +│ ├── structured_event.py # Structured events +│ └── self_describing.py # Custom schema events +├── Emission Layer +│ ├── emitters.py # Sync/Async event transmission +│ ├── event_store.py # Event buffering and persistence +│ └── payload.py # Event payload construction +├── Configuration +│ ├── tracker_configuration.py +│ └── emitter_configuration.py +└── Validation + ├── contracts.py # Runtime validation + └── typing.py # Type definitions +``` + +## Core Architectural Principles + +1. **Schema-First Design**: All events conform to Iglu schemas for consistency +2. **Separation of Concerns**: Event creation, validation, and emission are separate +3. **Configuration Objects**: Use dedicated configuration classes, not raw dictionaries +4. **Type Safety**: Extensive use of type hints and Protocol classes +5. 
**Fail-Safe Delivery**: Events are buffered and retried on failure +6. **Immutability**: Event objects are largely immutable after creation + +## Layer Organization & Responsibilities + +### Application Layer (snowplow.py) +- Singleton pattern for global tracker management +- Factory methods for tracker creation +- Namespace-based tracker registry + +### Domain Layer (tracker.py, events/) +- Event creation and validation +- Subject (user/device) context management +- Event enrichment with standard fields + +### Infrastructure Layer (emitters.py, event_store.py) +- HTTP communication with collectors +- Event buffering and retry logic +- Async/sync emission strategies + +### Cross-Cutting (contracts.py, typing.py) +- Runtime validation with togglable contracts +- Shared type definitions and protocols + +## Critical Import Patterns + +```python +# ✅ Import from package root for public API +from snowplow_tracker import Snowplow, Tracker, Subject +from snowplow_tracker import EmitterConfiguration, TrackerConfiguration + +# ✅ Import specific event classes +from snowplow_tracker.events import PageView, StructuredEvent + +# ❌ Don't import from internal modules +from snowplow_tracker.emitters import Requester # Internal class + +# ✅ Use typing module for type hints +from snowplow_tracker.typing import PayloadDict, Method +``` + +## Essential Library Patterns + +### Tracker Initialization Pattern +```python +# ✅ Use Snowplow factory with configuration objects +tracker = Snowplow.create_tracker( + namespace="my_app", + endpoint="https://collector.example.com", + tracker_config=TrackerConfiguration(encode_base64=True), + emitter_config=EmitterConfiguration(batch_size=10) +) + +# ❌ Don't instantiate Tracker directly without Snowplow +tracker = Tracker("namespace", emitter) # Missing registration +``` + +### Event Creation Pattern +```python +# ✅ Use event classes with named parameters +page_view = PageView( + page_url="https://example.com", + page_title="Homepage" +) + +# ✅ Add 
contexts to events +event.context = [SelfDescribingJson(schema, data)] + +# ❌ Don't modify event payload directly +event.payload.add("custom", "value") # Breaks schema validation +``` + +### Subject Management Pattern +```python +# ✅ Set subject at tracker or event level +subject = Subject() +subject.set_user_id("user123") +tracker = Snowplow.create_tracker(..., subject=subject) + +# ✅ Override subject per event +event = PageView(..., event_subject=Subject()) + +# ❌ Don't modify subject after tracker creation +tracker.subject.set_user_id("new_id") # Not thread-safe +``` + +### Emitter Configuration Pattern +```python +# ✅ Configure retry and buffering behavior +config = EmitterConfiguration( + batch_size=50, + buffer_capacity=10000, + custom_retry_codes={429: True, 500: True} +) + +# ❌ Don't use magic numbers +emitter = Emitter(endpoint, 443, "post", 100) # Use config object +``` + +## Model Organization Pattern + +### Event Hierarchy +```python +Event (base class) +├── PageView # Web page views +├── PagePing # Page engagement tracking +├── ScreenView # Mobile screen views +├── StructuredEvent # Category/action/label/property/value events +└── SelfDescribing # Custom schema events +``` + +### Data Structures +```python +# SelfDescribingJson for custom contexts +context = SelfDescribingJson( + "iglu:com.example/context/jsonschema/1-0-0", + {"key": "value"} +) + +# Payload for event data assembly +payload = Payload() +payload.add("e", "pv") # Event type +payload.add_dict({"aid": "app_id"}) +``` + +## Common Pitfalls & Solutions + +### Contract Validation +```python +# ❌ Passing invalid parameters silently fails in production +tracker.track_page_view("") # Empty URL + +# ✅ Enable contracts during development +from snowplow_tracker import enable_contracts +enable_contracts() +``` + +### Event Buffering +```python +# ❌ Not flushing events before shutdown +tracker.track(event) +sys.exit() # Events lost! 
+ +# ✅ Always flush before exit +tracker.track(event) +tracker.flush() +``` + +### Thread Safety +```python +# ❌ Sharing emitter across threads +emitter = Emitter(endpoint) +# Multiple threads using same emitter + +# ✅ Use AsyncEmitter for concurrent scenarios +emitter = AsyncEmitter(endpoint, thread_count=2) +``` + +### Schema Validation +```python +# ❌ Hardcoding schema strings +schema = "iglu:com.snowplow/event/1-0-0" + +# ✅ Use constants for schemas +from snowplow_tracker.constants import CONTEXT_SCHEMA +``` + +## File Structure Template + +``` +project/ +├── tracker_app.py # Application entry point +├── config/ +│ └── tracker_config.py # Tracker configuration +├── events/ +│ ├── __init__.py +│ └── custom_events.py # Custom event definitions +├── contexts/ +│ └── custom_contexts.py # Custom context schemas +└── tests/ + ├── unit/ + │ └── test_events.py + └── integration/ + └── test_emission.py +``` + +## Testing Patterns + +### Unit Testing +```python +# ✅ Mock emitters for unit tests +@mock.patch('snowplow_tracker.emitters.Emitter') +def test_track_event(mock_emitter): + tracker = Tracker("test", mock_emitter) + tracker.track(PageView(...)) + mock_emitter.input.assert_called_once() +``` + +### Contract Testing +```python +# ✅ Use ContractsDisabled context manager +with ContractsDisabled(): + # Test invalid inputs without raising + tracker.track_page_view(None) +``` + +### Integration Testing +```python +# ✅ Test against mock collector +def test_event_delivery(): + with requests_mock.Mocker() as m: + m.post("https://collector.test/com.snowplow/tp2") + # Track and verify delivery +``` + +## Configuration Best Practices + +### Environment-Based Configuration +```python +# ✅ Use environment variables +import os +endpoint = os.getenv("SNOWPLOW_COLLECTOR_URL") +namespace = os.getenv("SNOWPLOW_NAMESPACE", "default") +``` + +### Retry Configuration +```python +# ✅ Configure intelligent retry behavior +EmitterConfiguration( + max_retry_delay_seconds=120, + 
custom_retry_codes={ + 429: True, # Retry rate limits + 500: True, # Retry server errors + 400: False # Don't retry bad requests + } +) +``` + +## Quick Reference + +### Import Checklist +- [ ] Import from `snowplow_tracker` package root +- [ ] Use `EmitterConfiguration` and `TrackerConfiguration` +- [ ] Import specific event classes from `snowplow_tracker.events` +- [ ] Use type hints from `snowplow_tracker.typing` + +### Event Tracking Checklist +- [ ] Create tracker with `Snowplow.create_tracker()` +- [ ] Configure emitter with appropriate batch size +- [ ] Set subject context if tracking users +- [ ] Use appropriate event class for the use case +- [ ] Add custom contexts as `SelfDescribingJson` +- [ ] Call `flush()` before application shutdown +- [ ] Handle failures with callbacks + +### Common Event Types +- `PageView`: Web page views +- `ScreenView`: Mobile app screens +- `StructuredEvent`: Generic events with 5 parameters +- `SelfDescribing`: Custom schema events +- `PagePing`: Engagement tracking + +## Contributing to CLAUDE.md + +When adding or updating content in this document, please follow these guidelines: + +### File Size Limit +- **CLAUDE.md must not exceed 40KB** (currently ~19KB) +- Check file size after updates: `wc -c CLAUDE.md` +- Remove outdated content if approaching the limit + +### Code Examples +- Keep all code examples **4 lines or fewer** +- Focus on the essential pattern, not complete implementations +- Use `# ❌` and `# ✅` to clearly show wrong vs right approaches + +### Content Organization +- Add new patterns to existing sections when possible +- Create new sections sparingly to maintain structure +- Update the architectural principles section for major changes +- Ensure examples follow current codebase conventions + +### Quality Standards +- Test any new patterns in actual code before documenting +- Verify imports and syntax are correct for the codebase +- Keep language concise and actionable +- Focus on "what" and "how", minimize 
"why" explanations + +### Multiple CLAUDE.md Files +- **Directory-specific CLAUDE.md files** can be created for specialized modules +- Follow the same structure and guidelines as this root CLAUDE.md +- Keep them focused on directory-specific patterns and conventions +- Maximum 20KB per directory-specific CLAUDE.md file + +### Instructions for LLMs +When editing files in this repository, **always check for CLAUDE.md guidance**: + +1. **Look for CLAUDE.md in the same directory** as the file being edited +2. **If not found, check parent directories** recursively up to project root +3. **Follow the patterns and conventions** described in the applicable CLAUDE.md +4. **Prioritize directory-specific guidance** over root-level guidance when conflicts exist \ No newline at end of file diff --git a/snowplow_tracker/events/CLAUDE.md b/snowplow_tracker/events/CLAUDE.md new file mode 100644 index 00000000..efc0f5ab --- /dev/null +++ b/snowplow_tracker/events/CLAUDE.md @@ -0,0 +1,284 @@ +# Snowplow Event Types - CLAUDE.md + +## Directory Overview + +The `events/` directory contains all event type implementations for the Snowplow Python Tracker. Each event class represents a specific type of analytics event that can be sent to Snowplow collectors. All events inherit from the base `Event` class and follow a consistent pattern for construction, validation, and payload generation. 
+ +## Event Class Hierarchy + +``` +Event (base class) +├── PageView # Web page view tracking +├── PagePing # Page engagement/heartbeat +├── ScreenView # Mobile/app screen views +├── StructuredEvent # Generic 5-parameter events +└── SelfDescribing # Custom schema events +``` + +## Core Event Patterns + +### Event Construction Pattern +```python +# ✅ Use keyword arguments for clarity +event = PageView( + page_url="https://example.com", + page_title="Homepage", + referrer="https://google.com" +) + +# ❌ Don't use positional arguments +event = PageView("https://example.com", "Homepage") +``` + +### Event Context Pattern +```python +# ✅ Add contexts as SelfDescribingJson list +geo_context = SelfDescribingJson( + "iglu:com.acme/geolocation/jsonschema/1-0-0", + {"latitude": 40.0, "longitude": -73.0} +) +event = PageView(page_url="...", context=[geo_context]) + +# ❌ Don't use raw dictionaries for context +event.context = [{"latitude": 40.0}] # Missing schema! +``` + +### Event Subject Override Pattern +```python +# ✅ Override tracker subject for specific event +special_subject = Subject() +special_subject.set_user_id("anonymous_user") +event = StructuredEvent( + category="shop", + action="view", + event_subject=special_subject +) + +# ❌ Don't modify shared subject +tracker.subject.set_user_id("temp") # Affects all events +``` + +### True Timestamp Pattern +```python +# ✅ Use milliseconds for true_timestamp +import time +timestamp_ms = time.time() * 1000 +event = PageView( + page_url="...", + true_timestamp=timestamp_ms +) + +# ❌ Don't use seconds +event = PageView(true_timestamp=time.time()) +``` + +## Event-Specific Patterns + +### PageView Events +```python +# ✅ Complete PageView with all fields +page_view = PageView( + page_url="https://example.com/products", + page_title="Products", + referrer="https://example.com/home" +) + +# ❌ Missing required page_url +page_view = PageView(page_title="Products") +``` + +### StructuredEvent Pattern +```python +# ✅ Use descriptive 
category/action pairs +event = StructuredEvent( + category="ecommerce", + action="add-to-cart", + label="SKU-123", + property_="size:XL", + value=29.99 +) + +# ❌ Generic naming loses meaning +event = StructuredEvent("event", "click") +``` + +### SelfDescribing Events +```python +# ✅ Custom events with Iglu schemas +purchase_event = SelfDescribing( + SelfDescribingJson( + "iglu:com.acme/purchase/jsonschema/2-0-0", + { + "orderId": "ORD-123", + "total": 99.99, + "currency": "USD" + } + ) +) + +# ❌ Missing schema version +event = SelfDescribing( + SelfDescribingJson("iglu:com.acme/purchase", {...}) +) +``` + +### ScreenView Pattern (Mobile) +```python +# ✅ Mobile screen tracking with ID +screen = ScreenView( + name="ProductDetailScreen", + id_="screen-456", + previous_name="ProductListScreen" +) + +# ❌ Using PageView for mobile apps +page = PageView(page_url="app://product-detail") +``` + +## Event Validation Rules + +### Required Fields by Event Type +- **PageView**: `page_url` (required), `page_title`, `referrer` +- **StructuredEvent**: `category` (required), `action` (required), `label`, `property_`, `value` +- **SelfDescribing**: `event_json` (SelfDescribingJson required) +- **ScreenView**: `name` or `id_` (at least one required) +- **PagePing**: `page_url` (required) + +### Schema Validation Pattern +```python +# ✅ Validate schema format +SCHEMA_PATTERN = r"^iglu:[a-zA-Z0-9-_.]+/[a-zA-Z0-9-_]+/" +SCHEMA_PATTERN += r"[a-zA-Z0-9-_]+/[0-9]+-[0-9]+-[0-9]+$" + +# ❌ Invalid schema formats +"iglu:com.acme/event" # Missing version +"com.acme/event/1-0-0" # Missing iglu: prefix +``` + +## Payload Building Pattern + +### Internal Payload Construction +```python +# ✅ Event classes handle payload internally +def build_payload(self, encode_base64, json_encoder, subject): + # Add event-specific fields + self.payload.add("e", "pv") # Page view type + self.payload.add("url", self.page_url) + + # Let base class handle common fields + return super().build_payload(encode_base64, json_encoder, 
subject) + +# ❌ Don't expose payload building to users +event.payload = Payload() +event.payload.add("custom", "field") +``` + +## Testing Event Classes + +### Unit Test Pattern +```python +# ✅ Test event construction and validation +def test_page_view_required_fields(): + with self.assertRaises(TypeError): + PageView() # Missing required page_url + + event = PageView(page_url="https://test.com") + assert event.page_url == "https://test.com" + +# ✅ Test payload generation +def test_event_payload(): + event = PageView(page_url="https://test.com") + payload = event.build_payload(False, None, None) + assert payload.get()["url"] == "https://test.com" +``` + +### Context Testing Pattern +```python +# ✅ Test context attachment +def test_event_context(): + context = SelfDescribingJson(schema, data) + event = PageView(page_url="...", context=[context]) + + payload = event.build_payload(True, None, None) + assert "cx" in payload.get() # Base64 context +``` + +## Common Event Pitfalls + +### Timestamp Confusion +```python +# ❌ Mixing timestamp types +event.true_timestamp = "2024-01-01" # String not allowed +event.true_timestamp = datetime.now() # Use milliseconds + +# ✅ Consistent millisecond timestamps +event.true_timestamp = int(time.time() * 1000) +``` + +### Context Array Management +```python +# ❌ Modifying context after creation +event.context.append(new_context) # Unexpected behavior + +# ✅ Set complete context at creation +all_contexts = [context1, context2] +event = PageView(page_url="...", context=all_contexts) +``` + +### Schema Version Control +```python +# ❌ Hardcoding schema versions +schema = "iglu:com.acme/event/jsonschema/1-0-0" + +# ✅ Centralize schema definitions +PURCHASE_SCHEMA = "iglu:com.acme/purchase/jsonschema/2-1-0" +event = SelfDescribing(SelfDescribingJson(PURCHASE_SCHEMA, data)) +``` + +## Event Migration Guide + +### Upgrading Event Schemas +```python +# From version 1-0-0 to 2-0-0 +# ✅ Handle backward compatibility +def 
create_purchase_event(data): + if "items" in data: # New schema + schema = "iglu:.../purchase/jsonschema/2-0-0" + else: # Old schema + schema = "iglu:.../purchase/jsonschema/1-0-0" + + return SelfDescribing(SelfDescribingJson(schema, data)) +``` + +## Quick Reference + +### Event Type Selection +- **PageView**: Traditional web page tracking +- **ScreenView**: Mobile app screen tracking +- **StructuredEvent**: Generic business events +- **SelfDescribing**: Complex custom events +- **PagePing**: Engagement/time-on-page tracking + +### Event Field Checklist +- [ ] Required fields provided +- [ ] Timestamps in milliseconds +- [ ] Contexts as SelfDescribingJson array +- [ ] Valid Iglu schema format +- [ ] Event-specific subject if needed + +### Common Event Methods +- `build_payload()`: Internal payload generation +- `event_subject`: Per-event user context +- `context`: Custom context array +- `true_timestamp`: User-defined timestamp + +## Contributing to events/CLAUDE.md + +When modifying event implementations or adding new event types: + +1. **Follow the Event base class pattern** - All events must inherit from Event +2. **Implement required abstract methods** - Ensure payload building works correctly +3. **Document required fields** - Update this file with new event requirements +4. **Add comprehensive tests** - Test construction, validation, and payload generation +5. **Maintain backward compatibility** - Don't break existing event APIs +6. **Update schema constants** - Add new schemas to constants.py if needed \ No newline at end of file diff --git a/snowplow_tracker/test/CLAUDE.md b/snowplow_tracker/test/CLAUDE.md new file mode 100644 index 00000000..08d0b042 --- /dev/null +++ b/snowplow_tracker/test/CLAUDE.md @@ -0,0 +1,365 @@ +# Snowplow Python Tracker Tests - CLAUDE.md + +## Directory Overview + +The `test/` directory contains comprehensive test suites for the Snowplow Python Tracker. 
Tests are organized into unit tests (isolated component testing) and integration tests (end-to-end collector communication). The test suite uses pytest and unittest.mock for mocking, with freezegun for time-based testing. + +## Test Organization + +``` +test/ +├── unit/ # Isolated component tests +│ ├── test_tracker.py # Tracker class tests +│ ├── test_emitters.py # Emitter functionality +│ ├── test_event.py # Base event class +│ ├── test_payload.py # Payload construction +│ ├── test_contracts.py # Validation logic +│ └── test_*.py # Other component tests +└── integration/ # End-to-end tests + └── test_integration.py # Collector communication +``` + +## Core Testing Patterns + +### Mock Pattern for Emitters +```python +# ✅ Mock emitter for isolated tracker testing +@mock.patch('snowplow_tracker.emitters.Emitter') +def test_tracker_tracks_event(mock_emitter): + tracker = Tracker("test", mock_emitter) + tracker.track(PageView(page_url="test.com")) + mock_emitter.input.assert_called_once() + +# ❌ Don't test with real network calls in unit tests +def test_tracker(): + emitter = Emitter("https://real-collector.com") +``` + +### Contract Testing Pattern +```python +# ✅ Use ContractsDisabled context manager +class ContractsDisabled: + def __enter__(self): + disable_contracts() + def __exit__(self, type, value, traceback): + enable_contracts() + +with ContractsDisabled(): + # Test invalid inputs without raising + tracker.track_page_view(None) + +# ❌ Don't disable contracts globally +disable_contracts() +# ... 
rest of test file +``` + +### Time-Based Testing Pattern +```python +# ✅ Use freezegun for deterministic timestamps +from freezegun import freeze_time + +@freeze_time("2024-01-01 12:00:00") +def test_event_timestamp(): + event = PageView(page_url="test.com") + # Timestamp will be consistent + +# ❌ Don't use actual system time +import time +timestamp = time.time() # Non-deterministic +``` + +### UUID Mocking Pattern +```python +# ✅ Mock UUID generation for predictable IDs +@mock.patch('snowplow_tracker.tracker.Tracker.get_uuid') +def test_event_id(mock_uuid): + mock_uuid.return_value = "test-uuid-123" + tracker.track(event) + assert payload["eid"] == "test-uuid-123" + +# ❌ Don't rely on random UUIDs +event_id = tracker.get_uuid() # Different each run +``` + +## Unit Test Patterns + +### Payload Testing +```python +# ✅ Test payload field presence and values +def test_payload_construction(): + payload = Payload() + payload.add("e", "pv") + payload.add("url", "https://test.com") + + result = payload.get() + assert result["e"] == "pv" + assert result["url"] == "https://test.com" + +# ✅ Test JSON encoding +def test_payload_json_encoding(): + payload.add_json({"key": "value"}, True, "cx", "co") + assert "cx" in payload.get() # Base64 encoded +``` + +### Event Testing +```python +# ✅ Test event construction with all parameters +def test_page_view_complete(): + context = SelfDescribingJson(schema, data) + subject = Subject() + + event = PageView( + page_url="https://test.com", + page_title="Test", + context=[context], + event_subject=subject, + true_timestamp=1234567890 + ) + + assert event.page_url == "https://test.com" + assert len(event.context) == 1 + +# ❌ Don't test internal implementation details +def test_private_methods(): + event._internal_method() # Testing private methods +``` + +### Emitter Testing +```python +# ✅ Mock HTTP requests for emitter tests +@mock.patch('requests.post') +def test_emitter_sends_events(mock_post): + mock_post.return_value.status_code = 
200 + + emitter = Emitter("https://collector.test") + emitter.input({"e": "pv"}) + emitter.flush() + + mock_post.assert_called_once() + +# ✅ Test retry logic +def test_emitter_retry_on_failure(mock_post): + mock_post.return_value.status_code = 500 + emitter.custom_retry_codes = {500: True} + # Verify retry behavior +``` + +### Contract Validation Testing +```python +# ✅ Test validation rules +def test_non_empty_string_validation(): + with self.assertRaises(ValueError): + non_empty_string("") + + non_empty_string("valid") # Should not raise + +# ✅ Test form element validation +def test_form_element_contract(): + valid_element = { + "name": "field1", + "value": "test", + "nodeName": "INPUT", + "type": "text" + } + form_element(valid_element) # Should not raise +``` + +## Integration Test Patterns + +### Mock Collector Pattern +```python +# ✅ Use micro mock collector for integration tests +from http.server import HTTPServer, BaseHTTPRequestHandler + +class MockCollector(BaseHTTPRequestHandler): + def do_POST(self): + # Capture and validate payload + content_length = int(self.headers['Content-Length']) + post_data = self.rfile.read(content_length) + # Store for assertions + self.send_response(200) + +# Start mock collector in test +server = HTTPServer(('localhost', 9090), MockCollector) +``` + +### End-to-End Testing +```python +# ✅ Test complete tracking flow +def test_end_to_end_tracking(): + tracker = Snowplow.create_tracker( + namespace="test", + endpoint="http://localhost:9090" + ) + + # Track multiple events + tracker.track(PageView(page_url="test1.com")) + tracker.track(StructuredEvent("cat", "act")) + tracker.flush() + + # Verify collector received both events + assert len(received_events) == 2 +``` + +## Testing Best Practices + +### Test Isolation +```python +# ✅ Clean up after each test +def setUp(self): + Snowplow.reset() # Clear all trackers + +def tearDown(self): + # Clean up any test artifacts + if hasattr(self, 'server'): + self.server.shutdown() + +# ❌ 
Don't leave state between tests +class TestSuite: + shared_tracker = Tracker(...) # Shared state! +``` + +### Assertion Patterns +```python +# ✅ Use specific assertions +assert event.page_url == "https://expected.com" +assert "e" in payload.get() +mock_func.assert_called_with(expected_arg) + +# ❌ Avoid generic assertions +assert event # Too vague +assert payload.get() # What are we checking? +``` + +### Mock Management +```python +# ✅ Use patch decorators or context managers +@mock.patch('snowplow_tracker.tracker.uuid.uuid4') +def test_with_mock(mock_uuid): + mock_uuid.return_value = "test-id" + +# ✅ Clean up patches +def create_patch(self, name): + patcher = mock.patch(name) + thing = patcher.start() + self.addCleanup(patcher.stop) + return thing +``` + +## Common Test Scenarios + +### Testing Event Contexts +```python +# ✅ Test context encoding and attachment +def test_event_with_multiple_contexts(): + contexts = [ + SelfDescribingJson(schema1, data1), + SelfDescribingJson(schema2, data2) + ] + event = PageView(page_url="test", context=contexts) + + payload = event.build_payload(True, None, None) + cx_data = json.loads(base64.b64decode(payload.get()["cx"])) + assert len(cx_data["data"]) == 2 +``` + +### Testing Failure Scenarios +```python +# ✅ Test failure callbacks +def test_emitter_failure_callback(): + failed_events = [] + + def on_failure(count, events): + failed_events.extend(events) + + emitter = Emitter( + "https://invalid.collector", + on_failure=on_failure + ) + # Trigger failure and verify callback +``` + +### Testing Async Behavior +```python +# ✅ Test async emitter threading +def test_async_emitter(): + emitter = AsyncEmitter("https://collector.test") + + # Track events + for i in range(100): + emitter.input({"e": "pv", "url": f"test{i}.com"}) + + # Wait for flush + emitter.flush() + time.sleep(1) # Allow async processing + + # Verify all events sent +``` + +## Test Utilities + +### Helper Functions +```python +# ✅ Create reusable test helpers +def 
create_test_tracker(namespace="test"): + emitter = mock.MagicMock() + return Tracker(namespace, emitter) + +def create_test_event(): + return PageView(page_url="https://test.com") + +# ❌ Don't duplicate test setup +def test_one(): + emitter = mock.MagicMock() + tracker = Tracker("test", emitter) + # ... repeated in every test +``` + +## Performance Testing + +### Load Testing Pattern +```python +# ✅ Test tracker under load +def test_high_volume_tracking(): + tracker = create_test_tracker() + + start = time.time() + for i in range(10000): + tracker.track(PageView(page_url=f"test{i}.com")) + + duration = time.time() - start + assert duration < 5.0 # Performance threshold +``` + +## Quick Reference + +### Test File Naming +- Unit tests: `test_<component>.py` +- Integration tests: `test_integration_<feature>.py` +- Test classes: `Test<Component>` +- Test methods: `test_<behavior>` + +### Essential Test Imports +```python +import unittest +import unittest.mock as mock +from freezegun import freeze_time +from snowplow_tracker.contracts import ContractsDisabled +``` + +### Common Mock Targets +- `snowplow_tracker.tracker.Tracker.get_uuid` +- `requests.post` / `requests.get` +- `time.time` +- `snowplow_tracker.emitters.Emitter.sync_flush` + +## Contributing to test/CLAUDE.md + +When adding or modifying tests: + +1. **Maintain test isolation** - Each test should be independent +2. **Mock external dependencies** - No real network calls in unit tests +3. **Use descriptive test names** - Clear what is being tested +4. **Test both success and failure paths** - Include edge cases +5. **Keep tests fast** - Mock time-consuming operations +6. **Document complex test scenarios** - Add comments for clarity \ No newline at end of file