diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 10b24303..2a0fcff1 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -7,22 +7,22 @@ on: jobs: version_check: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest outputs: v_tracker: ${{ steps.version.outputs.PYTHON_TRACKER_VERSION}} steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 - name: Get tag and tracker versions id: version run: | - echo ::set-output name=TAG_VERSION::${GITHUB_REF#refs/*/} - echo "##[set-output name=PYTHON_TRACKER_VERSION;]$(python setup.py --version)" + echo "TAG_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_OUTPUT + echo "PYTHON_TRACKER_VERSION=$(python setup.py --version)" >> $GITHUB_OUTPUT - name: Fail if version mismatch if: ${{ steps.version.outputs.TAG_VERSION != steps.version.outputs.PYTHON_TRACKER_VERSION }} @@ -32,7 +32,7 @@ jobs: build: needs: ["version_check"] - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest strategy: matrix: @@ -40,10 +40,10 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} @@ -57,26 +57,26 @@ jobs: python setup.py sdist bdist_wheel - name: Upload artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: distfiles_${{ github.run_id }} path: dist publish: needs: ["build"] - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: '3.x' - name: Download artifacts - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v4 with: name: distfiles_${{ github.run_id }} path: ${{ github.workspace }}/dist @@ -96,11 
+96,11 @@ jobs: release: needs: ["publish", "version_check"] - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Release uses: softprops/action-gh-release@v1 @@ -108,6 +108,6 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: tag_name: ${{ github.ref }} - name: Snowplow Python Tracker v${{ needs.version_check.outputs.v_tracker }} + name: Version ${{ needs.version_check.outputs.v_tracker }} draft: false prerelease: ${{ contains(needs.version_check.outputs.v_tracker, 'rc') }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f92176df..9c14d2fe 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,30 +8,25 @@ on: jobs: build: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest strategy: + fail-fast: false matrix: - python-version: [3.6, 3.7, 3.8, 3.9, "3.10"] - extras-required: [".", ".[redis]"] - - services: - redis: - image: redis - options: >- - --health-cmd "redis-cli ping" - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - - 6379:6379 + python-version: + - 3.8 + - 3.9 + - "3.10" + - "3.11" + - "3.12" + - "3.13" steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -42,7 +37,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade "${{ env.pip_v }}" setuptools wheel - python -m pip install -e ${{ matrix.extras-required }} + python -m pip install -e . 
python -m pip install -r requirements-test.txt - name: Build @@ -52,12 +47,24 @@ jobs: - name: Tests run: | pytest --cov=snowplow_tracker --cov-report=xml + + - name: MyPy + run: | + python -m pip install -e .[typing] + mypy snowplow_tracker --exclude '/test' - name: Demo run: | cd examples + cd tracker_api_example python app.py "localhost:9090" + - name: Snowplow Demo + run: | + cd examples + cd snowplow_api_example + python snowplow_app.py "localhost:9090" + - name: Coveralls uses: AndreMiras/coveralls-python-action@develop with: @@ -65,7 +72,7 @@ jobs: coveralls_finish: needs: ["build"] - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest steps: - name: Coveralls finished diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index eefd7831..c50ac5e2 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -8,7 +8,7 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v3 - uses: ammaraskar/sphinx-action@master with: docs-folder: "docs/" diff --git a/.github/workflows/snyk.yml b/.github/workflows/snyk.yml index 7e86a6ba..b2e36c27 100644 --- a/.github/workflows/snyk.yml +++ b/.github/workflows/snyk.yml @@ -9,11 +9,11 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: snyk/actions/setup@master - name: Set up Python 3.8 - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: 3.8 - name: Install dependencies diff --git a/CHANGES.txt b/CHANGES.txt index 7139b7c5..6a56dedb 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,69 @@ +Version 1.1.0 (2025-02-20) +-------------------------- +Bump Ubuntu Version in GH Actions (#375) +Avoid installing types-requests at run-time (#370) (Thanks to @edgarrmondragon) + +Version 1.0.4 (2024-11-19) +-------------------------- +Test with Python 3.13 (#365) (Thanks to @edgarrmondragon) +Shorten automatic github release title (#352) + 
+Version 1.0.3 (2024-08-27) +-------------------------- +Fix docs action (close #367) +Update `on_success` docstring (close #358) +Add py.typed to package (close #360) (Thanks to @edgarrmondragon) +Update typing +Fix `PagePing`, `PageView`, and `StructuredEvent` property getters (close #361) + +Version 1.0.2 (2024-02-26) +-------------------------- +Add Python 3.12 to CI tests (#356) (Thanks to @edgarrmondragon) + +Version 1.0.1 (2023-07-12) +-------------------------- +Fix tstamp parameter in track_self_describing_event (#350) (Thanks to @andehen) + +Version 1.0.0 (2023-06-16) +-------------------------- +Remove Redis and Celery Emitters (#335) +Make tracker namespace mandatory (#337) +Track function to return event_id (#338) +Fix namespace assignment in Snowplow API (#341) +Refactor track_xxx() methods (#343) +Update payload builder to combine event subjects (#347) + +Version 0.15.0 (2023-04-19) +--------------------------- +Use Requests Session for sending events (#221) +Add Redis example app (#322) + +Version 0.14.0 (2023-03-21) +--------------------------- +Adds deprecation warnings for V1 changes (#315) +Update GH actions to use Node16 (#317) +Adds event store parameter to Snowplow interface (#320) +Adds missing parameters to async emitter (#323) + +Version 0.13.0 (2023-01-24) +--------------------------- +Adds Snowplow Interface (#295) +Adds retry for failed events (#296) +Adds customisable retry codes (#297) +Adds EventStore with max limit (#309) +Adds Snowplow Example App (#302) +Fix Collector URL with trailing '/' (#300) +Rename unstruct_event to self_describing_event (#298) +Upgrade `set-output` in cd (#294) + +Version 0.12.0 (2022-11-03) +--------------------------- +Adds Domain Session ID and Domain Session Index to Subject class (#282) (Thanks to @cpnat) +Add support for Python 3.11 (#286) +Change default protocol to HTTPS in the Emitter (#14) +Change default method to POST in the Emitter (#289) +Update Docker base image (#283) (Thanks to @cpnat) + 
Version 0.11.0 (2022-10-06) --------------------------- Update README file (#264) diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..dd4a535e --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,370 @@ +# Snowplow Python Tracker - CLAUDE.md + +## Project Overview + +The Snowplow Python Tracker is a public Python library for sending analytics events to Snowplow collectors. It enables developers to integrate Snowplow analytics into Python applications, games, and web servers. The library provides a robust event tracking system with support for various event types, custom contexts, and reliable event delivery through configurable emitters. + +**Key Technologies:** +- Python 3.8+ (supported versions: 3.8-3.13) +- requests library for HTTP communication +- typing_extensions for enhanced type hints +- Event-driven architecture with schema validation +- Asynchronous and synchronous event emission + +## Development Commands + +```bash +# Install dependencies +pip install -r requirements-test.txt + +# Run tests +./run-tests.sh + +# Run specific test module +python -m pytest snowplow_tracker/test/unit/test_tracker.py + +# Run integration tests +python -m pytest snowplow_tracker/test/integration/ + +# Install package in development mode +pip install -e . + +# Build Docker image for testing +docker build -t snowplow-python-tracker . 
+docker run snowplow-python-tracker +``` + +## Architecture + +The tracker follows a layered architecture with clear separation of concerns: + +``` +snowplow_tracker/ +├── Core Components +│ ├── tracker.py # Main Tracker class orchestrating events +│ ├── snowplow.py # High-level API for tracker management +│ └── subject.py # User/device context management +├── Event Layer (events/) +│ ├── event.py # Base Event class +│ ├── page_view.py # PageView event +│ ├── structured_event.py # Structured events +│ └── self_describing.py # Custom schema events +├── Emission Layer +│ ├── emitters.py # Sync/Async event transmission +│ ├── event_store.py # Event buffering and persistence +│ └── payload.py # Event payload construction +├── Configuration +│ ├── tracker_configuration.py +│ └── emitter_configuration.py +└── Validation + ├── contracts.py # Runtime validation + └── typing.py # Type definitions +``` + +## Core Architectural Principles + +1. **Schema-First Design**: All events conform to Iglu schemas for consistency +2. **Separation of Concerns**: Event creation, validation, and emission are separate +3. **Configuration Objects**: Use dedicated configuration classes, not raw dictionaries +4. **Type Safety**: Extensive use of type hints and Protocol classes +5. **Fail-Safe Delivery**: Events are buffered and retried on failure +6. 
**Immutability**: Event objects are largely immutable after creation + +## Layer Organization & Responsibilities + +### Application Layer (snowplow.py) +- Singleton pattern for global tracker management +- Factory methods for tracker creation +- Namespace-based tracker registry + +### Domain Layer (tracker.py, events/) +- Event creation and validation +- Subject (user/device) context management +- Event enrichment with standard fields + +### Infrastructure Layer (emitters.py, event_store.py) +- HTTP communication with collectors +- Event buffering and retry logic +- Async/sync emission strategies + +### Cross-Cutting (contracts.py, typing.py) +- Runtime validation with togglable contracts +- Shared type definitions and protocols + +## Critical Import Patterns + +```python +# ✅ Import from package root for public API +from snowplow_tracker import Snowplow, Tracker, Subject +from snowplow_tracker import EmitterConfiguration, TrackerConfiguration + +# ✅ Import specific event classes +from snowplow_tracker.events import PageView, StructuredEvent + +# ❌ Don't import from internal modules +from snowplow_tracker.emitters import Requester # Internal class + +# ✅ Use typing module for type hints +from snowplow_tracker.typing import PayloadDict, Method +``` + +## Essential Library Patterns + +### Tracker Initialization Pattern +```python +# ✅ Use Snowplow factory with configuration objects +tracker = Snowplow.create_tracker( + namespace="my_app", + endpoint="https://collector.example.com", + tracker_config=TrackerConfiguration(encode_base64=True), + emitter_config=EmitterConfiguration(batch_size=10) +) + +# ❌ Don't instantiate Tracker directly without Snowplow +tracker = Tracker("namespace", emitter) # Missing registration +``` + +### Event Creation Pattern +```python +# ✅ Use event classes with named parameters +page_view = PageView( + page_url="https://example.com", + page_title="Homepage" +) + +# ✅ Add contexts to events +event.context = [SelfDescribingJson(schema, data)] 
+ +# ❌ Don't modify event payload directly +event.payload.add("custom", "value") # Breaks schema validation +``` + +### Subject Management Pattern +```python +# ✅ Set subject at tracker or event level +subject = Subject() +subject.set_user_id("user123") +tracker = Snowplow.create_tracker(..., subject=subject) + +# ✅ Override subject per event +event = PageView(..., event_subject=Subject()) + +# ❌ Don't modify subject after tracker creation +tracker.subject.set_user_id("new_id") # Not thread-safe +``` + +### Emitter Configuration Pattern +```python +# ✅ Configure retry and buffering behavior +config = EmitterConfiguration( + batch_size=50, + buffer_capacity=10000, + custom_retry_codes={429: True, 500: True} +) + +# ❌ Don't use magic numbers +emitter = Emitter(endpoint, 443, "post", 100) # Use config object +``` + +## Model Organization Pattern + +### Event Hierarchy +```python +Event (base class) +├── PageView # Web page views +├── PagePing # Page engagement tracking +├── ScreenView # Mobile screen views +├── StructuredEvent # Category/action/label/property/value events +└── SelfDescribing # Custom schema events +``` + +### Data Structures +```python +# SelfDescribingJson for custom contexts +context = SelfDescribingJson( + "iglu:com.example/context/jsonschema/1-0-0", + {"key": "value"} +) + +# Payload for event data assembly +payload = Payload() +payload.add("e", "pv") # Event type +payload.add_dict({"aid": "app_id"}) +``` + +## Common Pitfalls & Solutions + +### Contract Validation +```python +# ❌ Passing invalid parameters silently fails in production +tracker.track_page_view("") # Empty URL + +# ✅ Enable contracts during development +from snowplow_tracker import enable_contracts +enable_contracts() +``` + +### Event Buffering +```python +# ❌ Not flushing events before shutdown +tracker.track(event) +sys.exit() # Events lost! 
+ +# ✅ Always flush before exit +tracker.track(event) +tracker.flush() +``` + +### Thread Safety +```python +# ❌ Sharing emitter across threads +emitter = Emitter(endpoint) +# Multiple threads using same emitter + +# ✅ Use AsyncEmitter for concurrent scenarios +emitter = AsyncEmitter(endpoint, thread_count=2) +``` + +### Schema Validation +```python +# ❌ Hardcoding schema strings +schema = "iglu:com.snowplow/event/1-0-0" + +# ✅ Use constants for schemas +from snowplow_tracker.constants import CONTEXT_SCHEMA +``` + +## File Structure Template + +``` +project/ +├── tracker_app.py # Application entry point +├── config/ +│ └── tracker_config.py # Tracker configuration +├── events/ +│ ├── __init__.py +│ └── custom_events.py # Custom event definitions +├── contexts/ +│ └── custom_contexts.py # Custom context schemas +└── tests/ + ├── unit/ + │ └── test_events.py + └── integration/ + └── test_emission.py +``` + +## Testing Patterns + +### Unit Testing +```python +# ✅ Mock emitters for unit tests +@mock.patch('snowplow_tracker.emitters.Emitter') +def test_track_event(mock_emitter): + tracker = Tracker("test", mock_emitter) + tracker.track(PageView(...)) + mock_emitter.input.assert_called_once() +``` + +### Contract Testing +```python +# ✅ Use ContractsDisabled context manager +with ContractsDisabled(): + # Test invalid inputs without raising + tracker.track_page_view(None) +``` + +### Integration Testing +```python +# ✅ Test against mock collector +def test_event_delivery(): + with requests_mock.Mocker() as m: + m.post("https://collector.test/com.snowplow/tp2") + # Track and verify delivery +``` + +## Configuration Best Practices + +### Environment-Based Configuration +```python +# ✅ Use environment variables +import os +endpoint = os.getenv("SNOWPLOW_COLLECTOR_URL") +namespace = os.getenv("SNOWPLOW_NAMESPACE", "default") +``` + +### Retry Configuration +```python +# ✅ Configure intelligent retry behavior +EmitterConfiguration( + max_retry_delay_seconds=120, + 
 custom_retry_codes={ + 429: True, # Retry rate limits + 500: True, # Retry server errors + 400: False # Don't retry bad requests + } +) +``` + +## Quick Reference + +### Import Checklist +- [ ] Import from `snowplow_tracker` package root +- [ ] Use `EmitterConfiguration` and `TrackerConfiguration` +- [ ] Import specific event classes from `snowplow_tracker.events` +- [ ] Use type hints from `snowplow_tracker.typing` + +### Event Tracking Checklist +- [ ] Create tracker with `Snowplow.create_tracker()` +- [ ] Configure emitter with appropriate batch size +- [ ] Set subject context if tracking users +- [ ] Use appropriate event class for the use case +- [ ] Add custom contexts as `SelfDescribingJson` +- [ ] Call `flush()` before application shutdown +- [ ] Handle failures with callbacks + +### Common Event Types +- `PageView`: Web page views +- `ScreenView`: Mobile app screens +- `StructuredEvent`: Generic events with 5 parameters +- `SelfDescribing`: Custom schema events +- `PagePing`: Engagement tracking + +## Contributing to CLAUDE.md + +When adding or updating content in this document, please follow these guidelines: + +### File Size Limit +- **CLAUDE.md must not exceed 40KB** (currently ~19KB) +- Check file size after updates: `wc -c CLAUDE.md` +- Remove outdated content if approaching the limit + +### Code Examples +- Keep all code examples **4 lines or fewer** +- Focus on the essential pattern, not complete implementations +- Use `# ❌` and `# ✅` to clearly show wrong vs right approaches + +### Content Organization +- Add new patterns to existing sections when possible +- Create new sections sparingly to maintain structure +- Update the architectural principles section for major changes +- Ensure examples follow current codebase conventions + +### Quality Standards +- Test any new patterns in actual code before documenting +- Verify imports and syntax are correct for the codebase +- Keep language concise and actionable +- Focus on "what" and "how", minimize 
"why" explanations + +### Multiple CLAUDE.md Files +- **Directory-specific CLAUDE.md files** can be created for specialized modules +- Follow the same structure and guidelines as this root CLAUDE.md +- Keep them focused on directory-specific patterns and conventions +- Maximum 20KB per directory-specific CLAUDE.md file + +### Instructions for LLMs +When editing files in this repository, **always check for CLAUDE.md guidance**: + +1. **Look for CLAUDE.md in the same directory** as the file being edited +2. **If not found, check parent directories** recursively up to project root +3. **Follow the patterns and conventions** described in the applicable CLAUDE.md +4. **Prioritize directory-specific guidance** over root-level guidance when conflicts exist \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 9d0577e8..d79a72cd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,18 +1,16 @@ -FROM centos:8 -RUN cd /etc/yum.repos.d/ -RUN sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-* -RUN sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-* +FROM debian:bullseye-slim + +RUN apt-get update && apt-get install -y --no-install-recommends make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev \ + libsqlite3-dev wget curl llvm libncurses5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev \ + mecab-ipadic-utf8 git ca-certificates -RUN yum -y install wget -RUN yum install -y epel-release -RUN yum -y install git tar gcc make bzip2 openssl openssl-devel patch gcc-c++ libffi-devel sqlite-devel -RUN git clone https://github.com/yyuu/pyenv.git ~/.pyenv ENV HOME /root ENV PYENV_ROOT $HOME/.pyenv ENV PATH $PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH +RUN git clone --depth=1 https://github.com/pyenv/pyenv.git $PYENV_ROOT +RUN git clone --depth=1 https://github.com/pyenv/pyenv-virtualenv.git $PYENV_ROOT/plugins/pyenv-virtualenv -RUN pyenv install 3.5.10 && pyenv install 3.6.14 && 
pyenv install 3.7.11 && pyenv install 3.8.11 && pyenv install 3.9.6 && pyenv install 3.10.1 -RUN git clone https://github.com/pyenv/pyenv-virtualenv.git ~/.pyenv/plugins/pyenv-virtualenv +RUN pyenv install 3.5.10 && pyenv install 3.6.15 && pyenv install 3.7.17 && pyenv install 3.8.20 && pyenv install 3.9.20 && pyenv install 3.10.15 && pyenv install 3.11.10 && pyenv install 3.12.7 && pyenv install 3.13.0 WORKDIR /app COPY . . diff --git a/LICENSE b/LICENSE index ec02d7cc..db047f7e 100644 --- a/LICENSE +++ b/LICENSE @@ -186,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2013-2022 Snowplow Analytics Ltd. + Copyright 2013-2023 Snowplow Analytics Ltd. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/README.md b/README.md index 114aecb7..694d3ce9 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ Python Support | Python version | snowplow-tracker version | | :----: | :----: | +| \>=3.8 | > 1.1.0 | | \>=3.5 | > 0.10.0 | | 2.7 | > 0.9.1 | @@ -47,7 +48,7 @@ Assuming [docker](https://www.docker.com/) is installed Copyright and license --------------------- -The Snowplow Python Tracker is copyright 2013-2022 Snowplow Analytics +The Snowplow Python Tracker is copyright 2013-2023 Snowplow Analytics Ltd. 
Licensed under the [Apache License, Version diff --git a/docs/requirements.txt b/docs/requirements.txt index 229d0ff5..36f69b0c 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,9 +1,15 @@ -sphinx -sphinx_rtd_theme -sphinx_copybutton -sphinx_minipres -sphinx_tabs -sphinx_togglebutton>=0.2.0 -sphinx-autobuild + +sphinx==7.1.2 +sphinx_rtd_theme==2.0.0 +sphinx_copybutton==0.5.2 +sphinx_minipres==0.2.1 +sphinx_tabs==3.4.5 + +sphinx_togglebutton==0.3.2 +# Transitive dependency of togglebutton causing: +# https://security.snyk.io/vuln/SNYK-PYTHON-SETUPTOOLS-7448482 +setuptools==70.0.0 + +sphinx-autobuild==2021.3.14 myst_nb>0.8.3 -sphinx_rtd_theme_ext_color_contrast \ No newline at end of file +sphinx_rtd_theme_ext_color_contrast==0.3.2 diff --git a/docs/source/conf.py b/docs/source/conf.py index c1bab387..88d210c3 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -24,11 +24,11 @@ # -- Project information ----------------------------------------------------- project = 'Snowplow Python Tracker' -copyright = '2022, Alex Dean, Paul Boocock, Matus Tomlein, Jack Keene' +copyright = "2023, Alex Dean, Paul Boocock, Matus Tomlein, Jack Keene" author = 'Alex Dean, Paul Boocock, Matus Tomlein, Jack Keene' # The full version, including alpha/beta/rc tags -release = '0.11' +release = "1.1.0" # -- General configuration --------------------------------------------------- @@ -60,4 +60,4 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
-html_static_path = ['_static'] \ No newline at end of file +html_static_path = ['_static'] diff --git a/examples/app.py b/examples/app.py deleted file mode 100644 index 829055d0..00000000 --- a/examples/app.py +++ /dev/null @@ -1,38 +0,0 @@ -from distutils.log import error -from snowplow_tracker import Tracker, Emitter, Subject, SelfDescribingJson -import sys - - -def get_url_from_args(): - if len(sys.argv) != 2: - raise ValueError("Collector Endpoint is required") - return sys.argv[1] - - -def main(): - collector_url = get_url_from_args() - - e = Emitter(collector_url) - - s = Subject().set_platform("pc") - s.set_lang("en").set_user_id("test_user") - - t = Tracker(e, s) - - print("Sending events to " + collector_url) - - t.track_page_view("https://www.snowplow.io", "Homepage") - t.track_page_ping("https://www.snowplow.io", "Homepage") - t.track_link_click("https://www.snowplow.io") - - t.track_self_describing_event( - SelfDescribingJson( - "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1", - {"targetUrl": "example.com"}, - ) - ) - t.track_struct_event("shop", "add-to-basket", None, "pcs", 2) - - -if __name__ == "__main__": - main() diff --git a/examples/redis_example/README.md b/examples/redis_example/README.md new file mode 100644 index 00000000..59b1aeb7 --- /dev/null +++ b/examples/redis_example/README.md @@ -0,0 +1,26 @@ +# Redis Example App + +This example shows how to set up the Python tracker with a Redis database and a Redis worker to forward events to a Snowplow pipeline. + +#### Installation +- Install the Python tracker from the root folder of the project. + +`python setup.py install` + +- Install redis for your machine. More information can be found [here](https://redis.io/docs/getting-started/installation/) + +`brew install redis` + +- Run `redis-server` to check your redis installation, to stop the server enter `ctrl+c`. + +#### Usage +Navigate to the example folder. 
+ +`cd examples/redis_example` + +This example has two programmes, `redis_app.py` tracks events and sends them to a redis database, `redis_worker.py` then forwards these events onto a Snowplow pipeline. + +To send events to your pipeline, run `redis-server`, followed by the `redis_worker.py {{your_collector_endpoint}}` and finally `redis_app.py`. You should see 5 events in your pipeline. + + + diff --git a/examples/redis_example/redis_app.py b/examples/redis_example/redis_app.py new file mode 100644 index 00000000..553a547f --- /dev/null +++ b/examples/redis_example/redis_app.py @@ -0,0 +1,87 @@ +from snowplow_tracker import ( + Tracker, + ScreenView, + PagePing, + PageView, + SelfDescribing, + StructuredEvent, + SelfDescribingJson, +) +from snowplow_tracker.typing import PayloadDict +import json +import redis +import logging + +# logging +logging.basicConfig() +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +class RedisEmitter(object): + """ + Sends Snowplow events to a Redis database + """ + + def __init__(self, rdb=None, key: str = "redis_key") -> None: + """ + :param rdb: Optional custom Redis database + :type rdb: redis | None + :param key: The Redis key for the list of events + :type key: string + """ + + if rdb is None: + rdb = redis.StrictRedis() + + self.rdb = rdb + self.key = key + + def input(self, payload: PayloadDict) -> None: + """ + :param payload: The event properties + :type payload: dict(string:*) + """ + logger.info("Pushing event to Redis queue...") + self.rdb.rpush(self.key, json.dumps(payload)) + logger.info("Finished sending event to Redis.") + + def flush(self) -> None: + logger.warning("The RedisEmitter class does not need to be flushed") + return + + def sync_flush(self) -> None: + self.flush() + + +def main(): + emitter = RedisEmitter() + + t = Tracker(namespace="snowplow_tracker", emitters=emitter) + + page_view = PageView(page_url="https://www.snowplow.io", page_title="Homepage") + t.track(page_view) + + 
page_ping = PagePing(page_url="https://www.snowplow.io", page_title="Homepage") + t.track(page_ping) + + link_click = SelfDescribing( + SelfDescribingJson( + "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1", + {"targetUrl": "https://www.snowplow.io"}, + ) + ) + t.track(link_click) + + id = t.get_uuid() + screen_view = ScreenView(id_=id, name="name") + t.track(screen_view) + + struct_event = StructuredEvent( + category="shop", action="add-to-basket", property_="pcs", value=2 + ) + t.track(struct_event) + + +if __name__ == "__main__": + main() diff --git a/examples/redis_example/redis_worker.py b/examples/redis_example/redis_worker.py new file mode 100644 index 00000000..6a190683 --- /dev/null +++ b/examples/redis_example/redis_worker.py @@ -0,0 +1,74 @@ +import sys +from snowplow_tracker import Emitter +from typing import Any +from snowplow_tracker.typing import PayloadDict +import json +import redis +import signal +import gevent +from gevent.pool import Pool + + +def get_url_from_args(): + if len(sys.argv) != 2: + raise ValueError("Collector Endpoint is required") + return sys.argv[1] + + +class RedisWorker: + def __init__(self, emitter: Emitter, key) -> None: + self.pool = Pool(5) + self.emitter = emitter + self.rdb = redis.StrictRedis() + self.key = key + + signal.signal(signal.SIGTERM, self.request_shutdown) + signal.signal(signal.SIGINT, self.request_shutdown) + signal.signal(signal.SIGQUIT, self.request_shutdown) + + def send(self, payload: PayloadDict) -> None: + """ + Send an event to an emitter + """ + self.emitter.input(payload) + + def pop_payload(self) -> None: + """ + Get a single event from Redis and send it + If the Redis queue is empty, sleep to avoid making continual requests + """ + payload = self.rdb.lpop(self.key) + if payload: + self.pool.spawn(self.send, json.loads(payload.decode("utf-8"))) + else: + gevent.sleep(5) + + def run(self) -> None: + """ + Run indefinitely + """ + self._shutdown = False + while not self._shutdown: + 
 self.pop_payload() + self.pool.join(timeout=20) + + def request_shutdown(self, *args: Any) -> None: + """ + Halt the worker + """ + self._shutdown = True + + +def main(): + collector_url = get_url_from_args() + + # Configure Emitter + emitter = Emitter(collector_url, batch_size=1) + + # Setup worker + worker = RedisWorker(emitter=emitter, key="redis_key") + worker.run() + + +if __name__ == "__main__": + main() diff --git a/examples/redis_example/requirements.txt b/examples/redis_example/requirements.txt new file mode 100644 index 00000000..ac10dd44 --- /dev/null +++ b/examples/redis_example/requirements.txt @@ -0,0 +1,2 @@ +redis~=4.5 +gevent~=22.10 \ No newline at end of file diff --git a/examples/snowplow_api_example/README.md b/examples/snowplow_api_example/README.md new file mode 100644 index 00000000..6819757b --- /dev/null +++ b/examples/snowplow_api_example/README.md @@ -0,0 +1,18 @@ +# Snowplow API Example App + +This example shows how to set up the Python tracker with the Snowplow API to send events to a Snowplow pipeline. + +#### Installation +- Install the Python tracker from the root folder of the project. + +`python setup.py install` + +#### Usage +Navigate to the example folder. + +`cd examples/snowplow_api_example` + +To send events to your pipeline, run `snowplow_app.py {{your_collector_endpoint}}`. You should see 5 events in your pipeline. 
+ + + diff --git a/examples/snowplow_api_example/snowplow_app.py b/examples/snowplow_api_example/snowplow_app.py new file mode 100644 index 00000000..1bbd21c8 --- /dev/null +++ b/examples/snowplow_api_example/snowplow_app.py @@ -0,0 +1,74 @@ +import sys +from snowplow_tracker import ( + Snowplow, + EmitterConfiguration, + Subject, + TrackerConfiguration, + SelfDescribingJson, + PagePing, + PageView, + ScreenView, + SelfDescribing, + StructuredEvent, +) + + +def get_url_from_args(): + if len(sys.argv) != 2: + raise ValueError("Collector Endpoint is required") + return sys.argv[1] + + +def main(): + collector_url = get_url_from_args() + # Configure Emitter + custom_retry_codes = {500: False, 401: True} + emitter_config = EmitterConfiguration( + batch_size=5, custom_retry_codes=custom_retry_codes + ) + + # Configure Tracker + tracker_config = TrackerConfiguration(encode_base64=True) + + # Initialise subject + subject = Subject() + subject.set_user_id("uid") + + Snowplow.create_tracker( + namespace="ns", + endpoint=collector_url, + app_id="app1", + subject=subject, + tracker_config=tracker_config, + emitter_config=emitter_config, + ) + + tracker = Snowplow.get_tracker("ns") + + page_view = PageView(page_url="https://www.snowplow.io", page_title="Homepage") + tracker.track(page_view) + + page_ping = PagePing(page_url="https://www.snowplow.io", page_title="Homepage") + tracker.track(page_ping) + + link_click = SelfDescribing( + SelfDescribingJson( + "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1", + {"targetUrl": "https://www.snowplow.io"}, + ) + ) + tracker.track(link_click) + + id = tracker.get_uuid() + screen_view = ScreenView(id_=id, name="name") + tracker.track(screen_view) + + struct_event = StructuredEvent( + category="shop", action="add-to-basket", property_="pcs", value=2 + ) + tracker.track(struct_event) + tracker.flush() + + +if __name__ == "__main__": + main() diff --git a/examples/tracker_api_example/README.md 
b/examples/tracker_api_example/README.md new file mode 100644 index 00000000..10392b17 --- /dev/null +++ b/examples/tracker_api_example/README.md @@ -0,0 +1,18 @@ +# Example App + +This example shows how to set up the Python tracker with the tracker API to send events to a Snowplow pipeline. + +#### Installation +- Install the Python tracker from the root folder of the project. + +`python setup.py install` + +#### Usage +Navigate to the example folder. + +`cd examples/tracker_api_example` + +To send events to your pipeline, run `app.py {{your_collector_endpoint}}`. You should see 5 events in your pipeline. + + + diff --git a/examples/tracker_api_example/app.py b/examples/tracker_api_example/app.py new file mode 100644 index 00000000..41f520ce --- /dev/null +++ b/examples/tracker_api_example/app.py @@ -0,0 +1,76 @@ +from distutils.log import error +from snowplow_tracker import ( + Tracker, + Emitter, + Subject, + SelfDescribingJson, + PageView, + PagePing, + SelfDescribing, + ScreenView, + StructuredEvent, +) +import sys + + +def get_url_from_args(): + if len(sys.argv) != 2: + raise ValueError("Collector Endpoint is required") + return sys.argv[1] + + +def main(): + collector_url = get_url_from_args() + + e = Emitter(collector_url) + + s = Subject().set_platform("pc") + s.set_lang("en").set_user_id("test_user") + + t = Tracker(namespace="snowplow_tracker", emitters=e, subject=s) + + print("Sending events to " + e.endpoint) + + event_subject = Subject() + event_subject.set_color_depth(10) + + page_view = PageView( + page_url="https://www.snowplow.io", + page_title="Homepage", + event_subject=event_subject, + ) + t.track(page_view) + + page_ping = PagePing( + page_url="https://www.snowplow.io", + page_title="Homepage", + event_subject=t.subject, + ) + t.track(page_ping) + + link_click = SelfDescribing( + SelfDescribingJson( + "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1", + {"targetUrl": "https://www.snowplow.io"}, + ), + event_subject=t.subject,
) + t.track(link_click) + + id = t.get_uuid() + screen_view = ScreenView(id_=id, name="name", event_subject=t.subject) + t.track(screen_view) + + struct_event = StructuredEvent( + category="shop", + action="add-to-basket", + property_="pcs", + value=2, + event_subject=t.subject, + ) + t.track(struct_event) + t.flush() + + +if __name__ == "__main__": + main() diff --git a/requirements-test.txt b/requirements-test.txt index 668dae79..cde305f6 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,8 +1,8 @@ pytest==4.6.11; python_version < '3.10.0' -pytest==6.2.5; python_version >= '3.10.0' +pytest==8.3.2; python_version >= '3.10.0' attrs==21.2.0 httmock==1.4.0 -freezegun==1.1.0 +freezegun==1.1.0; python_version < '3.13' +freezegun==1.5.1; python_version >= '3.13' pytest-cov coveralls==3.3.1 -fakeredis==1.7.0 diff --git a/run-tests.sh b/run-tests.sh index 477f3f7e..bb938e85 100755 --- a/run-tests.sh +++ b/run-tests.sh @@ -24,95 +24,74 @@ function deploy { source deactivate fi - if [ ! -e ~/.pyenv/versions/tracker35redis ]; then - pyenv virtualenv 3.5.10 tracker35redis - pyenv activate tracker35redis - pip install .[redis] - pip install -r requirements-test.txt - source deactivate - fi - - # pyenv install 3.6.14 + # pyenv install 3.6.15 if [ ! -e ~/.pyenv/versions/tracker36 ]; then - pyenv virtualenv 3.6.14 tracker36 + pyenv virtualenv 3.6.15 tracker36 pyenv activate tracker36 pip install . pip install -r requirements-test.txt source deactivate fi - if [ ! -e ~/.pyenv/versions/tracker36redis ]; then - pyenv virtualenv 3.6.14 tracker36redis - pyenv activate tracker36redis - pip install .[redis] - pip install -r requirements-test.txt - source deactivate - fi - - # pyenv install 3.7.11 + # pyenv install 3.7.17 if [ ! -e ~/.pyenv/versions/tracker37 ]; then - pyenv virtualenv 3.7.11 tracker37 + pyenv virtualenv 3.7.17 tracker37 pyenv activate tracker37 pip install . pip install -r requirements-test.txt source deactivate fi - if [ ! 
-e ~/.pyenv/versions/tracker37redis ]; then - pyenv virtualenv 3.7.11 tracker37redis - pyenv activate tracker37redis - pip install .[redis] - pip install -r requirements-test.txt - source deactivate - fi - - # pyenv install 3.8.11 + # pyenv install 3.8.20 if [ ! -e ~/.pyenv/versions/tracker38 ]; then - pyenv virtualenv 3.8.11 tracker38 + pyenv virtualenv 3.8.20 tracker38 pyenv activate tracker38 pip install . pip install -r requirements-test.txt source deactivate fi - if [ ! -e ~/.pyenv/versions/tracker38redis ]; then - pyenv virtualenv 3.8.11 tracker38redis - pyenv activate tracker38redis - pip install .[redis] + # pyenv install 3.9.20 + if [ ! -e ~/.pyenv/versions/tracker39 ]; then + pyenv virtualenv 3.9.20 tracker39 + pyenv activate tracker39 + pip install . pip install -r requirements-test.txt source deactivate fi - # pyenv install 3.9.6 - if [ ! -e ~/.pyenv/versions/tracker39 ]; then - pyenv virtualenv 3.9.6 tracker39 - pyenv activate tracker39 + # pyenv install 3.10.15 + if [ ! -e ~/.pyenv/versions/tracker310 ]; then + pyenv virtualenv 3.10.15 tracker310 + pyenv activate tracker310 pip install . pip install -r requirements-test.txt source deactivate fi - if [ ! -e ~/.pyenv/versions/tracker39redis ]; then - pyenv virtualenv 3.9.6 tracker39redis - pyenv activate tracker39redis - pip install .[redis] + # pyenv install 3.11.10 + if [ ! -e ~/.pyenv/versions/tracker311 ]; then + pyenv virtualenv 3.11.10 tracker311 + pyenv activate tracker311 + pip install . pip install -r requirements-test.txt source deactivate fi - # pyenv install 3.10.1 - if [ ! -e ~/.pyenv/versions/tracker310 ]; then - pyenv virtualenv 3.10.1 tracker310 - pyenv activate tracker310 + # pyenv install 3.12.7 + if [ ! -e ~/.pyenv/versions/tracker312 ]; then + pyenv virtualenv 3.12.7 tracker312 + pyenv activate tracker312 pip install . pip install -r requirements-test.txt source deactivate fi - if [ ! 
-e ~/.pyenv/versions/tracker310redis ]; then - pyenv virtualenv 3.10.1 tracker310redis - pyenv activate tracker310redis - pip install .[redis] + # pyenv install 3.13.0 + if [ ! -e ~/.pyenv/versions/tracker313 ]; then + pyenv virtualenv 3.13.0 tracker313 + pyenv activate tracker313 + pip install . pip install -r requirements-test.txt source deactivate fi @@ -124,64 +103,49 @@ function run_tests { pytest source deactivate - pyenv activate tracker35redis - pytest - source deactivate - pyenv activate tracker36 pytest source deactivate - pyenv activate tracker36redis - pytest - source deactivate - pyenv activate tracker37 pytest source deactivate - pyenv activate tracker37redis - pytest - source deactivate - pyenv activate tracker38 pytest source deactivate - pyenv activate tracker38redis + pyenv activate tracker39 pytest source deactivate - pyenv activate tracker39 + pyenv activate tracker310 pytest source deactivate - pyenv activate tracker39redis + pyenv activate tracker311 pytest source deactivate - pyenv activate tracker310 + pyenv activate tracker312 pytest source deactivate - pyenv activate tracker310redis + pyenv activate tracker313 pytest source deactivate } function refresh_deploy { pyenv uninstall -f tracker35 - pyenv uninstall -f tracker35redis pyenv uninstall -f tracker36 - pyenv uninstall -f tracker36redis pyenv uninstall -f tracker37 - pyenv uninstall -f tracker37redis pyenv uninstall -f tracker38 - pyenv uninstall -f tracker38redis pyenv uninstall -f tracker39 - pyenv uninstall -f tracker39redis pyenv uninstall -f tracker310 - pyenv uninstall -f tracker310redis + pyenv uninstall -f tracker311 + pyenv uninstall -f tracker312 + pyenv uninstall -f tracker313 } diff --git a/setup.py b/setup.py index 6dd3bc73..efaf6536 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ # # setup.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. 
# This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,11 +13,7 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 -# +# """ #!/usr/bin/env python # -*- coding: utf-8 -*- @@ -27,7 +23,14 @@ except ImportError: from distutils.core import setup -authors_list = ["Anuj More", "Alexander Dean", "Fred Blundun", "Paul Boocock"] +authors_list = [ + "Anuj More", + "Alexander Dean", + "Fred Blundun", + "Paul Boocock", + "Matus Tomlein", + "Jack Keene", +] authors_str = ", ".join(authors_list) authors_email_list = [ @@ -37,15 +40,11 @@ setup( name="snowplow-tracker", - version="0.11.0", + version="1.1.0", author=authors_str, author_email=authors_email_str, - packages=[ - "snowplow_tracker", - "snowplow_tracker.test", - "snowplow_tracker.redis", - "snowplow_tracker.celery", - ], + packages=["snowplow_tracker", "snowplow_tracker.test", "snowplow_tracker.events"], + package_data={"snowplow_tracker": ["py.typed"]}, url="http://snowplow.io", license="Apache License 2.0", description="Snowplow event tracker for Python. 
Add analytics to your Python and Django apps, webapps and games", @@ -57,24 +56,22 @@ "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.5", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Operating System :: OS Independent", ], - install_requires=["requests>=2.25.1,<3.0", "typing_extensions>=3.7.4"], + install_requires=[ + "requests>=2.25.1,<3.0", + "typing_extensions>=3.7.4", + ], extras_require={ - "celery": [ - "celery>=4.0,<5.0;python_version<'3.0'", - "celery>=4.0;python_version>='3.0'", - ], - "redis": [ - "redis>=2.9.1,<4.0;python_version<'3.0'", - "redis>=2.9.1;python_version>='3.0'", - "gevent>=21.1.2", + "typing": [ + "mypy>=0.971", + "types-requests>=2.25.1,<3.0", ], }, ) diff --git a/snowplow_tracker/__init__.py b/snowplow_tracker/__init__.py index 3d618f9f..689b2539 100644 --- a/snowplow_tracker/__init__.py +++ b/snowplow_tracker/__init__.py @@ -3,10 +3,16 @@ from snowplow_tracker.emitters import logger, Emitter, AsyncEmitter from snowplow_tracker.self_describing_json import SelfDescribingJson from snowplow_tracker.tracker import Tracker +from snowplow_tracker.emitter_configuration import EmitterConfiguration +from snowplow_tracker.tracker_configuration import TrackerConfiguration +from snowplow_tracker.snowplow import Snowplow from snowplow_tracker.contracts import disable_contracts, enable_contracts - -# celery extra -from .celery import CeleryEmitter - -# redis extra -from .redis import RedisEmitter, RedisWorker +from snowplow_tracker.event_store import EventStore +from snowplow_tracker.events import ( + Event, + PageView, + PagePing, + 
SelfDescribing, + StructuredEvent, + ScreenView, +) diff --git a/snowplow_tracker/_version.py b/snowplow_tracker/_version.py index 43043c72..f4ff17a0 100644 --- a/snowplow_tracker/_version.py +++ b/snowplow_tracker/_version.py @@ -1,7 +1,7 @@ # """ # _version.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,12 +13,8 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ -__version_info__ = (0, 11, 0) +__version_info__ = (1, 1, 0) __version__ = ".".join(str(x) for x in __version_info__) __build_version__ = __version__ + "" diff --git a/snowplow_tracker/celery/__init__.py b/snowplow_tracker/celery/__init__.py deleted file mode 100644 index 2a4d905a..00000000 --- a/snowplow_tracker/celery/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .celery_emitter import CeleryEmitter diff --git a/snowplow_tracker/celery/celery_emitter.py b/snowplow_tracker/celery/celery_emitter.py deleted file mode 100644 index e7a8efae..00000000 --- a/snowplow_tracker/celery/celery_emitter.py +++ /dev/null @@ -1,85 +0,0 @@ -# """ -# celery_emitter.py - -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. - -# This program is licensed to you under the Apache License Version 2.0, -# and you may not use this file except in compliance with the Apache License -# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at -# http://www.apache.org/licenses/LICENSE-2.0. 
- -# Unless required by applicable law or agreed to in writing, -# software distributed under the Apache License Version 2.0 is distributed on -# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the Apache License Version 2.0 for the specific -# language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 -# """ - -import logging -from typing import Any, Optional - -from snowplow_tracker.emitters import Emitter -from snowplow_tracker.typing import HttpProtocol, Method - -_CELERY_OPT = True -try: - from celery import Celery -except ImportError: - _CELERY_OPT = False - -# logging -logging.basicConfig() -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -class CeleryEmitter(Emitter): - """ - Uses a Celery worker to send HTTP requests asynchronously. - Works like the base Emitter class, - but on_success and on_failure callbacks cannot be set. 
- """ - if _CELERY_OPT: - - celery_app = None - - def __init__( - self, - endpoint: str, - protocol: HttpProtocol = "http", - port: Optional[int] = None, - method: Method = "get", - buffer_size: Optional[int] = None, - byte_limit: Optional[int] = None) -> None: - super(CeleryEmitter, self).__init__(endpoint, protocol, port, method, buffer_size, None, None, byte_limit) - - try: - # Check whether a custom Celery configuration module named "snowplow_celery_config" exists - import snowplow_celery_config - self.celery_app = Celery() - self.celery_app.config_from_object(snowplow_celery_config) - except ImportError: - # Otherwise configure Celery with default settings - self.celery_app = Celery("Snowplow", broker="redis://guest@localhost//") - - self.async_flush = self.celery_app.task(self.async_flush) - - def flush(self) -> None: - """ - Schedules a flush task - """ - self.async_flush.delay() - logger.info("Scheduled a Celery task to flush the event queue") - - def async_flush(self) -> None: - super(CeleryEmitter, self).flush() - - else: - - def __new__(cls, *args: Any, **kwargs: Any) -> 'CeleryEmitter': - logger.error("CeleryEmitter is not available. Please install snowplow-tracker with celery extra dependency.") - raise RuntimeError('CeleryEmitter is not available. To use: `pip install snowplow-tracker[celery]`') diff --git a/snowplow_tracker/constants.py b/snowplow_tracker/constants.py new file mode 100644 index 00000000..53ecc151 --- /dev/null +++ b/snowplow_tracker/constants.py @@ -0,0 +1,27 @@ +# """ +# constants.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. 
+ +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ +from typing import List +from snowplow_tracker import _version, SelfDescribingJson + +VERSION = "py-%s" % _version.__version__ +DEFAULT_ENCODE_BASE64: bool = True # Type hint required for Python 3.6 MyPy check +BASE_SCHEMA_PATH = "iglu:com.snowplowanalytics.snowplow" +MOBILE_SCHEMA_PATH = "iglu:com.snowplowanalytics.mobile" +SCHEMA_TAG = "jsonschema" +CONTEXT_SCHEMA = "%s/contexts/%s/1-0-1" % (BASE_SCHEMA_PATH, SCHEMA_TAG) +UNSTRUCT_EVENT_SCHEMA = "%s/unstruct_event/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG) +ContextArray = List[SelfDescribingJson] diff --git a/snowplow_tracker/contracts.py b/snowplow_tracker/contracts.py index 2db8b449..3b17e1a3 100644 --- a/snowplow_tracker/contracts.py +++ b/snowplow_tracker/contracts.py @@ -1,7 +1,7 @@ # """ # contracts.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. 
- -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock, Matus Tomlein -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import traceback @@ -45,7 +41,9 @@ def contracts_enabled() -> bool: def greater_than(value: float, compared_to: float) -> None: if contracts_enabled() and value <= compared_to: - raise ValueError("{0} must be greater than {1}.".format(_get_parameter_name(), compared_to)) + raise ValueError( + "{0} must be greater than {1}.".format(_get_parameter_name(), compared_to) + ) def non_empty(seq: Sized) -> None: @@ -78,21 +76,26 @@ def _get_parameter_name() -> str: match = _MATCH_FIRST_PARAMETER_REGEX.search(code) if not match: - return 'Unnamed parameter' - return match.groups(0)[0] + return "Unnamed parameter" + return str(match.groups(0)[0]) def _check_form_element(element: Dict[str, Any]) -> bool: """ - Helper method to check that dictionary conforms element - in sumbit_form and change_form schemas + Helper method to check that dictionary conforms element + in sumbit_form and change_form schemas """ - all_present = isinstance(element, dict) and 'name' in element and 'value' in element and 'nodeName' in element + all_present = ( + isinstance(element, dict) + and "name" in element + and "value" in element + and "nodeName" in element + ) try: - if element['type'] in FORM_TYPES: + if element["type"] in FORM_TYPES: type_valid = True else: type_valid = False except KeyError: type_valid = True - return all_present and element['nodeName'] in FORM_NODE_NAMES and type_valid + return all_present and element["nodeName"] in FORM_NODE_NAMES and type_valid diff --git a/snowplow_tracker/emitter_configuration.py b/snowplow_tracker/emitter_configuration.py new file mode 100644 index 00000000..82626fa4 --- /dev/null +++ b/snowplow_tracker/emitter_configuration.py @@ -0,0 +1,213 @@ +# """ +# emitter_configuration.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. 
+ +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ + +from typing import Optional, Union, Tuple, Dict +from snowplow_tracker.typing import SuccessCallback, FailureCallback +from snowplow_tracker.event_store import EventStore +import requests + + +class EmitterConfiguration(object): + def __init__( + self, + batch_size: Optional[int] = None, + on_success: Optional[SuccessCallback] = None, + on_failure: Optional[FailureCallback] = None, + byte_limit: Optional[int] = None, + request_timeout: Optional[Union[float, Tuple[float, float]]] = None, + buffer_capacity: Optional[int] = None, + custom_retry_codes: Dict[int, bool] = {}, + event_store: Optional[EventStore] = None, + session: Optional[requests.Session] = None, + ) -> None: + """ + Configuration for the emitter that sends events to the Snowplow collector. + :param batch_size: The maximum number of queued events before the buffer is flushed. Default is 10. 
+ :type batch_size: int | None + :param on_success: Callback executed after every HTTP request in a flush has status code 200 + Gets passed one argument, an array of dictionaries corresponding to the sent events' payloads + :type on_success: function | None + :param on_failure: Callback executed if at least one HTTP request in a flush has status code other than 200 + Gets passed two arguments: + 1) The number of events which were successfully sent + 2) An array of dictionaries corresponding to the unsent events' payloads + :type on_failure: function | None + :param byte_limit: The size event list after reaching which queued events will be flushed + :type byte_limit: int | None + :param request_timeout: Timeout for the HTTP requests. Can be set either as single float value which + applies to both "connect" AND "read" timeout, or as tuple with two float values + which specify the "connect" and "read" timeouts separately + :type request_timeout: float | tuple | None + :param custom_retry_codes: Set custom retry rules for HTTP status codes received in emit responses from the Collector. + By default, retry will not occur for status codes 400, 401, 403, 410 or 422. This can be overridden here. + Note that 2xx codes will never retry as they are considered successful. + :type custom_retry_codes: dict + :param event_store: Stores the event buffer and buffer capacity. Default is an InMemoryEventStore object with buffer_capacity of 10,000 events. 
+ :type event_store: EventStore | None + :param session: Persist parameters across requests by using a session object + :type session: request.Session | None + """ + + self.batch_size = batch_size + self.on_success = on_success + self.on_failure = on_failure + self.byte_limit = byte_limit + self.request_timeout = request_timeout + self.buffer_capacity = buffer_capacity + self.custom_retry_codes = custom_retry_codes + self.event_store = event_store + self.session = session + + @property + def batch_size(self) -> Optional[int]: + """ + The maximum number of queued events before the buffer is flushed. Default is 10. + """ + return self._batch_size + + @batch_size.setter + def batch_size(self, value: Optional[int]): + if isinstance(value, int) and value < 0: + raise ValueError("batch_size must greater than 0") + if not isinstance(value, int) and value is not None: + raise ValueError("batch_size must be of type int") + self._batch_size = value + + @property + def on_success(self) -> Optional[SuccessCallback]: + """ + Callback executed after every HTTP request in a flush has status code 200. Gets passed the number of events flushed. 
+ """ + return self._on_success + + @on_success.setter + def on_success(self, value: Optional[SuccessCallback]): + self._on_success = value + + @property + def on_failure(self) -> Optional[FailureCallback]: + """ + Callback executed if at least one HTTP request in a flush has status code other than 200 + Gets passed two arguments: + 1) The number of events which were successfully sent + 2) An array of dictionaries corresponding to the unsent events' payloads + """ + return self._on_failure + + @on_failure.setter + def on_failure(self, value: Optional[FailureCallback]): + self._on_failure = value + + @property + def byte_limit(self) -> Optional[int]: + """ + The size event list after reaching which queued events will be flushed + """ + return self._byte_limit + + @byte_limit.setter + def byte_limit(self, value: Optional[int]): + if isinstance(value, int) and value < 0: + raise ValueError("byte_limit must greater than 0") + if not isinstance(value, int) and value is not None: + raise ValueError("byte_limit must be of type int") + self._byte_limit = value + + @property + def request_timeout(self) -> Optional[Union[float, Tuple[float, float]]]: + """ + Timeout for the HTTP requests. Can be set either as single float value which + applies to both "connect" AND "read" timeout, or as tuple with two float values + which specify the "connect" and "read" timeouts separately + """ + return self._request_timeout + + @request_timeout.setter + def request_timeout(self, value: Optional[Union[float, Tuple[float, float]]]): + self._request_timeout = value + + @property + def buffer_capacity(self) -> Optional[int]: + """ + The maximum capacity of the event buffer. The default buffer capacity is 10 000 events. + When the buffer is full new events are lost. 
+ """ + return self._buffer_capacity + + @buffer_capacity.setter + def buffer_capacity(self, value: Optional[int]): + if isinstance(value, int) and value < 0: + raise ValueError("buffer_capacity must greater than 0") + if not isinstance(value, int) and value is not None: + raise ValueError("buffer_capacity must be of type int") + self._buffer_capacity = value + + @property + def custom_retry_codes(self) -> Dict[int, bool]: + """ + Custom retry rules for HTTP status codes received in emit responses from the Collector. + """ + return self._custom_retry_codes + + @custom_retry_codes.setter + def custom_retry_codes(self, value: Dict[int, bool]): + self._custom_retry_codes = value + + def set_retry_code(self, status_code: int, retry=True) -> bool: + """ + Add a retry rule for HTTP status code received from emit responses from the Collector. + :param status_code: HTTP response code + :type status_code: int + :param retry: Set the status_code to retry (True) or not retry (False). Default is True + :type retry: bool + """ + if not isinstance(status_code, int): + print("status_code must be of type int") + return False + + if not isinstance(retry, bool): + print("retry must be of type bool") + return False + + if 200 <= status_code < 300: + print( + "custom_retry_codes should not include codes for succesful requests (2XX codes)" + ) + return False + + self.custom_retry_codes[status_code] = retry + + return status_code in self.custom_retry_codes.keys() + + @property + def event_store(self) -> Optional[EventStore]: + return self._event_store + + @event_store.setter + def event_store(self, value: Optional[EventStore]): + self._event_store = value + + @property + def session(self) -> Optional[requests.Session]: + """ + Persist parameters across requests using a requests.Session object + """ + return self._session + + @session.setter + def session(self, value: Optional[requests.Session]): + self._session = value diff --git a/snowplow_tracker/emitters.py 
b/snowplow_tracker/emitters.py index 2deb0345..72f451bf 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -1,7 +1,7 @@ # """ # emitters.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,23 +13,28 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ - import logging import time import threading import requests -from typing import Optional, Union, Tuple +import random +from typing import Optional, Union, Tuple, Dict, cast, Callable from queue import Queue from snowplow_tracker.self_describing_json import SelfDescribingJson -from snowplow_tracker.typing import PayloadDict, PayloadDictList, HttpProtocol, Method, SuccessCallback, FailureCallback +from snowplow_tracker.typing import ( + PayloadDict, + PayloadDictList, + HttpProtocol, + Method, + SuccessCallback, + FailureCallback, + EmitterProtocol, +) from snowplow_tracker.contracts import one_of +from snowplow_tracker.event_store import EventStore, InMemoryEventStore # logging logging.basicConfig() @@ -37,54 +42,87 @@ logger.setLevel(logging.INFO) DEFAULT_MAX_LENGTH = 10 -PAYLOAD_DATA_SCHEMA = "iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4" +PAYLOAD_DATA_SCHEMA = ( + "iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4" +) PROTOCOLS = {"http", "https"} METHODS = {"get", "post"} -class Emitter(object): +# Unifies the two request methods under one interface +class Requester: + post:
Callable + get: Callable + + def __init__(self, post: Callable, get: Callable): + # 3.6 MyPy compatibility: + # error: Cannot assign to a method + # https://github.com/python/mypy/issues/2427 + setattr(self, "post", post) + setattr(self, "get", get) + + +class Emitter(EmitterProtocol): """ - Synchronously send Snowplow events to a Snowplow collector - Supports both GET and POST requests + Synchronously send Snowplow events to a Snowplow collector + Supports both GET and POST requests """ def __init__( - self, - endpoint: str, - protocol: HttpProtocol = "http", - port: Optional[int] = None, - method: Method = "get", - buffer_size: Optional[int] = None, - on_success: Optional[SuccessCallback] = None, - on_failure: Optional[FailureCallback] = None, - byte_limit: Optional[int] = None, - request_timeout: Optional[Union[float, Tuple[float, float]]] = None) -> None: - """ - :param endpoint: The collector URL. Don't include "http://" - this is done automatically. - :type endpoint: string - :param protocol: The protocol to use - http or https. Defaults to http. - :type protocol: protocol - :param port: The collector port to connect to - :type port: int | None - :param method: The HTTP request method - :type method: method - :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 10. - :type buffer_size: int | None - :param on_success: Callback executed after every HTTP request in a flush has status code 200 - Gets passed the number of events flushed. 
- :type on_success: function | None - :param on_failure: Callback executed if at least one HTTP request in a flush has status code other than 200 - Gets passed two arguments: - 1) The number of events which were successfully sent - 2) If method is "post": The unsent data in string form; - If method is "get": An array of dictionaries corresponding to the unsent events' payloads - :type on_failure: function | None - :param byte_limit: The size event list after reaching which queued events will be flushed - :type byte_limit: int | None - :param request_timeout: Timeout for the HTTP requests. Can be set either as single float value which - applies to both "connect" AND "read" timeout, or as tuple with two float values - which specify the "connect" and "read" timeouts separately - :type request_timeout: float | tuple | None + self, + endpoint: str, + protocol: HttpProtocol = "https", + port: Optional[int] = None, + method: Method = "post", + batch_size: Optional[int] = None, + on_success: Optional[SuccessCallback] = None, + on_failure: Optional[FailureCallback] = None, + byte_limit: Optional[int] = None, + request_timeout: Optional[Union[float, Tuple[float, float]]] = None, + max_retry_delay_seconds: int = 60, + buffer_capacity: Optional[int] = None, + custom_retry_codes: Dict[int, bool] = {}, + event_store: Optional[EventStore] = None, + session: Optional[requests.Session] = None, + ) -> None: + """ + :param endpoint: The collector URL. If protocol is not set in endpoint it will automatically set to "https://" - this is done automatically. + :type endpoint: string + :param protocol: The protocol to use - http or https. Defaults to https. + :type protocol: protocol + :param port: The collector port to connect to + :type port: int | None + :param method: The HTTP request method. Defaults to post. + :type method: method + :param batch_size: The maximum number of queued events before the buffer is flushed. Default is 10. 
+ :type batch_size: int | None + :param on_success: Callback executed after every HTTP request in a flush has status code 200 + Gets passed one argument, an array of dictionaries corresponding to the sent events' payloads + :type on_success: function | None + :param on_failure: Callback executed if at least one HTTP request in a flush has status code other than 200 + Gets passed two arguments: + 1) The number of events which were successfully sent + 2) An array of dictionaries corresponding to the unsent events' payloads + :type on_failure: function | None + :param byte_limit: The size event list after reaching which queued events will be flushed + :type byte_limit: int | None + :param request_timeout: Timeout for the HTTP requests. Can be set either as single float value which + applies to both "connect" AND "read" timeout, or as tuple with two float values + which specify the "connect" and "read" timeouts separately + :type request_timeout: float | tuple | None + :param max_retry_delay_seconds: Set the maximum time between attempts to send failed events to the collector. Default 60 seconds + :type max_retry_delay_seconds: int + :param buffer_capacity: The maximum capacity of the event buffer. + When the buffer is full new events are lost. + :type buffer_capacity: int + :param custom_retry_codes: Set custom retry rules for HTTP status codes received in emit responses from the Collector. + By default, retry will not occur for status codes 400, 401, 403, 410 or 422. This can be overridden here. + Note that 2xx codes will never retry as they are considered successful. + :type custom_retry_codes: dict + :param event_store: Stores the event buffer and buffer capacity. Default is an InMemoryEventStore object with buffer_capacity of 10,000 events. 
+ :type event_store: EventStore | None + :param session: Persist parameters across requests by using a session object + :type session: requests.Session | None """ one_of(protocol, PROTOCOLS) one_of(method, METHODS) @@ -93,13 +131,26 @@ def __init__( self.method = method - if buffer_size is None: + if event_store is None: + if buffer_capacity is None: + event_store = InMemoryEventStore(logger=logger) + else: + event_store = InMemoryEventStore( + buffer_capacity=buffer_capacity, logger=logger + ) + + self.event_store = event_store + + if batch_size is None: if method == "post": - buffer_size = DEFAULT_MAX_LENGTH + batch_size = DEFAULT_MAX_LENGTH else: - buffer_size = 1 - self.buffer_size = buffer_size - self.buffer = [] + batch_size = 1 + + if buffer_capacity is not None and batch_size > buffer_capacity: + batch_size = buffer_capacity + + self.batch_size = batch_size self.byte_limit = byte_limit self.bytes_queued = None if byte_limit is None else 0 self.request_timeout = request_timeout @@ -109,30 +160,48 @@ def __init__( self.lock = threading.RLock() - self.timer = None + self.timer = FlushTimer(emitter=self, repeating=True) + self.retry_timer = FlushTimer(emitter=self, repeating=False) + + self.max_retry_delay_seconds = max_retry_delay_seconds + self.retry_delay: Union[int, float] = 0 + self.custom_retry_codes = custom_retry_codes logger.info("Emitter initialized with endpoint " + self.endpoint) + if session is None: + self.request_method = Requester(post=requests.post, get=requests.get) + else: + self.request_method = Requester(post=session.post, get=session.get) + @staticmethod def as_collector_uri( - endpoint: str, - protocol: HttpProtocol = "http", - port: Optional[int] = None, - method: Method = "get") -> str: - """ - :param endpoint: The raw endpoint provided by the user - :type endpoint: string - :param protocol: The protocol to use - http or https - :type protocol: protocol - :param port: The collector port to connect to - :type port: int | None - :param 
method: Either `get` or `post` HTTP method - :type method: method - :rtype: string + endpoint: str, + protocol: HttpProtocol = "https", + port: Optional[int] = None, + method: Method = "post", + ) -> str: + """ + :param endpoint: The raw endpoint provided by the user + :type endpoint: string + :param protocol: The protocol to use - http or https + :type protocol: protocol + :param port: The collector port to connect to + :type port: int | None + :param method: Either `get` or `post` HTTP method + :type method: method + :rtype: string """ if len(endpoint) < 1: raise ValueError("No endpoint provided.") + endpoint = endpoint.rstrip("/") + + if endpoint.split("://")[0] in PROTOCOLS: + endpoint_arr = endpoint.split("://") + protocol = cast(HttpProtocol, endpoint_arr[0]) + endpoint = endpoint_arr[1] + if method == "get": path = "/i" else: @@ -144,105 +213,108 @@ def as_collector_uri( def input(self, payload: PayloadDict) -> None: """ - Adds an event to the buffer. - If the maximum size has been reached, flushes the buffer. + Adds an event to the buffer. + If the maximum size has been reached, flushes the buffer. 
- :param payload: The name-value pairs for the event - :type payload: dict(string:\*) + :param payload: The name-value pairs for the event + :type payload: dict(string:\\*) """ with self.lock: if self.bytes_queued is not None: self.bytes_queued += len(str(payload)) if self.method == "post": - self.buffer.append({key: str(payload[key]) for key in payload}) + self.event_store.add_event({key: str(payload[key]) for key in payload}) else: - self.buffer.append(payload) + self.event_store.add_event(payload) if self.reached_limit(): self.flush() def reached_limit(self) -> bool: """ - Checks if event-size or bytes limit are reached + Checks if event-size or bytes limit are reached - :rtype: bool + :rtype: bool """ if self.byte_limit is None: - return len(self.buffer) >= self.buffer_size + return self.event_store.size() >= self.batch_size else: - return (self.bytes_queued or 0) >= self.byte_limit or len(self.buffer) >= self.buffer_size + return ( + self.bytes_queued or 0 + ) >= self.byte_limit or self.event_store.size() >= self.batch_size def flush(self) -> None: """ - Sends all events in the buffer to the collector. + Sends all events in the buffer to the collector. """ with self.lock: - self.send_events(self.buffer) - self.buffer = [] + if self.retry_timer.is_active(): + return + send_events = self.event_store.get_events_batch() + self.send_events(send_events) if self.bytes_queued is not None: self.bytes_queued = 0 - def http_post(self, data: str) -> bool: + def http_post(self, data: str) -> int: """ - :param data: The array of JSONs to be sent - :type data: string + :param data: The array of JSONs to be sent + :type data: string """ logger.info("Sending POST request to %s..." 
% self.endpoint) logger.debug("Payload: %s" % data) - post_succeeded = False try: - r = requests.post( + r = self.request_method.post( self.endpoint, data=data, - headers={'Content-Type': 'application/json; charset=utf-8'}, - timeout=self.request_timeout) - post_succeeded = Emitter.is_good_status_code(r.status_code) - getattr(logger, "info" if post_succeeded else "warning")("POST request finished with status code: " + str(r.status_code)) + headers={"Content-Type": "application/json; charset=utf-8"}, + timeout=self.request_timeout, + ) except requests.RequestException as e: logger.warning(e) + return -1 - return post_succeeded + return r.status_code - def http_get(self, payload: PayloadDict) -> bool: + def http_get(self, payload: PayloadDict) -> int: """ - :param payload: The event properties - :type payload: dict(string:\*) + :param payload: The event properties + :type payload: dict(string:\\*) """ logger.info("Sending GET request to %s..." % self.endpoint) logger.debug("Payload: %s" % payload) - get_succeeded = False try: - r = requests.get(self.endpoint, params=payload, timeout=self.request_timeout) - get_succeeded = Emitter.is_good_status_code(r.status_code) - getattr(logger, "info" if get_succeeded else "warning")("GET request finished with status code: " + str(r.status_code)) + r = self.request_method.get( + self.endpoint, params=payload, timeout=self.request_timeout + ) except requests.RequestException as e: logger.warning(e) + return -1 - return get_succeeded + return r.status_code def sync_flush(self) -> None: """ - Calls the flush method of the base Emitter class. - This is guaranteed to be blocking, not asynchronous. + Calls the flush method of the base Emitter class. + This is guaranteed to be blocking, not asynchronous. 
""" logger.debug("Starting synchronous flush...") - Emitter.flush(self) + self.flush() logger.info("Finished synchronous flush") @staticmethod def is_good_status_code(status_code: int) -> bool: """ - :param status_code: HTTP status code - :type status_code: int - :rtype: bool + :param status_code: HTTP status code + :type status_code: int + :rtype: bool """ - return 200 <= status_code < 400 + return 200 <= status_code < 300 def send_events(self, evts: PayloadDictList) -> None: """ - :param evts: Array of events to be sent - :type evts: list(dict(string:\*)) + :param evts: Array of events to be sent + :type evts: list(dict(string:\\*)) """ if len(evts) > 0: logger.info("Attempting to send %s events" % len(evts)) @@ -251,17 +323,20 @@ def send_events(self, evts: PayloadDictList) -> None: success_events = [] failure_events = [] - if self.method == 'post': + if self.method == "post": data = SelfDescribingJson(PAYLOAD_DATA_SCHEMA, evts).to_string() - request_succeeded = self.http_post(data) + status_code = self.http_post(data) + request_succeeded = Emitter.is_good_status_code(status_code) if request_succeeded: success_events += evts else: failure_events += evts - elif self.method == 'get': + elif self.method == "get": for evt in evts: - request_succeeded = self.http_get(evt) + status_code = self.http_get(evt) + request_succeeded = Emitter.is_good_status_code(status_code) + if request_succeeded: success_events += [evt] else: @@ -272,94 +347,180 @@ def send_events(self, evts: PayloadDictList) -> None: if self.on_failure is not None and len(failure_events) > 0: self.on_failure(len(success_events), failure_events) + if self._should_retry(status_code): + self._set_retry_delay() + self._retry_failed_events(failure_events) + else: + self.event_store.cleanup(success_events, False) + self._reset_retry_delay() else: logger.info("Skipping flush since buffer is empty") - def set_flush_timer(self, timeout: float, flush_now: bool = False) -> None: + def _set_retry_timer(self, 
timeout: float) -> None: """ - Set an interval at which the buffer will be flushed + Set an interval at which failed events will be retried - :param timeout: interval in seconds - :type timeout: int | float - :param flush_now: immediately flush buffer - :type flush_now: bool + :param timeout: interval in seconds + :type timeout: int | float """ + self.retry_timer.start(timeout=timeout) - # Repeatable create new timer - if flush_now: - self.flush() - self.timer = threading.Timer(timeout, self.set_flush_timer, [timeout, True]) - self.timer.daemon = True - self.timer.start() + def set_flush_timer(self, timeout: float) -> None: + """ + Set an interval at which the buffer will be flushed + :param timeout: interval in seconds + :type timeout: int | float + """ + self.timer.start(timeout=timeout) def cancel_flush_timer(self) -> None: """ - Abort automatic async flushing + Abort automatic async flushing """ - - if self.timer is not None: - self.timer.cancel() + self.timer.cancel() @staticmethod def attach_sent_timestamp(events: PayloadDictList) -> None: - """ - Attach (by mutating in-place) current timestamp in milliseconds - as `stm` param + """ + Attach (by mutating in-place) current timestamp in milliseconds + as `stm` param - :param events: Array of events to be sent - :type events: list(dict(string:\*)) - :rtype: None + :param events: Array of events to be sent + :type events: list(dict(string:\\*)) + :rtype: None """ + def update(e: PayloadDict) -> None: - e.update({'stm': str(int(time.time()) * 1000)}) + e.update({"stm": str(int(time.time()) * 1000)}) for event in events: update(event) + def _should_retry(self, status_code: int) -> bool: + """ + Checks if a request should be retried + + :param status_code: Response status code + :type status_code: int + :rtype: bool + """ + if Emitter.is_good_status_code(status_code): + return False + + if status_code in self.custom_retry_codes.keys(): + return self.custom_retry_codes[status_code] + + return status_code not in [400, 
401, 403, 410, 422] + + def _set_retry_delay(self) -> None: + """ + Sets a delay to retry failed events + """ + random_noise = random.random() + self.retry_delay = min( + self.retry_delay * 2 + random_noise, self.max_retry_delay_seconds + ) + + def _reset_retry_delay(self) -> None: + """ + Resets retry delay to 0 + """ + self.retry_delay = 0 + + def _retry_failed_events(self, failed_events) -> None: + """ + Adds failed events back to the buffer to retry + + :param failed_events: List of failed events + :type List + """ + self.event_store.cleanup(failed_events, True) + self._set_retry_timer(self.retry_delay) + + def _cancel_retry_timer(self) -> None: + """ + Cancels a retry timer + """ + self.retry_timer.cancel() + + # This is only here to satisfy the `EmitterProtocol` interface + def async_flush(self) -> None: + return + class AsyncEmitter(Emitter): """ - Uses threads to send HTTP requests asynchronously + Uses threads to send HTTP requests asynchronously """ def __init__( - self, - endpoint: str, - protocol: HttpProtocol = "http", - port: Optional[int] = None, - method: Method = "get", - buffer_size: Optional[int] = None, - on_success: Optional[SuccessCallback] = None, - on_failure: Optional[FailureCallback] = None, - thread_count: int = 1, - byte_limit: Optional[int] = None) -> None: - """ - :param endpoint: The collector URL. Don't include "http://" - this is done automatically. - :type endpoint: string - :param protocol: The protocol to use - http or https. Defaults to http. - :type protocol: protocol - :param port: The collector port to connect to - :type port: int | None - :param method: The HTTP request method - :type method: method - :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 10. - :type buffer_size: int | None - :param on_success: Callback executed after every HTTP request in a flush has status code 200 - Gets passed the number of events flushed. 
- :type on_success: function | None - :param on_failure: Callback executed if at least one HTTP request in a flush has status code other than 200 - Gets passed two arguments: - 1) The number of events which were successfully sent - 2) If method is "post": The unsent data in string form; - If method is "get": An array of dictionaries corresponding to the unsent events' payloads - :type on_failure: function | None - :param thread_count: Number of worker threads to use for HTTP requests - :type thread_count: int - :param byte_limit: The size event list after reaching which queued events will be flushed - :type byte_limit: int | None - """ - super(AsyncEmitter, self).__init__(endpoint, protocol, port, method, buffer_size, on_success, on_failure, byte_limit) - self.queue = Queue() + self, + endpoint: str, + protocol: HttpProtocol = "http", + port: Optional[int] = None, + method: Method = "post", + batch_size: Optional[int] = None, + on_success: Optional[SuccessCallback] = None, + on_failure: Optional[FailureCallback] = None, + thread_count: int = 1, + byte_limit: Optional[int] = None, + request_timeout: Optional[Union[float, Tuple[float, float]]] = None, + max_retry_delay_seconds: int = 60, + buffer_capacity: Optional[int] = None, + custom_retry_codes: Dict[int, bool] = {}, + event_store: Optional[EventStore] = None, + session: Optional[requests.Session] = None, + ) -> None: + """ + :param endpoint: The collector URL. If protocol is not set in endpoint it will automatically set to "https://" - this is done automatically. + :type endpoint: string + :param protocol: The protocol to use - http or https. Defaults to http. + :type protocol: protocol + :param port: The collector port to connect to + :type port: int | None + :param method: The HTTP request method + :type method: method + :param batch_size: The maximum number of queued events before the buffer is flushed. Default is 10. 
+ :type batch_size: int | None + :param on_success: Callback executed after every HTTP request in a flush has status code 200 + Gets passed one argument, an array of dictionaries corresponding to the sent events' payloads + :type on_success: function | None + :param on_failure: Callback executed if at least one HTTP request in a flush has status code other than 200 + Gets passed two arguments: + 1) The number of events which were successfully sent + 2) An array of dictionaries corresponding to the unsent events' payloads + :type on_failure: function | None + :param thread_count: Number of worker threads to use for HTTP requests + :type thread_count: int + :param byte_limit: The size event list after reaching which queued events will be flushed + :type byte_limit: int | None + :param max_retry_delay_seconds: Set the maximum time between attempts to send failed events to the collector. Default 60 seconds + :type max_retry_delay_seconds: int + :param buffer_capacity: The maximum capacity of the event buffer. + When the buffer is full new events are lost. + :type buffer_capacity: int + :param event_store: Stores the event buffer and buffer capacity. Default is an InMemoryEventStore object with buffer_capacity of 10,000 events. 
+ :type event_store: EventStore + :param session: Persist parameters across requests by using a session object + :type session: requests.Session | None + """ + super(AsyncEmitter, self).__init__( + endpoint=endpoint, + protocol=protocol, + port=port, + method=method, + batch_size=batch_size, + on_success=on_success, + on_failure=on_failure, + byte_limit=byte_limit, + request_timeout=request_timeout, + max_retry_delay_seconds=max_retry_delay_seconds, + buffer_capacity=buffer_capacity, + custom_retry_codes=custom_retry_codes, + event_store=event_store, + session=session, + ) + self.queue: Queue = Queue() for i in range(thread_count): t = threading.Thread(target=self.consume) t.daemon = True @@ -369,17 +530,16 @@ def sync_flush(self) -> None: while True: self.flush() self.queue.join() - if len(self.buffer) < 1: + if self.event_store.size() < 1: break def flush(self) -> None: """ - Removes all dead threads, then creates a new thread which - executes the flush method of the base Emitter class + Removes all dead threads, then creates a new thread which + executes the flush method of the base Emitter class """ with self.lock: - self.queue.put(self.buffer) - self.buffer = [] + self.queue.put(self.event_store.get_events_batch()) if self.bytes_queued is not None: self.bytes_queued = 0 @@ -388,3 +548,47 @@ def consume(self) -> None: evts = self.queue.get() self.send_events(evts) self.queue.task_done() + + +class FlushTimer(object): + """ + Internal class used by the Emitter to schedule flush calls for later. 
+ """ + + def __init__(self, emitter: Emitter, repeating: bool): + self.emitter = emitter + self.repeating = repeating + self.timer: Optional[threading.Timer] = None + self.lock = threading.RLock() + + def start(self, timeout: float) -> bool: + with self.lock: + if self.timer is not None: + return False + else: + self._schedule_timer(timeout=timeout) + return True + + def cancel(self) -> None: + with self.lock: + if self.timer is not None: + self.timer.cancel() + self.timer = None + + def is_active(self) -> bool: + with self.lock: + return self.timer is not None + + def _fire(self, timeout: float) -> None: + with self.lock: + if self.repeating: + self._schedule_timer(timeout) + else: + self.timer = None + + self.emitter.flush() + + def _schedule_timer(self, timeout: float) -> None: + self.timer = threading.Timer(timeout, self._fire, [timeout]) + self.timer.daemon = True + self.timer.start() diff --git a/snowplow_tracker/event_store.py b/snowplow_tracker/event_store.py new file mode 100644 index 00000000..b8d13028 --- /dev/null +++ b/snowplow_tracker/event_store.py @@ -0,0 +1,139 @@ +# """ +# event_store.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ + +from typing import List +from typing_extensions import Protocol +from snowplow_tracker.typing import PayloadDict, PayloadDictList +from logging import Logger + + +class EventStore(Protocol): + """ + EventStore protocol. For buffering events in the Emitter. + """ + + def add_event(self, payload: PayloadDict) -> bool: + """ + Add PayloadDict to buffer. Returns True if successful. + + :param payload: The payload to add + :type payload: PayloadDict + :rtype bool + """ + ... + + def get_events_batch(self) -> PayloadDictList: + """ + Get a list of all the PayloadDicts in the buffer. + + :rtype PayloadDictList + """ + ... + + def cleanup(self, batch: PayloadDictList, need_retry: bool) -> None: + """ + Removes sent events from the event store. If events need to be retried they are re-added to the buffer. + + :param batch: The events to be removed from the buffer + :type batch: PayloadDictList + :param need_retry Whether the events should be re-sent or not + :type need_retry bool + """ + ... + + def size(self) -> int: + """ + Returns the number of events in the buffer + + :rtype int + """ + ... + + +class InMemoryEventStore(EventStore): + """ + Create a InMemoryEventStore object with custom buffer capacity. The default is 10,000 events. + """ + + def __init__(self, logger: Logger, buffer_capacity: int = 10000) -> None: + """ + :param logger: Logging module + :type logger: Logger + :param buffer_capacity: The maximum capacity of the event buffer. + When the buffer is full new events are lost. + :type buffer_capacity int + """ + self.event_buffer: List[PayloadDict] = [] + self.buffer_capacity = buffer_capacity + self.logger = logger + + def add_event(self, payload: PayloadDict) -> bool: + """ + Add PayloadDict to buffer. 
+ + :param payload: The payload to add + :type payload: PayloadDict + """ + if self._buffer_capacity_reached(): + self.logger.error("Event buffer is full, dropping event.") + return False + + self.event_buffer.append(payload) + return True + + def get_events_batch(self) -> PayloadDictList: + """ + Get a list of all the PayloadDicts in the in the buffer. + + :rtype PayloadDictList + """ + batch = self.event_buffer + self.event_buffer = [] + return batch + + def cleanup(self, batch: PayloadDictList, need_retry: bool) -> None: + """ + Removes sent events from the InMemoryEventStore buffer. If events need to be retried they are re-added to the buffer. + + :param batch: The events to be removed from the buffer + :type batch: PayloadDictList + :param need_retry Whether the events should be re-sent or not + :type need_retry bool + """ + if not need_retry: + return + + for event in batch: + if not event in self.event_buffer: + if not self.add_event(event): + return + + def size(self) -> int: + """ + Returns the number of events in the buffer + + :rtype int + """ + return len(self.event_buffer) + + def _buffer_capacity_reached(self) -> bool: + """ + Returns true if buffer capacity is reached + + :rtype: bool + """ + return self.size() >= self.buffer_capacity diff --git a/snowplow_tracker/events/CLAUDE.md b/snowplow_tracker/events/CLAUDE.md new file mode 100644 index 00000000..efc0f5ab --- /dev/null +++ b/snowplow_tracker/events/CLAUDE.md @@ -0,0 +1,284 @@ +# Snowplow Event Types - CLAUDE.md + +## Directory Overview + +The `events/` directory contains all event type implementations for the Snowplow Python Tracker. Each event class represents a specific type of analytics event that can be sent to Snowplow collectors. All events inherit from the base `Event` class and follow a consistent pattern for construction, validation, and payload generation. 
+ +## Event Class Hierarchy + +``` +Event (base class) +├── PageView # Web page view tracking +├── PagePing # Page engagement/heartbeat +├── ScreenView # Mobile/app screen views +├── StructuredEvent # Generic 5-parameter events +└── SelfDescribing # Custom schema events +``` + +## Core Event Patterns + +### Event Construction Pattern +```python +# ✅ Use keyword arguments for clarity +event = PageView( + page_url="https://example.com", + page_title="Homepage", + referrer="https://google.com" +) + +# ❌ Don't use positional arguments +event = PageView("https://example.com", "Homepage") +``` + +### Event Context Pattern +```python +# ✅ Add contexts as SelfDescribingJson list +geo_context = SelfDescribingJson( + "iglu:com.acme/geolocation/jsonschema/1-0-0", + {"latitude": 40.0, "longitude": -73.0} +) +event = PageView(page_url="...", context=[geo_context]) + +# ❌ Don't use raw dictionaries for context +event.context = [{"latitude": 40.0}] # Missing schema! +``` + +### Event Subject Override Pattern +```python +# ✅ Override tracker subject for specific event +special_subject = Subject() +special_subject.set_user_id("anonymous_user") +event = StructuredEvent( + category="shop", + action="view", + event_subject=special_subject +) + +# ❌ Don't modify shared subject +tracker.subject.set_user_id("temp") # Affects all events +``` + +### True Timestamp Pattern +```python +# ✅ Use milliseconds for true_timestamp +import time +timestamp_ms = time.time() * 1000 +event = PageView( + page_url="...", + true_timestamp=timestamp_ms +) + +# ❌ Don't use seconds +event = PageView(true_timestamp=time.time()) +``` + +## Event-Specific Patterns + +### PageView Events +```python +# ✅ Complete PageView with all fields +page_view = PageView( + page_url="https://example.com/products", + page_title="Products", + referrer="https://example.com/home" +) + +# ❌ Missing required page_url +page_view = PageView(page_title="Products") +``` + +### StructuredEvent Pattern +```python +# ✅ Use descriptive 
category/action pairs +event = StructuredEvent( + category="ecommerce", + action="add-to-cart", + label="SKU-123", + property_="size:XL", + value=29.99 +) + +# ❌ Generic naming loses meaning +event = StructuredEvent("event", "click") +``` + +### SelfDescribing Events +```python +# ✅ Custom events with Iglu schemas +purchase_event = SelfDescribing( + SelfDescribingJson( + "iglu:com.acme/purchase/jsonschema/2-0-0", + { + "orderId": "ORD-123", + "total": 99.99, + "currency": "USD" + } + ) +) + +# ❌ Missing schema version +event = SelfDescribing( + SelfDescribingJson("iglu:com.acme/purchase", {...}) +) +``` + +### ScreenView Pattern (Mobile) +```python +# ✅ Mobile screen tracking with ID +screen = ScreenView( + name="ProductDetailScreen", + id_="screen-456", + previous_name="ProductListScreen" +) + +# ❌ Using PageView for mobile apps +page = PageView(page_url="app://product-detail") +``` + +## Event Validation Rules + +### Required Fields by Event Type +- **PageView**: `page_url` (required), `page_title`, `referrer` +- **StructuredEvent**: `category`, `action` (required), `label`, `property_`, `value` +- **SelfDescribing**: `event_json` (SelfDescribingJson required) +- **ScreenView**: `name` or `id_` (at least one required) +- **PagePing**: `page_url` (required) + +### Schema Validation Pattern +```python +# ✅ Validate schema format +SCHEMA_PATTERN = r"^iglu:[a-zA-Z0-9-_.]+/[a-zA-Z0-9-_]+/" +SCHEMA_PATTERN += r"[a-zA-Z0-9-_]+/[0-9]+-[0-9]+-[0-9]+$" + +# ❌ Invalid schema formats +"iglu:com.acme/event" # Missing version +"com.acme/event/1-0-0" # Missing iglu: prefix +``` + +## Payload Building Pattern + +### Internal Payload Construction +```python +# ✅ Event classes handle payload internally +def build_payload(self, encode_base64, json_encoder, subject): + # Add event-specific fields + self.payload.add("e", "pv") # Page view type + self.payload.add("url", self.page_url) + + # Let base class handle common fields + return super().build_payload(encode_base64, json_encoder, 
subject) + +# ❌ Don't expose payload building to users +event.payload = Payload() +event.payload.add("custom", "field") +``` + +## Testing Event Classes + +### Unit Test Pattern +```python +# ✅ Test event construction and validation +def test_page_view_required_fields(): + with self.assertRaises(TypeError): + PageView() # Missing required page_url + + event = PageView(page_url="https://test.com") + assert event.page_url == "https://test.com" + +# ✅ Test payload generation +def test_event_payload(): + event = PageView(page_url="https://test.com") + payload = event.build_payload(False, None, None) + assert payload.get()["url"] == "https://test.com" +``` + +### Context Testing Pattern +```python +# ✅ Test context attachment +def test_event_context(): + context = SelfDescribingJson(schema, data) + event = PageView(page_url="...", context=[context]) + + payload = event.build_payload(True, None, None) + assert "cx" in payload.get() # Base64 context +``` + +## Common Event Pitfalls + +### Timestamp Confusion +```python +# ❌ Mixing timestamp types +event.true_timestamp = "2024-01-01" # String not allowed +event.true_timestamp = datetime.now() # Use milliseconds + +# ✅ Consistent millisecond timestamps +event.true_timestamp = int(time.time() * 1000) +``` + +### Context Array Management +```python +# ❌ Modifying context after creation +event.context.append(new_context) # Unexpected behavior + +# ✅ Set complete context at creation +all_contexts = [context1, context2] +event = PageView(page_url="...", context=all_contexts) +``` + +### Schema Version Control +```python +# ❌ Hardcoding schema versions +schema = "iglu:com.acme/event/jsonschema/1-0-0" + +# ✅ Centralize schema definitions +PURCHASE_SCHEMA = "iglu:com.acme/purchase/jsonschema/2-1-0" +event = SelfDescribing(SelfDescribingJson(PURCHASE_SCHEMA, data)) +``` + +## Event Migration Guide + +### Upgrading Event Schemas +```python +# From version 1-0-0 to 2-0-0 +# ✅ Handle backward compatibility +def 
create_purchase_event(data): + if "items" in data: # New schema + schema = "iglu:.../purchase/jsonschema/2-0-0" + else: # Old schema + schema = "iglu:.../purchase/jsonschema/1-0-0" + + return SelfDescribing(SelfDescribingJson(schema, data)) +``` + +## Quick Reference + +### Event Type Selection +- **PageView**: Traditional web page tracking +- **ScreenView**: Mobile app screen tracking +- **StructuredEvent**: Generic business events +- **SelfDescribing**: Complex custom events +- **PagePing**: Engagement/time-on-page tracking + +### Event Field Checklist +- [ ] Required fields provided +- [ ] Timestamps in milliseconds +- [ ] Contexts as SelfDescribingJson array +- [ ] Valid Iglu schema format +- [ ] Event-specific subject if needed + +### Common Event Methods +- `build_payload()`: Internal payload generation +- `event_subject`: Per-event user context +- `context`: Custom context array +- `true_timestamp`: User-defined timestamp + +## Contributing to events/CLAUDE.md + +When modifying event implementations or adding new event types: + +1. **Follow the Event base class pattern** - All events must inherit from Event +2. **Implement required abstract methods** - Ensure payload building works correctly +3. **Document required fields** - Update this file with new event requirements +4. **Add comprehensive tests** - Test construction, validation, and payload generation +5. **Maintain backward compatibility** - Don't break existing event APIs +6. **Update schema constants** - Add new schemas to constants.py if needed \ No newline at end of file diff --git a/snowplow_tracker/events/__init__.py b/snowplow_tracker/events/__init__.py new file mode 100644 index 00000000..0f75c84f --- /dev/null +++ b/snowplow_tracker/events/__init__.py @@ -0,0 +1,22 @@ +# """ +# __init__.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. 
+ +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ +from snowplow_tracker.events.event import Event +from snowplow_tracker.events.page_ping import PagePing +from snowplow_tracker.events.page_view import PageView +from snowplow_tracker.events.self_describing import SelfDescribing +from snowplow_tracker.events.structured_event import StructuredEvent +from snowplow_tracker.events.screen_view import ScreenView diff --git a/snowplow_tracker/events/event.py b/snowplow_tracker/events/event.py new file mode 100644 index 00000000..fb300b87 --- /dev/null +++ b/snowplow_tracker/events/event.py @@ -0,0 +1,136 @@ +# """ +# event.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
class Event:
    """
    Base class for all events. It carries the elements that can be set on
    every event: context, true timestamp and an event-specific Subject.

    Context is a list of custom SelfDescribingJson entities.
    The true timestamp is a user-defined timestamp.
    The event Subject is specific to this event. Its fields will override those
    of the Tracker-associated Subject, if present.
    """

    def __init__(
        self,
        dict_: Optional[PayloadDict] = None,
        event_subject: Optional[Subject] = None,
        context: Optional[List[SelfDescribingJson]] = None,
        true_timestamp: Optional[float] = None,
    ) -> None:
        """
        Create the event and the Payload it will be serialised into.

        :param dict_:           Optional dictionary used to seed the event's Payload
        :type  dict_:           dict(string:\\*) | None
        :param event_subject:   Optional per event subject
        :type  event_subject:   subject | None
        :param context:         Custom context for the event
        :type  context:         context_array | None
        :param true_timestamp:  Optional event timestamp in milliseconds
        :type  true_timestamp:  int | float | None
        """
        self.payload = payload.Payload(dict_=dict_)
        self.event_subject = event_subject
        # A missing (or empty) context is normalised to an empty list.
        self.context = context or []
        self.true_timestamp = true_timestamp

    def build_payload(
        self,
        encode_base64: bool,
        json_encoder: Optional[JsonEncoderFunction],
        subject: Optional[Subject] = None,
    ) -> "payload.Payload":
        """
        Add context, true timestamp and subject fields to this event's payload
        and return that payload.

        :param encode_base64:   Whether JSONs in the payload should be base-64 encoded
        :type  encode_base64:   bool
        :param json_encoder:    Custom JSON serializer that gets called on non-serializable object
        :type  json_encoder:    function | None
        :param subject:         Optional per event subject
        :type  subject:         subject | None
        :rtype:                 payload.Payload
        """
        entity_count = len(self.context)
        if entity_count > 0:
            # Wrap every entity's JSON in a single envelope under CONTEXT_SCHEMA.
            entity_jsons = [entity.to_json() for entity in self.context]
            envelope = SelfDescribingJson(CONTEXT_SCHEMA, entity_jsons).to_json()
            self.payload.add_json(envelope, encode_base64, "cx", "co", json_encoder)

        timestamp = self.true_timestamp
        # Only numeric timestamps are written; any other value is left out.
        if isinstance(timestamp, (int, float)):
            self.payload.add("ttm", int(timestamp))

        if self.event_subject is not None:
            subject_pairs = self.event_subject.combine_subject(subject)
        elif subject is not None:
            subject_pairs = subject.standard_nv_pairs
        else:
            subject_pairs = {}

        self.payload.add_dict(subject_pairs)
        return self.payload

    @property
    def event_subject(self) -> Optional[Subject]:
        """
        Optional per event subject
        """
        return self._event_subject

    @event_subject.setter
    def event_subject(self, value: Optional[Subject]):
        self._event_subject = value

    @property
    def context(self) -> List[SelfDescribingJson]:
        """
        Custom context for the event
        """
        return self._context

    @context.setter
    def context(self, value: List[SelfDescribingJson]):
        self._context = value

    @property
    def true_timestamp(self) -> Optional[float]:
        """
        Optional event timestamp in milliseconds
        """
        return self._true_timestamp

    @true_timestamp.setter
    def true_timestamp(self, value: Optional[float]):
        self._true_timestamp = value
class PagePing(Event):
    """
    Constructs a PagePing event object.

    When tracked, generates a "pp" or "page_ping" event.
    """

    def __init__(
        self,
        page_url: str,
        page_title: Optional[str] = None,
        referrer: Optional[str] = None,
        min_x: Optional[int] = None,
        max_x: Optional[int] = None,
        min_y: Optional[int] = None,
        max_y: Optional[int] = None,
        event_subject: Optional[Subject] = None,
        context: Optional[List[SelfDescribingJson]] = None,
        true_timestamp: Optional[float] = None,
    ) -> None:
        """
        :param  page_url:       URL of the viewed page
        :type   page_url:       non_empty_string
        :param  page_title:     Title of the viewed page
        :type   page_title:     string_or_none
        :param  referrer:       Referrer of the page
        :type   referrer:       string_or_none
        :param  min_x:          Minimum page x offset seen in the last ping period
        :type   min_x:          int | None
        :param  max_x:          Maximum page x offset seen in the last ping period
        :type   max_x:          int | None
        :param  min_y:          Minimum page y offset seen in the last ping period
        :type   min_y:          int | None
        :param  max_y:          Maximum page y offset seen in the last ping period
        :type   max_y:          int | None
        :param  event_subject:  Optional per event subject
        :type   event_subject:  subject | None
        :param  context:        Custom context for the event
        :type   context:        context_array | None
        :param  true_timestamp: Optional event timestamp in milliseconds
        :type   true_timestamp: int | float | None
        """
        super().__init__(
            event_subject=event_subject,
            context=context,
            true_timestamp=true_timestamp,
        )
        # Event type first, then each field through its property setter.
        self.payload.add("e", "pp")
        self.page_url = page_url
        self.page_title = page_title
        self.referrer = referrer
        self.min_x = min_x
        self.max_x = max_x
        self.min_y = min_y
        self.max_y = max_y

    @property
    def page_url(self) -> str:
        """
        URL of the viewed page (payload field "url")
        """
        pairs = self.payload.nv_pairs
        return pairs["url"]

    @page_url.setter
    def page_url(self, value: str):
        # Run the contract check before anything is written to the payload.
        non_empty_string(value)
        self.payload.add("url", value)

    @property
    def page_title(self) -> Optional[str]:
        """
        Title of the viewed page (payload field "page")
        """
        pairs = self.payload.nv_pairs
        return pairs.get("page")

    @page_title.setter
    def page_title(self, value: Optional[str]):
        self.payload.add("page", value)

    @property
    def referrer(self) -> Optional[str]:
        """
        The referrer of the page (payload field "refr")
        """
        pairs = self.payload.nv_pairs
        return pairs.get("refr")

    @referrer.setter
    def referrer(self, value: Optional[str]):
        self.payload.add("refr", value)

    @property
    def min_x(self) -> Optional[int]:
        """
        Minimum page x offset seen in the last ping period (payload field "pp_mix")
        """
        pairs = self.payload.nv_pairs
        return pairs.get("pp_mix")

    @min_x.setter
    def min_x(self, value: Optional[int]):
        self.payload.add("pp_mix", value)

    @property
    def max_x(self) -> Optional[int]:
        """
        Maximum page x offset seen in the last ping period (payload field "pp_max")
        """
        pairs = self.payload.nv_pairs
        return pairs.get("pp_max")

    @max_x.setter
    def max_x(self, value: Optional[int]):
        self.payload.add("pp_max", value)

    @property
    def min_y(self) -> Optional[int]:
        """
        Minimum page y offset seen in the last ping period (payload field "pp_miy")
        """
        pairs = self.payload.nv_pairs
        return pairs.get("pp_miy")

    @min_y.setter
    def min_y(self, value: Optional[int]):
        self.payload.add("pp_miy", value)

    @property
    def max_y(self) -> Optional[int]:
        """
        Maximum page y offset seen in the last ping period (payload field "pp_may")
        """
        pairs = self.payload.nv_pairs
        return pairs.get("pp_may")

    @max_y.setter
    def max_y(self, value: Optional[int]):
        self.payload.add("pp_may", value)


class PageView(Event):
    """
    Constructs a PageView event object.

    When tracked, generates a "pv" or "page_view" event.
    """

    def __init__(
        self,
        page_url: str,
        page_title: Optional[str] = None,
        referrer: Optional[str] = None,
        event_subject: Optional[Subject] = None,
        context: Optional[List[SelfDescribingJson]] = None,
        true_timestamp: Optional[float] = None,
    ) -> None:
        """
        :param  page_url:       URL of the viewed page
        :type   page_url:       non_empty_string
        :param  page_title:     Title of the viewed page
        :type   page_title:     string_or_none
        :param  referrer:       Referrer of the page
        :type   referrer:       string_or_none
        :param  event_subject:  Optional per event subject
        :type   event_subject:  subject | None
        :param  context:        Custom context for the event
        :type   context:        context_array | None
        :param  true_timestamp: Optional event timestamp in milliseconds
        :type   true_timestamp: int | float | None
        """
        super().__init__(
            event_subject=event_subject,
            context=context,
            true_timestamp=true_timestamp,
        )
        # Event type first, then each field through its property setter.
        self.payload.add("e", "pv")
        self.page_url = page_url
        self.page_title = page_title
        self.referrer = referrer

    @property
    def page_url(self) -> str:
        """
        URL of the viewed page (payload field "url")
        """
        pairs = self.payload.nv_pairs
        return pairs["url"]

    @page_url.setter
    def page_url(self, value: str):
        # Run the contract check before anything is written to the payload.
        non_empty_string(value)
        self.payload.add("url", value)

    @property
    def page_title(self) -> Optional[str]:
        """
        Title of the viewed page (payload field "page")
        """
        pairs = self.payload.nv_pairs
        return pairs.get("page")

    @page_title.setter
    def page_title(self, value: Optional[str]):
        self.payload.add("page", value)

    @property
    def referrer(self) -> Optional[str]:
        """
        The referrer of the page (payload field "refr")
        """
        pairs = self.payload.nv_pairs
        return pairs.get("refr")

    @referrer.setter
    def referrer(self, value: Optional[str]):
        self.payload.add("refr", value)
class ScreenView(Event):
    """
    Constructs a ScreenView event object.

    When tracked, generates a SelfDescribing event (event type "ue").

    Schema: `iglu:com.snowplowanalytics.mobile/screen_view/jsonschema/1-0-0`
    """

    def __init__(
        self,
        id_: str,
        name: str,
        type: Optional[str] = None,
        previous_name: Optional[str] = None,
        previous_id: Optional[str] = None,
        previous_type: Optional[str] = None,
        transition_type: Optional[str] = None,
        event_subject: Optional[Subject] = None,
        context: Optional[List[SelfDescribingJson]] = None,
        true_timestamp: Optional[float] = None,
    ) -> None:
        """
        :param  id_:                Screen view ID. This must be of type UUID.
        :type   id_:                string
        :param  name:               The name of the screen view event
        :type   name:               string
        :param  type:               The type of screen that was viewed e.g feed / carousel.
        :type   type:               string | None
        :param  previous_name:      The name of the previous screen.
        :type   previous_name:      string | None
        :param  previous_id:        The screenview ID of the previous screenview.
        :type   previous_id:        string | None
        :param  previous_type:      The screen type of the previous screenview
        :type   previous_type:      string | None
        :param  transition_type:    The type of transition that led to the screen being viewed.
        :type   transition_type:    string | None
        :param  event_subject:      Optional per event subject
        :type   event_subject:      subject | None
        :param  context:            Custom context for the event
        :type   context:            context_array | None
        :param  true_timestamp:     Optional event timestamp in milliseconds
        :type   true_timestamp:     int | float | None
        """
        super(ScreenView, self).__init__(
            event_subject=event_subject, context=context, true_timestamp=true_timestamp
        )
        # Data of the screen_view self-describing JSON; filled by the setters below.
        self.screen_view_properties: Dict[str, str] = {}
        self.id_ = id_
        self.name = name
        self.type = type
        self.previous_name = previous_name
        self.previous_id = previous_id
        self.previous_type = previous_type
        self.transition_type = transition_type

    @property
    def id_(self) -> str:
        """
        Screen view ID. This must be of type UUID.
        """
        return self.screen_view_properties["id"]

    @id_.setter
    def id_(self, value: str):
        non_empty_string(value)
        self.screen_view_properties["id"] = value

    @property
    def name(self) -> str:
        """
        The name of the screen view event
        """
        return self.screen_view_properties["name"]

    @name.setter
    def name(self, value: str):
        non_empty_string(value)
        self.screen_view_properties["name"] = value

    # The optional setters below skip None, so the key may be absent. The
    # getters therefore use dict.get and return None instead of raising KeyError.

    @property
    def type(self) -> Optional[str]:
        """
        The type of screen that was viewed e.g feed / carousel, or None if unset
        """
        return self.screen_view_properties.get("type")

    @type.setter
    def type(self, value: Optional[str]):
        if value is not None:
            self.screen_view_properties["type"] = value

    @property
    def previous_name(self) -> Optional[str]:
        """
        The name of the previous screen, or None if unset
        """
        return self.screen_view_properties.get("previousName")

    @previous_name.setter
    def previous_name(self, value: Optional[str]):
        if value is not None:
            self.screen_view_properties["previousName"] = value

    @property
    def previous_id(self) -> Optional[str]:
        """
        The screenview ID of the previous screenview, or None if unset
        """
        return self.screen_view_properties.get("previousId")

    @previous_id.setter
    def previous_id(self, value: Optional[str]):
        if value is not None:
            self.screen_view_properties["previousId"] = value

    @property
    def previous_type(self) -> Optional[str]:
        """
        The screen type of the previous screenview, or None if unset
        """
        return self.screen_view_properties.get("previousType")

    @previous_type.setter
    def previous_type(self, value: Optional[str]):
        if value is not None:
            self.screen_view_properties["previousType"] = value

    @property
    def transition_type(self) -> Optional[str]:
        """
        The type of transition that led to the screen being viewed, or None if unset
        """
        return self.screen_view_properties.get("transitionType")

    @transition_type.setter
    def transition_type(self, value: Optional[str]):
        if value is not None:
            self.screen_view_properties["transitionType"] = value

    def build_payload(
        self,
        encode_base64: bool,
        json_encoder: Optional[JsonEncoderFunction],
        subject: Optional[Subject] = None,
    ) -> "payload.Payload":
        """
        Wrap the screen view properties in a SelfDescribing event and return
        the payload built by that event.

        :param encode_base64:   Whether JSONs in the payload should be base-64 encoded
        :type  encode_base64:   bool
        :param json_encoder:    Custom JSON serializer that gets called on non-serializable object
        :type  json_encoder:    function | None
        :param subject:         Optional per event subject
        :type  subject:         subject | None
        :rtype:                 payload.Payload
        """
        event_json = SelfDescribingJson(
            "%s/screen_view/%s/1-0-0" % (MOBILE_SCHEMA_PATH, SCHEMA_TAG),
            self.screen_view_properties,
        )
        # The returned payload belongs to this temporary SelfDescribing event,
        # which receives this event's subject, context and true timestamp.
        self_describing = SelfDescribing(
            event_json=event_json,
            event_subject=self.event_subject,
            context=self.context,
            true_timestamp=self.true_timestamp,
        )
        return self_describing.build_payload(
            encode_base64, json_encoder, subject=subject
        )
class SelfDescribing(Event):
    """
    Constructs a SelfDescribing event object.

    This is a customisable event type which allows you to track anything describable
    by a JsonSchema.

    When tracked, generates a self-describing event (event type "ue").
    """

    def __init__(
        self,
        event_json: SelfDescribingJson,
        event_subject: Optional[Subject] = None,
        context: Optional[List[SelfDescribingJson]] = None,
        true_timestamp: Optional[float] = None,
    ) -> None:
        """
        :param  event_json:      The properties of the event. Has two fields:
                                 a "data" field containing the event properties and
                                 a "schema" field identifying the schema against which the data is validated
        :type   event_json:      self_describing_json
        :param  event_subject:   Optional per event subject
        :type   event_subject:   subject | None
        :param  context:         Custom context for the event
        :type   context:         context_array | None
        :param  true_timestamp:  Optional event timestamp in milliseconds
        :type   true_timestamp:  int | float | None
        """
        super().__init__(
            event_subject=event_subject,
            context=context,
            true_timestamp=true_timestamp,
        )
        self.payload.add("e", "ue")
        self.event_json = event_json

    @property
    def event_json(self) -> SelfDescribingJson:
        """
        The properties of the event. Has two fields:
        a "data" field containing the event properties and
        a "schema" field identifying the schema against which the data is validated
        """
        return self._event_json

    @event_json.setter
    def event_json(self, value: SelfDescribingJson):
        self._event_json = value

    def build_payload(
        self,
        encode_base64: bool,
        json_encoder: Optional[JsonEncoderFunction],
        subject: Optional[Subject] = None,
    ) -> "payload.Payload":
        """
        Add the event JSON to the payload, then let the base class add the
        context, true timestamp and subject fields.

        :param encode_base64:   Whether JSONs in the payload should be base-64 encoded
        :type  encode_base64:   bool
        :param json_encoder:    Custom JSON serializer that gets called on non-serializable object
        :type  json_encoder:    function | None
        :param subject:         Optional per event subject
        :type  subject:         subject | None
        :rtype:                 payload.Payload
        """
        # Nest the event JSON inside an outer JSON under UNSTRUCT_EVENT_SCHEMA.
        inner_json = self.event_json.to_json()
        wrapped_event = SelfDescribingJson(UNSTRUCT_EVENT_SCHEMA, inner_json)
        self.payload.add_json(
            wrapped_event.to_json(), encode_base64, "ue_px", "ue_pr", json_encoder
        )

        return super().build_payload(
            encode_base64=encode_base64,
            json_encoder=json_encoder,
            subject=subject,
        )
class StructuredEvent(Event):
    """
    Constructs a Structured event object.

    This event type is provided to be roughly equivalent to Google Analytics-style events.
    Note that it is not automatically clear what data should be placed in what field.
    To aid data quality and modeling, agree on business-wide definitions when designing
    your tracking strategy.

    We recommend using SelfDescribing - fully custom - events instead.

    When tracked, generates a "struct" or "se" event.
    """

    def __init__(
        self,
        category: str,
        action: str,
        label: Optional[str] = None,
        property_: Optional[str] = None,
        value: Optional[Union[int, float]] = None,
        event_subject: Optional[Subject] = None,
        context: Optional[List[SelfDescribingJson]] = None,
        true_timestamp: Optional[float] = None,
    ) -> None:
        """
        :param  category:       Category of the event
        :type   category:       non_empty_string
        :param  action:         The event itself
        :type   action:         non_empty_string
        :param  label:          Refer to the object the action is
                                performed on
        :type   label:          string_or_none
        :param  property_:      Property associated with either the action
                                or the object
        :type   property_:      string_or_none
        :param  value:          A value associated with the user action
        :type   value:          int | float | None
        :param  event_subject:  Optional per event subject
        :type   event_subject:  subject | None
        :param  context:        Custom context for the event
        :type   context:        context_array | None
        :param  true_timestamp: Optional event timestamp in milliseconds
        :type   true_timestamp: int | float | None
        """
        super().__init__(
            event_subject=event_subject,
            context=context,
            true_timestamp=true_timestamp,
        )
        # Event type first, then each field through its property setter.
        self.payload.add("e", "se")
        self.category = category
        self.action = action
        self.label = label
        self.property_ = property_
        self.value = value

    @property
    def category(self) -> Optional[str]:
        """
        Category of the event (payload field "se_ca")
        """
        pairs = self.payload.nv_pairs
        return pairs.get("se_ca")

    @category.setter
    def category(self, value: str):
        # Run the contract check before anything is written to the payload.
        non_empty_string(value)
        self.payload.add("se_ca", value)

    @property
    def action(self) -> Optional[str]:
        """
        The event itself (payload field "se_ac")
        """
        pairs = self.payload.nv_pairs
        return pairs.get("se_ac")

    @action.setter
    def action(self, value: str):
        # Run the contract check before anything is written to the payload.
        non_empty_string(value)
        self.payload.add("se_ac", value)

    @property
    def label(self) -> Optional[str]:
        """
        Refer to the object the action is performed on (payload field "se_la")
        """
        pairs = self.payload.nv_pairs
        return pairs.get("se_la")

    @label.setter
    def label(self, value: Optional[str]):
        self.payload.add("se_la", value)

    @property
    def property_(self) -> Optional[str]:
        """
        Property associated with either the action or the object (payload field "se_pr")
        """
        pairs = self.payload.nv_pairs
        return pairs.get("se_pr")

    @property_.setter
    def property_(self, value: Optional[str]):
        self.payload.add("se_pr", value)

    @property
    def value(self) -> Optional[Union[int, float]]:
        """
        A value associated with the user action (payload field "se_va")
        """
        pairs = self.payload.nv_pairs
        return pairs.get("se_va")

    @value.setter
    def value(self, value: Optional[Union[int, float]]):
        self.payload.add("se_va", value)
- -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import json @@ -26,10 +22,9 @@ class Payload: - def __init__(self, dict_: Optional[PayloadDict] = None) -> None: """ - Constructor + Constructor """ self.nv_pairs = {} @@ -44,41 +39,42 @@ def __init__(self, dict_: Optional[PayloadDict] = None) -> None: def add(self, name: str, value: Any) -> None: """ - Add a name value pair to the Payload object + Add a name value pair to the Payload object """ if not (value == "" or value is None): self.nv_pairs[name] = value def add_dict(self, dict_: PayloadDict, base64: bool = False) -> None: """ - Add a dict of name value pairs to the Payload object + Add a dict of name value pairs to the Payload object - :param dict_: Dictionary to be added to the Payload - :type dict_: dict(string:\*) + :param dict_: Dictionary to be added to the Payload + :type dict_: dict(string:\\*) """ for f in dict_: self.add(f, dict_[f]) def add_json( - self, - dict_: Optional[PayloadDict], - encode_base64: bool, - type_when_encoded: str, - type_when_not_encoded: str, - json_encoder: Optional[JsonEncoderFunction] = None) -> None: + self, + dict_: Optional[PayloadDict], + encode_base64: bool, + type_when_encoded: str, + type_when_not_encoded: str, + json_encoder: Optional[JsonEncoderFunction] = None, + ) -> None: """ - Add an encoded or unencoded JSON to the payload - - :param dict_: Custom context for the event - :type dict_: dict(string:\*) | None - :param encode_base64: If the payload is base64 encoded - :type encode_base64: bool - :param type_when_encoded: Name of the field when encode_base64 is set - :type type_when_encoded: string - :param type_when_not_encoded: Name of the field when encode_base64 is not set - :type type_when_not_encoded: string - :param json_encoder: Custom JSON serializer that gets called on non-serializable object - :type json_encoder: function | None + Add an 
encoded or unencoded JSON to the payload + + :param dict_: Custom context for the event + :type dict_: dict(string:\\*) | None + :param encode_base64: If the payload is base64 encoded + :type encode_base64: bool + :param type_when_encoded: Name of the field when encode_base64 is set + :type type_when_encoded: string + :param type_when_not_encoded: Name of the field when encode_base64 is not set + :type type_when_not_encoded: string + :param json_encoder: Custom JSON serializer that gets called on non-serializable object + :type json_encoder: function | None """ if dict_ is not None and dict_ != {}: @@ -87,15 +83,14 @@ def add_json( if encode_base64: encoded_dict = base64.urlsafe_b64encode(json_dict.encode("utf-8")) - if not isinstance(encoded_dict, str): - encoded_dict = encoded_dict.decode("utf-8") - self.add(type_when_encoded, encoded_dict) + encoded_dict_str = encoded_dict.decode("utf-8") + self.add(type_when_encoded, encoded_dict_str) else: self.add(type_when_not_encoded, json_dict) def get(self) -> PayloadDict: """ - Returns the context dictionary from the Payload object + Returns the context dictionary from the Payload object """ return self.nv_pairs diff --git a/snowplow_tracker/py.typed b/snowplow_tracker/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/snowplow_tracker/redis/__init__.py b/snowplow_tracker/redis/__init__.py deleted file mode 100644 index 794bcdd3..00000000 --- a/snowplow_tracker/redis/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .redis_emitter import RedisEmitter -from .redis_worker import RedisWorker diff --git a/snowplow_tracker/redis/redis_emitter.py b/snowplow_tracker/redis/redis_emitter.py deleted file mode 100644 index a24ee50b..00000000 --- a/snowplow_tracker/redis/redis_emitter.py +++ /dev/null @@ -1,77 +0,0 @@ -# """ -# redis_emitter.py - -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. 
- -# This program is licensed to you under the Apache License Version 2.0, -# and you may not use this file except in compliance with the Apache License -# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at -# http://www.apache.org/licenses/LICENSE-2.0. - -# Unless required by applicable law or agreed to in writing, -# software distributed under the Apache License Version 2.0 is distributed on -# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the Apache License Version 2.0 for the specific -# language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 -# """ - -import json -import logging -from typing import Any, Optional -from snowplow_tracker.typing import PayloadDict, RedisProtocol - -_REDIS_OPT = True -try: - import redis -except ImportError: - _REDIS_OPT = False - -# logging -logging.basicConfig() -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -class RedisEmitter(object): - """ - Sends Snowplow events to a Redis database - """ - if _REDIS_OPT: - - def __init__(self, rdb: Optional[RedisProtocol] = None, key: str = "snowplow") -> None: - """ - :param rdb: Optional custom Redis database - :type rdb: redis | None - :param key: The Redis key for the list of events - :type key: string - """ - if rdb is None: - rdb = redis.StrictRedis() - - self.rdb = rdb - self.key = key - - def input(self, payload: PayloadDict) -> None: - """ - :param payload: The event properties - :type payload: dict(string:*) - """ - logger.debug("Pushing event to Redis queue...") - self.rdb.rpush(self.key, json.dumps(payload)) - logger.info("Finished sending event to Redis.") - - def flush(self) -> None: - logger.warning("The RedisEmitter class does not need to be flushed") - - def sync_flush(self) -> None: - self.flush() - - else: - - 
def __new__(cls, *args: Any, **kwargs: Any) -> 'RedisEmitter': - logger.error("RedisEmitter is not available. Please install snowplow-tracker with redis extra dependency.") - raise RuntimeError('RedisEmitter is not available. To use: `pip install snowplow-tracker[redis]`') diff --git a/snowplow_tracker/redis/redis_worker.py b/snowplow_tracker/redis/redis_worker.py deleted file mode 100644 index 3f1e9f51..00000000 --- a/snowplow_tracker/redis/redis_worker.py +++ /dev/null @@ -1,94 +0,0 @@ -# """ -# redis_worker.py - -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. - -# This program is licensed to you under the Apache License Version 2.0, -# and you may not use this file except in compliance with the Apache License -# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at -# http://www.apache.org/licenses/LICENSE-2.0. - -# Unless required by applicable law or agreed to in writing, -# software distributed under the Apache License Version 2.0 is distributed on -# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the Apache License Version 2.0 for the specific -# language governing permissions and limitations there under. 
- -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 -# """ - - -import json -import signal -from typing import Any, Optional - -from snowplow_tracker.typing import EmitterProtocol, PayloadDict, RedisProtocol - -_REDIS_OPT = True -try: - import redis - import gevent - from gevent.pool import Pool -except ImportError: - _REDIS_OPT = False - -DEFAULT_KEY = "snowplow" - - -class RedisWorker(object): - """ - Asynchronously take events from redis and send them to an emitter - """ - if _REDIS_OPT: - - def __init__(self, emitter: EmitterProtocol, rdb: Optional[RedisProtocol] = None, key: str = DEFAULT_KEY) -> None: - self.emitter = emitter - self.key = key - if rdb is None: - rdb = redis.StrictRedis() - self.rdb = rdb - self.pool = Pool(5) - - signal.signal(signal.SIGTERM, self.request_shutdown) - signal.signal(signal.SIGINT, self.request_shutdown) - signal.signal(signal.SIGQUIT, self.request_shutdown) - - def send(self, payload: PayloadDict) -> None: - """ - Send an event to an emitter - """ - self.emitter.input(payload) - - def pop_payload(self) -> None: - """ - Get a single event from Redis and send it - If the Redis queue is empty, sleep to avoid making continual requests - """ - payload = self.rdb.lpop(self.key) - if payload: - self.pool.spawn(self.send, json.loads(payload.decode("utf-8"))) - else: - gevent.sleep(5) - - def run(self) -> None: - """ - Run indefinitely - """ - self._shutdown = False - - while not self._shutdown: - self.pop_payload() - self.pool.join(timeout=20) - - def request_shutdown(self, *args: Any) -> None: - """ - Halt the worker - """ - self._shutdown = True - - else: - - def __new__(cls, *args: Any, **kwargs: Any) -> 'RedisWorker': - raise RuntimeError('RedisWorker is not available. 
To use: `pip install snowplow-tracker[redis]`') diff --git a/snowplow_tracker/self_describing_json.py b/snowplow_tracker/self_describing_json.py index 84b49c94..8f7b65ea 100644 --- a/snowplow_tracker/self_describing_json.py +++ b/snowplow_tracker/self_describing_json.py @@ -1,7 +1,7 @@ # """ # self_describing_json.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,29 +13,31 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import json from typing import Union from snowplow_tracker.typing import PayloadDict, PayloadDictList +from snowplow_tracker.contracts import non_empty_string class SelfDescribingJson(object): - def __init__(self, schema: str, data: Union[PayloadDict, PayloadDictList]) -> None: self.schema = schema self.data = data + @property + def schema(self) -> str: + return self._schema + + @schema.setter + def schema(self, value: str): + non_empty_string(value) + self._schema = value + def to_json(self) -> PayloadDict: - return { - "schema": self.schema, - "data": self.data - } + return {"schema": self.schema, "data": self.data} def to_string(self) -> str: return json.dumps(self.to_json()) diff --git a/snowplow_tracker/snowplow.py b/snowplow_tracker/snowplow.py new file mode 100644 index 00000000..daa1434b --- /dev/null +++ b/snowplow_tracker/snowplow.py @@ -0,0 +1,161 @@ +# """ +# snowplow.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. 
+ +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ + +import logging +from typing import Dict, Optional +from snowplow_tracker import ( + Tracker, + Emitter, + subject, + EmitterConfiguration, + TrackerConfiguration, +) +from snowplow_tracker.typing import Method + +# Logging +logging.basicConfig() +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +""" +Snowplow Class +""" + + +class Snowplow: + _trackers: Dict[str, Tracker] = {} + + @staticmethod + def create_tracker( + namespace: str, + endpoint: str, + method: Method = "post", + app_id: Optional[str] = None, + subject: Optional[subject.Subject] = None, + tracker_config: TrackerConfiguration = TrackerConfiguration(), + emitter_config: EmitterConfiguration = EmitterConfiguration(), + ) -> Tracker: + """ + Create a Snowplow tracker with a namespace and collector URL + + :param namespace: Name of the tracker + :type namespace: String + :param endpoint: The collector URL + :type endpoint: String + :param method: The HTTP request method. Defaults to post. 
+ :type method: method + :param app_id: Application ID + :type app_id: String | None + :param subject: Subject to be tracked + :type subject: Subject | None + :param tracker_config: Tracker configuration + :type tracker_config: TrackerConfiguration + :param emitter_config: Emitter configuration + :type emitter_config: EmitterConfiguration + :rtype: Tracker + """ + if endpoint is None: + raise TypeError("Emitter or Collector URL must be provided") + + emitter = Emitter( + endpoint=endpoint, + method=method, + batch_size=emitter_config.batch_size, + on_success=emitter_config.on_success, + on_failure=emitter_config.on_failure, + byte_limit=emitter_config.byte_limit, + request_timeout=emitter_config.request_timeout, + custom_retry_codes=emitter_config.custom_retry_codes, + event_store=emitter_config.event_store, + session=emitter_config.session, + ) + + tracker = Tracker( + namespace=namespace, + emitters=emitter, + app_id=app_id, + subject=subject, + encode_base64=tracker_config.encode_base64, + json_encoder=tracker_config.json_encoder, + ) + + return Snowplow.add_tracker(tracker) + + @classmethod + def add_tracker(cls, tracker: Tracker) -> Tracker: + """ + Add a Snowplow tracker to the Snowplow object + + :param tracker: Tracker object to add to Snowplow + :type tracker: Tracker + :rtype: Tracker + """ + if not isinstance(tracker, Tracker): + logger.info("Tracker not provided.") + return None + + namespace = tracker.get_namespace() + + if namespace in cls._trackers.keys(): + raise TypeError("Tracker with this namespace already exists") + + cls._trackers[namespace] = tracker + logger.info("Tracker with namespace: '" + namespace + "' added to Snowplow") + return cls._trackers[namespace] + + @classmethod + def remove_tracker(cls, tracker: Tracker): + """ + Remove a Snowplow tracker from the Snowplow object if it exists + + :param tracker: Tracker object to remove from Snowplow + :type tracker: Tracker | None + """ + namespace = tracker.get_namespace() + 
cls.remove_tracker_by_namespace(namespace) + + @classmethod + def remove_tracker_by_namespace(cls, namespace: str): + """ + Remove a Snowplow tracker from the Snowplow object using its namespace if it exists + + :param namespace: Tracker namespace to remove from Snowplow + :type namespace: String | None + """ + if not cls._trackers.pop(namespace, False): + logger.info("Tracker with namespace: '" + namespace + "' does not exist") + return + logger.info("Tracker with namespace: '" + namespace + "' removed from Snowplow") + + @classmethod + def reset(cls): + """ + Remove all active Snowplow trackers from the Snowplow object + """ + cls._trackers = {} + + @classmethod + def get_tracker(cls, namespace: str) -> Optional[Tracker]: + """ + Returns a Snowplow tracker from the Snowplow object if it exists + :param namespace: Snowplow tracker namespace + :type namespace: string + :rtype: Tracker + """ + if namespace in cls._trackers.keys(): + return cls._trackers[namespace] + return None diff --git a/snowplow_tracker/subject.py b/snowplow_tracker/subject.py index d9c10c80..cbf29aa8 100644 --- a/snowplow_tracker/subject.py +++ b/snowplow_tracker/subject.py @@ -1,7 +1,7 @@ # """ # subject.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,55 +13,52 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. 
- -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ +from typing import Dict, Optional, Union from snowplow_tracker.contracts import one_of, greater_than -from snowplow_tracker.typing import SupportedPlatform, SUPPORTED_PLATFORMS +from snowplow_tracker.typing import SupportedPlatform, SUPPORTED_PLATFORMS, PayloadDict DEFAULT_PLATFORM = "pc" class Subject(object): """ - Class for an event subject, where we view events as of the form + Class for an event subject, where we view events as of the form - (Subject) -> (Verb) -> (Object) + (Subject) -> (Verb) -> (Object) """ - def __init__(self) -> None: - self.standard_nv_pairs = {"p": DEFAULT_PLATFORM} + def __init__(self) -> None: + self.standard_nv_pairs: Dict[str, Union[str, int]] = {"p": DEFAULT_PLATFORM} - def set_platform(self, value: SupportedPlatform) -> 'Subject': + def set_platform(self, value: SupportedPlatform) -> "Subject": """ - :param value: One of ["pc", "tv", "mob", "cnsl", "iot", "web", "srv", "app"] - :type value: supported_platform - :rtype: subject + :param value: One of ["pc", "tv", "mob", "cnsl", "iot", "web", "srv", "app"] + :type value: supported_platform + :rtype: subject """ one_of(value, SUPPORTED_PLATFORMS) self.standard_nv_pairs["p"] = value return self - def set_user_id(self, user_id: str) -> 'Subject': + def set_user_id(self, user_id: str) -> "Subject": """ - :param user_id: User ID - :type user_id: string - :rtype: subject + :param user_id: User ID + :type user_id: string + :rtype: subject """ self.standard_nv_pairs["uid"] = user_id return self - def set_screen_resolution(self, width: int, height: int) -> 'Subject': + def set_screen_resolution(self, width: int, height: int) -> "Subject": """ - :param width: Width of the screen - :param height: Height of the screen - :type width: int,>0 - :type height: int,>0 - :rtype: subject + :param width: Width of the screen + :param 
height: Height of the screen + :type width: int,>0 + :type height: int,>0 + :rtype: subject """ greater_than(width, 0) greater_than(height, 0) @@ -69,13 +66,13 @@ def set_screen_resolution(self, width: int, height: int) -> 'Subject': self.standard_nv_pairs["res"] = "".join([str(width), "x", str(height)]) return self - def set_viewport(self, width: int, height: int) -> 'Subject': + def set_viewport(self, width: int, height: int) -> "Subject": """ - :param width: Width of the viewport - :param height: Height of the viewport - :type width: int,>0 - :type height: int,>0 - :rtype: subject + :param width: Width of the viewport + :param height: Height of the viewport + :type width: int,>0 + :type height: int,>0 + :rtype: subject """ greater_than(width, 0) greater_than(height, 0) @@ -83,76 +80,109 @@ def set_viewport(self, width: int, height: int) -> 'Subject': self.standard_nv_pairs["vp"] = "".join([str(width), "x", str(height)]) return self - def set_color_depth(self, depth: int) -> 'Subject': + def set_color_depth(self, depth: int) -> "Subject": """ - :param depth: Depth of the color on the screen - :type depth: int - :rtype: subject + :param depth: Depth of the color on the screen + :type depth: int + :rtype: subject """ self.standard_nv_pairs["cd"] = depth return self - def set_timezone(self, timezone: str) -> 'Subject': + def set_timezone(self, timezone: str) -> "Subject": """ - :param timezone: Timezone as a string - :type timezone: string - :rtype: subject + :param timezone: Timezone as a string + :type timezone: string + :rtype: subject """ self.standard_nv_pairs["tz"] = timezone return self - def set_lang(self, lang: str) -> 'Subject': + def set_lang(self, lang: str) -> "Subject": """ - Set language. + Set language. 
- :param lang: Language the application is set to - :type lang: string - :rtype: subject + :param lang: Language the application is set to + :type lang: string + :rtype: subject """ self.standard_nv_pairs["lang"] = lang return self - def set_domain_user_id(self, duid: str) -> 'Subject': + def set_domain_user_id(self, duid: str) -> "Subject": """ - Set the domain user ID + Set the domain user ID - :param duid: Domain user ID - :type duid: string - :rtype: subject + :param duid: Domain user ID + :type duid: string + :rtype: subject """ self.standard_nv_pairs["duid"] = duid return self - def set_ip_address(self, ip: str) -> 'Subject': + def set_domain_session_id(self, sid: str) -> "Subject": + """ + Set the domain session ID + :param sid: Domain session ID + :type sid: string + :rtype: subject + """ + self.standard_nv_pairs["sid"] = sid + return self + + def set_domain_session_index(self, vid: int) -> "Subject": + """ + Set the domain session Index + :param vid: Domain session Index + :type vid: int + :rtype: subject + """ + self.standard_nv_pairs["vid"] = vid + return self + + def set_ip_address(self, ip: str) -> "Subject": """ - Set the domain user ID + Set the IP address - :param ip: IP address - :type ip: string - :rtype: subject + :param ip: IP address + :type ip: string + :rtype: subject """ self.standard_nv_pairs["ip"] = ip return self - def set_useragent(self, ua: str) -> 'Subject': + def set_useragent(self, ua: str) -> "Subject": """ - Set the user agent + Set the user agent - :param ua: User agent - :type ua: string - :rtype: subject + :param ua: User agent + :type ua: string + :rtype: subject """ self.standard_nv_pairs["ua"] = ua return self - def set_network_user_id(self, nuid: str) -> 'Subject': + def set_network_user_id(self, nuid: str) -> "Subject": """ - Set the network user ID field - This overwrites the nuid field set by the collector + Set the network user ID field + This overwrites the nuid field set by the collector - :param nuid: Network user 
ID - :type nuid: string - :rtype: subject + :param nuid: Network user ID + :type nuid: string + :rtype: subject """ self.standard_nv_pairs["tnuid"] = nuid return self + + def combine_subject(self, subject: Optional["Subject"]) -> PayloadDict: + """ + Merges another instance of Subject, with self taking priority + :param subject: Subject to merge; its values are overridden by those set on self + :type subject: subject | None + :rtype: PayloadDict + + """ + if subject is not None: + return {**subject.standard_nv_pairs, **self.standard_nv_pairs} + + return self.standard_nv_pairs diff --git a/snowplow_tracker/test/CLAUDE.md b/snowplow_tracker/test/CLAUDE.md new file mode 100644 index 00000000..08d0b042 --- /dev/null +++ b/snowplow_tracker/test/CLAUDE.md @@ -0,0 +1,365 @@ +# Snowplow Python Tracker Tests - CLAUDE.md + +## Directory Overview + +The `test/` directory contains comprehensive test suites for the Snowplow Python Tracker. Tests are organized into unit tests (isolated component testing) and integration tests (end-to-end collector communication). The test suite uses pytest and unittest.mock for mocking, with freezegun for time-based testing. 
+ +## Test Organization + +``` +test/ +├── unit/ # Isolated component tests +│ ├── test_tracker.py # Tracker class tests +│ ├── test_emitters.py # Emitter functionality +│ ├── test_event.py # Base event class +│ ├── test_payload.py # Payload construction +│ ├── test_contracts.py # Validation logic +│ └── test_*.py # Other component tests +└── integration/ # End-to-end tests + └── test_integration.py # Collector communication +``` + +## Core Testing Patterns + +### Mock Pattern for Emitters +```python +# ✅ Mock emitter for isolated tracker testing +@mock.patch('snowplow_tracker.emitters.Emitter') +def test_tracker_tracks_event(mock_emitter): + tracker = Tracker("test", mock_emitter) + tracker.track(PageView(page_url="test.com")) + mock_emitter.input.assert_called_once() + +# ❌ Don't test with real network calls in unit tests +def test_tracker(): + emitter = Emitter("https://real-collector.com") +``` + +### Contract Testing Pattern +```python +# ✅ Use ContractsDisabled context manager +class ContractsDisabled: + def __enter__(self): + disable_contracts() + def __exit__(self, type, value, traceback): + enable_contracts() + +with ContractsDisabled(): + # Test invalid inputs without raising + tracker.track_page_view(None) + +# ❌ Don't disable contracts globally +disable_contracts() +# ... 
rest of test file +``` + +### Time-Based Testing Pattern +```python +# ✅ Use freezegun for deterministic timestamps +from freezegun import freeze_time + +@freeze_time("2024-01-01 12:00:00") +def test_event_timestamp(): + event = PageView(page_url="test.com") + # Timestamp will be consistent + +# ❌ Don't use actual system time +import time +timestamp = time.time() # Non-deterministic +``` + +### UUID Mocking Pattern +```python +# ✅ Mock UUID generation for predictable IDs +@mock.patch('snowplow_tracker.tracker.Tracker.get_uuid') +def test_event_id(mock_uuid): + mock_uuid.return_value = "test-uuid-123" + tracker.track(event) + assert payload["eid"] == "test-uuid-123" + +# ❌ Don't rely on random UUIDs +event_id = tracker.get_uuid() # Different each run +``` + +## Unit Test Patterns + +### Payload Testing +```python +# ✅ Test payload field presence and values +def test_payload_construction(): + payload = Payload() + payload.add("e", "pv") + payload.add("url", "https://test.com") + + result = payload.get() + assert result["e"] == "pv" + assert result["url"] == "https://test.com" + +# ✅ Test JSON encoding +def test_payload_json_encoding(): + payload.add_json({"key": "value"}, True, "cx", "co") + assert "cx" in payload.get() # Base64 encoded +``` + +### Event Testing +```python +# ✅ Test event construction with all parameters +def test_page_view_complete(): + context = SelfDescribingJson(schema, data) + subject = Subject() + + event = PageView( + page_url="https://test.com", + page_title="Test", + context=[context], + event_subject=subject, + true_timestamp=1234567890 + ) + + assert event.page_url == "https://test.com" + assert len(event.context) == 1 + +# ❌ Don't test internal implementation details +def test_private_methods(): + event._internal_method() # Testing private methods +``` + +### Emitter Testing +```python +# ✅ Mock HTTP requests for emitter tests +@mock.patch('requests.post') +def test_emitter_sends_events(mock_post): + mock_post.return_value.status_code = 
200 + + emitter = Emitter("https://collector.test") + emitter.input({"e": "pv"}) + emitter.flush() + + mock_post.assert_called_once() + +# ✅ Test retry logic +def test_emitter_retry_on_failure(mock_post): + mock_post.return_value.status_code = 500 + emitter.custom_retry_codes = {500: True} + # Verify retry behavior +``` + +### Contract Validation Testing +```python +# ✅ Test validation rules +def test_non_empty_string_validation(): + with self.assertRaises(ValueError): + non_empty_string("") + + non_empty_string("valid") # Should not raise + +# ✅ Test form element validation +def test_form_element_contract(): + valid_element = { + "name": "field1", + "value": "test", + "nodeName": "INPUT", + "type": "text" + } + form_element(valid_element) # Should not raise +``` + +## Integration Test Patterns + +### Mock Collector Pattern +```python +# ✅ Use micro mock collector for integration tests +from http.server import HTTPServer, BaseHTTPRequestHandler + +class MockCollector(BaseHTTPRequestHandler): + def do_POST(self): + # Capture and validate payload + content_length = int(self.headers['Content-Length']) + post_data = self.rfile.read(content_length) + # Store for assertions + self.send_response(200) + +# Start mock collector in test +server = HTTPServer(('localhost', 9090), MockCollector) +``` + +### End-to-End Testing +```python +# ✅ Test complete tracking flow +def test_end_to_end_tracking(): + tracker = Snowplow.create_tracker( + namespace="test", + endpoint="http://localhost:9090" + ) + + # Track multiple events + tracker.track(PageView(page_url="test1.com")) + tracker.track(StructuredEvent("cat", "act")) + tracker.flush() + + # Verify collector received both events + assert len(received_events) == 2 +``` + +## Testing Best Practices + +### Test Isolation +```python +# ✅ Clean up after each test +def setUp(self): + Snowplow.reset() # Clear all trackers + +def tearDown(self): + # Clean up any test artifacts + if hasattr(self, 'server'): + self.server.shutdown() + +# ❌ 
Don't leave state between tests +class TestSuite: + shared_tracker = Tracker(...) # Shared state! +``` + +### Assertion Patterns +```python +# ✅ Use specific assertions +assert event.page_url == "https://expected.com" +assert "e" in payload.get() +mock_func.assert_called_with(expected_arg) + +# ❌ Avoid generic assertions +assert event # Too vague +assert payload.get() # What are we checking? +``` + +### Mock Management +```python +# ✅ Use patch decorators or context managers +@mock.patch('snowplow_tracker.tracker.uuid.uuid4') +def test_with_mock(mock_uuid): + mock_uuid.return_value = "test-id" + +# ✅ Clean up patches +def create_patch(self, name): + patcher = mock.patch(name) + thing = patcher.start() + self.addCleanup(patcher.stop) + return thing +``` + +## Common Test Scenarios + +### Testing Event Contexts +```python +# ✅ Test context encoding and attachment +def test_event_with_multiple_contexts(): + contexts = [ + SelfDescribingJson(schema1, data1), + SelfDescribingJson(schema2, data2) + ] + event = PageView(page_url="test", context=contexts) + + payload = event.build_payload(True, None, None) + cx_data = json.loads(base64.b64decode(payload.get()["cx"])) + assert len(cx_data["data"]) == 2 +``` + +### Testing Failure Scenarios +```python +# ✅ Test failure callbacks +def test_emitter_failure_callback(): + failed_events = [] + + def on_failure(count, events): + failed_events.extend(events) + + emitter = Emitter( + "https://invalid.collector", + on_failure=on_failure + ) + # Trigger failure and verify callback +``` + +### Testing Async Behavior +```python +# ✅ Test async emitter threading +def test_async_emitter(): + emitter = AsyncEmitter("https://collector.test") + + # Track events + for i in range(100): + emitter.input({"e": "pv", "url": f"test{i}.com"}) + + # Wait for flush + emitter.flush() + time.sleep(1) # Allow async processing + + # Verify all events sent +``` + +## Test Utilities + +### Helper Functions +```python +# ✅ Create reusable test helpers +def 
create_test_tracker(namespace="test"): + emitter = mock.MagicMock() + return Tracker(namespace, emitter) + +def create_test_event(): + return PageView(page_url="https://test.com") + +# ❌ Don't duplicate test setup +def test_one(): + emitter = mock.MagicMock() + tracker = Tracker("test", emitter) + # ... repeated in every test +``` + +## Performance Testing + +### Load Testing Pattern +```python +# ✅ Test tracker under load +def test_high_volume_tracking(): + tracker = create_test_tracker() + + start = time.time() + for i in range(10000): + tracker.track(PageView(page_url=f"test{i}.com")) + + duration = time.time() - start + assert duration < 5.0 # Performance threshold +``` + +## Quick Reference + +### Test File Naming +- Unit tests: `test_<component>.py` +- Integration tests: `test_integration_<feature>.py` +- Test classes: `Test<Component>` +- Test methods: `test_<behavior>` + +### Essential Test Imports +```python +import unittest +import unittest.mock as mock +from freezegun import freeze_time +from snowplow_tracker.contracts import ContractsDisabled +``` + +### Common Mock Targets +- `snowplow_tracker.tracker.Tracker.get_uuid` +- `requests.post` / `requests.get` +- `time.time` +- `snowplow_tracker.emitters.Emitter.sync_flush` + +## Contributing to test/CLAUDE.md + +When adding or modifying tests: + +1. **Maintain test isolation** - Each test should be independent +2. **Mock external dependencies** - No real network calls in unit tests +3. **Use descriptive test names** - Clear what is being tested +4. **Test both success and failure paths** - Include edge cases +5. **Keep tests fast** - Mock time-consuming operations +6. 
**Document complex test scenarios** - Add comments for clarity \ No newline at end of file diff --git a/snowplow_tracker/test/integration/test_integration.py b/snowplow_tracker/test/integration/test_integration.py index cea52a46..57b1a58c 100644 --- a/snowplow_tracker/test/integration/test_integration.py +++ b/snowplow_tracker/test/integration/test_integration.py @@ -1,7 +1,7 @@ # """ # test_integration.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import unittest @@ -31,14 +27,13 @@ from snowplow_tracker import tracker, _version, emitters, subject from snowplow_tracker.self_describing_json import SelfDescribingJson -from snowplow_tracker.redis import redis_emitter querystrings = [""] -default_emitter = emitters.Emitter("localhost", protocol="http", port=80) +default_emitter = emitters.Emitter("localhost", protocol="http", port=80, batch_size=1) -post_emitter = emitters.Emitter("localhost", protocol="http", port=80, method='post', buffer_size=1) +get_emitter = emitters.Emitter("localhost", protocol="http", port=80, method="get") default_subject = subject.Subject() @@ -53,155 +48,293 @@ def from_querystring(field: str, url: str) -> Optional[str]: @all_requests def pass_response_content(url: str, request: Any) -> Dict[str, Any]: querystrings.append(request.url) - return { - "url": request.url, - "status_code": 200 - } + return 
{"url": request.url, "status_code": 200} @all_requests def pass_post_response_content(url: str, request: Any) -> Dict[str, Any]: querystrings.append(json.loads(request.body)) - return { - "url": request.url, - "status_code": 200 - } + return {"url": request.url, "status_code": 200} @all_requests def fail_response_content(url: str, request: Any) -> Dict[str, Any]: - return { - "url": request.url, - "status_code": 501 - } + return {"url": request.url, "status_code": 501} class IntegrationTest(unittest.TestCase): - def test_integration_page_view(self) -> None: - t = tracker.Tracker([default_emitter], default_subject) + t = tracker.Tracker("namespace", [get_emitter], default_subject) with HTTMock(pass_response_content): - t.track_page_view("http://savethearctic.org", "Save The Arctic", "http://referrer.com") - expected_fields = {"e": "pv", "page": "Save+The+Arctic", "url": "http%3A%2F%2Fsavethearctic.org", "refr": "http%3A%2F%2Freferrer.com"} + t.track_page_view( + "http://savethearctic.org", "Save The Arctic", "http://referrer.com" + ) + expected_fields = { + "e": "pv", + "page": "Save+The+Arctic", + "url": "http%3A%2F%2Fsavethearctic.org", + "refr": "http%3A%2F%2Freferrer.com", + } for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) def test_integration_ecommerce_transaction_item(self) -> None: - t = tracker.Tracker([default_emitter], default_subject) + t = tracker.Tracker("namespace", [get_emitter], default_subject) with HTTMock(pass_response_content): - t.track_ecommerce_transaction_item("12345", "pbz0025", 7.99, 2, "black-tarot", "tarot", currency="GBP") - expected_fields = {"ti_ca": "tarot", "ti_id": "12345", "ti_qu": "2", "ti_sk": "pbz0025", "e": "ti", "ti_nm": "black-tarot", "ti_pr": "7.99", "ti_cu": "GBP"} + t.track_ecommerce_transaction_item( + "12345", "pbz0025", 7.99, 2, "black-tarot", "tarot", currency="GBP" + ) 
+ expected_fields = { + "ti_ca": "tarot", + "ti_id": "12345", + "ti_qu": "2", + "ti_sk": "pbz0025", + "e": "ti", + "ti_nm": "black-tarot", + "ti_pr": "7.99", + "ti_cu": "GBP", + } for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) def test_integration_ecommerce_transaction(self) -> None: - t = tracker.Tracker([default_emitter], default_subject) + t = tracker.Tracker("namespace", [get_emitter], default_subject) with HTTMock(pass_response_content): t.track_ecommerce_transaction( - "6a8078be", 35, city="London", currency="GBP", + order_id="6a8078be", + total_value=35, + city="London", + currency="GBP", items=[ - { - "sku": "pbz0026", - "price": 20, - "quantity": 1 - }, - { - "sku": "pbz0038", - "price": 15, - "quantity": 1 - }]) + {"sku": "pbz0026", "price": 20, "quantity": 1}, + {"sku": "pbz0038", "price": 15, "quantity": 1}, + ], + tstamp=1399021242240, + ) - expected_fields = {"e": "tr", "tr_id": "6a8078be", "tr_tt": "35", "tr_ci": "London", "tr_cu": "GBP"} + expected_fields = { + "e": "tr", + "tr_id": "6a8078be", + "tr_tt": "35", + "tr_ci": "London", + "tr_cu": "GBP", + } for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-3]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-3]), expected_fields[key] + ) - expected_fields = {"e": "ti", "ti_id": "6a8078be", "ti_sk": "pbz0026", "ti_pr": "20", "ti_cu": "GBP"} + expected_fields = { + "e": "ti", + "ti_id": "6a8078be", + "ti_sk": "pbz0026", + "ti_pr": "20", + "ti_cu": "GBP", + } for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-2]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-2]), expected_fields[key] + ) - expected_fields = {"e": "ti", "ti_id": "6a8078be", "ti_sk": "pbz0038", "ti_pr": "15", "ti_cu": "GBP"} + expected_fields = { + "e": "ti", + 
"ti_id": "6a8078be", + "ti_sk": "pbz0038", + "ti_pr": "15", + "ti_cu": "GBP", + } for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) - - self.assertEqual(from_querystring("ttm", querystrings[-3]), from_querystring("ttm", querystrings[-2])) - - def test_integration_screen_view(self) -> None: - t = tracker.Tracker([default_emitter], default_subject, encode_base64=False) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) + + self.assertEqual( + from_querystring("ttm", querystrings[-3]), + from_querystring("ttm", querystrings[-2]), + ) + + def test_integration_mobile_screen_view(self) -> None: + t = tracker.Tracker( + "namespace", [get_emitter], default_subject, encode_base64=False + ) with HTTMock(pass_response_content): - t.track_screen_view("Game HUD 2", id_="534") + t.track_mobile_screen_view(id_="534", name="Game HUD 2") expected_fields = {"e": "ue"} for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) envelope_string = from_querystring("ue_pr", querystrings[-1]) envelope = json.loads(unquote_plus(envelope_string)) - self.assertEqual(envelope, { - "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", - "data": { - "schema": "iglu:com.snowplowanalytics.snowplow/screen_view/jsonschema/1-0-0", + self.assertEqual( + envelope, + { + "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", "data": { - "name": "Game HUD 2", - "id": "534" - } - } - }) + "schema": "iglu:com.snowplowanalytics.mobile/screen_view/jsonschema/1-0-0", + "data": {"id": "534", "name": "Game HUD 2"}, + }, + }, + ) def test_integration_struct_event(self) -> None: - t = tracker.Tracker([default_emitter], default_subject) + t = tracker.Tracker("namespace", [get_emitter], default_subject) with 
HTTMock(pass_response_content): - t.track_struct_event("Ecomm", "add-to-basket", "dog-skateboarding-video", "hd", 13.99) - expected_fields = {"se_ca": "Ecomm", "se_pr": "hd", "se_la": "dog-skateboarding-video", "se_va": "13.99", "se_ac": "add-to-basket", "e": "se"} + t.track_struct_event( + "Ecomm", "add-to-basket", "dog-skateboarding-video", "hd", 13.99 + ) + expected_fields = { + "se_ca": "Ecomm", + "se_pr": "hd", + "se_la": "dog-skateboarding-video", + "se_va": "13.99", + "se_ac": "add-to-basket", + "e": "se", + } for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) - - def test_integration_unstruct_event_non_base64(self) -> None: - t = tracker.Tracker([default_emitter], default_subject, encode_base64=False) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) + + def test_integration_self_describing_event_non_base64(self) -> None: + t = tracker.Tracker( + "namespace", [get_emitter], default_subject, encode_base64=False + ) with HTTMock(pass_response_content): - t.track_unstruct_event(SelfDescribingJson("iglu:com.acme/viewed_product/jsonschema/2-0-2", {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000})) + t.track_self_describing_event( + SelfDescribingJson( + "iglu:com.acme/viewed_product/jsonschema/2-0-2", + {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000}, + ) + ) expected_fields = {"e": "ue"} for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) envelope_string = from_querystring("ue_pr", querystrings[-1]) envelope = json.loads(unquote_plus(envelope_string)) - self.assertEqual(envelope, { - "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", - "data": {"schema": "iglu:com.acme/viewed_product/jsonschema/2-0-2", "data": {"product_id": "ASO01043", "price$flt": 49.95, 
"walrus$tms": 1000}} - }) - - def test_integration_unstruct_event_base64(self) -> None: - t = tracker.Tracker([default_emitter], default_subject, encode_base64=True) + self.assertEqual( + envelope, + { + "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data": { + "schema": "iglu:com.acme/viewed_product/jsonschema/2-0-2", + "data": { + "product_id": "ASO01043", + "price$flt": 49.95, + "walrus$tms": 1000, + }, + }, + }, + ) + + def test_integration_self_describing_event_base64(self) -> None: + t = tracker.Tracker( + "namespace", [get_emitter], default_subject, encode_base64=True + ) with HTTMock(pass_response_content): - t.track_unstruct_event(SelfDescribingJson("iglu:com.acme/viewed_product/jsonschema/2-0-2", {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000})) + t.track_self_describing_event( + SelfDescribingJson( + "iglu:com.acme/viewed_product/jsonschema/2-0-2", + {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000}, + ) + ) expected_fields = {"e": "ue"} for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) envelope_string = unquote_plus(from_querystring("ue_px", querystrings[-1])) - envelope = json.loads((base64.urlsafe_b64decode(bytearray(envelope_string, "utf-8"))).decode("utf-8")) - self.assertEqual(envelope, { - "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", - "data": {"schema": "iglu:com.acme/viewed_product/jsonschema/2-0-2", "data": {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000}} - }) + envelope = json.loads( + (base64.urlsafe_b64decode(bytearray(envelope_string, "utf-8"))).decode( + "utf-8" + ) + ) + self.assertEqual( + envelope, + { + "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data": { + "schema": "iglu:com.acme/viewed_product/jsonschema/2-0-2", + "data": 
{ + "product_id": "ASO01043", + "price$flt": 49.95, + "walrus$tms": 1000, + }, + }, + }, + ) def test_integration_context_non_base64(self) -> None: - t = tracker.Tracker([default_emitter], default_subject, encode_base64=False) + t = tracker.Tracker( + "namespace", [get_emitter], default_subject, encode_base64=False + ) with HTTMock(pass_response_content): - t.track_page_view("localhost", "local host", None, [SelfDescribingJson("iglu:com.example/user/jsonschema/2-0-3", {"user_type": "tester"})]) + t.track_page_view( + "localhost", + "local host", + None, + [ + SelfDescribingJson( + "iglu:com.example/user/jsonschema/2-0-3", + {"user_type": "tester"}, + ) + ], + ) envelope_string = from_querystring("co", querystrings[-1]) envelope = json.loads(unquote_plus(envelope_string)) - self.assertEqual(envelope, { - "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1", - "data": [{"schema": "iglu:com.example/user/jsonschema/2-0-3", "data": {"user_type": "tester"}}] - }) + self.assertEqual( + envelope, + { + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1", + "data": [ + { + "schema": "iglu:com.example/user/jsonschema/2-0-3", + "data": {"user_type": "tester"}, + } + ], + }, + ) def test_integration_context_base64(self) -> None: - t = tracker.Tracker([default_emitter], default_subject, encode_base64=True) + t = tracker.Tracker( + "namespace", [get_emitter], default_subject, encode_base64=True + ) with HTTMock(pass_response_content): - t.track_page_view("localhost", "local host", None, [SelfDescribingJson("iglu:com.example/user/jsonschema/2-0-3", {"user_type": "tester"})]) + t.track_page_view( + "localhost", + "local host", + None, + [ + SelfDescribingJson( + "iglu:com.example/user/jsonschema/2-0-3", + {"user_type": "tester"}, + ) + ], + ) envelope_string = unquote_plus(from_querystring("cx", querystrings[-1])) - envelope = json.loads((base64.urlsafe_b64decode(bytearray(envelope_string, "utf-8"))).decode("utf-8")) - 
self.assertEqual(envelope, { - "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1", - "data": [{"schema": "iglu:com.example/user/jsonschema/2-0-3", "data": {"user_type": "tester"}}] - }) + envelope = json.loads( + (base64.urlsafe_b64decode(bytearray(envelope_string, "utf-8"))).decode( + "utf-8" + ) + ) + self.assertEqual( + envelope, + { + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1", + "data": [ + { + "schema": "iglu:com.example/user/jsonschema/2-0-3", + "data": {"user_type": "tester"}, + } + ], + }, + ) def test_integration_standard_nv_pairs(self) -> None: s = subject.Subject() @@ -212,86 +345,95 @@ def test_integration_standard_nv_pairs(self) -> None: s.set_timezone("Europe London") s.set_lang("en") - t = tracker.Tracker([emitters.Emitter("localhost")], s, "cf", app_id="angry-birds-android") + t = tracker.Tracker( + "cf", + [emitters.Emitter("localhost", method="get")], + s, + app_id="angry-birds-android", + ) with HTTMock(pass_response_content): t.track_page_view("localhost", "local host") - expected_fields = {"tna": "cf", "res": "100x200", - "lang": "en", "aid": "angry-birds-android", "cd": "24", "tz": "Europe+London", - "p": "mob", "tv": "py-" + _version.__version__} + expected_fields = { + "tna": "cf", + "res": "100x200", + "lang": "en", + "aid": "angry-birds-android", + "cd": "24", + "tz": "Europe+London", + "p": "mob", + "tv": "py-" + _version.__version__, + } for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) self.assertIsNotNone(from_querystring("eid", querystrings[-1])) self.assertIsNotNone(from_querystring("dtm", querystrings[-1])) def test_integration_identification_methods(self) -> None: s = subject.Subject() s.set_domain_user_id("4616bfb38f872d16") + s.set_domain_session_id("59ed13b1a5724dae") + s.set_domain_session_index(1) 
s.set_ip_address("255.255.255.255") - s.set_useragent("Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0)") + s.set_useragent( + "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0)" + ) s.set_network_user_id("fbc6c76c-bce5-43ce-8d5a-31c5") - t = tracker.Tracker([emitters.Emitter("localhost")], s, "cf", app_id="angry-birds-android") + t = tracker.Tracker( + "cf", + [emitters.Emitter("localhost", method="get")], + s, + app_id="angry-birds-android", + ) with HTTMock(pass_response_content): t.track_page_view("localhost", "local host") expected_fields = { "duid": "4616bfb38f872d16", + "sid": "59ed13b1a5724dae", + "vid": "1", "ip": "255.255.255.255", "ua": "Mozilla%2F5.0+%28compatible%3B+MSIE+9.0%3B+Windows+NT+6.0%3B+Trident%2F5.0%29", - "tnuid": "fbc6c76c-bce5-43ce-8d5a-31c5" + "tnuid": "fbc6c76c-bce5-43ce-8d5a-31c5", } for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) def test_integration_event_subject(self) -> None: s = subject.Subject() s.set_domain_user_id("4616bfb38f872d16") - s.set_ip_address("255.255.255.255") - - t = tracker.Tracker([emitters.Emitter("localhost")], s, "cf", app_id="angry-birds-android") - evSubject = subject.Subject().set_domain_user_id("1111aaa11a111a11").set_lang("EN") + s.set_lang("ES") + + t = tracker.Tracker( + "namespace", + [emitters.Emitter("localhost", method="get")], + s, + app_id="angry-birds-android", + ) + evSubject = ( + subject.Subject().set_domain_user_id("1111aaa11a111a11").set_lang("EN") + ) with HTTMock(pass_response_content): t.track_page_view("localhost", "local host", event_subject=evSubject) - expected_fields = { - "duid": "1111aaa11a111a11", - "lang": "EN" - } + expected_fields = {"duid": "1111aaa11a111a11", "lang": "EN"} for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) - - def 
test_integration_redis_default(self) -> None: - try: - import fakeredis - r = fakeredis.FakeStrictRedis() - t = tracker.Tracker([redis_emitter.RedisEmitter(rdb=r)], default_subject) - t.track_page_view("http://www.example.com") - event_string = r.rpop("snowplow") - event_dict = json.loads(event_string.decode("utf-8")) - self.assertEqual(event_dict["e"], "pv") - except ImportError: - with pytest.raises(RuntimeError): - redis_emitter.RedisEmitter() - - def test_integration_redis_custom(self) -> None: - try: - import fakeredis - r = fakeredis.FakeStrictRedis() - t = tracker.Tracker([redis_emitter.RedisEmitter(rdb=r, key="custom_key")], default_subject) - t.track_page_view("http://www.example.com") - event_string = r.rpop("custom_key") - event_dict = json.loads(event_string.decode("utf-8")) - self.assertEqual(event_dict["e"], "pv") - except ImportError: - with pytest.raises(RuntimeError): - redis_emitter.RedisEmitter("arg", key="kwarg") + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) def test_integration_success_callback(self) -> None: callback_success_queue = [] callback_failure_queue = [] callback_emitter = emitters.Emitter( "localhost", + method="get", on_success=lambda x: callback_success_queue.append(x), - on_failure=lambda x, y: callback_failure_queue.append(x)) - t = tracker.Tracker([callback_emitter], default_subject) + on_failure=lambda x, y: callback_failure_queue.append(x), + ) + t = tracker.Tracker("namespace", [callback_emitter], default_subject) with HTTMock(pass_response_content): t.track_page_view("http://www.example.com") expected = { @@ -308,27 +450,34 @@ def test_integration_failure_callback(self) -> None: callback_failure_queue = [] callback_emitter = emitters.Emitter( "localhost", + method="get", on_success=lambda x: callback_success_queue.append(x), - on_failure=lambda x, y: callback_failure_queue.append(x)) - t = tracker.Tracker([callback_emitter], default_subject) + on_failure=lambda x, y: 
callback_failure_queue.append(x), + ) + t = tracker.Tracker("namespace", [callback_emitter], default_subject) with HTTMock(fail_response_content): t.track_page_view("http://www.example.com") self.assertEqual(callback_success_queue, []) self.assertEqual(callback_failure_queue[0], 0) def test_post_page_view(self) -> None: - t = tracker.Tracker([post_emitter], default_subject) + t = tracker.Tracker("namespace", [default_emitter], default_subject) with HTTMock(pass_post_response_content): t.track_page_view("localhost", "local host", None) expected_fields = {"e": "pv", "page": "local host", "url": "localhost"} request = querystrings[-1] - self.assertEqual(request["schema"], "iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4") + self.assertEqual( + request["schema"], + "iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4", + ) for key in expected_fields: self.assertEqual(request["data"][0][key], expected_fields[key]) def test_post_batched(self) -> None: - post_emitter = emitters.Emitter("localhost", protocol="http", port=80, method='post', buffer_size=2) - t = tracker.Tracker(post_emitter, default_subject) + default_emitter = emitters.Emitter( + "localhost", protocol="http", port=80, batch_size=2 + ) + t = tracker.Tracker("namespace", default_emitter, default_subject) with HTTMock(pass_post_response_content): t.track_struct_event("Test", "A") t.track_struct_event("Test", "B") @@ -337,8 +486,8 @@ def test_post_batched(self) -> None: @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 def test_timestamps(self) -> None: - emitter = emitters.Emitter("localhost", protocol="http", port=80, method='post', buffer_size=3) - t = tracker.Tracker([emitter], default_subject) + emitter = emitters.Emitter("localhost", protocol="http", port=80, batch_size=3) + t = tracker.Tracker("namespace", [emitter], default_subject) with HTTMock(pass_post_response_content): t.track_page_view("localhost", "stamp0", None, tstamp=None) t.track_page_view("localhost", 
"stamp1", None, tstamp=1358933694000) @@ -347,65 +496,82 @@ def test_timestamps(self) -> None: expected_timestamps = [ {"dtm": "1618790401000", "ttm": None, "stm": "1618790401000"}, {"dtm": "1618790401000", "ttm": "1358933694000", "stm": "1618790401000"}, - {"dtm": "1618790401000", "ttm": "1358933694000", "stm": "1618790401000"} + {"dtm": "1618790401000", "ttm": "1358933694000", "stm": "1618790401000"}, ] request = querystrings[-1] for i, event in enumerate(expected_timestamps): - self.assertEqual(request["data"][i].get("dtm"), expected_timestamps[i]["dtm"]) - self.assertEqual(request["data"][i].get("ttm"), expected_timestamps[i]["ttm"]) - self.assertEqual(request["data"][i].get("stm"), expected_timestamps[i]["stm"]) + self.assertEqual( + request["data"][i].get("dtm"), expected_timestamps[i]["dtm"] + ) + self.assertEqual( + request["data"][i].get("ttm"), expected_timestamps[i]["ttm"] + ) + self.assertEqual( + request["data"][i].get("stm"), expected_timestamps[i]["stm"] + ) def test_bytelimit(self) -> None: - post_emitter = emitters.Emitter("localhost", protocol="http", port=80, method='post', buffer_size=5, byte_limit=420) - t = tracker.Tracker(post_emitter, default_subject) + default_emitter = emitters.Emitter( + "localhost", protocol="http", port=80, batch_size=5, byte_limit=459 + ) + t = tracker.Tracker("namespace", default_emitter, default_subject) with HTTMock(pass_post_response_content): - t.track_struct_event("Test", "A") # 140 bytes - t.track_struct_event("Test", "A") # 280 bytes - t.track_struct_event("Test", "A") # 420 bytes. Send - t.track_struct_event("Test", "AA") # 141 + t.track_struct_event("Test", "A") # 153 bytes + t.track_struct_event("Test", "A") # 306 bytes + t.track_struct_event("Test", "A") # 459 bytes. 
Send + t.track_struct_event("Test", "AA") # 154 + self.assertEqual(len(querystrings[-1]["data"]), 3) - self.assertEqual(post_emitter.bytes_queued, 136 + len(_version.__version__)) + self.assertEqual(default_emitter.bytes_queued, 156 + len(_version.__version__)) def test_unicode_get(self) -> None: - t = tracker.Tracker([default_emitter], default_subject, encode_base64=False) - unicode_a = u'\u0107' - unicode_b = u'test.\u0107om' - test_ctx = SelfDescribingJson('iglu:a.b/c/jsonschema/1-0-0', {'test': unicode_a}) + t = tracker.Tracker( + "namespace", [get_emitter], default_subject, encode_base64=False + ) + unicode_a = "\u0107" + unicode_b = "test.\u0107om" + test_ctx = SelfDescribingJson( + "iglu:a.b/c/jsonschema/1-0-0", {"test": unicode_a} + ) with HTTMock(pass_response_content): t.track_page_view(unicode_b, context=[test_ctx]) - t.track_screen_view(unicode_b, context=[test_ctx]) + t.track_mobile_screen_view(unicode_b, context=[test_ctx]) url_string = unquote_plus(from_querystring("url", querystrings[-2])) try: - self.assertEqual(url_string.decode('utf-8'), unicode_b) + self.assertEqual(url_string.decode("utf-8"), unicode_b) except AttributeError: # in python 3: str type contains unicode (so no 'decode') self.assertEqual(url_string, unicode_b) context_string = unquote_plus(from_querystring("co", querystrings[-1])) - actual_a = json.loads(context_string)['data'][0]['data']['test'] + actual_a = json.loads(context_string)["data"][0]["data"]["test"] self.assertEqual(actual_a, unicode_a) uepr_string = unquote_plus(from_querystring("ue_pr", querystrings[-1])) - actual_b = json.loads(uepr_string)['data']['data']['name'] + actual_b = json.loads(uepr_string)["data"]["data"]["name"] self.assertEqual(actual_b, unicode_b) def test_unicode_post(self) -> None: - t = tracker.Tracker([post_emitter], default_subject, encode_base64=False) - unicode_a = u'\u0107' - unicode_b = u'test.\u0107om' - test_ctx = SelfDescribingJson('iglu:a.b/c/jsonschema/1-0-0', {'test': unicode_a}) + t = 
tracker.Tracker( + "namespace", [default_emitter], default_subject, encode_base64=False + ) + unicode_a = "\u0107" + unicode_b = "test.\u0107om" + test_ctx = SelfDescribingJson( + "iglu:a.b/c/jsonschema/1-0-0", {"test": unicode_a} + ) with HTTMock(pass_post_response_content): t.track_page_view(unicode_b, context=[test_ctx]) - t.track_screen_view(unicode_b, context=[test_ctx]) + t.track_mobile_screen_view(unicode_b, context=[test_ctx]) pv_event = querystrings[-2] - self.assertEqual(pv_event['data'][0]['url'], unicode_b) + self.assertEqual(pv_event["data"][0]["url"], unicode_b) - in_test_ctx = json.loads(pv_event['data'][0]['co'])['data'][0]['data']['test'] + in_test_ctx = json.loads(pv_event["data"][0]["co"])["data"][0]["data"]["test"] self.assertEqual(in_test_ctx, unicode_a) sv_event = querystrings[-1] - in_uepr_name = json.loads(sv_event['data'][0]['ue_pr'])['data']['data']['name'] + in_uepr_name = json.loads(sv_event["data"][0]["ue_pr"])["data"]["data"]["name"] self.assertEqual(in_uepr_name, unicode_b) diff --git a/snowplow_tracker/test/unit/test_contracts.py b/snowplow_tracker/test/unit/test_contracts.py index 98c4a154..9a913bca 100644 --- a/snowplow_tracker/test/unit/test_contracts.py +++ b/snowplow_tracker/test/unit/test_contracts.py @@ -1,7 +1,7 @@ # """ # test_tracker.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,19 +13,21 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. 
- -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import unittest -from snowplow_tracker.contracts import form_element, greater_than, non_empty, non_empty_string, one_of, satisfies +from snowplow_tracker.contracts import ( + form_element, + greater_than, + non_empty, + non_empty_string, + one_of, + satisfies, +) class TestContracts(unittest.TestCase): - def setUp(self) -> None: pass @@ -37,25 +39,25 @@ def test_greater_than_fails(self) -> None: greater_than(0, 10) def test_non_empty_succeeds(self) -> None: - non_empty(['something']) + non_empty(["something"]) def test_non_empty_fails(self) -> None: with self.assertRaises(ValueError): non_empty([]) def test_non_empty_string_succeeds(self) -> None: - non_empty_string('something') + non_empty_string("something") def test_non_empty_string_fails(self) -> None: with self.assertRaises(ValueError): - non_empty_string('') + non_empty_string("") def test_one_of_succeeds(self) -> None: - one_of('something', ['something', 'something else']) + one_of("something", ["something", "something else"]) def test_one_of_fails(self) -> None: with self.assertRaises(ValueError): - one_of('something', ['something else']) + one_of("something", ["something else"]) def test_satisfies_succeeds(self) -> None: satisfies(10, lambda v: v == 10) @@ -65,11 +67,7 @@ def test_satisfies_fails(self) -> None: satisfies(0, lambda v: v == 10) def test_form_element_no_type(self) -> None: - elem = { - "name": "elemName", - "value": "elemValue", - "nodeName": "INPUT" - } + elem = {"name": "elemName", "value": "elemValue", "nodeName": "INPUT"} form_element(elem) def test_form_element_type_valid(self) -> None: @@ -77,7 +75,7 @@ def test_form_element_type_valid(self) -> None: "name": "elemName", "value": "elemValue", "nodeName": "TEXTAREA", - "type": "button" + "type": "button", } form_element(elem) @@ -86,40 +84,27 @@ def 
test_form_element_type_invalid(self) -> None: "name": "elemName", "value": "elemValue", "nodeName": "SELECT", - "type": "invalid" + "type": "invalid", } with self.assertRaises(ValueError): form_element(elem) def test_form_element_nodename_invalid(self) -> None: - elem = { - "name": "elemName", - "value": "elemValue", - "nodeName": "invalid" - } + elem = {"name": "elemName", "value": "elemValue", "nodeName": "invalid"} with self.assertRaises(ValueError): form_element(elem) def test_form_element_no_nodename(self) -> None: - elem = { - "name": "elemName", - "value": "elemValue" - } + elem = {"name": "elemName", "value": "elemValue"} with self.assertRaises(ValueError): form_element(elem) def test_form_element_no_value(self) -> None: - elem = { - "name": "elemName", - "nodeName": "INPUT" - } + elem = {"name": "elemName", "nodeName": "INPUT"} with self.assertRaises(ValueError): form_element(elem) def test_form_element_no_name(self) -> None: - elem = { - "value": "elemValue", - "nodeName": "INPUT" - } + elem = {"value": "elemValue", "nodeName": "INPUT"} with self.assertRaises(ValueError): form_element(elem) diff --git a/snowplow_tracker/test/unit/test_emitters.py b/snowplow_tracker/test/unit/test_emitters.py index 0167525a..f02be943 100644 --- a/snowplow_tracker/test/unit/test_emitters.py +++ b/snowplow_tracker/test/unit/test_emitters.py @@ -1,7 +1,7 @@ # """ # test_emitters.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,13 +13,8 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. 
- -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ - import time import unittest import unittest.mock as mock @@ -47,158 +42,181 @@ def mocked_http_failure(*args: Any) -> bool: return False -class TestEmitters(unittest.TestCase): +def mocked_http_response_success(*args: Any) -> int: + return 200 + + +def mocked_http_response_failure(*args: Any) -> int: + return 400 + +def mocked_http_response_failure_retry(*args: Any) -> int: + return 500 + + +class TestEmitters(unittest.TestCase): def setUp(self) -> None: pass def test_init(self) -> None: - e = Emitter('0.0.0.0') - self.assertEqual(e.endpoint, 'http://0.0.0.0/i') - self.assertEqual(e.method, 'get') - self.assertEqual(e.buffer_size, 1) - self.assertEqual(e.buffer, []) + e = Emitter("0.0.0.0") + self.assertEqual( + e.endpoint, "https://0.0.0.0/com.snowplowanalytics.snowplow/tp2" + ) + self.assertEqual(e.method, "post") + self.assertEqual(e.batch_size, 10) + self.assertEqual(e.event_store.event_buffer, []) self.assertIsNone(e.byte_limit) self.assertIsNone(e.bytes_queued) self.assertIsNone(e.on_success) self.assertIsNone(e.on_failure) - self.assertIsNone(e.timer) + self.assertFalse(e.timer.is_active()) self.assertIsNone(e.request_timeout) - def test_init_buffer_size(self) -> None: - e = Emitter('0.0.0.0', buffer_size=10) - self.assertEqual(e.buffer_size, 10) + def test_init_batch_size(self) -> None: + e = Emitter("0.0.0.0", batch_size=10) + self.assertEqual(e.batch_size, 10) def test_init_post(self) -> None: - e = Emitter('0.0.0.0', method="post") - self.assertEqual(e.buffer_size, DEFAULT_MAX_LENGTH) + e = Emitter("0.0.0.0") + self.assertEqual(e.batch_size, DEFAULT_MAX_LENGTH) def test_init_byte_limit(self) -> None: - e = Emitter('0.0.0.0', byte_limit=512) + e = Emitter("0.0.0.0", byte_limit=512) self.assertEqual(e.bytes_queued, 0) def test_init_requests_timeout(self) -> None: - e = Emitter('0.0.0.0', 
request_timeout=(2.5, 5)) + e = Emitter("0.0.0.0", request_timeout=(2.5, 5)) self.assertEqual(e.request_timeout, (2.5, 5)) def test_as_collector_uri(self) -> None: - uri = Emitter.as_collector_uri('0.0.0.0') - self.assertEqual(uri, 'http://0.0.0.0/i') + uri = Emitter.as_collector_uri("0.0.0.0") + self.assertEqual(uri, "https://0.0.0.0/com.snowplowanalytics.snowplow/tp2") - def test_as_collector_uri_post(self) -> None: - uri = Emitter.as_collector_uri('0.0.0.0', method="post") - self.assertEqual(uri, 'http://0.0.0.0/com.snowplowanalytics.snowplow/tp2') + def test_as_collector_uri_get(self) -> None: + uri = Emitter.as_collector_uri("0.0.0.0", method="get") + self.assertEqual(uri, "https://0.0.0.0/i") def test_as_collector_uri_port(self) -> None: - uri = Emitter.as_collector_uri('0.0.0.0', port=9090, method="post") - self.assertEqual(uri, 'http://0.0.0.0:9090/com.snowplowanalytics.snowplow/tp2') + uri = Emitter.as_collector_uri("0.0.0.0", port=9090) + self.assertEqual(uri, "https://0.0.0.0:9090/com.snowplowanalytics.snowplow/tp2") - def test_as_collector_uri_https(self) -> None: - uri = Emitter.as_collector_uri('0.0.0.0', protocol="https") - self.assertEqual(uri, 'https://0.0.0.0/i') + def test_as_collector_uri_http(self) -> None: + uri = Emitter.as_collector_uri("0.0.0.0", protocol="http") + self.assertEqual(uri, "http://0.0.0.0/com.snowplowanalytics.snowplow/tp2") def test_as_collector_uri_empty_string(self) -> None: with self.assertRaises(ValueError): - Emitter.as_collector_uri('') + Emitter.as_collector_uri("") + + def test_as_collector_uri_endpoint_protocol(self) -> None: + uri = Emitter.as_collector_uri("https://0.0.0.0") + self.assertEqual(uri, "https://0.0.0.0/com.snowplowanalytics.snowplow/tp2") - @mock.patch('snowplow_tracker.Emitter.flush') + def test_as_collector_uri_endpoint_protocol_http(self) -> None: + uri = Emitter.as_collector_uri("http://0.0.0.0") + self.assertEqual(uri, "http://0.0.0.0/com.snowplowanalytics.snowplow/tp2") + + 
@mock.patch("snowplow_tracker.Emitter.flush") def test_input_no_flush(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="get", buffer_size=2) + e = Emitter("0.0.0.0", method="get", batch_size=2) nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) - self.assertEqual(len(e.buffer), 1) - self.assertDictEqual(nvPairs, e.buffer[0]) + self.assertEqual(len(e.event_store.event_buffer), 1) + self.assertDictEqual(nvPairs, e.event_store.event_buffer[0]) self.assertIsNone(e.byte_limit) self.assertFalse(e.reached_limit()) mok_flush.assert_not_called() - @mock.patch('snowplow_tracker.Emitter.flush') + @mock.patch("snowplow_tracker.Emitter.flush") def test_input_flush_byte_limit(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="get", buffer_size=2, byte_limit=16) + e = Emitter("0.0.0.0", method="get", batch_size=2, byte_limit=16) nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) - self.assertEqual(len(e.buffer), 1) - self.assertDictEqual(nvPairs, e.buffer[0]) + self.assertEqual(len(e.event_store.event_buffer), 1) + self.assertDictEqual(nvPairs, e.event_store.event_buffer[0]) self.assertTrue(e.reached_limit()) self.assertEqual(mok_flush.call_count, 1) - @mock.patch('snowplow_tracker.Emitter.flush') + @mock.patch("snowplow_tracker.Emitter.flush") def test_input_flush_buffer(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="get", buffer_size=2, byte_limit=1024) + e = Emitter("0.0.0.0", method="get", batch_size=2, byte_limit=1024) nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) - self.assertEqual(len(e.buffer), 1) + self.assertEqual(len(e.event_store.event_buffer), 1) self.assertFalse(e.reached_limit()) - self.assertDictEqual(nvPairs, e.buffer[0]) + self.assertDictEqual(nvPairs, e.event_store.event_buffer[0]) nextPairs = {"n0": "v0"} e.input(nextPairs) # since we mock flush, the buffer is not empty - self.assertEqual(e.buffer, 
[nvPairs, nextPairs]) + self.assertEqual(e.event_store.event_buffer, [nvPairs, nextPairs]) self.assertTrue(e.reached_limit()) self.assertEqual(mok_flush.call_count, 1) - @mock.patch('snowplow_tracker.Emitter.flush') + @mock.patch("snowplow_tracker.Emitter.flush") def test_input_bytes_queued(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="get", buffer_size=2, byte_limit=1024) + e = Emitter("0.0.0.0", method="get", batch_size=2, byte_limit=1024) nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) - self.assertEqual(len(e.buffer), 1) + self.assertEqual(len(e.event_store.event_buffer), 1) self.assertEqual(e.bytes_queued, 24) e.input(nvPairs) self.assertEqual(e.bytes_queued, 48) - @mock.patch('snowplow_tracker.Emitter.flush') + @mock.patch("snowplow_tracker.Emitter.flush") def test_input_bytes_post(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="post") + e = Emitter("0.0.0.0") nvPairs = {"testString": "test", "testNum": 2.72} e.input(nvPairs) - self.assertEqual(e.buffer, [{"testString": "test", "testNum": "2.72"}]) + self.assertEqual( + e.event_store.event_buffer, [{"testString": "test", "testNum": "2.72"}] + ) - @mock.patch('snowplow_tracker.Emitter.send_events') + @mock.patch("snowplow_tracker.Emitter.http_post") def test_flush(self, mok_send_events: Any) -> None: - mok_send_events.side_effect = mocked_send_events + mok_send_events.side_effect = mocked_http_response_success - e = Emitter('0.0.0.0', buffer_size=2, byte_limit=None) + e = Emitter("0.0.0.0", batch_size=2, byte_limit=None) nvPairs = {"n": "v"} e.input(nvPairs) e.input(nvPairs) self.assertEqual(mok_send_events.call_count, 1) - self.assertEqual(len(e.buffer), 0) + self.assertEqual(len(e.event_store.event_buffer), 0) - @mock.patch('snowplow_tracker.Emitter.send_events') + @mock.patch("snowplow_tracker.Emitter.http_post") def test_flush_bytes_queued(self, mok_send_events: Any) -> None: - 
mok_send_events.side_effect = mocked_send_events + mok_send_events.side_effect = mocked_http_response_success - e = Emitter('0.0.0.0', buffer_size=2, byte_limit=256) + e = Emitter("0.0.0.0", batch_size=2, byte_limit=256) nvPairs = {"n": "v"} e.input(nvPairs) e.input(nvPairs) self.assertEqual(mok_send_events.call_count, 1) - self.assertEqual(len(e.buffer), 0) + self.assertEqual(len(e.event_store.event_buffer), 0) self.assertEqual(e.bytes_queued, 0) @freeze_time("2021-04-14 00:00:02") # unix: 1618358402000 def test_attach_sent_tstamp(self) -> None: - e = Emitter('0.0.0.0') + e = Emitter("0.0.0.0") ev_list = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.attach_sent_timestamp(ev_list) @@ -207,171 +225,213 @@ def test_attach_sent_tstamp(self) -> None: reduced = reduced and "stm" in ev.keys() and ev["stm"] == "1618358402000" self.assertTrue(reduced) - @mock.patch('snowplow_tracker.Emitter.flush') + @mock.patch("snowplow_tracker.Emitter.flush") def test_flush_timer(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="post", buffer_size=10) + e = Emitter("0.0.0.0", batch_size=10) ev_list = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] for i in ev_list: e.input(i) e.set_flush_timer(3) - self.assertEqual(len(e.buffer), 3) + self.assertEqual(len(e.event_store.event_buffer), 3) time.sleep(5) - self.assertEqual(mok_flush.call_count, 1) + self.assertGreaterEqual(mok_flush.call_count, 1) - @mock.patch('snowplow_tracker.Emitter.http_get') + @mock.patch("snowplow_tracker.Emitter.http_get") def test_send_events_get_success(self, mok_http_get: Any) -> None: - mok_http_get.side_effect = mocked_http_success + mok_http_get.side_effect = mocked_http_response_success mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', method="get", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + e = Emitter( + "0.0.0.0", + method="get", + batch_size=10, + 
on_success=mok_success, + on_failure=mok_failure, + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) mok_success.assert_called_once_with(evBuffer) mok_failure.assert_not_called() - @mock.patch('snowplow_tracker.Emitter.http_get') + @mock.patch("snowplow_tracker.Emitter.http_get") def test_send_events_get_failure(self, mok_http_get: Any) -> None: - mok_http_get.side_effect = mocked_http_failure + mok_http_get.side_effect = mocked_http_response_failure mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', method="get", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + e = Emitter( + "0.0.0.0", + method="get", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) mok_success.assert_not_called() mok_failure.assert_called_once_with(0, evBuffer) - @mock.patch('snowplow_tracker.Emitter.http_post') + @mock.patch("snowplow_tracker.Emitter.http_post") def test_send_events_post_success(self, mok_http_post: Any) -> None: - mok_http_post.side_effect = mocked_http_success + mok_http_post.side_effect = mocked_http_response_success mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', method="post", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + e = Emitter( + "0.0.0.0", batch_size=10, on_success=mok_success, on_failure=mok_failure + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) mok_success.assert_called_once_with(evBuffer) mok_failure.assert_not_called() - @mock.patch('snowplow_tracker.Emitter.http_post') + @mock.patch("snowplow_tracker.Emitter.http_post") def test_send_events_post_failure(self, mok_http_post: Any) -> None: - mok_http_post.side_effect = mocked_http_failure + mok_http_post.side_effect = mocked_http_response_failure 
mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', method="post", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + e = Emitter( + "0.0.0.0", batch_size=10, on_success=mok_success, on_failure=mok_failure + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) mok_success.assert_not_called() mok_failure.assert_called_with(0, evBuffer) - @mock.patch('snowplow_tracker.emitters.requests.post') + @mock.patch("snowplow_tracker.emitters.requests.post") def test_http_post_connect_timeout_error(self, mok_post_request: Any) -> None: mok_post_request.side_effect = ConnectTimeout - e = Emitter('0.0.0.0') - post_succeeded = e.http_post("dummy_string") + e = Emitter("0.0.0.0") + response = e.http_post("dummy_string") + post_succeeded = Emitter.is_good_status_code(response) self.assertFalse(post_succeeded) - @mock.patch('snowplow_tracker.emitters.requests.post') + @mock.patch("snowplow_tracker.emitters.requests.post") def test_http_get_connect_timeout_error(self, mok_post_request: Any) -> None: mok_post_request.side_effect = ConnectTimeout - e = Emitter('0.0.0.0') - get_succeeded = e.http_get({"a": "b"}) - + e = Emitter("0.0.0.0", method="get") + response = e.http_get({"a": "b"}) + get_succeeded = Emitter.is_good_status_code(response) self.assertFalse(get_succeeded) ### # AsyncEmitter ### - @mock.patch('snowplow_tracker.AsyncEmitter.flush') + @mock.patch("snowplow_tracker.AsyncEmitter.flush") def test_async_emitter_input(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - ae = AsyncEmitter('0.0.0.0', port=9090, method="get", buffer_size=3, thread_count=5) + ae = AsyncEmitter( + "0.0.0.0", port=9090, method="get", batch_size=3, thread_count=5 + ) self.assertTrue(ae.queue.empty()) ae.input({"a": "aa"}) ae.input({"b": "bb"}) - self.assertEqual(len(ae.buffer), 2) + self.assertEqual(len(ae.event_store.event_buffer), 2) 
self.assertTrue(ae.queue.empty()) mok_flush.assert_not_called() ae.input({"c": "cc"}) # meet buffer size self.assertEqual(mok_flush.call_count, 1) - @mock.patch('snowplow_tracker.AsyncEmitter.send_events') + @mock.patch("snowplow_tracker.AsyncEmitter.send_events") def test_async_emitter_sync_flash(self, mok_send_events: Any) -> None: mok_send_events.side_effect = mocked_send_events - ae = AsyncEmitter('0.0.0.0', port=9090, method="get", buffer_size=3, thread_count=5, byte_limit=1024) + ae = AsyncEmitter( + "0.0.0.0", + port=9090, + method="get", + batch_size=3, + thread_count=5, + byte_limit=1024, + ) self.assertTrue(ae.queue.empty()) ae.input({"a": "aa"}) ae.input({"b": "bb"}) - self.assertEqual(len(ae.buffer), 2) + self.assertEqual(len(ae.event_store.event_buffer), 2) self.assertTrue(ae.queue.empty()) mok_send_events.assert_not_called() ae.sync_flush() - self.assertEqual(len(ae.buffer), 0) + self.assertEqual(len(ae.event_store.event_buffer), 0) self.assertEqual(ae.bytes_queued, 0) self.assertEqual(mok_send_events.call_count, 1) - @mock.patch('snowplow_tracker.Emitter.http_get') + @mock.patch("snowplow_tracker.Emitter.http_get") def test_async_send_events_get_success(self, mok_http_get: Any) -> None: - mok_http_get.side_effect = mocked_http_success + mok_http_get.side_effect = mocked_http_response_success mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - ae = AsyncEmitter('0.0.0.0', method="get", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + ae = AsyncEmitter( + "0.0.0.0", + method="get", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] ae.send_events(evBuffer) mok_success.assert_called_once_with(evBuffer) mok_failure.assert_not_called() - @mock.patch('snowplow_tracker.Emitter.http_get') + @mock.patch("snowplow_tracker.Emitter.http_get") def test_async_send_events_get_failure(self, mok_http_get: Any) -> None: 
- mok_http_get.side_effect = mocked_http_failure + mok_http_get.side_effect = mocked_http_response_failure mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - ae = AsyncEmitter('0.0.0.0', method="get", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + ae = AsyncEmitter( + "0.0.0.0", + method="get", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] ae.send_events(evBuffer) mok_success.assert_not_called() mok_failure.assert_called_once_with(0, evBuffer) - @mock.patch('snowplow_tracker.Emitter.http_post') + @mock.patch("snowplow_tracker.Emitter.http_post") def test_async_send_events_post_success(self, mok_http_post: Any) -> None: - mok_http_post.side_effect = mocked_http_success + mok_http_post.side_effect = mocked_http_response_success mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - ae = Emitter('0.0.0.0', method="post", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + ae = Emitter( + "0.0.0.0", batch_size=10, on_success=mok_success, on_failure=mok_failure + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] ae.send_events(evBuffer) mok_success.assert_called_once_with(evBuffer) mok_failure.assert_not_called() - @mock.patch('snowplow_tracker.Emitter.http_post') + @mock.patch("snowplow_tracker.Emitter.http_post") def test_async_send_events_post_failure(self, mok_http_post: Any) -> None: - mok_http_post.side_effect = mocked_http_failure + mok_http_post.side_effect = mocked_http_response_failure mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - ae = Emitter('0.0.0.0', method="post", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + ae = Emitter( + "0.0.0.0", batch_size=10, on_success=mok_success, on_failure=mok_failure + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": 
"cc"}] ae.send_events(evBuffer) @@ -379,24 +439,147 @@ def test_async_send_events_post_failure(self, mok_http_post: Any) -> None: mok_failure.assert_called_with(0, evBuffer) # Unicode - @mock.patch('snowplow_tracker.AsyncEmitter.flush') + @mock.patch("snowplow_tracker.AsyncEmitter.flush") def test_input_unicode_get(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - payload = {"unicode": u'\u0107', "alsoAscii": "abc"} - ae = AsyncEmitter('0.0.0.0', method="get", buffer_size=2) + payload = {"unicode": "\u0107", "alsoAscii": "abc"} + ae = AsyncEmitter("0.0.0.0", method="get", batch_size=2) ae.input(payload) - self.assertEqual(len(ae.buffer), 1) - self.assertDictEqual(payload, ae.buffer[0]) + self.assertEqual(len(ae.event_store.event_buffer), 1) + self.assertDictEqual(payload, ae.event_store.event_buffer[0]) - @mock.patch('snowplow_tracker.AsyncEmitter.flush') + @mock.patch("snowplow_tracker.AsyncEmitter.flush") def test_input_unicode_post(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - payload = {"unicode": u'\u0107', "alsoAscii": "abc"} - ae = AsyncEmitter('0.0.0.0', method="post", buffer_size=2) + payload = {"unicode": "\u0107", "alsoAscii": "abc"} + ae = AsyncEmitter("0.0.0.0", batch_size=2) ae.input(payload) - self.assertEqual(len(ae.buffer), 1) - self.assertDictEqual(payload, ae.buffer[0]) + self.assertEqual(len(ae.event_store.event_buffer), 1) + self.assertDictEqual(payload, ae.event_store.event_buffer[0]) + + @mock.patch("snowplow_tracker.Emitter.http_post") + def test_send_events_post_retry(self, mok_http_post: Any) -> None: + mok_http_post.side_effect = mocked_http_response_failure_retry + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter( + "0.0.0.0", batch_size=10, on_success=mok_success, on_failure=mok_failure + ) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_http_post.side_effect = 
mocked_http_response_success + time.sleep(5) + + mok_failure.assert_called_with(0, evBuffer) + mok_success.assert_called_with(evBuffer) + + @mock.patch("snowplow_tracker.Emitter.http_get") + def test_send_events_get_retry(self, mok_http_get: Any) -> None: + mok_http_get.side_effect = mocked_http_response_failure_retry + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter( + "0.0.0.0", + method="get", + batch_size=1, + on_success=mok_success, + on_failure=mok_failure, + ) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_http_get.side_effect = mocked_http_response_success + time.sleep(5) + + mok_failure.assert_called_with(0, evBuffer) + mok_success.assert_called_with(evBuffer) + + @mock.patch("snowplow_tracker.Emitter.http_get") + def test_send_events_get_no_retry(self, mok_http_get: Any) -> None: + mok_http_get.side_effect = mocked_http_response_failure + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter( + "0.0.0.0", + method="get", + batch_size=1, + on_success=mok_success, + on_failure=mok_failure, + ) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_failure.assert_called_once_with(0, evBuffer) + mok_success.assert_not_called() + + @mock.patch("snowplow_tracker.Emitter.http_post") + def test_send_events_post_no_retry(self, mok_http_post: Any) -> None: + mok_http_post.side_effect = mocked_http_response_failure + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter( + "0.0.0.0", + method="get", + batch_size=1, + on_success=mok_success, + on_failure=mok_failure, + ) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_failure.assert_called_once_with(0, evBuffer) + mok_success.assert_not_called() + + 
@mock.patch("snowplow_tracker.Emitter.http_post") + def test_send_events_post_custom_retry(self, mok_http_post: Any) -> None: + mok_http_post.side_effect = mocked_http_response_failure + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter( + "0.0.0.0", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + custom_retry_codes={400: True}, + ) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_http_post.side_effect = mocked_http_response_success + time.sleep(5) + + mok_failure.assert_called_with(0, evBuffer) + mok_success.assert_called_with(evBuffer) + + @mock.patch("snowplow_tracker.Emitter.http_get") + def test_send_events_get_custom_retry(self, mok_http_get: Any) -> None: + mok_http_get.side_effect = mocked_http_response_failure + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter( + "0.0.0.0", + method="get", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + custom_retry_codes={400: True}, + ) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_http_get.side_effect = mocked_http_response_success + time.sleep(5) + + mok_failure.assert_called_with(0, evBuffer) + mok_success.assert_called_with(evBuffer) diff --git a/snowplow_tracker/test/unit/test_event.py b/snowplow_tracker/test/unit/test_event.py new file mode 100644 index 00000000..e50da98d --- /dev/null +++ b/snowplow_tracker/test/unit/test_event.py @@ -0,0 +1,72 @@ +# """ +# test_event.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. 
+ +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ + +import json +import unittest +from snowplow_tracker.events import Event +from snowplow_tracker.subject import Subject +from snowplow_tracker.self_describing_json import SelfDescribingJson + +CONTEXT_SCHEMA = "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1" + + +class TestEvent(unittest.TestCase): + def setUp(self) -> None: + pass + + def test_init(self): + event = Event() + self.assertEqual(event.payload.nv_pairs, {}) + + def test_build_payload(self): + event_subject = Subject() + event = Event(event_subject=event_subject) + payload = event.build_payload(encode_base64=None, json_encoder=None) + + self.assertEqual(payload.nv_pairs, {"p": "pc"}) + + def test_build_payload_tstamp(self): + event_subject = Subject() + tstamp = 1399021242030 + + event = Event(event_subject=event_subject, true_timestamp=tstamp) + + payload = event.build_payload( + json_encoder=None, + encode_base64=None, + ) + + self.assertEqual(payload.nv_pairs, {"p": "pc", "ttm": 1399021242030}) + + def test_build_payload_context(self): + event_subject = Subject() + context = SelfDescribingJson("test.context.schema", {"user": "tester"}) + event_context = [context] + event = Event(event_subject=event_subject, context=event_context) + + payload = event.build_payload( + json_encoder=None, + encode_base64=False, + ) + + expected_context = { + "schema": CONTEXT_SCHEMA, + "data": [{"schema": "test.context.schema", "data": {"user": "tester"}}], + } + actual_context = json.loads(payload.nv_pairs["co"]) + + self.assertDictEqual(actual_context, expected_context) diff --git a/snowplow_tracker/test/unit/test_in_memory_event_store.py 
b/snowplow_tracker/test/unit/test_in_memory_event_store.py new file mode 100644 index 00000000..93a0c8b2 --- /dev/null +++ b/snowplow_tracker/test/unit/test_in_memory_event_store.py @@ -0,0 +1,106 @@ +# """ +# test_in_memory_event_store.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ + +import unittest +from snowplow_tracker.event_store import InMemoryEventStore +import logging + +# logging +logging.basicConfig() +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +class TestInMemoryEventStore(unittest.TestCase): + def setUp(self) -> None: + pass + + def test_init(self): + event_store = InMemoryEventStore(logger) + self.assertEqual(event_store.buffer_capacity, 10000) + self.assertEqual(event_store.event_buffer, []) + + def test_add_event(self): + event_store = InMemoryEventStore(logger) + nvPairs = {"n0": "v0", "n1": "v1"} + + event_store.add_event(nvPairs) + self.assertDictEqual(nvPairs, event_store.event_buffer[0]) + + def test_size(self): + event_store = InMemoryEventStore(logger) + nvPairs = {"n0": "v0", "n1": "v1"} + + event_store.add_event(nvPairs) + event_store.add_event(nvPairs) + event_store.add_event(nvPairs) + + self.assertEqual(event_store.size(), 3) + + def test_add_failed_events_to_buffer(self): + event_store = InMemoryEventStore(logger) + + nvPair1 = {"n0": "v0", "n1": "v1"} + 
nvPair2 = {"n2": "v2", "n3": "v3"} + + event_store.add_event(nvPair1) + event_store.add_event(nvPair2) + + payload_list = event_store.get_events_batch() + + event_store.cleanup(payload_list, True) + + self.assertEqual(event_store.event_buffer, payload_list) + + def test_remove_success_events_from_buffer(self): + event_store = InMemoryEventStore(logger) + + nvPairs = {"n0": "v0", "n1": "v1"} + + event_store.add_event(nvPairs) + event_store.add_event(nvPairs) + payload_list = event_store.get_events_batch() + event_store.cleanup(payload_list, False) + + self.assertEqual(event_store.event_buffer, []) + + def test_drop_new_events_buffer_full(self): + event_store = InMemoryEventStore(logger, buffer_capacity=2) + + nvPair1 = {"n0": "v0"} + nvPair2 = {"n1": "v1"} + nvPair3 = {"n2": "v2"} + + event_store.add_event(nvPair1) + event_store.add_event(nvPair2) + + self.assertEqual(event_store.event_buffer, [{"n0": "v0"}, {"n1": "v1"}]) + + event_store.add_event(nvPair3) + + self.assertEqual(event_store.event_buffer, [{"n0": "v0"}, {"n1": "v1"}]) + + def test_get_events(self): + event_store = InMemoryEventStore(logger, buffer_capacity=2) + + nvPairs = {"n0": "v0"} + batch = [nvPairs, nvPairs] + + event_store.add_event(nvPairs) + event_store.add_event(nvPairs) + + self.assertEqual(event_store.get_events_batch(), batch) diff --git a/snowplow_tracker/test/unit/test_page_ping.py b/snowplow_tracker/test/unit/test_page_ping.py new file mode 100644 index 00000000..7539ce43 --- /dev/null +++ b/snowplow_tracker/test/unit/test_page_ping.py @@ -0,0 +1,38 @@ +import pytest + +from snowplow_tracker.events.page_ping import PagePing + + +class TestPagePing: + def test_getters(self): + pp = PagePing("url", "title", "referrer", 1, 2, 3, 4) + assert pp.page_url == "url" + assert pp.page_title == "title" + assert pp.referrer == "referrer" + assert pp.min_x == 1 + assert pp.max_x == 2 + assert pp.min_y == 3 + assert pp.max_y == 4 + + def test_setters(self): + pp = PagePing("url") + pp.page_title = 
"title" + pp.referrer = "referrer" + pp.min_x = 1 + pp.max_x = 2 + pp.min_y = 3 + pp.max_y = 4 + assert pp.page_title == "title" + assert pp.referrer == "referrer" + assert pp.min_x == 1 + assert pp.max_x == 2 + assert pp.min_y == 3 + assert pp.max_y == 4 + assert pp.page_url == "url" + + def test_page_url_non_empty_string(self): + pp = PagePing("url") + pp.page_url = "new_url" + assert pp.page_url == "new_url" + with pytest.raises(ValueError): + pp.page_url = "" diff --git a/snowplow_tracker/test/unit/test_page_view.py b/snowplow_tracker/test/unit/test_page_view.py new file mode 100644 index 00000000..3736710c --- /dev/null +++ b/snowplow_tracker/test/unit/test_page_view.py @@ -0,0 +1,27 @@ +import pytest + +from snowplow_tracker.events.page_view import PageView + + +class TestPageView: + def test_getters(self): + pv = PageView("url", "title", "referrer") + assert pv.page_url == "url" + assert pv.page_title == "title" + assert pv.referrer == "referrer" + + def test_setters(self): + pv = PageView("url", "title", "referrer") + pv.page_url = "new_url" + pv.page_title = "new_title" + pv.referrer = "new_referrer" + assert pv.page_url == "new_url" + assert pv.page_title == "new_title" + assert pv.referrer == "new_referrer" + + def test_page_url_non_empty_string(self): + pv = PageView("url") + pv.page_url = "new_url" + assert pv.page_url == "new_url" + with pytest.raises(ValueError): + pv.page_url = "" diff --git a/snowplow_tracker/test/unit/test_payload.py b/snowplow_tracker/test/unit/test_payload.py index 6d89caa1..c174e8f4 100644 --- a/snowplow_tracker/test/unit/test_payload.py +++ b/snowplow_tracker/test/unit/test_payload.py @@ -1,7 +1,7 @@ # """ # test_payload.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. 
# This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import json @@ -55,7 +51,6 @@ def date_encoder(o: Any) -> str: class TestPayload(unittest.TestCase): - def setUp(self) -> None: pass @@ -64,7 +59,12 @@ def test_object_generation(self) -> None: self.assertDictEqual({}, p.nv_pairs) def test_object_generation_2(self) -> None: - p = payload.Payload({"test1": "result1", "test2": "result2", }) + p = payload.Payload( + { + "test1": "result1", + "test2": "result2", + } + ) output = {"test1": "result1", "test2": "result2"} self.assertDictEqual(output, p.nv_pairs) @@ -72,7 +72,10 @@ def test_add(self) -> None: p = payload.Payload() p.add("name1", "value1") p.add("name2", "value2") - output = {"name1": "value1", "name2": "value2", } + output = { + "name1": "value1", + "name2": "value2", + } self.assertDictEqual(output, p.nv_pairs) def test_add_empty_val(self) -> None: @@ -88,51 +91,58 @@ def test_add_none(self) -> None: self.assertDictEqual(output, p.nv_pairs) def test_add_dict(self) -> None: - p = payload.Payload({"n1": "v1", "n2": "v2", }) - p.add_dict({"name4": 4, "name3": 3}) # Order doesn't matter + p = payload.Payload( + { + "n1": "v1", + "n2": "v2", + } + ) + p.add_dict({"name4": 4, "name3": 3}) # Order doesn't matter output = {"n1": "v1", "n2": "v2", "name3": 3, "name4": 4} self.assertDictEqual(output, p.nv_pairs) def test_add_json_empty(self) -> None: - p = payload.Payload({'name': 'value'}) + p = payload.Payload({"name": "value"}) input = {} - p.add_json(input, False, 'ue_px', 
'ue_pr') - output = {'name': 'value'} + p.add_json(input, False, "ue_px", "ue_pr") + output = {"name": "value"} self.assertDictEqual(output, p.nv_pairs) def test_add_json_none(self) -> None: - p = payload.Payload({'name': 'value'}) + p = payload.Payload({"name": "value"}) input = None - p.add_json(input, False, 'ue_px', 'ue_pr') - output = {'name': 'value'} + p.add_json(input, False, "ue_px", "ue_pr") + output = {"name": "value"} self.assertDictEqual(output, p.nv_pairs) def test_add_json_encode_false(self) -> None: p = payload.Payload() - input = {'a': 1} - p.add_json(input, False, 'ue_px', 'ue_pr') - self.assertTrue('ue_pr' in p.nv_pairs.keys()) - self.assertFalse('ue_px' in p.nv_pairs.keys()) + input = {"a": 1} + p.add_json(input, False, "ue_px", "ue_pr") + self.assertTrue("ue_pr" in p.nv_pairs.keys()) + self.assertFalse("ue_px" in p.nv_pairs.keys()) def test_add_json_encode_true(self) -> None: p = payload.Payload() - input = {'a': 1} - p.add_json(input, True, 'ue_px', 'ue_pr') - self.assertFalse('ue_pr' in p.nv_pairs.keys()) - self.assertTrue('ue_px' in p.nv_pairs.keys()) + input = {"a": 1} + p.add_json(input, True, "ue_px", "ue_pr") + self.assertFalse("ue_pr" in p.nv_pairs.keys()) + self.assertTrue("ue_px" in p.nv_pairs.keys()) def test_add_json_unicode_encode_false(self) -> None: p = payload.Payload() - input = {'a': u'\u0107', u'\u0107': 'b'} - p.add_json(input, False, 'ue_px', 'ue_pr') + input = {"a": "\u0107", "\u0107": "b"} + p.add_json(input, False, "ue_px", "ue_pr") ue_pr = json.loads(p.nv_pairs["ue_pr"]) self.assertDictEqual(input, ue_pr) def test_add_json_unicode_encode_true(self) -> None: p = payload.Payload() - input = {'a': '\u0107', '\u0107': 'b'} - p.add_json(input, True, 'ue_px', 'ue_pr') - ue_px = json.loads(base64.urlsafe_b64decode(p.nv_pairs["ue_px"]).decode('utf-8')) + input = {"a": "\u0107", "\u0107": "b"} + p.add_json(input, True, "ue_px", "ue_pr") + ue_px = json.loads( + base64.urlsafe_b64decode(p.nv_pairs["ue_px"]).decode("utf-8") + ) 
self.assertDictEqual(input, ue_px) def test_add_json_with_custom_enc(self) -> None: @@ -148,5 +158,5 @@ def test_add_json_with_custom_enc(self) -> None: self.assertTrue(is_subset({"key1": "2020-02-01"}, results)) def test_subject_get(self) -> None: - p = payload.Payload({'name1': 'val1'}) + p = payload.Payload({"name1": "val1"}) self.assertDictEqual(p.get(), p.nv_pairs) diff --git a/snowplow_tracker/test/unit/test_structured_event.py b/snowplow_tracker/test/unit/test_structured_event.py new file mode 100644 index 00000000..fdf00014 --- /dev/null +++ b/snowplow_tracker/test/unit/test_structured_event.py @@ -0,0 +1,24 @@ +from snowplow_tracker.events.structured_event import StructuredEvent + + +class TestStructuredEvent: + def test_getters(self): + se = StructuredEvent("category", "action", "label", "property", 1) + assert se.category == "category" + assert se.action == "action" + assert se.label == "label" + assert se.property_ == "property" + assert se.value == 1 + + def test_setters(self): + se = StructuredEvent("category", "action") + se.category = "new_category" + se.action = "new_action" + se.label = "new_label" + se.property_ = "new_property" + se.value = 2 + assert se.category == "new_category" + assert se.action == "new_action" + assert se.label == "new_label" + assert se.property_ == "new_property" + assert se.value == 2 diff --git a/snowplow_tracker/test/unit/test_subject.py b/snowplow_tracker/test/unit/test_subject.py index 882cdb2f..953a0a74 100644 --- a/snowplow_tracker/test/unit/test_subject.py +++ b/snowplow_tracker/test/unit/test_subject.py @@ -1,7 +1,7 @@ # """ # test_subject.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. 
# This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import unittest @@ -26,7 +22,6 @@ class TestSubject(unittest.TestCase): - def setUp(self) -> None: pass @@ -42,6 +37,8 @@ def test_subject_0(self) -> None: s.set_timezone("PST") s.set_lang("EN") s.set_domain_user_id("domain-user-id") + s.set_domain_session_id("domain-session-id") + s.set_domain_session_index(1) s.set_ip_address("127.0.0.1") s.set_useragent("useragent-string") s.set_network_user_id("network-user-id") @@ -57,18 +54,16 @@ def test_subject_0(self) -> None: "ip": "127.0.0.1", "ua": "useragent-string", "duid": "domain-user-id", - "tnuid": "network-user-id" + "sid": "domain-session-id", + "vid": 1, + "tnuid": "network-user-id", } self.assertDictEqual(s.standard_nv_pairs, exp) def test_subject_1(self) -> None: s = _subject.Subject().set_platform("srv").set_user_id("1234").set_lang("EN") - exp = { - "p": "srv", - "uid": "1234", - "lang": "EN" - } + exp = {"p": "srv", "uid": "1234", "lang": "EN"} self.assertDictEqual(s.standard_nv_pairs, exp) with pytest.raises(KeyError): @@ -85,5 +80,37 @@ def test_subject_1(self) -> None: s.standard_nv_pairs["ua"] with pytest.raises(KeyError): s.standard_nv_pairs["duid"] + with pytest.raises(KeyError): + s.standard_nv_pairs["sid"] + with pytest.raises(KeyError): + s.standard_nv_pairs["vid"] with pytest.raises(KeyError): s.standard_nv_pairs["tnuid"] + + def test_combine_subject(self) -> None: + s = _subject.Subject() + s.set_color_depth(10) + s.set_domain_session_id("domain_session_id") + 
+ s2 = _subject.Subject() + s2.set_domain_user_id("domain_user_id") + s2.set_lang("en") + + fin_payload_dict = s.combine_subject(s2) + + expected_fin_payload_dict = { + "p": "pc", + "cd": 10, + "sid": "domain_session_id", + "duid": "domain_user_id", + "lang": "en", + } + + expected_subject = { + "p": "pc", + "cd": 10, + "sid": "domain_session_id", + } + + self.assertDictEqual(fin_payload_dict, expected_fin_payload_dict) + self.assertDictEqual(s.standard_nv_pairs, expected_subject) diff --git a/snowplow_tracker/test/unit/test_tracker.py b/snowplow_tracker/test/unit/test_tracker.py index 34964e0a..3009790a 100644 --- a/snowplow_tracker/test/unit/test_tracker.py +++ b/snowplow_tracker/test/unit/test_tracker.py @@ -1,7 +1,7 @@ # """ # test_tracker.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. 
- -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import re @@ -25,7 +21,7 @@ import unittest.mock as mock from freezegun import freeze_time -from typing import Any +from typing import Any, Optional from snowplow_tracker.contracts import disable_contracts, enable_contracts from snowplow_tracker.tracker import Tracker @@ -33,6 +29,7 @@ from snowplow_tracker.subject import Subject from snowplow_tracker.payload import Payload from snowplow_tracker.self_describing_json import SelfDescribingJson +from snowplow_tracker.events import Event, SelfDescribing, ScreenView UNSTRUCT_SCHEMA = "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0" CONTEXT_SCHEMA = "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1" @@ -44,6 +41,9 @@ FORM_CHANGE_SCHEMA = "iglu:com.snowplowanalytics.snowplow/change_form/jsonschema/1-0-0" FORM_SUBMIT_SCHEMA = "iglu:com.snowplowanalytics.snowplow/submit_form/jsonschema/1-0-0" SITE_SEARCH_SCHEMA = "iglu:com.snowplowanalytics.snowplow/site_search/jsonschema/1-0-0" +MOBILE_SCREEN_VIEW_SCHEMA = ( + "iglu:com.snowplowanalytics.mobile/screen_view/jsonschema/1-0-0" +) SCREEN_VIEW_SCHEMA = "iglu:com.snowplowanalytics.snowplow/screen_view/jsonschema/1-0-0" # helpers @@ -58,11 +58,21 @@ def mocked_uuid() -> str: return _TEST_UUID -def mocked_track(pb: Any) -> None: +def mocked_track( + event: Any, + context: Optional[Any] = None, + tstamp: Optional[Any] = None, + event_subject: Optional[Any] = None, +) -> None: pass -def mocked_complete_payload(*args: Any, **kwargs: Any) -> None: +def mocked_complete_payload( + event: Any, + event_subject: Optional[Any], + context: Optional[Any], + tstamp: Optional[Any], +) -> Payload: pass @@ -97,7 +107,7 @@ def test_initialisation(self) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() - t = Tracker([e], namespace="cloudfront", encode_base64=False, 
app_id="AF003") + t = Tracker("cloudfront", [e], encode_base64=False, app_id="AF003") self.assertEqual(t.standard_nv_pairs["tna"], "cloudfront") self.assertEqual(t.standard_nv_pairs["aid"], "AF003") self.assertEqual(t.encode_base64, False) @@ -106,9 +116,8 @@ def test_initialisation_default_optional(self) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() - t = Tracker(e) + t = Tracker("namespace", e) self.assertEqual(t.emitters, [e]) - self.assertTrue(t.standard_nv_pairs["tna"] is None) self.assertTrue(t.standard_nv_pairs["aid"] is None) self.assertEqual(t.encode_base64, True) @@ -117,19 +126,19 @@ def test_initialisation_emitter_list(self) -> None: e1 = mokEmitter() e2 = mokEmitter() - t = Tracker([e1, e2]) + t = Tracker("namespace", [e1, e2]) self.assertEqual(t.emitters, [e1, e2]) def test_initialisation_error(self) -> None: with self.assertRaises(ValueError): - Tracker([]) + Tracker("namespace", []) def test_initialization_with_subject(self) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() s = Subject() - t = Tracker(e, subject=s) + t = Tracker("namespace", e, subject=s) self.assertIs(t.subject, s) def test_get_uuid(self) -> None: @@ -159,12 +168,12 @@ def test_get_timestamp_3(self) -> None: self.assertEqual(tstamp, 1000) # 1970-01-01 00:00:01 in ms @mock.patch("snowplow_tracker.Tracker.track") - def test_alias_of_track_unstruct_event(self, mok_track: Any) -> None: + def test_alias_of_track_self_describing_event(self, mok_track: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() mok_track.side_effect = mocked_track - t = Tracker(e) + t = Tracker("namespace", e) evJson = SelfDescribingJson("test.schema", {"n": "v"}) # call the alias t.track_self_describing_event(evJson) @@ -175,7 +184,7 @@ def test_flush(self) -> None: e1 = mokEmitter() e2 = mokEmitter() - t = Tracker([e1, e2]) + t = Tracker("namespace", [e1, e2]) t.flush() e1.flush.assert_not_called() 
self.assertEqual(e1.sync_flush.call_count, 1) @@ -187,7 +196,7 @@ def test_flush_async(self) -> None: e1 = mokEmitter() e2 = mokEmitter() - t = Tracker([e1, e2]) + t = Tracker("namespace", [e1, e2]) t.flush(is_async=True) self.assertEqual(e1.flush.call_count, 1) e1.sync_flush.assert_not_called() @@ -198,7 +207,7 @@ def test_set_subject(self) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() - t = Tracker(e) + t = Tracker("namespace", e) new_subject = Subject() self.assertIsNot(t.subject, new_subject) t.set_subject(new_subject) @@ -209,7 +218,7 @@ def test_add_emitter(self) -> None: e1 = mokEmitter() e2 = mokEmitter() - t = Tracker(e1) + t = Tracker("namespace", e1) t.add_emitter(e2) self.assertEqual(t.emitters, [e1, e2]) @@ -223,151 +232,77 @@ def test_track(self) -> None: e2 = mokEmitter() e3 = mokEmitter() - t = Tracker([e1, e2, e3]) + t = Tracker("namespace", [e1, e2, e3]) - p = Payload({"test": "track"}) - t.track(p) + mok_event = self.create_patch("snowplow_tracker.events.Event") + t.track(mok_event) + mok_payload = mok_event.build_payload().nv_pairs - e1.input.assert_called_once_with({"test": "track"}) - e2.input.assert_called_once_with({"test": "track"}) - e3.input.assert_called_once_with({"test": "track"}) + e1.input.assert_called_once_with(mok_payload) + e2.input.assert_called_once_with(mok_payload) + e3.input.assert_called_once_with(mok_payload) @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 - @mock.patch("snowplow_tracker.Tracker.track") @mock.patch("snowplow_tracker.Tracker.get_uuid") - def test_complete_payload(self, mok_uuid: Any, mok_track: Any) -> None: - mokEmitter = self.create_patch("snowplow_tracker.Emitter") - e = mokEmitter() - + def test_complete_payload(self, mok_uuid: Any) -> None: mok_uuid.side_effect = mocked_uuid - mok_track.side_effect = mocked_track - - t = Tracker(e) - p = Payload() - t.complete_payload(p, None, None, None) - - self.assertEqual(mok_track.call_count, 1) - trackArgsTuple = 
mok_track.call_args_list[0][0] - self.assertEqual(len(trackArgsTuple), 1) - passed_nv_pairs = trackArgsTuple[0].nv_pairs - - expected = { - "eid": _TEST_UUID, - "dtm": 1618790401000, - "tv": TRACKER_VERSION, - "p": "pc", - } - self.assertDictEqual(passed_nv_pairs, expected) - @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 - @mock.patch("snowplow_tracker.Tracker.track") - @mock.patch("snowplow_tracker.Tracker.get_uuid") - def test_complete_payload_tstamp_int(self, mok_uuid: Any, mok_track: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() - mok_uuid.side_effect = mocked_uuid - mok_track.side_effect = mocked_track - - t = Tracker(e) - p = Payload() - time_in_millis = 100010001000 - t.complete_payload(p, None, time_in_millis, None) - - self.assertEqual(mok_track.call_count, 1) - trackArgsTuple = mok_track.call_args_list[0][0] - self.assertEqual(len(trackArgsTuple), 1) - passed_nv_pairs = trackArgsTuple[0].nv_pairs - - expected = { - "eid": _TEST_UUID, - "dtm": 1618790401000, - "ttm": time_in_millis, - "tv": TRACKER_VERSION, - "p": "pc", - } - self.assertDictEqual(passed_nv_pairs, expected) - - @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 - @mock.patch("snowplow_tracker.Tracker.track") - @mock.patch("snowplow_tracker.Tracker.get_uuid") - def test_complete_payload_tstamp_dtm(self, mok_uuid: Any, mok_track: Any) -> None: - mokEmitter = self.create_patch("snowplow_tracker.Emitter") - e = mokEmitter() - - mok_uuid.side_effect = mocked_uuid - mok_track.side_effect = mocked_track - - t = Tracker(e) - p = Payload() - time_in_millis = 100010001000 - t.complete_payload(p, None, time_in_millis, None) - - self.assertEqual(mok_track.call_count, 1) - trackArgsTuple = mok_track.call_args_list[0][0] - self.assertEqual(len(trackArgsTuple), 1) - passed_nv_pairs = trackArgsTuple[0].nv_pairs + t = Tracker("namespace", e) + s = Subject() + event = Event(event_subject=s) + payload = t.complete_payload(event).nv_pairs expected 
= { "eid": _TEST_UUID, "dtm": 1618790401000, - "ttm": time_in_millis, "tv": TRACKER_VERSION, "p": "pc", + "tna": "namespace", } - self.assertDictEqual(passed_nv_pairs, expected) + self.assertDictEqual(payload, expected) @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 - @mock.patch("snowplow_tracker.Tracker.track") @mock.patch("snowplow_tracker.Tracker.get_uuid") - def test_complete_payload_tstamp_ttm(self, mok_uuid: Any, mok_track: Any) -> None: + def test_complete_payload_tstamp(self, mok_uuid: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() mok_uuid.side_effect = mocked_uuid - mok_track.side_effect = mocked_track - - t = Tracker(e) - p = Payload() + t = Tracker("namespace", e) + s = Subject() time_in_millis = 100010001000 - t.complete_payload(p, None, time_in_millis, None) + event = Event(true_timestamp=time_in_millis, event_subject=s) - self.assertEqual(mok_track.call_count, 1) - trackArgsTuple = mok_track.call_args_list[0][0] - self.assertEqual(len(trackArgsTuple), 1) - passed_nv_pairs = trackArgsTuple[0].nv_pairs + payload = t.complete_payload(event=event).nv_pairs expected = { + "tna": "namespace", "eid": _TEST_UUID, "dtm": 1618790401000, "ttm": time_in_millis, "tv": TRACKER_VERSION, "p": "pc", } - self.assertDictEqual(passed_nv_pairs, expected) + self.assertDictEqual(payload, expected) @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 - @mock.patch("snowplow_tracker.Tracker.track") @mock.patch("snowplow_tracker.Tracker.get_uuid") - def test_complete_payload_co(self, mok_uuid: Any, mok_track: Any) -> None: + def test_complete_payload_co(self, mok_uuid: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() mok_uuid.side_effect = mocked_uuid - mok_track.side_effect = mocked_track - t = Tracker(e, encode_base64=False) - p = Payload() + t = Tracker("namespace", e, encode_base64=False) geo_ctx = SelfDescribingJson(geoSchema, geoData) mov_ctx = 
SelfDescribingJson(movSchema, movData) ctx_array = [geo_ctx, mov_ctx] - t.complete_payload(p, ctx_array, None, None) - - self.assertEqual(mok_track.call_count, 1) - trackArgsTuple = mok_track.call_args_list[0][0] - self.assertEqual(len(trackArgsTuple), 1) - passed_nv_pairs = trackArgsTuple[0].nv_pairs + event = Event(context=ctx_array) + payload = t.complete_payload(event=event).nv_pairs expected_co = { "schema": CONTEXT_SCHEMA, @@ -376,57 +311,42 @@ def test_complete_payload_co(self, mok_uuid: Any, mok_track: Any) -> None: {"schema": movSchema, "data": movData}, ], } - self.assertIn("co", passed_nv_pairs) - self.assertDictEqual(json.loads(passed_nv_pairs["co"]), expected_co) + self.assertIn("co", payload) + self.assertDictEqual(json.loads(payload["co"]), expected_co) @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 - @mock.patch("snowplow_tracker.Tracker.track") @mock.patch("snowplow_tracker.Tracker.get_uuid") - def test_complete_payload_cx(self, mok_uuid: Any, mok_track: Any) -> None: + def test_complete_payload_cx(self, mok_uuid: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() mok_uuid.side_effect = mocked_uuid - mok_track.side_effect = mocked_track - t = Tracker(e, encode_base64=True) - p = Payload() + t = Tracker("namespace", e, encode_base64=True) geo_ctx = SelfDescribingJson(geoSchema, geoData) mov_ctx = SelfDescribingJson(movSchema, movData) ctx_array = [geo_ctx, mov_ctx] - t.complete_payload(p, ctx_array, None, None) - - self.assertEqual(mok_track.call_count, 1) - trackArgsTuple = mok_track.call_args_list[0][0] - self.assertEqual(len(trackArgsTuple), 1) - passed_nv_pairs = trackArgsTuple[0].nv_pairs + event = Event(context=ctx_array) + payload = t.complete_payload(event=event).nv_pairs - self.assertIn("cx", passed_nv_pairs) + self.assertIn("cx", payload) @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 - @mock.patch("snowplow_tracker.Tracker.track") @mock.patch("snowplow_tracker.Tracker.get_uuid") 
- def test_complete_payload_event_subject( - self, mok_uuid: Any, mok_track: Any - ) -> None: + def test_complete_payload_event_subject(self, mok_uuid: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() mok_uuid.side_effect = mocked_uuid - mok_track.side_effect = mocked_track - - t = Tracker(e) - p = Payload() - evSubject = Subject().set_lang("EN").set_user_id("tester") - t.complete_payload(p, None, None, evSubject) - self.assertEqual(mok_track.call_count, 1) - trackArgsTuple = mok_track.call_args_list[0][0] - self.assertEqual(len(trackArgsTuple), 1) - passed_nv_pairs = trackArgsTuple[0].nv_pairs + t = Tracker("namespace", e) + event_subject = Subject().set_lang("EN").set_user_id("tester") + event = Event(event_subject=event_subject) + payload = t.complete_payload(event=event).nv_pairs expected = { + "tna": "namespace", "eid": _TEST_UUID, "dtm": 1618790401000, "tv": TRACKER_VERSION, @@ -434,70 +354,70 @@ def test_complete_payload_event_subject( "lang": "EN", "uid": "tester", } - self.assertDictEqual(passed_nv_pairs, expected) + self.assertDictEqual(payload, expected) ### # test track_x methods ### - @mock.patch("snowplow_tracker.Tracker.complete_payload") - def test_track_unstruct_event(self, mok_complete_payload: Any) -> None: + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_self_describing_event(self, mok_track: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() - mok_complete_payload.side_effect = mocked_complete_payload + mok_track.side_effect = mocked_track - t = Tracker(e, encode_base64=False) - evJson = SelfDescribingJson("test.sde.schema", {"n": "v"}) - t.track_unstruct_event(evJson) - self.assertEqual(mok_complete_payload.call_count, 1) - completeArgsList = mok_complete_payload.call_args_list[0][0] - self.assertEqual(len(completeArgsList), 4) + t = Tracker("namespace", e, encode_base64=False) + event_json = SelfDescribingJson("test.sde.schema", {"n": "v"}) + event 
= SelfDescribing(event_json=event_json) + actual_pairs = event.build_payload( + encode_base64=t.encode_base64, + json_encoder=t.json_encoder, + ).nv_pairs + + t.track_self_describing_event(event_json) + self.assertEqual(mok_track.call_count, 1) + + complete_args_dict = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_dict), 1) # payload - actualPayloadArg = completeArgsList[0] - actualPairs = actualPayloadArg.nv_pairs - actualUePr = json.loads(actualPairs["ue_pr"]) - # context - actualContextArg = completeArgsList[1] - # tstamp - actualTstampArg = completeArgsList[2] + actual_ue_pr = json.loads(actual_pairs["ue_pr"]) expectedUePr = { "data": {"data": {"n": "v"}, "schema": "test.sde.schema"}, "schema": UNSTRUCT_SCHEMA, } - self.assertDictEqual(actualUePr, expectedUePr) - self.assertEqual(actualPairs["e"], "ue") - self.assertTrue(actualContextArg is None) - self.assertTrue(actualTstampArg is None) + self.assertDictEqual(actual_ue_pr, expectedUePr) + self.assertEqual(actual_pairs["e"], "ue") - @mock.patch("snowplow_tracker.Tracker.complete_payload") - def test_track_unstruct_event_all_args(self, mok_complete_payload: Any) -> None: + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_self_describing_event_all_args(self, mok_track: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() - mok_complete_payload.side_effect = mocked_complete_payload + mok_track.side_effect = mocked_track - t = Tracker(e, encode_base64=False) - evJson = SelfDescribingJson("test.schema", {"n": "v"}) + t = Tracker("namespace", e, encode_base64=False) + event_json = SelfDescribingJson("test.schema", {"n": "v"}) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) - evContext = [ctx] - evTstamp = 1399021242030 - t.track_unstruct_event(evJson, evContext, evTstamp) - self.assertEqual(mok_complete_payload.call_count, 1) - completeArgsList = mok_complete_payload.call_args_list[0][0] - 
self.assertEqual(len(completeArgsList), 4) + event_context = [ctx] + event_tstamp = 1399021242030 + + event = SelfDescribing(event_json=event_json) + actual_pairs = event.build_payload( + encode_base64=t.encode_base64, + json_encoder=t.json_encoder, + ).nv_pairs + + t.track_self_describing_event(event_json, event_context, event_tstamp) + self.assertEqual(mok_track.call_count, 1) + complete_args_dict = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_dict), 1) # payload - actualPayloadArg = completeArgsList[0] - actualPairs = actualPayloadArg.nv_pairs - actualUePr = json.loads(actualPairs["ue_pr"]) - # context - actualContextArg = completeArgsList[1] - # tstamp - actualTstampArg = completeArgsList[2] + actualUePr = json.loads(actual_pairs["ue_pr"]) expectedUePr = { "data": {"data": {"n": "v"}, "schema": "test.schema"}, @@ -505,38 +425,40 @@ def test_track_unstruct_event_all_args(self, mok_complete_payload: Any) -> None: } self.assertDictEqual(actualUePr, expectedUePr) - self.assertEqual(actualPairs["e"], "ue") - self.assertIs(actualContextArg[0], ctx) - self.assertEqual(actualTstampArg, evTstamp) + self.assertEqual(actual_pairs["e"], "ue") - @mock.patch("snowplow_tracker.Tracker.complete_payload") - def test_track_unstruct_event_encode(self, mok_complete_payload: Any) -> None: + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_self_describing_event_encode(self, mok_track: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() - mok_complete_payload.side_effect = mocked_complete_payload + mok_track.side_effect = mocked_track - t = Tracker(e, encode_base64=True) - evJson = SelfDescribingJson("test.sde.schema", {"n": "v"}) - t.track_unstruct_event(evJson) - self.assertEqual(mok_complete_payload.call_count, 1) - completeArgsList = mok_complete_payload.call_args_list[0][0] - self.assertEqual(len(completeArgsList), 4) + t = Tracker("namespace", e, encode_base64=True) + event_json = 
SelfDescribingJson("test.sde.schema", {"n": "v"}) - actualPayloadArg = completeArgsList[0] - actualPairs = actualPayloadArg.nv_pairs - self.assertTrue("ue_px" in actualPairs.keys()) + event = SelfDescribing(event_json=event_json) + actual_pairs = event.build_payload( + encode_base64=t.encode_base64, + json_encoder=t.json_encoder, + ).nv_pairs - @mock.patch("snowplow_tracker.Tracker.complete_payload") - def test_track_struct_event(self, mok_complete_payload: Any) -> None: + t.track_self_describing_event(event_json) + self.assertEqual(mok_track.call_count, 1) + complete_args_dict = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_dict), 1) + self.assertTrue("ue_px" in actual_pairs.keys()) + + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_struct_event(self, mok_track: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() - mok_complete_payload.side_effect = mocked_complete_payload + mok_track.side_effect = mocked_track - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) - evTstamp = 1399021242030 + ev_tstamp = 1399021242030 t.track_struct_event( "Mixes", "Play", @@ -544,18 +466,16 @@ def test_track_struct_event(self, mok_complete_payload: Any) -> None: "TestProp", value=3.14, context=[ctx], - tstamp=evTstamp, + tstamp=ev_tstamp, ) - self.assertEqual(mok_complete_payload.call_count, 1) - completeArgsList = mok_complete_payload.call_args_list[0][0] - self.assertEqual(len(completeArgsList), 4) + self.assertEqual(mok_track.call_count, 1) + complete_args_dict = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_dict), 1) - actualPayloadArg = completeArgsList[0] - actualContextArg = completeArgsList[1] - actualTstampArg = completeArgsList[2] - actualPairs = actualPayloadArg.nv_pairs + actual_payload_arg = complete_args_dict["event"].payload + actual_pairs = actual_payload_arg.nv_pairs - expectedPairs = { + expected_pairs = { 
"e": "se", "se_ca": "Mixes", "se_ac": "Play", @@ -563,31 +483,31 @@ def test_track_struct_event(self, mok_complete_payload: Any) -> None: "se_pr": "TestProp", "se_va": 3.14, } - self.assertDictEqual(actualPairs, expectedPairs) - self.assertIs(actualContextArg[0], ctx) - self.assertEqual(actualTstampArg, evTstamp) + self.assertDictEqual(actual_pairs, expected_pairs) - @mock.patch("snowplow_tracker.Tracker.complete_payload") - def test_track_page_view(self, mok_complete_payload: Any) -> None: + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_page_view(self, mok_track: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() - mok_complete_payload.side_effect = mocked_complete_payload + mok_track.side_effect = mocked_track - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) - evTstamp = 1399021242030 + ev_tstamp = 1399021242030 t.track_page_view( - "example.com", "Example", "docs.snowplow.io", context=[ctx], tstamp=evTstamp + "example.com", + "Example", + "docs.snowplow.io", + context=[ctx], + tstamp=ev_tstamp, ) - self.assertEqual(mok_complete_payload.call_count, 1) - completeArgsList = mok_complete_payload.call_args_list[0][0] - self.assertEqual(len(completeArgsList), 4) + self.assertEqual(mok_track.call_count, 1) + complete_args_dict = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_dict), 1) - actualPayloadArg = completeArgsList[0] - actualContextArg = completeArgsList[1] - actualTstampArg = completeArgsList[2] - actualPairs = actualPayloadArg.nv_pairs + actual_payload_arg = complete_args_dict["event"].payload + actualPairs = actual_payload_arg.nv_pairs expectedPairs = { "e": "pv", @@ -596,19 +516,17 @@ def test_track_page_view(self, mok_complete_payload: Any) -> None: "refr": "docs.snowplow.io", } self.assertDictEqual(actualPairs, expectedPairs) - self.assertIs(actualContextArg[0], ctx) - self.assertEqual(actualTstampArg, evTstamp) - 
@mock.patch("snowplow_tracker.Tracker.complete_payload") - def test_track_page_ping(self, mok_complete_payload: Any) -> None: + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_page_ping(self, mok_track: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() - mok_complete_payload.side_effect = mocked_complete_payload + mok_track.side_effect = mocked_track - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) - evTstamp = 1399021242030 + ev_tstamp = 1399021242030 t.track_page_ping( "example.com", "Example", @@ -618,16 +536,14 @@ def test_track_page_ping(self, mok_complete_payload: Any) -> None: 2, 3, context=[ctx], - tstamp=evTstamp, + tstamp=ev_tstamp, ) - self.assertEqual(mok_complete_payload.call_count, 1) - completeArgsList = mok_complete_payload.call_args_list[0][0] - self.assertEqual(len(completeArgsList), 4) + self.assertEqual(mok_track.call_count, 1) + complete_args_dict = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_dict), 1) - actualPayloadArg = completeArgsList[0] - actualContextArg = completeArgsList[1] - actualTstampArg = completeArgsList[2] - actualPairs = actualPayloadArg.nv_pairs + actual_payload_arg = complete_args_dict["event"].payload + actual_pairs = actual_payload_arg.nv_pairs expectedPairs = { "e": "pp", @@ -639,39 +555,35 @@ def test_track_page_ping(self, mok_complete_payload: Any) -> None: "pp_miy": 2, "pp_may": 3, } - self.assertDictEqual(actualPairs, expectedPairs) - self.assertIs(actualContextArg[0], ctx) - self.assertEqual(actualTstampArg, evTstamp) + self.assertDictEqual(actual_pairs, expectedPairs) - @mock.patch("snowplow_tracker.Tracker.complete_payload") - def test_track_ecommerce_transaction_item(self, mok_complete_payload: Any) -> None: + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_ecommerce_transaction_item(self, mok_track: Any) -> None: mokEmitter = 
self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() - mok_complete_payload.side_effect = mocked_complete_payload + mok_track.side_effect = mocked_track - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) - evTstamp = 1399021242030 + ev_tstamp = 1399021242030 t.track_ecommerce_transaction_item( - "1234", - "sku1234", - 3.14, - 1, - "itemName", - "itemCategory", - "itemCurrency", + order_id="1234", + sku="sku1234", + price=3.14, + quantity=1, + name="itemName", + category="itemCategory", + currency="itemCurrency", context=[ctx], - tstamp=evTstamp, + tstamp=ev_tstamp, ) - self.assertEqual(mok_complete_payload.call_count, 1) - completeArgsList = mok_complete_payload.call_args_list[0][0] - self.assertEqual(len(completeArgsList), 4) + self.assertEqual(mok_track.call_count, 1) + complete_args_list = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_list), 1) - actualPayloadArg = completeArgsList[0] - actualContextArg = completeArgsList[1] - actualTstampArg = completeArgsList[2] - actualPairs = actualPayloadArg.nv_pairs + actual_payload_arg = complete_args_list["event"].payload + actual_pairs = actual_payload_arg.nv_pairs expectedPairs = { "e": "ti", @@ -683,20 +595,16 @@ def test_track_ecommerce_transaction_item(self, mok_complete_payload: Any) -> No "ti_qu": 1, "ti_cu": "itemCurrency", } - self.assertDictEqual(actualPairs, expectedPairs) - self.assertIs(actualContextArg[0], ctx) - self.assertEqual(actualTstampArg, evTstamp) + self.assertDictEqual(actual_pairs, expectedPairs) - @mock.patch("snowplow_tracker.Tracker.complete_payload") - def test_track_ecommerce_transaction_no_items( - self, mok_complete_payload: Any - ) -> None: + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_ecommerce_transaction_no_items(self, mok_track: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() - mok_complete_payload.side_effect = 
mocked_complete_payload + mok_track.side_effect = mocked_track - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evTstamp = 1399021242030 t.track_ecommerce_transaction( @@ -712,12 +620,11 @@ def test_track_ecommerce_transaction_no_items( context=[ctx], tstamp=evTstamp, ) - self.assertEqual(mok_complete_payload.call_count, 1) - completeArgsList = mok_complete_payload.call_args_list[0][0] - self.assertEqual(len(completeArgsList), 4) - actualPayloadArg = completeArgsList[0] - actualContextArg = completeArgsList[1] - actualTstampArg = completeArgsList[2] + self.assertEqual(mok_track.call_count, 1) + completeArgsList = mok_track.call_args_list[0][1] + self.assertEqual(len(completeArgsList), 1) + + actualPayloadArg = completeArgsList["event"].payload actualPairs = actualPayloadArg.nv_pairs expectedPairs = { @@ -733,21 +640,19 @@ def test_track_ecommerce_transaction_no_items( "tr_cu": "transCurrency", } self.assertDictEqual(actualPairs, expectedPairs) - self.assertIs(actualContextArg[0], ctx) - self.assertEqual(actualTstampArg, evTstamp) @mock.patch("snowplow_tracker.Tracker.track_ecommerce_transaction_item") - @mock.patch("snowplow_tracker.Tracker.complete_payload") + @mock.patch("snowplow_tracker.Tracker.track") def test_track_ecommerce_transaction_with_items( - self, mok_complete_payload: Any, mok_track_trans_item: Any + self, mok_track: Any, mok_track_trans_item: Any ) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() - mok_complete_payload.side_effect = mocked_complete_payload + mok_track.side_effect = mocked_track mok_track_trans_item.side_effect = mocked_track_trans_item - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evTstamp = 1399021242030 transItems = [ @@ -755,27 +660,26 @@ def test_track_ecommerce_transaction_with_items( {"sku": "sku5678", "quantity": 1, "price": 2.72}, ] 
t.track_ecommerce_transaction( - "1234", - 10, - "transAffiliation", - 2.5, - 1.5, - "transCity", - "transState", - "transCountry", - "transCurrency", + order_id="1234", + total_value=10, + affiliation="transAffiliation", + tax_value=2.5, + shipping=1.5, + city="transCity", + state="transState", + country="transCountry", + currency="transCurrency", items=transItems, context=[ctx], tstamp=evTstamp, ) # Transaction - callCompleteArgsList = mok_complete_payload.call_args_list - firstCallArgsList = callCompleteArgsList[0][0] - self.assertEqual(len(firstCallArgsList), 4) - actualPayloadArg = firstCallArgsList[0] - actualContextArg = firstCallArgsList[1] - actualTstampArg = firstCallArgsList[2] + callCompleteArgsList = mok_track.call_args_list + firstCallArgsList = callCompleteArgsList[0][1] + self.assertEqual(len(firstCallArgsList), 1) + + actualPayloadArg = firstCallArgsList["event"].payload actualPairs = actualPayloadArg.nv_pairs expectedTransPairs = { @@ -791,8 +695,6 @@ def test_track_ecommerce_transaction_with_items( "tr_cu": "transCurrency", } self.assertDictEqual(actualPairs, expectedTransPairs) - self.assertIs(actualContextArg[0], ctx) - self.assertEqual(actualTstampArg, evTstamp) # Items calls_to_track_trans_item = mok_track_trans_item.call_count @@ -804,14 +706,16 @@ def test_track_ecommerce_transaction_with_items( firstItemCallKwargs = callTrackItemsArgsList[0][1] expectedFirstItemPairs = { - "tstamp": evTstamp, - "order_id": "1234", - "currency": "transCurrency", "sku": "sku1234", "quantity": 3, "price": 3.14, + "order_id": "1234", + "currency": "transCurrency", + "tstamp": evTstamp, "event_subject": None, + "context": [ctx], } + self.assertDictEqual(firstItemCallKwargs, expectedFirstItemPairs) # 2nd item secItemCallArgs = callTrackItemsArgsList[1][0] @@ -819,24 +723,26 @@ def test_track_ecommerce_transaction_with_items( secItemCallKwargs = callTrackItemsArgsList[1][1] expectedSecItemPairs = { - "tstamp": evTstamp, - "order_id": "1234", - "currency": 
"transCurrency", "sku": "sku5678", "quantity": 1, "price": 2.72, + "order_id": "1234", + "currency": "transCurrency", + "tstamp": evTstamp, "event_subject": None, + "context": [ctx], } + self.assertDictEqual(secItemCallKwargs, expectedSecItemPairs) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_link_click(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evTstamp = 1399021242030 @@ -861,20 +767,20 @@ def test_track_link_click(self, mok_track_unstruct: Any) -> None: }, } - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), expected) - self.assertIs(callArgs[1][0], ctx) - self.assertEqual(callArgs[2], evTstamp) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["tstamp"], evTstamp) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_link_click_optional_none(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) t.track_link_click("example.com") @@ -885,20 +791,20 @@ def test_track_link_click_optional_none(self, mok_track_unstruct: Any) -> None: }, } - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), expected) - 
self.assertTrue(callArgs[1] is None) - self.assertTrue(callArgs[2] is None) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertTrue(callArgs["context"] is None) + self.assertTrue(callArgs["tstamp"] is None) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_add_to_cart(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evTstamp = 1399021242030 @@ -925,20 +831,20 @@ def test_track_add_to_cart(self, mok_track_unstruct: Any) -> None: }, } - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), expected) - self.assertIs(callArgs[1][0], ctx) - self.assertEqual(callArgs[2], evTstamp) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["tstamp"], evTstamp) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_add_to_cart_optional_none(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) t.track_add_to_cart("sku1234", 1) @@ -947,20 +853,20 @@ def test_track_add_to_cart_optional_none(self, mok_track_unstruct: Any) -> None: "data": {"sku": "sku1234", "quantity": 1}, } - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), 
expected) - self.assertTrue(callArgs[1] is None) - self.assertTrue(callArgs[2] is None) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertTrue(callArgs["context"] is None) + self.assertTrue(callArgs["tstamp"] is None) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_remove_from_cart(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evTstamp = 1399021242030 @@ -987,13 +893,13 @@ def test_track_remove_from_cart(self, mok_track_unstruct: Any) -> None: }, } - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), expected) - self.assertIs(callArgs[1][0], ctx) - self.assertEqual(callArgs[2], evTstamp) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["tstamp"], evTstamp) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_remove_from_cart_optional_none( self, mok_track_unstruct: Any ) -> None: @@ -1002,7 +908,7 @@ def test_track_remove_from_cart_optional_none( mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) t.track_remove_from_cart("sku1234", 1) @@ -1011,20 +917,20 @@ def test_track_remove_from_cart_optional_none( "data": {"sku": "sku1234", "quantity": 1}, } - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), 
expected) - self.assertTrue(callArgs[1] is None) - self.assertTrue(callArgs[2] is None) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertTrue(callArgs["context"] is None) + self.assertTrue(callArgs["tstamp"] is None) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_change(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evTstamp = 1399021242030 @@ -1051,20 +957,20 @@ def test_track_form_change(self, mok_track_unstruct: Any) -> None: }, } - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), expected) - self.assertIs(callArgs[1][0], ctx) - self.assertEqual(callArgs[2], evTstamp) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["tstamp"], evTstamp) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_change_optional_none(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) t.track_form_change("testFormId", "testElemId", "INPUT", "testValue") expected = { @@ -1077,20 +983,20 @@ def test_track_form_change_optional_none(self, mok_track_unstruct: Any) -> None: }, } - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - 
self.assertDictEqual(callArgs[0].to_json(), expected) - self.assertTrue(callArgs[1] is None) - self.assertTrue(callArgs[2] is None) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertTrue(callArgs["context"] is None) + self.assertTrue(callArgs["tstamp"] is None) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_submit(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evTstamp = 1399021242030 elems = [ @@ -1119,13 +1025,13 @@ def test_track_form_submit(self, mok_track_unstruct: Any) -> None: }, } - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), expected) - self.assertIs(callArgs[1][0], ctx) - self.assertEqual(callArgs[2], evTstamp) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["tstamp"], evTstamp) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_submit_invalid_element_type( self, mok_track_unstruct: Any ) -> None: @@ -1134,7 +1040,7 @@ def test_track_form_submit_invalid_element_type( mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evTstamp = 1399021242030 elems = [ @@ -1155,7 +1061,7 @@ def test_track_form_submit_invalid_element_type( tstamp=evTstamp, ) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + 
@mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_submit_invalid_element_type_disabled_contracts( self, mok_track_unstruct: Any ) -> None: @@ -1164,7 +1070,7 @@ def test_track_form_submit_invalid_element_type_disabled_contracts( mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evTstamp = 1399021242030 elems = [ @@ -1194,54 +1100,54 @@ def test_track_form_submit_invalid_element_type_disabled_contracts( }, } - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), expected) - self.assertIs(callArgs[1][0], ctx) - self.assertEqual(callArgs[2], evTstamp) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["tstamp"], evTstamp) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_submit_optional_none(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) t.track_form_submit("testFormId") expected = {"schema": FORM_SUBMIT_SCHEMA, "data": {"formId": "testFormId"}} - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), expected) - self.assertTrue(callArgs[1] is None) - self.assertTrue(callArgs[2] is None) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertTrue(callArgs["context"] is None) + self.assertTrue(callArgs["tstamp"] is None) - 
@mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_submit_empty_elems(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) t.track_form_submit("testFormId", elements=[]) expected = {"schema": FORM_SUBMIT_SCHEMA, "data": {"formId": "testFormId"}} - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), expected) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_site_search(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evTstamp = 1399021242030 @@ -1259,20 +1165,21 @@ def test_track_site_search(self, mok_track_unstruct: Any) -> None: }, } - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), expected) - self.assertIs(callArgs[1][0], ctx) - self.assertEqual(callArgs[2], evTstamp) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["tstamp"], evTstamp) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_site_search_optional_none(self, mok_track_unstruct: Any) -> None: 
mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) t.track_site_search(["track", "search"]) expected = { @@ -1280,20 +1187,51 @@ def test_track_site_search_optional_none(self, mok_track_unstruct: Any) -> None: "data": {"terms": ["track", "search"]}, } - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), expected) - self.assertTrue(callArgs[1] is None) - self.assertTrue(callArgs[2] is None) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertTrue(callArgs["context"] is None) + self.assertTrue(callArgs["tstamp"] is None) + + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_mobile_screen_view(self, mok_track: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track.side_effect = mocked_track + + t = Tracker("namespace", e) + + screen_view = ScreenView(name="screenName", id_="screenId") + actual_pairs = screen_view.build_payload( + encode_base64=False, + json_encoder=t.json_encoder, + ).nv_pairs + + t.track(screen_view) + + self.assertEqual(mok_track.call_count, 1) + complete_args_dict = mok_track.call_args_list[0][0] + self.assertEqual(len(complete_args_dict), 1) + actual_ue_pr = json.loads(actual_pairs["ue_pr"]) + + expected = { + "schema": MOBILE_SCREEN_VIEW_SCHEMA, + "data": {"id": "screenId", "name": "screenName"}, + } + + complete_args_dict = mok_track.call_args_list[0][1] + complete_args_dict = mok_track.call_args_list[0][1] + self.assertDictEqual(actual_ue_pr["data"], expected) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_screen_view(self, mok_track_unstruct: Any) -> None: mokEmitter = 
self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() mok_track_unstruct.side_effect = mocked_track_unstruct - t = Tracker(e) + t = Tracker("namespace", e) ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evTstamp = 1399021242030 @@ -1304,8 +1242,8 @@ def test_track_screen_view(self, mok_track_unstruct: Any) -> None: "data": {"name": "screenName", "id": "screenId"}, } - callArgs = mok_track_unstruct.call_args_list[0][0] + callArgs = mok_track_unstruct.call_args_list[0][1] self.assertEqual(len(callArgs), 4) - self.assertDictEqual(callArgs[0].to_json(), expected) - self.assertIs(callArgs[1][0], ctx) - self.assertEqual(callArgs[2], evTstamp) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["tstamp"], evTstamp) diff --git a/snowplow_tracker/tracker.py b/snowplow_tracker/tracker.py index f693e41a..4dc489dc 100644 --- a/snowplow_tracker/tracker.py +++ b/snowplow_tracker/tracker.py @@ -1,7 +1,7 @@ # """ # tracker.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,33 +13,40 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. 
- -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import time import uuid from typing import Any, Optional, Union, List, Dict, Sequence +from warnings import warn -from snowplow_tracker import payload, _version, SelfDescribingJson -from snowplow_tracker import subject as _subject +from snowplow_tracker import payload, SelfDescribingJson +from snowplow_tracker.subject import Subject from snowplow_tracker.contracts import non_empty_string, one_of, non_empty, form_element -from snowplow_tracker.typing import JsonEncoderFunction, EmitterProtocol,\ - FORM_NODE_NAMES, FORM_TYPES, FormNodeName, ElementClasses, FormClasses - -""" -Constants & config -""" - -VERSION = "py-%s" % _version.__version__ -DEFAULT_ENCODE_BASE64 = True -BASE_SCHEMA_PATH = "iglu:com.snowplowanalytics.snowplow" -SCHEMA_TAG = "jsonschema" -CONTEXT_SCHEMA = "%s/contexts/%s/1-0-1" % (BASE_SCHEMA_PATH, SCHEMA_TAG) -UNSTRUCT_EVENT_SCHEMA = "%s/unstruct_event/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG) -ContextArray = List[SelfDescribingJson] +from snowplow_tracker.constants import ( + VERSION, + DEFAULT_ENCODE_BASE64, + BASE_SCHEMA_PATH, + SCHEMA_TAG, +) + +from snowplow_tracker.events import ( + Event, + PagePing, + PageView, + SelfDescribing, + StructuredEvent, + ScreenView, +) +from snowplow_tracker.typing import ( + JsonEncoderFunction, + EmitterProtocol, + FORM_NODE_NAMES, + FORM_TYPES, + FormNodeName, + ElementClasses, + FormClasses, +) """ Tracker class @@ -47,67 +54,69 @@ class Tracker: - def __init__( - self, - emitters: Union[List[EmitterProtocol], EmitterProtocol], - subject: Optional[_subject.Subject] = None, - namespace: Optional[str] = None, - app_id: Optional[str] = None, - encode_base64: bool = DEFAULT_ENCODE_BASE64, - json_encoder: Optional[JsonEncoderFunction] = None) -> None: - """ - :param emitters: Emitters to which events will be sent - :type emitters: 
list[>0](emitter) | emitter - :param subject: Subject to be tracked - :type subject: subject | None - :param namespace: Identifier for the Tracker instance - :type namespace: string_or_none - :param app_id: Application ID - :type app_id: string_or_none - :param encode_base64: Whether JSONs in the payload should be base-64 encoded - :type encode_base64: bool - :param json_encoder: Custom JSON serializer that gets called on non-serializable object - :type json_encoder: function | None + self, + namespace: str, + emitters: Union[List[EmitterProtocol], EmitterProtocol], + subject: Optional[Subject] = None, + app_id: Optional[str] = None, + encode_base64: bool = DEFAULT_ENCODE_BASE64, + json_encoder: Optional[JsonEncoderFunction] = None, + ) -> None: + """ + :param namespace: Identifier for the Tracker instance + :type namespace: string + :param emitters: Emitters to which events will be sent + :type emitters: list[>0](emitter) | emitter + :param subject: Subject to be tracked + :type subject: subject | None + :param app_id: Application ID + :type app_id: string_or_none + :param encode_base64: Whether JSONs in the payload should be base-64 encoded + :type encode_base64: bool + :param json_encoder: Custom JSON serializer that gets called on non-serializable object + :type json_encoder: function | None """ if subject is None: - subject = _subject.Subject() + subject = Subject() - if type(emitters) is list: + if isinstance(emitters, list): non_empty(emitters) self.emitters = emitters else: self.emitters = [emitters] - self.subject = subject + self.subject: Optional[Subject] = subject self.encode_base64 = encode_base64 self.json_encoder = json_encoder - self.standard_nv_pairs = { - "tv": VERSION, - "tna": namespace, - "aid": app_id - } + self.standard_nv_pairs = {"tv": VERSION, "tna": namespace, "aid": app_id} self.timer = None @staticmethod def get_uuid() -> str: """ - Set transaction ID for the payload once during the lifetime of the - event. 
+ Set transaction ID for the payload once during the lifetime of the + event. - :rtype: string + :rtype: string """ return str(uuid.uuid4()) @staticmethod def get_timestamp(tstamp: Optional[float] = None) -> int: """ - :param tstamp: User-input timestamp or None - :type tstamp: int | float | None - :rtype: int + :param tstamp: User-input timestamp or None + :type tstamp: int | float | None + :rtype: int """ - if isinstance(tstamp, (int, float, )): + if isinstance( + tstamp, + ( + int, + float, + ), + ): return int(tstamp) return int(time.time() * 1000) @@ -115,170 +124,183 @@ def get_timestamp(tstamp: Optional[float] = None) -> int: Tracking methods """ - def track(self, pb: payload.Payload) -> 'Tracker': + def track( + self, + event: Event, + ) -> Optional[str]: """ - Send the payload to a emitter - - :param pb: Payload builder - :type pb: payload - :rtype: tracker + Send the event payload to a emitter. Returns the tracked event ID. + :param event: Event + :type event: events.Event + :rtype: String """ - for emitter in self.emitters: - emitter.input(pb.nv_pairs) - return self - def complete_payload( - self, - pb: payload.Payload, - context: Optional[List[SelfDescribingJson]], - tstamp: Optional[float], - event_subject: Optional[_subject.Subject]) -> 'Tracker': - """ - Called by all tracking events to add the standard name-value pairs - to the Payload object irrespective of the tracked event. 
+ payload = self.complete_payload( + event=event, + ) - :param pb: Payload builder - :type pb: payload - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker - """ - pb.add("eid", Tracker.get_uuid()) + for emitter in self.emitters: + emitter.input(payload.nv_pairs) - pb.add("dtm", Tracker.get_timestamp()) - if tstamp is not None: - pb.add("ttm", Tracker.get_timestamp(tstamp)) + if "eid" in payload.nv_pairs.keys(): + return payload.nv_pairs["eid"] - if context is not None: - context_jsons = list(map(lambda c: c.to_json(), context)) - context_envelope = SelfDescribingJson(CONTEXT_SCHEMA, context_jsons).to_json() - pb.add_json(context_envelope, self.encode_base64, "cx", "co", self.json_encoder) + return None - pb.add_dict(self.standard_nv_pairs) + def complete_payload( + self, + event: Event, + ) -> payload.Payload: + payload = event.build_payload( + encode_base64=self.encode_base64, + json_encoder=self.json_encoder, + subject=self.subject, + ) - fin_subject = event_subject if event_subject is not None else self.subject - pb.add_dict(fin_subject.standard_nv_pairs) + payload.add("eid", Tracker.get_uuid()) + payload.add("dtm", Tracker.get_timestamp()) + payload.add_dict(self.standard_nv_pairs) - return self.track(pb) + return payload def track_page_view( - self, - page_url: str, - page_title: Optional[str] = None, - referrer: Optional[str] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param page_url: URL of the viewed page - :type page_url: non_empty_string - :param page_title: Title of the viewed page - :type page_title: string_or_none - :param referrer: Referrer of the page - :type referrer: string_or_none - :param 
context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker - """ - non_empty_string(page_url) - - pb = payload.Payload() - pb.add("e", "pv") # pv: page view - pb.add("url", page_url) - pb.add("page", page_title) - pb.add("refr", referrer) - - return self.complete_payload(pb, context, tstamp, event_subject) + self, + page_url: str, + page_title: Optional[str] = None, + referrer: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": + """ + :param page_url: URL of the viewed page + :type page_url: non_empty_string + :param page_title: Title of the viewed page + :type page_title: string_or_none + :param referrer: Referrer of the page + :type referrer: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker + """ + warn( + "track_page_view will be removed in future versions. 
Please use the new PageView class to track the event.", + DeprecationWarning, + stacklevel=2, + ) + + pv = PageView( + page_url=page_url, + page_title=page_title, + referrer=referrer, + event_subject=event_subject, + context=context, + true_timestamp=tstamp, + ) + + self.track(event=pv) + return self def track_page_ping( - self, - page_url: str, - page_title: Optional[str] = None, - referrer: Optional[str] = None, - min_x: Optional[int] = None, - max_x: Optional[int] = None, - min_y: Optional[int] = None, - max_y: Optional[int] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param page_url: URL of the viewed page - :type page_url: non_empty_string - :param page_title: Title of the viewed page - :type page_title: string_or_none - :param referrer: Referrer of the page - :type referrer: string_or_none - :param min_x: Minimum page x offset seen in the last ping period - :type min_x: int | None - :param max_x: Maximum page x offset seen in the last ping period - :type max_x: int | None - :param min_y: Minimum page y offset seen in the last ping period - :type min_y: int | None - :param max_y: Maximum page y offset seen in the last ping period - :type max_y: int | None - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker - """ - non_empty_string(page_url) - - pb = payload.Payload() - pb.add("e", "pp") # pp: page ping - pb.add("url", page_url) - pb.add("page", page_title) - pb.add("refr", referrer) - pb.add("pp_mix", min_x) - pb.add("pp_max", max_x) - pb.add("pp_miy", min_y) - pb.add("pp_may", max_y) - - return self.complete_payload(pb, context, tstamp, event_subject) + self, + page_url: str, + page_title: Optional[str] = 
None, + referrer: Optional[str] = None, + min_x: Optional[int] = None, + max_x: Optional[int] = None, + min_y: Optional[int] = None, + max_y: Optional[int] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": + """ + :param page_url: URL of the viewed page + :type page_url: non_empty_string + :param page_title: Title of the viewed page + :type page_title: string_or_none + :param referrer: Referrer of the page + :type referrer: string_or_none + :param min_x: Minimum page x offset seen in the last ping period + :type min_x: int | None + :param max_x: Maximum page x offset seen in the last ping period + :type max_x: int | None + :param min_y: Minimum page y offset seen in the last ping period + :type min_y: int | None + :param max_y: Maximum page y offset seen in the last ping period + :type max_y: int | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker + """ + warn( + "track_page_ping will be removed in future versions. 
Please use the new PagePing class to track the event.", + DeprecationWarning, + stacklevel=2, + ) + + pp = PagePing( + page_url=page_url, + page_title=page_title, + referrer=referrer, + min_x=min_x, + max_x=max_x, + min_y=min_y, + max_y=max_y, + context=context, + true_timestamp=tstamp, + event_subject=event_subject, + ) + + self.track(event=pp) + return self def track_link_click( - self, - target_url: str, - element_id: Optional[str] = None, - element_classes: Optional[ElementClasses] = None, - element_target: Optional[str] = None, - element_content: Optional[str] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param target_url: Target URL of the link - :type target_url: non_empty_string - :param element_id: ID attribute of the HTML element - :type element_id: string_or_none - :param element_classes: Classes of the HTML element - :type element_classes: list(str) | tuple(str,\*) | None - :param element_target: ID attribute of the HTML element - :type element_target: string_or_none - :param element_content: The content of the HTML element - :type element_content: string_or_none - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + target_url: str, + element_id: Optional[str] = None, + element_classes: Optional[ElementClasses] = None, + element_target: Optional[str] = None, + element_content: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": + """ + :param target_url: Target URL of the link + :type target_url: non_empty_string + :param element_id: ID attribute of the HTML element + :type 
element_id: string_or_none + :param element_classes: Classes of the HTML element + :type element_classes: list(str) | tuple(str,\\*) | None + :param element_target: ID attribute of the HTML element + :type element_target: string_or_none + :param element_content: The content of the HTML element + :type element_content: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker """ + warn( + "track_link_click will be removed in future versions. Please use the new SelfDescribing class to track the event.", + DeprecationWarning, + stacklevel=2, + ) non_empty_string(target_url) - properties = {} + properties: Dict[str, Union[str, ElementClasses]] = {} properties["targetUrl"] = target_url if element_id is not None: properties["elementId"] = element_id @@ -289,45 +311,59 @@ def track_link_click( if element_content is not None: properties["elementContent"] = element_content - event_json = SelfDescribingJson("%s/link_click/%s/1-0-1" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties) + event_json = SelfDescribingJson( + "%s/link_click/%s/1-0-1" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) - return self.track_unstruct_event(event_json, context, tstamp, event_subject) + self.track_self_describing_event( + event_json=event_json, + context=context, + tstamp=tstamp, + event_subject=event_subject, + ) + return self def track_add_to_cart( - self, - sku: str, - quantity: int, - name: Optional[str] = None, - category: Optional[str] = None, - unit_price: Optional[float] = None, - currency: Optional[str] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param sku: Item SKU or ID - :type sku: non_empty_string - :param quantity: 
Number added to cart - :type quantity: int - :param name: Item's name - :type name: string_or_none - :param category: Item's category - :type category: string_or_none - :param unit_price: Item's price - :type unit_price: int | float | None - :param currency: Type of currency the price is in - :type currency: string_or_none - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + sku: str, + quantity: int, + name: Optional[str] = None, + category: Optional[str] = None, + unit_price: Optional[float] = None, + currency: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": + """ + :param sku: Item SKU or ID + :type sku: non_empty_string + :param quantity: Number added to cart + :type quantity: int + :param name: Item's name + :type name: string_or_none + :param category: Item's category + :type category: string_or_none + :param unit_price: Item's price + :type unit_price: int | float | None + :param currency: Type of currency the price is in + :type currency: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker """ + warn( + "track_add_to_cart will be deprecated in future versions.", + DeprecationWarning, + stacklevel=2, + ) non_empty_string(sku) - properties = {} + properties: Union[Dict[str, Union[str, float, int]]] = {} properties["sku"] = sku properties["quantity"] = quantity if name is not None: @@ -339,45 +375,59 @@ def track_add_to_cart( if currency is not None: 
properties["currency"] = currency - event_json = SelfDescribingJson("%s/add_to_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties) + event_json = SelfDescribingJson( + "%s/add_to_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) - return self.track_unstruct_event(event_json, context, tstamp, event_subject) + self.track_self_describing_event( + event_json=event_json, + context=context, + tstamp=tstamp, + event_subject=event_subject, + ) + return self def track_remove_from_cart( - self, - sku: str, - quantity: int, - name: Optional[str] = None, - category: Optional[str] = None, - unit_price: Optional[float] = None, - currency: Optional[str] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param sku: Item SKU or ID - :type sku: non_empty_string - :param quantity: Number added to cart - :type quantity: int - :param name: Item's name - :type name: string_or_none - :param category: Item's category - :type category: string_or_none - :param unit_price: Item's price - :type unit_price: int | float | None - :param currency: Type of currency the price is in - :type currency: string_or_none - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + sku: str, + quantity: int, + name: Optional[str] = None, + category: Optional[str] = None, + unit_price: Optional[float] = None, + currency: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": + """ + :param sku: Item SKU or ID + :type sku: non_empty_string + :param quantity: Number added to cart + :type quantity: int + :param name: Item's 
name + :type name: string_or_none + :param category: Item's category + :type category: string_or_none + :param unit_price: Item's price + :type unit_price: int | float | None + :param currency: Type of currency the price is in + :type currency: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker """ + warn( + "track_remove_from_cart will be deprecated in future versions.", + DeprecationWarning, + stacklevel=2, + ) non_empty_string(sku) - properties = {} + properties: Dict[str, Union[str, float, int]] = {} properties["sku"] = sku properties["quantity"] = quantity if name is not None: @@ -389,48 +439,63 @@ def track_remove_from_cart( if currency is not None: properties["currency"] = currency - event_json = SelfDescribingJson("%s/remove_from_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties) + event_json = SelfDescribingJson( + "%s/remove_from_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) - return self.track_unstruct_event(event_json, context, tstamp, event_subject) + self.track_self_describing_event( + event_json=event_json, + context=context, + tstamp=tstamp, + event_subject=event_subject, + ) + return self def track_form_change( - self, - form_id: str, - element_id: Optional[str], - node_name: FormNodeName, - value: Optional[str], - type_: Optional[str] = None, - element_classes: Optional[ElementClasses] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param form_id: ID attribute of the HTML form - :type form_id: non_empty_string - :param element_id: ID attribute of the HTML element - :type element_id: string_or_none - :param node_name: Type of input element - :type 
node_name: form_node_name - :param value: Value of the input element - :type value: string_or_none - :param type_: Type of data the element represents - :type type_: non_empty_string, form_type - :param element_classes: Classes of the HTML element - :type element_classes: list(str) | tuple(str,\*) | None - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + form_id: str, + element_id: Optional[str], + node_name: FormNodeName, + value: Optional[str], + type_: Optional[str] = None, + element_classes: Optional[ElementClasses] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": + """ + :param form_id: ID attribute of the HTML form + :type form_id: non_empty_string + :param element_id: ID attribute of the HTML element + :type element_id: string_or_none + :param node_name: Type of input element + :type node_name: form_node_name + :param value: Value of the input element + :type value: string_or_none + :param type_: Type of data the element represents + :type type_: non_empty_string, form_type + :param element_classes: Classes of the HTML element + :type element_classes: list(str) | tuple(str,\\*) | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker """ + warn( + "track_form_change will be removed in future versions. 
Please use the new SelfDescribing class to track the event.", + DeprecationWarning, + stacklevel=2, + ) + non_empty_string(form_id) one_of(node_name, FORM_NODE_NAMES) if type_ is not None: one_of(type_.lower(), FORM_TYPES) - properties = dict() + properties: Dict[str, Union[Optional[str], ElementClasses]] = dict() properties["formId"] = form_id properties["elementId"] = element_id properties["nodeName"] = node_name @@ -440,77 +505,110 @@ def track_form_change( if element_classes is not None: properties["elementClasses"] = element_classes - event_json = SelfDescribingJson("%s/change_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties) + event_json = SelfDescribingJson( + "%s/change_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) - return self.track_unstruct_event(event_json, context, tstamp, event_subject) + self.track_self_describing_event( + event_json=event_json, + context=context, + tstamp=tstamp, + event_subject=event_subject, + ) + return self def track_form_submit( - self, - form_id: str, - form_classes: Optional[FormClasses] = None, - elements: Optional[List[Dict[str, Any]]] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param form_id: ID attribute of the HTML form - :type form_id: non_empty_string - :param form_classes: Classes of the HTML form - :type form_classes: list(str) | tuple(str,\*) | None - :param elements: Classes of the HTML form - :type elements: list(form_element) | None - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + form_id: str, + form_classes: Optional[FormClasses] = None, + elements: Optional[List[Dict[str, Any]]] = None, + context: 
Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": """ + :param form_id: ID attribute of the HTML form + :type form_id: non_empty_string + :param form_classes: Classes of the HTML form + :type form_classes: list(str) | tuple(str,\\*) | None + :param elements: Classes of the HTML form + :type elements: list(form_element) | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker + """ + warn( + "track_form_submit will be removed in future versions. Please use the new SelfDescribing class to track the event.", + DeprecationWarning, + stacklevel=2, + ) non_empty_string(form_id) + for element in elements or []: form_element(element) - properties = dict() - properties['formId'] = form_id + properties: Dict[ + str, Union[str, ElementClasses, FormClasses, List[Dict[str, Any]]] + ] = dict() + properties["formId"] = form_id if form_classes is not None: - properties['formClasses'] = form_classes + properties["formClasses"] = form_classes if elements is not None and len(elements) > 0: - properties['elements'] = elements - - event_json = SelfDescribingJson("%s/submit_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties) - - return self.track_unstruct_event(event_json, context, tstamp, event_subject) + properties["elements"] = elements + + event_json = SelfDescribingJson( + "%s/submit_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) + + self.track_self_describing_event( + event_json=event_json, + context=context, + tstamp=tstamp, + event_subject=event_subject, + ) + return self def track_site_search( - self, - terms: Sequence[str], - filters: Optional[Dict[str, Union[str, bool]]] = None, - total_results: Optional[int] = None, - page_results: 
Optional[int] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param terms: Search terms - :type terms: seq[>=1](str) - :param filters: Filters applied to the search - :type filters: dict(str:str|bool) | None - :param total_results: Total number of results returned - :type total_results: int | None - :param page_results: Total number of pages of results - :type page_results: int | None - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + terms: Sequence[str], + filters: Optional[Dict[str, Union[str, bool]]] = None, + total_results: Optional[int] = None, + page_results: Optional[int] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": + """ + :param terms: Search terms + :type terms: seq[>=1](str) + :param filters: Filters applied to the search + :type filters: dict(str:str|bool) | None + :param total_results: Total number of results returned + :type total_results: int | None + :param page_results: Total number of pages of results + :type page_results: int | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker """ + warn( + "track_site_search will be removed in future versions. 
Please use the new SelfDescribing class to track the event.", + DeprecationWarning, + stacklevel=2, + ) non_empty(terms) - properties = {} + properties: Dict[ + str, Union[Sequence[str], Dict[str, Union[str, bool]], int] + ] = {} properties["terms"] = terms if filters is not None: properties["filters"] = filters @@ -519,277 +617,435 @@ def track_site_search( if page_results is not None: properties["pageResults"] = page_results - event_json = SelfDescribingJson("%s/site_search/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties) + event_json = SelfDescribingJson( + "%s/site_search/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) - return self.track_unstruct_event(event_json, context, tstamp, event_subject) + self.track_self_describing_event( + event_json=event_json, + context=context, + tstamp=tstamp, + event_subject=event_subject, + ) + return self def track_ecommerce_transaction_item( - self, - order_id: str, - sku: str, - price: float, - quantity: int, - name: Optional[str] = None, - category: Optional[str] = None, - currency: Optional[str] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - This is an internal method called by track_ecommerce_transaction. - It is not for public use. 
- - :param order_id: Order ID - :type order_id: non_empty_string - :param sku: Item SKU - :type sku: non_empty_string - :param price: Item price - :type price: int | float - :param quantity: Item quantity - :type quantity: int - :param name: Item name - :type name: string_or_none - :param category: Item category - :type category: string_or_none - :param currency: The currency the price is expressed in - :type currency: string_or_none - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + order_id: str, + sku: str, + price: float, + quantity: int, + name: Optional[str] = None, + category: Optional[str] = None, + currency: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": + """ + This is an internal method called by track_ecommerce_transaction. + It is not for public use. 
+ + :param order_id: Order ID + :type order_id: non_empty_string + :param sku: Item SKU + :type sku: non_empty_string + :param price: Item price + :type price: int | float + :param quantity: Item quantity + :type quantity: int + :param name: Item name + :type name: string_or_none + :param category: Item category + :type category: string_or_none + :param currency: The currency the price is expressed in + :type currency: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker """ + warn( + "track_ecommerce_transaction_item will be deprecated in future versions.", + DeprecationWarning, + stacklevel=2, + ) non_empty_string(order_id) non_empty_string(sku) - pb = payload.Payload() - pb.add("e", "ti") - pb.add("ti_id", order_id) - pb.add("ti_sk", sku) - pb.add("ti_nm", name) - pb.add("ti_ca", category) - pb.add("ti_pr", price) - pb.add("ti_qu", quantity) - pb.add("ti_cu", currency) - - return self.complete_payload(pb, context, tstamp, event_subject) + event = Event( + event_subject=event_subject, context=context, true_timestamp=tstamp + ) + event.payload.add("e", "ti") + event.payload.add("ti_id", order_id) + event.payload.add("ti_sk", sku) + event.payload.add("ti_nm", name) + event.payload.add("ti_ca", category) + event.payload.add("ti_pr", price) + event.payload.add("ti_qu", quantity) + event.payload.add("ti_cu", currency) + + self.track(event=event) + return self def track_ecommerce_transaction( - self, - order_id: str, - total_value: float, - affiliation: Optional[str] = None, - tax_value: Optional[float] = None, - shipping: Optional[float] = None, - city: Optional[str] = None, - state: Optional[str] = None, - country: Optional[str] = None, - currency: Optional[str] = None, - items: Optional[List[Dict[str, Any]]] = None, - 
context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param order_id: ID of the eCommerce transaction - :type order_id: non_empty_string - :param total_value: Total transaction value - :type total_value: int | float - :param affiliation: Transaction affiliation - :type affiliation: string_or_none - :param tax_value: Transaction tax value - :type tax_value: int | float | None - :param shipping: Delivery cost charged - :type shipping: int | float | None - :param city: Delivery address city - :type city: string_or_none - :param state: Delivery address state - :type state: string_or_none - :param country: Delivery address country - :type country: string_or_none - :param currency: The currency the price is expressed in - :type currency: string_or_none - :param items: The items in the transaction - :type items: list(dict(str:\*)) | None - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + order_id: str, + total_value: float, + affiliation: Optional[str] = None, + tax_value: Optional[float] = None, + shipping: Optional[float] = None, + city: Optional[str] = None, + state: Optional[str] = None, + country: Optional[str] = None, + currency: Optional[str] = None, + items: Optional[List[Dict[str, Any]]] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": """ + :param order_id: ID of the eCommerce transaction + :type order_id: non_empty_string + :param total_value: Total transaction value + :type total_value: int | float + :param affiliation: Transaction affiliation + :type affiliation: string_or_none + :param tax_value: Transaction 
tax value + :type tax_value: int | float | None + :param shipping: Delivery cost charged + :type shipping: int | float | None + :param city: Delivery address city + :type city: string_or_none + :param state: Delivery address state + :type state: string_or_none + :param country: Delivery address country + :type country: string_or_none + :param currency: The currency the price is expressed in + :type currency: string_or_none + :param items: The items in the transaction + :type items: list(dict(str:\\*)) | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker + """ + warn( + "track_ecommerce_transaction will be deprecated in future versions.", + DeprecationWarning, + stacklevel=2, + ) non_empty_string(order_id) - pb = payload.Payload() - pb.add("e", "tr") - pb.add("tr_id", order_id) - pb.add("tr_tt", total_value) - pb.add("tr_af", affiliation) - pb.add("tr_tx", tax_value) - pb.add("tr_sh", shipping) - pb.add("tr_ci", city) - pb.add("tr_st", state) - pb.add("tr_co", country) - pb.add("tr_cu", currency) + event = Event( + event_subject=event_subject, context=context, true_timestamp=tstamp + ) + event.payload.add("e", "tr") + event.payload.add("tr_id", order_id) + event.payload.add("tr_tt", total_value) + event.payload.add("tr_af", affiliation) + event.payload.add("tr_tx", tax_value) + event.payload.add("tr_sh", shipping) + event.payload.add("tr_ci", city) + event.payload.add("tr_st", state) + event.payload.add("tr_co", country) + event.payload.add("tr_cu", currency) tstamp = Tracker.get_timestamp(tstamp) - self.complete_payload(pb, context, tstamp, event_subject) + self.track(event=event) if items is None: items = [] for item in items: - item["tstamp"] = tstamp - item["event_subject"] = event_subject item["order_id"] = order_id 
item["currency"] = currency + item["tstamp"] = tstamp + item["event_subject"] = event_subject + item["context"] = context self.track_ecommerce_transaction_item(**item) return self def track_screen_view( - self, - name: Optional[str] = None, - id_: Optional[str] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param name: The name of the screen view event - :type name: string_or_none - :param id_: Screen view ID - :type id_: string_or_none - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + name: Optional[str] = None, + id_: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": + """ + :param name: The name of the screen view event + :type name: string_or_none + :param id_: Screen view ID + :type id_: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker """ + warn( + "track_screen_view will be removed in future versions. 
Please use the new ScreenView class to track the event.", + DeprecationWarning, + stacklevel=2, + ) screen_view_properties = {} if name is not None: screen_view_properties["name"] = name if id_ is not None: screen_view_properties["id"] = id_ - event_json = SelfDescribingJson("%s/screen_view/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), screen_view_properties) + event_json = SelfDescribingJson( + "%s/screen_view/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), + screen_view_properties, + ) + + self.track_self_describing_event( + event_json=event_json, + context=context, + tstamp=tstamp, + event_subject=event_subject, + ) + return self - return self.track_unstruct_event(event_json, context, tstamp, event_subject) + def track_mobile_screen_view( + self, + name: str, + id_: Optional[str] = None, + type: Optional[str] = None, + previous_name: Optional[str] = None, + previous_id: Optional[str] = None, + previous_type: Optional[str] = None, + transition_type: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": + """ + :param name: The name of the screen view event + :type name: string_or_none + :param id_: Screen view ID. This must be of type UUID. + :type id_: string | None + :param type: The type of screen that was viewed e.g feed / carousel. + :type type: string | None + :param previous_name: The name of the previous screen. + :type previous_name: string | None + :param previous_id: The screenview ID of the previous screenview. + :type previous_id: string | None + :param previous_type The screen type of the previous screenview + :type previous_type string | None + :param transition_type The type of transition that led to the screen being viewed. 
+ :type transition_type string | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker + """ + warn( + "track_mobile_screen_view will be removed in future versions. Please use the new ScreenView class to track the event.", + DeprecationWarning, + stacklevel=2, + ) + if id_ is None: + id_ = self.get_uuid() + + sv = ScreenView( + name=name, + id_=id_, + type=type, + previous_name=previous_name, + previous_id=previous_id, + previous_type=previous_type, + transition_type=transition_type, + event_subject=event_subject, + context=context, + true_timestamp=tstamp, + ) + + self.track(event=sv) + return self def track_struct_event( - self, - category: str, - action: str, - label: Optional[str] = None, - property_: Optional[str] = None, - value: Optional[float] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param category: Category of the event - :type category: non_empty_string - :param action: The event itself - :type action: non_empty_string - :param label: Refer to the object the action is - performed on - :type label: string_or_none - :param property_: Property associated with either the action - or the object - :type property_: string_or_none - :param value: A value associated with the user action - :type value: int | float | None - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker - """ - non_empty_string(category) - non_empty_string(action) - - pb = payload.Payload() - pb.add("e", 
"se") - pb.add("se_ca", category) - pb.add("se_ac", action) - pb.add("se_la", label) - pb.add("se_pr", property_) - pb.add("se_va", value) - - return self.complete_payload(pb, context, tstamp, event_subject) - - def track_unstruct_event( - self, - event_json: SelfDescribingJson, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': + self, + category: str, + action: str, + label: Optional[str] = None, + property_: Optional[str] = None, + value: Optional[Union[int, float]] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": """ - :param event_json: The properties of the event. Has two field: - A "data" field containing the event properties and - A "schema" field identifying the schema against which the data is validated - :type event_json: self_describing_json - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + :param category: Category of the event + :type category: non_empty_string + :param action: The event itself + :type action: non_empty_string + :param label: Refer to the object the action is + performed on + :type label: string_or_none + :param property_: Property associated with either the action + or the object + :type property_: string_or_none + :param value: A value associated with the user action + :type value: int | float | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker """ + warn( + 
"track_struct_event will be removed in future versions. Please use the new Structured class to track the event.", + DeprecationWarning, + stacklevel=2, + ) + se = StructuredEvent( + category=category, + action=action, + label=label, + property_=property_, + value=value, + context=context, + true_timestamp=tstamp, + event_subject=event_subject, + ) + + self.track( + event=se, + ) + return self - envelope = SelfDescribingJson(UNSTRUCT_EVENT_SCHEMA, event_json.to_json()).to_json() - - pb = payload.Payload() - - pb.add("e", "ue") - pb.add_json(envelope, self.encode_base64, "ue_px", "ue_pr", self.json_encoder) - - return self.complete_payload(pb, context, tstamp, event_subject) + def track_self_describing_event( + self, + event_json: SelfDescribingJson, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": + """ + :param event_json: The properties of the event. Has two field: + A "data" field containing the event properties and + A "schema" field identifying the schema against which the data is validated + :type event_json: self_describing_json + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker + """ + warn( + "track_self_describing_event will be removed in future versions. 
Please use the new SelfDescribing class to track the event.", + DeprecationWarning, + stacklevel=2, + ) + + sd = SelfDescribing( + event_json=event_json, + context=context, + true_timestamp=tstamp, + event_subject=event_subject, + ) + self.track( + event=sd, + ) + return self # Alias - track_self_describing_event = track_unstruct_event + def track_unstruct_event( + self, + event_json: SelfDescribingJson, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": + """ + :param event_json: The properties of the event. Has two field: + A "data" field containing the event properties and + A "schema" field identifying the schema against which the data is validated + :type event_json: self_describing_json + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker + """ + warn( + "track_unstruct_event will be deprecated in future versions. Please use track_self_describing_event.", + DeprecationWarning, + stacklevel=2, + ) + + self.track_self_describing_event( + event_json=event_json, + context=context, + tstamp=tstamp, + event_subject=event_subject, + ) + return self - def flush(self, is_async: bool = False) -> 'Tracker': + def flush(self, is_async: bool = False) -> "Tracker": """ - Flush the emitter + Flush the emitter - :param is_async: Whether the flush is done asynchronously. Default is False - :type is_async: bool - :rtype: tracker + :param is_async: Whether the flush is done asynchronously. 
Default is False + :type is_async: bool + :rtype: tracker """ for emitter in self.emitters: if is_async: - if hasattr(emitter, 'flush'): + if hasattr(emitter, "flush"): emitter.flush() else: - if hasattr(emitter, 'sync_flush'): + if hasattr(emitter, "sync_flush"): emitter.sync_flush() return self - def set_subject(self, subject: Optional[_subject.Subject]) -> 'Tracker': + def set_subject(self, subject: Optional[Subject]) -> "Tracker": """ - Set the subject of the events fired by the tracker + Set the subject of the events fired by the tracker - :param subject: Subject to be tracked - :type subject: subject | None - :rtype: tracker + :param subject: Subject to be tracked + :type subject: subject | None + :rtype: tracker """ self.subject = subject return self - def add_emitter(self, emitter: EmitterProtocol) -> 'Tracker': + def add_emitter(self, emitter: EmitterProtocol) -> "Tracker": """ - Add a new emitter to which events should be passed + Add a new emitter to which events should be passed - :param emitter: New emitter - :type emitter: emitter - :rtype: tracker + :param emitter: New emitter + :type emitter: emitter + :rtype: tracker """ self.emitters.append(emitter) return self + + def get_namespace(self) -> str: + # As app_id is added to the standard_nv_pairs dict above with a type of Optional[str], the type for + # the whole standard_nv_pairs dict is inferred to be dict[str, Optional[str]]. + # But, we know that "tna" should always be present in the dict, since namespace is a required argument. + # + # This ignores MyPy saying Incompatible return value type (got "str | None", expected "str") + return self.standard_nv_pairs["tna"] # type: ignore diff --git a/snowplow_tracker/tracker_configuration.py b/snowplow_tracker/tracker_configuration.py new file mode 100644 index 00000000..6a574dc2 --- /dev/null +++ b/snowplow_tracker/tracker_configuration.py @@ -0,0 +1,60 @@ +# """ +# tracker_configuration.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. 
All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ + +from typing import Optional +from snowplow_tracker.typing import JsonEncoderFunction + + +class TrackerConfiguration(object): + def __init__( + self, + encode_base64: bool = True, + json_encoder: Optional[JsonEncoderFunction] = None, + ) -> None: + """ + Configuration for additional tracker configuration options. + :param encode_base64: Whether JSONs in the payload should be base-64 encoded. Default is True. + :type encode_base64: bool + :param json_encoder: Custom JSON serializer that gets called on non-serializable object. + :type json_encoder: function | None + """ + + self.encode_base64 = encode_base64 + self.json_encoder = json_encoder + + @property + def encode_base64(self) -> bool: + """ + Whether JSONs in the payload should be base-64 encoded. Default is True. + """ + return self._encode_base64 + + @encode_base64.setter + def encode_base64(self, value: bool): + if isinstance(value, bool) or value is None: + self._encode_base64 = value + + @property + def json_encoder(self) -> Optional[JsonEncoderFunction]: + """ + Custom JSON serializer that gets called on non-serializable object. 
+ """ + return self._json_encoder + + @json_encoder.setter + def json_encoder(self, value: Optional[JsonEncoderFunction]): + self._json_encoder = value diff --git a/snowplow_tracker/typing.py b/snowplow_tracker/typing.py index 7800a657..3e973562 100644 --- a/snowplow_tracker/typing.py +++ b/snowplow_tracker/typing.py @@ -1,7 +1,7 @@ # """ # typing.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock, Matus Tomlein -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ from typing import Dict, List, Callable, Any, Optional, Union, Tuple @@ -29,10 +25,29 @@ # tracker FORM_NODE_NAMES = {"INPUT", "TEXTAREA", "SELECT"} FORM_TYPES = { - "button", "checkbox", "color", "date", "datetime", - "datetime-local", "email", "file", "hidden", "image", "month", - "number", "password", "radio", "range", "reset", "search", - "submit", "tel", "text", "time", "url", "week" + "button", + "checkbox", + "color", + "date", + "datetime", + "datetime-local", + "email", + "file", + "hidden", + "image", + "month", + "number", + "password", + "radio", + "range", + "reset", + "search", + "submit", + "tel", + "text", + "time", + "url", + "week", } FormNodeName = Literal["INPUT", "TEXTAREA", "SELECT"] ElementClasses = Union[List[str], Tuple[str, Any]] @@ -50,13 +65,10 @@ class EmitterProtocol(Protocol): - def input(self, payload: PayloadDict) -> None: - ... + def input(self, payload: PayloadDict) -> None: ... 
+ def flush(self) -> None: ... -class RedisProtocol(Protocol): - def rpush(self, name: Any, *values: Any) -> int: - ... + def async_flush(self) -> None: ... - def lpop(self, name: Any, count: Optional[int] = ...) -> Any: - ... + def sync_flush(self) -> None: ...