diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 00000000..4edd7b1a --- /dev/null +++ b/.coveragerc @@ -0,0 +1,2 @@ +[run] +relative_files = True diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000..8b8914db --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,27 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: bug +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior or code snippets that produce the issue. + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Environment (please complete the following information):** + - OS: [e.g. Ubuntu 20.04] + - Version [e.g. 3.8] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 00000000..11fc491e --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: enhancement +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. 
diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml new file mode 100644 index 00000000..2a0fcff1 --- /dev/null +++ b/.github/workflows/cd.yml @@ -0,0 +1,113 @@ +name: Deploy + +on: + push: + tags: + - '*.*.*' + +jobs: + version_check: + runs-on: ubuntu-latest + outputs: + v_tracker: ${{ steps.version.outputs.PYTHON_TRACKER_VERSION}} + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Setup Python + uses: actions/setup-python@v4 + + - name: Get tag and tracker versions + id: version + run: | + echo "TAG_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_OUTPUT + echo "PYTHON_TRACKER_VERSION=$(python setup.py --version)" >> $GITHUB_OUTPUT + + - name: Fail if version mismatch + if: ${{ steps.version.outputs.TAG_VERSION != steps.version.outputs.PYTHON_TRACKER_VERSION }} + run: | + echo "Tag version (${{ steps.version.outputs.TAG_VERSION }}) doesn't match version in project (${{ steps.version.outputs.PYTHON_TRACKER_VERSION }})" + exit 1 + + build: + needs: ["version_check"] + runs-on: ubuntu-latest + + strategy: + matrix: + python-version: [3.8] + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Pin pip version + run: | + echo "pip_v=pip" >> $GITHUB_ENV + + - name: Build + run: | + python -m pip install --upgrade "${{ env.pip_v }}" setuptools wheel + python setup.py sdist bdist_wheel + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: distfiles_${{ github.run_id }} + path: dist + + publish: + needs: ["build"] + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + + - name: Download artifacts + uses: actions/download-artifact@v4 + with: + name: distfiles_${{ github.run_id }} + path: ${{ github.workspace }}/dist + + - name: Twine check + run: | + python -m pip 
install --upgrade pip twine + twine check ${{ github.workspace }}/dist/* + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@v1.4.2 + with: + user: __token__ + password: ${{ secrets.PYPI_TOKEN }} + packages_dir: ${{ github.workspace }}/dist/ + verbose: true + + release: + needs: ["publish", "version_check"] + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Release + uses: softprops/action-gh-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ github.ref }} + name: Version ${{ needs.version_check.outputs.v_tracker }} + draft: false + prerelease: ${{ contains(needs.version_check.outputs.v_tracker, 'rc') }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..9c14d2fe --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,81 @@ +name: build + +on: + push: + branches: + - master + pull_request: + +jobs: + build: + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + python-version: + - 3.8 + - 3.9 + - "3.10" + - "3.11" + - "3.12" + - "3.13" + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Pin pip version + run: | + echo "pip_v=pip" >> $GITHUB_ENV + + - name: Install dependencies + run: | + python -m pip install --upgrade "${{ env.pip_v }}" setuptools wheel + python -m pip install -e . 
+ python -m pip install -r requirements-test.txt + + - name: Build + run: | + python setup.py sdist bdist_wheel + + - name: Tests + run: | + pytest --cov=snowplow_tracker --cov-report=xml + + - name: MyPy + run: | + python -m pip install -e .[typing] + mypy snowplow_tracker --exclude '/test' + + - name: Demo + run: | + cd examples + cd tracker_api_example + python app.py "localhost:9090" + + - name: Snowplow Demo + run: | + cd examples + cd snowplow_api_example + python snowplow_app.py "localhost:9090" + + - name: Coveralls + uses: AndreMiras/coveralls-python-action@develop + with: + parallel: true + + coveralls_finish: + needs: ["build"] + runs-on: ubuntu-latest + + steps: + - name: Coveralls finished + uses: AndreMiras/coveralls-python-action@develop + with: + parallel-finished: true diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml new file mode 100644 index 00000000..c50ac5e2 --- /dev/null +++ b/.github/workflows/documentation.yml @@ -0,0 +1,20 @@ +name: documentation + +on: + push: + branches: + - master +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ammaraskar/sphinx-action@master + with: + docs-folder: "docs/" + - name: Deploy to GitHub Pages + if: success() + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: ./docs/_build/html \ No newline at end of file diff --git a/.github/workflows/snyk.yml b/.github/workflows/snyk.yml new file mode 100644 index 00000000..b2e36c27 --- /dev/null +++ b/.github/workflows/snyk.yml @@ -0,0 +1,27 @@ +name: Snyk + +on: + push: + branches: [ master ] + +jobs: + security: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - uses: snyk/actions/setup@master + + - name: Set up Python 3.8 + uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install . 
+ + - name: Run Snyk to check for vulnerabilities + run: snyk monitor --file=setup.py --project-name=snowplow-python-tracker + env: + SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} diff --git a/.gitignore b/.gitignore index 4900fee3..deb1bace 100644 --- a/.gitignore +++ b/.gitignore @@ -41,3 +41,6 @@ nosetests.xml # Vagrant .vagrant VERSION + +#Docs +docs/_build \ No newline at end of file diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 7424bc4e..00000000 --- a/.travis.yml +++ /dev/null @@ -1,24 +0,0 @@ -language: python -services: -- redis-server -python: -- '2.7' -- '3.3' -- '3.4' -- '3.5' -install: -- pip install -r requirements-test.txt -- pip install release-manager -- pip install -e . -script: pytest --cov=snowplow_tracker -after_success: coveralls -deploy: - skip_cleanup: true - provider: script - script: ./.travis/deploy.py - on: - tags: true - python: '2.7' -env: - global: - secure: SnavmHqH0sB6xmyqSiN7HscdVpNr6pk+bWs/8Oin2FZ0Kp52V9vIQf1A9TjDQY67P3YgUsMFJKWmCIpbsWFUl65Nos2LXGK6oTe9mt7O5fsR6BsI1IiiZoC8wNZwmaUjqyJa3/Y5KUYvWXlTLYChbyiqxcTCtkBDnLaiFFDOEis= diff --git a/.travis/deploy.py b/.travis/deploy.py deleted file mode 100755 index 7efde113..00000000 --- a/.travis/deploy.py +++ /dev/null @@ -1,88 +0,0 @@ -#!/usr/bin/env python - - -import os -from os.path import expanduser -import sys - -from release_manager import utils, logger - -from snowplow_tracker import _version - -# --- Constants - - -HOME = expanduser("~") -DEFAULT_SERVER = 'https://pypi.python.org/pypi' -DEFAULT_REPO = 'pypi' -PYPIRC_FILE = '%s/.pypirc' % HOME - -if 'TRAVIS_TAG' in os.environ: - TRAVIS_TAG = os.environ.get('TRAVIS_TAG') -else: - sys.exit("Environment variable TRAVIS_TAG is unavailable") - -if 'TRAVIS_BUILD_DIR' in os.environ: - TRAVIS_BUILD_DIR = os.environ.get('TRAVIS_BUILD_DIR') -else: - sys.exit("Environment variable TRAVIS_BUILD_DIR is unavailable") - -if 'PYPI_PASSWORD' in os.environ: - PYPI_PASSWORD = os.environ.get('PYPI_PASSWORD') -else: - 
sys.exit("Environment variable PYPI_PASSWORD is unavailable") - - -# --- Helpers - - -def check_version(): - """Fail deploy if tag version doesn't match version""" - logger.log_start("Checking versions") - if TRAVIS_TAG != _version.__build_version__: - sys.exit("Version extracted from project doesn't match the TRAVIS_TAG variable. TRAVIS_TAG: {}, __build_version__: {}!".format(TRAVIS_TAG, _version.__build_version__)) - else: - logger.log_info("Versions match!") - logger.log_done() - - -def write_config(): - """Writes an array of lines to the PyPi config file""" - logger.log_start("Writing ~/.pypirc file") - lines = [ - '[distutils]\n', - 'index-servers =\n', - ' %s\n' % DEFAULT_REPO, - '\n', - '[%s]\n' % DEFAULT_REPO, - 'repository=%s\n' % DEFAULT_SERVER, - 'username=snowplow\n', - 'password=%s\n' % PYPI_PASSWORD - ] - - with open(PYPIRC_FILE, 'w') as outfile: - for line in lines: - outfile.write(line) - logger.log_info("The ~/.pypirc file has been written!") - logger.log_done() - - -def deploy_to_pypi(): - """Deploys the release to PyPi""" - logger.log_start("Deploying to PyPi") - os.chdir(TRAVIS_BUILD_DIR) - utils.execute("python setup.py register -r pypi", shell=True) - utils.execute("python setup.py sdist upload -r pypi", shell=True) - logger.log_info("Module deployed to PyPi!") - logger.log_done() - - -# --- Main - - -if __name__ == "__main__": - logger.log_header("Deploying snowplow-python-tracker to PyPi") - check_version() - write_config() - deploy_to_pypi() - logger.log_footer("Deployed version %s to PyPi!" 
% TRAVIS_TAG) diff --git a/CHANGES.txt b/CHANGES.txt index fe6d5d23..6a56dedb 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,140 @@ -Version 0.8.0 (2016-09-xx) +Version 1.1.0 (2025-02-20) +-------------------------- +Bump Ubuntu Version in GH Actions (#375) +Avoid installing types-requests at run-time (#370) (Thanks to @edgarrmondragon) + +Version 1.0.4 (2024-11-19) +-------------------------- +Test with Python 3.13 (#365) (Thanks to @edgarrmondragon) +Shorten automatic github release title (#352) + +Version 1.0.3 (2024-08-27) +-------------------------- +Fix docs action (close #367) +Update `on_success` docstring (close #358) +Add py.typed to package (close #360) (Thanks to @edgarrmondragon) +Update typing +Fix `PagePing`, `PageView`, and `StructuredEvent` property getters (close #361) + +Version 1.0.2 (2024-02-26) +-------------------------- +Add Python 3.12 to CI tests (#356) (Thanks to @edgarrmondragon) + +Version 1.0.1 (2023-07-12) +-------------------------- +Fix tstamp parameter in track_self_describing_event (#350) (Thanks to @andehen) + +Version 1.0.0 (2023-06-16) +-------------------------- +Remove Redis and Celery Emitters (#335) +Make tracker namespace mandatory (#337) +Track function to return event_id (#338) +Fix namespace assignment in Snowplow API (#341) +Refactor track_xxx() methods (#343) +Update payload builder to combine event subjects (#347) + +Version 0.15.0 (2023-04-19) +--------------------------- +Use Requests Session for sending events (#221) +Add Redis example app (#322) + +Version 0.14.0 (2023-03-21) +--------------------------- +Adds deprecation warnings for V1 changes (#315) +Update GH actions to use Node16 (#317) +Adds event store parameter to Snowplow interface (#320) +Adds missing parameters to async emitter (#323) + +Version 0.13.0 (2023-01-24) +--------------------------- +Adds Snowplow Interface (#295) +Adds retry for failed events (#296) +Adds customisable retry codes (#297) +Adds EventStore with max limit (#309) +Adds 
Snowplow Example App (#302) +Fix Collector URL with trailing '/' (#300) +Rename unstruct_event to self_describing_event (#298) +Upgrade `set-output` in cd (#294) + +Version 0.12.0 (2022-11-03) +--------------------------- +Adds Domain Session ID and Domain Session Index to Subject class (#282) (Thanks to @cpnat) +Add support for Python 3.11 (#286) +Change default protocol to HTTPS in the Emitter (#14) +Change default method to POST in the Emitter (#289) +Update Docker base image (#283) (Thanks to @cpnat) + +Version 0.11.0 (2022-10-06) +--------------------------- +Update README file (#264) +Update CONTRIBUTING.md file (#265) +Add API doc generation in CI (#277) +Add Demo App (#279) +Fix failing build in Dockerfile (#266) +Bump Copyright to 2022 (#271) +Update README.rst to README.md (#270) + +Version 0.10.0 (2021-12-16) +-------------------------- +Add Python 3.10 support (#254) +Add configurable timeout for HTTP requests (#258) + +Version 0.9.1 (2021-10-26) +-------------------------- +Update python versions in run-tests script (#256) +Fix pycontracts incompatibility with pyparsing v3 (#255) + +Version 0.9.0 (2021-04-23) +-------------------------- +Fix items default value issue in track_ecommerce_transaction (#252) +Make optional timestamp argument set 'true timestamp' rather than 'device created timestamp' (#251) +Pass successfully sent events through to on_success callback (#228) +Restrict endpoint parameter as non-empty string for Emitter (#249) +Add ability to set Subject per event (#158) +Extract correct version in deploy workflow (#182) +Support Unicode in adding events to buffer (#162) +Use mocks for unit tests (#250) +Replace regular file test operator in run-tests.sh (#247) +Bump dependencies (#246) +Add a Python 3 wheel to pypi (#244) +Make sure that tracker attaches timestamp even if wrong type was passed (#190) +Correct capitalization of Content-Type header (#152) +Replace deprecating syntax (#243) +Allow celery and redis to be optional dependencies 
(#232) +Update gevent to the last major version (#233) +Switch to GitHub Actions (#234) +Allow custom json encoder to be passed when configuring tracker (#242) +Update Copyright notices to 2021 (#241) +Pin decorator package version for Python2 platforms (#245) + +Version 0.8.4 (2020-10-10) +-------------------------- +Fix incompatible versions of greenlet and gevent (closes #236) +Update build to Active Python Releases (closes #237) +Add Snyk monitoring (closes #238) +Update Copyright notices to 2020 (closes #235) + +Version 0.8.3 (2019-06-28) +-------------------------- +Fix test_bytelimit test (#227) +Initialize Celery in CeleryEmitter (#226) +Allow non ascii characters to be encoded using Base64 (#194) +Allow empty strings in string_or_none contract (#184) +Fix on_failure param docstring description (#225) +Bump max version requirement of redis (#223) +Remove Vagrant & Ansible (#222) + +Version 0.8.2 (2018-12-01) +-------------------------- +Fix date for 0.8.0 release in CHANGELOG (#183) +Remove Python 3.3 from tests (#206) +Update PyPI deployment to use Twine (#207) +Add version bounds for all dependencies (#208) +Upgrade Celery to 4.x (#210) +Fix flush argument clash with async keyword (#211) +Add Python 3.7 to test suite (close #212) + +Version 0.8.0 (2016-10-13) -------------------------- Add byte_limit to Emitter (#170) Add support for dvce_sent_tstamp (#159) @@ -71,7 +207,7 @@ Version 0.4.0 (2014-06-10) -------------------------- Migrated unstructured events to self-describing JSON (#87) Migrated custom contexts to self-describing JSON (#88) -Gave separate events within an ecommerce transaction different transaction IDs (#89) +Gave separate events within an ecommerce transaction different transaction IDs (#89) Added https support for tracker (#81) Added callbacks for flushing (#78) Added Python-logging (#76) @@ -103,7 +239,7 @@ Version 0.2.0 (2014-04-15) Fixed Pycontracts dependency (#63) Made unrequired tracker method arguments optional (#40) Linked the 
Technical Docs and Setup Guide images to the appropriate pages (#60) -Changed API to no longer specify a collector URL option (#57) +Changed API to no longer specify a collector URL option (#57) Removed the "URL from Cloudfront subdomain" option (#56) Started sending event vendor parameter through on querystring (#55) Changed track screen view to use an unstructured event (#53) diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..dd4a535e --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,370 @@ +# Snowplow Python Tracker - CLAUDE.md + +## Project Overview + +The Snowplow Python Tracker is a public Python library for sending analytics events to Snowplow collectors. It enables developers to integrate Snowplow analytics into Python applications, games, and web servers. The library provides a robust event tracking system with support for various event types, custom contexts, and reliable event delivery through configurable emitters. + +**Key Technologies:** +- Python 3.8+ (supported versions: 3.8-3.13) +- requests library for HTTP communication +- typing_extensions for enhanced type hints +- Event-driven architecture with schema validation +- Asynchronous and synchronous event emission + +## Development Commands + +```bash +# Install dependencies +pip install -r requirements-test.txt + +# Run tests +./run-tests.sh + +# Run specific test module +python -m pytest snowplow_tracker/test/unit/test_tracker.py + +# Run integration tests +python -m pytest snowplow_tracker/test/integration/ + +# Install package in development mode +pip install -e . + +# Build Docker image for testing +docker build -t snowplow-python-tracker . 
+docker run snowplow-python-tracker +``` + +## Architecture + +The tracker follows a layered architecture with clear separation of concerns: + +``` +snowplow_tracker/ +├── Core Components +│ ├── tracker.py # Main Tracker class orchestrating events +│ ├── snowplow.py # High-level API for tracker management +│ └── subject.py # User/device context management +├── Event Layer (events/) +│ ├── event.py # Base Event class +│ ├── page_view.py # PageView event +│ ├── structured_event.py # Structured events +│ └── self_describing.py # Custom schema events +├── Emission Layer +│ ├── emitters.py # Sync/Async event transmission +│ ├── event_store.py # Event buffering and persistence +│ └── payload.py # Event payload construction +├── Configuration +│ ├── tracker_configuration.py +│ └── emitter_configuration.py +└── Validation + ├── contracts.py # Runtime validation + └── typing.py # Type definitions +``` + +## Core Architectural Principles + +1. **Schema-First Design**: All events conform to Iglu schemas for consistency +2. **Separation of Concerns**: Event creation, validation, and emission are separate +3. **Configuration Objects**: Use dedicated configuration classes, not raw dictionaries +4. **Type Safety**: Extensive use of type hints and Protocol classes +5. **Fail-Safe Delivery**: Events are buffered and retried on failure +6. 
**Immutability**: Event objects are largely immutable after creation + +## Layer Organization & Responsibilities + +### Application Layer (snowplow.py) +- Singleton pattern for global tracker management +- Factory methods for tracker creation +- Namespace-based tracker registry + +### Domain Layer (tracker.py, events/) +- Event creation and validation +- Subject (user/device) context management +- Event enrichment with standard fields + +### Infrastructure Layer (emitters.py, event_store.py) +- HTTP communication with collectors +- Event buffering and retry logic +- Async/sync emission strategies + +### Cross-Cutting (contracts.py, typing.py) +- Runtime validation with togglable contracts +- Shared type definitions and protocols + +## Critical Import Patterns + +```python +# ✅ Import from package root for public API +from snowplow_tracker import Snowplow, Tracker, Subject +from snowplow_tracker import EmitterConfiguration, TrackerConfiguration + +# ✅ Import specific event classes +from snowplow_tracker.events import PageView, StructuredEvent + +# ❌ Don't import from internal modules +from snowplow_tracker.emitters import Requester # Internal class + +# ✅ Use typing module for type hints +from snowplow_tracker.typing import PayloadDict, Method +``` + +## Essential Library Patterns + +### Tracker Initialization Pattern +```python +# ✅ Use Snowplow factory with configuration objects +tracker = Snowplow.create_tracker( + namespace="my_app", + endpoint="https://collector.example.com", + tracker_config=TrackerConfiguration(encode_base64=True), + emitter_config=EmitterConfiguration(batch_size=10) +) + +# ❌ Don't instantiate Tracker directly without Snowplow +tracker = Tracker("namespace", emitter) # Missing registration +``` + +### Event Creation Pattern +```python +# ✅ Use event classes with named parameters +page_view = PageView( + page_url="https://example.com", + page_title="Homepage" +) + +# ✅ Add contexts to events +event.context = [SelfDescribingJson(schema, data)] 
+ +# ❌ Don't modify event payload directly +event.payload.add("custom", "value") # Breaks schema validation +``` + +### Subject Management Pattern +```python +# ✅ Set subject at tracker or event level +subject = Subject() +subject.set_user_id("user123") +tracker = Snowplow.create_tracker(..., subject=subject) + +# ✅ Override subject per event +event = PageView(..., event_subject=Subject()) + +# ❌ Don't modify subject after tracker creation +tracker.subject.set_user_id("new_id") # Not thread-safe +``` + +### Emitter Configuration Pattern +```python +# ✅ Configure retry and buffering behavior +config = EmitterConfiguration( + batch_size=50, + buffer_capacity=10000, + custom_retry_codes={429: True, 500: True} +) + +# ❌ Don't use magic numbers +emitter = Emitter(endpoint, 443, "post", 100) # Use config object +``` + +## Model Organization Pattern + +### Event Hierarchy +```python +Event (base class) +├── PageView # Web page views +├── PagePing # Page engagement tracking +├── ScreenView # Mobile screen views +├── StructuredEvent # Category/action/label/property/value events +└── SelfDescribing # Custom schema events +``` + +### Data Structures +```python +# SelfDescribingJson for custom contexts +context = SelfDescribingJson( + "iglu:com.example/context/jsonschema/1-0-0", + {"key": "value"} +) + +# Payload for event data assembly +payload = Payload() +payload.add("e", "pv") # Event type +payload.add_dict({"aid": "app_id"}) +``` + +## Common Pitfalls & Solutions + +### Contract Validation +```python +# ❌ Passing invalid parameters silently fails in production +tracker.track_page_view("") # Empty URL + +# ✅ Enable contracts during development +from snowplow_tracker import enable_contracts +enable_contracts() +``` + +### Event Buffering +```python +# ❌ Not flushing events before shutdown +tracker.track(event) +sys.exit() # Events lost! 
+ +# ✅ Always flush before exit +tracker.track(event) +tracker.flush() +``` + +### Thread Safety +```python +# ❌ Sharing emitter across threads +emitter = Emitter(endpoint) +# Multiple threads using same emitter + +# ✅ Use AsyncEmitter for concurrent scenarios +emitter = AsyncEmitter(endpoint, thread_count=2) +``` + +### Schema Validation +```python +# ❌ Hardcoding schema strings +schema = "iglu:com.snowplow/event/1-0-0" + +# ✅ Use constants for schemas +from snowplow_tracker.constants import CONTEXT_SCHEMA +``` + +## File Structure Template + +``` +project/ +├── tracker_app.py # Application entry point +├── config/ +│ └── tracker_config.py # Tracker configuration +├── events/ +│ ├── __init__.py +│ └── custom_events.py # Custom event definitions +├── contexts/ +│ └── custom_contexts.py # Custom context schemas +└── tests/ + ├── unit/ + │ └── test_events.py + └── integration/ + └── test_emission.py +``` + +## Testing Patterns + +### Unit Testing +```python +# ✅ Mock emitters for unit tests +@mock.patch('snowplow_tracker.emitters.Emitter') +def test_track_event(mock_emitter): + tracker = Tracker("test", mock_emitter) + tracker.track(PageView(...)) + mock_emitter.input.assert_called_once() +``` + +### Contract Testing +```python +# ✅ Use ContractsDisabled context manager +with ContractsDisabled(): + # Test invalid inputs without raising + tracker.track_page_view(None) +``` + +### Integration Testing +```python +# ✅ Test against mock collector +def test_event_delivery(): + with requests_mock.Mocker() as m: + m.post("https://collector.test/com.snowplow/tp2") + # Track and verify delivery +``` + +## Configuration Best Practices + +### Environment-Based Configuration +```python +# ✅ Use environment variables +import os +endpoint = os.getenv("SNOWPLOW_COLLECTOR_URL") +namespace = os.getenv("SNOWPLOW_NAMESPACE", "default") +``` + +### Retry Configuration +```python +# ✅ Configure intelligent retry behavior +EmitterConfiguration( + max_retry_delay_seconds=120, + 
custom_retry_codes={ + 429: True, # Retry rate limits + 500: True, # Retry server errors + 400: False # Don't retry bad requests + } +) +``` + +## Quick Reference + +### Import Checklist +- [ ] Import from `snowplow_tracker` package root +- [ ] Use `EmitterConfiguration` and `TrackerConfiguration` +- [ ] Import specific event classes from `snowplow_tracker.events` +- [ ] Use type hints from `snowplow_tracker.typing` + +### Event Tracking Checklist +- [ ] Create tracker with `Snowplow.create_tracker()` +- [ ] Configure emitter with appropriate batch size +- [ ] Set subject context if tracking users +- [ ] Use appropriate event class for the use case +- [ ] Add custom contexts as `SelfDescribingJson` +- [ ] Call `flush()` before application shutdown +- [ ] Handle failures with callbacks + +### Common Event Types +- `PageView`: Web page views +- `ScreenView`: Mobile app screens +- `StructuredEvent`: Generic events with 5 parameters +- `SelfDescribing`: Custom schema events +- `PagePing`: Engagement tracking + +## Contributing to CLAUDE.md + +When adding or updating content in this document, please follow these guidelines: + +### File Size Limit +- **CLAUDE.md must not exceed 40KB** (currently ~19KB) +- Check file size after updates: `wc -c CLAUDE.md` +- Remove outdated content if approaching the limit + +### Code Examples +- Keep all code examples **4 lines or fewer** +- Focus on the essential pattern, not complete implementations +- Use `# ❌` and `# ✅` to clearly show wrong vs right approaches + +### Content Organization +- Add new patterns to existing sections when possible +- Create new sections sparingly to maintain structure +- Update the architectural principles section for major changes +- Ensure examples follow current codebase conventions + +### Quality Standards +- Test any new patterns in actual code before documenting +- Verify imports and syntax are correct for the codebase +- Keep language concise and actionable +- Focus on "what" and "how", minimize 
"why" explanations + +### Multiple CLAUDE.md Files +- **Directory-specific CLAUDE.md files** can be created for specialized modules +- Follow the same structure and guidelines as this root CLAUDE.md +- Keep them focused on directory-specific patterns and conventions +- Maximum 20KB per directory-specific CLAUDE.md file + +### Instructions for LLMs +When editing files in this repository, **always check for CLAUDE.md guidance**: + +1. **Look for CLAUDE.md in the same directory** as the file being edited +2. **If not found, check parent directories** recursively up to project root +3. **Follow the patterns and conventions** described in the applicable CLAUDE.md +4. **Prioritize directory-specific guidance** over root-level guidance when conflicts exist \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..254afe73 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,80 @@ +# Contributing + +The Snowplow Python Tracker is maintained by the Engineering team at Snowplow Analytics. We welcome suggestions for improvements and bug fixes to all Snowplow Trackers. + +We are extremely grateful for all contributions we receive, whether that is reporting an issue or a change to the code which can be made in the form of a pull request. + +For support requests, please use our community support Discourse forum: https://discourse.snowplow.io/. + +## Setting up an Environment + +Instructions on how to build and run tests are available in the [README.md](README.md). The README will also list any requirements that you will need to install first before being able to build and run the tests. + +You should ensure you are comfortable building and testing the existing release before adding new functionality or fixing issues. + +## Issues + +### Creating an issue + +The project contains an issue template which should help guiding you through the process. 
However, please keep in mind that support requests should go to our Discourse forum: https://discourse.snowplow.io/ and not GitHub issues. + +It's also a good idea to log an issue before starting to work on a pull request to discuss it with the maintainers. A pull request is just one solution to a problem and it is often a good idea to talk about the problem with the maintainers first. + +### Working on an issue + +If you see an issue you would like to work on, please let us know in the issue! That will help us in terms of scheduling and +not doubling the amount of work. + +If you don't know where to start contributing, you can look at +[the issues labeled `good first issue`](https://github.com/snowplow/snowplow-python-tracker/labels/good%20first%20issue). + +## Pull requests + +These are a few guidelines to keep in mind when opening pull requests. + +### Guidelines + +Please supply a good PR description. These are very helpful and help the maintainers to understand _why_ the change has been made, not just _what_ changes have been made. + +Please try and keep your PR to a single feature or fix. This might mean breaking up a feature into multiple PRs but this makes it easier for the maintainers to review and also reduces the risk in each change. + +Please review your own PR first, as you would if you were a reviewer. This is a great way to spot any mistakes you made when writing the change. Additionally, ensure your code compiles and all tests pass. + +### Commit hygiene + +We keep a strict 1-to-1 correspondence between commits and issues, as such our commit messages are formatted in the following +fashion: + +`Issue Description (closes #1234)` + +for example: + +`Fix Issue with Tracker (closes #1234)` + +### Writing tests + +Whenever necessary, it's good practice to add the corresponding tests to whichever feature you are working on. +Any non-trivial PR must have tests and will not be accepted without them. 
+ +### Feedback cycle + +Reviews should happen fairly quickly during weekdays. +If you feel your pull request has been forgotten, please ping one or more maintainers in the pull request. + +### Getting your pull request merged + +If your pull request is fairly chunky, there might be a non-trivial delay between the moment the pull request is approved and the moment it gets merged. This is because your pull request will have been scheduled for a specific milestone which might or might not be actively worked on by a maintainer at the moment. + +### Contributor license agreement + +We require outside contributors to sign a Contributor license agreement (or CLA) before we can merge their pull requests. +You can find more information on the topic in [the dedicated wiki page](https://docs.snowplow.io/docs/contributing/contributor-license-agreement/). +The @snowplowcla bot will guide you through the process. + +## Getting in touch + +### Community support requests + +Please do not log an issue if you are asking for support, all of our community support requests go through our Discourse forum: https://discourse.snowplow.io/. + +Posting your problem there ensures more people will see it and you should get support faster than creating a new issue on GitHub. Please do create a new issue on GitHub if you think you've found a bug though! 
\ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..d79a72cd --- /dev/null +++ b/Dockerfile @@ -0,0 +1,18 @@ +FROM debian:bullseye-slim + +RUN apt-get update && apt-get install -y --no-install-recommends make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev \ + libsqlite3-dev wget curl llvm libncurses5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev \ + mecab-ipadic-utf8 git ca-certificates + +ENV HOME /root +ENV PYENV_ROOT $HOME/.pyenv +ENV PATH $PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH +RUN git clone --depth=1 https://github.com/pyenv/pyenv.git $PYENV_ROOT +RUN git clone --depth=1 https://github.com/pyenv/pyenv-virtualenv.git $PYENV_ROOT/plugins/pyenv-virtualenv + +RUN pyenv install 3.5.10 && pyenv install 3.6.15 && pyenv install 3.7.17 && pyenv install 3.8.20 && pyenv install 3.9.20 && pyenv install 3.10.15 && pyenv install 3.11.10 && pyenv install 3.12.7 && pyenv install 3.13.0 + +WORKDIR /app +COPY . . +RUN [ "./run-tests.sh", "deploy"] +CMD [ "./run-tests.sh", "test"] diff --git a/LICENSE-2.0.txt b/LICENSE similarity index 99% rename from LICENSE-2.0.txt rename to LICENSE index 7a4a3ea2..db047f7e 100644 --- a/LICENSE-2.0.txt +++ b/LICENSE @@ -1,4 +1,3 @@ - Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ @@ -187,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright [yyyy] [name of copyright owner] + Copyright 2013-2023 Snowplow Analytics Ltd. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -199,4 +198,4 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file + limitations under the License. 
diff --git a/README.md b/README.md new file mode 100644 index 00000000..694d3ce9 --- /dev/null +++ b/README.md @@ -0,0 +1,62 @@ +Python Analytics for Snowplow +============================= + +[![Early Release](https://img.shields.io/static/v1?style=flat&label=Snowplow&message=Early%20Release&color=014477&labelColor=9ba0aa&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAMAAAAoLQ9TAAAAeFBMVEVMaXGXANeYANeXANZbAJmXANeUANSQAM+XANeMAMpaAJhZAJeZANiXANaXANaOAM2WANVnAKWXANZ9ALtmAKVaAJmXANZaAJlXAJZdAJxaAJlZAJdbAJlbAJmQAM+UANKZANhhAJ+EAL+BAL9oAKZnAKVjAKF1ALNBd8J1AAAAKHRSTlMAa1hWXyteBTQJIEwRgUh2JjJon21wcBgNfmc+JlOBQjwezWF2l5dXzkW3/wAAAHpJREFUeNokhQOCA1EAxTL85hi7dXv/E5YPCYBq5DeN4pcqV1XbtW/xTVMIMAZE0cBHEaZhBmIQwCFofeprPUHqjmD/+7peztd62dWQRkvrQayXkn01f/gWp2CrxfjY7rcZ5V7DEMDQgmEozFpZqLUYDsNwOqbnMLwPAJEwCopZxKttAAAAAElFTkSuQmCC)](https://docs.snowplow.io/docs/collecting-data/collecting-from-own-applications/tracker-maintenance-classification/)[![Build Status](https://github.com/snowplow/snowplow-python-tracker/actions/workflows/ci.yml/badge.svg)](https://github.com/snowplow/snowplow-python-tracker/actions)[![Test Coverage](https://img.shields.io/coveralls/github/snowplow/snowplow-python-tracker)](https://coveralls.io/github/snowplow/snowplow-python-tracker?branch=master) [![image](http://img.shields.io/badge/license-Apache--2-blue.svg?style=flat)](http://www.apache.org/licenses/LICENSE-2.0) + + +[![Pypi Snowplow Tracker](https://img.shields.io/pypi/v/snowplow-tracker)](https://pypi.org/project/snowplow-tracker/)[![Python Versions](https://img.shields.io/pypi/pyversions/snowplow-tracker)](https://pypi.org/project/snowplow-tracker/)[![Monthly Downloads](https://img.shields.io/pypi/dm/snowplow-tracker)](https://pypi.org/project/snowplow-tracker/) + +Overview +-------- + +Add analytics to your Python apps and Python games with the +[Snowplow](http://snowplow.io) event tracker for +[Python](http://python.org). 
+ +With this tracker you can collect event data from your Python-based +applications, games or Python web servers/frameworks. + +Find out more +------------- + + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + + | Snowplow Docs | API Docs | Contributing | + | :----: | :----: | :----: | + | ![techdocs](https://d3i6fms1cm1j0i.cloudfront.net/github/images/techdocs.png) | ![setup](https://d3i6fms1cm1j0i.cloudfront.net/github/images/setup.png) | ![contributing](https://d3i6fms1cm1j0i.cloudfront.net/github/images/contributing.png) | + | [Snowplow Docs](https://docs.snowplow.io/docs/collecting-data/collecting-from-own-applications/python-tracker/) | [API Docs](https://snowplow.github.io/snowplow-python-tracker/index.html)| [Contributing](https://github.com/snowplow/snowplow-python-tracker/blob/master/CONTRIBUTING.md) | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +Python Support +-------------- + +| Python version | snowplow-tracker version | +| :----: | :----: | +| \>=3.8 | > 1.1.0 | +| \>=3.5 | > 0.10.0 | +| 2.7 | > 0.9.1 | + +Maintainer Quickstart +--------------------- + +Assuming [docker](https://www.docker.com/) is installed + + host$ git clone git@github.com:snowplow/snowplow-python-tracker.git + host$ cd snowplow-python-tracker + host$ docker build -t snowplow-python-tracker . && docker run snowplow-python-tracker + +Copyright and license +--------------------- + +The Snowplow Python Tracker is copyright 2013-2023 Snowplow Analytics +Ltd. 
+ +Licensed under the [Apache License, Version +2.0](http://www.apache.org/licenses/LICENSE-2.0) (the \"License\"); you +may not use this software except in compliance with the License. + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an \"AS IS\" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/README.rst b/README.rst deleted file mode 100644 index b3d5079e..00000000 --- a/README.rst +++ /dev/null @@ -1,87 +0,0 @@ -====================================================== -Python Analytics for Snowplow -====================================================== -.. image:: https://travis-ci.org/snowplow/snowplow-python-tracker.png?branch=master - :alt: Build Status - :target: https://travis-ci.org/snowplow/snowplow-python-tracker -.. image:: https://badge.fury.io/py/snowplow-tracker.png - :target: http://badge.fury.io/py/snowplow-tracker -.. image:: https://coveralls.io/repos/snowplow/snowplow-python-tracker/badge.png - :target: https://coveralls.io/r/snowplow/snowplow-python-tracker -.. image:: http://img.shields.io/badge/license-Apache--2-blue.svg?style=flat - :target: http://www.apache.org/licenses/LICENSE-2.0 - - -Overview -######## - -Add analytics to your Python apps and Python games with the Snowplow_ event tracker for Python_. - -.. _Snowplow: http://snowplowanalytics.com -.. _Python: http://python.org - -With this tracker you can collect event data from your Python-based applications, games or Python web servers/frameworks. 
- -Find out more -############# - -+---------------------------------+---------------------------+-------------------------+-----------------------------------+ -| Technical Docs | Setup Guide | Roadmap | Contributing | -+=================================+===========================+=========================+===================================+ -| |techdocs|_ | |setup|_ | |roadmap| | |contributing| | -+---------------------------------+---------------------------+-------------------------+-----------------------------------+ -| `Technical Docs`_ | `Setup Guide`_ | `Roadmap`_ | `Contributing`_ | -+---------------------------------+---------------------------+-------------------------+-----------------------------------+ - -.. |techdocs| image:: https://d3i6fms1cm1j0i.cloudfront.net/github/images/techdocs.png -.. |setup| image:: https://d3i6fms1cm1j0i.cloudfront.net/github/images/setup.png -.. |roadmap| image:: https://d3i6fms1cm1j0i.cloudfront.net/github/images/roadmap.png -.. |contributing| image:: https://d3i6fms1cm1j0i.cloudfront.net/github/images/contributing.png - -.. _techdocs: https://github.com/snowplow/snowplow/wiki/Python-Tracker -.. _setup: https://github.com/snowplow/snowplow/wiki/Python-Tracker-Setup - -.. _`Technical Docs`: https://github.com/snowplow/snowplow/wiki/Python-Tracker -.. _`Setup Guide`: https://github.com/snowplow/snowplow/wiki/Python-Tracker-Setup -.. _`Roadmap`: https://github.com/snowplow/snowplow/wiki/Python-Tracker-Roadmap -.. _`Contributing`: https://github.com/snowplow/snowplow/wiki/Python-Tracker-Contributing - -Contributing quickstart -####################### - -Assuming Git, Vagrant_ and VirtualBox_ are installed: - -:: - - host$ git clone git@github.com:snowplow/snowplow-python-tracker.git - host$ vagrant up && vagrant ssh - guest$ cd /vagrant - guest$ ./run-tests.sh deploy - guest$ ./run-tests.sh test - -.. _Vagrant: http://docs.vagrantup.com/v2/installation/index.html -.. 
_VirtualBox: https://www.virtualbox.org/wiki/Downloads - -Publishing -########## - -:: - - host$ vagrant push - -Copyright and license -##################### - -The Snowplow Python Tracker is copyright 2013-2014 Snowplow Analytics Ltd. - -Licensed under the `Apache License, Version 2.0`_ (the "License"); -you may not use this software except in compliance with the License. - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - - -.. _Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0 diff --git a/Vagrantfile b/Vagrantfile deleted file mode 100644 index 04da4b6d..00000000 --- a/Vagrantfile +++ /dev/null @@ -1,23 +0,0 @@ -Vagrant.configure("2") do |config| - - config.vm.box = "ubuntu/trusty64" - config.vm.hostname = "snowplow-python-tracker" - config.ssh.forward_agent = true - - config.vm.provider :virtualbox do |vb| - vb.name = Dir.pwd().split("/")[-1] + "-" + Time.now.to_f.to_i.to_s - vb.customize ["modifyvm", :id, "--natdnshostresolver1", "on"] - vb.customize [ "guestproperty", "set", :id, "--timesync-threshold", 10000 ] - vb.memory = 2048 - end - - config.vm.provision :shell do |sh| - sh.path = "vagrant/up.bash" - end - - # Requires Vagrant 1.7.0+ - config.push.define "publish", strategy: "local-exec" do |push| - push.script = "vagrant/push.bash" - end - -end diff --git a/docs/.nojekyll b/docs/.nojekyll new file mode 100644 index 00000000..e69de29b diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000..92dd33a1 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. 
+SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 00000000..83cf06c1 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=_build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..36f69b0c --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,15 @@ + +sphinx==7.1.2 +sphinx_rtd_theme==2.0.0 +sphinx_copybutton==0.5.2 +sphinx_minipres==0.2.1 +sphinx_tabs==3.4.5 + +sphinx_togglebutton==0.3.2 +# Transitive dependency of togglebutton causing: +# https://security.snyk.io/vuln/SNYK-PYTHON-SETUPTOOLS-7448482 +setuptools==70.0.0 + +sphinx-autobuild==2021.3.14 +myst_nb>0.8.3 +sphinx_rtd_theme_ext_color_contrast==0.3.2 diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 00000000..88d210c3 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,63 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. 
+# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + +import os +import sys +sys.path.insert(0, os.path.abspath('.')) +sys.path.insert(0, os.path.abspath('../..')) + + + +# -- Project information ----------------------------------------------------- + +project = 'Snowplow Python Tracker' +copyright = "2023, Alex Dean, Paul Boocock, Matus Tomlein, Jack Keene" +author = 'Alex Dean, Paul Boocock, Matus Tomlein, Jack Keene' + +# The full version, including alpha/beta/rc tags +release = "1.1.0" + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + "sphinx.ext.autodoc", + 'sphinx_rtd_theme' +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 00000000..4404d24c --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,25 @@ +.. Snowplow Python Tracker documentation master file, created by + sphinx-quickstart on Wed Jul 20 14:00:53 2022. 
+ You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Snowplow Python Tracker API Documentation +========================================== + +Overview +######## +Add analytics to your Python apps and Python games with the `Snowplow`_ event tracker for `Python`_. + +.. _Snowplow: https://snowplow.io +.. _Python: https://www.python.org/ + +With this tracker you can collect event data from your Python-based applications, games or Python web servers/frameworks. + +.. toctree:: + :maxdepth: 3 + :caption: Contents: + + modules + +* :ref:`genindex` +* :ref:`modindex` diff --git a/docs/source/modules.rst b/docs/source/modules.rst new file mode 100644 index 00000000..8e98bdff --- /dev/null +++ b/docs/source/modules.rst @@ -0,0 +1,7 @@ +snowplow_tracker +================ + +.. toctree:: + :maxdepth: 4 + + snowplow_tracker diff --git a/docs/source/snowplow_tracker.rst b/docs/source/snowplow_tracker.rst new file mode 100644 index 00000000..a1368439 --- /dev/null +++ b/docs/source/snowplow_tracker.rst @@ -0,0 +1,59 @@ +snowplow\_tracker package +========================= + +snowplow\_tracker.contracts module +---------------------------------- + +.. automodule:: snowplow_tracker.contracts + :members: + :undoc-members: + :show-inheritance: + +snowplow\_tracker.emitters module +--------------------------------- + +.. automodule:: snowplow_tracker.emitters + :members: + :undoc-members: + :show-inheritance: + +snowplow\_tracker.payload module +-------------------------------- + +.. automodule:: snowplow_tracker.payload + :members: + :undoc-members: + :show-inheritance: + +snowplow\_tracker.self\_describing\_json module +----------------------------------------------- + +.. automodule:: snowplow_tracker.self_describing_json + :members: + :undoc-members: + :show-inheritance: + +snowplow\_tracker.subject module +-------------------------------- + +.. 
automodule:: snowplow_tracker.subject + :members: + :undoc-members: + :show-inheritance: + +snowplow\_tracker.tracker module +-------------------------------- + +.. automodule:: snowplow_tracker.tracker + :members: + :undoc-members: + :show-inheritance: + +snowplow\_tracker.typing module +------------------------------- + +.. automodule:: snowplow_tracker.typing + :members: + :undoc-members: + :show-inheritance: + diff --git a/examples/redis_example/README.md b/examples/redis_example/README.md new file mode 100644 index 00000000..59b1aeb7 --- /dev/null +++ b/examples/redis_example/README.md @@ -0,0 +1,26 @@ +# Redis Example App + +This example shows how to set up the Python tracker with a Redis database and a Redis worker to forward events to a Snowplow pipeline. + +#### Installation +- Install the Python tracker from the root folder of the project. + +`python setup.py install` + +- Install redis for your machine. More information can be found [here](https://redis.io/docs/getting-started/installation/) + +`brew install redis` + +- Run `redis-server` to check your redis installation, to stop the server enter `ctrl+c`. + +#### Usage +Navigate to the example folder. + +`cd examples/redis_example` + +This example has two programmes, `redis_app.py` tracks events and sends them to a redis database, `redis_worker.py` then forwards these events onto a Snowplow pipeline. + +To send events to your pipeline, run `redis-server`, followed by the `redis_worker.py {{your_collector_endpoint}}` and finally `redis_app.py`. You should see 5 events in your pipeline. 
+ + + diff --git a/examples/redis_example/redis_app.py b/examples/redis_example/redis_app.py new file mode 100644 index 00000000..553a547f --- /dev/null +++ b/examples/redis_example/redis_app.py @@ -0,0 +1,87 @@ +from snowplow_tracker import ( + Tracker, + ScreenView, + PagePing, + PageView, + SelfDescribing, + StructuredEvent, + SelfDescribingJson, +) +from snowplow_tracker.typing import PayloadDict +import json +import redis +import logging + +# logging +logging.basicConfig() +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +class RedisEmitter(object): + """ + Sends Snowplow events to a Redis database + """ + + def __init__(self, rdb=None, key: str = "redis_key") -> None: + """ + :param rdb: Optional custom Redis database + :type rdb: redis | None + :param key: The Redis key for the list of events + :type key: string + """ + + if rdb is None: + rdb = redis.StrictRedis() + + self.rdb = rdb + self.key = key + + def input(self, payload: PayloadDict) -> None: + """ + :param payload: The event properties + :type payload: dict(string:*) + """ + logger.info("Pushing event to Redis queue...") + self.rdb.rpush(self.key, json.dumps(payload)) + logger.info("Finished sending event to Redis.") + + def flush(self) -> None: + logger.warning("The RedisEmitter class does not need to be flushed") + return + + def sync_flush(self) -> None: + self.flush() + + +def main(): + emitter = RedisEmitter() + + t = Tracker(namespace="snowplow_tracker", emitters=emitter) + + page_view = PageView(page_url="https://www.snowplow.io", page_title="Homepage") + t.track(page_view) + + page_ping = PagePing(page_url="https://www.snowplow.io", page_title="Homepage") + t.track(page_ping) + + link_click = SelfDescribing( + SelfDescribingJson( + "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1", + {"targetUrl": "https://www.snowplow.io"}, + ) + ) + t.track(link_click) + + id = t.get_uuid() + screen_view = ScreenView(id_=id, name="name") + t.track(screen_view) + + 
struct_event = StructuredEvent( + category="shop", action="add-to-basket", property_="pcs", value=2 + ) + t.track(struct_event) + + +if __name__ == "__main__": + main() diff --git a/examples/redis_example/redis_worker.py b/examples/redis_example/redis_worker.py new file mode 100644 index 00000000..6a190683 --- /dev/null +++ b/examples/redis_example/redis_worker.py @@ -0,0 +1,74 @@ +import sys +from snowplow_tracker import Emitter +from typing import Any +from snowplow_tracker.typing import PayloadDict +import json +import redis +import signal +import gevent +from gevent.pool import Pool + + +def get_url_from_args(): + if len(sys.argv) != 2: + raise ValueError("Collector Endpoint is required") + return sys.argv[1] + + +class RedisWorker: + def __init__(self, emitter: Emitter, key) -> None: + self.pool = Pool(5) + self.emitter = emitter + self.rdb = redis.StrictRedis() + self.key = key + + signal.signal(signal.SIGTERM, self.request_shutdown) + signal.signal(signal.SIGINT, self.request_shutdown) + signal.signal(signal.SIGQUIT, self.request_shutdown) + + def send(self, payload: PayloadDict) -> None: + """ + Send an event to an emitter + """ + self.emitter.input(payload) + + def pop_payload(self) -> None: + """ + Get a single event from Redis and send it + If the Redis queue is empty, sleep to avoid making continual requests + """ + payload = self.rdb.lpop(self.key) + if payload: + self.pool.spawn(self.send, json.loads(payload.decode("utf-8"))) + else: + gevent.sleep(5) + + def run(self) -> None: + """ + Run indefinitely + """ + self._shutdown = False + while not self._shutdown: + self.pop_payload() + self.pool.join(timeout=20) + + def request_shutdown(self, *args: Any) -> None: + """ + Halt the worker + """ + self._shutdown = True + + +def main(): + collector_url = get_url_from_args() + + # Configure Emitter + emitter = Emitter(collector_url, batch_size=1) + + # Setup worker + worker = RedisWorker(emitter=emitter, key="redis_key") + worker.run() + + +if __name__ == 
"__main__": + main() diff --git a/examples/redis_example/requirements.txt b/examples/redis_example/requirements.txt new file mode 100644 index 00000000..ac10dd44 --- /dev/null +++ b/examples/redis_example/requirements.txt @@ -0,0 +1,2 @@ +redis~=4.5 +gevent~=22.10 \ No newline at end of file diff --git a/examples/snowplow_api_example/README.md b/examples/snowplow_api_example/README.md new file mode 100644 index 00000000..6819757b --- /dev/null +++ b/examples/snowplow_api_example/README.md @@ -0,0 +1,18 @@ +# Snowplow API Example App + +This example shows how to set up the Python tracker with the Snowplow API to send events to a Snowplow pipeline. + +#### Installation +- Install the Python tracker from the root folder of the project. + +`python setup.py install` + +#### Usage +Navigate to the example folder. + +`cd examples/snowplow_api_example` + +To send events to your pipeline, run `snowplow_app.py {{your_collector_endpoint}}`. You should see 5 events in your pipeline. + + + diff --git a/examples/snowplow_api_example/snowplow_app.py b/examples/snowplow_api_example/snowplow_app.py new file mode 100644 index 00000000..1bbd21c8 --- /dev/null +++ b/examples/snowplow_api_example/snowplow_app.py @@ -0,0 +1,74 @@ +import sys +from snowplow_tracker import ( + Snowplow, + EmitterConfiguration, + Subject, + TrackerConfiguration, + SelfDescribingJson, + PagePing, + PageView, + ScreenView, + SelfDescribing, + StructuredEvent, +) + + +def get_url_from_args(): + if len(sys.argv) != 2: + raise ValueError("Collector Endpoint is required") + return sys.argv[1] + + +def main(): + collector_url = get_url_from_args() + # Configure Emitter + custom_retry_codes = {500: False, 401: True} + emitter_config = EmitterConfiguration( + batch_size=5, custom_retry_codes=custom_retry_codes + ) + + # Configure Tracker + tracker_config = TrackerConfiguration(encode_base64=True) + + # Initialise subject + subject = Subject() + subject.set_user_id("uid") + + Snowplow.create_tracker( + 
namespace="ns", + endpoint=collector_url, + app_id="app1", + subject=subject, + tracker_config=tracker_config, + emitter_config=emitter_config, + ) + + tracker = Snowplow.get_tracker("ns") + + page_view = PageView(page_url="https://www.snowplow.io", page_title="Homepage") + tracker.track(page_view) + + page_ping = PagePing(page_url="https://www.snowplow.io", page_title="Homepage") + tracker.track(page_ping) + + link_click = SelfDescribing( + SelfDescribingJson( + "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1", + {"targetUrl": "https://www.snowplow.io"}, + ) + ) + tracker.track(link_click) + + id = tracker.get_uuid() + screen_view = ScreenView(id_=id, name="name") + tracker.track(screen_view) + + struct_event = StructuredEvent( + category="shop", action="add-to-basket", property_="pcs", value=2 + ) + tracker.track(struct_event) + tracker.flush() + + +if __name__ == "__main__": + main() diff --git a/examples/tracker_api_example/README.md b/examples/tracker_api_example/README.md new file mode 100644 index 00000000..10392b17 --- /dev/null +++ b/examples/tracker_api_example/README.md @@ -0,0 +1,18 @@ +# Example App + +This example shows how to set up the Python tracker with the tracker API to send events to a Snowplow pipeline. + +#### Installation +- Install the Python tracker from the root folder of the project. + +`python setup.py install` + +#### Usage +Navigate to the example folder. + +`cd examples/tracker_api_example` + +To send events to your pipeline, run `app.py {{your_collector_endpoint}}`. You should see 5 events in your pipeline. 
+ + + diff --git a/examples/tracker_api_example/app.py b/examples/tracker_api_example/app.py new file mode 100644 index 00000000..41f520ce --- /dev/null +++ b/examples/tracker_api_example/app.py @@ -0,0 +1,76 @@ +from distutils.log import error +from snowplow_tracker import ( + Tracker, + Emitter, + Subject, + SelfDescribingJson, + PageView, + PagePing, + SelfDescribing, + ScreenView, + StructuredEvent, +) +import sys + + +def get_url_from_args(): + if len(sys.argv) != 2: + raise ValueError("Collector Endpoint is required") + return sys.argv[1] + + +def main(): + collector_url = get_url_from_args() + + e = Emitter(collector_url) + + s = Subject().set_platform("pc") + s.set_lang("en").set_user_id("test_user") + + t = Tracker(namespace="snowplow_tracker", emitters=e, subject=s) + + print("Sending events to " + e.endpoint) + + event_subject = Subject() + event_subject.set_color_depth(10) + + page_view = PageView( + page_url="https://www.snowplow.io", + page_title="Homepage", + event_subject=event_subject, + ) + t.track(page_view) + + page_ping = PagePing( + page_url="https://www.snowplow.io", + page_title="Homepage", + event_subject=t.subject, + ) + t.track(page_ping) + + link_click = SelfDescribing( + SelfDescribingJson( + "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1", + {"targetUrl": "https://www.snowplow.io"}, + ), + event_subject=t.subject, + ) + t.track(link_click) + + id = t.get_uuid() + screen_view = ScreenView(id_=id, name="name", event_subject=t.subject) + t.track(screen_view) + + struct_event = StructuredEvent( + category="shop", + action="add-to-basket", + property_="pcs", + value=2, + event_subject=t.subject, + ) + t.track(struct_event) + t.flush() + + +if __name__ == "__main__": + main() diff --git a/requirements-test.txt b/requirements-test.txt index 2741eba4..cde305f6 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,6 +1,8 @@ -pytest==3.0.2 -httmock==1.2.5 -freezegun==0.3.7 -pytest-cov==2.3.1 -coveralls==1.1 - 
+pytest==4.6.11; python_version < '3.10.0' +pytest==8.3.2; python_version >= '3.10.0' +attrs==21.2.0 +httmock==1.4.0 +freezegun==1.1.0; python_version < '3.13' +freezegun==1.5.1; python_version >= '3.13' +pytest-cov +coveralls==3.3.1 diff --git a/run-tests.sh b/run-tests.sh index 27cb18fa..bb938e85 100755 --- a/run-tests.sh +++ b/run-tests.sh @@ -15,64 +15,138 @@ eval "$(pyenv init -)" eval "$(pyenv virtualenv-init -)" function deploy { + # pyenv install 3.5.10 + if [ ! -e ~/.pyenv/versions/tracker35 ]; then + pyenv virtualenv 3.5.10 tracker35 + pyenv activate tracker35 + pip install . + pip install -r requirements-test.txt + source deactivate + fi - # pyenv install 2.7.12 - if [ ! -f ~/.pyenv/versions/tracker27 ]; then - pyenv virtualenv 2.7.12 tracker27 - pyenv activate tracker27 - pip install -r requirements.txt + # pyenv install 3.6.15 + if [ ! -e ~/.pyenv/versions/tracker36 ]; then + pyenv virtualenv 3.6.15 tracker36 + pyenv activate tracker36 + pip install . pip install -r requirements-test.txt source deactivate fi - # pyenv install 3.3.6 - if [ ! -f ~/.pyenv/versions/tracker33 ]; then - pyenv virtualenv 3.3.6 tracker33 - pyenv activate tracker33 - pip install -r requirements.txt + # pyenv install 3.7.17 + if [ ! -e ~/.pyenv/versions/tracker37 ]; then + pyenv virtualenv 3.7.17 tracker37 + pyenv activate tracker37 + pip install . pip install -r requirements-test.txt source deactivate fi - # pyenv install 3.4.5 - if [ ! -f ~/.pyenv/versions/tracker34 ]; then - pyenv virtualenv 3.4.5 tracker34 - pyenv activate tracker34 - pip install -r requirements.txt + # pyenv install 3.8.20 + if [ ! -e ~/.pyenv/versions/tracker38 ]; then + pyenv virtualenv 3.8.20 tracker38 + pyenv activate tracker38 + pip install . pip install -r requirements-test.txt source deactivate fi - # pyenv install 3.5.2 - if [ ! -f ~/.pyenv/versions/tracker35 ]; then - pyenv virtualenv 3.5.2 tracker35 - pyenv activate tracker35 - pip install -r requirements.txt + # pyenv install 3.9.20 + if [ ! 
-e ~/.pyenv/versions/tracker39 ]; then + pyenv virtualenv 3.9.20 tracker39 + pyenv activate tracker39 + pip install . + pip install -r requirements-test.txt + source deactivate + fi + + # pyenv install 3.10.15 + if [ ! -e ~/.pyenv/versions/tracker310 ]; then + pyenv virtualenv 3.10.15 tracker310 + pyenv activate tracker310 + pip install . + pip install -r requirements-test.txt + source deactivate + fi + + # pyenv install 3.11.10 + if [ ! -e ~/.pyenv/versions/tracker311 ]; then + pyenv virtualenv 3.11.10 tracker311 + pyenv activate tracker311 + pip install . + pip install -r requirements-test.txt + source deactivate + fi + + # pyenv install 3.12.7 + if [ ! -e ~/.pyenv/versions/tracker312 ]; then + pyenv virtualenv 3.12.7 tracker312 + pyenv activate tracker312 + pip install . pip install -r requirements-test.txt source deactivate fi + # pyenv install 3.13.0 + if [ ! -e ~/.pyenv/versions/tracker313 ]; then + pyenv virtualenv 3.13.0 tracker313 + pyenv activate tracker313 + pip install . + pip install -r requirements-test.txt + source deactivate + fi } function run_tests { - - pyenv activate tracker27 - pytest -s + pyenv activate tracker35 + pytest source deactivate - - pyenv activate tracker33 + + pyenv activate tracker36 pytest source deactivate - - pyenv activate tracker34 + + pyenv activate tracker37 pytest source deactivate - - pyenv activate tracker35 + + pyenv activate tracker38 + pytest + source deactivate + + pyenv activate tracker39 + pytest + source deactivate + + pyenv activate tracker310 pytest - source deactivate -} + source deactivate + + pyenv activate tracker311 + pytest + source deactivate + + pyenv activate tracker312 + pytest + source deactivate + + pyenv activate tracker313 + pytest + source deactivate +} + +function refresh_deploy { + pyenv uninstall -f tracker35 + pyenv uninstall -f tracker36 + pyenv uninstall -f tracker37 + pyenv uninstall -f tracker38 + pyenv uninstall -f tracker39 + pyenv uninstall -f tracker310 + pyenv uninstall -f tracker311 
+ pyenv uninstall -f tracker312 + pyenv uninstall -f tracker313 +} case "$1" in @@ -80,9 +154,13 @@ case "$1" in "deploy") echo "Deploying python environments. This can take few minutes" deploy ;; - "test") echo "Running tests" + "test") echo "Running tests" run_tests ;; + "refresh") echo "Refreshing python environments" + refresh_deploy + deploy + ;; *) echo "Unknown subcommand. Specify deploy or test" exit 1 ;; diff --git a/setup.py b/setup.py index 88392f47..efaf6536 100644 --- a/setup.py +++ b/setup.py @@ -1,24 +1,19 @@ -""" - setup.py +# +# setup.py - Copyright (c) 2013-2014 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. - This program is licensed to you under the Apache License Version 2.0, - and you may not use this file except in compliance with the Apache License - Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - http://www.apache.org/licenses/LICENSE-2.0. - - Unless required by applicable law or agreed to in writing, - software distributed under the Apache License Version 2.0 is distributed on - an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - express or implied. See the Apache License Version 2.0 for the specific - language governing permissions and limitations there under. - - Authors: Anuj More, Alex Dean, Fred Blundun - Copyright: Copyright (c) 2013-2014 Snowplow Analytics Ltd - License: Apache License Version 2.0 -""" +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. 
See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ #!/usr/bin/env python # -*- coding: utf-8 -*- @@ -28,59 +23,55 @@ except ImportError: from distutils.core import setup -import os - -version_file_path = os.path.join( - os.path.dirname(__file__), - 'snowplow_tracker', - '_version.py' - ) -exec(open(version_file_path).read(), {}, locals()) - authors_list = [ - 'Anuj More', - 'Alexander Dean', - 'Fred Blundun' - ] -authors_str = ', '.join(authors_list) + "Anuj More", + "Alexander Dean", + "Fred Blundun", + "Paul Boocock", + "Matus Tomlein", + "Jack Keene", +] +authors_str = ", ".join(authors_list) authors_email_list = [ - 'support@snowplowanalytics.com', - ] -authors_email_str = ', '.join(authors_email_list) + "support@snowplow.io", +] +authors_email_str = ", ".join(authors_email_list) setup( - name='snowplow-tracker', - version=__version__, + name="snowplow-tracker", + version="1.1.0", author=authors_str, author_email=authors_email_str, - packages=['snowplow_tracker', 'snowplow_tracker.test'], - url='http://snowplowanalytics.com', - license='Apache License 2.0', - description='Snowplow event tracker for Python. Add analytics to your Python and Django apps, webapps and games', - long_description=open('README.rst').read(), - + packages=["snowplow_tracker", "snowplow_tracker.test", "snowplow_tracker.events"], + package_data={"snowplow_tracker": ["py.typed"]}, + url="http://snowplow.io", + license="Apache License 2.0", + description="Snowplow event tracker for Python. 
Add analytics to your Python and Django apps, webapps and games", + long_description=open("README.md").read(), + long_description_content_type="text/markdown", classifiers=[ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", - "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.3", - "Programming Language :: Python :: 3.4", - "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Operating System :: OS Independent", ], - install_requires=[ - "greenlet==0.4.10", - "requests==2.2.1", - "pycontracts==1.7.6", - "celery==3.1.11", - "gevent==1.0.2", - "redis==2.9.1", - "six==1.9.0" + "requests>=2.25.1,<3.0", + "typing_extensions>=3.7.4", ], + extras_require={ + "typing": [ + "mypy>=0.971", + "types-requests>=2.25.1,<3.0", + ], + }, ) diff --git a/snowplow_tracker/__init__.py b/snowplow_tracker/__init__.py index 8a2062dd..689b2539 100644 --- a/snowplow_tracker/__init__.py +++ b/snowplow_tracker/__init__.py @@ -1,6 +1,18 @@ from snowplow_tracker._version import __version__ from snowplow_tracker.subject import Subject -from snowplow_tracker.emitters import logger, Emitter, AsyncEmitter, CeleryEmitter, RedisEmitter +from snowplow_tracker.emitters import logger, Emitter, AsyncEmitter from snowplow_tracker.self_describing_json import SelfDescribingJson from snowplow_tracker.tracker import Tracker -from contracts import disable_all as disable_contracts, enable_all as enable_contracts +from snowplow_tracker.emitter_configuration import EmitterConfiguration +from snowplow_tracker.tracker_configuration import 
TrackerConfiguration +from snowplow_tracker.snowplow import Snowplow +from snowplow_tracker.contracts import disable_contracts, enable_contracts +from snowplow_tracker.event_store import EventStore +from snowplow_tracker.events import ( + Event, + PageView, + PagePing, + SelfDescribing, + StructuredEvent, + ScreenView, +) diff --git a/snowplow_tracker/_version.py b/snowplow_tracker/_version.py index 02e250a1..f4ff17a0 100644 --- a/snowplow_tracker/_version.py +++ b/snowplow_tracker/_version.py @@ -1,25 +1,20 @@ -""" - _version.py +# """ +# _version.py - Copyright (c) 2013-2014 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. - This program is licensed to you under the Apache License Version 2.0, - and you may not use this file except in compliance with the Apache License - Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - http://www.apache.org/licenses/LICENSE-2.0. +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. - Unless required by applicable law or agreed to in writing, - software distributed under the Apache License Version 2.0 is distributed on - an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - express or implied. See the Apache License Version 2.0 for the specific - language governing permissions and limitations there under. +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ - Authors: Anuj More, Alex Dean, Fred Blundun - Copyright: Copyright (c) 2013-2016 Snowplow Analytics Ltd - License: Apache License Version 2.0 -""" - - -__version_info__ = (0, 8, 0) +__version_info__ = (1, 1, 0) __version__ = ".".join(str(x) for x in __version_info__) -__build_version__ = __version__ + '' +__build_version__ = __version__ + "" diff --git a/snowplow_tracker/constants.py b/snowplow_tracker/constants.py new file mode 100644 index 00000000..53ecc151 --- /dev/null +++ b/snowplow_tracker/constants.py @@ -0,0 +1,27 @@ +# """ +# constants.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ +from typing import List +from snowplow_tracker import _version, SelfDescribingJson + +VERSION = "py-%s" % _version.__version__ +DEFAULT_ENCODE_BASE64: bool = True # Type hint required for Python 3.6 MyPy check +BASE_SCHEMA_PATH = "iglu:com.snowplowanalytics.snowplow" +MOBILE_SCHEMA_PATH = "iglu:com.snowplowanalytics.mobile" +SCHEMA_TAG = "jsonschema" +CONTEXT_SCHEMA = "%s/contexts/%s/1-0-1" % (BASE_SCHEMA_PATH, SCHEMA_TAG) +UNSTRUCT_EVENT_SCHEMA = "%s/unstruct_event/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG) +ContextArray = List[SelfDescribingJson] diff --git a/snowplow_tracker/contracts.py b/snowplow_tracker/contracts.py new file mode 100644 index 00000000..3b17e1a3 --- /dev/null +++ b/snowplow_tracker/contracts.py @@ -0,0 +1,101 @@ +# """ +# contracts.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ + +import traceback +import re +from typing import Any, Dict, Iterable, Callable, Sized +from snowplow_tracker.typing import FORM_TYPES, FORM_NODE_NAMES + +_CONTRACTS_ENABLED = True +_MATCH_FIRST_PARAMETER_REGEX = re.compile(r"\(([\w.]+)[,)]") + + +def disable_contracts() -> None: + global _CONTRACTS_ENABLED + _CONTRACTS_ENABLED = False + + +def enable_contracts() -> None: + global _CONTRACTS_ENABLED + _CONTRACTS_ENABLED = True + + +def contracts_enabled() -> bool: + global _CONTRACTS_ENABLED + return _CONTRACTS_ENABLED + + +def greater_than(value: float, compared_to: float) -> None: + if contracts_enabled() and value <= compared_to: + raise ValueError( + "{0} must be greater than {1}.".format(_get_parameter_name(), compared_to) + ) + + +def non_empty(seq: Sized) -> None: + if contracts_enabled() and len(seq) == 0: + raise ValueError("{0} is empty.".format(_get_parameter_name())) + + +def non_empty_string(s: str) -> None: + if contracts_enabled() and type(s) is not str or not s: + raise ValueError("{0} is empty.".format(_get_parameter_name())) + + +def one_of(value: Any, supported: Iterable) -> None: + if contracts_enabled() and value not in supported: + raise ValueError("{0} is not supported.".format(_get_parameter_name())) + + +def satisfies(value: Any, check: Callable[[Any], bool]) -> None: + if contracts_enabled() and not check(value): + raise ValueError("{0} is not allowed.".format(_get_parameter_name())) + + +def form_element(element: Dict[str, Any]) -> None: + satisfies(element, lambda x: _check_form_element(x)) + + +def _get_parameter_name() -> str: + stack = traceback.extract_stack() + _, _, _, code = stack[-3] + + match = _MATCH_FIRST_PARAMETER_REGEX.search(code) + if not match: + return "Unnamed parameter" + return str(match.groups(0)[0]) + + +def _check_form_element(element: Dict[str, Any]) -> bool: + """ + Helper method to check that dictionary conforms element + in sumbit_form and change_form schemas + """ + all_present = ( + 
isinstance(element, dict) + and "name" in element + and "value" in element + and "nodeName" in element + ) + try: + if element["type"] in FORM_TYPES: + type_valid = True + else: + type_valid = False + except KeyError: + type_valid = True + return all_present and element["nodeName"] in FORM_NODE_NAMES and type_valid diff --git a/snowplow_tracker/emitter_configuration.py b/snowplow_tracker/emitter_configuration.py new file mode 100644 index 00000000..82626fa4 --- /dev/null +++ b/snowplow_tracker/emitter_configuration.py @@ -0,0 +1,213 @@ +# """ +# emitter_configuration.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ + +from typing import Optional, Union, Tuple, Dict +from snowplow_tracker.typing import SuccessCallback, FailureCallback +from snowplow_tracker.event_store import EventStore +import requests + + +class EmitterConfiguration(object): + def __init__( + self, + batch_size: Optional[int] = None, + on_success: Optional[SuccessCallback] = None, + on_failure: Optional[FailureCallback] = None, + byte_limit: Optional[int] = None, + request_timeout: Optional[Union[float, Tuple[float, float]]] = None, + buffer_capacity: Optional[int] = None, + custom_retry_codes: Dict[int, bool] = {}, + event_store: Optional[EventStore] = None, + session: Optional[requests.Session] = None, + ) -> None: + """ + Configuration for the emitter that sends events to the Snowplow collector. + :param batch_size: The maximum number of queued events before the buffer is flushed. Default is 10. + :type batch_size: int | None + :param on_success: Callback executed after every HTTP request in a flush has status code 200 + Gets passed one argument, an array of dictionaries corresponding to the sent events' payloads + :type on_success: function | None + :param on_failure: Callback executed if at least one HTTP request in a flush has status code other than 200 + Gets passed two arguments: + 1) The number of events which were successfully sent + 2) An array of dictionaries corresponding to the unsent events' payloads + :type on_failure: function | None + :param byte_limit: The size event list after reaching which queued events will be flushed + :type byte_limit: int | None + :param request_timeout: Timeout for the HTTP requests. Can be set either as single float value which + applies to both "connect" AND "read" timeout, or as tuple with two float values + which specify the "connect" and "read" timeouts separately + :type request_timeout: float | tuple | None + :param custom_retry_codes: Set custom retry rules for HTTP status codes received in emit responses from the Collector. 
+ By default, retry will not occur for status codes 400, 401, 403, 410 or 422. This can be overridden here. + Note that 2xx codes will never retry as they are considered successful. + :type custom_retry_codes: dict + :param event_store: Stores the event buffer and buffer capacity. Default is an InMemoryEventStore object with buffer_capacity of 10,000 events. + :type event_store: EventStore | None + :param session: Persist parameters across requests by using a session object + :type session: request.Session | None + """ + + self.batch_size = batch_size + self.on_success = on_success + self.on_failure = on_failure + self.byte_limit = byte_limit + self.request_timeout = request_timeout + self.buffer_capacity = buffer_capacity + self.custom_retry_codes = custom_retry_codes + self.event_store = event_store + self.session = session + + @property + def batch_size(self) -> Optional[int]: + """ + The maximum number of queued events before the buffer is flushed. Default is 10. + """ + return self._batch_size + + @batch_size.setter + def batch_size(self, value: Optional[int]): + if isinstance(value, int) and value < 0: + raise ValueError("batch_size must greater than 0") + if not isinstance(value, int) and value is not None: + raise ValueError("batch_size must be of type int") + self._batch_size = value + + @property + def on_success(self) -> Optional[SuccessCallback]: + """ + Callback executed after every HTTP request in a flush has status code 200. Gets passed the number of events flushed. 
+ """ + return self._on_success + + @on_success.setter + def on_success(self, value: Optional[SuccessCallback]): + self._on_success = value + + @property + def on_failure(self) -> Optional[FailureCallback]: + """ + Callback executed if at least one HTTP request in a flush has status code other than 200 + Gets passed two arguments: + 1) The number of events which were successfully sent + 2) An array of dictionaries corresponding to the unsent events' payloads + """ + return self._on_failure + + @on_failure.setter + def on_failure(self, value: Optional[FailureCallback]): + self._on_failure = value + + @property + def byte_limit(self) -> Optional[int]: + """ + The size event list after reaching which queued events will be flushed + """ + return self._byte_limit + + @byte_limit.setter + def byte_limit(self, value: Optional[int]): + if isinstance(value, int) and value < 0: + raise ValueError("byte_limit must greater than 0") + if not isinstance(value, int) and value is not None: + raise ValueError("byte_limit must be of type int") + self._byte_limit = value + + @property + def request_timeout(self) -> Optional[Union[float, Tuple[float, float]]]: + """ + Timeout for the HTTP requests. Can be set either as single float value which + applies to both "connect" AND "read" timeout, or as tuple with two float values + which specify the "connect" and "read" timeouts separately + """ + return self._request_timeout + + @request_timeout.setter + def request_timeout(self, value: Optional[Union[float, Tuple[float, float]]]): + self._request_timeout = value + + @property + def buffer_capacity(self) -> Optional[int]: + """ + The maximum capacity of the event buffer. The default buffer capacity is 10 000 events. + When the buffer is full new events are lost. 
+ """ + return self._buffer_capacity + + @buffer_capacity.setter + def buffer_capacity(self, value: Optional[int]): + if isinstance(value, int) and value < 0: + raise ValueError("buffer_capacity must greater than 0") + if not isinstance(value, int) and value is not None: + raise ValueError("buffer_capacity must be of type int") + self._buffer_capacity = value + + @property + def custom_retry_codes(self) -> Dict[int, bool]: + """ + Custom retry rules for HTTP status codes received in emit responses from the Collector. + """ + return self._custom_retry_codes + + @custom_retry_codes.setter + def custom_retry_codes(self, value: Dict[int, bool]): + self._custom_retry_codes = value + + def set_retry_code(self, status_code: int, retry=True) -> bool: + """ + Add a retry rule for HTTP status code received from emit responses from the Collector. + :param status_code: HTTP response code + :type status_code: int + :param retry: Set the status_code to retry (True) or not retry (False). Default is True + :type retry: bool + """ + if not isinstance(status_code, int): + print("status_code must be of type int") + return False + + if not isinstance(retry, bool): + print("retry must be of type bool") + return False + + if 200 <= status_code < 300: + print( + "custom_retry_codes should not include codes for succesful requests (2XX codes)" + ) + return False + + self.custom_retry_codes[status_code] = retry + + return status_code in self.custom_retry_codes.keys() + + @property + def event_store(self) -> Optional[EventStore]: + return self._event_store + + @event_store.setter + def event_store(self, value: Optional[EventStore]): + self._event_store = value + + @property + def session(self) -> Optional[requests.Session]: + """ + Persist parameters across requests using a requests.Session object + """ + return self._session + + @session.setter + def session(self, value: Optional[requests.Session]): + self._session = value diff --git a/snowplow_tracker/emitters.py 
b/snowplow_tracker/emitters.py index 2eccb0fb..72f451bf 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -1,136 +1,207 @@ -""" - emitters.py +# """ +# emitters.py - Copyright (c) 2013-2014 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. - This program is licensed to you under the Apache License Version 2.0, - and you may not use this file except in compliance with the Apache License - Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - http://www.apache.org/licenses/LICENSE-2.0. +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. - Unless required by applicable law or agreed to in writing, - software distributed under the Apache License Version 2.0 is distributed on - an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - express or implied. See the Apache License Version 2.0 for the specific - language governing permissions and limitations there under. +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ - Authors: Anuj More, Alex Dean, Fred Blundun - Copyright: Copyright (c) 2013-2014 Snowplow Analytics Ltd - License: Apache License Version 2.0 -""" - -import json import logging import time import threading -try: - # Python 2 - from Queue import Queue -except ImportError: - # Python 3 - from queue import Queue - -from celery import Celery -from celery.contrib.methods import task -import redis import requests -from contracts import contract, new_contract +import random +from typing import Optional, Union, Tuple, Dict, cast, Callable +from queue import Queue from snowplow_tracker.self_describing_json import SelfDescribingJson - +from snowplow_tracker.typing import ( + PayloadDict, + PayloadDictList, + HttpProtocol, + Method, + SuccessCallback, + FailureCallback, + EmitterProtocol, +) +from snowplow_tracker.contracts import one_of +from snowplow_tracker.event_store import EventStore, InMemoryEventStore + +# logging +logging.basicConfig() logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) DEFAULT_MAX_LENGTH = 10 -PAYLOAD_DATA_SCHEMA = "iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4" - -new_contract("protocol", lambda x: x == "http" or x == "https") - -new_contract("method", lambda x: x == "get" or x == "post") - -new_contract("function", lambda x: hasattr(x, "__call__")) +PAYLOAD_DATA_SCHEMA = ( + "iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4" +) +PROTOCOLS = {"http", "https"} +METHODS = {"get", "post"} -new_contract("redis", lambda x: isinstance(x, (redis.Redis, redis.StrictRedis))) -try: - # Check whether a custom Celery configuration module named "snowplow_celery_config" exists - import snowplow_celery_config - app = Celery() - app.config_from_object(snowplow_celery_config) +# Unifes the two request methods under one interface +class Requester: + post: Callable + get: Callable -except ImportError: - # Otherwise configure Celery with default settings - app = Celery("Snowplow", 
broker="redis://guest@localhost//") + def __init__(self, post: Callable, get: Callable): + # 3.6 MyPy compatibility: + # error: Cannot assign to a method + # https://github.com/python/mypy/issues/2427 + setattr(self, "post", post) + setattr(self, "get", get) -class Emitter(object): +class Emitter(EmitterProtocol): """ - Synchronously send Snowplow events to a Snowplow collector - Supports both GET and POST requests + Synchronously send Snowplow events to a Snowplow collector + Supports both GET and POST requests """ - @contract - def __init__(self, endpoint, protocol="http", port=None, method="get", buffer_size=None, on_success=None, on_failure=None, byte_limit=None): - """ - :param endpoint: The collector URL. Don't include "http://" - this is done automatically. - :type endpoint: string - :param protocol: The protocol to use - http or https. Defaults to http. - :type protocol: protocol - :param port: The collector port to connect to - :type port: int | None - :param method: The HTTP request method - :type method: method - :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 10. - :type buffer_size: int | None - :param on_success: Callback executed after every HTTP request in a flush has status code 200 - Gets passed the number of events flushed. 
- :type on_success: function | None - :param on_failure: Callback executed if at least one HTTP request in a flush has status code 200 - Gets passed two arguments: - 1) The number of events which were successfully sent - 2) If method is "post": The unsent data in string form; - If method is "get": An array of dictionaries corresponding to the unsent events' payloads - :type on_failure: function | None - :param byte_limit: The size event list after reaching which queued events will be flushed - :type byte_limit: int | None - """ + def __init__( + self, + endpoint: str, + protocol: HttpProtocol = "https", + port: Optional[int] = None, + method: Method = "post", + batch_size: Optional[int] = None, + on_success: Optional[SuccessCallback] = None, + on_failure: Optional[FailureCallback] = None, + byte_limit: Optional[int] = None, + request_timeout: Optional[Union[float, Tuple[float, float]]] = None, + max_retry_delay_seconds: int = 60, + buffer_capacity: Optional[int] = None, + custom_retry_codes: Dict[int, bool] = {}, + event_store: Optional[EventStore] = None, + session: Optional[requests.Session] = None, + ) -> None: + """ + :param endpoint: The collector URL. If protocol is not set in endpoint it will automatically set to "https://" - this is done automatically. + :type endpoint: string + :param protocol: The protocol to use - http or https. Defaults to https. + :type protocol: protocol + :param port: The collector port to connect to + :type port: int | None + :param method: The HTTP request method. Defaults to post. + :type method: method + :param batch_size: The maximum number of queued events before the buffer is flushed. Default is 10. 
+ :type batch_size: int | None + :param on_success: Callback executed after every HTTP request in a flush has status code 200 + Gets passed one argument, an array of dictionaries corresponding to the sent events' payloads + :type on_success: function | None + :param on_failure: Callback executed if at least one HTTP request in a flush has status code other than 200 + Gets passed two arguments: + 1) The number of events which were successfully sent + 2) An array of dictionaries corresponding to the unsent events' payloads + :type on_failure: function | None + :param byte_limit: The size event list after reaching which queued events will be flushed + :type byte_limit: int | None + :param request_timeout: Timeout for the HTTP requests. Can be set either as single float value which + applies to both "connect" AND "read" timeout, or as tuple with two float values + which specify the "connect" and "read" timeouts separately + :type request_timeout: float | tuple | None + :param max_retry_delay_seconds: Set the maximum time between attempts to send failed events to the collector. Default 60 seconds + :type max_retry_delay_seconds: int + :param buffer_capacity: The maximum capacity of the event buffer. + When the buffer is full new events are lost. + :type buffer_capacity: int + :param custom_retry_codes: Set custom retry rules for HTTP status codes received in emit responses from the Collector. + By default, retry will not occur for status codes 400, 401, 403, 410 or 422. This can be overridden here. + Note that 2xx codes will never retry as they are considered successful. + :type custom_retry_codes: dict + :param event_store: Stores the event buffer and buffer capacity. Default is an InMemoryEventStore object with buffer_capacity of 10,000 events. 
+ :type event_store: EventStore | None + :param session: Persist parameters across requests by using a session object + :type session: requests.Session | None + """ + one_of(protocol, PROTOCOLS) + one_of(method, METHODS) + self.endpoint = Emitter.as_collector_uri(endpoint, protocol, port, method) self.method = method - if buffer_size is None: + if event_store is None: + if buffer_capacity is None: + event_store = InMemoryEventStore(logger=logger) + else: + event_store = InMemoryEventStore( + buffer_capacity=buffer_capacity, logger=logger + ) + + self.event_store = event_store + + if batch_size is None: if method == "post": - buffer_size = DEFAULT_MAX_LENGTH + batch_size = DEFAULT_MAX_LENGTH else: - buffer_size = 1 - self.buffer_size = buffer_size - self.buffer = [] + batch_size = 1 + + if buffer_capacity is not None and batch_size > buffer_capacity: + batch_size = buffer_capacity + + self.batch_size = batch_size self.byte_limit = byte_limit self.bytes_queued = None if byte_limit is None else 0 + self.request_timeout = request_timeout self.on_success = on_success self.on_failure = on_failure self.lock = threading.RLock() - self.timer = None + self.timer = FlushTimer(emitter=self, repeating=True) + self.retry_timer = FlushTimer(emitter=self, repeating=False) + + self.max_retry_delay_seconds = max_retry_delay_seconds + self.retry_delay: Union[int, float] = 0 + self.custom_retry_codes = custom_retry_codes logger.info("Emitter initialized with endpoint " + self.endpoint) + if session is None: + self.request_method = Requester(post=requests.post, get=requests.get) + else: + self.request_method = Requester(post=session.post, get=session.get) + @staticmethod - @contract - def as_collector_uri(endpoint, protocol="http", port=None, method="get"): - """ - :param endpoint: The raw endpoint provided by the user - :type endpoint: string - :param protocol: The protocol to use - http or https - :type protocol: protocol - :param port: The collector port to connect to - :type port: 
int | None - :param method: Either `get` or `post` HTTP method - :type method: method - :rtype: string - """ + def as_collector_uri( + endpoint: str, + protocol: HttpProtocol = "https", + port: Optional[int] = None, + method: Method = "post", + ) -> str: + """ + :param endpoint: The raw endpoint provided by the user + :type endpoint: string + :param protocol: The protocol to use - http or https + :type protocol: protocol + :param port: The collector port to connect to + :type port: int | None + :param method: Either `get` or `post` HTTP method + :type method: method + :rtype: string + """ + if len(endpoint) < 1: + raise ValueError("No endpoint provided.") + + endpoint = endpoint.rstrip("/") + + if endpoint.split("://")[0] in PROTOCOLS: + endpoint_arr = endpoint.split("://") + protocol = cast(HttpProtocol, endpoint_arr[0]) + endpoint = endpoint_arr[1] + if method == "get": path = "/i" else: @@ -140,299 +211,384 @@ def as_collector_uri(endpoint, protocol="http", port=None, method="get"): else: return protocol + "://" + endpoint + ":" + str(port) + path - @contract - def input(self, payload): + def input(self, payload: PayloadDict) -> None: """ - Adds an event to the buffer. - If the maximum size has been reached, flushes the buffer. + Adds an event to the buffer. + If the maximum size has been reached, flushes the buffer. 
- :param payload: The name-value pairs for the event - :type payload: dict(string:*) + :param payload: The name-value pairs for the event + :type payload: dict(string:\\*) """ with self.lock: if self.bytes_queued is not None: self.bytes_queued += len(str(payload)) if self.method == "post": - self.buffer.append({key: str(payload[key]) for key in payload}) + self.event_store.add_event({key: str(payload[key]) for key in payload}) else: - self.buffer.append(payload) + self.event_store.add_event(payload) if self.reached_limit(): self.flush() - def reached_limit(self): + def reached_limit(self) -> bool: """ - Checks if event-size or bytes limit are reached + Checks if event-size or bytes limit are reached - :rtype: bool + :rtype: bool """ if self.byte_limit is None: - return len(self.buffer) >= self.buffer_size + return self.event_store.size() >= self.batch_size else: - return self.bytes_queued >= self.byte_limit or len(self.buffer) >= self.buffer_size + return ( + self.bytes_queued or 0 + ) >= self.byte_limit or self.event_store.size() >= self.batch_size - @task(name="Flush") - def flush(self): + def flush(self) -> None: """ - Sends all events in the buffer to the collector. + Sends all events in the buffer to the collector. """ with self.lock: - self.send_events(self.buffer) - self.buffer = [] + if self.retry_timer.is_active(): + return + send_events = self.event_store.get_events_batch() + self.send_events(send_events) if self.bytes_queued is not None: self.bytes_queued = 0 - @contract - def http_post(self, data): + def http_post(self, data: str) -> int: """ - :param data: The array of JSONs to be sent - :type data: string + :param data: The array of JSONs to be sent + :type data: string """ logger.info("Sending POST request to %s..." 
% self.endpoint) logger.debug("Payload: %s" % data) - r = requests.post(self.endpoint, data=data, headers={'content-type': 'application/json; charset=utf-8'}) - getattr(logger, "info" if self.is_good_status_code(r.status_code) else "warn")("POST request finished with status code: " + str(r.status_code)) - return r + try: + r = self.request_method.post( + self.endpoint, + data=data, + headers={"Content-Type": "application/json; charset=utf-8"}, + timeout=self.request_timeout, + ) + except requests.RequestException as e: + logger.warning(e) + return -1 - @contract - def http_get(self, payload): + return r.status_code + + def http_get(self, payload: PayloadDict) -> int: """ - :param payload: The event properties - :type payload: dict(string:*) + :param payload: The event properties + :type payload: dict(string:\\*) """ logger.info("Sending GET request to %s..." % self.endpoint) logger.debug("Payload: %s" % payload) - r = requests.get(self.endpoint, params=payload) - getattr(logger, "info" if self.is_good_status_code(r.status_code) else "warn")("GET request finished with status code: " + str(r.status_code)) - return r + try: + r = self.request_method.get( + self.endpoint, params=payload, timeout=self.request_timeout + ) + except requests.RequestException as e: + logger.warning(e) + return -1 + + return r.status_code - def sync_flush(self): + def sync_flush(self) -> None: """ - Calls the flush method of the base Emitter class. - This is guaranteed to be blocking, not asynchronous. + Calls the flush method of the base Emitter class. + This is guaranteed to be blocking, not asynchronous. 
""" logger.debug("Starting synchronous flush...") - Emitter.flush(self) - logger.info("Finished synchrous flush") + self.flush() + logger.info("Finished synchronous flush") @staticmethod - @contract - def is_good_status_code(status_code): + def is_good_status_code(status_code: int) -> bool: """ - :param status_code: HTTP status code - :type status_code: int - :rtype: bool + :param status_code: HTTP status code + :type status_code: int + :rtype: bool """ - return 200 <= status_code < 400 + return 200 <= status_code < 300 - @contract - def send_events(self, evts): + def send_events(self, evts: PayloadDictList) -> None: """ - :param evts: Array of events to be sent - :type evts: list(dict(string:*)) + :param evts: Array of events to be sent + :type evts: list(dict(string:\\*)) """ if len(evts) > 0: - logger.info("Attempting to send %s requests" % len(evts)) + logger.info("Attempting to send %s events" % len(evts)) + Emitter.attach_sent_timestamp(evts) - if self.method == 'post': + success_events = [] + failure_events = [] + + if self.method == "post": data = SelfDescribingJson(PAYLOAD_DATA_SCHEMA, evts).to_string() - post_succeeded = False - try: - status_code = self.http_post(data).status_code - post_succeeded = self.is_good_status_code(status_code) - except requests.RequestException as e: - logger.warn(e) - if post_succeeded: - if self.on_success is not None: - self.on_success(len(evts)) - elif self.on_failure is not None: - self.on_failure(0, evts) - - elif self.method == 'get': - success_count = 0 - unsent_requests = [] + status_code = self.http_post(data) + request_succeeded = Emitter.is_good_status_code(status_code) + if request_succeeded: + success_events += evts + else: + failure_events += evts + + elif self.method == "get": for evt in evts: - get_succeeded = False - try: - status_code = self.http_get(evt).status_code - get_succeeded = self.is_good_status_code(status_code) - except requests.RequestException as e: - logger.warn(e) - if get_succeeded: - 
success_count += 1 + status_code = self.http_get(evt) + request_succeeded = Emitter.is_good_status_code(status_code) + + if request_succeeded: + success_events += [evt] else: - unsent_requests.append(evt) - if len(unsent_requests) == 0: - if self.on_success is not None: - self.on_success(success_count) - elif self.on_failure is not None: - self.on_failure(success_count, unsent_requests) + failure_events += [evt] + + if self.on_success is not None and len(success_events) > 0: + self.on_success(success_events) + if self.on_failure is not None and len(failure_events) > 0: + self.on_failure(len(success_events), failure_events) + + if self._should_retry(status_code): + self._set_retry_delay() + self._retry_failed_events(failure_events) + else: + self.event_store.cleanup(success_events, False) + self._reset_retry_delay() else: logger.info("Skipping flush since buffer is empty") - @contract - def set_flush_timer(self, timeout, flush_now=False): + def _set_retry_timer(self, timeout: float) -> None: """ - Set an interval at which the buffer will be flushed + Set an interval at which failed events will be retried - :param timeout: interval in seconds - :type timeout: int | float - :param flush_now: immediately flush buffer - :type flush_now: bool + :param timeout: interval in seconds + :type timeout: int | float """ + self.retry_timer.start(timeout=timeout) - # Repeatable create new timer - if flush_now: - self.flush() - self.timer = threading.Timer(timeout, self.set_flush_timer, [timeout, True]) - self.timer.daemon = True - self.timer.start() - - def cancel_flush_timer(self): + def set_flush_timer(self, timeout: float) -> None: """ - Abort automatic async flushing + Set an interval at which the buffer will be flushed + :param timeout: interval in seconds + :type timeout: int | float """ + self.timer.start(timeout=timeout) - if self.timer is not None: - self.timer.cancel() + def cancel_flush_timer(self) -> None: + """ + Abort automatic async flushing + """ + 
self.timer.cancel() @staticmethod - def attach_sent_timestamp(events): + def attach_sent_timestamp(events: PayloadDictList) -> None: """ - Attach (by mutating in-place) current timestamp in milliseconds - as `stm` param + Attach (by mutating in-place) current timestamp in milliseconds + as `stm` param - :param events: Array of events to be sent - :type events: list(dict(string:*)) - :rtype: None + :param events: Array of events to be sent + :type events: list(dict(string:\\*)) + :rtype: None """ - def update(e): - e.update({'stm': str(int(time.time()) * 1000)}) - [update(event) for event in events] + def update(e: PayloadDict) -> None: + e.update({"stm": str(int(time.time()) * 1000)}) + + for event in events: + update(event) + + def _should_retry(self, status_code: int) -> bool: + """ + Checks if a request should be retried + + :param status_code: Response status code + :type status_code: int + :rtype: bool + """ + if Emitter.is_good_status_code(status_code): + return False + + if status_code in self.custom_retry_codes.keys(): + return self.custom_retry_codes[status_code] + + return status_code not in [400, 401, 403, 410, 422] + + def _set_retry_delay(self) -> None: + """ + Sets a delay to retry failed events + """ + random_noise = random.random() + self.retry_delay = min( + self.retry_delay * 2 + random_noise, self.max_retry_delay_seconds + ) + + def _reset_retry_delay(self) -> None: + """ + Resets retry delay to 0 + """ + self.retry_delay = 0 + + def _retry_failed_events(self, failed_events) -> None: + """ + Adds failed events back to the buffer to retry + + :param failed_events: List of failed events + :type List + """ + self.event_store.cleanup(failed_events, True) + self._set_retry_timer(self.retry_delay) + + def _cancel_retry_timer(self) -> None: + """ + Cancels a retry timer + """ + self.retry_timer.cancel() + + # This is only here to satisfy the `EmitterProtocol` interface + def async_flush(self) -> None: + return class AsyncEmitter(Emitter): """ - Uses 
threads to send HTTP requests asynchronously + Uses threads to send HTTP requests asynchronously """ - @contract def __init__( self, - endpoint, - protocol="http", - port=None, - method="get", - buffer_size=None, - on_success=None, - on_failure=None, - thread_count=1, - byte_limit=None): - """ - :param endpoint: The collector URL. Don't include "http://" - this is done automatically. - :type endpoint: string - :param protocol: The protocol to use - http or https. Defaults to http. - :type protocol: protocol - :param port: The collector port to connect to - :type port: int | None - :param method: The HTTP request method - :type method: method - :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 10. - :type buffer_size: int | None - :param on_success: Callback executed after every HTTP request in a flush has status code 200 - Gets passed the number of events flushed. - :type on_success: function | None - :param on_failure: Callback executed if at least one HTTP request in a flush has status code 200 - Gets passed two arguments: - 1) The number of events which were successfully sent - 2) If method is "post": The unsent data in string form; - If method is "get": An array of dictionaries corresponding to the unsent events' payloads - :type on_failure: function | None - :param thread_count: Number of worker threads to use for HTTP requests - :type thread_count: int - :param byte_limit: The size event list after reaching which queued events will be flushed - :type byte_limit: int | None - """ - super(AsyncEmitter, self).__init__(endpoint, protocol, port, method, buffer_size, on_success, on_failure, byte_limit) - self.queue = Queue() + endpoint: str, + protocol: HttpProtocol = "http", + port: Optional[int] = None, + method: Method = "post", + batch_size: Optional[int] = None, + on_success: Optional[SuccessCallback] = None, + on_failure: Optional[FailureCallback] = None, + thread_count: int = 1, + byte_limit: Optional[int] = 
None, + request_timeout: Optional[Union[float, Tuple[float, float]]] = None, + max_retry_delay_seconds: int = 60, + buffer_capacity: Optional[int] = None, + custom_retry_codes: Dict[int, bool] = {}, + event_store: Optional[EventStore] = None, + session: Optional[requests.Session] = None, + ) -> None: + """ + :param endpoint: The collector URL. If the endpoint does not include a protocol, the value of the `protocol` argument is used. + :type endpoint: string + :param protocol: The protocol to use - http or https. Defaults to http. + :type protocol: protocol + :param port: The collector port to connect to + :type port: int | None + :param method: The HTTP request method + :type method: method + :param batch_size: The maximum number of queued events before the buffer is flushed. Default is 10. + :type batch_size: int | None + :param on_success: Callback executed after every HTTP request in a flush has status code 200 + Gets passed one argument, an array of dictionaries corresponding to the sent events' payloads + :type on_success: function | None + :param on_failure: Callback executed if at least one HTTP request in a flush has status code other than 200 + Gets passed two arguments: + 1) The number of events which were successfully sent + 2) An array of dictionaries corresponding to the unsent events' payloads + :type on_failure: function | None + :param thread_count: Number of worker threads to use for HTTP requests + :type thread_count: int + :param byte_limit: The size of the event list after which queued events will be flushed + :type byte_limit: int | None + :param max_retry_delay_seconds: Set the maximum time between attempts to send failed events to the collector. Default 60 seconds + :type max_retry_delay_seconds: int + :param buffer_capacity: The maximum capacity of the event buffer. + When the buffer is full new events are lost. + :type buffer_capacity: int + :param event_store: Stores the event buffer and buffer capacity.
Default is an InMemoryEventStore object with buffer_capacity of 10,000 events. + :type event_store: EventStore + :param session: Persist parameters across requests by using a session object + :type session: requests.Session | None + """ + super(AsyncEmitter, self).__init__( + endpoint=endpoint, + protocol=protocol, + port=port, + method=method, + batch_size=batch_size, + on_success=on_success, + on_failure=on_failure, + byte_limit=byte_limit, + request_timeout=request_timeout, + max_retry_delay_seconds=max_retry_delay_seconds, + buffer_capacity=buffer_capacity, + custom_retry_codes=custom_retry_codes, + event_store=event_store, + session=session, + ) + self.queue: Queue = Queue() for i in range(thread_count): t = threading.Thread(target=self.consume) t.daemon = True t.start() - def sync_flush(self): + def sync_flush(self) -> None: while True: self.flush() self.queue.join() - if len(self.buffer) < 1: + if self.event_store.size() < 1: break - def flush(self): + def flush(self) -> None: """ - Removes all dead threads, then creates a new thread which - executes the flush method of the base Emitter class + Removes all dead threads, then creates a new thread which + executes the flush method of the base Emitter class """ with self.lock: - self.queue.put(self.buffer) - self.buffer = [] + self.queue.put(self.event_store.get_events_batch()) if self.bytes_queued is not None: self.bytes_queued = 0 - def consume(self): + def consume(self) -> None: while True: evts = self.queue.get() self.send_events(evts) self.queue.task_done() -class CeleryEmitter(Emitter): +class FlushTimer(object): """ - Uses a Celery worker to send HTTP requests asynchronously. - Works like the base Emitter class, - but on_success and on_failure callbacks cannot be set. + Internal class used by the Emitter to schedule flush calls for later. 
""" - def __init__(self, endpoint, protocol="http", port=None, method="get", buffer_size=None, byte_limit=None): - super(CeleryEmitter, self).__init__(endpoint, protocol, port, method, buffer_size, None, None, byte_limit) - def flush(self): - """ - Schedules a flush task - """ - super(CeleryEmitter, self).flush.delay() - logger.info("Scheduled a Celery task to flush the event queue") + def __init__(self, emitter: Emitter, repeating: bool): + self.emitter = emitter + self.repeating = repeating + self.timer: Optional[threading.Timer] = None + self.lock = threading.RLock() + def start(self, timeout: float) -> bool: + with self.lock: + if self.timer is not None: + return False + else: + self._schedule_timer(timeout=timeout) + return True -class RedisEmitter(object): - """ - Sends Snowplow events to a Redis database - """ - @contract - def __init__(self, rdb=None, key="snowplow"): - """ - :param rdb: Optional custom Redis database - :type rdb: redis | None - :param key: The Redis key for the list of events - :type key: string - """ - if rdb is None: - rdb = redis.StrictRedis() - self.rdb = rdb - self.key = key + def cancel(self) -> None: + with self.lock: + if self.timer is not None: + self.timer.cancel() + self.timer = None - @contract - def input(self, payload): - """ - :param payload: The event properties - :type payload: dict(string:*) - """ - logger.debug("Pushing event to Redis queue...") - self.rdb.rpush(self.key, json.dumps(payload)) - logger.info("Finished sending event to Redis.") + def is_active(self) -> bool: + with self.lock: + return self.timer is not None - def flush(self): - logger.warn("The RedisEmitter class does not need to be flushed") + def _fire(self, timeout: float) -> None: + with self.lock: + if self.repeating: + self._schedule_timer(timeout) + else: + self.timer = None - def sync_flush(self): - self.flush() + self.emitter.flush() + + def _schedule_timer(self, timeout: float) -> None: + self.timer = threading.Timer(timeout, self._fire, 
[timeout]) + self.timer.daemon = True + self.timer.start() diff --git a/snowplow_tracker/event_store.py b/snowplow_tracker/event_store.py new file mode 100644 index 00000000..b8d13028 --- /dev/null +++ b/snowplow_tracker/event_store.py @@ -0,0 +1,139 @@ +# """ +# event_store.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ + +from typing import List +from typing_extensions import Protocol +from snowplow_tracker.typing import PayloadDict, PayloadDictList +from logging import Logger + + +class EventStore(Protocol): + """ + EventStore protocol. For buffering events in the Emitter. + """ + + def add_event(self, payload: PayloadDict) -> bool: + """ + Add PayloadDict to buffer. Returns True if successful. + + :param payload: The payload to add + :type payload: PayloadDict + :rtype bool + """ + ... + + def get_events_batch(self) -> PayloadDictList: + """ + Get a list of all the PayloadDicts in the buffer. + + :rtype PayloadDictList + """ + ... + + def cleanup(self, batch: PayloadDictList, need_retry: bool) -> None: + """ + Removes sent events from the event store. If events need to be retried they are re-added to the buffer. + + :param batch: The events to be removed from the buffer + :type batch: PayloadDictList + :param need_retry Whether the events should be re-sent or not + :type need_retry bool + """ + ... 
+ + def size(self) -> int: + """ + Returns the number of events in the buffer + + :rtype: int + """ + ... + + +class InMemoryEventStore(EventStore): + """ + Create an InMemoryEventStore object with custom buffer capacity. The default is 10,000 events. + """ + + def __init__(self, logger: Logger, buffer_capacity: int = 10000) -> None: + """ + :param logger: Logging module + :type logger: Logger + :param buffer_capacity: The maximum capacity of the event buffer. + When the buffer is full new events are lost. + :type buffer_capacity: int + """ + self.event_buffer: List[PayloadDict] = [] + self.buffer_capacity = buffer_capacity + self.logger = logger + + def add_event(self, payload: PayloadDict) -> bool: + """ + Add PayloadDict to buffer. + + :param payload: The payload to add + :type payload: PayloadDict + """ + if self._buffer_capacity_reached(): + self.logger.error("Event buffer is full, dropping event.") + return False + + self.event_buffer.append(payload) + return True + + def get_events_batch(self) -> PayloadDictList: + """ + Get a list of all the PayloadDicts in the buffer. + + :rtype: PayloadDictList + """ + batch = self.event_buffer + self.event_buffer = [] + return batch + + def cleanup(self, batch: PayloadDictList, need_retry: bool) -> None: + """ + Removes sent events from the InMemoryEventStore buffer. If events need to be retried they are re-added to the buffer.
+ + :param batch: The events to be removed from the buffer + :type batch: PayloadDictList + :param need_retry Whether the events should be re-sent or not + :type need_retry bool + """ + if not need_retry: + return + + for event in batch: + if not event in self.event_buffer: + if not self.add_event(event): + return + + def size(self) -> int: + """ + Returns the number of events in the buffer + + :rtype int + """ + return len(self.event_buffer) + + def _buffer_capacity_reached(self) -> bool: + """ + Returns true if buffer capacity is reached + + :rtype: bool + """ + return self.size() >= self.buffer_capacity diff --git a/snowplow_tracker/events/CLAUDE.md b/snowplow_tracker/events/CLAUDE.md new file mode 100644 index 00000000..efc0f5ab --- /dev/null +++ b/snowplow_tracker/events/CLAUDE.md @@ -0,0 +1,284 @@ +# Snowplow Event Types - CLAUDE.md + +## Directory Overview + +The `events/` directory contains all event type implementations for the Snowplow Python Tracker. Each event class represents a specific type of analytics event that can be sent to Snowplow collectors. All events inherit from the base `Event` class and follow a consistent pattern for construction, validation, and payload generation. 
+ +## Event Class Hierarchy + +``` +Event (base class) +├── PageView # Web page view tracking +├── PagePing # Page engagement/heartbeat +├── ScreenView # Mobile/app screen views +├── StructuredEvent # Generic 5-parameter events +└── SelfDescribing # Custom schema events +``` + +## Core Event Patterns + +### Event Construction Pattern +```python +# ✅ Use keyword arguments for clarity +event = PageView( + page_url="https://example.com", + page_title="Homepage", + referrer="https://google.com" +) + +# ❌ Don't use positional arguments +event = PageView("https://example.com", "Homepage") +``` + +### Event Context Pattern +```python +# ✅ Add contexts as SelfDescribingJson list +geo_context = SelfDescribingJson( + "iglu:com.acme/geolocation/jsonschema/1-0-0", + {"latitude": 40.0, "longitude": -73.0} +) +event = PageView(page_url="...", context=[geo_context]) + +# ❌ Don't use raw dictionaries for context +event.context = [{"latitude": 40.0}] # Missing schema! +``` + +### Event Subject Override Pattern +```python +# ✅ Override tracker subject for specific event +special_subject = Subject() +special_subject.set_user_id("anonymous_user") +event = StructuredEvent( + category="shop", + action="view", + event_subject=special_subject +) + +# ❌ Don't modify shared subject +tracker.subject.set_user_id("temp") # Affects all events +``` + +### True Timestamp Pattern +```python +# ✅ Use milliseconds for true_timestamp +import time +timestamp_ms = time.time() * 1000 +event = PageView( + page_url="...", + true_timestamp=timestamp_ms +) + +# ❌ Don't use seconds +event = PageView(true_timestamp=time.time()) +``` + +## Event-Specific Patterns + +### PageView Events +```python +# ✅ Complete PageView with all fields +page_view = PageView( + page_url="https://example.com/products", + page_title="Products", + referrer="https://example.com/home" +) + +# ❌ Missing required page_url +page_view = PageView(page_title="Products") +``` + +### StructuredEvent Pattern +```python +# ✅ Use descriptive 
category/action pairs +event = StructuredEvent( + category="ecommerce", + action="add-to-cart", + label="SKU-123", + property_="size:XL", + value=29.99 +) + +# ❌ Generic naming loses meaning +event = StructuredEvent("event", "click") +``` + +### SelfDescribing Events +```python +# ✅ Custom events with Iglu schemas +purchase_event = SelfDescribing( + SelfDescribingJson( + "iglu:com.acme/purchase/jsonschema/2-0-0", + { + "orderId": "ORD-123", + "total": 99.99, + "currency": "USD" + } + ) +) + +# ❌ Missing schema version +event = SelfDescribing( + SelfDescribingJson("iglu:com.acme/purchase", {...}) +) +``` + +### ScreenView Pattern (Mobile) +```python +# ✅ Mobile screen tracking with ID +screen = ScreenView( + name="ProductDetailScreen", + id_="screen-456", + previous_name="ProductListScreen" +) + +# ❌ Using PageView for mobile apps +page = PageView(page_url="app://product-detail") +``` + +## Event Validation Rules + +### Required Fields by Event Type +- **PageView**: `page_url` (required), `page_title`, `referrer` +- **StructuredEvent**: `category`, `action` (required), `label`, `property_`, `value` +- **SelfDescribing**: `event_json` (SelfDescribingJson required) +- **ScreenView**: `name` or `id_` (at least one required) +- **PagePing**: `page_url` (required) + +### Schema Validation Pattern +```python +# ✅ Validate schema format +SCHEMA_PATTERN = r"^iglu:[a-zA-Z0-9-_.]+/[a-zA-Z0-9-_]+/" +SCHEMA_PATTERN += r"[a-zA-Z0-9-_]+/[0-9]+-[0-9]+-[0-9]+$" + +# ❌ Invalid schema formats +"iglu:com.acme/event" # Missing version +"com.acme/event/1-0-0" # Missing iglu: prefix +``` + +## Payload Building Pattern + +### Internal Payload Construction +```python +# ✅ Event classes handle payload internally +def build_payload(self, encode_base64, json_encoder, subject): + # Add event-specific fields + self.payload.add("e", "pv") # Page view type + self.payload.add("url", self.page_url) + + # Let base class handle common fields + return super().build_payload(encode_base64, json_encoder, 
subject) + +# ❌ Don't expose payload building to users +event.payload = Payload() +event.payload.add("custom", "field") +``` + +## Testing Event Classes + +### Unit Test Pattern +```python +# ✅ Test event construction and validation +def test_page_view_required_fields(): + with self.assertRaises(TypeError): + PageView() # Missing required page_url + + event = PageView(page_url="https://test.com") + assert event.page_url == "https://test.com" + +# ✅ Test payload generation +def test_event_payload(): + event = PageView(page_url="https://test.com") + payload = event.build_payload(False, None, None) + assert payload.get()["url"] == "https://test.com" +``` + +### Context Testing Pattern +```python +# ✅ Test context attachment +def test_event_context(): + context = SelfDescribingJson(schema, data) + event = PageView(page_url="...", context=[context]) + + payload = event.build_payload(True, None, None) + assert "cx" in payload.get() # Base64 context +``` + +## Common Event Pitfalls + +### Timestamp Confusion +```python +# ❌ Mixing timestamp types +event.true_timestamp = "2024-01-01" # String not allowed +event.true_timestamp = datetime.now() # Use milliseconds + +# ✅ Consistent millisecond timestamps +event.true_timestamp = int(time.time() * 1000) +``` + +### Context Array Management +```python +# ❌ Modifying context after creation +event.context.append(new_context) # Unexpected behavior + +# ✅ Set complete context at creation +all_contexts = [context1, context2] +event = PageView(page_url="...", context=all_contexts) +``` + +### Schema Version Control +```python +# ❌ Hardcoding schema versions +schema = "iglu:com.acme/event/jsonschema/1-0-0" + +# ✅ Centralize schema definitions +PURCHASE_SCHEMA = "iglu:com.acme/purchase/jsonschema/2-1-0" +event = SelfDescribing(SelfDescribingJson(PURCHASE_SCHEMA, data)) +``` + +## Event Migration Guide + +### Upgrading Event Schemas +```python +# From version 1-0-0 to 2-0-0 +# ✅ Handle backward compatibility +def 
create_purchase_event(data): + if "items" in data: # New schema + schema = "iglu:.../purchase/jsonschema/2-0-0" + else: # Old schema + schema = "iglu:.../purchase/jsonschema/1-0-0" + + return SelfDescribing(SelfDescribingJson(schema, data)) +``` + +## Quick Reference + +### Event Type Selection +- **PageView**: Traditional web page tracking +- **ScreenView**: Mobile app screen tracking +- **StructuredEvent**: Generic business events +- **SelfDescribing**: Complex custom events +- **PagePing**: Engagement/time-on-page tracking + +### Event Field Checklist +- [ ] Required fields provided +- [ ] Timestamps in milliseconds +- [ ] Contexts as SelfDescribingJson array +- [ ] Valid Iglu schema format +- [ ] Event-specific subject if needed + +### Common Event Methods +- `build_payload()`: Internal payload generation +- `event_subject`: Per-event user context +- `context`: Custom context array +- `true_timestamp`: User-defined timestamp + +## Contributing to events/CLAUDE.md + +When modifying event implementations or adding new event types: + +1. **Follow the Event base class pattern** - All events must inherit from Event +2. **Implement required abstract methods** - Ensure payload building works correctly +3. **Document required fields** - Update this file with new event requirements +4. **Add comprehensive tests** - Test construction, validation, and payload generation +5. **Maintain backward compatibility** - Don't break existing event APIs +6. **Update schema constants** - Add new schemas to constants.py if needed \ No newline at end of file diff --git a/snowplow_tracker/events/__init__.py b/snowplow_tracker/events/__init__.py new file mode 100644 index 00000000..0f75c84f --- /dev/null +++ b/snowplow_tracker/events/__init__.py @@ -0,0 +1,22 @@ +# """ +# __init__.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. 
+ +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ +from snowplow_tracker.events.event import Event +from snowplow_tracker.events.page_ping import PagePing +from snowplow_tracker.events.page_view import PageView +from snowplow_tracker.events.self_describing import SelfDescribing +from snowplow_tracker.events.structured_event import StructuredEvent +from snowplow_tracker.events.screen_view import ScreenView diff --git a/snowplow_tracker/events/event.py b/snowplow_tracker/events/event.py new file mode 100644 index 00000000..fb300b87 --- /dev/null +++ b/snowplow_tracker/events/event.py @@ -0,0 +1,136 @@ +# """ +# event.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ + +from typing import Optional, List +from snowplow_tracker import payload +from snowplow_tracker.subject import Subject + +from snowplow_tracker.self_describing_json import SelfDescribingJson + +from snowplow_tracker.constants import CONTEXT_SCHEMA +from snowplow_tracker.typing import JsonEncoderFunction, PayloadDict + + +class Event(object): + """ + Event class which contains + elements that can be set in all events. These are context, trueTimestamp, and Subject. + + Context is a list of custom SelfDescribingJson entities. + TrueTimestamp is a user-defined timestamp. + Subject is an event-specific Subject. Its fields will override those of the + Tracker-associated Subject, if present. + + """ + + def __init__( + self, + dict_: Optional[PayloadDict] = None, + event_subject: Optional[Subject] = None, + context: Optional[List[SelfDescribingJson]] = None, + true_timestamp: Optional[float] = None, + ) -> None: + """ + Constructor + :param dict_: Optional Dictionary to be added to the Events Payload + :type dict_: dict(string:\\*) | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :param context: Custom context for the event + :type context: context_array | None + :param true_timestamp: Optional event timestamp in milliseconds + :type true_timestamp: int | float | None + + """ + self.payload = payload.Payload(dict_=dict_) + self.event_subject = event_subject + self.context = context or [] + self.true_timestamp = true_timestamp + + def build_payload( + self, + encode_base64: bool, + json_encoder: Optional[JsonEncoderFunction], + subject: Optional[Subject] = None, + ) -> "payload.Payload": + """ + :param encode_base64: Whether JSONs in the payload should be base-64 encoded + :type encode_base64: bool + :param json_encoder: Custom JSON serializer that gets called on non-serializable object + :type json_encoder: function | None + :param subject: Optional per event subject + :type subject: subject | None + :rtype: 
payload.Payload + """ + if len(self.context) > 0: + context_jsons = list(map(lambda c: c.to_json(), self.context)) + context_envelope = SelfDescribingJson( + CONTEXT_SCHEMA, context_jsons + ).to_json() + self.payload.add_json( + context_envelope, encode_base64, "cx", "co", json_encoder + ) + + if isinstance( + self.true_timestamp, + ( + int, + float, + ), + ): + self.payload.add("ttm", int(self.true_timestamp)) + + if self.event_subject is not None: + fin_payload_dict = self.event_subject.combine_subject(subject) + else: + fin_payload_dict = {} if subject is None else subject.standard_nv_pairs + + self.payload.add_dict(fin_payload_dict) + return self.payload + + @property + def event_subject(self) -> Optional[Subject]: + """ + Optional per event subject + """ + return self._event_subject + + @event_subject.setter + def event_subject(self, value: Optional[Subject]): + self._event_subject = value + + @property + def context(self) -> List[SelfDescribingJson]: + """ + Custom context for the event + """ + return self._context + + @context.setter + def context(self, value: List[SelfDescribingJson]): + self._context = value + + @property + def true_timestamp(self) -> Optional[float]: + """ + Optional event timestamp in milliseconds + """ + return self._true_timestamp + + @true_timestamp.setter + def true_timestamp(self, value: Optional[float]): + self._true_timestamp = value diff --git a/snowplow_tracker/events/page_ping.py b/snowplow_tracker/events/page_ping.py new file mode 100644 index 00000000..43bbb210 --- /dev/null +++ b/snowplow_tracker/events/page_ping.py @@ -0,0 +1,155 @@ +# """ +# page_ping.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. 
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the Apache License Version 2.0 is distributed on
+# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied. See the Apache License Version 2.0 for the specific
+# language governing permissions and limitations there under.
+# """
+from snowplow_tracker.events.event import Event
+from typing import Optional, List
+from snowplow_tracker.self_describing_json import SelfDescribingJson
+from snowplow_tracker.subject import Subject
+from snowplow_tracker.contracts import non_empty_string
+
+
+class PagePing(Event):
+    """
+    Constructs a PagePing event object.
+
+    When tracked, generates a "pp" or "page_ping" event.
+
+    """
+
+    def __init__(
+        self,
+        page_url: str,
+        page_title: Optional[str] = None,
+        referrer: Optional[str] = None,
+        min_x: Optional[int] = None,
+        max_x: Optional[int] = None,
+        min_y: Optional[int] = None,
+        max_y: Optional[int] = None,
+        event_subject: Optional[Subject] = None,
+        context: Optional[List[SelfDescribingJson]] = None,
+        true_timestamp: Optional[float] = None,
+    ) -> None:
+        """
+        :param page_url: URL of the viewed page
+        :type page_url: non_empty_string
+        :param page_title: Title of the viewed page
+        :type page_title: string_or_none
+        :param referrer: Referrer of the page
+        :type referrer: string_or_none
+        :param min_x: Minimum page x offset seen in the last ping period
+        :type min_x: int | None
+        :param max_x: Maximum page x offset seen in the last ping period
+        :type max_x: int | None
+        :param min_y: Minimum page y offset seen in the last ping period
+        :type min_y: int | None
+        :param max_y: Maximum page y offset seen in the last ping period
+        :type max_y: int | None
+        :param event_subject: Optional per event subject
+        :type event_subject: subject | None
+        :param context: Custom context for the event
+        :type context: context_array | None
+        :param true_timestamp: Optional event timestamp in milliseconds
+        :type true_timestamp: int | float | None
+        """
+        super(PagePing, self).__init__(
+            event_subject=event_subject, context=context, true_timestamp=true_timestamp
+        )
+        self.payload.add("e", "pp")
+        self.page_url = page_url
+        self.page_title = page_title
+        self.referrer = referrer
+        self.min_x = min_x
+        self.max_x = max_x
+        self.min_y = min_y
+        self.max_y = max_y
+
+    @property
+    def page_url(self) -> str:
+        """
+        URL of the viewed page
+        """
+        return self.payload.nv_pairs["url"]
+
+    @page_url.setter
+    def page_url(self, value: str):
+        non_empty_string(value)
+        self.payload.add("url", value)
+
+    @property
+    def page_title(self) -> Optional[str]:
+        """
+        Title of the viewed page
+        """
+        return self.payload.nv_pairs.get("page")
+
+    @page_title.setter
+    def page_title(self, value: Optional[str]):
+        self.payload.add("page", value)
+
+    @property
+    def referrer(self) -> Optional[str]:
+        """
+        The referrer of the page
+        """
+        return self.payload.nv_pairs.get("refr")
+
+    @referrer.setter
+    def referrer(self, value: Optional[str]):
+        self.payload.add("refr", value)
+
+    @property
+    def min_x(self) -> Optional[int]:
+        """
+        Minimum page x offset seen in the last ping period
+        """
+        return self.payload.nv_pairs.get("pp_mix")
+
+    @min_x.setter
+    def min_x(self, value: Optional[int]):
+        self.payload.add("pp_mix", value)
+
+    @property
+    def max_x(self) -> Optional[int]:
+        """
+        Maximum page x offset seen in the last ping period
+        """
+        return self.payload.nv_pairs.get("pp_max")
+
+    @max_x.setter
+    def max_x(self, value: Optional[int]):
+        self.payload.add("pp_max", value)
+
+    @property
+    def min_y(self) -> Optional[int]:
+        """
+        Minimum page y offset seen in the last ping period
+        """
+        return self.payload.nv_pairs.get("pp_miy")
+
+    @min_y.setter
+    def min_y(self, value: Optional[int]):
+        self.payload.add("pp_miy", value)
+
+    @property
+    def max_y(self) -> Optional[int]:
+        """
+        Maximum page y offset seen in the last ping period
+        """
+        return self.payload.nv_pairs.get("pp_may")
+
+    @max_y.setter
+    def max_y(self, value: Optional[int]):
+        self.payload.add("pp_may", value)
diff --git a/snowplow_tracker/events/page_view.py b/snowplow_tracker/events/page_view.py
new file mode 100644
index 00000000..53e44bb6
--- /dev/null
+++ b/snowplow_tracker/events/page_view.py
@@ -0,0 +1,95 @@
+# """
+# page_view.py
+
+# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved.
+
+# This program is licensed to you under the Apache License Version 2.0,
+# and you may not use this file except in compliance with the Apache License
+# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at
+# http://www.apache.org/licenses/LICENSE-2.0.
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the Apache License Version 2.0 is distributed on
+# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied. See the Apache License Version 2.0 for the specific
+# language governing permissions and limitations there under.
+# """
+from snowplow_tracker.events.event import Event
+from typing import Optional, List
+from snowplow_tracker.subject import Subject
+from snowplow_tracker.self_describing_json import SelfDescribingJson
+from snowplow_tracker.contracts import non_empty_string
+
+
+class PageView(Event):
+    """
+    Constructs a PageView event object.
+
+    When tracked, generates a "pv" or "page_view" event.
+
+    """
+
+    def __init__(
+        self,
+        page_url: str,
+        page_title: Optional[str] = None,
+        referrer: Optional[str] = None,
+        event_subject: Optional[Subject] = None,
+        context: Optional[List[SelfDescribingJson]] = None,
+        true_timestamp: Optional[float] = None,
+    ) -> None:
+        """
+        :param page_url: URL of the viewed page
+        :type page_url: non_empty_string
+        :param page_title: Title of the viewed page
+        :type page_title: string_or_none
+        :param referrer: Referrer of the page
+        :type referrer: string_or_none
+        :param event_subject: Optional per event subject
+        :type event_subject: subject | None
+        :param context: Custom context for the event
+        :type context: context_array | None
+        :param true_timestamp: Optional event timestamp in milliseconds
+        :type true_timestamp: int | float | None
+        """
+        super(PageView, self).__init__(
+            event_subject=event_subject, context=context, true_timestamp=true_timestamp
+        )
+        self.payload.add("e", "pv")
+        self.page_url = page_url
+        self.page_title = page_title
+        self.referrer = referrer
+
+    @property
+    def page_url(self) -> str:
+        """
+        URL of the viewed page
+        """
+        return self.payload.nv_pairs["url"]
+
+    @page_url.setter
+    def page_url(self, value: str):
+        non_empty_string(value)
+        self.payload.add("url", value)
+
+    @property
+    def page_title(self) -> Optional[str]:
+        """
+        Title of the viewed page
+        """
+        return self.payload.nv_pairs.get("page")
+
+    @page_title.setter
+    def page_title(self, value: Optional[str]):
+        self.payload.add("page", value)
+
+    @property
+    def referrer(self) -> Optional[str]:
+        """
+        The referrer of the page
+        """
+        return self.payload.nv_pairs.get("refr")
+
+    @referrer.setter
+    def referrer(self, value: Optional[str]):
+        self.payload.add("refr", value)
diff --git a/snowplow_tracker/events/screen_view.py b/snowplow_tracker/events/screen_view.py
new file mode 100644
index 00000000..6b4af927
--- /dev/null
+++ b/snowplow_tracker/events/screen_view.py
@@ -0,0 +1,199 @@
+# """
+# screen_view.py
+
+# Copyright (c)
2013-2023 Snowplow Analytics Ltd. All rights reserved.
+
+# This program is licensed to you under the Apache License Version 2.0,
+# and you may not use this file except in compliance with the Apache License
+# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at
+# http://www.apache.org/licenses/LICENSE-2.0.
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the Apache License Version 2.0 is distributed on
+# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied. See the Apache License Version 2.0 for the specific
+# language governing permissions and limitations there under.
+# """
+
+from typing import Dict, Optional, List
+from snowplow_tracker.typing import JsonEncoderFunction
+from snowplow_tracker.events.event import Event
+from snowplow_tracker.events.self_describing import SelfDescribing
+from snowplow_tracker import SelfDescribingJson
+from snowplow_tracker.constants import (
+    MOBILE_SCHEMA_PATH,
+    SCHEMA_TAG,
+)
+from snowplow_tracker import payload
+from snowplow_tracker.subject import Subject
+from snowplow_tracker.contracts import non_empty_string
+
+
+class ScreenView(Event):
+    """
+    Constructs a ScreenView event object.
+
+    When tracked, generates a SelfDescribing event (event type "ue").
+
+    Schema: `iglu:com.snowplowanalytics.mobile/screen_view/jsonschema/1-0-0`
+    """
+
+    def __init__(
+        self,
+        id_: str,
+        name: str,
+        type: Optional[str] = None,
+        previous_name: Optional[str] = None,
+        previous_id: Optional[str] = None,
+        previous_type: Optional[str] = None,
+        transition_type: Optional[str] = None,
+        event_subject: Optional[Subject] = None,
+        context: Optional[List[SelfDescribingJson]] = None,
+        true_timestamp: Optional[float] = None,
+    ) -> None:
+        """
+        :param id_: Screen view ID. This must be of type UUID.
+        :type id_: string
+        :param name: The name of the screen view event
+        :type name: string
+        :param type: The type of screen that was viewed e.g. feed / carousel.
+        :type type: string | None
+        :param previous_name: The name of the previous screen.
+        :type previous_name: string | None
+        :param previous_id: The screenview ID of the previous screenview.
+        :type previous_id: string | None
+        :param previous_type: The screen type of the previous screenview
+        :type previous_type: string | None
+        :param transition_type: The type of transition that led to the screen being viewed.
+        :type transition_type: string | None
+        :param event_subject: Optional per event subject
+        :type event_subject: subject | None
+        :param context: Custom context for the event
+        :type context: context_array | None
+        :param true_timestamp: Optional event timestamp in milliseconds
+        :type true_timestamp: int | float | None
+        """
+        super(ScreenView, self).__init__(
+            event_subject=event_subject, context=context, true_timestamp=true_timestamp
+        )
+        self.screen_view_properties: Dict[str, str] = {}
+        self.id_ = id_
+        self.name = name
+        self.type = type
+        self.previous_name = previous_name
+        self.previous_id = previous_id
+        self.previous_type = previous_type
+        self.transition_type = transition_type
+
+    @property
+    def id_(self) -> str:
+        """
+        Screen view ID. This must be of type UUID.
+        """
+        return self.screen_view_properties["id"]
+
+    @id_.setter
+    def id_(self, value: str):
+        non_empty_string(value)
+        self.screen_view_properties["id"] = value
+
+    @property
+    def name(self) -> str:
+        """
+        The name of the screen view event
+        """
+        return self.screen_view_properties["name"]
+
+    @name.setter
+    def name(self, value: str):
+        non_empty_string(value)
+        self.screen_view_properties["name"] = value
+
+    @property
+    def type(self) -> Optional[str]:
+        """
+        The type of screen that was viewed e.g. feed / carousel
+        """
+        return self.screen_view_properties.get("type")
+
+    @type.setter
+    def type(self, value: Optional[str]):
+        if value is not None:
+            self.screen_view_properties["type"] = value
+
+    @property
+    def previous_name(self) -> Optional[str]:
+        """
+        The name of the previous screen.
+        """
+        return self.screen_view_properties.get("previousName")
+
+    @previous_name.setter
+    def previous_name(self, value: Optional[str]):
+        if value is not None:
+            self.screen_view_properties["previousName"] = value
+
+    @property
+    def previous_id(self) -> Optional[str]:
+        """
+        The screenview ID of the previous screenview.
+        """
+        return self.screen_view_properties.get("previousId")
+
+    @previous_id.setter
+    def previous_id(self, value: Optional[str]):
+        if value is not None:
+            self.screen_view_properties["previousId"] = value
+
+    @property
+    def previous_type(self) -> Optional[str]:
+        """
+        The screen type of the previous screenview
+        """
+        return self.screen_view_properties.get("previousType")
+
+    @previous_type.setter
+    def previous_type(self, value: Optional[str]):
+        if value is not None:
+            self.screen_view_properties["previousType"] = value
+
+    @property
+    def transition_type(self) -> Optional[str]:
+        """
+        The type of transition that led to the screen being viewed
+        """
+        return self.screen_view_properties.get("transitionType")
+
+    @transition_type.setter
+    def transition_type(self, value: Optional[str]):
+        if value is not None:
+            self.screen_view_properties["transitionType"] = value
+
+    def build_payload(
+        self,
+        encode_base64: bool,
+        json_encoder: Optional[JsonEncoderFunction],
+        subject: Optional[Subject] = None,
+    ) -> "payload.Payload":
+        """
+        :param encode_base64: Whether JSONs in the payload should be base-64 encoded
+        :type encode_base64: bool
+        :param json_encoder: Custom JSON serializer that gets called on non-serializable object
+        :type json_encoder: function | None
+        :param subject: Optional per event subject
+        :type subject: subject | None
+        :rtype: payload.Payload
+        """
+        event_json = SelfDescribingJson(
+            "%s/screen_view/%s/1-0-0" % (MOBILE_SCHEMA_PATH, SCHEMA_TAG),
+            self.screen_view_properties,
+        )
+        self_describing = SelfDescribing(
+            event_json=event_json,
+            event_subject=self.event_subject,
+            context=self.context,
+            true_timestamp=self.true_timestamp,
+        )
+        return self_describing.build_payload(
+            encode_base64, json_encoder, subject=subject
+        )
diff --git a/snowplow_tracker/events/self_describing.py b/snowplow_tracker/events/self_describing.py
new file mode 100644
index 00000000..e560eb72
--- /dev/null
+++ b/snowplow_tracker/events/self_describing.py
@@ -0,0 +1,98
@@
+# """
+# self_describing.py
+
+# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved.
+
+# This program is licensed to you under the Apache License Version 2.0,
+# and you may not use this file except in compliance with the Apache License
+# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at
+# http://www.apache.org/licenses/LICENSE-2.0.
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the Apache License Version 2.0 is distributed on
+# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied. See the Apache License Version 2.0 for the specific
+# language governing permissions and limitations there under.
+# """
+from typing import Optional, List
+from snowplow_tracker.typing import JsonEncoderFunction
+from snowplow_tracker.events.event import Event
+from snowplow_tracker import SelfDescribingJson
+from snowplow_tracker.constants import UNSTRUCT_EVENT_SCHEMA
+from snowplow_tracker import payload
+from snowplow_tracker.subject import Subject
+from snowplow_tracker.contracts import non_empty
+
+
+class SelfDescribing(Event):
+    """
+    Constructs a SelfDescribing event object.
+
+    This is a customisable event type which allows you to track anything describable
+    by a JsonSchema.
+
+    When tracked, generates a self-describing event (event type "ue").
+    """
+
+    def __init__(
+        self,
+        event_json: SelfDescribingJson,
+        event_subject: Optional[Subject] = None,
+        context: Optional[List[SelfDescribingJson]] = None,
+        true_timestamp: Optional[float] = None,
+    ) -> None:
+        """
+        :param event_json: The properties of the event. Has two fields:
+            A "data" field containing the event properties and
+            A "schema" field identifying the schema against which the data is validated
+        :type event_json: self_describing_json
+        :param event_subject: Optional per event subject
+        :type event_subject: subject | None
+        :param context: Custom context for the event
+        :type context: context_array | None
+        :param true_timestamp: Optional event timestamp in milliseconds
+        :type true_timestamp: int | float | None
+        """
+        super(SelfDescribing, self).__init__(
+            event_subject=event_subject, context=context, true_timestamp=true_timestamp
+        )
+        self.payload.add("e", "ue")
+        self.event_json = event_json
+
+    @property
+    def event_json(self) -> SelfDescribingJson:
+        """
+        The properties of the event. Has two fields:
+        A "data" field containing the event properties and
+        A "schema" field identifying the schema against which the data is validated
+        """
+        return self._event_json
+
+    @event_json.setter
+    def event_json(self, value: SelfDescribingJson):
+        self._event_json = value
+
+    def build_payload(
+        self,
+        encode_base64: bool,
+        json_encoder: Optional[JsonEncoderFunction],
+        subject: Optional[Subject] = None,
+    ) -> "payload.Payload":
+        """
+        :param encode_base64: Whether JSONs in the payload should be base-64 encoded
+        :type encode_base64: bool
+        :param json_encoder: Custom JSON serializer that gets called on non-serializable object
+        :type json_encoder: function | None
+        :param subject: Optional per event subject
+        :type subject: subject | None
+        :rtype: payload.Payload
+        """
+
+        envelope = SelfDescribingJson(
+            UNSTRUCT_EVENT_SCHEMA, self.event_json.to_json()
+        ).to_json()
+        self.payload.add_json(envelope, encode_base64, "ue_px", "ue_pr", json_encoder)
+
+        return super(SelfDescribing, self).build_payload(
+            encode_base64=encode_base64, json_encoder=json_encoder, subject=subject
+        )
diff --git a/snowplow_tracker/events/structured_event.py b/snowplow_tracker/events/structured_event.py
new file mode 100644
index
00000000..23abafa8 --- /dev/null +++ b/snowplow_tracker/events/structured_event.py @@ -0,0 +1,134 @@ +# """ +# struct_event.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ +from snowplow_tracker.events.event import Event +from typing import Optional, List, Union +from snowplow_tracker.subject import Subject +from snowplow_tracker.self_describing_json import SelfDescribingJson +from snowplow_tracker.contracts import non_empty_string + + +class StructuredEvent(Event): + """ + Constructs a Structured event object. + + This event type is provided to be roughly equivalent to Google Analytics-style events. + Note that it is not automatically clear what data should be placed in what field. + To aid data quality and modeling, agree on business-wide definitions when designing + your tracking strategy. + + We recommend using SelfDescribing - fully custom - events instead. + + When tracked, generates a "struct" or "se" event. 
+ """ + + def __init__( + self, + category: str, + action: str, + label: Optional[str] = None, + property_: Optional[str] = None, + value: Optional[Union[int, float]] = None, + event_subject: Optional[Subject] = None, + context: Optional[List[SelfDescribingJson]] = None, + true_timestamp: Optional[float] = None, + ) -> None: + """ + :param category: Category of the event + :type category: non_empty_string + :param action: The event itself + :type action: non_empty_string + :param label: Refer to the object the action is + performed on + :type label: string_or_none + :param property_: Property associated with either the action + or the object + :type property_: string_or_none + :param value: A value associated with the user action + :type value: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :param context: Custom context for the event + :type context: context_array | None + :param true_timestamp: Optional event timestamp in milliseconds + :type true_timestamp: int | float | None + """ + super(StructuredEvent, self).__init__( + event_subject=event_subject, context=context, true_timestamp=true_timestamp + ) + self.payload.add("e", "se") + self.category = category + self.action = action + self.label = label + self.property_ = property_ + self.value = value + + @property + def category(self) -> Optional[str]: + """ + Category of the event + """ + return self.payload.nv_pairs.get("se_ca") + + @category.setter + def category(self, value: str): + non_empty_string(value) + self.payload.add("se_ca", value) + + @property + def action(self) -> Optional[str]: + """ + The event itself + """ + return self.payload.nv_pairs.get("se_ac") + + @action.setter + def action(self, value: str): + non_empty_string(value) + self.payload.add("se_ac", value) + + @property + def label(self) -> Optional[str]: + """ + Refer to the object the action is performed on + """ + return self.payload.nv_pairs.get("se_la") + + @label.setter + 
def label(self, value: Optional[str]): + self.payload.add("se_la", value) + + @property + def property_(self) -> Optional[str]: + """ + Property associated with either the action or the object + """ + return self.payload.nv_pairs.get("se_pr") + + @property_.setter + def property_(self, value: Optional[str]): + self.payload.add("se_pr", value) + + @property + def value(self) -> Optional[Union[int, float]]: + """ + A value associated with the user action + """ + return self.payload.nv_pairs.get("se_va") + + @value.setter + def value(self, value: Optional[Union[int, float]]): + self.payload.add("se_va", value) diff --git a/snowplow_tracker/payload.py b/snowplow_tracker/payload.py index c7c788b5..18d1bf4d 100644 --- a/snowplow_tracker/payload.py +++ b/snowplow_tracker/payload.py @@ -1,36 +1,30 @@ -""" - payload.py +# """ +# payload.py - Copyright (c) 2013-2014 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. - This program is licensed to you under the Apache License Version 2.0, - and you may not use this file except in compliance with the Apache License - Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - http://www.apache.org/licenses/LICENSE-2.0. +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. - Unless required by applicable law or agreed to in writing, - software distributed under the Apache License Version 2.0 is distributed on - an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - express or implied. See the Apache License Version 2.0 for the specific - language governing permissions and limitations there under. 
+# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ - Authors: Anuj More, Alex Dean, Fred Blundun - Copyright: Copyright (c) 2013-2014 Snowplow Analytics Ltd - License: Apache License Version 2.0 -""" - -import random -import time import json import base64 -from contracts import contract +from typing import Any, Optional +from snowplow_tracker.typing import PayloadDict, JsonEncoderFunction class Payload: - - def __init__(self, dict_=None): + def __init__(self, dict_: Optional[PayloadDict] = None) -> None: """ - Constructor + Constructor """ self.nv_pairs = {} @@ -39,59 +33,64 @@ def __init__(self, dict_=None): for f in dict_: self.nv_pairs[f] = dict_[f] - """ Methods to add to the payload """ - def add(self, name, value): + def add(self, name: str, value: Any) -> None: """ - Add a name value pair to the Payload object + Add a name value pair to the Payload object """ if not (value == "" or value is None): self.nv_pairs[name] = value - @contract - def add_dict(self, dict_, base64=False): + def add_dict(self, dict_: PayloadDict, base64: bool = False) -> None: """ - Add a dict of name value pairs to the Payload object + Add a dict of name value pairs to the Payload object - :param dict_: Dictionary to be added to the Payload - :type dict_: dict(string:*) + :param dict_: Dictionary to be added to the Payload + :type dict_: dict(string:\\*) """ for f in dict_: self.add(f, dict_[f]) - @contract - def add_json(self, dict_, encode_base64, type_when_encoded, type_when_not_encoded): + def add_json( + self, + dict_: Optional[PayloadDict], + encode_base64: bool, + type_when_encoded: str, + type_when_not_encoded: str, + json_encoder: Optional[JsonEncoderFunction] = None, + ) 
-> None: """ - Add an encoded or unencoded JSON to the payload - - :param dict_: Custom context for the event - :type dict_: dict(string:*) | None - :param encode_base64: If the payload is base64 encoded - :type encode_base64: bool - :param type_when_encoded: Name of the field when encode_base64 is set - :type type_when_encoded: string - :param type_when_not_encoded: Name of the field when encode_base64 is not set - :type type_when_not_encoded: string + Add an encoded or unencoded JSON to the payload + + :param dict_: Custom context for the event + :type dict_: dict(string:\\*) | None + :param encode_base64: If the payload is base64 encoded + :type encode_base64: bool + :param type_when_encoded: Name of the field when encode_base64 is set + :type type_when_encoded: string + :param type_when_not_encoded: Name of the field when encode_base64 is not set + :type type_when_not_encoded: string + :param json_encoder: Custom JSON serializer that gets called on non-serializable object + :type json_encoder: function | None """ if dict_ is not None and dict_ != {}: - json_dict = json.dumps(dict_, ensure_ascii=False) + json_dict = json.dumps(dict_, ensure_ascii=False, default=json_encoder) if encode_base64: - encoded_dict = base64.urlsafe_b64encode(json_dict.encode("ascii")) - if not isinstance(encoded_dict, str): - encoded_dict = encoded_dict.decode("utf-8") - self.add(type_when_encoded, encoded_dict) + encoded_dict = base64.urlsafe_b64encode(json_dict.encode("utf-8")) + encoded_dict_str = encoded_dict.decode("utf-8") + self.add(type_when_encoded, encoded_dict_str) else: self.add(type_when_not_encoded, json_dict) - def get(self): + def get(self) -> PayloadDict: """ - Returns the context dictionary from the Payload object + Returns the context dictionary from the Payload object """ return self.nv_pairs diff --git a/snowplow_tracker/py.typed b/snowplow_tracker/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/snowplow_tracker/redis_worker.py 
b/snowplow_tracker/redis_worker.py deleted file mode 100644 index 60cb0ffe..00000000 --- a/snowplow_tracker/redis_worker.py +++ /dev/null @@ -1,79 +0,0 @@ -""" - redis_worker.py - - Copyright (c) 2013-2014 Snowplow Analytics Ltd. All rights reserved. - - This program is licensed to you under the Apache License Version 2.0, - and you may not use this file except in compliance with the Apache License - Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - http://www.apache.org/licenses/LICENSE-2.0. - - Unless required by applicable law or agreed to in writing, - software distributed under the Apache License Version 2.0 is distributed on - an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - express or implied. See the Apache License Version 2.0 for the specific - language governing permissions and limitations there under. - - Authors: Anuj More, Alex Dean, Fred Blundun - Copyright: Copyright (c) 2013-2014 Snowplow Analytics Ltd - License: Apache License Version 2.0 -""" - - -import redis -import gevent -from gevent.pool import Pool -import json -import signal - -DEFAULT_KEY = "snowplow" - -class RedisWorker(object): - """ - Asynchronously take events from redis and send them to an emitter - """ - - def __init__(self, emitter, rdb=None, key=DEFAULT_KEY): - self.emitter = emitter - self.key = key - if rdb is None: - rdb = redis.StrictRedis() - self.rdb = rdb - self.pool = Pool(5) - - signal.signal(signal.SIGTERM, self.request_shutdown) - signal.signal(signal.SIGINT, self.request_shutdown) - signal.signal(signal.SIGQUIT, self.request_shutdown) - - def send(self, payload): - """ - Send an event to an emitter - """ - self.emitter.input(payload) - - def pop_payload(self): - """ - Get a single event from Redis and send it - If the Redis queue is empty, sleep to avoid making continual requests - """ - payload = self.rdb.lpop(self.key) - if payload: - self.pool.spawn(self.send, json.loads(payload.decode("utf-8"))) - else: - 
gevent.sleep(5) - - def run(self): - """ - Run indefinitely - """ - self._shutdown = False - - while not self._shutdown: - self.pop_payload() - self.pool.join(timeout=20) - - def request_shutdown(self, *args): - """ - Halt the worker - """ - self._shutdown = True diff --git a/snowplow_tracker/self_describing_json.py b/snowplow_tracker/self_describing_json.py index b47f0bf4..8f7b65ea 100644 --- a/snowplow_tracker/self_describing_json.py +++ b/snowplow_tracker/self_describing_json.py @@ -1,39 +1,43 @@ -""" - self_describing_json.py +# """ +# self_describing_json.py - Copyright (c) 2013-2014 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. - This program is licensed to you under the Apache License Version 2.0, - and you may not use this file except in compliance with the Apache License - Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - http://www.apache.org/licenses/LICENSE-2.0. +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. - Unless required by applicable law or agreed to in writing, - software distributed under the Apache License Version 2.0 is distributed on - an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - express or implied. See the Apache License Version 2.0 for the specific - language governing permissions and limitations there under. - - Authors: Anuj More, Alex Dean, Fred Blundun - Copyright: Copyright (c) 2013-2014 Snowplow Analytics Ltd - License: Apache License Version 2.0 -""" +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. 
See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ import json +from typing import Union +from snowplow_tracker.typing import PayloadDict, PayloadDictList +from snowplow_tracker.contracts import non_empty_string -class SelfDescribingJson(object): - def __init__(self, schema, data): +class SelfDescribingJson(object): + def __init__(self, schema: str, data: Union[PayloadDict, PayloadDictList]) -> None: self.schema = schema self.data = data - def to_json(self): - return { - "schema": self.schema, - "data": self.data - } + @property + def schema(self) -> str: + return self._schema - def to_string(self): - return json.dumps(self.to_json()) + @schema.setter + def schema(self, value: str): + non_empty_string(value) + self._schema = value + + def to_json(self) -> PayloadDict: + return {"schema": self.schema, "data": self.data} + def to_string(self) -> str: + return json.dumps(self.to_json()) diff --git a/snowplow_tracker/snowplow.py b/snowplow_tracker/snowplow.py new file mode 100644 index 00000000..daa1434b --- /dev/null +++ b/snowplow_tracker/snowplow.py @@ -0,0 +1,161 @@ +# """ +# snowplow.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ + +import logging +from typing import Dict, Optional +from snowplow_tracker import ( + Tracker, + Emitter, + subject, + EmitterConfiguration, + TrackerConfiguration, +) +from snowplow_tracker.typing import Method + +# Logging +logging.basicConfig() +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +""" +Snowplow Class +""" + + +class Snowplow: + _trackers: Dict[str, Tracker] = {} + + @staticmethod + def create_tracker( + namespace: str, + endpoint: str, + method: Method = "post", + app_id: Optional[str] = None, + subject: Optional[subject.Subject] = None, + tracker_config: TrackerConfiguration = TrackerConfiguration(), + emitter_config: EmitterConfiguration = EmitterConfiguration(), + ) -> Tracker: + """ + Create a Snowplow tracker with a namespace and collector URL + + :param namespace: Name of the tracker + :type namespace: String + :param endpoint: The collector URL + :type endpoint: String + :param method: The HTTP request method. Defaults to post. + :type method: method + :param appId: Application ID + :type appId: String | None + :param subject: Subject to be tracked + :type subject: Subject | None + :param tracker_config: Tracker configuration + :type tracker_config: TrackerConfiguration + :param emitter_config: Emitter configuration + :type emitter_config: EmitterConfiguration + :rtype Tracker + """ + if endpoint is None: + raise TypeError("Emitter or Collector URL must be provided") + + emitter = Emitter( + endpoint=endpoint, + method=method, + batch_size=emitter_config.batch_size, + on_success=emitter_config.on_success, + on_failure=emitter_config.on_failure, + byte_limit=emitter_config.byte_limit, + request_timeout=emitter_config.request_timeout, + custom_retry_codes=emitter_config.custom_retry_codes, + event_store=emitter_config.event_store, + session=emitter_config.session, + ) + + tracker = Tracker( + namespace=namespace, + emitters=emitter, + app_id=app_id, + subject=subject, + encode_base64=tracker_config.encode_base64, + 
json_encoder=tracker_config.json_encoder, + ) + + return Snowplow.add_tracker(tracker) + + @classmethod + def add_tracker(cls, tracker: Tracker) -> Tracker: + """ + Add a Snowplow tracker to the Snowplow object + + :param tracker: Tracker object to add to Snowplow + :type tracker: Tracker + :rtype Tracker + """ + if not isinstance(tracker, Tracker): + logger.info("Tracker not provided.") + return None + + namespace = tracker.get_namespace() + + if namespace in cls._trackers.keys(): + raise TypeError("Tracker with this namespace already exists") + + cls._trackers[namespace] = tracker + logger.info("Tracker with namespace: '" + namespace + "' added to Snowplow") + return cls._trackers[namespace] + + @classmethod + def remove_tracker(cls, tracker: Tracker): + """ + Remove a Snowplow tracker from the Snowplow object if it exists + + :param tracker: Tracker object to remove from Snowplow + :type tracker: Tracker | None + """ + namespace = tracker.get_namespace() + cls.remove_tracker_by_namespace(namespace) + + @classmethod + def remove_tracker_by_namespace(cls, namespace: str): + """ + Remove a Snowplow tracker from the Snowplow object using it's namespace if it exists + + :param namespace: Tracker namespace to remove from Snowplow + :type tracker: String | None + """ + if not cls._trackers.pop(namespace, False): + logger.info("Tracker with namespace: '" + namespace + "' does not exist") + return + logger.info("Tracker with namespace: '" + namespace + "' removed from Snowplow") + + @classmethod + def reset(cls): + """ + Remove all active Snowplow trackers from the Snowplow object + """ + cls._trackers = {} + + @classmethod + def get_tracker(cls, namespace: str) -> Optional[Tracker]: + """ + Returns a Snowplow tracker from the Snowplow object if it exists + :param namespace: Snowplow tracker namespace + :type namespace: string + :rtype: Tracker + """ + if namespace in cls._trackers.keys(): + return cls._trackers[namespace] + return None diff --git 
a/snowplow_tracker/subject.py b/snowplow_tracker/subject.py index 85ab5e24..cbf29aa8 100644 --- a/snowplow_tracker/subject.py +++ b/snowplow_tracker/subject.py @@ -1,165 +1,188 @@ -""" - subject.py +# """ +# subject.py - Copyright (c) 2013-2014 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. - This program is licensed to you under the Apache License Version 2.0, - and you may not use this file except in compliance with the Apache License - Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - http://www.apache.org/licenses/LICENSE-2.0. +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. - Unless required by applicable law or agreed to in writing, - software distributed under the Apache License Version 2.0 is distributed on - an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - express or implied. See the Apache License Version 2.0 for the specific - language governing permissions and limitations there under. +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ - Authors: Anuj More, Alex Dean, Fred Blundun - Copyright: Copyright (c) 2013-2014 Snowplow Analytics Ltd - License: Apache License Version 2.0 -""" +from typing import Dict, Optional, Union +from snowplow_tracker.contracts import one_of, greater_than +from snowplow_tracker.typing import SupportedPlatform, SUPPORTED_PLATFORMS, PayloadDict -from contracts import contract, new_contract - -SUPPORTED_PLATFORMS = set(["pc", "tv", "mob", "cnsl", "iot", "web", "srv", "app"]) DEFAULT_PLATFORM = "pc" -new_contract("subject", lambda x: isinstance(x, Subject)) - -new_contract("supported_platform", lambda x: x in SUPPORTED_PLATFORMS) - class Subject(object): """ - Class for an event subject, where we view events as of the form + Class for an event subject, where we view events as of the form - (Subject) -> (Verb) -> (Object) + (Subject) -> (Verb) -> (Object) """ - def __init__(self): - self.standard_nv_pairs = {"p": DEFAULT_PLATFORM} + def __init__(self) -> None: + self.standard_nv_pairs: Dict[str, Union[str, int]] = {"p": DEFAULT_PLATFORM} - @contract - def set_platform(self, value): + def set_platform(self, value: SupportedPlatform) -> "Subject": """ - :param value: One of ["pc", "tv", "mob", "cnsl", "iot", "web", "srv", "app"] - :type value: supported_platform - :rtype: subject + :param value: One of ["pc", "tv", "mob", "cnsl", "iot", "web", "srv", "app"] + :type value: supported_platform + :rtype: subject """ + one_of(value, SUPPORTED_PLATFORMS) + self.standard_nv_pairs["p"] = value return self - @contract - def set_user_id(self, user_id): + def set_user_id(self, user_id: str) -> "Subject": """ - :param user_id: User ID - :type user_id: string - :rtype: subject + :param user_id: User ID + :type user_id: string + :rtype: subject """ self.standard_nv_pairs["uid"] = user_id return self - @contract - def set_screen_resolution(self, width, height): + def set_screen_resolution(self, width: int, height: int) -> "Subject": """ - :param width: Width of the screen - :param 
height: Height of the screen - :type width: int,>0 - :type height: int,>0 - :rtype: subject + :param width: Width of the screen + :param height: Height of the screen + :type width: int,>0 + :type height: int,>0 + :rtype: subject """ + greater_than(width, 0) + greater_than(height, 0) + self.standard_nv_pairs["res"] = "".join([str(width), "x", str(height)]) return self - @contract - def set_viewport(self, width, height): + def set_viewport(self, width: int, height: int) -> "Subject": """ - :param width: Width of the viewport - :param height: Height of the viewport - :type width: int,>0 - :type height: int,>0 - :rtype: subject + :param width: Width of the viewport + :param height: Height of the viewport + :type width: int,>0 + :type height: int,>0 + :rtype: subject """ + greater_than(width, 0) + greater_than(height, 0) + self.standard_nv_pairs["vp"] = "".join([str(width), "x", str(height)]) return self - @contract - def set_color_depth(self, depth): + def set_color_depth(self, depth: int) -> "Subject": """ - :param depth: Depth of the color on the screen - :type depth: int - :rtype: subject + :param depth: Depth of the color on the screen + :type depth: int + :rtype: subject """ self.standard_nv_pairs["cd"] = depth return self - @contract - def set_timezone(self, timezone): + def set_timezone(self, timezone: str) -> "Subject": """ - :param timezone: Timezone as a string - :type timezone: string - :rtype: subject + :param timezone: Timezone as a string + :type timezone: string + :rtype: subject """ self.standard_nv_pairs["tz"] = timezone return self - @contract - def set_lang(self, lang): + def set_lang(self, lang: str) -> "Subject": """ - Set language. + Set language. 
- :param lang: Language the application is set to - :type lang: string - :rtype: subject + :param lang: Language the application is set to + :type lang: string + :rtype: subject """ self.standard_nv_pairs["lang"] = lang return self - @contract - def set_domain_user_id(self, duid): + def set_domain_user_id(self, duid: str) -> "Subject": """ - Set the domain user ID + Set the domain user ID - :param duid: Domain user ID - :type duid: string - :rtype: subject + :param duid: Domain user ID + :type duid: string + :rtype: subject """ self.standard_nv_pairs["duid"] = duid return self - @contract - def set_ip_address(self, ip): + def set_domain_session_id(self, sid: str) -> "Subject": + """ + Set the domain session ID + :param sid: Domain session ID + :type sid: string + :rtype: subject """ - Set the domain user ID + self.standard_nv_pairs["sid"] = sid + return self - :param ip: IP address - :type ip: string - :rtype: subject + def set_domain_session_index(self, vid: int) -> "Subject": + """ + Set the domain session Index + :param vid: Domain session Index + :type vid: int + :rtype: subject + """ + self.standard_nv_pairs["vid"] = vid + return self + + def set_ip_address(self, ip: str) -> "Subject": + """ + Set the domain user ID + + :param ip: IP address + :type ip: string + :rtype: subject """ self.standard_nv_pairs["ip"] = ip return self - @contract - def set_useragent(self, ua): + def set_useragent(self, ua: str) -> "Subject": """ - Set the user agent + Set the user agent - :param ua: User agent - :type ua: string - :rtype: subject + :param ua: User agent + :type ua: string + :rtype: subject """ self.standard_nv_pairs["ua"] = ua return self - @contract - def set_network_user_id(self, nuid): + def set_network_user_id(self, nuid: str) -> "Subject": """ - Set the network user ID field - This overwrites the nuid field set by the collector + Set the network user ID field + This overwrites the nuid field set by the collector - :param nuid: Network user ID - :type nuid: string 
- :rtype: subject + :param nuid: Network user ID + :type nuid: string + :rtype: subject """ self.standard_nv_pairs["tnuid"] = nuid return self + + def combine_subject(self, subject: Optional["Subject"]) -> PayloadDict: + """ + Merges another instance of Subject, with self taking priority + :param subject Subject to update + :type subject subject + :rtype PayloadDict + + """ + if subject is not None: + return {**subject.standard_nv_pairs, **self.standard_nv_pairs} + + return self.standard_nv_pairs diff --git a/snowplow_tracker/test/CLAUDE.md b/snowplow_tracker/test/CLAUDE.md new file mode 100644 index 00000000..08d0b042 --- /dev/null +++ b/snowplow_tracker/test/CLAUDE.md @@ -0,0 +1,365 @@ +# Snowplow Python Tracker Tests - CLAUDE.md + +## Directory Overview + +The `test/` directory contains comprehensive test suites for the Snowplow Python Tracker. Tests are organized into unit tests (isolated component testing) and integration tests (end-to-end collector communication). The test suite uses pytest and unittest.mock for mocking, with freezegun for time-based testing. 
+ +## Test Organization + +``` +test/ +├── unit/ # Isolated component tests +│ ├── test_tracker.py # Tracker class tests +│ ├── test_emitters.py # Emitter functionality +│ ├── test_event.py # Base event class +│ ├── test_payload.py # Payload construction +│ ├── test_contracts.py # Validation logic +│ └── test_*.py # Other component tests +└── integration/ # End-to-end tests + └── test_integration.py # Collector communication +``` + +## Core Testing Patterns + +### Mock Pattern for Emitters +```python +# ✅ Mock emitter for isolated tracker testing +@mock.patch('snowplow_tracker.emitters.Emitter') +def test_tracker_tracks_event(mock_emitter): + tracker = Tracker("test", mock_emitter) + tracker.track(PageView(page_url="test.com")) + mock_emitter.input.assert_called_once() + +# ❌ Don't test with real network calls in unit tests +def test_tracker(): + emitter = Emitter("https://real-collector.com") +``` + +### Contract Testing Pattern +```python +# ✅ Use ContractsDisabled context manager +class ContractsDisabled: + def __enter__(self): + disable_contracts() + def __exit__(self, type, value, traceback): + enable_contracts() + +with ContractsDisabled(): + # Test invalid inputs without raising + tracker.track_page_view(None) + +# ❌ Don't disable contracts globally +disable_contracts() +# ... 
rest of test file +``` + +### Time-Based Testing Pattern +```python +# ✅ Use freezegun for deterministic timestamps +from freezegun import freeze_time + +@freeze_time("2024-01-01 12:00:00") +def test_event_timestamp(): + event = PageView(page_url="test.com") + # Timestamp will be consistent + +# ❌ Don't use actual system time +import time +timestamp = time.time() # Non-deterministic +``` + +### UUID Mocking Pattern +```python +# ✅ Mock UUID generation for predictable IDs +@mock.patch('snowplow_tracker.tracker.Tracker.get_uuid') +def test_event_id(mock_uuid): + mock_uuid.return_value = "test-uuid-123" + tracker.track(event) + assert payload["eid"] == "test-uuid-123" + +# ❌ Don't rely on random UUIDs +event_id = tracker.get_uuid() # Different each run +``` + +## Unit Test Patterns + +### Payload Testing +```python +# ✅ Test payload field presence and values +def test_payload_construction(): + payload = Payload() + payload.add("e", "pv") + payload.add("url", "https://test.com") + + result = payload.get() + assert result["e"] == "pv" + assert result["url"] == "https://test.com" + +# ✅ Test JSON encoding +def test_payload_json_encoding(): + payload.add_json({"key": "value"}, True, "cx", "co") + assert "cx" in payload.get() # Base64 encoded +``` + +### Event Testing +```python +# ✅ Test event construction with all parameters +def test_page_view_complete(): + context = SelfDescribingJson(schema, data) + subject = Subject() + + event = PageView( + page_url="https://test.com", + page_title="Test", + context=[context], + event_subject=subject, + true_timestamp=1234567890 + ) + + assert event.page_url == "https://test.com" + assert len(event.context) == 1 + +# ❌ Don't test internal implementation details +def test_private_methods(): + event._internal_method() # Testing private methods +``` + +### Emitter Testing +```python +# ✅ Mock HTTP requests for emitter tests +@mock.patch('requests.post') +def test_emitter_sends_events(mock_post): + mock_post.return_value.status_code = 
200 + + emitter = Emitter("https://collector.test") + emitter.input({"e": "pv"}) + emitter.flush() + + mock_post.assert_called_once() + +# ✅ Test retry logic +def test_emitter_retry_on_failure(mock_post): + mock_post.return_value.status_code = 500 + emitter.custom_retry_codes = {500: True} + # Verify retry behavior +``` + +### Contract Validation Testing +```python +# ✅ Test validation rules +def test_non_empty_string_validation(): + with self.assertRaises(ValueError): + non_empty_string("") + + non_empty_string("valid") # Should not raise + +# ✅ Test form element validation +def test_form_element_contract(): + valid_element = { + "name": "field1", + "value": "test", + "nodeName": "INPUT", + "type": "text" + } + form_element(valid_element) # Should not raise +``` + +## Integration Test Patterns + +### Mock Collector Pattern +```python +# ✅ Use micro mock collector for integration tests +from http.server import HTTPServer, BaseHTTPRequestHandler + +class MockCollector(BaseHTTPRequestHandler): + def do_POST(self): + # Capture and validate payload + content_length = int(self.headers['Content-Length']) + post_data = self.rfile.read(content_length) + # Store for assertions + self.send_response(200) + +# Start mock collector in test +server = HTTPServer(('localhost', 9090), MockCollector) +``` + +### End-to-End Testing +```python +# ✅ Test complete tracking flow +def test_end_to_end_tracking(): + tracker = Snowplow.create_tracker( + namespace="test", + endpoint="http://localhost:9090" + ) + + # Track multiple events + tracker.track(PageView(page_url="test1.com")) + tracker.track(StructuredEvent("cat", "act")) + tracker.flush() + + # Verify collector received both events + assert len(received_events) == 2 +``` + +## Testing Best Practices + +### Test Isolation +```python +# ✅ Clean up after each test +def setUp(self): + Snowplow.reset() # Clear all trackers + +def tearDown(self): + # Clean up any test artifacts + if hasattr(self, 'server'): + self.server.shutdown() + +# ❌ 
Don't leave state between tests +class TestSuite: + shared_tracker = Tracker(...) # Shared state! +``` + +### Assertion Patterns +```python +# ✅ Use specific assertions +assert event.page_url == "https://expected.com" +assert "e" in payload.get() +mock_func.assert_called_with(expected_arg) + +# ❌ Avoid generic assertions +assert event # Too vague +assert payload.get() # What are we checking? +``` + +### Mock Management +```python +# ✅ Use patch decorators or context managers +@mock.patch('snowplow_tracker.tracker.uuid.uuid4') +def test_with_mock(mock_uuid): + mock_uuid.return_value = "test-id" + +# ✅ Clean up patches +def create_patch(self, name): + patcher = mock.patch(name) + thing = patcher.start() + self.addCleanup(patcher.stop) + return thing +``` + +## Common Test Scenarios + +### Testing Event Contexts +```python +# ✅ Test context encoding and attachment +def test_event_with_multiple_contexts(): + contexts = [ + SelfDescribingJson(schema1, data1), + SelfDescribingJson(schema2, data2) + ] + event = PageView(page_url="test", context=contexts) + + payload = event.build_payload(True, None, None) + cx_data = json.loads(base64.b64decode(payload.get()["cx"])) + assert len(cx_data["data"]) == 2 +``` + +### Testing Failure Scenarios +```python +# ✅ Test failure callbacks +def test_emitter_failure_callback(): + failed_events = [] + + def on_failure(count, events): + failed_events.extend(events) + + emitter = Emitter( + "https://invalid.collector", + on_failure=on_failure + ) + # Trigger failure and verify callback +``` + +### Testing Async Behavior +```python +# ✅ Test async emitter threading +def test_async_emitter(): + emitter = AsyncEmitter("https://collector.test") + + # Track events + for i in range(100): + emitter.input({"e": "pv", "url": f"test{i}.com"}) + + # Wait for flush + emitter.flush() + time.sleep(1) # Allow async processing + + # Verify all events sent +``` + +## Test Utilities + +### Helper Functions +```python +# ✅ Create reusable test helpers +def 
create_test_tracker(namespace="test"): + emitter = mock.MagicMock() + return Tracker(namespace, emitter) + +def create_test_event(): + return PageView(page_url="https://test.com") + +# ❌ Don't duplicate test setup +def test_one(): + emitter = mock.MagicMock() + tracker = Tracker("test", emitter) + # ... repeated in every test +``` + +## Performance Testing + +### Load Testing Pattern +```python +# ✅ Test tracker under load +def test_high_volume_tracking(): + tracker = create_test_tracker() + + start = time.time() + for i in range(10000): + tracker.track(PageView(page_url=f"test{i}.com")) + + duration = time.time() - start + assert duration < 5.0 # Performance threshold +``` + +## Quick Reference + +### Test File Naming +- Unit tests: `test_.py` +- Integration tests: `test_integration_.py` +- Test classes: `Test` +- Test methods: `test_` + +### Essential Test Imports +```python +import unittest +import unittest.mock as mock +from freezegun import freeze_time +from snowplow_tracker.contracts import ContractsDisabled +``` + +### Common Mock Targets +- `snowplow_tracker.tracker.Tracker.get_uuid` +- `requests.post` / `requests.get` +- `time.time` +- `snowplow_tracker.emitters.Emitter.sync_flush` + +## Contributing to test/CLAUDE.md + +When adding or modifying tests: + +1. **Maintain test isolation** - Each test should be independent +2. **Mock external dependencies** - No real network calls in unit tests +3. **Use descriptive test names** - Clear what is being tested +4. **Test both success and failure paths** - Include edge cases +5. **Keep tests fast** - Mock time-consuming operations +6. 
**Document complex test scenarios** - Add comments for clarity \ No newline at end of file diff --git a/snowplow_tracker/test/integration/test_integration.py b/snowplow_tracker/test/integration/test_integration.py index 454e1c9e..57b1a58c 100644 --- a/snowplow_tracker/test/integration/test_integration.py +++ b/snowplow_tracker/test/integration/test_integration.py @@ -1,205 +1,342 @@ -""" - test_integration.py +# """ +# test_integration.py - Copyright (c) 2013-2014 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. - This program is licensed to you under the Apache License Version 2.0, - and you may not use this file except in compliance with the Apache License - Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - http://www.apache.org/licenses/LICENSE-2.0. +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. - Unless required by applicable law or agreed to in writing, - software distributed under the Apache License Version 2.0 is distributed on - an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - express or implied. See the Apache License Version 2.0 for the specific - language governing permissions and limitations there under. - - Authors: Anuj More, Alex Dean, Fred Blundun - Copyright: Copyright (c) 2013-2014 Snowplow Analytics Ltd - License: Apache License Version 2.0 -""" +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ import unittest import re import json import base64 -try: - from urllib.parse import unquote_plus # Python 3 -except ImportError: - from urllib import unquote_plus # Python 2 - -import redis +from urllib.parse import unquote_plus +import pytest from httmock import all_requests, HTTMock from freezegun import freeze_time +from typing import Any, Dict, Optional from snowplow_tracker import tracker, _version, emitters, subject -from snowplow_tracker.timestamp import DeviceTimestamp, TrueTimestamp from snowplow_tracker.self_describing_json import SelfDescribingJson querystrings = [""] -default_emitter = emitters.Emitter("localhost", protocol="http", port=80) +default_emitter = emitters.Emitter("localhost", protocol="http", port=80, batch_size=1) -post_emitter = emitters.Emitter("localhost", protocol="http", port=80, method='post', buffer_size=1) +get_emitter = emitters.Emitter("localhost", protocol="http", port=80, method="get") default_subject = subject.Subject() -def from_querystring(field, url): + +def from_querystring(field: str, url: str) -> Optional[str]: pattern = re.compile("^[^#]*[?&]" + field + "=([^&#]*)") match = pattern.match(url) if match: return match.groups()[0] + @all_requests -def pass_response_content(url, request): +def pass_response_content(url: str, request: Any) -> Dict[str, Any]: querystrings.append(request.url) - return { - "url": request.url, - "status_code": 200 - } + return {"url": request.url, "status_code": 200} + @all_requests -def pass_post_response_content(url, request): +def pass_post_response_content(url: str, request: Any) -> Dict[str, Any]: querystrings.append(json.loads(request.body)) - return { - "url": request.url, - "status_code": 200 - } + return {"url": request.url, "status_code": 200} + @all_requests -def fail_response_content(url, request): - return { - "url": request.url, - "status_code": 501 - } +def fail_response_content(url: str, request: Any) -> Dict[str, Any]: + return {"url": request.url, "status_code": 501} 
class IntegrationTest(unittest.TestCase): - - def test_integration_page_view(self): - t = tracker.Tracker([default_emitter], default_subject) + def test_integration_page_view(self) -> None: + t = tracker.Tracker("namespace", [get_emitter], default_subject) with HTTMock(pass_response_content): - t.track_page_view("http://savethearctic.org", "Save The Arctic", "http://referrer.com") - expected_fields = {"e": "pv", "page": "Save+The+Arctic", "url": "http%3A%2F%2Fsavethearctic.org", "refr": "http%3A%2F%2Freferrer.com"} + t.track_page_view( + "http://savethearctic.org", "Save The Arctic", "http://referrer.com" + ) + expected_fields = { + "e": "pv", + "page": "Save+The+Arctic", + "url": "http%3A%2F%2Fsavethearctic.org", + "refr": "http%3A%2F%2Freferrer.com", + } for key in expected_fields: - self.assertEquals(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) - def test_integration_ecommerce_transaction_item(self): - t = tracker.Tracker([default_emitter], default_subject) + def test_integration_ecommerce_transaction_item(self) -> None: + t = tracker.Tracker("namespace", [get_emitter], default_subject) with HTTMock(pass_response_content): - t.track_ecommerce_transaction_item("12345", "pbz0025", 7.99, 2, "black-tarot", "tarot", currency="GBP") - expected_fields = {"ti_ca": "tarot", "ti_id": "12345", "ti_qu": "2", "ti_sk": "pbz0025", "e": "ti", "ti_nm": "black-tarot", "ti_pr": "7.99", "ti_cu": "GBP"} + t.track_ecommerce_transaction_item( + "12345", "pbz0025", 7.99, 2, "black-tarot", "tarot", currency="GBP" + ) + expected_fields = { + "ti_ca": "tarot", + "ti_id": "12345", + "ti_qu": "2", + "ti_sk": "pbz0025", + "e": "ti", + "ti_nm": "black-tarot", + "ti_pr": "7.99", + "ti_cu": "GBP", + } for key in expected_fields: - self.assertEquals(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), 
expected_fields[key] + ) - def test_integration_ecommerce_transaction(self): - t = tracker.Tracker([default_emitter], default_subject) + def test_integration_ecommerce_transaction(self) -> None: + t = tracker.Tracker("namespace", [get_emitter], default_subject) with HTTMock(pass_response_content): - t.track_ecommerce_transaction("6a8078be", 35, city="London", currency="GBP", items= - [{ - "sku": "pbz0026", - "price": 20, - "quantity": 1 - }, - { - "sku": "pbz0038", - "price": 15, - "quantity": 1 - }]) + t.track_ecommerce_transaction( + order_id="6a8078be", + total_value=35, + city="London", + currency="GBP", + items=[ + {"sku": "pbz0026", "price": 20, "quantity": 1}, + {"sku": "pbz0038", "price": 15, "quantity": 1}, + ], + tstamp=1399021242240, + ) - expected_fields = {"e": "tr", "tr_id": "6a8078be", "tr_tt": "35", "tr_ci": "London", "tr_cu": "GBP"} + expected_fields = { + "e": "tr", + "tr_id": "6a8078be", + "tr_tt": "35", + "tr_ci": "London", + "tr_cu": "GBP", + } for key in expected_fields: - self.assertEquals(from_querystring(key, querystrings[-3]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-3]), expected_fields[key] + ) - expected_fields = {"e": "ti", "ti_id": "6a8078be", "ti_sk": "pbz0026", "ti_pr": "20", "ti_cu": "GBP"} + expected_fields = { + "e": "ti", + "ti_id": "6a8078be", + "ti_sk": "pbz0026", + "ti_pr": "20", + "ti_cu": "GBP", + } for key in expected_fields: - self.assertEquals(from_querystring(key, querystrings[-2]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-2]), expected_fields[key] + ) - expected_fields = {"e": "ti", "ti_id": "6a8078be", "ti_sk": "pbz0038", "ti_pr": "15", "ti_cu": "GBP"} + expected_fields = { + "e": "ti", + "ti_id": "6a8078be", + "ti_sk": "pbz0038", + "ti_pr": "15", + "ti_cu": "GBP", + } for key in expected_fields: - self.assertEquals(from_querystring(key, querystrings[-1]), expected_fields[key]) - - self.assertEquals(from_querystring("dtm", querystrings[-3]), 
from_querystring("dtm", querystrings[-2])) - - def test_integration_screen_view(self): - t = tracker.Tracker([default_emitter], default_subject, encode_base64=False) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) + + self.assertEqual( + from_querystring("ttm", querystrings[-3]), + from_querystring("ttm", querystrings[-2]), + ) + + def test_integration_mobile_screen_view(self) -> None: + t = tracker.Tracker( + "namespace", [get_emitter], default_subject, encode_base64=False + ) with HTTMock(pass_response_content): - t.track_screen_view("Game HUD 2", id_="534") + t.track_mobile_screen_view(id_="534", name="Game HUD 2") expected_fields = {"e": "ue"} for key in expected_fields: - self.assertEquals(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) envelope_string = from_querystring("ue_pr", querystrings[-1]) envelope = json.loads(unquote_plus(envelope_string)) - self.assertEquals(envelope, { - "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", - "data": {"schema": "iglu:com.snowplowanalytics.snowplow/screen_view/jsonschema/1-0-0", + self.assertEqual( + envelope, + { + "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", "data": { - "name": "Game HUD 2", - "id": "534" - } - } - }) - - def test_integration_struct_event(self): - t = tracker.Tracker([default_emitter], default_subject) + "schema": "iglu:com.snowplowanalytics.mobile/screen_view/jsonschema/1-0-0", + "data": {"id": "534", "name": "Game HUD 2"}, + }, + }, + ) + + def test_integration_struct_event(self) -> None: + t = tracker.Tracker("namespace", [get_emitter], default_subject) with HTTMock(pass_response_content): - t.track_struct_event("Ecomm", "add-to-basket", "dog-skateboarding-video", "hd", 13.99) - expected_fields = {"se_ca": "Ecomm", "se_pr": "hd", "se_la": "dog-skateboarding-video", "se_va": "13.99", "se_ac": 
"add-to-basket", "e": "se"} + t.track_struct_event( + "Ecomm", "add-to-basket", "dog-skateboarding-video", "hd", 13.99 + ) + expected_fields = { + "se_ca": "Ecomm", + "se_pr": "hd", + "se_la": "dog-skateboarding-video", + "se_va": "13.99", + "se_ac": "add-to-basket", + "e": "se", + } for key in expected_fields: - self.assertEquals(from_querystring(key, querystrings[-1]), expected_fields[key]) - - def test_integration_unstruct_event_non_base64(self): - t = tracker.Tracker([default_emitter], default_subject, encode_base64=False) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) + + def test_integration_self_describing_event_non_base64(self) -> None: + t = tracker.Tracker( + "namespace", [get_emitter], default_subject, encode_base64=False + ) with HTTMock(pass_response_content): - t.track_unstruct_event(SelfDescribingJson("iglu:com.acme/viewed_product/jsonschema/2-0-2", {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000})) + t.track_self_describing_event( + SelfDescribingJson( + "iglu:com.acme/viewed_product/jsonschema/2-0-2", + {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000}, + ) + ) expected_fields = {"e": "ue"} for key in expected_fields: - self.assertEquals(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) envelope_string = from_querystring("ue_pr", querystrings[-1]) envelope = json.loads(unquote_plus(envelope_string)) - self.assertEquals(envelope, { - "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", - "data": {"schema": "iglu:com.acme/viewed_product/jsonschema/2-0-2", "data": {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000}} - }) + self.assertEqual( + envelope, + { + "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data": { + "schema": "iglu:com.acme/viewed_product/jsonschema/2-0-2", + "data": { + 
"product_id": "ASO01043", + "price$flt": 49.95, + "walrus$tms": 1000, + }, + }, + }, + ) - def test_integration_unstruct_event_base64(self): - t = tracker.Tracker([default_emitter], default_subject, encode_base64=True) + def test_integration_self_describing_event_base64(self) -> None: + t = tracker.Tracker( + "namespace", [get_emitter], default_subject, encode_base64=True + ) with HTTMock(pass_response_content): - t.track_unstruct_event(SelfDescribingJson("iglu:com.acme/viewed_product/jsonschema/2-0-2", {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000})) + t.track_self_describing_event( + SelfDescribingJson( + "iglu:com.acme/viewed_product/jsonschema/2-0-2", + {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000}, + ) + ) expected_fields = {"e": "ue"} for key in expected_fields: - self.assertEquals(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) envelope_string = unquote_plus(from_querystring("ue_px", querystrings[-1])) - envelope = json.loads((base64.urlsafe_b64decode(bytearray(envelope_string, "utf-8"))).decode("utf-8")) - self.assertEquals(envelope, { - "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", - "data": {"schema": "iglu:com.acme/viewed_product/jsonschema/2-0-2", "data": {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000}} - }) - - def test_integration_context_non_base64(self): - t = tracker.Tracker([default_emitter], default_subject, encode_base64=False) + envelope = json.loads( + (base64.urlsafe_b64decode(bytearray(envelope_string, "utf-8"))).decode( + "utf-8" + ) + ) + self.assertEqual( + envelope, + { + "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data": { + "schema": "iglu:com.acme/viewed_product/jsonschema/2-0-2", + "data": { + "product_id": "ASO01043", + "price$flt": 49.95, + "walrus$tms": 1000, + }, + }, + }, + ) + + def 
test_integration_context_non_base64(self) -> None: + t = tracker.Tracker( + "namespace", [get_emitter], default_subject, encode_base64=False + ) with HTTMock(pass_response_content): - t.track_page_view("localhost", "local host", None, [SelfDescribingJson("iglu:com.example/user/jsonschema/2-0-3", {"user_type": "tester"})]) + t.track_page_view( + "localhost", + "local host", + None, + [ + SelfDescribingJson( + "iglu:com.example/user/jsonschema/2-0-3", + {"user_type": "tester"}, + ) + ], + ) envelope_string = from_querystring("co", querystrings[-1]) envelope = json.loads(unquote_plus(envelope_string)) - self.assertEquals(envelope, { - "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1", - "data":[{"schema": "iglu:com.example/user/jsonschema/2-0-3", "data": {"user_type": "tester"}}] - }) - - def test_integration_context_base64(self): - t = tracker.Tracker([default_emitter], default_subject, encode_base64=True) + self.assertEqual( + envelope, + { + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1", + "data": [ + { + "schema": "iglu:com.example/user/jsonschema/2-0-3", + "data": {"user_type": "tester"}, + } + ], + }, + ) + + def test_integration_context_base64(self) -> None: + t = tracker.Tracker( + "namespace", [get_emitter], default_subject, encode_base64=True + ) with HTTMock(pass_response_content): - t.track_page_view("localhost", "local host", None, [SelfDescribingJson("iglu:com.example/user/jsonschema/2-0-3", {"user_type": "tester"})]) + t.track_page_view( + "localhost", + "local host", + None, + [ + SelfDescribingJson( + "iglu:com.example/user/jsonschema/2-0-3", + {"user_type": "tester"}, + ) + ], + ) envelope_string = unquote_plus(from_querystring("cx", querystrings[-1])) - envelope = json.loads((base64.urlsafe_b64decode(bytearray(envelope_string, "utf-8"))).decode("utf-8")) - self.assertEquals(envelope, { - "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1", - "data":[{"schema": 
"iglu:com.example/user/jsonschema/2-0-3", "data": {"user_type": "tester"}}] - }) - - def test_integration_standard_nv_pairs(self): + envelope = json.loads( + (base64.urlsafe_b64decode(bytearray(envelope_string, "utf-8"))).decode( + "utf-8" + ) + ) + self.assertEqual( + envelope, + { + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1", + "data": [ + { + "schema": "iglu:com.example/user/jsonschema/2-0-3", + "data": {"user_type": "tester"}, + } + ], + }, + ) + + def test_integration_standard_nv_pairs(self) -> None: s = subject.Subject() s.set_platform("mob") s.set_user_id("user12345") @@ -208,125 +345,233 @@ def test_integration_standard_nv_pairs(self): s.set_timezone("Europe London") s.set_lang("en") - t = tracker.Tracker([emitters.Emitter("localhost")], s, "cf", app_id="angry-birds-android") + t = tracker.Tracker( + "cf", + [emitters.Emitter("localhost", method="get")], + s, + app_id="angry-birds-android", + ) with HTTMock(pass_response_content): t.track_page_view("localhost", "local host") - expected_fields = {"tna": "cf", "res": "100x200", - "lang": "en", "aid": "angry-birds-android", "cd": "24", "tz": "Europe+London", - "p": "mob", "tv": "py-" + _version.__version__} + expected_fields = { + "tna": "cf", + "res": "100x200", + "lang": "en", + "aid": "angry-birds-android", + "cd": "24", + "tz": "Europe+London", + "p": "mob", + "tv": "py-" + _version.__version__, + } for key in expected_fields: - self.assertEquals(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) self.assertIsNotNone(from_querystring("eid", querystrings[-1])) self.assertIsNotNone(from_querystring("dtm", querystrings[-1])) - def test_integration_identification_methods(self): + def test_integration_identification_methods(self) -> None: s = subject.Subject() s.set_domain_user_id("4616bfb38f872d16") + s.set_domain_session_id("59ed13b1a5724dae") + s.set_domain_session_index(1) 
s.set_ip_address("255.255.255.255") - s.set_useragent("Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0)") + s.set_useragent( + "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0)" + ) s.set_network_user_id("fbc6c76c-bce5-43ce-8d5a-31c5") - t = tracker.Tracker([emitters.Emitter("localhost")], s, "cf", app_id="angry-birds-android") + t = tracker.Tracker( + "cf", + [emitters.Emitter("localhost", method="get")], + s, + app_id="angry-birds-android", + ) with HTTMock(pass_response_content): t.track_page_view("localhost", "local host") expected_fields = { "duid": "4616bfb38f872d16", + "sid": "59ed13b1a5724dae", + "vid": "1", "ip": "255.255.255.255", "ua": "Mozilla%2F5.0+%28compatible%3B+MSIE+9.0%3B+Windows+NT+6.0%3B+Trident%2F5.0%29", - "tnuid": "fbc6c76c-bce5-43ce-8d5a-31c5" + "tnuid": "fbc6c76c-bce5-43ce-8d5a-31c5", } for key in expected_fields: - self.assertEquals(from_querystring(key, querystrings[-1]), expected_fields[key]) - - def test_integration_redis_default(self): - r = redis.StrictRedis() - t = tracker.Tracker([emitters.RedisEmitter()], default_subject) - t.track_page_view("http://www.example.com") - event_string = r.rpop("snowplow") - event_dict = json.loads(event_string.decode("utf-8")) - self.assertEquals(event_dict["e"], "pv") - - def test_integration_redis_custom(self): - r = redis.StrictRedis(db=1) - t = tracker.Tracker([emitters.RedisEmitter(rdb=r, key="custom_key")], default_subject) - t.track_page_view("http://www.example.com") - event_string = r.rpop("custom_key") - event_dict = json.loads(event_string.decode("utf-8")) - self.assertEquals(event_dict["e"], "pv") - - def test_integration_success_callback(self): + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) + + def test_integration_event_subject(self) -> None: + s = subject.Subject() + s.set_domain_user_id("4616bfb38f872d16") + s.set_lang("ES") + + t = tracker.Tracker( + "namespace", + [emitters.Emitter("localhost", method="get")], + 
s, + app_id="angry-birds-android", + ) + evSubject = ( + subject.Subject().set_domain_user_id("1111aaa11a111a11").set_lang("EN") + ) + with HTTMock(pass_response_content): + t.track_page_view("localhost", "local host", event_subject=evSubject) + expected_fields = {"duid": "1111aaa11a111a11", "lang": "EN"} + for key in expected_fields: + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) + + def test_integration_success_callback(self) -> None: callback_success_queue = [] callback_failure_queue = [] - callback_emitter = emitters.Emitter("localhost", on_success=lambda x: callback_success_queue.append(x), - on_failure=lambda x, y:callback_failure_queue.append(x)) - t = tracker.Tracker([callback_emitter], default_subject) + callback_emitter = emitters.Emitter( + "localhost", + method="get", + on_success=lambda x: callback_success_queue.append(x), + on_failure=lambda x, y: callback_failure_queue.append(x), + ) + t = tracker.Tracker("namespace", [callback_emitter], default_subject) with HTTMock(pass_response_content): t.track_page_view("http://www.example.com") - self.assertEquals(callback_success_queue[0], 1) - self.assertEquals(callback_failure_queue, []) + expected = { + "e": "pv", + "url": "http://www.example.com", + } + self.assertEqual(len(callback_success_queue), 1) + for k in expected.keys(): + self.assertEqual(callback_success_queue[0][0][k], expected[k]) + self.assertEqual(callback_failure_queue, []) - def test_integration_failure_callback(self): + def test_integration_failure_callback(self) -> None: callback_success_queue = [] callback_failure_queue = [] - callback_emitter = emitters.Emitter("localhost", on_success=lambda x: callback_success_queue.append(x), - on_failure=lambda x, y:callback_failure_queue.append(x)) - t = tracker.Tracker([callback_emitter], default_subject) + callback_emitter = emitters.Emitter( + "localhost", + method="get", + on_success=lambda x: callback_success_queue.append(x), + on_failure=lambda x, y: 
callback_failure_queue.append(x), + ) + t = tracker.Tracker("namespace", [callback_emitter], default_subject) with HTTMock(fail_response_content): t.track_page_view("http://www.example.com") - self.assertEquals(callback_success_queue, []) - self.assertEquals(callback_failure_queue[0], 0) + self.assertEqual(callback_success_queue, []) + self.assertEqual(callback_failure_queue[0], 0) - def test_post_page_view(self): - t = tracker.Tracker([post_emitter], default_subject) + def test_post_page_view(self) -> None: + t = tracker.Tracker("namespace", [default_emitter], default_subject) with HTTMock(pass_post_response_content): t.track_page_view("localhost", "local host", None) expected_fields = {"e": "pv", "page": "local host", "url": "localhost"} request = querystrings[-1] - self.assertEquals(request["schema"], "iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4") + self.assertEqual( + request["schema"], + "iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4", + ) for key in expected_fields: - self.assertEquals(request["data"][0][key], expected_fields[key]) + self.assertEqual(request["data"][0][key], expected_fields[key]) - def test_post_batched(self): - post_emitter = emitters.Emitter("localhost", protocol="http", port=80, method='post', buffer_size=2) - t = tracker.Tracker(post_emitter, default_subject) + def test_post_batched(self) -> None: + default_emitter = emitters.Emitter( + "localhost", protocol="http", port=80, batch_size=2 + ) + t = tracker.Tracker("namespace", default_emitter, default_subject) with HTTMock(pass_post_response_content): t.track_struct_event("Test", "A") t.track_struct_event("Test", "B") - self.assertEquals(querystrings[-1]["data"][0]["se_ac"], "A") - self.assertEquals(querystrings[-1]["data"][1]["se_ac"], "B") + self.assertEqual(querystrings[-1]["data"][0]["se_ac"], "A") + self.assertEqual(querystrings[-1]["data"][1]["se_ac"], "B") - def test_timestamps(self): - emitter = emitters.Emitter("localhost", 
protocol="http", port=80, method='post', buffer_size=4) - t = tracker.Tracker([emitter], default_subject) + @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 + def test_timestamps(self) -> None: + emitter = emitters.Emitter("localhost", protocol="http", port=80, batch_size=3) + t = tracker.Tracker("namespace", [emitter], default_subject) with HTTMock(pass_post_response_content): - with freeze_time("2013-01-14 03:21:34"): - t.track_page_view("localhost", "stamp0", None, tstamp=None) - t.track_page_view("localhost", "stamp1", None, tstamp=1358933694000) - with freeze_time("2013-01-14 03:22:36"): - t.track_page_view("localhost", "stamp2", None, tstamp=DeviceTimestamp(1458133694000)) - t.track_page_view("localhost", "stamp3", None, tstamp=TrueTimestamp(1458033694000)) + t.track_page_view("localhost", "stamp0", None, tstamp=None) + t.track_page_view("localhost", "stamp1", None, tstamp=1358933694000) + t.track_page_view("localhost", "stamp2", None, tstamp=1358933694000.00) expected_timestamps = [ - {"dtm": "1358133694000", "ttm": None, "stm": "1358133756000"}, - {"dtm": "1358933694000", "ttm": None, "stm": "1358133756000"}, - {"dtm": "1458133694000", "ttm": None, "stm": "1358133756000"}, - {"dtm": None, "ttm": "1458033694000", "stm": "1358133756000"}, + {"dtm": "1618790401000", "ttm": None, "stm": "1618790401000"}, + {"dtm": "1618790401000", "ttm": "1358933694000", "stm": "1618790401000"}, + {"dtm": "1618790401000", "ttm": "1358933694000", "stm": "1618790401000"}, ] request = querystrings[-1] for i, event in enumerate(expected_timestamps): - self.assertEquals(request["data"][i].get("dtm"), expected_timestamps[i]["dtm"]) - self.assertEquals(request["data"][i].get("ttm"), expected_timestamps[i]["ttm"]) - self.assertEquals(request["data"][i].get("stm"), expected_timestamps[i]["stm"]) - self.assertEquals(request["data"][i].get("page"), "stamp" + str(i)) - - def test_bytelimit(self): - post_emitter = emitters.Emitter("localhost", protocol="http", port=80, 
method='post', buffer_size=5, byte_limit=420) - t = tracker.Tracker(post_emitter, default_subject) + self.assertEqual( + request["data"][i].get("dtm"), expected_timestamps[i]["dtm"] + ) + self.assertEqual( + request["data"][i].get("ttm"), expected_timestamps[i]["ttm"] + ) + self.assertEqual( + request["data"][i].get("stm"), expected_timestamps[i]["stm"] + ) + + def test_bytelimit(self) -> None: + default_emitter = emitters.Emitter( + "localhost", protocol="http", port=80, batch_size=5, byte_limit=459 + ) + t = tracker.Tracker("namespace", default_emitter, default_subject) with HTTMock(pass_post_response_content): - t.track_struct_event("Test", "A") # 140 bytes - t.track_struct_event("Test", "A") # 280 bytes - t.track_struct_event("Test", "A") # 420 bytes. Send - t.track_struct_event("Test", "AA") # 141 - self.assertEquals(len(querystrings[-1]["data"]), 3) - self.assertEqual(post_emitter.bytes_queued, 141) + t.track_struct_event("Test", "A") # 153 bytes + t.track_struct_event("Test", "A") # 306 bytes + t.track_struct_event("Test", "A") # 459 bytes. 
Send + t.track_struct_event("Test", "AA") # 154 + + self.assertEqual(len(querystrings[-1]["data"]), 3) + self.assertEqual(default_emitter.bytes_queued, 156 + len(_version.__version__)) + + def test_unicode_get(self) -> None: + t = tracker.Tracker( + "namespace", [get_emitter], default_subject, encode_base64=False + ) + unicode_a = "\u0107" + unicode_b = "test.\u0107om" + test_ctx = SelfDescribingJson( + "iglu:a.b/c/jsonschema/1-0-0", {"test": unicode_a} + ) + with HTTMock(pass_response_content): + t.track_page_view(unicode_b, context=[test_ctx]) + t.track_mobile_screen_view(unicode_b, context=[test_ctx]) + + url_string = unquote_plus(from_querystring("url", querystrings[-2])) + try: + self.assertEqual(url_string.decode("utf-8"), unicode_b) + except AttributeError: + # in python 3: str type contains unicode (so no 'decode') + self.assertEqual(url_string, unicode_b) + + context_string = unquote_plus(from_querystring("co", querystrings[-1])) + actual_a = json.loads(context_string)["data"][0]["data"]["test"] + self.assertEqual(actual_a, unicode_a) + + uepr_string = unquote_plus(from_querystring("ue_pr", querystrings[-1])) + actual_b = json.loads(uepr_string)["data"]["data"]["name"] + self.assertEqual(actual_b, unicode_b) + + def test_unicode_post(self) -> None: + t = tracker.Tracker( + "namespace", [default_emitter], default_subject, encode_base64=False + ) + unicode_a = "\u0107" + unicode_b = "test.\u0107om" + test_ctx = SelfDescribingJson( + "iglu:a.b/c/jsonschema/1-0-0", {"test": unicode_a} + ) + with HTTMock(pass_post_response_content): + t.track_page_view(unicode_b, context=[test_ctx]) + t.track_mobile_screen_view(unicode_b, context=[test_ctx]) + + pv_event = querystrings[-2] + self.assertEqual(pv_event["data"][0]["url"], unicode_b) + + in_test_ctx = json.loads(pv_event["data"][0]["co"])["data"][0]["data"]["test"] + self.assertEqual(in_test_ctx, unicode_a) + + sv_event = querystrings[-1] + in_uepr_name = 
json.loads(sv_event["data"][0]["ue_pr"])["data"]["data"]["name"] + self.assertEqual(in_uepr_name, unicode_b) diff --git a/snowplow_tracker/test/unit/__init__.py b/snowplow_tracker/test/unit/__init__.py index 8b137891..e69de29b 100644 --- a/snowplow_tracker/test/unit/__init__.py +++ b/snowplow_tracker/test/unit/__init__.py @@ -1 +0,0 @@ - diff --git a/snowplow_tracker/test/unit/test_contracts.py b/snowplow_tracker/test/unit/test_contracts.py new file mode 100644 index 00000000..9a913bca --- /dev/null +++ b/snowplow_tracker/test/unit/test_contracts.py @@ -0,0 +1,110 @@ +# """ +# test_tracker.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ + +import unittest + +from snowplow_tracker.contracts import ( + form_element, + greater_than, + non_empty, + non_empty_string, + one_of, + satisfies, +) + + +class TestContracts(unittest.TestCase): + def setUp(self) -> None: + pass + + def test_greater_than_succeeds(self) -> None: + greater_than(10, 0) + + def test_greater_than_fails(self) -> None: + with self.assertRaises(ValueError): + greater_than(0, 10) + + def test_non_empty_succeeds(self) -> None: + non_empty(["something"]) + + def test_non_empty_fails(self) -> None: + with self.assertRaises(ValueError): + non_empty([]) + + def test_non_empty_string_succeeds(self) -> None: + non_empty_string("something") + + def test_non_empty_string_fails(self) -> None: + with self.assertRaises(ValueError): + non_empty_string("") + + def test_one_of_succeeds(self) -> None: + one_of("something", ["something", "something else"]) + + def test_one_of_fails(self) -> None: + with self.assertRaises(ValueError): + one_of("something", ["something else"]) + + def test_satisfies_succeeds(self) -> None: + satisfies(10, lambda v: v == 10) + + def test_satisfies_fails(self) -> None: + with self.assertRaises(ValueError): + satisfies(0, lambda v: v == 10) + + def test_form_element_no_type(self) -> None: + elem = {"name": "elemName", "value": "elemValue", "nodeName": "INPUT"} + form_element(elem) + + def test_form_element_type_valid(self) -> None: + elem = { + "name": "elemName", + "value": "elemValue", + "nodeName": "TEXTAREA", + "type": "button", + } + form_element(elem) + + def test_form_element_type_invalid(self) -> None: + elem = { + "name": "elemName", + "value": "elemValue", + "nodeName": "SELECT", + "type": "invalid", + } + with self.assertRaises(ValueError): + form_element(elem) + + def test_form_element_nodename_invalid(self) -> None: + elem = {"name": "elemName", "value": "elemValue", "nodeName": "invalid"} + with self.assertRaises(ValueError): + form_element(elem) + + def test_form_element_no_nodename(self) -> None: + 
elem = {"name": "elemName", "value": "elemValue"} + with self.assertRaises(ValueError): + form_element(elem) + + def test_form_element_no_value(self) -> None: + elem = {"name": "elemName", "nodeName": "INPUT"} + with self.assertRaises(ValueError): + form_element(elem) + + def test_form_element_no_name(self) -> None: + elem = {"value": "elemValue", "nodeName": "INPUT"} + with self.assertRaises(ValueError): + form_element(elem) diff --git a/snowplow_tracker/test/unit/test_emitters.py b/snowplow_tracker/test/unit/test_emitters.py new file mode 100644 index 00000000..f02be943 --- /dev/null +++ b/snowplow_tracker/test/unit/test_emitters.py @@ -0,0 +1,585 @@ +# """ +# test_emitters.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ + +import time +import unittest +import unittest.mock as mock +from freezegun import freeze_time +from typing import Any +from requests import ConnectTimeout + +from snowplow_tracker.emitters import Emitter, AsyncEmitter, DEFAULT_MAX_LENGTH + + +# helpers +def mocked_flush(*args: Any) -> None: + pass + + +def mocked_send_events(*args: Any) -> None: + pass + + +def mocked_http_success(*args: Any) -> bool: + return True + + +def mocked_http_failure(*args: Any) -> bool: + return False + + +def mocked_http_response_success(*args: Any) -> int: + return 200 + + +def mocked_http_response_failure(*args: Any) -> int: + return 400 + + +def mocked_http_response_failure_retry(*args: Any) -> int: + return 500 + + +class TestEmitters(unittest.TestCase): + def setUp(self) -> None: + pass + + def test_init(self) -> None: + e = Emitter("0.0.0.0") + self.assertEqual( + e.endpoint, "https://0.0.0.0/com.snowplowanalytics.snowplow/tp2" + ) + self.assertEqual(e.method, "post") + self.assertEqual(e.batch_size, 10) + self.assertEqual(e.event_store.event_buffer, []) + self.assertIsNone(e.byte_limit) + self.assertIsNone(e.bytes_queued) + self.assertIsNone(e.on_success) + self.assertIsNone(e.on_failure) + self.assertFalse(e.timer.is_active()) + self.assertIsNone(e.request_timeout) + + def test_init_batch_size(self) -> None: + e = Emitter("0.0.0.0", batch_size=10) + self.assertEqual(e.batch_size, 10) + + def test_init_post(self) -> None: + e = Emitter("0.0.0.0") + self.assertEqual(e.batch_size, DEFAULT_MAX_LENGTH) + + def test_init_byte_limit(self) -> None: + e = Emitter("0.0.0.0", byte_limit=512) + self.assertEqual(e.bytes_queued, 0) + + def test_init_requests_timeout(self) -> None: + e = Emitter("0.0.0.0", request_timeout=(2.5, 5)) + self.assertEqual(e.request_timeout, (2.5, 5)) + + def test_as_collector_uri(self) -> None: + uri = Emitter.as_collector_uri("0.0.0.0") + self.assertEqual(uri, "https://0.0.0.0/com.snowplowanalytics.snowplow/tp2") + + def test_as_collector_uri_get(self) 
-> None: + uri = Emitter.as_collector_uri("0.0.0.0", method="get") + self.assertEqual(uri, "https://0.0.0.0/i") + + def test_as_collector_uri_port(self) -> None: + uri = Emitter.as_collector_uri("0.0.0.0", port=9090) + self.assertEqual(uri, "https://0.0.0.0:9090/com.snowplowanalytics.snowplow/tp2") + + def test_as_collector_uri_http(self) -> None: + uri = Emitter.as_collector_uri("0.0.0.0", protocol="http") + self.assertEqual(uri, "http://0.0.0.0/com.snowplowanalytics.snowplow/tp2") + + def test_as_collector_uri_empty_string(self) -> None: + with self.assertRaises(ValueError): + Emitter.as_collector_uri("") + + def test_as_collector_uri_endpoint_protocol(self) -> None: + uri = Emitter.as_collector_uri("https://0.0.0.0") + self.assertEqual(uri, "https://0.0.0.0/com.snowplowanalytics.snowplow/tp2") + + def test_as_collector_uri_endpoint_protocol_http(self) -> None: + uri = Emitter.as_collector_uri("http://0.0.0.0") + self.assertEqual(uri, "http://0.0.0.0/com.snowplowanalytics.snowplow/tp2") + + @mock.patch("snowplow_tracker.Emitter.flush") + def test_input_no_flush(self, mok_flush: Any) -> None: + mok_flush.side_effect = mocked_flush + + e = Emitter("0.0.0.0", method="get", batch_size=2) + nvPairs = {"n0": "v0", "n1": "v1"} + e.input(nvPairs) + + self.assertEqual(len(e.event_store.event_buffer), 1) + self.assertDictEqual(nvPairs, e.event_store.event_buffer[0]) + self.assertIsNone(e.byte_limit) + self.assertFalse(e.reached_limit()) + mok_flush.assert_not_called() + + @mock.patch("snowplow_tracker.Emitter.flush") + def test_input_flush_byte_limit(self, mok_flush: Any) -> None: + mok_flush.side_effect = mocked_flush + + e = Emitter("0.0.0.0", method="get", batch_size=2, byte_limit=16) + nvPairs = {"n0": "v0", "n1": "v1"} + e.input(nvPairs) + + self.assertEqual(len(e.event_store.event_buffer), 1) + self.assertDictEqual(nvPairs, e.event_store.event_buffer[0]) + self.assertTrue(e.reached_limit()) + self.assertEqual(mok_flush.call_count, 1) + + 
@mock.patch("snowplow_tracker.Emitter.flush") + def test_input_flush_buffer(self, mok_flush: Any) -> None: + mok_flush.side_effect = mocked_flush + + e = Emitter("0.0.0.0", method="get", batch_size=2, byte_limit=1024) + nvPairs = {"n0": "v0", "n1": "v1"} + e.input(nvPairs) + + self.assertEqual(len(e.event_store.event_buffer), 1) + self.assertFalse(e.reached_limit()) + self.assertDictEqual(nvPairs, e.event_store.event_buffer[0]) + + nextPairs = {"n0": "v0"} + e.input(nextPairs) + # since we mock flush, the buffer is not empty + self.assertEqual(e.event_store.event_buffer, [nvPairs, nextPairs]) + self.assertTrue(e.reached_limit()) + self.assertEqual(mok_flush.call_count, 1) + + @mock.patch("snowplow_tracker.Emitter.flush") + def test_input_bytes_queued(self, mok_flush: Any) -> None: + mok_flush.side_effect = mocked_flush + + e = Emitter("0.0.0.0", method="get", batch_size=2, byte_limit=1024) + nvPairs = {"n0": "v0", "n1": "v1"} + e.input(nvPairs) + + self.assertEqual(len(e.event_store.event_buffer), 1) + self.assertEqual(e.bytes_queued, 24) + + e.input(nvPairs) + self.assertEqual(e.bytes_queued, 48) + + @mock.patch("snowplow_tracker.Emitter.flush") + def test_input_bytes_post(self, mok_flush: Any) -> None: + mok_flush.side_effect = mocked_flush + + e = Emitter("0.0.0.0") + nvPairs = {"testString": "test", "testNum": 2.72} + e.input(nvPairs) + + self.assertEqual( + e.event_store.event_buffer, [{"testString": "test", "testNum": "2.72"}] + ) + + @mock.patch("snowplow_tracker.Emitter.http_post") + def test_flush(self, mok_send_events: Any) -> None: + mok_send_events.side_effect = mocked_http_response_success + + e = Emitter("0.0.0.0", batch_size=2, byte_limit=None) + nvPairs = {"n": "v"} + e.input(nvPairs) + e.input(nvPairs) + + self.assertEqual(mok_send_events.call_count, 1) + self.assertEqual(len(e.event_store.event_buffer), 0) + + @mock.patch("snowplow_tracker.Emitter.http_post") + def test_flush_bytes_queued(self, mok_send_events: Any) -> None: + 
mok_send_events.side_effect = mocked_http_response_success + + e = Emitter("0.0.0.0", batch_size=2, byte_limit=256) + nvPairs = {"n": "v"} + e.input(nvPairs) + e.input(nvPairs) + + self.assertEqual(mok_send_events.call_count, 1) + self.assertEqual(len(e.event_store.event_buffer), 0) + self.assertEqual(e.bytes_queued, 0) + + @freeze_time("2021-04-14 00:00:02") # unix: 1618358402000 + def test_attach_sent_tstamp(self) -> None: + e = Emitter("0.0.0.0") + ev_list = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + + e.attach_sent_timestamp(ev_list) + reduced = True + for ev in ev_list: + reduced = reduced and "stm" in ev.keys() and ev["stm"] == "1618358402000" + self.assertTrue(reduced) + + @mock.patch("snowplow_tracker.Emitter.flush") + def test_flush_timer(self, mok_flush: Any) -> None: + mok_flush.side_effect = mocked_flush + + e = Emitter("0.0.0.0", batch_size=10) + ev_list = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + for i in ev_list: + e.input(i) + + e.set_flush_timer(3) + self.assertEqual(len(e.event_store.event_buffer), 3) + time.sleep(5) + self.assertGreaterEqual(mok_flush.call_count, 1) + + @mock.patch("snowplow_tracker.Emitter.http_get") + def test_send_events_get_success(self, mok_http_get: Any) -> None: + mok_http_get.side_effect = mocked_http_response_success + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter( + "0.0.0.0", + method="get", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + ) + + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + mok_success.assert_called_once_with(evBuffer) + mok_failure.assert_not_called() + + @mock.patch("snowplow_tracker.Emitter.http_get") + def test_send_events_get_failure(self, mok_http_get: Any) -> None: + mok_http_get.side_effect = mocked_http_response_failure + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter( + 
"0.0.0.0", + method="get", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + ) + + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + mok_success.assert_not_called() + mok_failure.assert_called_once_with(0, evBuffer) + + @mock.patch("snowplow_tracker.Emitter.http_post") + def test_send_events_post_success(self, mok_http_post: Any) -> None: + mok_http_post.side_effect = mocked_http_response_success + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter( + "0.0.0.0", batch_size=10, on_success=mok_success, on_failure=mok_failure + ) + + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + mok_success.assert_called_once_with(evBuffer) + mok_failure.assert_not_called() + + @mock.patch("snowplow_tracker.Emitter.http_post") + def test_send_events_post_failure(self, mok_http_post: Any) -> None: + mok_http_post.side_effect = mocked_http_response_failure + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter( + "0.0.0.0", batch_size=10, on_success=mok_success, on_failure=mok_failure + ) + + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + mok_success.assert_not_called() + mok_failure.assert_called_with(0, evBuffer) + + @mock.patch("snowplow_tracker.emitters.requests.post") + def test_http_post_connect_timeout_error(self, mok_post_request: Any) -> None: + mok_post_request.side_effect = ConnectTimeout + e = Emitter("0.0.0.0") + response = e.http_post("dummy_string") + post_succeeded = Emitter.is_good_status_code(response) + + self.assertFalse(post_succeeded) + + @mock.patch("snowplow_tracker.emitters.requests.get") # stub the GET transport; assumes http_get calls requests.get - TODO confirm + def test_http_get_connect_timeout_error(self, mok_get_request: Any) -> None: + mok_get_request.side_effect = ConnectTimeout + e = Emitter("0.0.0.0", method="get") + response = e.http_get({"a": "b"}) +
get_succeeded = Emitter.is_good_status_code(response) + self.assertFalse(get_succeeded) + + ### + # AsyncEmitter + ### + @mock.patch("snowplow_tracker.AsyncEmitter.flush") + def test_async_emitter_input(self, mok_flush: Any) -> None: + mok_flush.side_effect = mocked_flush + + ae = AsyncEmitter( + "0.0.0.0", port=9090, method="get", batch_size=3, thread_count=5 + ) + self.assertTrue(ae.queue.empty()) + + ae.input({"a": "aa"}) + ae.input({"b": "bb"}) + self.assertEqual(len(ae.event_store.event_buffer), 2) + self.assertTrue(ae.queue.empty()) + mok_flush.assert_not_called() + + ae.input({"c": "cc"}) # meet buffer size + self.assertEqual(mok_flush.call_count, 1) + + @mock.patch("snowplow_tracker.AsyncEmitter.send_events") + def test_async_emitter_sync_flash(self, mok_send_events: Any) -> None: + mok_send_events.side_effect = mocked_send_events + + ae = AsyncEmitter( + "0.0.0.0", + port=9090, + method="get", + batch_size=3, + thread_count=5, + byte_limit=1024, + ) + self.assertTrue(ae.queue.empty()) + + ae.input({"a": "aa"}) + ae.input({"b": "bb"}) + self.assertEqual(len(ae.event_store.event_buffer), 2) + self.assertTrue(ae.queue.empty()) + mok_send_events.assert_not_called() + + ae.sync_flush() + self.assertEqual(len(ae.event_store.event_buffer), 0) + self.assertEqual(ae.bytes_queued, 0) + self.assertEqual(mok_send_events.call_count, 1) + + @mock.patch("snowplow_tracker.Emitter.http_get") + def test_async_send_events_get_success(self, mok_http_get: Any) -> None: + mok_http_get.side_effect = mocked_http_response_success + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + ae = AsyncEmitter( + "0.0.0.0", + method="get", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + ) + + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + ae.send_events(evBuffer) + mok_success.assert_called_once_with(evBuffer) + mok_failure.assert_not_called() + + @mock.patch("snowplow_tracker.Emitter.http_get") + 
def test_async_send_events_get_failure(self, mok_http_get: Any) -> None: + mok_http_get.side_effect = mocked_http_response_failure + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + ae = AsyncEmitter( + "0.0.0.0", + method="get", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + ) + + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + ae.send_events(evBuffer) + mok_success.assert_not_called() + mok_failure.assert_called_once_with(0, evBuffer) + + @mock.patch("snowplow_tracker.Emitter.http_post") + def test_async_send_events_post_success(self, mok_http_post: Any) -> None: + mok_http_post.side_effect = mocked_http_response_success + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + ae = AsyncEmitter( # async emitter under test, as in the async GET variants + "0.0.0.0", batch_size=10, on_success=mok_success, on_failure=mok_failure + ) + + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + ae.send_events(evBuffer) + mok_success.assert_called_once_with(evBuffer) + mok_failure.assert_not_called() + + @mock.patch("snowplow_tracker.Emitter.http_post") + def test_async_send_events_post_failure(self, mok_http_post: Any) -> None: + mok_http_post.side_effect = mocked_http_response_failure + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + ae = AsyncEmitter( # async emitter under test, as in the async GET variants + "0.0.0.0", batch_size=10, on_success=mok_success, on_failure=mok_failure + ) + + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + ae.send_events(evBuffer) + mok_success.assert_not_called() + mok_failure.assert_called_with(0, evBuffer) + + # Unicode + @mock.patch("snowplow_tracker.AsyncEmitter.flush") + def test_input_unicode_get(self, mok_flush: Any) -> None: + mok_flush.side_effect = mocked_flush + + payload = {"unicode": "\u0107", "alsoAscii": "abc"} + ae = AsyncEmitter("0.0.0.0", method="get", batch_size=2) + ae.input(payload) + +
self.assertEqual(len(ae.event_store.event_buffer), 1) + self.assertDictEqual(payload, ae.event_store.event_buffer[0]) + + @mock.patch("snowplow_tracker.AsyncEmitter.flush") + def test_input_unicode_post(self, mok_flush: Any) -> None: + mok_flush.side_effect = mocked_flush + + payload = {"unicode": "\u0107", "alsoAscii": "abc"} + ae = AsyncEmitter("0.0.0.0", batch_size=2) + ae.input(payload) + + self.assertEqual(len(ae.event_store.event_buffer), 1) + self.assertDictEqual(payload, ae.event_store.event_buffer[0]) + + @mock.patch("snowplow_tracker.Emitter.http_post") + def test_send_events_post_retry(self, mok_http_post: Any) -> None: + mok_http_post.side_effect = mocked_http_response_failure_retry + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter( + "0.0.0.0", batch_size=10, on_success=mok_success, on_failure=mok_failure + ) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_http_post.side_effect = mocked_http_response_success + time.sleep(5) + + mok_failure.assert_called_with(0, evBuffer) + mok_success.assert_called_with(evBuffer) + + @mock.patch("snowplow_tracker.Emitter.http_get") + def test_send_events_get_retry(self, mok_http_get: Any) -> None: + mok_http_get.side_effect = mocked_http_response_failure_retry + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter( + "0.0.0.0", + method="get", + batch_size=1, + on_success=mok_success, + on_failure=mok_failure, + ) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_http_get.side_effect = mocked_http_response_success + time.sleep(5) + + mok_failure.assert_called_with(0, evBuffer) + mok_success.assert_called_with(evBuffer) + + @mock.patch("snowplow_tracker.Emitter.http_get") + def test_send_events_get_no_retry(self, mok_http_get: Any) -> None: + mok_http_get.side_effect = 
mocked_http_response_failure + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter( + "0.0.0.0", + method="get", + batch_size=1, + on_success=mok_success, + on_failure=mok_failure, + ) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_failure.assert_called_once_with(0, evBuffer) + mok_success.assert_not_called() + + @mock.patch("snowplow_tracker.Emitter.http_post") + def test_send_events_post_no_retry(self, mok_http_post: Any) -> None: + mok_http_post.side_effect = mocked_http_response_failure + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter( + "0.0.0.0", + method="post", # must match the patched http_post so the mock is actually used + batch_size=1, + on_success=mok_success, + on_failure=mok_failure, + ) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_failure.assert_called_once_with(0, evBuffer) + mok_success.assert_not_called() + + @mock.patch("snowplow_tracker.Emitter.http_post") + def test_send_events_post_custom_retry(self, mok_http_post: Any) -> None: + mok_http_post.side_effect = mocked_http_response_failure + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter( + "0.0.0.0", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + custom_retry_codes={400: True}, + ) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_http_post.side_effect = mocked_http_response_success + time.sleep(5) + + mok_failure.assert_called_with(0, evBuffer) + mok_success.assert_called_with(evBuffer) + + @mock.patch("snowplow_tracker.Emitter.http_get") + def test_send_events_get_custom_retry(self, mok_http_get: Any) -> None: + mok_http_get.side_effect = mocked_http_response_failure + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + +
e = Emitter( + "0.0.0.0", + method="get", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + custom_retry_codes={400: True}, + ) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_http_get.side_effect = mocked_http_response_success + time.sleep(5) + + mok_failure.assert_called_with(0, evBuffer) + mok_success.assert_called_with(evBuffer) diff --git a/snowplow_tracker/test/unit/test_event.py b/snowplow_tracker/test/unit/test_event.py new file mode 100644 index 00000000..e50da98d --- /dev/null +++ b/snowplow_tracker/test/unit/test_event.py @@ -0,0 +1,72 @@ +# """ +# test_event.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ + +import json +import unittest +from snowplow_tracker.events import Event +from snowplow_tracker.subject import Subject +from snowplow_tracker.self_describing_json import SelfDescribingJson + +CONTEXT_SCHEMA = "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1" + + +class TestEvent(unittest.TestCase): + def setUp(self) -> None: + pass + + def test_init(self): + event = Event() + self.assertEqual(event.payload.nv_pairs, {}) + + def test_build_payload(self): + event_subject = Subject() + event = Event(event_subject=event_subject) + payload = event.build_payload(encode_base64=None, json_encoder=None) + + self.assertEqual(payload.nv_pairs, {"p": "pc"}) + + def test_build_payload_tstamp(self): + event_subject = Subject() + tstamp = 1399021242030 + + event = Event(event_subject=event_subject, true_timestamp=tstamp) + + payload = event.build_payload( + json_encoder=None, + encode_base64=None, + ) + + self.assertEqual(payload.nv_pairs, {"p": "pc", "ttm": 1399021242030}) + + def test_build_payload_context(self): + event_subject = Subject() + context = SelfDescribingJson("test.context.schema", {"user": "tester"}) + event_context = [context] + event = Event(event_subject=event_subject, context=event_context) + + payload = event.build_payload( + json_encoder=None, + encode_base64=False, + ) + + expected_context = { + "schema": CONTEXT_SCHEMA, + "data": [{"schema": "test.context.schema", "data": {"user": "tester"}}], + } + actual_context = json.loads(payload.nv_pairs["co"]) + + self.assertDictEqual(actual_context, expected_context) diff --git a/snowplow_tracker/test/unit/test_in_memory_event_store.py b/snowplow_tracker/test/unit/test_in_memory_event_store.py new file mode 100644 index 00000000..93a0c8b2 --- /dev/null +++ b/snowplow_tracker/test/unit/test_in_memory_event_store.py @@ -0,0 +1,106 @@ +# """ +# test_in_memory_event_store.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. 
+ +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ + +import unittest +from snowplow_tracker.event_store import InMemoryEventStore +import logging + +# logging +logging.basicConfig() +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +class TestInMemoryEventStore(unittest.TestCase): + def setUp(self) -> None: + pass + + def test_init(self): + event_store = InMemoryEventStore(logger) + self.assertEqual(event_store.buffer_capacity, 10000) + self.assertEqual(event_store.event_buffer, []) + + def test_add_event(self): + event_store = InMemoryEventStore(logger) + nvPairs = {"n0": "v0", "n1": "v1"} + + event_store.add_event(nvPairs) + self.assertDictEqual(nvPairs, event_store.event_buffer[0]) + + def test_size(self): + event_store = InMemoryEventStore(logger) + nvPairs = {"n0": "v0", "n1": "v1"} + + event_store.add_event(nvPairs) + event_store.add_event(nvPairs) + event_store.add_event(nvPairs) + + self.assertEqual(event_store.size(), 3) + + def test_add_failed_events_to_buffer(self): + event_store = InMemoryEventStore(logger) + + nvPair1 = {"n0": "v0", "n1": "v1"} + nvPair2 = {"n2": "v2", "n3": "v3"} + + event_store.add_event(nvPair1) + event_store.add_event(nvPair2) + + payload_list = event_store.get_events_batch() + + event_store.cleanup(payload_list, True) + + self.assertEqual(event_store.event_buffer, payload_list) + + def test_remove_success_events_from_buffer(self): + 
event_store = InMemoryEventStore(logger) + + nvPairs = {"n0": "v0", "n1": "v1"} + + event_store.add_event(nvPairs) + event_store.add_event(nvPairs) + payload_list = event_store.get_events_batch() + event_store.cleanup(payload_list, False) + + self.assertEqual(event_store.event_buffer, []) + + def test_drop_new_events_buffer_full(self): + event_store = InMemoryEventStore(logger, buffer_capacity=2) + + nvPair1 = {"n0": "v0"} + nvPair2 = {"n1": "v1"} + nvPair3 = {"n2": "v2"} + + event_store.add_event(nvPair1) + event_store.add_event(nvPair2) + + self.assertEqual(event_store.event_buffer, [{"n0": "v0"}, {"n1": "v1"}]) + + event_store.add_event(nvPair3) + + self.assertEqual(event_store.event_buffer, [{"n0": "v0"}, {"n1": "v1"}]) + + def test_get_events(self): + event_store = InMemoryEventStore(logger, buffer_capacity=2) + + nvPairs = {"n0": "v0"} + batch = [nvPairs, nvPairs] + + event_store.add_event(nvPairs) + event_store.add_event(nvPairs) + + self.assertEqual(event_store.get_events_batch(), batch) diff --git a/snowplow_tracker/test/unit/test_page_ping.py b/snowplow_tracker/test/unit/test_page_ping.py new file mode 100644 index 00000000..7539ce43 --- /dev/null +++ b/snowplow_tracker/test/unit/test_page_ping.py @@ -0,0 +1,38 @@ +import pytest + +from snowplow_tracker.events.page_ping import PagePing + + +class TestPagePing: + def test_getters(self): + pp = PagePing("url", "title", "referrer", 1, 2, 3, 4) + assert pp.page_url == "url" + assert pp.page_title == "title" + assert pp.referrer == "referrer" + assert pp.min_x == 1 + assert pp.max_x == 2 + assert pp.min_y == 3 + assert pp.max_y == 4 + + def test_setters(self): + pp = PagePing("url") + pp.page_title = "title" + pp.referrer = "referrer" + pp.min_x = 1 + pp.max_x = 2 + pp.min_y = 3 + pp.max_y = 4 + assert pp.page_title == "title" + assert pp.referrer == "referrer" + assert pp.min_x == 1 + assert pp.max_x == 2 + assert pp.min_y == 3 + assert pp.max_y == 4 + assert pp.page_url == "url" + + def 
test_page_url_non_empty_string(self): + pp = PagePing("url") + pp.page_url = "new_url" + assert pp.page_url == "new_url" + with pytest.raises(ValueError): + pp.page_url = "" diff --git a/snowplow_tracker/test/unit/test_page_view.py b/snowplow_tracker/test/unit/test_page_view.py new file mode 100644 index 00000000..3736710c --- /dev/null +++ b/snowplow_tracker/test/unit/test_page_view.py @@ -0,0 +1,27 @@ +import pytest + +from snowplow_tracker.events.page_view import PageView + + +class TestPageView: + def test_getters(self): + pv = PageView("url", "title", "referrer") + assert pv.page_url == "url" + assert pv.page_title == "title" + assert pv.referrer == "referrer" + + def test_setters(self): + pv = PageView("url", "title", "referrer") + pv.page_url = "new_url" + pv.page_title = "new_title" + pv.referrer = "new_referrer" + assert pv.page_url == "new_url" + assert pv.page_title == "new_title" + assert pv.referrer == "new_referrer" + + def test_page_url_non_empty_string(self): + pv = PageView("url") + pv.page_url = "new_url" + assert pv.page_url == "new_url" + with pytest.raises(ValueError): + pv.page_url = "" diff --git a/snowplow_tracker/test/unit/test_payload.py b/snowplow_tracker/test/unit/test_payload.py index 93b2d7cb..c174e8f4 100644 --- a/snowplow_tracker/test/unit/test_payload.py +++ b/snowplow_tracker/test/unit/test_payload.py @@ -1,30 +1,29 @@ -""" - test_payload.py +# """ +# test_payload.py - Copyright (c) 2013-2014 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. - This program is licensed to you under the Apache License Version 2.0, - and you may not use this file except in compliance with the Apache License - Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - http://www.apache.org/licenses/LICENSE-2.0. 
- - Unless required by applicable law or agreed to in writing, - software distributed under the Apache License Version 2.0 is distributed on - an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - express or implied. See the Apache License Version 2.0 for the specific - language governing permissions and limitations there under. - - Authors: Anuj More, Alex Dean, Fred Blundun - Copyright: Copyright (c) 2013-2014 Snowplow Analytics Ltd - License: Apache License Version 2.0 -""" +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ +import json +import base64 import unittest +from typing import Dict, Any + from snowplow_tracker import payload -def is_subset(dict1, dict2): +def is_subset(dict1: Dict[Any, Any], dict2: Dict[Any, Any]) -> bool: """ * is_subset(smaller_dict, larger_dict) Checks if dict1 has name, value pairs that also exist in dict2. 
@@ -40,29 +39,124 @@ def is_subset(dict1, dict2): return False -class TestPayload(unittest.TestCase): +def date_encoder(o: Any) -> str: + """Sample custom JSON encoder which converts dates into their ISO format""" + from datetime import date + from json.encoder import JSONEncoder - def setUp(self): + if isinstance(o, date): + return o.isoformat() + + return JSONEncoder().default(o) # call on an instance so non-dates raise the standard TypeError + + +class TestPayload(unittest.TestCase): + def setUp(self) -> None: pass - def test_object_generation(self): + def test_object_generation(self) -> None: p = payload.Payload() - self.assertTrue(is_subset({}, p.nv_pairs)) + self.assertDictEqual({}, p.nv_pairs) - def test_object_generation_2(self): - p = payload.Payload({"test1": "result1", "test2": "result2", }) + def test_object_generation_2(self) -> None: + p = payload.Payload( + { + "test1": "result1", + "test2": "result2", + } + ) output = {"test1": "result1", "test2": "result2"} - self.assertTrue(is_subset(output, p.nv_pairs)) + self.assertDictEqual(output, p.nv_pairs) - def test_add(self): + def test_add(self) -> None: p = payload.Payload() p.add("name1", "value1") p.add("name2", "value2") - output = {"name1": "value1", "name2": "value2", } - self.assertTrue(is_subset(output, p.nv_pairs)) + output = { + "name1": "value1", + "name2": "value2", + } + self.assertDictEqual(output, p.nv_pairs) - def test_add_dict(self): - p = payload.Payload({"n1": "v1", "n2": "v2", }) - p.add_dict({"name4": 4, "name3": 3}) # Order doesn't matter + def test_add_empty_val(self) -> None: + p = payload.Payload() + p.add("name", "") + output = {} + self.assertDictEqual(output, p.nv_pairs) + + def test_add_none(self) -> None: + p = payload.Payload() + p.add("name", None) + output = {} + self.assertDictEqual(output, p.nv_pairs) + + def test_add_dict(self) -> None: + p = payload.Payload( + { + "n1": "v1", + "n2": "v2", + } + ) + p.add_dict({"name4": 4, "name3": 3}) # Order doesn't matter output = {"n1": "v1", "n2": "v2", "name3": 3, "name4": 4} -
self.assertTrue(is_subset(output, p.nv_pairs)) + self.assertDictEqual(output, p.nv_pairs) + + def test_add_json_empty(self) -> None: + p = payload.Payload({"name": "value"}) + input = {} + p.add_json(input, False, "ue_px", "ue_pr") + output = {"name": "value"} + self.assertDictEqual(output, p.nv_pairs) + + def test_add_json_none(self) -> None: + p = payload.Payload({"name": "value"}) + input = None + p.add_json(input, False, "ue_px", "ue_pr") + output = {"name": "value"} + self.assertDictEqual(output, p.nv_pairs) + + def test_add_json_encode_false(self) -> None: + p = payload.Payload() + input = {"a": 1} + p.add_json(input, False, "ue_px", "ue_pr") + self.assertTrue("ue_pr" in p.nv_pairs.keys()) + self.assertFalse("ue_px" in p.nv_pairs.keys()) + + def test_add_json_encode_true(self) -> None: + p = payload.Payload() + input = {"a": 1} + p.add_json(input, True, "ue_px", "ue_pr") + self.assertFalse("ue_pr" in p.nv_pairs.keys()) + self.assertTrue("ue_px" in p.nv_pairs.keys()) + + def test_add_json_unicode_encode_false(self) -> None: + p = payload.Payload() + input = {"a": "\u0107", "\u0107": "b"} + p.add_json(input, False, "ue_px", "ue_pr") + ue_pr = json.loads(p.nv_pairs["ue_pr"]) + self.assertDictEqual(input, ue_pr) + + def test_add_json_unicode_encode_true(self) -> None: + p = payload.Payload() + input = {"a": "\u0107", "\u0107": "b"} + p.add_json(input, True, "ue_px", "ue_pr") + ue_px = json.loads( + base64.urlsafe_b64decode(p.nv_pairs["ue_px"]).decode("utf-8") + ) + self.assertDictEqual(input, ue_px) + + def test_add_json_with_custom_enc(self) -> None: + from datetime import date + + p = payload.Payload() + + input = {"key1": date(2020, 2, 1)} + + p.add_json(input, False, "name1", "name1", date_encoder) + + results = json.loads(p.nv_pairs["name1"]) + self.assertTrue(is_subset({"key1": "2020-02-01"}, results)) + + def test_subject_get(self) -> None: + p = payload.Payload({"name1": "val1"}) + self.assertDictEqual(p.get(), p.nv_pairs) diff --git 
a/snowplow_tracker/test/unit/test_structured_event.py b/snowplow_tracker/test/unit/test_structured_event.py new file mode 100644 index 00000000..fdf00014 --- /dev/null +++ b/snowplow_tracker/test/unit/test_structured_event.py @@ -0,0 +1,24 @@ +from snowplow_tracker.events.structured_event import StructuredEvent + + +class TestStructuredEvent: + def test_getters(self): + se = StructuredEvent("category", "action", "label", "property", 1) + assert se.category == "category" + assert se.action == "action" + assert se.label == "label" + assert se.property_ == "property" + assert se.value == 1 + + def test_setters(self): + se = StructuredEvent("category", "action") + se.category = "new_category" + se.action = "new_action" + se.label = "new_label" + se.property_ = "new_property" + se.value = 2 + assert se.category == "new_category" + assert se.action == "new_action" + assert se.label == "new_label" + assert se.property_ == "new_property" + assert se.value == 2 diff --git a/snowplow_tracker/test/unit/test_subject.py b/snowplow_tracker/test/unit/test_subject.py new file mode 100644 index 00000000..953a0a74 --- /dev/null +++ b/snowplow_tracker/test/unit/test_subject.py @@ -0,0 +1,116 @@ +# """ +# test_subject.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ + +import unittest +import pytest + +from snowplow_tracker import subject as _subject + + +class TestSubject(unittest.TestCase): + def setUp(self) -> None: + pass + + def test_subject_0(self) -> None: + s = _subject.Subject() + self.assertDictEqual(s.standard_nv_pairs, {"p": _subject.DEFAULT_PLATFORM}) + + s.set_platform("srv") + s.set_user_id("1234") + s.set_screen_resolution(1920, 1080) + s.set_viewport(1080, 1080) + s.set_color_depth(1080) + s.set_timezone("PST") + s.set_lang("EN") + s.set_domain_user_id("domain-user-id") + s.set_domain_session_id("domain-session-id") + s.set_domain_session_index(1) + s.set_ip_address("127.0.0.1") + s.set_useragent("useragent-string") + s.set_network_user_id("network-user-id") + + exp = { + "p": "srv", + "uid": "1234", + "res": "1920x1080", + "vp": "1080x1080", + "cd": 1080, + "tz": "PST", + "lang": "EN", + "ip": "127.0.0.1", + "ua": "useragent-string", + "duid": "domain-user-id", + "sid": "domain-session-id", + "vid": 1, + "tnuid": "network-user-id", + } + self.assertDictEqual(s.standard_nv_pairs, exp) + + def test_subject_1(self) -> None: + s = _subject.Subject().set_platform("srv").set_user_id("1234").set_lang("EN") + + exp = {"p": "srv", "uid": "1234", "lang": "EN"} + self.assertDictEqual(s.standard_nv_pairs, exp) + + with pytest.raises(KeyError): + s.standard_nv_pairs["res"] + with pytest.raises(KeyError): + s.standard_nv_pairs["vp"] + with pytest.raises(KeyError): + s.standard_nv_pairs["cd"] + with pytest.raises(KeyError): + s.standard_nv_pairs["tz"] + with pytest.raises(KeyError): + s.standard_nv_pairs["ip"] + with pytest.raises(KeyError): + s.standard_nv_pairs["ua"] + with pytest.raises(KeyError): + s.standard_nv_pairs["duid"] + with pytest.raises(KeyError): + s.standard_nv_pairs["sid"] + with pytest.raises(KeyError): + s.standard_nv_pairs["vid"] + with pytest.raises(KeyError): + s.standard_nv_pairs["tnuid"] + + def test_combine_subject(self) -> None: + s = _subject.Subject() + s.set_color_depth(10) + 
s.set_domain_session_id("domain_session_id") + + s2 = _subject.Subject() + s2.set_domain_user_id("domain_user_id") + s2.set_lang("en") + + fin_payload_dict = s.combine_subject(s2) + + expected_fin_payload_dict = { + "p": "pc", + "cd": 10, + "sid": "domain_session_id", + "duid": "domain_user_id", + "lang": "en", + } + + expected_subject = { + "p": "pc", + "cd": 10, + "sid": "domain_session_id", + } + + self.assertDictEqual(fin_payload_dict, expected_fin_payload_dict) + self.assertDictEqual(s.standard_nv_pairs, expected_subject) diff --git a/snowplow_tracker/test/unit/test_tracker.py b/snowplow_tracker/test/unit/test_tracker.py index cbf910ea..3009790a 100644 --- a/snowplow_tracker/test/unit/test_tracker.py +++ b/snowplow_tracker/test/unit/test_tracker.py @@ -1,91 +1,1249 @@ -""" - test_tracker.py +# """ +# test_tracker.py - Copyright (c) 2013-2014 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. - This program is licensed to you under the Apache License Version 2.0, - and you may not use this file except in compliance with the Apache License - Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - http://www.apache.org/licenses/LICENSE-2.0. - - Unless required by applicable law or agreed to in writing, - software distributed under the Apache License Version 2.0 is distributed on - an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - express or implied. See the Apache License Version 2.0 for the specific - language governing permissions and limitations there under. - - Authors: Anuj More, Alex Dean, Fred Blundun - Copyright: Copyright (c) 2013-2014 Snowplow Analytics Ltd - License: Apache License Version 2.0 -""" +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. 
You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ import re -import time +import json import unittest +import unittest.mock as mock -from contracts.interface import ContractNotRespected from freezegun import freeze_time +from typing import Any, Optional +from snowplow_tracker.contracts import disable_contracts, enable_contracts from snowplow_tracker.tracker import Tracker -from snowplow_tracker.emitters import Emitter +from snowplow_tracker.tracker import VERSION as TRACKER_VERSION +from snowplow_tracker.subject import Subject +from snowplow_tracker.payload import Payload +from snowplow_tracker.self_describing_json import SelfDescribingJson +from snowplow_tracker.events import Event, SelfDescribing, ScreenView + +UNSTRUCT_SCHEMA = "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0" +CONTEXT_SCHEMA = "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1" +LINK_CLICK_SCHEMA = "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1" +ADD_TO_CART_SCHEMA = "iglu:com.snowplowanalytics.snowplow/add_to_cart/jsonschema/1-0-0" +REMOVE_FROM_CART_SCHEMA = ( + "iglu:com.snowplowanalytics.snowplow/remove_from_cart/jsonschema/1-0-0" +) +FORM_CHANGE_SCHEMA = "iglu:com.snowplowanalytics.snowplow/change_form/jsonschema/1-0-0" +FORM_SUBMIT_SCHEMA = "iglu:com.snowplowanalytics.snowplow/submit_form/jsonschema/1-0-0" +SITE_SEARCH_SCHEMA = "iglu:com.snowplowanalytics.snowplow/site_search/jsonschema/1-0-0" +MOBILE_SCREEN_VIEW_SCHEMA = ( + "iglu:com.snowplowanalytics.mobile/screen_view/jsonschema/1-0-0" +) +SCREEN_VIEW_SCHEMA = 
"iglu:com.snowplowanalytics.snowplow/screen_view/jsonschema/1-0-0" + +# helpers +_TEST_UUID = "5628c4c6-3f8a-43f8-a09f-6ff68f68dfb6" +geoSchema = "iglu:com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-0-0" +geoData = {"latitude": -23.2, "longitude": 43.0} +movSchema = "iglu:com.acme_company/movie_poster/jsonschema/2-1-1" +movData = {"movie": "TestMovie", "year": 2021} + + +def mocked_uuid() -> str: + return _TEST_UUID + + +def mocked_track( + event: Any, + context: Optional[Any] = None, + tstamp: Optional[Any] = None, + event_subject: Optional[Any] = None, +) -> None: + pass + + +def mocked_complete_payload( + event: Any, + event_subject: Optional[Any], + context: Optional[Any], + tstamp: Optional[Any], +) -> Payload: + pass + + +def mocked_track_trans_item(*args: Any, **kwargs: Any) -> None: + pass + + +def mocked_track_unstruct(*args: Any, **kwargs: Any) -> None: + pass + + +class ContractsDisabled(object): + def __enter__(self) -> None: + disable_contracts() + + def __exit__(self, type: Any, value: Any, traceback: Any) -> None: + enable_contracts() class TestTracker(unittest.TestCase): + def create_patch(self, name: str) -> Any: + patcher = mock.patch(name) + thing = patcher.start() + thing.side_effect = mock.MagicMock + self.addCleanup(patcher.stop) + return thing - def setUp(self): + def setUp(self) -> None: pass - def test_initialisation(self): - t = Tracker([Emitter("d3rkrsqld9gmqf.cloudfront.net")], namespace="cloudfront", encode_base64= False, app_id="AF003") - self.assertEquals(t.standard_nv_pairs["tna"], "cloudfront") - self.assertEquals(t.standard_nv_pairs["aid"], "AF003") - self.assertEquals(t.encode_base64, False) + def test_initialisation(self) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + t = Tracker("cloudfront", [e], encode_base64=False, app_id="AF003") + self.assertEqual(t.standard_nv_pairs["tna"], "cloudfront") + self.assertEqual(t.standard_nv_pairs["aid"], "AF003") + 
self.assertEqual(t.encode_base64, False) + + def test_initialisation_default_optional(self) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + t = Tracker("namespace", e) + self.assertEqual(t.emitters, [e]) + self.assertTrue(t.standard_nv_pairs["aid"] is None) + self.assertEqual(t.encode_base64, True) + + def test_initialisation_emitter_list(self) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e1 = mokEmitter() + e2 = mokEmitter() - def test_get_uuid(self): + t = Tracker("namespace", [e1, e2]) + self.assertEqual(t.emitters, [e1, e2]) + + def test_initialisation_error(self) -> None: + with self.assertRaises(ValueError): + Tracker("namespace", []) + + def test_initialization_with_subject(self) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + s = Subject() + t = Tracker("namespace", e, subject=s) + self.assertIs(t.subject, s) + + def test_get_uuid(self) -> None: eid = Tracker.get_uuid() - self.assertIsNotNone(re.match('[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\Z', eid)) + self.assertIsNotNone( + re.match( + r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\Z", eid + ) + ) + + @freeze_time("1970-01-01 00:00:01") + def test_get_timestamp(self) -> None: + tstamp = Tracker.get_timestamp() + self.assertEqual(tstamp, 1000) # 1970-01-01 00:00:01 in ms + + def test_get_timestamp_1(self) -> None: + tstamp = Tracker.get_timestamp(1399021242030) + self.assertEqual(tstamp, 1399021242030) + + def test_get_timestamp_2(self) -> None: + tstamp = Tracker.get_timestamp(1399021242240.0303) + self.assertEqual(tstamp, 1399021242240) @freeze_time("1970-01-01 00:00:01") - def test_get_timestamp(self): - dtm = Tracker.get_timestamp() - self.assertEquals(dtm, 1000) # 1970-01-01 00:00:01 in ms - - def test_set_timestamp_1(self): - dtm = Tracker.get_timestamp(1399021242030) - self.assertEquals(dtm, 1399021242030) - - def test_set_timestamp_2(self): - dtm 
= Tracker.get_timestamp(1399021242240.0303) - self.assertEquals(dtm, 1399021242240) - - def test_add_emitter(self): - e1 = Emitter("d3rkrsqld9gmqf.cloudfront.net", method="get") - e2 = Emitter("d3rkrsqld9gmqf.cloudfront.net", method="post") - t = Tracker(e1, namespace="cloudfront", encode_base64=False, app_id="AF003") + def test_get_timestamp_3(self) -> None: + tstamp = Tracker.get_timestamp("1399021242030") # test wrong arg type + self.assertEqual(tstamp, 1000) # 1970-01-01 00:00:01 in ms + + @mock.patch("snowplow_tracker.Tracker.track") + def test_alias_of_track_self_describing_event(self, mok_track: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track.side_effect = mocked_track + t = Tracker("namespace", e) + evJson = SelfDescribingJson("test.schema", {"n": "v"}) + # call the alias + t.track_self_describing_event(evJson) + self.assertEqual(mok_track.call_count, 1) + + def test_flush(self) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e1 = mokEmitter() + e2 = mokEmitter() + + t = Tracker("namespace", [e1, e2]) + t.flush() + e1.flush.assert_not_called() + self.assertEqual(e1.sync_flush.call_count, 1) + e2.flush.assert_not_called() + self.assertEqual(e2.sync_flush.call_count, 1) + + def test_flush_async(self) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e1 = mokEmitter() + e2 = mokEmitter() + + t = Tracker("namespace", [e1, e2]) + t.flush(is_async=True) + self.assertEqual(e1.flush.call_count, 1) + e1.sync_flush.assert_not_called() + self.assertEqual(e2.flush.call_count, 1) + e2.sync_flush.assert_not_called() + + def test_set_subject(self) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + t = Tracker("namespace", e) + new_subject = Subject() + self.assertIsNot(t.subject, new_subject) + t.set_subject(new_subject) + self.assertIs(t.subject, new_subject) + + def test_add_emitter(self) -> None: + mokEmitter = 
self.create_patch("snowplow_tracker.Emitter") + e1 = mokEmitter() + e2 = mokEmitter() + + t = Tracker("namespace", e1) t.add_emitter(e2) - self.assertEquals(t.emitters, [e1, e2]) - - def test_alias_contract(self): - e1 = Emitter("d3rkrsqld9gmqf.cloudfront.net", method="get") - t = Tracker(e1, namespace="cloudfront", encode_base64=False, app_id="AF003") - try: - t.track_self_describing_event("not-SelfDescribingJson") - except Exception as e: - self.assertIsInstance(e, ContractNotRespected) - - def test_flush_timer(self): - e1 = Emitter("d3rkrsqld9gmqf.cloudfront.net", method="post", buffer_size=10) - t = Tracker(e1, namespace="cloudfront", encode_base64=False, app_id="AF003") - e1.set_flush_timer(3) - t.track_page_view("http://snowplowanalytics.com/blog/2016/09/22/introducing-sauna-a-decisioning-and-response-platform/") - t.track_page_view("http://snowplowanalytics.com/blog/2016/03/17/2015-2016-winternship-wrapup/") - t.track_page_view("http://snowplowanalytics.com/blog/2016/07/31/iglu-r5-scinde-dawk-released/") - self.assertEqual(len(e1.buffer), 3) - time.sleep(4) - self.assertEqual(len(e1.buffer), 0) - t.track_page_view("http://snowplowanalytics.com/blog/2016/03/03/guide-to-debugging-bad-data-in-elasticsearch-kibana/") - t.track_page_view("http://snowplowanalytics.com/blog/2016/03/17/2015-2016-winternship-wrapup/") - self.assertEqual(len(e1.buffer), 2) + self.assertEqual(t.emitters, [e1, e2]) + + ### + # test track and complete payload methods + ### + + def test_track(self) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e1 = mokEmitter() + e2 = mokEmitter() + e3 = mokEmitter() + + t = Tracker("namespace", [e1, e2, e3]) + + mok_event = self.create_patch("snowplow_tracker.events.Event") + t.track(mok_event) + mok_payload = mok_event.build_payload().nv_pairs + + e1.input.assert_called_once_with(mok_payload) + e2.input.assert_called_once_with(mok_payload) + e3.input.assert_called_once_with(mok_payload) + + @freeze_time("2021-04-19 00:00:01") # 
unix: 1618790401000 + @mock.patch("snowplow_tracker.Tracker.get_uuid") + def test_complete_payload(self, mok_uuid: Any) -> None: + mok_uuid.side_effect = mocked_uuid + + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + t = Tracker("namespace", e) + s = Subject() + event = Event(event_subject=s) + payload = t.complete_payload(event).nv_pairs + + expected = { + "eid": _TEST_UUID, + "dtm": 1618790401000, + "tv": TRACKER_VERSION, + "p": "pc", + "tna": "namespace", + } + self.assertDictEqual(payload, expected) + + @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 + @mock.patch("snowplow_tracker.Tracker.get_uuid") + def test_complete_payload_tstamp(self, mok_uuid: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_uuid.side_effect = mocked_uuid + t = Tracker("namespace", e) + s = Subject() + time_in_millis = 100010001000 + event = Event(true_timestamp=time_in_millis, event_subject=s) + + payload = t.complete_payload(event=event).nv_pairs + + expected = { + "tna": "namespace", + "eid": _TEST_UUID, + "dtm": 1618790401000, + "ttm": time_in_millis, + "tv": TRACKER_VERSION, + "p": "pc", + } + self.assertDictEqual(payload, expected) + + @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 + @mock.patch("snowplow_tracker.Tracker.get_uuid") + def test_complete_payload_co(self, mok_uuid: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_uuid.side_effect = mocked_uuid + + t = Tracker("namespace", e, encode_base64=False) + + geo_ctx = SelfDescribingJson(geoSchema, geoData) + mov_ctx = SelfDescribingJson(movSchema, movData) + ctx_array = [geo_ctx, mov_ctx] + event = Event(context=ctx_array) + payload = t.complete_payload(event=event).nv_pairs + + expected_co = { + "schema": CONTEXT_SCHEMA, + "data": [ + {"schema": geoSchema, "data": geoData}, + {"schema": movSchema, "data": movData}, + ], + } + self.assertIn("co", payload) + 
self.assertDictEqual(json.loads(payload["co"]), expected_co) + + @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 + @mock.patch("snowplow_tracker.Tracker.get_uuid") + def test_complete_payload_cx(self, mok_uuid: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_uuid.side_effect = mocked_uuid + + t = Tracker("namespace", e, encode_base64=True) + + geo_ctx = SelfDescribingJson(geoSchema, geoData) + mov_ctx = SelfDescribingJson(movSchema, movData) + ctx_array = [geo_ctx, mov_ctx] + event = Event(context=ctx_array) + payload = t.complete_payload(event=event).nv_pairs + + self.assertIn("cx", payload) + + @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 + @mock.patch("snowplow_tracker.Tracker.get_uuid") + def test_complete_payload_event_subject(self, mok_uuid: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_uuid.side_effect = mocked_uuid + + t = Tracker("namespace", e) + event_subject = Subject().set_lang("EN").set_user_id("tester") + event = Event(event_subject=event_subject) + payload = t.complete_payload(event=event).nv_pairs + + expected = { + "tna": "namespace", + "eid": _TEST_UUID, + "dtm": 1618790401000, + "tv": TRACKER_VERSION, + "p": "pc", + "lang": "EN", + "uid": "tester", + } + self.assertDictEqual(payload, expected) + + ### + # test track_x methods + ### + + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_self_describing_event(self, mok_track: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track.side_effect = mocked_track + + t = Tracker("namespace", e, encode_base64=False) + event_json = SelfDescribingJson("test.sde.schema", {"n": "v"}) + event = SelfDescribing(event_json=event_json) + actual_pairs = event.build_payload( + encode_base64=t.encode_base64, + json_encoder=t.json_encoder, + ).nv_pairs + + t.track_self_describing_event(event_json) + 
self.assertEqual(mok_track.call_count, 1) + + complete_args_dict = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_dict), 1) + + # payload + actual_ue_pr = json.loads(actual_pairs["ue_pr"]) + + expectedUePr = { + "data": {"data": {"n": "v"}, "schema": "test.sde.schema"}, + "schema": UNSTRUCT_SCHEMA, + } + + self.assertDictEqual(actual_ue_pr, expectedUePr) + self.assertEqual(actual_pairs["e"], "ue") + + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_self_describing_event_all_args(self, mok_track: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track.side_effect = mocked_track + + t = Tracker("namespace", e, encode_base64=False) + event_json = SelfDescribingJson("test.schema", {"n": "v"}) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + event_context = [ctx] + event_tstamp = 1399021242030 + + event = SelfDescribing(event_json=event_json) + actual_pairs = event.build_payload( + encode_base64=t.encode_base64, + json_encoder=t.json_encoder, + ).nv_pairs + + t.track_self_describing_event(event_json, event_context, event_tstamp) + self.assertEqual(mok_track.call_count, 1) + complete_args_dict = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_dict), 1) + + # payload + actualUePr = json.loads(actual_pairs["ue_pr"]) + + expectedUePr = { + "data": {"data": {"n": "v"}, "schema": "test.schema"}, + "schema": UNSTRUCT_SCHEMA, + } + + self.assertDictEqual(actualUePr, expectedUePr) + self.assertEqual(actual_pairs["e"], "ue") + + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_self_describing_event_encode(self, mok_track: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track.side_effect = mocked_track + + t = Tracker("namespace", e, encode_base64=True) + event_json = SelfDescribingJson("test.sde.schema", {"n": "v"}) + + event = SelfDescribing(event_json=event_json) + actual_pairs = 
event.build_payload( + encode_base64=t.encode_base64, + json_encoder=t.json_encoder, + ).nv_pairs + + t.track_self_describing_event(event_json) + self.assertEqual(mok_track.call_count, 1) + complete_args_dict = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_dict), 1) + self.assertTrue("ue_px" in actual_pairs.keys()) + + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_struct_event(self, mok_track: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track.side_effect = mocked_track + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + ev_tstamp = 1399021242030 + t.track_struct_event( + "Mixes", + "Play", + "Test", + "TestProp", + value=3.14, + context=[ctx], + tstamp=ev_tstamp, + ) + self.assertEqual(mok_track.call_count, 1) + complete_args_dict = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_dict), 1) + + actual_payload_arg = complete_args_dict["event"].payload + actual_pairs = actual_payload_arg.nv_pairs + + expected_pairs = { + "e": "se", + "se_ca": "Mixes", + "se_ac": "Play", + "se_la": "Test", + "se_pr": "TestProp", + "se_va": 3.14, + } + self.assertDictEqual(actual_pairs, expected_pairs) + + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_page_view(self, mok_track: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track.side_effect = mocked_track + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + ev_tstamp = 1399021242030 + t.track_page_view( + "example.com", + "Example", + "docs.snowplow.io", + context=[ctx], + tstamp=ev_tstamp, + ) + self.assertEqual(mok_track.call_count, 1) + complete_args_dict = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_dict), 1) + + actual_payload_arg = complete_args_dict["event"].payload + actualPairs = actual_payload_arg.nv_pairs + + 
expectedPairs = { + "e": "pv", + "url": "example.com", + "page": "Example", + "refr": "docs.snowplow.io", + } + self.assertDictEqual(actualPairs, expectedPairs) + + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_page_ping(self, mok_track: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track.side_effect = mocked_track + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + ev_tstamp = 1399021242030 + t.track_page_ping( + "example.com", + "Example", + "docs.snowplow.io", + 0, + 1, + 2, + 3, + context=[ctx], + tstamp=ev_tstamp, + ) + self.assertEqual(mok_track.call_count, 1) + complete_args_dict = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_dict), 1) + + actual_payload_arg = complete_args_dict["event"].payload + actual_pairs = actual_payload_arg.nv_pairs + + expectedPairs = { + "e": "pp", + "url": "example.com", + "page": "Example", + "refr": "docs.snowplow.io", + "pp_mix": 0, + "pp_max": 1, + "pp_miy": 2, + "pp_may": 3, + } + self.assertDictEqual(actual_pairs, expectedPairs) + + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_ecommerce_transaction_item(self, mok_track: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track.side_effect = mocked_track + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + ev_tstamp = 1399021242030 + t.track_ecommerce_transaction_item( + order_id="1234", + sku="sku1234", + price=3.14, + quantity=1, + name="itemName", + category="itemCategory", + currency="itemCurrency", + context=[ctx], + tstamp=ev_tstamp, + ) + self.assertEqual(mok_track.call_count, 1) + complete_args_list = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_list), 1) + + actual_payload_arg = complete_args_list["event"].payload + actual_pairs = actual_payload_arg.nv_pairs + + expectedPairs = { + 
"e": "ti", + "ti_id": "1234", + "ti_sk": "sku1234", + "ti_nm": "itemName", + "ti_ca": "itemCategory", + "ti_pr": 3.14, + "ti_qu": 1, + "ti_cu": "itemCurrency", + } + self.assertDictEqual(actual_pairs, expectedPairs) + + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_ecommerce_transaction_no_items(self, mok_track: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track.side_effect = mocked_track + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + evTstamp = 1399021242030 + t.track_ecommerce_transaction( + "1234", + 10, + "transAffiliation", + 2.5, + 1.5, + "transCity", + "transState", + "transCountry", + "transCurrency", + context=[ctx], + tstamp=evTstamp, + ) + self.assertEqual(mok_track.call_count, 1) + completeArgsList = mok_track.call_args_list[0][1] + self.assertEqual(len(completeArgsList), 1) + + actualPayloadArg = completeArgsList["event"].payload + actualPairs = actualPayloadArg.nv_pairs + + expectedPairs = { + "e": "tr", + "tr_id": "1234", + "tr_tt": 10, + "tr_af": "transAffiliation", + "tr_tx": 2.5, + "tr_sh": 1.5, + "tr_ci": "transCity", + "tr_st": "transState", + "tr_co": "transCountry", + "tr_cu": "transCurrency", + } + self.assertDictEqual(actualPairs, expectedPairs) + + @mock.patch("snowplow_tracker.Tracker.track_ecommerce_transaction_item") + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_ecommerce_transaction_with_items( + self, mok_track: Any, mok_track_trans_item: Any + ) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track.side_effect = mocked_track + mok_track_trans_item.side_effect = mocked_track_trans_item + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + evTstamp = 1399021242030 + transItems = [ + {"sku": "sku1234", "quantity": 3, "price": 3.14}, + {"sku": "sku5678", "quantity": 1, "price": 2.72}, + ] + 
t.track_ecommerce_transaction( + order_id="1234", + total_value=10, + affiliation="transAffiliation", + tax_value=2.5, + shipping=1.5, + city="transCity", + state="transState", + country="transCountry", + currency="transCurrency", + items=transItems, + context=[ctx], + tstamp=evTstamp, + ) + + # Transaction + callCompleteArgsList = mok_track.call_args_list + firstCallArgsList = callCompleteArgsList[0][1] + self.assertEqual(len(firstCallArgsList), 1) + + actualPayloadArg = firstCallArgsList["event"].payload + actualPairs = actualPayloadArg.nv_pairs + + expectedTransPairs = { + "e": "tr", + "tr_id": "1234", + "tr_tt": 10, + "tr_af": "transAffiliation", + "tr_tx": 2.5, + "tr_sh": 1.5, + "tr_ci": "transCity", + "tr_st": "transState", + "tr_co": "transCountry", + "tr_cu": "transCurrency", + } + self.assertDictEqual(actualPairs, expectedTransPairs) + + # Items + calls_to_track_trans_item = mok_track_trans_item.call_count + self.assertEqual(calls_to_track_trans_item, 2) + callTrackItemsArgsList = mok_track_trans_item.call_args_list + # 1st item + firstItemCallArgs = callTrackItemsArgsList[0][0] + self.assertEqual((), firstItemCallArgs) + firstItemCallKwargs = callTrackItemsArgsList[0][1] + + expectedFirstItemPairs = { + "sku": "sku1234", + "quantity": 3, + "price": 3.14, + "order_id": "1234", + "currency": "transCurrency", + "tstamp": evTstamp, + "event_subject": None, + "context": [ctx], + } + + self.assertDictEqual(firstItemCallKwargs, expectedFirstItemPairs) + # 2nd item + secItemCallArgs = callTrackItemsArgsList[1][0] + self.assertEqual((), secItemCallArgs) + secItemCallKwargs = callTrackItemsArgsList[1][1] + + expectedSecItemPairs = { + "sku": "sku5678", + "quantity": 1, + "price": 2.72, + "order_id": "1234", + "currency": "transCurrency", + "tstamp": evTstamp, + "event_subject": None, + "context": [ctx], + } + + self.assertDictEqual(secItemCallKwargs, expectedSecItemPairs) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def 
test_track_link_click(self, mok_track_unstruct: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + evTstamp = 1399021242030 + + t.track_link_click( + "example.com", + "elemId", + ["elemClass1", "elemClass2"], + "_blank", + "elemContent", + context=[ctx], + tstamp=evTstamp, + ) + + expected = { + "schema": LINK_CLICK_SCHEMA, + "data": { + "targetUrl": "example.com", + "elementId": "elemId", + "elementClasses": ["elemClass1", "elemClass2"], + "elementTarget": "_blank", + "elementContent": "elemContent", + }, + } + + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["tstamp"], evTstamp) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_link_click_optional_none(self, mok_track_unstruct: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + + t.track_link_click("example.com") + + expected = { + "schema": LINK_CLICK_SCHEMA, + "data": { + "targetUrl": "example.com", + }, + } + + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertTrue(callArgs["context"] is None) + self.assertTrue(callArgs["tstamp"] is None) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_add_to_cart(self, mok_track_unstruct: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", 
e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + evTstamp = 1399021242030 + + t.track_add_to_cart( + "sku1234", + 3, + "testName", + "testCategory", + 3.14, + "testCurrency", + context=[ctx], + tstamp=evTstamp, + ) + + expected = { + "schema": ADD_TO_CART_SCHEMA, + "data": { + "sku": "sku1234", + "quantity": 3, + "name": "testName", + "category": "testCategory", + "unitPrice": 3.14, + "currency": "testCurrency", + }, + } + + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["tstamp"], evTstamp) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_add_to_cart_optional_none(self, mok_track_unstruct: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + + t.track_add_to_cart("sku1234", 1) + + expected = { + "schema": ADD_TO_CART_SCHEMA, + "data": {"sku": "sku1234", "quantity": 1}, + } + + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertTrue(callArgs["context"] is None) + self.assertTrue(callArgs["tstamp"] is None) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_remove_from_cart(self, mok_track_unstruct: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + evTstamp = 1399021242030 + + t.track_remove_from_cart( + "sku1234", + 3, + "testName", + "testCategory", + 3.14, + "testCurrency", + context=[ctx], + tstamp=evTstamp, + ) + + expected = { + 
"schema": REMOVE_FROM_CART_SCHEMA, + "data": { + "sku": "sku1234", + "quantity": 3, + "name": "testName", + "category": "testCategory", + "unitPrice": 3.14, + "currency": "testCurrency", + }, + } + + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["tstamp"], evTstamp) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_remove_from_cart_optional_none( + self, mok_track_unstruct: Any + ) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + + t.track_remove_from_cart("sku1234", 1) + + expected = { + "schema": REMOVE_FROM_CART_SCHEMA, + "data": {"sku": "sku1234", "quantity": 1}, + } + + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertTrue(callArgs["context"] is None) + self.assertTrue(callArgs["tstamp"] is None) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_form_change(self, mok_track_unstruct: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + evTstamp = 1399021242030 + + t.track_form_change( + "testFormId", + "testElemId", + "INPUT", + "testValue", + "text", + ["testClass1", "testClass2"], + context=[ctx], + tstamp=evTstamp, + ) + + expected = { + "schema": FORM_CHANGE_SCHEMA, + "data": { + "formId": "testFormId", + "elementId": "testElemId", + "nodeName": "INPUT", + "value": "testValue", + "type": "text", + "elementClasses": ["testClass1", "testClass2"], + }, + } + 
+ callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["tstamp"], evTstamp) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_form_change_optional_none(self, mok_track_unstruct: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + t.track_form_change("testFormId", "testElemId", "INPUT", "testValue") + + expected = { + "schema": FORM_CHANGE_SCHEMA, + "data": { + "formId": "testFormId", + "elementId": "testElemId", + "nodeName": "INPUT", + "value": "testValue", + }, + } + + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertTrue(callArgs["context"] is None) + self.assertTrue(callArgs["tstamp"] is None) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_form_submit(self, mok_track_unstruct: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + evTstamp = 1399021242030 + elems = [ + { + "name": "user_email", + "value": "fake@email.fake", + "nodeName": "INPUT", + "type": "email", + } + ] + + t.track_form_submit( + "testFormId", + ["testClass1", "testClass2"], + elems, + context=[ctx], + tstamp=evTstamp, + ) + + expected = { + "schema": FORM_SUBMIT_SCHEMA, + "data": { + "formId": "testFormId", + "formClasses": ["testClass1", "testClass2"], + "elements": elems, + }, + } + + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + 
self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["tstamp"], evTstamp) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_form_submit_invalid_element_type( + self, mok_track_unstruct: Any + ) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + evTstamp = 1399021242030 + elems = [ + { + "name": "user_email", + "value": "fake@email.fake", + "nodeName": "INPUT", + "type": "invalid", + } + ] + + with self.assertRaises(ValueError): + t.track_form_submit( + "testFormId", + ["testClass1", "testClass2"], + elems, + context=[ctx], + tstamp=evTstamp, + ) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_form_submit_invalid_element_type_disabled_contracts( + self, mok_track_unstruct: Any + ) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + evTstamp = 1399021242030 + elems = [ + { + "name": "user_email", + "value": "fake@email.fake", + "nodeName": "INPUT", + "type": "invalid", + } + ] + + with ContractsDisabled(): + t.track_form_submit( + "testFormId", + ["testClass1", "testClass2"], + elems, + context=[ctx], + tstamp=evTstamp, + ) + + expected = { + "schema": FORM_SUBMIT_SCHEMA, + "data": { + "formId": "testFormId", + "formClasses": ["testClass1", "testClass2"], + "elements": elems, + }, + } + + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + 
self.assertEqual(callArgs["tstamp"], evTstamp) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_form_submit_optional_none(self, mok_track_unstruct: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + t.track_form_submit("testFormId") + + expected = {"schema": FORM_SUBMIT_SCHEMA, "data": {"formId": "testFormId"}} + + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertTrue(callArgs["context"] is None) + self.assertTrue(callArgs["tstamp"] is None) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_form_submit_empty_elems(self, mok_track_unstruct: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + t.track_form_submit("testFormId", elements=[]) + + expected = {"schema": FORM_SUBMIT_SCHEMA, "data": {"formId": "testFormId"}} + + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_site_search(self, mok_track_unstruct: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + evTstamp = 1399021242030 + + t.track_site_search( + ["track", "search"], {"new": True}, 100, 10, context=[ctx], tstamp=evTstamp + ) + + expected = { + "schema": SITE_SEARCH_SCHEMA, + "data": { + "terms": ["track", "search"], + "filters": {"new": True}, + 
"totalResults": 100, + "pageResults": 10, + }, + } + + callArgs = mok_track_unstruct.call_args_list[0][1] + + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["tstamp"], evTstamp) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_site_search_optional_none(self, mok_track_unstruct: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + t.track_site_search(["track", "search"]) + + expected = { + "schema": SITE_SEARCH_SCHEMA, + "data": {"terms": ["track", "search"]}, + } + + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertTrue(callArgs["context"] is None) + self.assertTrue(callArgs["tstamp"] is None) + + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_mobile_screen_view(self, mok_track: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track.side_effect = mocked_track + + t = Tracker("namespace", e) + + screen_view = ScreenView(name="screenName", id_="screenId") + actual_pairs = screen_view.build_payload( + encode_base64=False, + json_encoder=t.json_encoder, + ).nv_pairs + + t.track(screen_view) + + self.assertEqual(mok_track.call_count, 1) + complete_args_dict = mok_track.call_args_list[0][0] + self.assertEqual(len(complete_args_dict), 1) + actual_ue_pr = json.loads(actual_pairs["ue_pr"]) + + expected = { + "schema": MOBILE_SCREEN_VIEW_SCHEMA, + "data": {"id": "screenId", "name": "screenName"}, + } + + complete_args_dict = mok_track.call_args_list[0][1] + complete_args_dict = mok_track.call_args_list[0][1] + self.assertDictEqual(actual_ue_pr["data"], expected) + + 
@mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_screen_view(self, mok_track_unstruct: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + evTstamp = 1399021242030 + + t.track_screen_view("screenName", "screenId", context=[ctx], tstamp=evTstamp) + expected = { + "schema": SCREEN_VIEW_SCHEMA, + "data": {"name": "screenName", "id": "screenId"}, + } + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["tstamp"], evTstamp) diff --git a/snowplow_tracker/timestamp.py b/snowplow_tracker/timestamp.py deleted file mode 100644 index 47080d99..00000000 --- a/snowplow_tracker/timestamp.py +++ /dev/null @@ -1,65 +0,0 @@ -""" - self_describing_json.py - - Copyright (c) 2013-2016 Snowplow Analytics Ltd. All rights reserved. - - This program is licensed to you under the Apache License Version 2.0, - and you may not use this file except in compliance with the Apache License - Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - http://www.apache.org/licenses/LICENSE-2.0. - - Unless required by applicable law or agreed to in writing, - software distributed under the Apache License Version 2.0 is distributed on - an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - express or implied. See the Apache License Version 2.0 for the specific - language governing permissions and limitations there under. 
- - Authors: Anuj More, Alex Dean, Fred Blundun, Anton Parkhomenko - Copyright: Copyright (c) 2013-2014 Snowplow Analytics Ltd - License: Apache License Version 2.0 -""" - -from contracts import contract, new_contract - -new_contract("ts_type", lambda x: x == "ttm" or x == "dtm") - - -class Timestamp(object): - @contract - def __init__(self, ts_type, value): - """ - Construct base timestamp type - - :param ts_type: one of possible timestamp types, according to - tracker protocol - :type ts_type: ts_type - :param value: timestamp value in milliseconds - :type value: int - """ - self.ts_type = ts_type - self.value = value - - -class TrueTimestamp(Timestamp): - @contract - def __init__(self, value): - """ - Construct true_timestamp (ttm) - - :param value: timestamp value in milliseconds - :type value: int - """ - super(TrueTimestamp, self).__init__("ttm", value) - - -class DeviceTimestamp(Timestamp): - @contract - def __init__(self, value): - """ - Construct device_timestamp (dtm) - - :param value: timestamp value in milliseconds - :type value: int - """ - super(DeviceTimestamp, self).__init__("dtm", value) - diff --git a/snowplow_tracker/tracker.py b/snowplow_tracker/tracker.py index a6830a68..4dc489dc 100644 --- a/snowplow_tracker/tracker.py +++ b/snowplow_tracker/tracker.py @@ -1,51 +1,51 @@ -""" - tracker.py - - Copyright (c) 2013-2014 Snowplow Analytics Ltd. All rights reserved. +# """ +# tracker.py - This program is licensed to you under the Apache License Version 2.0, - and you may not use this file except in compliance with the Apache License - Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - http://www.apache.org/licenses/LICENSE-2.0. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. - Unless required by applicable law or agreed to in writing, - software distributed under the Apache License Version 2.0 is distributed on - an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - express or implied. 
See the Apache License Version 2.0 for the specific - language governing permissions and limitations there under. +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. - Authors: Anuj More, Alex Dean, Fred Blundun - Copyright: Copyright (c) 2013-2014 Snowplow Analytics Ltd - License: Apache License Version 2.0 -""" +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ import time import uuid -import six - -from contracts import contract, new_contract - -from snowplow_tracker import payload, _version, SelfDescribingJson -from snowplow_tracker import subject as _subject -from snowplow_tracker.timestamp import Timestamp, TrueTimestamp, DeviceTimestamp - - -""" -Constants & config -""" +from typing import Any, Optional, Union, List, Dict, Sequence +from warnings import warn + +from snowplow_tracker import payload, SelfDescribingJson +from snowplow_tracker.subject import Subject +from snowplow_tracker.contracts import non_empty_string, one_of, non_empty, form_element +from snowplow_tracker.constants import ( + VERSION, + DEFAULT_ENCODE_BASE64, + BASE_SCHEMA_PATH, + SCHEMA_TAG, +) -VERSION = "py-%s" % _version.__version__ -DEFAULT_ENCODE_BASE64 = True -BASE_SCHEMA_PATH = "iglu:com.snowplowanalytics.snowplow" -SCHEMA_TAG = "jsonschema" -CONTEXT_SCHEMA = "%s/contexts/%s/1-0-1" % (BASE_SCHEMA_PATH, SCHEMA_TAG) -UNSTRUCT_EVENT_SCHEMA = "%s/unstruct_event/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG) -FORM_NODE_NAMES = ("INPUT", "TEXTAREA", "SELECT") -FORM_TYPES = 
( - "button", "checkbox", "color", "date", "datetime", - "datetime-local", "email", "file", "hidden", "image", "month", - "number", "password", "radio", "range", "reset", "search", - "submit", "tel", "text", "time", "url", "week" +from snowplow_tracker.events import ( + Event, + PagePing, + PageView, + SelfDescribing, + StructuredEvent, + ScreenView, +) +from snowplow_tracker.typing import ( + JsonEncoderFunction, + EmitterProtocol, + FORM_NODE_NAMES, + FORM_TYPES, + FormNodeName, + ElementClasses, + FormClasses, ) """ @@ -54,218 +54,253 @@ class Tracker: - - new_contract("not_none", lambda s: s is not None) - - new_contract("non_empty_string", lambda s: isinstance(s, six.string_types) - and len(s) > 0) - new_contract("string_or_none", lambda s: (isinstance(s, six.string_types) - and len(s) > 0) or s is None) - new_contract("payload", lambda s: isinstance(s, payload.Payload)) - - new_contract("tracker", lambda s: isinstance(s, Tracker)) - - new_contract("emitter", lambda s: hasattr(s, "input")) - - new_contract("self_describing_json", lambda s: isinstance(s, SelfDescribingJson)) - - new_contract("context_array", "list(self_describing_json)") - - new_contract("form_node_name", lambda s: s in FORM_NODE_NAMES) - - new_contract("form_type", lambda s: s.lower() in FORM_TYPES) - - new_contract("timestamp", lambda x: (isinstance(x, Timestamp))) - - new_contract("form_element", lambda x: Tracker.check_form_element(x)) - - @contract - def __init__(self, emitters, subject=None, - namespace=None, app_id=None, encode_base64=DEFAULT_ENCODE_BASE64): + def __init__( + self, + namespace: str, + emitters: Union[List[EmitterProtocol], EmitterProtocol], + subject: Optional[Subject] = None, + app_id: Optional[str] = None, + encode_base64: bool = DEFAULT_ENCODE_BASE64, + json_encoder: Optional[JsonEncoderFunction] = None, + ) -> None: """ - :param emitters: Emitters to which events will be sent - :type emitters: list[>0](emitter) | emitter - :param subject: Subject to be tracked - 
:type subject: subject | None - :param namespace: Identifier for the Tracker instance - :type namespace: string_or_none - :param app_id: Application ID - :type app_id: string_or_none - :param encode_base64: Whether JSONs in the payload should be base-64 encoded - :type encode_base64: bool + :param namespace: Identifier for the Tracker instance + :type namespace: string + :param emitters: Emitters to which events will be sent + :type emitters: list[>0](emitter) | emitter + :param subject: Subject to be tracked + :type subject: subject | None + :param app_id: Application ID + :type app_id: string_or_none + :param encode_base64: Whether JSONs in the payload should be base-64 encoded + :type encode_base64: bool + :param json_encoder: Custom JSON serializer that gets called on non-serializable object + :type json_encoder: function | None """ if subject is None: - subject = _subject.Subject() + subject = Subject() - if type(emitters) is list: + if isinstance(emitters, list): + non_empty(emitters) self.emitters = emitters else: self.emitters = [emitters] - self.subject = subject + self.subject: Optional[Subject] = subject self.encode_base64 = encode_base64 + self.json_encoder = json_encoder - self.standard_nv_pairs = { - "tv": VERSION, - "tna": namespace, - "aid": app_id - } + self.standard_nv_pairs = {"tv": VERSION, "tna": namespace, "aid": app_id} self.timer = None @staticmethod - @contract - def get_uuid(): + def get_uuid() -> str: """ - Set transaction ID for the payload once during the lifetime of the - event. + Set transaction ID for the payload once during the lifetime of the + event. 
- :rtype: string + :rtype: string """ return str(uuid.uuid4()) @staticmethod - @contract - def get_timestamp(tstamp=None): + def get_timestamp(tstamp: Optional[float] = None) -> int: """ - :param tstamp: User-input timestamp or None - :type tstamp: int | float | None - :rtype: int + :param tstamp: User-input timestamp or None + :type tstamp: int | float | None + :rtype: int """ - if tstamp is None: - return int(time.time() * 1000) - elif isinstance(tstamp, (int, float, )): + if isinstance( + tstamp, + ( + int, + float, + ), + ): return int(tstamp) - + return int(time.time() * 1000) """ Tracking methods """ - @contract - def track(self, pb): + def track( + self, + event: Event, + ) -> Optional[str]: """ - Send the payload to a emitter - - :param pb: Payload builder - :type pb: payload - :rtype: tracker + Send the event payload to a emitter. Returns the tracked event ID. + :param event: Event + :type event: events.Event + :rtype: String """ + + payload = self.complete_payload( + event=event, + ) + for emitter in self.emitters: - emitter.input(pb.nv_pairs) + emitter.input(payload.nv_pairs) + + if "eid" in payload.nv_pairs.keys(): + return payload.nv_pairs["eid"] + + return None + + def complete_payload( + self, + event: Event, + ) -> payload.Payload: + payload = event.build_payload( + encode_base64=self.encode_base64, + json_encoder=self.json_encoder, + subject=self.subject, + ) + + payload.add("eid", Tracker.get_uuid()) + payload.add("dtm", Tracker.get_timestamp()) + payload.add_dict(self.standard_nv_pairs) + + return payload + + def track_page_view( + self, + page_url: str, + page_title: Optional[str] = None, + referrer: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": + """ + :param page_url: URL of the viewed page + :type page_url: non_empty_string + :param page_title: Title of the viewed page + :type page_title: string_or_none + :param 
referrer: Referrer of the page + :type referrer: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker + """ + warn( + "track_page_view will be removed in future versions. Please use the new PageView class to track the event.", + DeprecationWarning, + stacklevel=2, + ) + + pv = PageView( + page_url=page_url, + page_title=page_title, + referrer=referrer, + event_subject=event_subject, + context=context, + true_timestamp=tstamp, + ) + + self.track(event=pv) + return self + + def track_page_ping( + self, + page_url: str, + page_title: Optional[str] = None, + referrer: Optional[str] = None, + min_x: Optional[int] = None, + max_x: Optional[int] = None, + min_y: Optional[int] = None, + max_y: Optional[int] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": + """ + :param page_url: URL of the viewed page + :type page_url: non_empty_string + :param page_title: Title of the viewed page + :type page_title: string_or_none + :param referrer: Referrer of the page + :type referrer: string_or_none + :param min_x: Minimum page x offset seen in the last ping period + :type min_x: int | None + :param max_x: Maximum page x offset seen in the last ping period + :type max_x: int | None + :param min_y: Minimum page y offset seen in the last ping period + :type min_y: int | None + :param max_y: Maximum page y offset seen in the last ping period + :type max_y: int | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: 
Tracker + """ + warn( + "track_page_ping will be removed in future versions. Please use the new PagePing class to track the event.", + DeprecationWarning, + stacklevel=2, + ) + + pp = PagePing( + page_url=page_url, + page_title=page_title, + referrer=referrer, + min_x=min_x, + max_x=max_x, + min_y=min_y, + max_y=max_y, + context=context, + true_timestamp=tstamp, + event_subject=event_subject, + ) + + self.track(event=pp) return self - @contract - def complete_payload(self, pb, context, tstamp): - """ - Called by all tracking events to add the standard name-value pairs - to the Payload object irrespective of the tracked event. - - :param pb: Payload builder - :type pb: payload - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional user-provided timestamp for the event - :type tstamp: timestamp | int | float | None - :rtype: tracker - """ - pb.add("eid", Tracker.get_uuid()) - - if isinstance(tstamp, TrueTimestamp): - pb.add("ttm", tstamp.value) - if isinstance(tstamp, DeviceTimestamp): - pb.add("dtm", Tracker.get_timestamp(tstamp.value)) - elif isinstance(tstamp, (int, float, type(None))): - pb.add("dtm", Tracker.get_timestamp(tstamp)) - - if context is not None: - context_jsons = list(map(lambda c: c.to_json(), context)) - context_envelope = SelfDescribingJson(CONTEXT_SCHEMA, context_jsons).to_json() - pb.add_json(context_envelope, self.encode_base64, "cx", "co") - - pb.add_dict(self.standard_nv_pairs) - - pb.add_dict(self.subject.standard_nv_pairs) - - return self.track(pb) - - @contract - def track_page_view(self, page_url, page_title=None, referrer=None, context=None, tstamp=None): - """ - :param page_url: URL of the viewed page - :type page_url: non_empty_string - :param page_title: Title of the viewed page - :type page_title: string_or_none - :param referrer: Referrer of the page - :type referrer: string_or_none - :param context: Custom context for the event - :type context: context_array | None - :param 
tstamp: Optional user-provided timestamp for the event - :type tstamp: timestamp | int | float | None - :rtype: tracker - """ - pb = payload.Payload() - pb.add("e", "pv") # pv: page view - pb.add("url", page_url) - pb.add("page", page_title) - pb.add("refr", referrer) - - return self.complete_payload(pb, context, tstamp) - - @contract - def track_page_ping(self, page_url, page_title=None, referrer=None, min_x=None, max_x=None, min_y=None, max_y=None, context=None, tstamp=None): - """ - :param page_url: URL of the viewed page - :type page_url: non_empty_string - :param page_title: Title of the viewed page - :type page_title: string_or_none - :param referrer: Referrer of the page - :type referrer: string_or_none - :param min_x: Minimum page x offset seen in the last ping period - :type min_x: int | None - :param max_x: Maximum page x offset seen in the last ping period - :type max_x: int | None - :param min_y: Minimum page y offset seen in the last ping period - :type min_y: int | None - :param max_y: Maximum page y offset seen in the last ping period - :type max_y: int | None - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional user-provided timestamp for the event - :type tstamp: timestamp | int | float | None - :rtype: tracker - """ - pb = payload.Payload() - pb.add("e", "pp") # pp: page ping - pb.add("url", page_url) - pb.add("page", page_title) - pb.add("refr", referrer) - pb.add("pp_mix", min_x) - pb.add("pp_max", max_x) - pb.add("pp_miy", min_y) - pb.add("pp_may", max_y) - - return self.complete_payload(pb, context, tstamp) - - @contract - def track_link_click(self, target_url, element_id=None, - element_classes=None, element_target=None, - element_content=None, context=None, tstamp=None): - """ - :param target_url: Target URL of the link - :type target_url: non_empty_string - :param element_id: ID attribute of the HTML element - :type element_id: string_or_none - :param element_classes: Classes of the 
HTML element - :type element_classes: list(str) | tuple(str,*) | None - :param element_content: The content of the HTML element - :type element_content: string_or_none - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional user-provided timestamp for the event - :type tstamp: timestamp | int | float | None - :rtype: tracker - """ - properties = {} + def track_link_click( + self, + target_url: str, + element_id: Optional[str] = None, + element_classes: Optional[ElementClasses] = None, + element_target: Optional[str] = None, + element_content: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": + """ + :param target_url: Target URL of the link + :type target_url: non_empty_string + :param element_id: ID attribute of the HTML element + :type element_id: string_or_none + :param element_classes: Classes of the HTML element + :type element_classes: list(str) | tuple(str,\\*) | None + :param element_target: ID attribute of the HTML element + :type element_target: string_or_none + :param element_content: The content of the HTML element + :type element_content: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker + """ + warn( + "track_link_click will be removed in future versions. 
Please use the new SelfDescribing class to track the event.", + DeprecationWarning, + stacklevel=2, + ) + non_empty_string(target_url) + + properties: Dict[str, Union[str, ElementClasses]] = {} properties["targetUrl"] = target_url if element_id is not None: properties["elementId"] = element_id @@ -276,34 +311,59 @@ def track_link_click(self, target_url, element_id=None, if element_content is not None: properties["elementContent"] = element_content - event_json = SelfDescribingJson("%s/link_click/%s/1-0-1" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties) - - return self.track_unstruct_event(event_json, context, tstamp) - - @contract - def track_add_to_cart(self, sku, quantity, name=None, category=None, - unit_price=None, currency=None, context=None, - tstamp=None): - """ - :param sku: Item SKU or ID - :type sku: non_empty_string - :param quantity: Number added to cart - :type quantity: int - :param name: Item's name - :type name: string_or_none - :param category: Item's category - :type category: string_or_none - :param unit_price: Item's price - :type unit_price: int | float | None - :param currency: Type of currency the price is in - :type currency: string_or_none - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional user-provided timestamp for the event - :type tstamp: timestamp | int | float | None - :rtype: tracker - """ - properties = {} + event_json = SelfDescribingJson( + "%s/link_click/%s/1-0-1" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) + + self.track_self_describing_event( + event_json=event_json, + context=context, + tstamp=tstamp, + event_subject=event_subject, + ) + return self + + def track_add_to_cart( + self, + sku: str, + quantity: int, + name: Optional[str] = None, + category: Optional[str] = None, + unit_price: Optional[float] = None, + currency: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: 
Optional[Subject] = None, + ) -> "Tracker": + """ + :param sku: Item SKU or ID + :type sku: non_empty_string + :param quantity: Number added to cart + :type quantity: int + :param name: Item's name + :type name: string_or_none + :param category: Item's category + :type category: string_or_none + :param unit_price: Item's price + :type unit_price: int | float | None + :param currency: Type of currency the price is in + :type currency: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker + """ + warn( + "track_add_to_cart will be deprecated in future versions.", + DeprecationWarning, + stacklevel=2, + ) + non_empty_string(sku) + + properties: Union[Dict[str, Union[str, float, int]]] = {} properties["sku"] = sku properties["quantity"] = quantity if name is not None: @@ -315,34 +375,59 @@ def track_add_to_cart(self, sku, quantity, name=None, category=None, if currency is not None: properties["currency"] = currency - event_json = SelfDescribingJson("%s/add_to_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties) - - return self.track_unstruct_event(event_json, context, tstamp) - - @contract - def track_remove_from_cart(self, sku, quantity, name=None, category=None, - unit_price=None, currency=None, context=None, - tstamp=None): - """ - :param sku: Item SKU or ID - :type sku: non_empty_string - :param quantity: Number added to cart - :type quantity: int - :param name: Item's name - :type name: string_or_none - :param category: Item's category - :type category: string_or_none - :param unit_price: Item's price - :type unit_price: int | float | None - :param currency: Type of currency the price is in - :type currency: string_or_none - :param context: Custom context for the event - :type context: context_array | None - :param 
tstamp: Optional user-provided timestamp for the event - :type tstamp: timestamp | int | float | None - :rtype: tracker - """ - properties = {} + event_json = SelfDescribingJson( + "%s/add_to_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) + + self.track_self_describing_event( + event_json=event_json, + context=context, + tstamp=tstamp, + event_subject=event_subject, + ) + return self + + def track_remove_from_cart( + self, + sku: str, + quantity: int, + name: Optional[str] = None, + category: Optional[str] = None, + unit_price: Optional[float] = None, + currency: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": + """ + :param sku: Item SKU or ID + :type sku: non_empty_string + :param quantity: Number added to cart + :type quantity: int + :param name: Item's name + :type name: string_or_none + :param category: Item's category + :type category: string_or_none + :param unit_price: Item's price + :type unit_price: int | float | None + :param currency: Type of currency the price is in + :type currency: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker + """ + warn( + "track_remove_from_cart will be deprecated in future versions.", + DeprecationWarning, + stacklevel=2, + ) + non_empty_string(sku) + + properties: Dict[str, Union[str, float, int]] = {} properties["sku"] = sku properties["quantity"] = quantity if name is not None: @@ -354,33 +439,63 @@ def track_remove_from_cart(self, sku, quantity, name=None, category=None, if currency is not None: properties["currency"] = currency - event_json = SelfDescribingJson("%s/remove_from_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties) 
+ event_json = SelfDescribingJson( + "%s/remove_from_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) - return self.track_unstruct_event(event_json, context, tstamp) + self.track_self_describing_event( + event_json=event_json, + context=context, + tstamp=tstamp, + event_subject=event_subject, + ) + return self - @contract - def track_form_change(self, form_id, element_id, node_name, value, type_=None, - element_classes=None, context=None, tstamp=None): + def track_form_change( + self, + form_id: str, + element_id: Optional[str], + node_name: FormNodeName, + value: Optional[str], + type_: Optional[str] = None, + element_classes: Optional[ElementClasses] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": """ - :param form_id: ID attribute of the HTML form - :type form_id: non_empty_string - :param element_id: ID attribute of the HTML element - :type element_id: string_or_none - :param node_name: Type of input element - :type node_name: form_node_name - :param value: Value of the input element - :type value: string_or_none - :param type_: Type of data the element represents - :type type_: non_empty_string, form_type + :param form_id: ID attribute of the HTML form + :type form_id: non_empty_string + :param element_id: ID attribute of the HTML element + :type element_id: string_or_none + :param node_name: Type of input element + :type node_name: form_node_name + :param value: Value of the input element + :type value: string_or_none + :param type_: Type of data the element represents + :type type_: non_empty_string, form_type :param element_classes: Classes of the HTML element - :type element_classes: list(str) | tuple(str,*) | None - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional user-provided timestamp for the event - :type tstamp: timestamp | int | float | None - :rtype: tracker + :type 
element_classes: list(str) | tuple(str,\\*) | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker """ - properties = dict() + warn( + "track_form_change will be removed in future versions. Please use the new SelfDescribing class to track the event.", + DeprecationWarning, + stacklevel=2, + ) + + non_empty_string(form_id) + one_of(node_name, FORM_NODE_NAMES) + if type_ is not None: + one_of(type_.lower(), FORM_TYPES) + + properties: Dict[str, Union[Optional[str], ElementClasses]] = dict() properties["formId"] = form_id properties["elementId"] = element_id properties["nodeName"] = node_name @@ -390,57 +505,110 @@ def track_form_change(self, form_id, element_id, node_name, value, type_=None, if element_classes is not None: properties["elementClasses"] = element_classes - event_json = SelfDescribingJson("%s/change_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties) + event_json = SelfDescribingJson( + "%s/change_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) - return self.track_unstruct_event(event_json, context, tstamp) + self.track_self_describing_event( + event_json=event_json, + context=context, + tstamp=tstamp, + event_subject=event_subject, + ) + return self - @contract - def track_form_submit(self, form_id, form_classes=None, elements=None, - context=None, tstamp=None): + def track_form_submit( + self, + form_id: str, + form_classes: Optional[FormClasses] = None, + elements: Optional[List[Dict[str, Any]]] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": """ - :param form_id: ID attribute of the HTML form - :type form_id: non_empty_string - :param form_classes: Classes of the HTML form - :type 
form_classes: list(str) | tuple(str,*) | None - :param elements: Classes of the HTML form - :type elements: list(form_element) | None - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional user-provided timestamp for the event - :type tstamp: timestamp | int | float | None - :rtype: tracker + :param form_id: ID attribute of the HTML form + :type form_id: non_empty_string + :param form_classes: Classes of the HTML form + :type form_classes: list(str) | tuple(str,\\*) | None + :param elements: Classes of the HTML form + :type elements: list(form_element) | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker """ - - properties = dict() - properties['formId'] = form_id + warn( + "track_form_submit will be removed in future versions. 
Please use the new SelfDescribing class to track the event.", + DeprecationWarning, + stacklevel=2, + ) + non_empty_string(form_id) + + for element in elements or []: + form_element(element) + + properties: Dict[ + str, Union[str, ElementClasses, FormClasses, List[Dict[str, Any]]] + ] = dict() + properties["formId"] = form_id if form_classes is not None: - properties['formClasses'] = form_classes + properties["formClasses"] = form_classes if elements is not None and len(elements) > 0: - properties['elements'] = elements - - event_json = SelfDescribingJson("%s/submit_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties) - - return self.track_unstruct_event(event_json, context, tstamp) - - @contract - def track_site_search(self, terms, filters=None, total_results=None, - page_results=None, context=None, tstamp=None): - """ - :param terms: Search terms - :type terms: seq[>=1](str) - :param filters: Filters applied to the search - :type filters: dict(str:str|bool) | None - :param total_results: Total number of results returned - :type total_results: int | None - :param page_results: Total number of pages of results - :type page_results: int | None - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional user-provided timestamp for the event - :type tstamp: timestamp | int | float | None - :rtype: tracker - """ - properties = {} + properties["elements"] = elements + + event_json = SelfDescribingJson( + "%s/submit_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) + + self.track_self_describing_event( + event_json=event_json, + context=context, + tstamp=tstamp, + event_subject=event_subject, + ) + return self + + def track_site_search( + self, + terms: Sequence[str], + filters: Optional[Dict[str, Union[str, bool]]] = None, + total_results: Optional[int] = None, + page_results: Optional[int] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + 
event_subject: Optional[Subject] = None, + ) -> "Tracker": + """ + :param terms: Search terms + :type terms: seq[>=1](str) + :param filters: Filters applied to the search + :type filters: dict(str:str|bool) | None + :param total_results: Total number of results returned + :type total_results: int | None + :param page_results: Total number of pages of results + :type page_results: int | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker + """ + warn( + "track_site_search will be removed in future versions. Please use the new SelfDescribing class to track the event.", + DeprecationWarning, + stacklevel=2, + ) + non_empty(terms) + + properties: Dict[ + str, Union[Sequence[str], Dict[str, Union[str, bool]], int] + ] = {} properties["terms"] = terms if filters is not None: properties["filters"] = filters @@ -449,234 +617,435 @@ def track_site_search(self, terms, filters=None, total_results=None, if page_results is not None: properties["pageResults"] = page_results - event_json = SelfDescribingJson("%s/site_search/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties) - - return self.track_unstruct_event(event_json, context, tstamp) - - @contract - def track_ecommerce_transaction_item(self, order_id, sku, price, quantity, - name=None, category=None, currency=None, - context=None, - tstamp=None): - """ - This is an internal method called by track_ecommerce_transaction. - It is not for public use. 
- - :param order_id: Order ID - :type order_id: non_empty_string - :param sku: Item SKU - :type sku: non_empty_string - :param price: Item price - :type price: int | float - :param quantity: Item quantity - :type quantity: int - :param name: Item name - :type name: string_or_none - :param category: Item category - :type category: string_or_none - :param currency: The currency the price is expressed in - :type currency: string_or_none - :param context: Custom context for the event - :type context: context_array | None - :rtype: tracker - """ - pb = payload.Payload() - pb.add("e", "ti") - pb.add("ti_id", order_id) - pb.add("ti_sk", sku) - pb.add("ti_nm", name) - pb.add("ti_ca", category) - pb.add("ti_pr", price) - pb.add("ti_qu", quantity) - pb.add("ti_cu", currency) - - return self.complete_payload(pb, context, tstamp) - - @contract - def track_ecommerce_transaction(self, order_id, total_value, - affiliation=None, tax_value=None, shipping=None, - city=None, state=None, country=None, currency=None, - items=None, - context=None, tstamp=None): - """ - :param order_id: ID of the eCommerce transaction - :type order_id: non_empty_string - :param total_value: Total transaction value - :type total_value: int | float - :param affiliation: Transaction affiliation - :type affiliation: string_or_none - :param tax_value: Transaction tax value - :type tax_value: int | float | None - :param shipping: Delivery cost charged - :type shipping: int | float | None - :param city: Delivery address city - :type city: string_or_none - :param state: Delivery address state - :type state: string_or_none - :param country: Delivery address country - :type country: string_or_none - :param currency: The currency the price is expressed in - :type currency: string_or_none - :param items: The items in the transaction - :type items: list(dict(str:*)) - :param context: Custom context for the event - :type context: context_array | None - :rtype: tracker - """ - pb = payload.Payload() - pb.add("e", "tr") 
- pb.add("tr_id", order_id) - pb.add("tr_tt", total_value) - pb.add("tr_af", affiliation) - pb.add("tr_tx", tax_value) - pb.add("tr_sh", shipping) - pb.add("tr_ci", city) - pb.add("tr_st", state) - pb.add("tr_co", country) - pb.add("tr_cu", currency) + event_json = SelfDescribingJson( + "%s/site_search/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) + + self.track_self_describing_event( + event_json=event_json, + context=context, + tstamp=tstamp, + event_subject=event_subject, + ) + return self + + def track_ecommerce_transaction_item( + self, + order_id: str, + sku: str, + price: float, + quantity: int, + name: Optional[str] = None, + category: Optional[str] = None, + currency: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": + """ + This is an internal method called by track_ecommerce_transaction. + It is not for public use. + + :param order_id: Order ID + :type order_id: non_empty_string + :param sku: Item SKU + :type sku: non_empty_string + :param price: Item price + :type price: int | float + :param quantity: Item quantity + :type quantity: int + :param name: Item name + :type name: string_or_none + :param category: Item category + :type category: string_or_none + :param currency: The currency the price is expressed in + :type currency: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker + """ + warn( + "track_ecommerce_transaction_item will be deprecated in future versions.", + DeprecationWarning, + stacklevel=2, + ) + non_empty_string(order_id) + non_empty_string(sku) + + event = Event( + event_subject=event_subject, context=context, true_timestamp=tstamp + ) + event.payload.add("e", 
"ti") + event.payload.add("ti_id", order_id) + event.payload.add("ti_sk", sku) + event.payload.add("ti_nm", name) + event.payload.add("ti_ca", category) + event.payload.add("ti_pr", price) + event.payload.add("ti_qu", quantity) + event.payload.add("ti_cu", currency) + + self.track(event=event) + return self + + def track_ecommerce_transaction( + self, + order_id: str, + total_value: float, + affiliation: Optional[str] = None, + tax_value: Optional[float] = None, + shipping: Optional[float] = None, + city: Optional[str] = None, + state: Optional[str] = None, + country: Optional[str] = None, + currency: Optional[str] = None, + items: Optional[List[Dict[str, Any]]] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": + """ + :param order_id: ID of the eCommerce transaction + :type order_id: non_empty_string + :param total_value: Total transaction value + :type total_value: int | float + :param affiliation: Transaction affiliation + :type affiliation: string_or_none + :param tax_value: Transaction tax value + :type tax_value: int | float | None + :param shipping: Delivery cost charged + :type shipping: int | float | None + :param city: Delivery address city + :type city: string_or_none + :param state: Delivery address state + :type state: string_or_none + :param country: Delivery address country + :type country: string_or_none + :param currency: The currency the price is expressed in + :type currency: string_or_none + :param items: The items in the transaction + :type items: list(dict(str:\\*)) | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker + """ + warn( + "track_ecommerce_transaction will be deprecated in future versions.", + 
DeprecationWarning, + stacklevel=2, + ) + non_empty_string(order_id) + + event = Event( + event_subject=event_subject, context=context, true_timestamp=tstamp + ) + event.payload.add("e", "tr") + event.payload.add("tr_id", order_id) + event.payload.add("tr_tt", total_value) + event.payload.add("tr_af", affiliation) + event.payload.add("tr_tx", tax_value) + event.payload.add("tr_sh", shipping) + event.payload.add("tr_ci", city) + event.payload.add("tr_st", state) + event.payload.add("tr_co", country) + event.payload.add("tr_cu", currency) tstamp = Tracker.get_timestamp(tstamp) - self.complete_payload(pb, context, tstamp) + self.track(event=event) + if items is None: + items = [] for item in items: - item["tstamp"] = tstamp item["order_id"] = order_id item["currency"] = currency + item["tstamp"] = tstamp + item["event_subject"] = event_subject + item["context"] = context self.track_ecommerce_transaction_item(**item) return self - @contract - def track_screen_view(self, name=None, id_=None, context=None, tstamp=None): + def track_screen_view( + self, + name: Optional[str] = None, + id_: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": """ - :param name: The name of the screen view event - :type name: string_or_none - :param id_: Screen view ID - :type id_: string_or_none - :param context: Custom context for the event - :type context: context_array | None - :rtype: tracker + :param name: The name of the screen view event + :type name: string_or_none + :param id_: Screen view ID + :type id_: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker """ + warn( + "track_screen_view will be removed in future 
versions. Please use the new ScreenView class to track the event.", + DeprecationWarning, + stacklevel=2, + ) screen_view_properties = {} if name is not None: screen_view_properties["name"] = name if id_ is not None: screen_view_properties["id"] = id_ - event_json = SelfDescribingJson("%s/screen_view/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), screen_view_properties) - - return self.track_unstruct_event(event_json, context, tstamp) + event_json = SelfDescribingJson( + "%s/screen_view/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), + screen_view_properties, + ) + + self.track_self_describing_event( + event_json=event_json, + context=context, + tstamp=tstamp, + event_subject=event_subject, + ) + return self - @contract - def track_struct_event(self, category, action, label=None, property_=None, value=None, - context=None, - tstamp=None): + def track_mobile_screen_view( + self, + name: str, + id_: Optional[str] = None, + type: Optional[str] = None, + previous_name: Optional[str] = None, + previous_id: Optional[str] = None, + previous_type: Optional[str] = None, + transition_type: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": """ - :param category: Category of the event - :type category: non_empty_string - :param action: The event itself - :type action: non_empty_string - :param label: Refer to the object the action is - performed on - :type label: string_or_none - :param property_: Property associated with either the action - or the object - :type property_: string_or_none - :param value: A value associated with the user action - :type value: int | float | None - :param context: Custom context for the event - :type context: context_array | None - :rtype: tracker + :param name: The name of the screen view event + :type name: string_or_none + :param id_: Screen view ID. This must be of type UUID. 
+ :type id_: string | None + :param type: The type of screen that was viewed e.g. feed / carousel. + :type type: string | None + :param previous_name: The name of the previous screen. + :type previous_name: string | None + :param previous_id: The screenview ID of the previous screenview. + :type previous_id: string | None + :param previous_type: The screen type of the previous screenview + :type previous_type: string | None + :param transition_type: The type of transition that led to the screen being viewed. + :type transition_type: string | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker """ - pb = payload.Payload() - pb.add("e", "se") - pb.add("se_ca", category) - pb.add("se_ac", action) - pb.add("se_la", label) - pb.add("se_pr", property_) - pb.add("se_va", value) - - return self.complete_payload(pb, context, tstamp) + warn( + "track_mobile_screen_view will be removed in future versions. 
Please use the new ScreenView class to track the event.", + DeprecationWarning, + stacklevel=2, + ) + if id_ is None: + id_ = self.get_uuid() + + sv = ScreenView( + name=name, + id_=id_, + type=type, + previous_name=previous_name, + previous_id=previous_id, + previous_type=previous_type, + transition_type=transition_type, + event_subject=event_subject, + context=context, + true_timestamp=tstamp, + ) + + self.track(event=sv) + return self - @contract - def track_unstruct_event(self, event_json, context=None, tstamp=None): + def track_struct_event( + self, + category: str, + action: str, + label: Optional[str] = None, + property_: Optional[str] = None, + value: Optional[Union[int, float]] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": """ - :param event_json: The properties of the event. Has two field: - A "data" field containing the event properties and - A "schema" field identifying the schema against which the data is validated - :type event_json: self_describing_json - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: User-set timestamp - :type tstamp: timestamp | int | None - :rtype: tracker + :param category: Category of the event + :type category: non_empty_string + :param action: The event itself + :type action: non_empty_string + :param label: Refer to the object the action is + performed on + :type label: string_or_none + :param property_: Property associated with either the action + or the object + :type property_: string_or_none + :param value: A value associated with the user action + :type value: int | float | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker 
""" + warn( + "track_struct_event will be removed in future versions. Please use the new Structured class to track the event.", + DeprecationWarning, + stacklevel=2, + ) + se = StructuredEvent( + category=category, + action=action, + label=label, + property_=property_, + value=value, + context=context, + true_timestamp=tstamp, + event_subject=event_subject, + ) + + self.track( + event=se, + ) + return self - envelope = SelfDescribingJson(UNSTRUCT_EVENT_SCHEMA, event_json.to_json()).to_json() - - pb = payload.Payload() - - pb.add("e", "ue") - pb.add_json(envelope, self.encode_base64, "ue_px", "ue_pr") - - return self.complete_payload(pb, context, tstamp) + def track_self_describing_event( + self, + event_json: SelfDescribingJson, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": + """ + :param event_json: The properties of the event. Has two field: + A "data" field containing the event properties and + A "schema" field identifying the schema against which the data is validated + :type event_json: self_describing_json + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker + """ + warn( + "track_self_describing_event will be removed in future versions. 
Please use the new SelfDescribing class to track the event.", + DeprecationWarning, + stacklevel=2, + ) + + sd = SelfDescribing( + event_json=event_json, + context=context, + true_timestamp=tstamp, + event_subject=event_subject, + ) + self.track( + event=sd, + ) + return self # Alias - track_self_describing_event = track_unstruct_event + def track_unstruct_event( + self, + event_json: SelfDescribingJson, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": + """ + :param event_json: The properties of the event. Has two field: + A "data" field containing the event properties and + A "schema" field identifying the schema against which the data is validated + :type event_json: self_describing_json + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker + """ + warn( + "track_unstruct_event will be deprecated in future versions. Please use track_self_describing_event.", + DeprecationWarning, + stacklevel=2, + ) + + self.track_self_describing_event( + event_json=event_json, + context=context, + tstamp=tstamp, + event_subject=event_subject, + ) + return self - @contract - def flush(self, async=False): + def flush(self, is_async: bool = False) -> "Tracker": """ - Flush the emitter + Flush the emitter - :param async: Whether the flush is done asynchronously. Default is False - :type async: bool - :rtype: tracker + :param is_async: Whether the flush is done asynchronously. 
Default is False + :type is_async: bool + :rtype: tracker """ for emitter in self.emitters: - if async: - emitter.flush() + if is_async: + if hasattr(emitter, "flush"): + emitter.flush() else: - emitter.sync_flush() + if hasattr(emitter, "sync_flush"): + emitter.sync_flush() return self - @contract - def set_subject(self, subject): + def set_subject(self, subject: Optional[Subject]) -> "Tracker": """ - Set the subject of the events fired by the tracker + Set the subject of the events fired by the tracker - :param subject: Subject to be tracked - :type subject: subject | None - :rtype: tracker + :param subject: Subject to be tracked + :type subject: subject | None + :rtype: tracker """ self.subject = subject return self - @contract - def add_emitter(self, emitter): + def add_emitter(self, emitter: EmitterProtocol) -> "Tracker": """ - Add a new emitter to which events should be passed + Add a new emitter to which events should be passed - :param emitter: New emitter - :type emitter: emitter - :rtype: tracker + :param emitter: New emitter + :type emitter: emitter + :rtype: tracker """ self.emitters.append(emitter) return self - @staticmethod - def check_form_element(element): - """ - PyContracts helper method to check that dictionary conforms element - in sumbit_form and change_form schemas - """ - all_present = isinstance(element, dict) and 'name' in element and 'value' in element and 'nodeName' in element - try: - if element['type'] in FORM_TYPES: - type_valid = True - else: - type_valid = False - except KeyError: - type_valid = True - return all_present and element['nodeName'] in FORM_NODE_NAMES and type_valid + def get_namespace(self) -> str: + # As app_id is added to the standard_nv_pairs dict above with a type of Optional[str], the type for + # the whole standard_nv_pairs dict is inferred to be dict[str, Optional[str]]. + # But, we know that "tna" should always be present in the dict, since namespace is a required argument. 
+ # + # This ignores MyPy saying Incompatible return value type (got "str | None", expected "str") + return self.standard_nv_pairs["tna"] # type: ignore diff --git a/snowplow_tracker/tracker_configuration.py b/snowplow_tracker/tracker_configuration.py new file mode 100644 index 00000000..6a574dc2 --- /dev/null +++ b/snowplow_tracker/tracker_configuration.py @@ -0,0 +1,60 @@ +# """ +# tracker_configuration.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ + +from typing import Optional +from snowplow_tracker.typing import JsonEncoderFunction + + +class TrackerConfiguration(object): + def __init__( + self, + encode_base64: bool = True, + json_encoder: Optional[JsonEncoderFunction] = None, + ) -> None: + """ + Configuration for additional tracker configuration options. + :param encode_base64: Whether JSONs in the payload should be base-64 encoded. Default is True. + :type encode_base64: bool + :param json_encoder: Custom JSON serializer that gets called on non-serializable object. + :type json_encoder: function | None + """ + + self.encode_base64 = encode_base64 + self.json_encoder = json_encoder + + @property + def encode_base64(self) -> bool: + """ + Whether JSONs in the payload should be base-64 encoded. Default is True. 
+ """ + return self._encode_base64 + + @encode_base64.setter + def encode_base64(self, value: bool): + if isinstance(value, bool) or value is None: + self._encode_base64 = value + + @property + def json_encoder(self) -> Optional[JsonEncoderFunction]: + """ + Custom JSON serializer that gets called on non-serializable object. + """ + return self._json_encoder + + @json_encoder.setter + def json_encoder(self, value: Optional[JsonEncoderFunction]): + self._json_encoder = value diff --git a/snowplow_tracker/typing.py b/snowplow_tracker/typing.py new file mode 100644 index 00000000..3e973562 --- /dev/null +++ b/snowplow_tracker/typing.py @@ -0,0 +1,74 @@ +# """ +# typing.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ + +from typing import Dict, List, Callable, Any, Optional, Union, Tuple +from typing_extensions import Protocol, Literal + +PayloadDict = Dict[str, Any] +PayloadDictList = List[PayloadDict] +JsonEncoderFunction = Callable[[Any], Any] + +# tracker +FORM_NODE_NAMES = {"INPUT", "TEXTAREA", "SELECT"} +FORM_TYPES = { + "button", + "checkbox", + "color", + "date", + "datetime", + "datetime-local", + "email", + "file", + "hidden", + "image", + "month", + "number", + "password", + "radio", + "range", + "reset", + "search", + "submit", + "tel", + "text", + "time", + "url", + "week", +} +FormNodeName = Literal["INPUT", "TEXTAREA", "SELECT"]  # same three values as FORM_NODE_NAMES; keep the two in sync +ElementClasses = Union[List[str], Tuple[str, Any]]  # NOTE(review): Tuple[str, Any] is exactly a 2-tuple (str, anything); confirm an any-length tuple of str (Tuple[str, ...]) was not intended +FormClasses = Union[List[str], Tuple[str, Any]]  # NOTE(review): same 2-tuple caveat as ElementClasses; confirm intent + +# emitters +HttpProtocol = Literal["http", "https"] +Method = Literal["get", "post"] +SuccessCallback = Callable[[PayloadDictList], None] +FailureCallback = Callable[[int, PayloadDictList], None] + +# subject +SUPPORTED_PLATFORMS = {"pc", "tv", "mob", "cnsl", "iot", "web", "srv", "app"} +SupportedPlatform = Literal["pc", "tv", "mob", "cnsl", "iot", "web", "srv", "app"]  # same values as SUPPORTED_PLATFORMS; keep the two in sync + + +class EmitterProtocol(Protocol):  # structural (duck-typed) interface describing the methods an emitter exposes + def input(self, payload: PayloadDict) -> None: ... + + def flush(self) -> None: ... + + def async_flush(self) -> None: ... + + def sync_flush(self) -> None: ... 
diff --git a/vagrant/.gitignore b/vagrant/.gitignore deleted file mode 100644 index 5b164d31..00000000 --- a/vagrant/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -oss-playbooks -ansible -.peru diff --git a/vagrant/ansible.hosts b/vagrant/ansible.hosts deleted file mode 100644 index 588fa08c..00000000 --- a/vagrant/ansible.hosts +++ /dev/null @@ -1,2 +0,0 @@ -[vagrant] -127.0.0.1:2222 diff --git a/vagrant/peru.yaml b/vagrant/peru.yaml deleted file mode 100644 index e7fdf41c..00000000 --- a/vagrant/peru.yaml +++ /dev/null @@ -1,14 +0,0 @@ -imports: - ansible: ansible - ansible_playbooks: oss-playbooks - -curl module ansible: - # Equivalent of git cloning tags/v1.6.6 but much, much faster - url: https://codeload.github.com/ansible/ansible/zip/69d85c22c7475ccf8169b6ec9dee3ee28c92a314 - unpack: zip - export: ansible-69d85c22c7475ccf8169b6ec9dee3ee28c92a314 - -git module ansible_playbooks: - url: https://github.com/snowplow/ansible-playbooks.git - # Comment out to fetch a specific rev instead of master: - # rev: xxx diff --git a/vagrant/push.bash b/vagrant/push.bash deleted file mode 100755 index 840b5b0e..00000000 --- a/vagrant/push.bash +++ /dev/null @@ -1,92 +0,0 @@ -#!/bin/bash -set -e - -project_path="/vagrant" -python_bin="~/snowplow-python-2.7-tracker-environment/bin/python2.7" - -# Similar to Perl die -function die() { - echo "$@" 1>&2 ; exit 1; -} - -# Check if our Vagrant box is running. Expects `vagrant status` to look like: -# -# > Current machine states: -# > -# > default poweroff (virtualbox) -# > -# > The VM is powered off. To restart the VM, simply run `vagrant up` -# -# Parameters: -# 1. 
out_running (out parameter) -function is_running { - [ "$#" -eq 1 ] || die "1 argument required, $# provided" - local __out_running=$1 - - set +e - vagrant status | sed -n 3p | grep -q "^default\s*running (virtualbox)$" - local retval=${?} - set -e - if [ ${retval} -eq "0" ] ; then - eval ${__out_running}=1 - else - eval ${__out_running}=0 - fi -} - -# Get version, checking we are on the latest -# -# Parameters: -# 1. out_version (out parameter) -# 2. out_error (out parameter) -function get_version { - [ "$#" -eq 2 ] || die "2 arguments required, $# provided" - local __out_version=$1 - local __out_error=$2 - - # Extract the version from package.json using Node and save it in a file named "VERSION" - vagrant ssh -c "cd ${project_path} && ${python_bin} -c \"v={}; execfile('snowplow_tracker/_version.py', v); print v['__version__']\" > VERSION" - file_version=`cat VERSION` - tag_version=`git describe --abbrev=0 --tags` - if [ ${file_version} != ${tag_version} ] ; then - eval ${__out_error}="'File version ${file_version} != tag version ${tag_version}'" - else - eval ${__out_version}=${file_version} - fi -} - -# Go to parent-parent dir of this script -function cd_root() { - source="${BASH_SOURCE[0]}" - while [ -h "${source}" ] ; do source="$(readlink "${source}")"; done - dir="$( cd -P "$( dirname "${source}" )/.." && pwd )" - cd ${dir} -} - -function upload_to_pypi() { - - # Register the new release with PyPI - echo "Registering the release with PyPI. Choose option 1..." - vagrant ssh -c "cd ${project_path} && ${python_bin} setup.py register" - - # Upload the new release to PyPI - echo "Uploading the file to PyPI. IMPORTANT: PyPI does not allow a file to be re-uploaded." - read -p "Do you want to upload the file to PyPI? [Y/N]" -n 1 -r - if [[ $REPLY =~ ^[Yy]$ ]] - then - # We have to upload from a folder which supports hard-linking (which guest folders shared with host don't) - vagrant ssh -c "cd \$(mktemp -d) && cp -r ${project_path}/* . 
&& ${python_bin} setup.py sdist upload" - fi -} - -cd_root - -# Precondition for running -running=0 && is_running "running" -[ ${running} -eq 1 ] || die "Vagrant guest must be running to push" - -# Git tag must match version in snowplow_tracker/_version.py -version="" && error="" && get_version "version" "error" -[ "${error}" ] && die "Versions don't match: ${error}. Are you trying to publish an old version, or maybe on the wrong branch?" - -upload_to_pypi diff --git a/vagrant/up.bash b/vagrant/up.bash deleted file mode 100755 index 7450ae89..00000000 --- a/vagrant/up.bash +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/bash -set -e - -vagrant_dir=/vagrant/vagrant -bashrc=/home/vagrant/.bashrc - -echo "========================================" -echo "INSTALLING PERU AND ANSIBLE DEPENDENCIES" -echo "----------------------------------------" -apt-get update -apt-get install -y language-pack-en git unzip libyaml-dev python3-pip python-yaml python-paramiko python-jinja2 - -echo "===============" -echo "INSTALLING PERU" -echo "---------------" -sudo pip3 install peru - -echo "=======================================" -echo "CLONING ANSIBLE AND PLAYBOOKS WITH PERU" -echo "---------------------------------------" -cd ${vagrant_dir} && peru sync -v -echo "... done" - -env_setup=${vagrant_dir}/ansible/hacking/env-setup -hosts=${vagrant_dir}/ansible.hosts - -echo "===================" -echo "CONFIGURING ANSIBLE" -echo "-------------------" -touch ${bashrc} -echo "source ${env_setup}" >> ${bashrc} -echo "export ANSIBLE_HOSTS=${hosts}" >> ${bashrc} -echo "... 
done" - -echo "==========================================" -echo "RUNNING PLAYBOOKS WITH ANSIBLE*" -echo "* no output while each playbook is running" -echo "------------------------------------------" -while read pb; do - su - -c "source ${env_setup} && ${vagrant_dir}/ansible/bin/ansible-playbook ${vagrant_dir}/${pb} --connection=local --inventory-file=${hosts}" vagrant -done <${vagrant_dir}/up.playbooks - -guidance=${vagrant_dir}/up.guidance - -if [ -f ${guidance} ]; then - echo "===========" - echo "PLEASE READ" - echo "-----------" - cat $guidance -fi diff --git a/vagrant/up.guidance b/vagrant/up.guidance deleted file mode 100644 index e1571af2..00000000 --- a/vagrant/up.guidance +++ /dev/null @@ -1,5 +0,0 @@ -To get started: -vagrant ssh -cd /vagrant -./run-tests.sh deploy -./run-tests.sh test diff --git a/vagrant/up.playbooks b/vagrant/up.playbooks deleted file mode 100644 index 9e800eeb..00000000 --- a/vagrant/up.playbooks +++ /dev/null @@ -1,2 +0,0 @@ -oss-playbooks/python-pyenv.yml -oss-playbooks/snowplow-python-tracker.yml