diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 00000000..4edd7b1a --- /dev/null +++ b/.coveragerc @@ -0,0 +1,2 @@ +[run] +relative_files = True diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000..8b8914db --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,27 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: bug +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior or code snippets that produce the issue. + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Environment (please complete the following information):** + - OS: [e.g. Ubuntu 20.04] + - Version [e.g. 3.8] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 00000000..11fc491e --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: enhancement +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. 
diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml new file mode 100644 index 00000000..2a0fcff1 --- /dev/null +++ b/.github/workflows/cd.yml @@ -0,0 +1,113 @@ +name: Deploy + +on: + push: + tags: + - '*.*.*' + +jobs: + version_check: + runs-on: ubuntu-latest + outputs: + v_tracker: ${{ steps.version.outputs.PYTHON_TRACKER_VERSION}} + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Setup Python + uses: actions/setup-python@v4 + + - name: Get tag and tracker versions + id: version + run: | + echo "TAG_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_OUTPUT + echo "PYTHON_TRACKER_VERSION=$(python setup.py --version)" >> $GITHUB_OUTPUT + + - name: Fail if version mismatch + if: ${{ steps.version.outputs.TAG_VERSION != steps.version.outputs.PYTHON_TRACKER_VERSION }} + run: | + echo "Tag version (${{ steps.version.outputs.TAG_VERSION }}) doesn't match version in project (${{ steps.version.outputs.PYTHON_TRACKER_VERSION }})" + exit 1 + + build: + needs: ["version_check"] + runs-on: ubuntu-latest + + strategy: + matrix: + python-version: [3.8] + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Pin pip version + run: | + echo "pip_v=pip" >> $GITHUB_ENV + + - name: Build + run: | + python -m pip install --upgrade "${{ env.pip_v }}" setuptools wheel + python setup.py sdist bdist_wheel + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: distfiles_${{ github.run_id }} + path: dist + + publish: + needs: ["build"] + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + + - name: Download artifacts + uses: actions/download-artifact@v4 + with: + name: distfiles_${{ github.run_id }} + path: ${{ github.workspace }}/dist + + - name: Twine check + run: | + python -m pip 
install --upgrade pip twine + twine check ${{ github.workspace }}/dist/* + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@v1.4.2 + with: + user: __token__ + password: ${{ secrets.PYPI_TOKEN }} + packages_dir: ${{ github.workspace }}/dist/ + verbose: true + + release: + needs: ["publish", "version_check"] + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Release + uses: softprops/action-gh-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ github.ref }} + name: Version ${{ needs.version_check.outputs.v_tracker }} + draft: false + prerelease: ${{ contains(needs.version_check.outputs.v_tracker, 'rc') }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..9c14d2fe --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,81 @@ +name: build + +on: + push: + branches: + - master + pull_request: + +jobs: + build: + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + python-version: + - 3.8 + - 3.9 + - "3.10" + - "3.11" + - "3.12" + - "3.13" + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Pin pip version + run: | + echo "pip_v=pip" >> $GITHUB_ENV + + - name: Install dependencies + run: | + python -m pip install --upgrade "${{ env.pip_v }}" setuptools wheel + python -m pip install -e . 
+ python -m pip install -r requirements-test.txt + + - name: Build + run: | + python setup.py sdist bdist_wheel + + - name: Tests + run: | + pytest --cov=snowplow_tracker --cov-report=xml + + - name: MyPy + run: | + python -m pip install -e .[typing] + mypy snowplow_tracker --exclude '/test' + + - name: Demo + run: | + cd examples + cd tracker_api_example + python app.py "localhost:9090" + + - name: Snowplow Demo + run: | + cd examples + cd snowplow_api_example + python snowplow_app.py "localhost:9090" + + - name: Coveralls + uses: AndreMiras/coveralls-python-action@develop + with: + parallel: true + + coveralls_finish: + needs: ["build"] + runs-on: ubuntu-latest + + steps: + - name: Coveralls finished + uses: AndreMiras/coveralls-python-action@develop + with: + parallel-finished: true diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml new file mode 100644 index 00000000..c50ac5e2 --- /dev/null +++ b/.github/workflows/documentation.yml @@ -0,0 +1,20 @@ +name: documentation + +on: + push: + branches: + - master +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ammaraskar/sphinx-action@master + with: + docs-folder: "docs/" + - name: Deploy to GitHub Pages + if: success() + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: ./docs/_build/html \ No newline at end of file diff --git a/.github/workflows/snyk.yml b/.github/workflows/snyk.yml new file mode 100644 index 00000000..b2e36c27 --- /dev/null +++ b/.github/workflows/snyk.yml @@ -0,0 +1,27 @@ +name: Snyk + +on: + push: + branches: [ master ] + +jobs: + security: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - uses: snyk/actions/setup@master + + - name: Set up Python 3.8 + uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install . 
+ + - name: Run Snyk to check for vulnerabilities + run: snyk monitor --file=setup.py --project-name=snowplow-python-tracker + env: + SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} diff --git a/.gitignore b/.gitignore index 33506ca3..deb1bace 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ MANIFEST +.cache *.py[cod] @@ -36,3 +37,10 @@ nosetests.xml .mr.developer.cfg .project .pydevproject + +# Vagrant +.vagrant +VERSION + +#Docs +docs/_build \ No newline at end of file diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 10d485d8..00000000 --- a/.travis.yml +++ /dev/null @@ -1,12 +0,0 @@ -language: python -python: - - "2.7" - - "3.3" -# command to install dependencies -install: - - "pip install python-dateutil" - - "pip install httmock" - - "pip install freezegun" - - "pip install -r requirements.txt" -# command to run tests -script: nosetests diff --git a/CHANGES.txt b/CHANGES.txt index c819a59f..6a56dedb 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,9 +1,245 @@ +Version 1.1.0 (2025-02-20) +-------------------------- +Bump Ubuntu Version in GH Actions (#375) +Avoid installing types-requests at run-time (#370) (Thanks to @edgarrmondragon) + +Version 1.0.4 (2024-11-19) +-------------------------- +Test with Python 3.13 (#365) (Thanks to @edgarrmondragon) +Shorten automatic github release title (#352) + +Version 1.0.3 (2024-08-27) +-------------------------- +Fix docs action (close #367) +Update `on_success` docstring (close #358) +Add py.typed to package (close #360) (Thanks to @edgarrmondragon) +Update typing +Fix `PagePing`, `PageView`, and `StructuredEvent` property getters (close #361) + +Version 1.0.2 (2024-02-26) +-------------------------- +Add Python 3.12 to CI tests (#356) (Thanks to @edgarrmondragon) + +Version 1.0.1 (2023-07-12) +-------------------------- +Fix tstamp parameter in track_self_describing_event (#350) (Thanks to @andehen) + +Version 1.0.0 (2023-06-16) +-------------------------- +Remove Redis and Celery Emitters (#335) 
+Make tracker namespace mandatory (#337)
+Track function to return event_id (#338)
+Fix namespace assignment in Snowplow API (#341)
+Refactor track_xxx() methods (#343)
+Update payload builder to combine event subjects (#347)
+
+Version 0.15.0 (2023-04-19)
+---------------------------
+Use Requests Session for sending events (#221)
+Add Redis example app (#322)
+
+Version 0.14.0 (2023-03-21)
+---------------------------
+Adds deprecation warnings for V1 changes (#315)
+Update GH actions to use Node16 (#317)
+Adds event store parameter to Snowplow interface (#320)
+Adds missing parameters to async emitter (#323)
+
+Version 0.13.0 (2023-01-24)
+---------------------------
+Adds Snowplow Interface (#295)
+Adds retry for failed events (#296)
+Adds customisable retry codes (#297)
+Adds EventStore with max limit (#309)
+Adds Snowplow Example App (#302)
+Fix Collector URL with trailing '/' (#300)
+Rename unstruct_event to self_describing_event (#298)
+Upgrade `set-output` in cd (#294)
+
+Version 0.12.0 (2022-11-03)
+---------------------------
+Adds Domain Session ID and Domain Session Index to Subject class (#282) (Thanks to @cpnat)
+Add support for Python 3.11 (#286)
+Change default protocol to HTTPS in the Emitter (#14)
+Change default method to POST in the Emitter (#289)
+Update Docker base image (#283) (Thanks to @cpnat)
+
+Version 0.11.0 (2022-10-06)
+---------------------------
+Update README file (#264)
+Update CONTRIBUTING.md file (#265)
+Add API doc generation in CI (#277)
+Add Demo App (#279)
+Fix failing build in Dockerfile (#266)
+Bump Copyright to 2022 (#271)
+Update README.rst to README.md (#270)
+
+Version 0.10.0 (2021-12-16)
+--------------------------
+Add Python 3.10 support (#254)
+Add configurable timeout for HTTP requests (#258)
+
+Version 0.9.1 (2021-10-26)
+--------------------------
+Update python versions in run-tests script (#256)
+Fix pycontracts incompatibility with pyparsing v3 (#255)
+
+Version 0.9.0 (2021-04-23)
+--------------------------
+Fix items default value issue in track_ecommerce_transaction (#252) +Make optional timestamp argument set 'true timestamp' rather than 'device created timestamp' (#251) +Pass successfully sent events through to on_success callback (#228) +Restrict endpoint parameter as non-empty string for Emitter (#249) +Add ability to set Subject per event (#158) +Extract correct version in deploy workflow (#182) +Support Unicode in adding events to buffer (#162) +Use mocks for unit tests (#250) +Replace regular file test operator in run-tests.sh (#247) +Bump dependencies (#246) +Add a Python 3 wheel to pypi (#244) +Make sure that tracker attaches timestamp even if wrong type was passed (#190) +Correct capitalization of Content-Type header (#152) +Replace deprecating syntax (#243) +Allow celery and redis to be optional dependencies (#232) +Update gevent to the last major version (#233) +Switch to GitHub Actions (#234) +Allow custom json encoder to be passed when configuring tracker (#242) +Update Copyright notices to 2021 (#241) +Pin decorator package version for Python2 platforms (#245) + +Version 0.8.4 (2020-10-10) +-------------------------- +Fix incompatible versions of greenlet and gevent (closes #236) +Update build to Active Python Releases (closes #237) +Add Snyk monitoring (closes #238) +Update Copyright notices to 2020 (closes #235) + +Version 0.8.3 (2019-06-28) +-------------------------- +Fix test_bytelimit test (#227) +Initialize Celery in CeleryEmitter (#226) +Allow non ascii characters to be encoded using Base64 (#194) +Allow empty strings in string_or_none contract (#184) +Fix on_failure param docstring description (#225) +Bump max version requirement of redis (#223) +Remove Vagrant & Ansible (#222) + +Version 0.8.2 (2018-12-01) +-------------------------- +Fix date for 0.8.0 release in CHANGELOG (#183) +Remove Python 3.3 from tests (#206) +Update PyPI deployment to use Twine (#207) +Add version bounds for all dependencies (#208) +Upgrade Celery to 4.x (#210) +Fix 
flush argument clash with async keyword (#211) +Add Python 3.7 to test suite (close #212) + +Version 0.8.0 (2016-10-13) +-------------------------- +Add byte_limit to Emitter (#170) +Add support for dvce_sent_tstamp (#159) +Use SelfDescribingJson class to build payload_data JSON (#141) +Add ability to automatically send all events in buffer at a regular time interval (#114) +Add support for attaching true timestamp for events (#161) +Use exact dependencies versions and remove requirements.txt (#175) +Add missing dependencies (#174) +Remove Python 3.2 from travis testing (#173) +Add missing classifiers to setup.py (#148) +Add additional supported platforms to Subject (#172) +Add missing tracker events (#165) +Add support for Python 3.4 and 3.5 (#169) +Add `track_self_describing_event()` method (#160) + +Version 0.7.2 (2015-08-16) +-------------------------- +Corrected contract typo in the docstring of AsyncEmitter's constructor (#147) +Ensured that Travis uses latest versions of dependencies (#149) +Fixed 0.7.0 reference for 0.7.1 in CHANGELOG (#146) + +Version 0.7.1 (2015-08-11) +-------------------------- +Ensured synchronous flush always waits for the task queue to empty (#142) + +Version 0.7.0 (2015-08-07) +-------------------------- +Added SelfDescribingJson class (#140) +Added support for Python 2 unicode strings using six library, thanks @mthomas! 
(#138) +Started handling RequestExceptions (#134) +Started treating all 2xx and 3xx status codes as successful (#133) +Made Emitter and AsyncEmitter thread-safe (#130) +Made synchronous flush wait until buffer is empty (#139) +Made the number of worker threads used by the AsyncEmitter configurable (#136) +Fixed on_failure implementation for POST requests (#135) +Fixed to latest Peru version (#132) +Fixed code formatting in README (#129) + +Version 0.6.0.post1 (2015-02-14) +-------------------------------- +Improved logging (#109) +Removed unnecessary whitespace from POST requests (#110) +Started sending payload_data version 1-0-2 (#113) +Added set_domain_user_id method (#115) +Added set_useragent method (#116) +Added set_ip_address method (#117) +Added set_network_user_id method (#118) +Updated contexts schema to 1-0-1 (#119) +Added integration tests using mocked POST requests (#122) +Started preserving unicode characters in JSONs (#123) +Used Travis CI image for master branch in README (#125) +Added license button to README (#126) +Added dedicated Vagrant setup (#127) +Added Vagrant push to publish tracker to PyPI (#128) + +Version 0.5.0 (2014-08-13) +-------------------------- +Converted payload values to strings for POST requests (#100) +Set content type to "application/json; charset=utf-8" for POST requests (#99) +Changed collector endpoint for POST to /com.snowplowanalytics.snowplow/tp2 (#98) +Stopped setting and sending tid (#94) +Started setting and sending eid (#93) +Allowed a single Tracker instance to send events to multiple Emitters (#91) +Started passing a list of dictionaries to the on_failure callback for POST requests (#104) +Made the "name" argument of track_screen_view optional (#103) +Made all tracker methods chainable (#105) +Stopped sending empty payloads (#106) + +Version 0.4.0 (2014-06-10) +-------------------------- +Migrated unstructured events to self-describing JSON (#87) +Migrated custom contexts to self-describing JSON (#88) +Gave 
separate events within an ecommerce transaction different transaction IDs (#89) +Added https support for tracker (#81) +Added callbacks for flushing (#78) +Added Python-logging (#76) +Added Redis and gevent based async approach (#75) +Added thread-based AsyncBufferedConsumer (#74) +Added ability to specify port for collector (#72) +Added POST support to tracker (#70) +Added Redis-based queue (#45) +Added Buffered Consumer (#44) +Changed user_id to be set on a per-event basis (#39) +Removed type hint suffixes from unstructured events (#36) + +Version 0.3.0 (2014-04-25) +-------------------------- +Added custom context vendor configuration option (#67) +Changed the return value of the tracking methods to a tuple (#65) +Added coveralls code coverage button (#64) +Added currency parameter to ecommerce tracking methods (#62) +Added config option to disable contracts (#61) +Added event_vendor as argument to track_unstruct_event (#54) +Added classifiers to setup.py (#48) +Added support for custom context to all track() methods (#38) +Updated Tracker constructor to use map of optional args (#37) +Updated so a transaction and its items have the same txnid and dtm (#25) +Added support for Python 3.2 (#41) + Version 0.2.0 (2014-04-15) -------------------------- Fixed Pycontracts dependency (#63) Made unrequired tracker method arguments optional (#40) Linked the Technical Docs and Setup Guide images to the appropriate pages (#60) -Changed API to no longer specify a collector URL option (#57) +Changed API to no longer specify a collector URL option (#57) Removed the "URL from Cloudfront subdomain" option (#56) Started sending event vendor parameter through on querystring (#55) Changed track screen view to use an unstructured event (#53) @@ -14,6 +250,6 @@ Fixed versions in requirements.txt (#47) Added platform and tracker version to payload (#50) Changed tracker version prefix from "python-" to "py-" (#51) -Version 0.1.0 (2014-28-03) +Version 0.1.0 (2014-03-28) 
-------------------------- Initial release diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..dd4a535e --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,370 @@ +# Snowplow Python Tracker - CLAUDE.md + +## Project Overview + +The Snowplow Python Tracker is a public Python library for sending analytics events to Snowplow collectors. It enables developers to integrate Snowplow analytics into Python applications, games, and web servers. The library provides a robust event tracking system with support for various event types, custom contexts, and reliable event delivery through configurable emitters. + +**Key Technologies:** +- Python 3.8+ (supported versions: 3.8-3.13) +- requests library for HTTP communication +- typing_extensions for enhanced type hints +- Event-driven architecture with schema validation +- Asynchronous and synchronous event emission + +## Development Commands + +```bash +# Install dependencies +pip install -r requirements-test.txt + +# Run tests +./run-tests.sh + +# Run specific test module +python -m pytest snowplow_tracker/test/unit/test_tracker.py + +# Run integration tests +python -m pytest snowplow_tracker/test/integration/ + +# Install package in development mode +pip install -e . + +# Build Docker image for testing +docker build -t snowplow-python-tracker . 
+docker run snowplow-python-tracker +``` + +## Architecture + +The tracker follows a layered architecture with clear separation of concerns: + +``` +snowplow_tracker/ +├── Core Components +│ ├── tracker.py # Main Tracker class orchestrating events +│ ├── snowplow.py # High-level API for tracker management +│ └── subject.py # User/device context management +├── Event Layer (events/) +│ ├── event.py # Base Event class +│ ├── page_view.py # PageView event +│ ├── structured_event.py # Structured events +│ └── self_describing.py # Custom schema events +├── Emission Layer +│ ├── emitters.py # Sync/Async event transmission +│ ├── event_store.py # Event buffering and persistence +│ └── payload.py # Event payload construction +├── Configuration +│ ├── tracker_configuration.py +│ └── emitter_configuration.py +└── Validation + ├── contracts.py # Runtime validation + └── typing.py # Type definitions +``` + +## Core Architectural Principles + +1. **Schema-First Design**: All events conform to Iglu schemas for consistency +2. **Separation of Concerns**: Event creation, validation, and emission are separate +3. **Configuration Objects**: Use dedicated configuration classes, not raw dictionaries +4. **Type Safety**: Extensive use of type hints and Protocol classes +5. **Fail-Safe Delivery**: Events are buffered and retried on failure +6. 
**Immutability**: Event objects are largely immutable after creation + +## Layer Organization & Responsibilities + +### Application Layer (snowplow.py) +- Singleton pattern for global tracker management +- Factory methods for tracker creation +- Namespace-based tracker registry + +### Domain Layer (tracker.py, events/) +- Event creation and validation +- Subject (user/device) context management +- Event enrichment with standard fields + +### Infrastructure Layer (emitters.py, event_store.py) +- HTTP communication with collectors +- Event buffering and retry logic +- Async/sync emission strategies + +### Cross-Cutting (contracts.py, typing.py) +- Runtime validation with togglable contracts +- Shared type definitions and protocols + +## Critical Import Patterns + +```python +# ✅ Import from package root for public API +from snowplow_tracker import Snowplow, Tracker, Subject +from snowplow_tracker import EmitterConfiguration, TrackerConfiguration + +# ✅ Import specific event classes +from snowplow_tracker.events import PageView, StructuredEvent + +# ❌ Don't import from internal modules +from snowplow_tracker.emitters import Requester # Internal class + +# ✅ Use typing module for type hints +from snowplow_tracker.typing import PayloadDict, Method +``` + +## Essential Library Patterns + +### Tracker Initialization Pattern +```python +# ✅ Use Snowplow factory with configuration objects +tracker = Snowplow.create_tracker( + namespace="my_app", + endpoint="https://collector.example.com", + tracker_config=TrackerConfiguration(encode_base64=True), + emitter_config=EmitterConfiguration(batch_size=10) +) + +# ❌ Don't instantiate Tracker directly without Snowplow +tracker = Tracker("namespace", emitter) # Missing registration +``` + +### Event Creation Pattern +```python +# ✅ Use event classes with named parameters +page_view = PageView( + page_url="https://example.com", + page_title="Homepage" +) + +# ✅ Add contexts to events +event.context = [SelfDescribingJson(schema, data)] 
+ +# ❌ Don't modify event payload directly +event.payload.add("custom", "value") # Breaks schema validation +``` + +### Subject Management Pattern +```python +# ✅ Set subject at tracker or event level +subject = Subject() +subject.set_user_id("user123") +tracker = Snowplow.create_tracker(..., subject=subject) + +# ✅ Override subject per event +event = PageView(..., event_subject=Subject()) + +# ❌ Don't modify subject after tracker creation +tracker.subject.set_user_id("new_id") # Not thread-safe +``` + +### Emitter Configuration Pattern +```python +# ✅ Configure retry and buffering behavior +config = EmitterConfiguration( + batch_size=50, + buffer_capacity=10000, + custom_retry_codes={429: True, 500: True} +) + +# ❌ Don't use magic numbers +emitter = Emitter(endpoint, 443, "post", 100) # Use config object +``` + +## Model Organization Pattern + +### Event Hierarchy +```python +Event (base class) +├── PageView # Web page views +├── PagePing # Page engagement tracking +├── ScreenView # Mobile screen views +├── StructuredEvent # Category/action/label/property/value events +└── SelfDescribing # Custom schema events +``` + +### Data Structures +```python +# SelfDescribingJson for custom contexts +context = SelfDescribingJson( + "iglu:com.example/context/jsonschema/1-0-0", + {"key": "value"} +) + +# Payload for event data assembly +payload = Payload() +payload.add("e", "pv") # Event type +payload.add_dict({"aid": "app_id"}) +``` + +## Common Pitfalls & Solutions + +### Contract Validation +```python +# ❌ Passing invalid parameters silently fails in production +tracker.track_page_view("") # Empty URL + +# ✅ Enable contracts during development +from snowplow_tracker import enable_contracts +enable_contracts() +``` + +### Event Buffering +```python +# ❌ Not flushing events before shutdown +tracker.track(event) +sys.exit() # Events lost! 
+ +# ✅ Always flush before exit +tracker.track(event) +tracker.flush() +``` + +### Thread Safety +```python +# ❌ Sharing emitter across threads +emitter = Emitter(endpoint) +# Multiple threads using same emitter + +# ✅ Use AsyncEmitter for concurrent scenarios +emitter = AsyncEmitter(endpoint, thread_count=2) +``` + +### Schema Validation +```python +# ❌ Hardcoding schema strings +schema = "iglu:com.snowplow/event/1-0-0" + +# ✅ Use constants for schemas +from snowplow_tracker.constants import CONTEXT_SCHEMA +``` + +## File Structure Template + +``` +project/ +├── tracker_app.py # Application entry point +├── config/ +│ └── tracker_config.py # Tracker configuration +├── events/ +│ ├── __init__.py +│ └── custom_events.py # Custom event definitions +├── contexts/ +│ └── custom_contexts.py # Custom context schemas +└── tests/ + ├── unit/ + │ └── test_events.py + └── integration/ + └── test_emission.py +``` + +## Testing Patterns + +### Unit Testing +```python +# ✅ Mock emitters for unit tests +@mock.patch('snowplow_tracker.emitters.Emitter') +def test_track_event(mock_emitter): + tracker = Tracker("test", mock_emitter) + tracker.track(PageView(...)) + mock_emitter.input.assert_called_once() +``` + +### Contract Testing +```python +# ✅ Use ContractsDisabled context manager +with ContractsDisabled(): + # Test invalid inputs without raising + tracker.track_page_view(None) +``` + +### Integration Testing +```python +# ✅ Test against mock collector +def test_event_delivery(): + with requests_mock.Mocker() as m: + m.post("https://collector.test/com.snowplow/tp2") + # Track and verify delivery +``` + +## Configuration Best Practices + +### Environment-Based Configuration +```python +# ✅ Use environment variables +import os +endpoint = os.getenv("SNOWPLOW_COLLECTOR_URL") +namespace = os.getenv("SNOWPLOW_NAMESPACE", "default") +``` + +### Retry Configuration +```python +# ✅ Configure intelligent retry behavior +EmitterConfiguration( + max_retry_delay_seconds=120, + 
custom_retry_codes={ + 429: True, # Retry rate limits + 500: True, # Retry server errors + 400: False # Don't retry bad requests + } +) +``` + +## Quick Reference + +### Import Checklist +- [ ] Import from `snowplow_tracker` package root +- [ ] Use `EmitterConfiguration` and `TrackerConfiguration` +- [ ] Import specific event classes from `snowplow_tracker.events` +- [ ] Use type hints from `snowplow_tracker.typing` + +### Event Tracking Checklist +- [ ] Create tracker with `Snowplow.create_tracker()` +- [ ] Configure emitter with appropriate batch size +- [ ] Set subject context if tracking users +- [ ] Use appropriate event class for the use case +- [ ] Add custom contexts as `SelfDescribingJson` +- [ ] Call `flush()` before application shutdown +- [ ] Handle failures with callbacks + +### Common Event Types +- `PageView`: Web page views +- `ScreenView`: Mobile app screens +- `StructuredEvent`: Generic events with 5 parameters +- `SelfDescribing`: Custom schema events +- `PagePing`: Engagement tracking + +## Contributing to CLAUDE.md + +When adding or updating content in this document, please follow these guidelines: + +### File Size Limit +- **CLAUDE.md must not exceed 40KB** (currently ~19KB) +- Check file size after updates: `wc -c CLAUDE.md` +- Remove outdated content if approaching the limit + +### Code Examples +- Keep all code examples **4 lines or fewer** +- Focus on the essential pattern, not complete implementations +- Use `// ❌` and `// ✅` to clearly show wrong vs right approaches + +### Content Organization +- Add new patterns to existing sections when possible +- Create new sections sparingly to maintain structure +- Update the architectural principles section for major changes +- Ensure examples follow current codebase conventions + +### Quality Standards +- Test any new patterns in actual code before documenting +- Verify imports and syntax are correct for the codebase +- Keep language concise and actionable +- Focus on "what" and "how", minimize 
"why" explanations + +### Multiple CLAUDE.md Files +- **Directory-specific CLAUDE.md files** can be created for specialized modules +- Follow the same structure and guidelines as this root CLAUDE.md +- Keep them focused on directory-specific patterns and conventions +- Maximum 20KB per directory-specific CLAUDE.md file + +### Instructions for LLMs +When editing files in this repository, **always check for CLAUDE.md guidance**: + +1. **Look for CLAUDE.md in the same directory** as the file being edited +2. **If not found, check parent directories** recursively up to project root +3. **Follow the patterns and conventions** described in the applicable CLAUDE.md +4. **Prioritize directory-specific guidance** over root-level guidance when conflicts exist \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..254afe73 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,80 @@ +# Contributing + +The Snowplow Python Tracker is maintained by the Engineering team at Snowplow Analytics. We welcome suggestions for improvements and bug fixes to all Snowplow Trackers. + +We are extremely grateful for all contributions we receive, whether that is reporting an issue or a change to the code which can be made in the form of a pull request. + +For support requests, please use our community support Discourse forum: https://discourse.snowplow.io/. + +## Setting up an Environment + +Instructions on how to build and run tests are available in the [README.md](README.md). The README will also list any requirements that you will need to install first before being able to build and run the tests. + +You should ensure you are comfortable building and testing the existing release before adding new functionality or fixing issues. + +## Issues + +### Creating an issue + +The project contains an issue template which should help guiding you through the process. 
However, please keep in mind that support requests should go to our Discourse forum: https://discourse.snowplow.io/ and not GitHub issues.
+
+It's also a good idea to log an issue before starting to work on a pull request to discuss it with the maintainers. A pull request is just one solution to a problem and it is often a good idea to talk about the problem with the maintainers first.
+
+### Working on an issue
+
+If you see an issue you would like to work on, please let us know in the issue! That will help us in terms of scheduling and
+not doubling the amount of work.
+
+If you don't know where to start contributing, you can look at
+[the issues labeled `good first issue`](https://github.com/snowplow/snowplow-python-tracker/labels/good%20first%20issue).
+
+## Pull requests
+
+These are a few guidelines to keep in mind when opening pull requests.
+
+### Guidelines
+
+Please supply a good PR description. These are very helpful and help the maintainers to understand _why_ the change has been made, not just _what_ changes have been made.
+
+Please try and keep your PR to a single feature or fix. This might mean breaking up a feature into multiple PRs but this makes it easier for the maintainers to review and also reduces the risk in each change.
+
+Please review your own PR as you would do if you were a reviewer first. This is a great way to spot any mistakes you made when writing the change. Additionally, ensure your code compiles and all tests pass.
+
+### Commit hygiene
+
+We keep a strict 1-to-1 correspondence between commits and issues, as such our commit messages are formatted in the following
+fashion:
+
+`Issue Description (closes #1234)`
+
+for example:
+
+`Fix Issue with Tracker (closes #1234)`
+
+### Writing tests
+
+Whenever necessary, it's good practice to add the corresponding tests to whichever feature you are working on.
+Any non-trivial PR must have tests and will not be accepted without them.
+ +### Feedback cycle + +Reviews should happen fairly quickly during weekdays. +If you feel your pull request has been forgotten, please ping one or more maintainers in the pull request. + +### Getting your pull request merged + +If your pull request is fairly chunky, there might be a non-trivial delay between the moment the pull request is approved and the moment it gets merged. This is because your pull request will have been scheduled for a specific milestone which might or might not be actively worked on by a maintainer at the moment. + +### Contributor license agreement + +We require outside contributors to sign a Contributor license agreement (or CLA) before we can merge their pull requests. +You can find more information on the topic in [the dedicated wiki page](https://docs.snowplow.io/docs/contributing/contributor-license-agreement/). +The @snowplowcla bot will guide you through the process. + +## Getting in touch + +### Community support requests + +Please do not log an issue if you are asking for support, all of our community support requests go through our Discourse forum: https://discourse.snowplow.io/. + +Posting your problem there ensures more people will see it and you should get support faster than creating a new issue on GitHub. Please do create a new issue on GitHub if you think you've found a bug though! 
\ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..d79a72cd --- /dev/null +++ b/Dockerfile @@ -0,0 +1,18 @@ +FROM debian:bullseye-slim + +RUN apt-get update && apt-get install -y --no-install-recommends make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev \ + libsqlite3-dev wget curl llvm libncurses5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev \ + mecab-ipadic-utf8 git ca-certificates + +ENV HOME /root +ENV PYENV_ROOT $HOME/.pyenv +ENV PATH $PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH +RUN git clone --depth=1 https://github.com/pyenv/pyenv.git $PYENV_ROOT +RUN git clone --depth=1 https://github.com/pyenv/pyenv-virtualenv.git $PYENV_ROOT/plugins/pyenv-virtualenv + +RUN pyenv install 3.5.10 && pyenv install 3.6.15 && pyenv install 3.7.17 && pyenv install 3.8.20 && pyenv install 3.9.20 && pyenv install 3.10.15 && pyenv install 3.11.10 && pyenv install 3.12.7 && pyenv install 3.13.0 + +WORKDIR /app +COPY . . +RUN [ "./run-tests.sh", "deploy"] +CMD [ "./run-tests.sh", "test"] diff --git a/LICENSE-2.0.txt b/LICENSE similarity index 99% rename from LICENSE-2.0.txt rename to LICENSE index 7a4a3ea2..db047f7e 100644 --- a/LICENSE-2.0.txt +++ b/LICENSE @@ -1,4 +1,3 @@ - Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ @@ -187,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright [yyyy] [name of copyright owner] + Copyright 2013-2023 Snowplow Analytics Ltd. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -199,4 +198,4 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file + limitations under the License. 
diff --git a/README.md b/README.md new file mode 100644 index 00000000..694d3ce9 --- /dev/null +++ b/README.md @@ -0,0 +1,62 @@ +Python Analytics for Snowplow +============================= + +[![Early Release](https://img.shields.io/static/v1?style=flat&label=Snowplow&message=Early%20Release&color=014477&labelColor=9ba0aa&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAMAAAAoLQ9TAAAAeFBMVEVMaXGXANeYANeXANZbAJmXANeUANSQAM+XANeMAMpaAJhZAJeZANiXANaXANaOAM2WANVnAKWXANZ9ALtmAKVaAJmXANZaAJlXAJZdAJxaAJlZAJdbAJlbAJmQAM+UANKZANhhAJ+EAL+BAL9oAKZnAKVjAKF1ALNBd8J1AAAAKHRSTlMAa1hWXyteBTQJIEwRgUh2JjJon21wcBgNfmc+JlOBQjwezWF2l5dXzkW3/wAAAHpJREFUeNokhQOCA1EAxTL85hi7dXv/E5YPCYBq5DeN4pcqV1XbtW/xTVMIMAZE0cBHEaZhBmIQwCFofeprPUHqjmD/+7peztd62dWQRkvrQayXkn01f/gWp2CrxfjY7rcZ5V7DEMDQgmEozFpZqLUYDsNwOqbnMLwPAJEwCopZxKttAAAAAElFTkSuQmCC)](https://docs.snowplow.io/docs/collecting-data/collecting-from-own-applications/tracker-maintenance-classification/)[![Build Status](https://github.com/snowplow/snowplow-python-tracker/actions/workflows/ci.yml/badge.svg)](https://github.com/snowplow/snowplow-python-tracker/actions)[![Test Coverage](https://img.shields.io/coveralls/github/snowplow/snowplow-python-tracker)](https://coveralls.io/github/snowplow/snowplow-python-tracker?branch=master) [![image](http://img.shields.io/badge/license-Apache--2-blue.svg?style=flat)](http://www.apache.org/licenses/LICENSE-2.0) + + +[![Pypi Snowplow Tracker](https://img.shields.io/pypi/v/snowplow-tracker)](https://pypi.org/project/snowplow-tracker/)[![Python Versions](https://img.shields.io/pypi/pyversions/snowplow-tracker)](https://pypi.org/project/snowplow-tracker/)[![Monthly Downloads](https://img.shields.io/pypi/dm/snowplow-tracker)](https://pypi.org/project/snowplow-tracker/) + +Overview +-------- + +Add analytics to your Python apps and Python games with the +[Snowplow](http://snowplow.io) event tracker for +[Python](http://python.org). 
+ +With this tracker you can collect event data from your Python-based +applications, games or Python web servers/frameworks. + +Find out more +------------- + + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + + | Snowplow Docs | API Docs | Contributing | + | :----: | :----: | :----: | + | ![techdocs](https://d3i6fms1cm1j0i.cloudfront.net/github/images/techdocs.png) | ![setup](https://d3i6fms1cm1j0i.cloudfront.net/github/images/setup.png) | ![contributing](https://d3i6fms1cm1j0i.cloudfront.net/github/images/contributing.png) | + | [Snowplow Docs](https://docs.snowplow.io/docs/collecting-data/collecting-from-own-applications/python-tracker/) | [API Docs](https://snowplow.github.io/snowplow-python-tracker/index.html)| [Contributing](https://github.com/snowplow/snowplow-python-tracker/blob/master/CONTRIBUTING.md) | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +Python Support +-------------- + +| Python version | snowplow-tracker version | +| :----: | :----: | +| \>=3.8 | > 1.1.0 | +| \>=3.5 | > 0.10.0 | +| 2.7 | > 0.9.1 | + +Maintainer Quickstart +--------------------- + +Assuming [docker](https://www.docker.com/) is installed + + host$ git clone git@github.com:snowplow/snowplow-python-tracker.git + host$ cd snowplow-python-tracker + host$ docker build -t snowplow-python-tracker . && docker run snowplow-python-tracker + +Copyright and license +--------------------- + +The Snowplow Python Tracker is copyright 2013-2023 Snowplow Analytics +Ltd. 
+ +Licensed under the [Apache License, Version +2.0](http://www.apache.org/licenses/LICENSE-2.0) (the \"License\"); you +may not use this software except in compliance with the License. + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an \"AS IS\" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/README.rst b/README.rst deleted file mode 100644 index f0e86b5a..00000000 --- a/README.rst +++ /dev/null @@ -1,69 +0,0 @@ -====================================================== -Python Analytics for Snowplow -====================================================== -.. image:: https://travis-ci.org/snowplow/snowplow-python-tracker.png - :alt: Build Status - :target: https://travis-ci.org/snowplow/snowplow-python-tracker -.. image:: https://badge.fury.io/py/snowplow-tracker.png - :target: http://badge.fury.io/py/snowplow-tracker - -Overview -######## - -Add analytics to your Python apps and Python games with the Snowplow_ event tracker for Python_. - -.. _Snowplow: http://snowplowanalytics.com -.. _Python: http://python.org - -With this tracker you can collect event data from your Python-based applications, games or Python web servers/frameworks. 
- -Find out more -############# - -+---------------------------------+---------------------------+-------------------------+-----------------------------------+ -| Technical Docs | Setup Guide | Roadmap | Contributing | -+=================================+===========================+=========================+===================================+ -| |techdocs|_ | |setup|_ | |roadmap| | |contributing| | -+---------------------------------+---------------------------+-------------------------+-----------------------------------+ -| `Technical Docs`_ | `Setup Guide`_ | `Roadmap`_ | `Contributing`_ | -+---------------------------------+---------------------------+-------------------------+-----------------------------------+ - -.. |techdocs| image:: https://d3i6fms1cm1j0i.cloudfront.net/github/images/techdocs.png -.. |setup| image:: https://d3i6fms1cm1j0i.cloudfront.net/github/images/setup.png -.. |roadmap| image:: https://d3i6fms1cm1j0i.cloudfront.net/github/images/roadmap.png -.. |contributing| image:: https://d3i6fms1cm1j0i.cloudfront.net/github/images/contributing.png - -.. _techdocs: https://github.com/snowplow/snowplow/wiki/Python-Tracker -.. _setup: https://github.com/snowplow/snowplow/wiki/Python-Tracker-Setup - -.. _`Technical Docs`: https://github.com/snowplow/snowplow/wiki/Python-Tracker -.. _`Setup Guide`: https://github.com/snowplow/snowplow/wiki/Python-Tracker-Setup -.. _`Roadmap`: https://github.com/snowplow/snowplow/wiki/Python-Tracker-Roadmap -.. 
_`Contributing`: https://github.com/snowplow/snowplow/wiki/Python-Tracker-Contributing - -Contributing quickstart -####################### - -:: - - guest$ git clone --recursive https://github.com/snowplow/dev-environment.git && cd dev-environment - guest$ vagrant up && vagrant ssh - host$ ansible-playbook /vagrant/ansible-playbooks/snowplow-python-tracker.yml --inventory-file=/home/vagrant/ansible_hosts --connection=local - host$ /vagrant/snowplow-python-tracker/run-tests.sh - -Copyright and license -##################### - -The Snowplow Python Tracker is copyright 2013-2014 Snowplow Analytics Ltd. - -Licensed under the `Apache License, Version 2.0`_ (the "License"); -you may not use this software except in compliance with the License. - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - - -.. _Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0 diff --git a/docs/.nojekyll b/docs/.nojekyll new file mode 100644 index 00000000..e69de29b diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000..92dd33a1 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 00000000..83cf06c1 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=_build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..36f69b0c --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,15 @@ + +sphinx==7.1.2 +sphinx_rtd_theme==2.0.0 +sphinx_copybutton==0.5.2 +sphinx_minipres==0.2.1 +sphinx_tabs==3.4.5 + +sphinx_togglebutton==0.3.2 +# Transitive dependency of togglebutton causing: +# https://security.snyk.io/vuln/SNYK-PYTHON-SETUPTOOLS-7448482 +setuptools==70.0.0 + +sphinx-autobuild==2021.3.14 +myst_nb>0.8.3 +sphinx_rtd_theme_ext_color_contrast==0.3.2 diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 00000000..88d210c3 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,63 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. 
For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + +import os +import sys +sys.path.insert(0, os.path.abspath('.')) +sys.path.insert(0, os.path.abspath('../..')) + + + +# -- Project information ----------------------------------------------------- + +project = 'Snowplow Python Tracker' +copyright = "2023, Alex Dean, Paul Boocock, Matus Tomlein, Jack Keene" +author = 'Alex Dean, Paul Boocock, Matus Tomlein, Jack Keene' + +# The full version, including alpha/beta/rc tags +release = "1.1.0" + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + "sphinx.ext.autodoc", + 'sphinx_rtd_theme' +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. 
They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 00000000..4404d24c --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,25 @@ +.. Snowplow Python Tracker documentation master file, created by + sphinx-quickstart on Wed Jul 20 14:00:53 2022. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Snowplow Python Tracker API Documentation +========================================== + +Overview +######## +Add analytics to your Python apps and Python games with the `Snowplow`_ event tracker for `Python`_. + +.. _Snowplow: https://snowplow.io +.. _Python: https://www.python.org/ + +With this tracker you can collect event data from your Python-based applications, games or Python web servers/frameworks. + +.. toctree:: + :maxdepth: 3 + :caption: Contents: + + modules + +* :ref:`genindex` +* :ref:`modindex` diff --git a/docs/source/modules.rst b/docs/source/modules.rst new file mode 100644 index 00000000..8e98bdff --- /dev/null +++ b/docs/source/modules.rst @@ -0,0 +1,7 @@ +snowplow_tracker +================ + +.. toctree:: + :maxdepth: 4 + + snowplow_tracker diff --git a/docs/source/snowplow_tracker.rst b/docs/source/snowplow_tracker.rst new file mode 100644 index 00000000..a1368439 --- /dev/null +++ b/docs/source/snowplow_tracker.rst @@ -0,0 +1,59 @@ +snowplow\_tracker package +========================= + +snowplow\_tracker.contracts module +---------------------------------- + +.. automodule:: snowplow_tracker.contracts + :members: + :undoc-members: + :show-inheritance: + +snowplow\_tracker.emitters module +--------------------------------- + +.. automodule:: snowplow_tracker.emitters + :members: + :undoc-members: + :show-inheritance: + +snowplow\_tracker.payload module +-------------------------------- + +.. 
automodule:: snowplow_tracker.payload + :members: + :undoc-members: + :show-inheritance: + +snowplow\_tracker.self\_describing\_json module +----------------------------------------------- + +.. automodule:: snowplow_tracker.self_describing_json + :members: + :undoc-members: + :show-inheritance: + +snowplow\_tracker.subject module +-------------------------------- + +.. automodule:: snowplow_tracker.subject + :members: + :undoc-members: + :show-inheritance: + +snowplow\_tracker.tracker module +-------------------------------- + +.. automodule:: snowplow_tracker.tracker + :members: + :undoc-members: + :show-inheritance: + +snowplow\_tracker.typing module +------------------------------- + +.. automodule:: snowplow_tracker.typing + :members: + :undoc-members: + :show-inheritance: + diff --git a/examples/redis_example/README.md b/examples/redis_example/README.md new file mode 100644 index 00000000..59b1aeb7 --- /dev/null +++ b/examples/redis_example/README.md @@ -0,0 +1,26 @@ +# Redis Example App + +This example shows how to set up the Python tracker with a Redis database and a Redis worker to forward events to a Snowplow pipeline. + +#### Installation +- Install the Python tracker from the root folder of the project. + +`python setup.py install` + +- Install redis for your machine. More information can be found [here](https://redis.io/docs/getting-started/installation/) + +`brew install redis` + +- Run `redis-server` to check your redis installation, to stop the server enter `ctrl+c`. + +#### Usage +Navigate to the example folder. + +`cd examples/redis_example` + +This example has two programmes, `redis_app.py` tracks events and sends them to a redis database, `redis_worker.py` then forwards these events onto a Snowplow pipeline. + +To send events to your pipeline, run `redis-server`, followed by the `redis_worker.py {{your_collector_endpoint}}` and finally `redis_app.py`. You should see 3 events in your pipleine. 
+ + + diff --git a/examples/redis_example/redis_app.py b/examples/redis_example/redis_app.py new file mode 100644 index 00000000..553a547f --- /dev/null +++ b/examples/redis_example/redis_app.py @@ -0,0 +1,87 @@ +from snowplow_tracker import ( + Tracker, + ScreenView, + PagePing, + PageView, + SelfDescribing, + StructuredEvent, + SelfDescribingJson, +) +from snowplow_tracker.typing import PayloadDict +import json +import redis +import logging + +# logging +logging.basicConfig() +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +class RedisEmitter(object): + """ + Sends Snowplow events to a Redis database + """ + + def __init__(self, rdb=None, key: str = "redis_key") -> None: + """ + :param rdb: Optional custom Redis database + :type rdb: redis | None + :param key: The Redis key for the list of events + :type key: string + """ + + if rdb is None: + rdb = redis.StrictRedis() + + self.rdb = rdb + self.key = key + + def input(self, payload: PayloadDict) -> None: + """ + :param payload: The event properties + :type payload: dict(string:*) + """ + logger.info("Pushing event to Redis queue...") + self.rdb.rpush(self.key, json.dumps(payload)) + logger.info("Finished sending event to Redis.") + + def flush(self) -> None: + logger.warning("The RedisEmitter class does not need to be flushed") + return + + def sync_flush(self) -> None: + self.flush() + + +def main(): + emitter = RedisEmitter() + + t = Tracker(namespace="snowplow_tracker", emitters=emitter) + + page_view = PageView(page_url="https://www.snowplow.io", page_title="Homepage") + t.track(page_view) + + page_ping = PagePing(page_url="https://www.snowplow.io", page_title="Homepage") + t.track(page_ping) + + link_click = SelfDescribing( + SelfDescribingJson( + "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1", + {"targetUrl": "https://www.snowplow.io"}, + ) + ) + t.track(link_click) + + id = t.get_uuid() + screen_view = ScreenView(id_=id, name="name") + t.track(screen_view) + + 
struct_event = StructuredEvent( + category="shop", action="add-to-basket", property_="pcs", value=2 + ) + t.track(struct_event) + + +if __name__ == "__main__": + main() diff --git a/examples/redis_example/redis_worker.py b/examples/redis_example/redis_worker.py new file mode 100644 index 00000000..6a190683 --- /dev/null +++ b/examples/redis_example/redis_worker.py @@ -0,0 +1,74 @@ +import sys +from snowplow_tracker import Emitter +from typing import Any +from snowplow_tracker.typing import PayloadDict +import json +import redis +import signal +import gevent +from gevent.pool import Pool + + +def get_url_from_args(): + if len(sys.argv) != 2: + raise ValueError("Collector Endpoint is required") + return sys.argv[1] + + +class RedisWorker: + def __init__(self, emitter: Emitter, key) -> None: + self.pool = Pool(5) + self.emitter = emitter + self.rdb = redis.StrictRedis() + self.key = key + + signal.signal(signal.SIGTERM, self.request_shutdown) + signal.signal(signal.SIGINT, self.request_shutdown) + signal.signal(signal.SIGQUIT, self.request_shutdown) + + def send(self, payload: PayloadDict) -> None: + """ + Send an event to an emitter + """ + self.emitter.input(payload) + + def pop_payload(self) -> None: + """ + Get a single event from Redis and send it + If the Redis queue is empty, sleep to avoid making continual requests + """ + payload = self.rdb.lpop(self.key) + if payload: + self.pool.spawn(self.send, json.loads(payload.decode("utf-8"))) + else: + gevent.sleep(5) + + def run(self) -> None: + """ + Run indefinitely + """ + self._shutdown = False + while not self._shutdown: + self.pop_payload() + self.pool.join(timeout=20) + + def request_shutdown(self, *args: Any) -> None: + """ + Halt the worker + """ + self._shutdown = True + + +def main(): + collector_url = get_url_from_args() + + # Configure Emitter + emitter = Emitter(collector_url, batch_size=1) + + # Setup worker + worker = RedisWorker(emitter=emitter, key="redis_key") + worker.run() + + +if __name__ == 
"__main__": + main() diff --git a/examples/redis_example/requirements.txt b/examples/redis_example/requirements.txt new file mode 100644 index 00000000..ac10dd44 --- /dev/null +++ b/examples/redis_example/requirements.txt @@ -0,0 +1,2 @@ +redis~=4.5 +gevent~=22.10 \ No newline at end of file diff --git a/examples/snowplow_api_example/README.md b/examples/snowplow_api_example/README.md new file mode 100644 index 00000000..6819757b --- /dev/null +++ b/examples/snowplow_api_example/README.md @@ -0,0 +1,18 @@ +# Snowplow API Example App + +This example shows how to set up the Python tracker with the Snowplow API to send events to a Snowplow pipeline. + +#### Installation +- Install the Python tracker from the root folder of the project. + +`python setup.py install` + +#### Usage +Navigate to the example folder. + +`cd examples/snowplow_api_example` + +To send events to your pipeline, run `snowplow_app.py {{your_collector_endpoint}}`. You should see 6 events in your pipleine. + + + diff --git a/examples/snowplow_api_example/snowplow_app.py b/examples/snowplow_api_example/snowplow_app.py new file mode 100644 index 00000000..1bbd21c8 --- /dev/null +++ b/examples/snowplow_api_example/snowplow_app.py @@ -0,0 +1,74 @@ +import sys +from snowplow_tracker import ( + Snowplow, + EmitterConfiguration, + Subject, + TrackerConfiguration, + SelfDescribingJson, + PagePing, + PageView, + ScreenView, + SelfDescribing, + StructuredEvent, +) + + +def get_url_from_args(): + if len(sys.argv) != 2: + raise ValueError("Collector Endpoint is required") + return sys.argv[1] + + +def main(): + collector_url = get_url_from_args() + # Configure Emitter + custom_retry_codes = {500: False, 401: True} + emitter_config = EmitterConfiguration( + batch_size=5, custom_retry_codes=custom_retry_codes + ) + + # Configure Tracker + tracker_config = TrackerConfiguration(encode_base64=True) + + # Initialise subject + subject = Subject() + subject.set_user_id("uid") + + Snowplow.create_tracker( + 
namespace="ns", + endpoint=collector_url, + app_id="app1", + subject=subject, + tracker_config=tracker_config, + emitter_config=emitter_config, + ) + + tracker = Snowplow.get_tracker("ns") + + page_view = PageView(page_url="https://www.snowplow.io", page_title="Homepage") + tracker.track(page_view) + + page_ping = PagePing(page_url="https://www.snowplow.io", page_title="Homepage") + tracker.track(page_ping) + + link_click = SelfDescribing( + SelfDescribingJson( + "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1", + {"targetUrl": "https://www.snowplow.io"}, + ) + ) + tracker.track(link_click) + + id = tracker.get_uuid() + screen_view = ScreenView(id_=id, name="name") + tracker.track(screen_view) + + struct_event = StructuredEvent( + category="shop", action="add-to-basket", property_="pcs", value=2 + ) + tracker.track(struct_event) + tracker.flush() + + +if __name__ == "__main__": + main() diff --git a/examples/tracker_api_example/README.md b/examples/tracker_api_example/README.md new file mode 100644 index 00000000..10392b17 --- /dev/null +++ b/examples/tracker_api_example/README.md @@ -0,0 +1,18 @@ +# Example App + +This example shows how to set up the Python tracker with the tracker API to send events to a Snowplow pipeline. + +#### Installation +- Install the Python tracker from the root folder of the project. + +`python setup.py install` + +#### Usage +Navigate to the example folder. + +`cd examples/tracker_api_example` + +To send events to your pipeline, run `app.py {{your_collector_endpoint}}`. You should see 5 events in your pipleine. 
+ + + diff --git a/examples/tracker_api_example/app.py b/examples/tracker_api_example/app.py new file mode 100644 index 00000000..41f520ce --- /dev/null +++ b/examples/tracker_api_example/app.py @@ -0,0 +1,76 @@ +from distutils.log import error +from snowplow_tracker import ( + Tracker, + Emitter, + Subject, + SelfDescribingJson, + PageView, + PagePing, + SelfDescribing, + ScreenView, + StructuredEvent, +) +import sys + + +def get_url_from_args(): + if len(sys.argv) != 2: + raise ValueError("Collector Endpoint is required") + return sys.argv[1] + + +def main(): + collector_url = get_url_from_args() + + e = Emitter(collector_url) + + s = Subject().set_platform("pc") + s.set_lang("en").set_user_id("test_user") + + t = Tracker(namespace="snowplow_tracker", emitters=e, subject=s) + + print("Sending events to " + e.endpoint) + + event_subject = Subject() + event_subject.set_color_depth(10) + + page_view = PageView( + page_url="https://www.snowplow.io", + page_title="Homepage", + event_subject=event_subject, + ) + t.track(page_view) + + page_ping = PagePing( + page_url="https://www.snowplow.io", + page_title="Homepage", + event_subject=t.subject, + ) + t.track(page_ping) + + link_click = SelfDescribing( + SelfDescribingJson( + "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1", + {"targetUrl": "https://www.snowplow.io"}, + ), + event_subject=t.subject, + ) + t.track(link_click) + + id = t.get_uuid() + screen_view = ScreenView(id_=id, name="name", event_subject=t.subject) + t.track(screen_view) + + struct_event = StructuredEvent( + category="shop", + action="add-to-basket", + property_="pcs", + value=2, + event_subject=t.subject, + ) + t.track(struct_event) + t.flush() + + +if __name__ == "__main__": + main() diff --git a/requirements-test.txt b/requirements-test.txt new file mode 100644 index 00000000..cde305f6 --- /dev/null +++ b/requirements-test.txt @@ -0,0 +1,8 @@ +pytest==4.6.11; python_version < '3.10.0' +pytest==8.3.2; python_version >= '3.10.0' 
+attrs==21.2.0 +httmock==1.4.0 +freezegun==1.1.0; python_version < '3.13' +freezegun==1.5.1; python_version >= '3.13' +pytest-cov +coveralls==3.3.1 diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 76460c8b..00000000 --- a/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -pycontracts==1.6.0 -requests==2.2.1 diff --git a/run-tests.sh b/run-tests.sh index b3d63a84..bb938e85 100755 --- a/run-tests.sh +++ b/run-tests.sh @@ -1,11 +1,168 @@ -#!/bin/sh +#!/bin/bash + # Run the Snowplow Tracker test suite. +# Quit on failure +set -e + # Need to execute from this dir cd $(dirname $0) # pytest because it has a neat output -/vagrant/snowplow-python-3.3-tracker-environment/bin/python3.3 -m pytest -s +export PATH="~/.pyenv/bin:$PATH" +eval "$(pyenv init -)" +eval "$(pyenv virtualenv-init -)" + +function deploy { + # pyenv install 3.5.10 + if [ ! -e ~/.pyenv/versions/tracker35 ]; then + pyenv virtualenv 3.5.10 tracker35 + pyenv activate tracker35 + pip install . + pip install -r requirements-test.txt + source deactivate + fi + + # pyenv install 3.6.15 + if [ ! -e ~/.pyenv/versions/tracker36 ]; then + pyenv virtualenv 3.6.15 tracker36 + pyenv activate tracker36 + pip install . + pip install -r requirements-test.txt + source deactivate + fi + + # pyenv install 3.7.17 + if [ ! -e ~/.pyenv/versions/tracker37 ]; then + pyenv virtualenv 3.7.17 tracker37 + pyenv activate tracker37 + pip install . + pip install -r requirements-test.txt + source deactivate + fi + + # pyenv install 3.8.20 + if [ ! -e ~/.pyenv/versions/tracker38 ]; then + pyenv virtualenv 3.8.20 tracker38 + pyenv activate tracker38 + pip install . + pip install -r requirements-test.txt + source deactivate + fi + + # pyenv install 3.9.20 + if [ ! -e ~/.pyenv/versions/tracker39 ]; then + pyenv virtualenv 3.9.20 tracker39 + pyenv activate tracker39 + pip install . + pip install -r requirements-test.txt + source deactivate + fi + + # pyenv install 3.10.15 + if [ ! 
-e ~/.pyenv/versions/tracker310 ]; then + pyenv virtualenv 3.10.15 tracker310 + pyenv activate tracker310 + pip install . + pip install -r requirements-test.txt + source deactivate + fi + + # pyenv install 3.11.10 + if [ ! -e ~/.pyenv/versions/tracker311 ]; then + pyenv virtualenv 3.11.10 tracker311 + pyenv activate tracker311 + pip install . + pip install -r requirements-test.txt + source deactivate + fi + + # pyenv install 3.12.7 + if [ ! -e ~/.pyenv/versions/tracker312 ]; then + pyenv virtualenv 3.12.7 tracker312 + pyenv activate tracker312 + pip install . + pip install -r requirements-test.txt + source deactivate + fi + + # pyenv install 3.13.0 + if [ ! -e ~/.pyenv/versions/tracker313 ]; then + pyenv virtualenv 3.13.0 tracker313 + pyenv activate tracker313 + pip install . + pip install -r requirements-test.txt + source deactivate + fi +} + + +function run_tests { + pyenv activate tracker35 + pytest + source deactivate + + pyenv activate tracker36 + pytest + source deactivate + + pyenv activate tracker37 + pytest + source deactivate + + pyenv activate tracker38 + pytest + source deactivate + + pyenv activate tracker39 + pytest + source deactivate + + pyenv activate tracker310 + pytest + source deactivate + + pyenv activate tracker311 + pytest + source deactivate + + pyenv activate tracker312 + pytest + source deactivate + + pyenv activate tracker313 + pytest + source deactivate +} + +function refresh_deploy { + pyenv uninstall -f tracker35 + pyenv uninstall -f tracker36 + pyenv uninstall -f tracker37 + pyenv uninstall -f tracker38 + pyenv uninstall -f tracker39 + pyenv uninstall -f tracker310 + pyenv uninstall -f tracker311 + pyenv uninstall -f tracker312 + pyenv uninstall -f tracker313 +} + + +case "$1" in + + "deploy") echo "Deploying python environments. This can take few minutes" + deploy + ;; + "test") echo "Running tests" + run_tests + ;; + "refresh") echo "Refreshing python environments" + refresh_deploy + deploy + ;; + *) echo "Unknown subcommand. 
Specify deploy or test" + exit 1 + ;; -/vagrant/snowplow-python-2.7-tracker-environment/bin/python2.7 -m pytest -s +esac diff --git a/setup.py b/setup.py index b7b0c39b..efaf6536 100644 --- a/setup.py +++ b/setup.py @@ -1,24 +1,19 @@ -""" - setup.py +# +# setup.py - Copyright (c) 2013-2014 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. - This program is licensed to you under the Apache License Version 2.0, - and you may not use this file except in compliance with the Apache License - Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - http://www.apache.org/licenses/LICENSE-2.0. - - Unless required by applicable law or agreed to in writing, - software distributed under the Apache License Version 2.0 is distributed on - an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - express or implied. See the Apache License Version 2.0 for the specific - language governing permissions and limitations there under. - - Authors: Anuj More, Alex Dean, Fred Blundun - Copyright: Copyright (c) 2013-2014 Snowplow Analytics Ltd - License: Apache License Version 2.0 -""" +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ #!/usr/bin/env python # -*- coding: utf-8 -*- @@ -28,40 +23,55 @@ except ImportError: from distutils.core import setup -import os - -version_file_path = os.path.join( - os.path.dirname(__file__), - 'snowplow_tracker', - '_version.py' - ) -exec(open(version_file_path).read(), {}, locals()) - authors_list = [ - 'Anuj More', - 'Alexander Dean', - 'Fred Blundun' - ] -authors_str = ', '.join(authors_list) + "Anuj More", + "Alexander Dean", + "Fred Blundun", + "Paul Boocock", + "Matus Tomlein", + "Jack Keene", +] +authors_str = ", ".join(authors_list) authors_email_list = [ - 'support@snowplowanalytics.com', - ] -authors_email_str = ', '.join(authors_email_list) + "support@snowplow.io", +] +authors_email_str = ", ".join(authors_email_list) setup( - name='snowplow-tracker', - version=__version__, + name="snowplow-tracker", + version="1.1.0", author=authors_str, author_email=authors_email_str, - packages=['snowplow_tracker', 'snowplow_tracker.test'], - url='http://snowplowanalytics.com', - license='Apache License 2.0', - description='Snowplow event tracker for Python. Add analytics to your Python and Django apps, webapps and games', - long_description=open('README.rst').read(), - - install_requires = [ - "requests", - "pycontracts", + packages=["snowplow_tracker", "snowplow_tracker.test", "snowplow_tracker.events"], + package_data={"snowplow_tracker": ["py.typed"]}, + url="http://snowplow.io", + license="Apache License 2.0", + description="Snowplow event tracker for Python. 
Add analytics to your Python and Django apps, webapps and games", + long_description=open("README.md").read(), + long_description_content_type="text/markdown", + classifiers=[ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Operating System :: OS Independent", + ], + install_requires=[ + "requests>=2.25.1,<3.0", + "typing_extensions>=3.7.4", ], + extras_require={ + "typing": [ + "mypy>=0.971", + "types-requests>=2.25.1,<3.0", + ], + }, ) diff --git a/snowplow_tracker/__init__.py b/snowplow_tracker/__init__.py index e69de29b..689b2539 100644 --- a/snowplow_tracker/__init__.py +++ b/snowplow_tracker/__init__.py @@ -0,0 +1,18 @@ +from snowplow_tracker._version import __version__ +from snowplow_tracker.subject import Subject +from snowplow_tracker.emitters import logger, Emitter, AsyncEmitter +from snowplow_tracker.self_describing_json import SelfDescribingJson +from snowplow_tracker.tracker import Tracker +from snowplow_tracker.emitter_configuration import EmitterConfiguration +from snowplow_tracker.tracker_configuration import TrackerConfiguration +from snowplow_tracker.snowplow import Snowplow +from snowplow_tracker.contracts import disable_contracts, enable_contracts +from snowplow_tracker.event_store import EventStore +from snowplow_tracker.events import ( + Event, + PageView, + PagePing, + SelfDescribing, + StructuredEvent, + ScreenView, +) diff --git a/snowplow_tracker/_version.py b/snowplow_tracker/_version.py index 3e58dfc5..f4ff17a0 100644 --- a/snowplow_tracker/_version.py +++ b/snowplow_tracker/_version.py @@ -1,24 +1,20 @@ -""" 
- _version.py +# """ +# _version.py - Copyright (c) 2013-2014 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. - This program is licensed to you under the Apache License Version 2.0, - and you may not use this file except in compliance with the Apache License - Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - http://www.apache.org/licenses/LICENSE-2.0. +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. - Unless required by applicable law or agreed to in writing, - software distributed under the Apache License Version 2.0 is distributed on - an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - express or implied. See the Apache License Version 2.0 for the specific - language governing permissions and limitations there under. +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ - Authors: Anuj More, Alex Dean, Fred Blundun - Copyright: Copyright (c) 2013-2014 Snowplow Analytics Ltd - License: Apache License Version 2.0 -""" - - -__version_info__ = (0, 2, 0) +__version_info__ = (1, 1, 0) __version__ = ".".join(str(x) for x in __version_info__) +__build_version__ = __version__ + "" diff --git a/snowplow_tracker/constants.py b/snowplow_tracker/constants.py new file mode 100644 index 00000000..53ecc151 --- /dev/null +++ b/snowplow_tracker/constants.py @@ -0,0 +1,27 @@ +# """ +# constants.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ +from typing import List +from snowplow_tracker import _version, SelfDescribingJson + +VERSION = "py-%s" % _version.__version__ +DEFAULT_ENCODE_BASE64: bool = True # Type hint required for Python 3.6 MyPy check +BASE_SCHEMA_PATH = "iglu:com.snowplowanalytics.snowplow" +MOBILE_SCHEMA_PATH = "iglu:com.snowplowanalytics.mobile" +SCHEMA_TAG = "jsonschema" +CONTEXT_SCHEMA = "%s/contexts/%s/1-0-1" % (BASE_SCHEMA_PATH, SCHEMA_TAG) +UNSTRUCT_EVENT_SCHEMA = "%s/unstruct_event/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG) +ContextArray = List[SelfDescribingJson] diff --git a/snowplow_tracker/contracts.py b/snowplow_tracker/contracts.py new file mode 100644 index 00000000..3b17e1a3 --- /dev/null +++ b/snowplow_tracker/contracts.py @@ -0,0 +1,101 @@ +# """ +# contracts.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ + +import traceback +import re +from typing import Any, Dict, Iterable, Callable, Sized +from snowplow_tracker.typing import FORM_TYPES, FORM_NODE_NAMES + +_CONTRACTS_ENABLED = True +_MATCH_FIRST_PARAMETER_REGEX = re.compile(r"\(([\w.]+)[,)]") + + +def disable_contracts() -> None: + global _CONTRACTS_ENABLED + _CONTRACTS_ENABLED = False + + +def enable_contracts() -> None: + global _CONTRACTS_ENABLED + _CONTRACTS_ENABLED = True + + +def contracts_enabled() -> bool: + global _CONTRACTS_ENABLED + return _CONTRACTS_ENABLED + + +def greater_than(value: float, compared_to: float) -> None: + if contracts_enabled() and value <= compared_to: + raise ValueError( + "{0} must be greater than {1}.".format(_get_parameter_name(), compared_to) + ) + + +def non_empty(seq: Sized) -> None: + if contracts_enabled() and len(seq) == 0: + raise ValueError("{0} is empty.".format(_get_parameter_name())) + + +def non_empty_string(s: str) -> None: + if contracts_enabled() and type(s) is not str or not s: + raise ValueError("{0} is empty.".format(_get_parameter_name())) + + +def one_of(value: Any, supported: Iterable) -> None: + if contracts_enabled() and value not in supported: + raise ValueError("{0} is not supported.".format(_get_parameter_name())) + + +def satisfies(value: Any, check: Callable[[Any], bool]) -> None: + if contracts_enabled() and not check(value): + raise ValueError("{0} is not allowed.".format(_get_parameter_name())) + + +def form_element(element: Dict[str, Any]) -> None: + satisfies(element, lambda x: _check_form_element(x)) + + +def _get_parameter_name() -> str: + stack = traceback.extract_stack() + _, _, _, code = stack[-3] + + match = _MATCH_FIRST_PARAMETER_REGEX.search(code) + if not match: + return "Unnamed parameter" + return str(match.groups(0)[0]) + + +def _check_form_element(element: Dict[str, Any]) -> bool: + """ + Helper method to check that dictionary conforms element + in sumbit_form and change_form schemas + """ + all_present = ( + 
isinstance(element, dict) + and "name" in element + and "value" in element + and "nodeName" in element + ) + try: + if element["type"] in FORM_TYPES: + type_valid = True + else: + type_valid = False + except KeyError: + type_valid = True + return all_present and element["nodeName"] in FORM_NODE_NAMES and type_valid diff --git a/snowplow_tracker/emitter_configuration.py b/snowplow_tracker/emitter_configuration.py new file mode 100644 index 00000000..82626fa4 --- /dev/null +++ b/snowplow_tracker/emitter_configuration.py @@ -0,0 +1,213 @@ +# """ +# emitter_configuration.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ + +from typing import Optional, Union, Tuple, Dict +from snowplow_tracker.typing import SuccessCallback, FailureCallback +from snowplow_tracker.event_store import EventStore +import requests + + +class EmitterConfiguration(object): + def __init__( + self, + batch_size: Optional[int] = None, + on_success: Optional[SuccessCallback] = None, + on_failure: Optional[FailureCallback] = None, + byte_limit: Optional[int] = None, + request_timeout: Optional[Union[float, Tuple[float, float]]] = None, + buffer_capacity: Optional[int] = None, + custom_retry_codes: Dict[int, bool] = {}, + event_store: Optional[EventStore] = None, + session: Optional[requests.Session] = None, + ) -> None: + """ + Configuration for the emitter that sends events to the Snowplow collector. + :param batch_size: The maximum number of queued events before the buffer is flushed. Default is 10. + :type batch_size: int | None + :param on_success: Callback executed after every HTTP request in a flush has status code 200 + Gets passed one argument, an array of dictionaries corresponding to the sent events' payloads + :type on_success: function | None + :param on_failure: Callback executed if at least one HTTP request in a flush has status code other than 200 + Gets passed two arguments: + 1) The number of events which were successfully sent + 2) An array of dictionaries corresponding to the unsent events' payloads + :type on_failure: function | None + :param byte_limit: The size event list after reaching which queued events will be flushed + :type byte_limit: int | None + :param request_timeout: Timeout for the HTTP requests. Can be set either as single float value which + applies to both "connect" AND "read" timeout, or as tuple with two float values + which specify the "connect" and "read" timeouts separately + :type request_timeout: float | tuple | None + :param custom_retry_codes: Set custom retry rules for HTTP status codes received in emit responses from the Collector. 
+ By default, retry will not occur for status codes 400, 401, 403, 410 or 422. This can be overridden here. + Note that 2xx codes will never retry as they are considered successful. + :type custom_retry_codes: dict + :param event_store: Stores the event buffer and buffer capacity. Default is an InMemoryEventStore object with buffer_capacity of 10,000 events. + :type event_store: EventStore | None + :param session: Persist parameters across requests by using a session object + :type session: request.Session | None + """ + + self.batch_size = batch_size + self.on_success = on_success + self.on_failure = on_failure + self.byte_limit = byte_limit + self.request_timeout = request_timeout + self.buffer_capacity = buffer_capacity + self.custom_retry_codes = custom_retry_codes + self.event_store = event_store + self.session = session + + @property + def batch_size(self) -> Optional[int]: + """ + The maximum number of queued events before the buffer is flushed. Default is 10. + """ + return self._batch_size + + @batch_size.setter + def batch_size(self, value: Optional[int]): + if isinstance(value, int) and value < 0: + raise ValueError("batch_size must greater than 0") + if not isinstance(value, int) and value is not None: + raise ValueError("batch_size must be of type int") + self._batch_size = value + + @property + def on_success(self) -> Optional[SuccessCallback]: + """ + Callback executed after every HTTP request in a flush has status code 200. Gets passed the number of events flushed. 
+ """ + return self._on_success + + @on_success.setter + def on_success(self, value: Optional[SuccessCallback]): + self._on_success = value + + @property + def on_failure(self) -> Optional[FailureCallback]: + """ + Callback executed if at least one HTTP request in a flush has status code other than 200 + Gets passed two arguments: + 1) The number of events which were successfully sent + 2) An array of dictionaries corresponding to the unsent events' payloads + """ + return self._on_failure + + @on_failure.setter + def on_failure(self, value: Optional[FailureCallback]): + self._on_failure = value + + @property + def byte_limit(self) -> Optional[int]: + """ + The size event list after reaching which queued events will be flushed + """ + return self._byte_limit + + @byte_limit.setter + def byte_limit(self, value: Optional[int]): + if isinstance(value, int) and value < 0: + raise ValueError("byte_limit must greater than 0") + if not isinstance(value, int) and value is not None: + raise ValueError("byte_limit must be of type int") + self._byte_limit = value + + @property + def request_timeout(self) -> Optional[Union[float, Tuple[float, float]]]: + """ + Timeout for the HTTP requests. Can be set either as single float value which + applies to both "connect" AND "read" timeout, or as tuple with two float values + which specify the "connect" and "read" timeouts separately + """ + return self._request_timeout + + @request_timeout.setter + def request_timeout(self, value: Optional[Union[float, Tuple[float, float]]]): + self._request_timeout = value + + @property + def buffer_capacity(self) -> Optional[int]: + """ + The maximum capacity of the event buffer. The default buffer capacity is 10 000 events. + When the buffer is full new events are lost. 
+ """ + return self._buffer_capacity + + @buffer_capacity.setter + def buffer_capacity(self, value: Optional[int]): + if isinstance(value, int) and value < 0: + raise ValueError("buffer_capacity must greater than 0") + if not isinstance(value, int) and value is not None: + raise ValueError("buffer_capacity must be of type int") + self._buffer_capacity = value + + @property + def custom_retry_codes(self) -> Dict[int, bool]: + """ + Custom retry rules for HTTP status codes received in emit responses from the Collector. + """ + return self._custom_retry_codes + + @custom_retry_codes.setter + def custom_retry_codes(self, value: Dict[int, bool]): + self._custom_retry_codes = value + + def set_retry_code(self, status_code: int, retry=True) -> bool: + """ + Add a retry rule for HTTP status code received from emit responses from the Collector. + :param status_code: HTTP response code + :type status_code: int + :param retry: Set the status_code to retry (True) or not retry (False). Default is True + :type retry: bool + """ + if not isinstance(status_code, int): + print("status_code must be of type int") + return False + + if not isinstance(retry, bool): + print("retry must be of type bool") + return False + + if 200 <= status_code < 300: + print( + "custom_retry_codes should not include codes for succesful requests (2XX codes)" + ) + return False + + self.custom_retry_codes[status_code] = retry + + return status_code in self.custom_retry_codes.keys() + + @property + def event_store(self) -> Optional[EventStore]: + return self._event_store + + @event_store.setter + def event_store(self, value: Optional[EventStore]): + self._event_store = value + + @property + def session(self) -> Optional[requests.Session]: + """ + Persist parameters across requests using a requests.Session object + """ + return self._session + + @session.setter + def session(self, value: Optional[requests.Session]): + self._session = value diff --git a/snowplow_tracker/emitters.py 
b/snowplow_tracker/emitters.py new file mode 100644 index 00000000..72f451bf --- /dev/null +++ b/snowplow_tracker/emitters.py @@ -0,0 +1,594 @@ +# """ +# emitters.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ + +import logging +import time +import threading +import requests +import random +from typing import Optional, Union, Tuple, Dict, cast, Callable +from queue import Queue + +from snowplow_tracker.self_describing_json import SelfDescribingJson +from snowplow_tracker.typing import ( + PayloadDict, + PayloadDictList, + HttpProtocol, + Method, + SuccessCallback, + FailureCallback, + EmitterProtocol, +) +from snowplow_tracker.contracts import one_of +from snowplow_tracker.event_store import EventStore, InMemoryEventStore + +# logging +logging.basicConfig() +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +DEFAULT_MAX_LENGTH = 10 +PAYLOAD_DATA_SCHEMA = ( + "iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4" +) +PROTOCOLS = {"http", "https"} +METHODS = {"get", "post"} + + +# Unifes the two request methods under one interface +class Requester: + post: Callable + get: Callable + + def __init__(self, post: Callable, get: Callable): + # 3.6 MyPy compatibility: + # error: Cannot assign to a method + # https://github.com/python/mypy/issues/2427 + setattr(self, "post", post) + 
setattr(self, "get", get) + + +class Emitter(EmitterProtocol): + """ + Synchronously send Snowplow events to a Snowplow collector + Supports both GET and POST requests + """ + + def __init__( + self, + endpoint: str, + protocol: HttpProtocol = "https", + port: Optional[int] = None, + method: Method = "post", + batch_size: Optional[int] = None, + on_success: Optional[SuccessCallback] = None, + on_failure: Optional[FailureCallback] = None, + byte_limit: Optional[int] = None, + request_timeout: Optional[Union[float, Tuple[float, float]]] = None, + max_retry_delay_seconds: int = 60, + buffer_capacity: Optional[int] = None, + custom_retry_codes: Dict[int, bool] = {}, + event_store: Optional[EventStore] = None, + session: Optional[requests.Session] = None, + ) -> None: + """ + :param endpoint: The collector URL. If protocol is not set in endpoint it will automatically set to "https://" - this is done automatically. + :type endpoint: string + :param protocol: The protocol to use - http or https. Defaults to https. + :type protocol: protocol + :param port: The collector port to connect to + :type port: int | None + :param method: The HTTP request method. Defaults to post. + :type method: method + :param batch_size: The maximum number of queued events before the buffer is flushed. Default is 10. 
+ :type batch_size: int | None + :param on_success: Callback executed after every HTTP request in a flush has status code 200 + Gets passed one argument, an array of dictionaries corresponding to the sent events' payloads + :type on_success: function | None + :param on_failure: Callback executed if at least one HTTP request in a flush has status code other than 200 + Gets passed two arguments: + 1) The number of events which were successfully sent + 2) An array of dictionaries corresponding to the unsent events' payloads + :type on_failure: function | None + :param byte_limit: The size event list after reaching which queued events will be flushed + :type byte_limit: int | None + :param request_timeout: Timeout for the HTTP requests. Can be set either as single float value which + applies to both "connect" AND "read" timeout, or as tuple with two float values + which specify the "connect" and "read" timeouts separately + :type request_timeout: float | tuple | None + :param max_retry_delay_seconds: Set the maximum time between attempts to send failed events to the collector. Default 60 seconds + :type max_retry_delay_seconds: int + :param buffer_capacity: The maximum capacity of the event buffer. + When the buffer is full new events are lost. + :type buffer_capacity: int + :param custom_retry_codes: Set custom retry rules for HTTP status codes received in emit responses from the Collector. + By default, retry will not occur for status codes 400, 401, 403, 410 or 422. This can be overridden here. + Note that 2xx codes will never retry as they are considered successful. + :type custom_retry_codes: dict + :param event_store: Stores the event buffer and buffer capacity. Default is an InMemoryEventStore object with buffer_capacity of 10,000 events. 
+ :type event_store: EventStore | None + :param session: Persist parameters across requests by using a session object + :type session: requests.Session | None + """ + one_of(protocol, PROTOCOLS) + one_of(method, METHODS) + + self.endpoint = Emitter.as_collector_uri(endpoint, protocol, port, method) + + self.method = method + + if event_store is None: + if buffer_capacity is None: + event_store = InMemoryEventStore(logger=logger) + else: + event_store = InMemoryEventStore( + buffer_capacity=buffer_capacity, logger=logger + ) + + self.event_store = event_store + + if batch_size is None: + if method == "post": + batch_size = DEFAULT_MAX_LENGTH + else: + batch_size = 1 + + if buffer_capacity is not None and batch_size > buffer_capacity: + batch_size = buffer_capacity + + self.batch_size = batch_size + self.byte_limit = byte_limit + self.bytes_queued = None if byte_limit is None else 0 + self.request_timeout = request_timeout + + self.on_success = on_success + self.on_failure = on_failure + + self.lock = threading.RLock() + + self.timer = FlushTimer(emitter=self, repeating=True) + self.retry_timer = FlushTimer(emitter=self, repeating=False) + + self.max_retry_delay_seconds = max_retry_delay_seconds + self.retry_delay: Union[int, float] = 0 + + self.custom_retry_codes = custom_retry_codes + logger.info("Emitter initialized with endpoint " + self.endpoint) + + if session is None: + self.request_method = Requester(post=requests.post, get=requests.get) + else: + self.request_method = Requester(post=session.post, get=session.get) + + @staticmethod + def as_collector_uri( + endpoint: str, + protocol: HttpProtocol = "https", + port: Optional[int] = None, + method: Method = "post", + ) -> str: + """ + :param endpoint: The raw endpoint provided by the user + :type endpoint: string + :param protocol: The protocol to use - http or https + :type protocol: protocol + :param port: The collector port to connect to + :type port: int | None + :param method: Either `get` or `post` HTTP 
method + :type method: method + :rtype: string + """ + if len(endpoint) < 1: + raise ValueError("No endpoint provided.") + + endpoint = endpoint.rstrip("/") + + if endpoint.split("://")[0] in PROTOCOLS: + endpoint_arr = endpoint.split("://") + protocol = cast(HttpProtocol, endpoint_arr[0]) + endpoint = endpoint_arr[1] + + if method == "get": + path = "/i" + else: + path = "/com.snowplowanalytics.snowplow/tp2" + if port is None: + return protocol + "://" + endpoint + path + else: + return protocol + "://" + endpoint + ":" + str(port) + path + + def input(self, payload: PayloadDict) -> None: + """ + Adds an event to the buffer. + If the maximum size has been reached, flushes the buffer. + + :param payload: The name-value pairs for the event + :type payload: dict(string:\\*) + """ + with self.lock: + if self.bytes_queued is not None: + self.bytes_queued += len(str(payload)) + + if self.method == "post": + self.event_store.add_event({key: str(payload[key]) for key in payload}) + else: + self.event_store.add_event(payload) + + if self.reached_limit(): + self.flush() + + def reached_limit(self) -> bool: + """ + Checks if event-size or bytes limit are reached + + :rtype: bool + """ + if self.byte_limit is None: + return self.event_store.size() >= self.batch_size + else: + return ( + self.bytes_queued or 0 + ) >= self.byte_limit or self.event_store.size() >= self.batch_size + + def flush(self) -> None: + """ + Sends all events in the buffer to the collector. + """ + with self.lock: + if self.retry_timer.is_active(): + return + send_events = self.event_store.get_events_batch() + self.send_events(send_events) + if self.bytes_queued is not None: + self.bytes_queued = 0 + + def http_post(self, data: str) -> int: + """ + :param data: The array of JSONs to be sent + :type data: string + """ + logger.info("Sending POST request to %s..." 
% self.endpoint) + logger.debug("Payload: %s" % data) + try: + r = self.request_method.post( + self.endpoint, + data=data, + headers={"Content-Type": "application/json; charset=utf-8"}, + timeout=self.request_timeout, + ) + except requests.RequestException as e: + logger.warning(e) + return -1 + + return r.status_code + + def http_get(self, payload: PayloadDict) -> int: + """ + :param payload: The event properties + :type payload: dict(string:\\*) + """ + logger.info("Sending GET request to %s..." % self.endpoint) + logger.debug("Payload: %s" % payload) + try: + r = self.request_method.get( + self.endpoint, params=payload, timeout=self.request_timeout + ) + except requests.RequestException as e: + logger.warning(e) + return -1 + + return r.status_code + + def sync_flush(self) -> None: + """ + Calls the flush method of the base Emitter class. + This is guaranteed to be blocking, not asynchronous. + """ + logger.debug("Starting synchronous flush...") + self.flush() + logger.info("Finished synchronous flush") + + @staticmethod + def is_good_status_code(status_code: int) -> bool: + """ + :param status_code: HTTP status code + :type status_code: int + :rtype: bool + """ + return 200 <= status_code < 300 + + def send_events(self, evts: PayloadDictList) -> None: + """ + :param evts: Array of events to be sent + :type evts: list(dict(string:\\*)) + """ + if len(evts) > 0: + logger.info("Attempting to send %s events" % len(evts)) + + Emitter.attach_sent_timestamp(evts) + success_events = [] + failure_events = [] + + if self.method == "post": + data = SelfDescribingJson(PAYLOAD_DATA_SCHEMA, evts).to_string() + status_code = self.http_post(data) + request_succeeded = Emitter.is_good_status_code(status_code) + if request_succeeded: + success_events += evts + else: + failure_events += evts + + elif self.method == "get": + for evt in evts: + status_code = self.http_get(evt) + request_succeeded = Emitter.is_good_status_code(status_code) + + if request_succeeded: + success_events 
+= [evt] + else: + failure_events += [evt] + + if self.on_success is not None and len(success_events) > 0: + self.on_success(success_events) + if self.on_failure is not None and len(failure_events) > 0: + self.on_failure(len(success_events), failure_events) + + if self._should_retry(status_code): + self._set_retry_delay() + self._retry_failed_events(failure_events) + else: + self.event_store.cleanup(success_events, False) + self._reset_retry_delay() + else: + logger.info("Skipping flush since buffer is empty") + + def _set_retry_timer(self, timeout: float) -> None: + """ + Set an interval at which failed events will be retried + + :param timeout: interval in seconds + :type timeout: int | float + """ + self.retry_timer.start(timeout=timeout) + + def set_flush_timer(self, timeout: float) -> None: + """ + Set an interval at which the buffer will be flushed + :param timeout: interval in seconds + :type timeout: int | float + """ + self.timer.start(timeout=timeout) + + def cancel_flush_timer(self) -> None: + """ + Abort automatic async flushing + """ + self.timer.cancel() + + @staticmethod + def attach_sent_timestamp(events: PayloadDictList) -> None: + """ + Attach (by mutating in-place) current timestamp in milliseconds + as `stm` param + + :param events: Array of events to be sent + :type events: list(dict(string:\\*)) + :rtype: None + """ + + def update(e: PayloadDict) -> None: + e.update({"stm": str(int(time.time()) * 1000)}) + + for event in events: + update(event) + + def _should_retry(self, status_code: int) -> bool: + """ + Checks if a request should be retried + + :param status_code: Response status code + :type status_code: int + :rtype: bool + """ + if Emitter.is_good_status_code(status_code): + return False + + if status_code in self.custom_retry_codes.keys(): + return self.custom_retry_codes[status_code] + + return status_code not in [400, 401, 403, 410, 422] + + def _set_retry_delay(self) -> None: + """ + Sets a delay to retry failed events + """ + 
random_noise = random.random() + self.retry_delay = min( + self.retry_delay * 2 + random_noise, self.max_retry_delay_seconds + ) + + def _reset_retry_delay(self) -> None: + """ + Resets retry delay to 0 + """ + self.retry_delay = 0 + + def _retry_failed_events(self, failed_events) -> None: + """ + Adds failed events back to the buffer to retry + + :param failed_events: List of failed events + :type List + """ + self.event_store.cleanup(failed_events, True) + self._set_retry_timer(self.retry_delay) + + def _cancel_retry_timer(self) -> None: + """ + Cancels a retry timer + """ + self.retry_timer.cancel() + + # This is only here to satisfy the `EmitterProtocol` interface + def async_flush(self) -> None: + return + + +class AsyncEmitter(Emitter): + """ + Uses threads to send HTTP requests asynchronously + """ + + def __init__( + self, + endpoint: str, + protocol: HttpProtocol = "http", + port: Optional[int] = None, + method: Method = "post", + batch_size: Optional[int] = None, + on_success: Optional[SuccessCallback] = None, + on_failure: Optional[FailureCallback] = None, + thread_count: int = 1, + byte_limit: Optional[int] = None, + request_timeout: Optional[Union[float, Tuple[float, float]]] = None, + max_retry_delay_seconds: int = 60, + buffer_capacity: Optional[int] = None, + custom_retry_codes: Dict[int, bool] = {}, + event_store: Optional[EventStore] = None, + session: Optional[requests.Session] = None, + ) -> None: + """ + :param endpoint: The collector URL. If protocol is not set in endpoint it will automatically set to "https://" - this is done automatically. + :type endpoint: string + :param protocol: The protocol to use - http or https. Defaults to http. + :type protocol: protocol + :param port: The collector port to connect to + :type port: int | None + :param method: The HTTP request method + :type method: method + :param batch_size: The maximum number of queued events before the buffer is flushed. Default is 10. 
+ :type batch_size: int | None + :param on_success: Callback executed after every HTTP request in a flush has status code 200 + Gets passed one argument, an array of dictionaries corresponding to the sent events' payloads + :type on_success: function | None + :param on_failure: Callback executed if at least one HTTP request in a flush has status code other than 200 + Gets passed two arguments: + 1) The number of events which were successfully sent + 2) An array of dictionaries corresponding to the unsent events' payloads + :type on_failure: function | None + :param thread_count: Number of worker threads to use for HTTP requests + :type thread_count: int + :param byte_limit: The size event list after reaching which queued events will be flushed + :type byte_limit: int | None + :param max_retry_delay_seconds: Set the maximum time between attempts to send failed events to the collector. Default 60 seconds + :type max_retry_delay_seconds: int + :param buffer_capacity: The maximum capacity of the event buffer. + When the buffer is full new events are lost. + :type buffer_capacity: int + :param event_store: Stores the event buffer and buffer capacity. Default is an InMemoryEventStore object with buffer_capacity of 10,000 events. 
+ :type event_store: EventStore + :param session: Persist parameters across requests by using a session object + :type session: requests.Session | None + """ + super(AsyncEmitter, self).__init__( + endpoint=endpoint, + protocol=protocol, + port=port, + method=method, + batch_size=batch_size, + on_success=on_success, + on_failure=on_failure, + byte_limit=byte_limit, + request_timeout=request_timeout, + max_retry_delay_seconds=max_retry_delay_seconds, + buffer_capacity=buffer_capacity, + custom_retry_codes=custom_retry_codes, + event_store=event_store, + session=session, + ) + self.queue: Queue = Queue() + for i in range(thread_count): + t = threading.Thread(target=self.consume) + t.daemon = True + t.start() + + def sync_flush(self) -> None: + while True: + self.flush() + self.queue.join() + if self.event_store.size() < 1: + break + + def flush(self) -> None: + """ + Removes all dead threads, then creates a new thread which + executes the flush method of the base Emitter class + """ + with self.lock: + self.queue.put(self.event_store.get_events_batch()) + if self.bytes_queued is not None: + self.bytes_queued = 0 + + def consume(self) -> None: + while True: + evts = self.queue.get() + self.send_events(evts) + self.queue.task_done() + + +class FlushTimer(object): + """ + Internal class used by the Emitter to schedule flush calls for later. 
+ """ + + def __init__(self, emitter: Emitter, repeating: bool): + self.emitter = emitter + self.repeating = repeating + self.timer: Optional[threading.Timer] = None + self.lock = threading.RLock() + + def start(self, timeout: float) -> bool: + with self.lock: + if self.timer is not None: + return False + else: + self._schedule_timer(timeout=timeout) + return True + + def cancel(self) -> None: + with self.lock: + if self.timer is not None: + self.timer.cancel() + self.timer = None + + def is_active(self) -> bool: + with self.lock: + return self.timer is not None + + def _fire(self, timeout: float) -> None: + with self.lock: + if self.repeating: + self._schedule_timer(timeout) + else: + self.timer = None + + self.emitter.flush() + + def _schedule_timer(self, timeout: float) -> None: + self.timer = threading.Timer(timeout, self._fire, [timeout]) + self.timer.daemon = True + self.timer.start() diff --git a/snowplow_tracker/event_store.py b/snowplow_tracker/event_store.py new file mode 100644 index 00000000..b8d13028 --- /dev/null +++ b/snowplow_tracker/event_store.py @@ -0,0 +1,139 @@ +# """ +# event_store.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ + +from typing import List +from typing_extensions import Protocol +from snowplow_tracker.typing import PayloadDict, PayloadDictList +from logging import Logger + + +class EventStore(Protocol): + """ + EventStore protocol. For buffering events in the Emitter. + """ + + def add_event(self, payload: PayloadDict) -> bool: + """ + Add PayloadDict to buffer. Returns True if successful. + + :param payload: The payload to add + :type payload: PayloadDict + :rtype bool + """ + ... + + def get_events_batch(self) -> PayloadDictList: + """ + Get a list of all the PayloadDicts in the buffer. + + :rtype PayloadDictList + """ + ... + + def cleanup(self, batch: PayloadDictList, need_retry: bool) -> None: + """ + Removes sent events from the event store. If events need to be retried they are re-added to the buffer. + + :param batch: The events to be removed from the buffer + :type batch: PayloadDictList + :param need_retry Whether the events should be re-sent or not + :type need_retry bool + """ + ... + + def size(self) -> int: + """ + Returns the number of events in the buffer + + :rtype int + """ + ... + + +class InMemoryEventStore(EventStore): + """ + Create a InMemoryEventStore object with custom buffer capacity. The default is 10,000 events. + """ + + def __init__(self, logger: Logger, buffer_capacity: int = 10000) -> None: + """ + :param logger: Logging module + :type logger: Logger + :param buffer_capacity: The maximum capacity of the event buffer. + When the buffer is full new events are lost. + :type buffer_capacity int + """ + self.event_buffer: List[PayloadDict] = [] + self.buffer_capacity = buffer_capacity + self.logger = logger + + def add_event(self, payload: PayloadDict) -> bool: + """ + Add PayloadDict to buffer. 
+ + :param payload: The payload to add + :type payload: PayloadDict + """ + if self._buffer_capacity_reached(): + self.logger.error("Event buffer is full, dropping event.") + return False + + self.event_buffer.append(payload) + return True + + def get_events_batch(self) -> PayloadDictList: + """ + Get a list of all the PayloadDicts in the in the buffer. + + :rtype PayloadDictList + """ + batch = self.event_buffer + self.event_buffer = [] + return batch + + def cleanup(self, batch: PayloadDictList, need_retry: bool) -> None: + """ + Removes sent events from the InMemoryEventStore buffer. If events need to be retried they are re-added to the buffer. + + :param batch: The events to be removed from the buffer + :type batch: PayloadDictList + :param need_retry Whether the events should be re-sent or not + :type need_retry bool + """ + if not need_retry: + return + + for event in batch: + if not event in self.event_buffer: + if not self.add_event(event): + return + + def size(self) -> int: + """ + Returns the number of events in the buffer + + :rtype int + """ + return len(self.event_buffer) + + def _buffer_capacity_reached(self) -> bool: + """ + Returns true if buffer capacity is reached + + :rtype: bool + """ + return self.size() >= self.buffer_capacity diff --git a/snowplow_tracker/events/CLAUDE.md b/snowplow_tracker/events/CLAUDE.md new file mode 100644 index 00000000..efc0f5ab --- /dev/null +++ b/snowplow_tracker/events/CLAUDE.md @@ -0,0 +1,284 @@ +# Snowplow Event Types - CLAUDE.md + +## Directory Overview + +The `events/` directory contains all event type implementations for the Snowplow Python Tracker. Each event class represents a specific type of analytics event that can be sent to Snowplow collectors. All events inherit from the base `Event` class and follow a consistent pattern for construction, validation, and payload generation. 
+ +## Event Class Hierarchy + +``` +Event (base class) +├── PageView # Web page view tracking +├── PagePing # Page engagement/heartbeat +├── ScreenView # Mobile/app screen views +├── StructuredEvent # Generic 5-parameter events +└── SelfDescribing # Custom schema events +``` + +## Core Event Patterns + +### Event Construction Pattern +```python +# ✅ Use keyword arguments for clarity +event = PageView( + page_url="https://example.com", + page_title="Homepage", + referrer="https://google.com" +) + +# ❌ Don't use positional arguments +event = PageView("https://example.com", "Homepage") +``` + +### Event Context Pattern +```python +# ✅ Add contexts as SelfDescribingJson list +geo_context = SelfDescribingJson( + "iglu:com.acme/geolocation/jsonschema/1-0-0", + {"latitude": 40.0, "longitude": -73.0} +) +event = PageView(page_url="...", context=[geo_context]) + +# ❌ Don't use raw dictionaries for context +event.context = [{"latitude": 40.0}] # Missing schema! +``` + +### Event Subject Override Pattern +```python +# ✅ Override tracker subject for specific event +special_subject = Subject() +special_subject.set_user_id("anonymous_user") +event = StructuredEvent( + category="shop", + action="view", + event_subject=special_subject +) + +# ❌ Don't modify shared subject +tracker.subject.set_user_id("temp") # Affects all events +``` + +### True Timestamp Pattern +```python +# ✅ Use milliseconds for true_timestamp +import time +timestamp_ms = time.time() * 1000 +event = PageView( + page_url="...", + true_timestamp=timestamp_ms +) + +# ❌ Don't use seconds +event = PageView(true_timestamp=time.time()) +``` + +## Event-Specific Patterns + +### PageView Events +```python +# ✅ Complete PageView with all fields +page_view = PageView( + page_url="https://example.com/products", + page_title="Products", + referrer="https://example.com/home" +) + +# ❌ Missing required page_url +page_view = PageView(page_title="Products") +``` + +### StructuredEvent Pattern +```python +# ✅ Use descriptive 
category/action pairs +event = StructuredEvent( + category="ecommerce", + action="add-to-cart", + label="SKU-123", + property_="size:XL", + value=29.99 +) + +# ❌ Generic naming loses meaning +event = StructuredEvent("event", "click") +``` + +### SelfDescribing Events +```python +# ✅ Custom events with Iglu schemas +purchase_event = SelfDescribing( + SelfDescribingJson( + "iglu:com.acme/purchase/jsonschema/2-0-0", + { + "orderId": "ORD-123", + "total": 99.99, + "currency": "USD" + } + ) +) + +# ❌ Missing schema version +event = SelfDescribing( + SelfDescribingJson("iglu:com.acme/purchase", {...}) +) +``` + +### ScreenView Pattern (Mobile) +```python +# ✅ Mobile screen tracking with ID +screen = ScreenView( + name="ProductDetailScreen", + id_="screen-456", + previous_name="ProductListScreen" +) + +# ❌ Using PageView for mobile apps +page = PageView(page_url="app://product-detail") +``` + +## Event Validation Rules + +### Required Fields by Event Type +- **PageView**: `page_url` (required), `page_title`, `referrer` +- **StructuredEvent**: `category`, `action` (required), `label`, `property_`, `value` +- **SelfDescribing**: `event_json` (SelfDescribingJson required) +- **ScreenView**: `name` or `id_` (at least one required) +- **PagePing**: `page_url` (required) + +### Schema Validation Pattern +```python +# ✅ Validate schema format +SCHEMA_PATTERN = r"^iglu:[a-zA-Z0-9-_.]+/[a-zA-Z0-9-_]+/" +SCHEMA_PATTERN += r"[a-zA-Z0-9-_]+/[0-9]+-[0-9]+-[0-9]+$" + +# ❌ Invalid schema formats +"iglu:com.acme/event" # Missing version +"com.acme/event/1-0-0" # Missing iglu: prefix +``` + +## Payload Building Pattern + +### Internal Payload Construction +```python +# ✅ Event classes handle payload internally +def build_payload(self, encode_base64, json_encoder, subject): + # Add event-specific fields + self.payload.add("e", "pv") # Page view type + self.payload.add("url", self.page_url) + + # Let base class handle common fields + return super().build_payload(encode_base64, json_encoder, 
subject) + +# ❌ Don't expose payload building to users +event.payload = Payload() +event.payload.add("custom", "field") +``` + +## Testing Event Classes + +### Unit Test Pattern +```python +# ✅ Test event construction and validation +def test_page_view_required_fields(): + with self.assertRaises(TypeError): + PageView() # Missing required page_url + + event = PageView(page_url="https://test.com") + assert event.page_url == "https://test.com" + +# ✅ Test payload generation +def test_event_payload(): + event = PageView(page_url="https://test.com") + payload = event.build_payload(False, None, None) + assert payload.get()["url"] == "https://test.com" +``` + +### Context Testing Pattern +```python +# ✅ Test context attachment +def test_event_context(): + context = SelfDescribingJson(schema, data) + event = PageView(page_url="...", context=[context]) + + payload = event.build_payload(True, None, None) + assert "cx" in payload.get() # Base64 context +``` + +## Common Event Pitfalls + +### Timestamp Confusion +```python +# ❌ Mixing timestamp types +event.true_timestamp = "2024-01-01" # String not allowed +event.true_timestamp = datetime.now() # Use milliseconds + +# ✅ Consistent millisecond timestamps +event.true_timestamp = int(time.time() * 1000) +``` + +### Context Array Management +```python +# ❌ Modifying context after creation +event.context.append(new_context) # Unexpected behavior + +# ✅ Set complete context at creation +all_contexts = [context1, context2] +event = PageView(page_url="...", context=all_contexts) +``` + +### Schema Version Control +```python +# ❌ Hardcoding schema versions +schema = "iglu:com.acme/event/jsonschema/1-0-0" + +# ✅ Centralize schema definitions +PURCHASE_SCHEMA = "iglu:com.acme/purchase/jsonschema/2-1-0" +event = SelfDescribing(SelfDescribingJson(PURCHASE_SCHEMA, data)) +``` + +## Event Migration Guide + +### Upgrading Event Schemas +```python +# From version 1-0-0 to 2-0-0 +# ✅ Handle backward compatibility +def 
create_purchase_event(data): + if "items" in data: # New schema + schema = "iglu:.../purchase/jsonschema/2-0-0" + else: # Old schema + schema = "iglu:.../purchase/jsonschema/1-0-0" + + return SelfDescribing(SelfDescribingJson(schema, data)) +``` + +## Quick Reference + +### Event Type Selection +- **PageView**: Traditional web page tracking +- **ScreenView**: Mobile app screen tracking +- **StructuredEvent**: Generic business events +- **SelfDescribing**: Complex custom events +- **PagePing**: Engagement/time-on-page tracking + +### Event Field Checklist +- [ ] Required fields provided +- [ ] Timestamps in milliseconds +- [ ] Contexts as SelfDescribingJson array +- [ ] Valid Iglu schema format +- [ ] Event-specific subject if needed + +### Common Event Methods +- `build_payload()`: Internal payload generation +- `event_subject`: Per-event user context +- `context`: Custom context array +- `true_timestamp`: User-defined timestamp + +## Contributing to events/CLAUDE.md + +When modifying event implementations or adding new event types: + +1. **Follow the Event base class pattern** - All events must inherit from Event +2. **Implement required abstract methods** - Ensure payload building works correctly +3. **Document required fields** - Update this file with new event requirements +4. **Add comprehensive tests** - Test construction, validation, and payload generation +5. **Maintain backward compatibility** - Don't break existing event APIs +6. **Update schema constants** - Add new schemas to constants.py if needed \ No newline at end of file diff --git a/snowplow_tracker/events/__init__.py b/snowplow_tracker/events/__init__.py new file mode 100644 index 00000000..0f75c84f --- /dev/null +++ b/snowplow_tracker/events/__init__.py @@ -0,0 +1,22 @@ +# """ +# __init__.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. 
+ +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ +from snowplow_tracker.events.event import Event +from snowplow_tracker.events.page_ping import PagePing +from snowplow_tracker.events.page_view import PageView +from snowplow_tracker.events.self_describing import SelfDescribing +from snowplow_tracker.events.structured_event import StructuredEvent +from snowplow_tracker.events.screen_view import ScreenView diff --git a/snowplow_tracker/events/event.py b/snowplow_tracker/events/event.py new file mode 100644 index 00000000..fb300b87 --- /dev/null +++ b/snowplow_tracker/events/event.py @@ -0,0 +1,136 @@ +# """ +# event.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ + +from typing import Optional, List +from snowplow_tracker import payload +from snowplow_tracker.subject import Subject + +from snowplow_tracker.self_describing_json import SelfDescribingJson + +from snowplow_tracker.constants import CONTEXT_SCHEMA +from snowplow_tracker.typing import JsonEncoderFunction, PayloadDict + + +class Event(object): + """ + Event class which contains + elements that can be set in all events. These are context, trueTimestamp, and Subject. + + Context is a list of custom SelfDescribingJson entities. + TrueTimestamp is a user-defined timestamp. + Subject is an event-specific Subject. Its fields will override those of the + Tracker-associated Subject, if present. + + """ + + def __init__( + self, + dict_: Optional[PayloadDict] = None, + event_subject: Optional[Subject] = None, + context: Optional[List[SelfDescribingJson]] = None, + true_timestamp: Optional[float] = None, + ) -> None: + """ + Constructor + :param dict_: Optional Dictionary to be added to the Events Payload + :type dict_: dict(string:\\*) | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :param context: Custom context for the event + :type context: context_array | None + :param true_timestamp: Optional event timestamp in milliseconds + :type true_timestamp: int | float | None + + """ + self.payload = payload.Payload(dict_=dict_) + self.event_subject = event_subject + self.context = context or [] + self.true_timestamp = true_timestamp + + def build_payload( + self, + encode_base64: bool, + json_encoder: Optional[JsonEncoderFunction], + subject: Optional[Subject] = None, + ) -> "payload.Payload": + """ + :param encode_base64: Whether JSONs in the payload should be base-64 encoded + :type encode_base64: bool + :param json_encoder: Custom JSON serializer that gets called on non-serializable object + :type json_encoder: function | None + :param subject: Optional per event subject + :type subject: subject | None + :rtype: 
payload.Payload + """ + if len(self.context) > 0: + context_jsons = list(map(lambda c: c.to_json(), self.context)) + context_envelope = SelfDescribingJson( + CONTEXT_SCHEMA, context_jsons + ).to_json() + self.payload.add_json( + context_envelope, encode_base64, "cx", "co", json_encoder + ) + + if isinstance( + self.true_timestamp, + ( + int, + float, + ), + ): + self.payload.add("ttm", int(self.true_timestamp)) + + if self.event_subject is not None: + fin_payload_dict = self.event_subject.combine_subject(subject) + else: + fin_payload_dict = {} if subject is None else subject.standard_nv_pairs + + self.payload.add_dict(fin_payload_dict) + return self.payload + + @property + def event_subject(self) -> Optional[Subject]: + """ + Optional per event subject + """ + return self._event_subject + + @event_subject.setter + def event_subject(self, value: Optional[Subject]): + self._event_subject = value + + @property + def context(self) -> List[SelfDescribingJson]: + """ + Custom context for the event + """ + return self._context + + @context.setter + def context(self, value: List[SelfDescribingJson]): + self._context = value + + @property + def true_timestamp(self) -> Optional[float]: + """ + Optional event timestamp in milliseconds + """ + return self._true_timestamp + + @true_timestamp.setter + def true_timestamp(self, value: Optional[float]): + self._true_timestamp = value diff --git a/snowplow_tracker/events/page_ping.py b/snowplow_tracker/events/page_ping.py new file mode 100644 index 00000000..43bbb210 --- /dev/null +++ b/snowplow_tracker/events/page_ping.py @@ -0,0 +1,155 @@ +# """ +# page_ping.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. 
# Unless required by applicable law or agreed to in writing,
# software distributed under the Apache License Version 2.0 is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the Apache License Version 2.0 for the specific
# language governing permissions and limitations there under.
# """
from snowplow_tracker.events.event import Event
from typing import Optional, List
from snowplow_tracker.self_describing_json import SelfDescribingJson
from snowplow_tracker.subject import Subject
from snowplow_tracker.contracts import non_empty_string


class PagePing(Event):
    """
    Constructs a PagePing event object.

    When tracked, generates a "pp" or "page_ping" event.

    """

    def __init__(
        self,
        page_url: str,
        page_title: Optional[str] = None,
        referrer: Optional[str] = None,
        min_x: Optional[int] = None,
        max_x: Optional[int] = None,
        min_y: Optional[int] = None,
        max_y: Optional[int] = None,
        event_subject: Optional[Subject] = None,
        context: Optional[List[SelfDescribingJson]] = None,
        true_timestamp: Optional[float] = None,
    ) -> None:
        """
        :param page_url: URL of the viewed page
        :type page_url: non_empty_string
        :param page_title: Title of the viewed page
        :type page_title: string_or_none
        :param referrer: Referrer of the page
        :type referrer: string_or_none
        :param min_x: Minimum page x offset seen in the last ping period
        :type min_x: int | None
        :param max_x: Maximum page x offset seen in the last ping period
        :type max_x: int | None
        :param min_y: Minimum page y offset seen in the last ping period
        :type min_y: int | None
        :param max_y: Maximum page y offset seen in the last ping period
        :type max_y: int | None
        :param event_subject: Optional per event subject
        :type event_subject: subject | None
        :param context: Custom context for the event
        :type context: context_array | None
        :param true_timestamp: Optional event timestamp in milliseconds
        :type true_timestamp: int | float | None
        """
        super(PagePing, self).__init__(
            event_subject=event_subject, context=context, true_timestamp=true_timestamp
        )
        # "pp" is the tracker-protocol event type code for a page ping.
        self.payload.add("e", "pp")
        # Assignments below go through the property setters, which write
        # straight into the payload; None values are dropped by Payload.add.
        self.page_url = page_url
        self.page_title = page_title
        self.referrer = referrer
        self.min_x = min_x
        self.max_x = max_x
        self.min_y = min_y
        self.max_y = max_y

    @property
    def page_url(self) -> str:
        """
        URL of the viewed page
        """
        return self.payload.nv_pairs["url"]

    @page_url.setter
    def page_url(self, value: str):
        non_empty_string(value)
        self.payload.add("url", value)

    @property
    def page_title(self) -> Optional[str]:
        """
        Title of the viewed page
        """
        return self.payload.nv_pairs.get("page")

    @page_title.setter
    def page_title(self, value: Optional[str]):
        self.payload.add("page", value)

    @property
    def referrer(self) -> Optional[str]:
        """
        The referrer of the page
        """
        return self.payload.nv_pairs.get("refr")

    @referrer.setter
    def referrer(self, value: Optional[str]):
        self.payload.add("refr", value)

    @property
    def min_x(self) -> Optional[int]:
        """
        Minimum page x offset seen in the last ping period
        """
        return self.payload.nv_pairs.get("pp_mix")

    @min_x.setter
    def min_x(self, value: Optional[int]):
        self.payload.add("pp_mix", value)

    @property
    def max_x(self) -> Optional[int]:
        """
        Maximum page x offset seen in the last ping period
        """
        return self.payload.nv_pairs.get("pp_max")

    @max_x.setter
    def max_x(self, value: Optional[int]):
        self.payload.add("pp_max", value)

    @property
    def min_y(self) -> Optional[int]:
        """
        Minimum page y offset seen in the last ping period
        """
        return self.payload.nv_pairs.get("pp_miy")

    @min_y.setter
    def min_y(self, value: Optional[int]):
        self.payload.add("pp_miy", value)

    @property
    def max_y(self) -> Optional[int]:
        """
        Maximum page y offset seen in the last ping period
        """
        return self.payload.nv_pairs.get("pp_may")

    @max_y.setter
    def max_y(self, value: Optional[int]):
        self.payload.add("pp_may", value)


# """
# page_view.py

# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved.

# This program is licensed to you under the Apache License Version 2.0,
# and you may not use this file except in compliance with the Apache License
# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at
# http://www.apache.org/licenses/LICENSE-2.0.

# Unless required by applicable law or agreed to in writing,
# software distributed under the Apache License Version 2.0 is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the Apache License Version 2.0 for the specific
# language governing permissions and limitations there under.
# """
from snowplow_tracker.events.event import Event
from typing import Optional, List
from snowplow_tracker.subject import Subject
from snowplow_tracker.self_describing_json import SelfDescribingJson
from snowplow_tracker.contracts import non_empty_string


class PageView(Event):
    """
    Constructs a PageView event object.

    When tracked, generates a "pv" or "page_view" event.

    """

    def __init__(
        self,
        page_url: str,
        page_title: Optional[str] = None,
        referrer: Optional[str] = None,
        event_subject: Optional[Subject] = None,
        context: Optional[List[SelfDescribingJson]] = None,
        true_timestamp: Optional[float] = None,
    ) -> None:
        """
        :param page_url: URL of the viewed page
        :type page_url: non_empty_string
        :param page_title: Title of the viewed page
        :type page_title: string_or_none
        :param referrer: Referrer of the page
        :type referrer: string_or_none
        :param event_subject: Optional per event subject
        :type event_subject: subject | None
        :param context: Custom context for the event
        :type context: context_array | None
        :param true_timestamp: Optional event timestamp in milliseconds
        :type true_timestamp: int | float | None
        """
        super(PageView, self).__init__(
            event_subject=event_subject, context=context, true_timestamp=true_timestamp
        )
        # "pv" is the tracker-protocol event type code for a page view.
        self.payload.add("e", "pv")
        self.page_url = page_url
        self.page_title = page_title
        self.referrer = referrer

    @property
    def page_url(self) -> str:
        """
        URL of the viewed page
        """
        return self.payload.nv_pairs["url"]

    @page_url.setter
    def page_url(self, value: str):
        non_empty_string(value)
        self.payload.add("url", value)

    @property
    def page_title(self) -> Optional[str]:
        """
        Title of the viewed page
        """
        return self.payload.nv_pairs.get("page")

    @page_title.setter
    def page_title(self, value: Optional[str]):
        self.payload.add("page", value)

    @property
    def referrer(self) -> Optional[str]:
        """
        The referrer of the page
        """
        return self.payload.nv_pairs.get("refr")

    @referrer.setter
    def referrer(self, value: Optional[str]):
        self.payload.add("refr", value)


# """
# screen_view.py

# Copyright (c)
# 2013-2023 Snowplow Analytics Ltd. All rights reserved.

# This program is licensed to you under the Apache License Version 2.0,
# and you may not use this file except in compliance with the Apache License
# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at
# http://www.apache.org/licenses/LICENSE-2.0.

# Unless required by applicable law or agreed to in writing,
# software distributed under the Apache License Version 2.0 is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the Apache License Version 2.0 for the specific
# language governing permissions and limitations there under.
# """

from typing import Dict, Optional, List
from snowplow_tracker.typing import JsonEncoderFunction
from snowplow_tracker.events.event import Event
from snowplow_tracker.events.self_describing import SelfDescribing
from snowplow_tracker import SelfDescribingJson
from snowplow_tracker.constants import (
    MOBILE_SCHEMA_PATH,
    SCHEMA_TAG,
)
from snowplow_tracker import payload
from snowplow_tracker.subject import Subject
from snowplow_tracker.contracts import non_empty_string


class ScreenView(Event):
    """
    Constructs a ScreenView event object.

    When tracked, generates a SelfDescribing event (event type "ue").

    Schema: `iglu:com.snowplowanalytics.mobile/screen_view/jsonschema/1-0-0`
    """

    def __init__(
        self,
        id_: str,
        name: str,
        type: Optional[str] = None,
        previous_name: Optional[str] = None,
        previous_id: Optional[str] = None,
        previous_type: Optional[str] = None,
        transition_type: Optional[str] = None,
        event_subject: Optional[Subject] = None,
        context: Optional[List[SelfDescribingJson]] = None,
        true_timestamp: Optional[float] = None,
    ) -> None:
        """
        :param id_: Screen view ID. This must be of type UUID.
        :type id_: string
        :param name: The name of the screen view event
        :type name: string
        :param type: The type of screen that was viewed e.g feed / carousel.
        :type type: string | None
        :param previous_name: The name of the previous screen.
        :type previous_name: string | None
        :param previous_id: The screenview ID of the previous screenview.
        :type previous_id: string | None
        :param previous_type: The screen type of the previous screenview
        :type previous_type: string | None
        :param transition_type: The type of transition that led to the screen being viewed.
        :type transition_type: string | None
        :param event_subject: Optional per event subject
        :type event_subject: subject | None
        :param context: Custom context for the event
        :type context: context_array | None
        :param true_timestamp: Optional event timestamp in milliseconds
        :type true_timestamp: int | float | None
        """
        super(ScreenView, self).__init__(
            event_subject=event_subject, context=context, true_timestamp=true_timestamp
        )
        # Collected properties become the "data" of the screen_view
        # self-describing JSON in build_payload(). Optional fields that are
        # None are never stored (see the setters below).
        self.screen_view_properties: Dict[str, str] = {}
        self.id_ = id_
        self.name = name
        self.type = type
        self.previous_name = previous_name
        self.previous_id = previous_id
        self.previous_type = previous_type
        self.transition_type = transition_type

    @property
    def id_(self) -> str:
        """
        Screen view ID. This must be of type UUID.
        """
        return self.screen_view_properties["id"]

    @id_.setter
    def id_(self, value: str):
        non_empty_string(value)
        self.screen_view_properties["id"] = value

    @property
    def name(self) -> str:
        """
        The name of the screen view event
        """
        return self.screen_view_properties["name"]

    @name.setter
    def name(self, value: str):
        non_empty_string(value)
        self.screen_view_properties["name"] = value

    @property
    def type(self) -> Optional[str]:
        """
        The type of screen that was viewed e.g feed / carousel
        """
        # Use .get(): the setter skips None, so the key may be absent and
        # direct indexing would raise KeyError for an unset optional field.
        return self.screen_view_properties.get("type")

    @type.setter
    def type(self, value: Optional[str]):
        if value is not None:
            self.screen_view_properties["type"] = value

    @property
    def previous_name(self) -> Optional[str]:
        """
        The name of the previous screen.
        """
        return self.screen_view_properties.get("previousName")

    @previous_name.setter
    def previous_name(self, value: Optional[str]):
        if value is not None:
            self.screen_view_properties["previousName"] = value

    @property
    def previous_id(self) -> Optional[str]:
        """
        The screenview ID of the previous screenview.
        """
        return self.screen_view_properties.get("previousId")

    @previous_id.setter
    def previous_id(self, value: Optional[str]):
        if value is not None:
            self.screen_view_properties["previousId"] = value

    @property
    def previous_type(self) -> Optional[str]:
        """
        The screen type of the previous screenview
        """
        return self.screen_view_properties.get("previousType")

    @previous_type.setter
    def previous_type(self, value: Optional[str]):
        if value is not None:
            self.screen_view_properties["previousType"] = value

    @property
    def transition_type(self) -> Optional[str]:
        """
        The type of transition that led to the screen being viewed
        """
        return self.screen_view_properties.get("transitionType")

    @transition_type.setter
    def transition_type(self, value: Optional[str]):
        if value is not None:
            self.screen_view_properties["transitionType"] = value

    def build_payload(
        self,
        encode_base64: bool,
        json_encoder: Optional[JsonEncoderFunction],
        subject: Optional[Subject] = None,
    ) -> "payload.Payload":
        """
        Wrap the collected screen-view properties in the mobile screen_view
        schema and delegate payload construction to SelfDescribing.

        :param encode_base64: Whether JSONs in the payload should be base-64 encoded
        :type encode_base64: bool
        :param json_encoder: Custom JSON serializer that gets called on non-serializable object
        :type json_encoder: function | None
        :param subject: Optional per event subject
        :type subject: subject | None
        :rtype: payload.Payload
        """
        event_json = SelfDescribingJson(
            "%s/screen_view/%s/1-0-0" % (MOBILE_SCHEMA_PATH, SCHEMA_TAG),
            self.screen_view_properties,
        )
        self_describing = SelfDescribing(
            event_json=event_json,
            event_subject=self.event_subject,
            context=self.context,
            true_timestamp=self.true_timestamp,
        )
        return self_describing.build_payload(
            encode_base64, json_encoder, subject=subject
        )
# """
# self_describing.py

# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved.

# This program is licensed to you under the Apache License Version 2.0,
# and you may not use this file except in compliance with the Apache License
# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at
# http://www.apache.org/licenses/LICENSE-2.0.

# Unless required by applicable law or agreed to in writing,
# software distributed under the Apache License Version 2.0 is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the Apache License Version 2.0 for the specific
# language governing permissions and limitations there under.
# """
from typing import Optional, List
from snowplow_tracker.typing import JsonEncoderFunction
from snowplow_tracker.events.event import Event
from snowplow_tracker import SelfDescribingJson
from snowplow_tracker.constants import UNSTRUCT_EVENT_SCHEMA
from snowplow_tracker import payload
from snowplow_tracker.subject import Subject
from snowplow_tracker.contracts import non_empty


class SelfDescribing(Event):
    """
    Constructs a SelfDescribing event object.

    This is a customisable event type which allows you to track anything describable
    by a JsonSchema.

    When tracked, generates a self-describing event (event type "ue").
    """

    def __init__(
        self,
        event_json: SelfDescribingJson,
        event_subject: Optional[Subject] = None,
        context: Optional[List[SelfDescribingJson]] = None,
        true_timestamp: Optional[float] = None,
    ) -> None:
        """
        :param event_json: The properties of the event. Has two fields:
                           A "data" field containing the event properties and
                           A "schema" field identifying the schema against which the data is validated
        :type event_json: self_describing_json
        :param event_subject: Optional per event subject
        :type event_subject: subject | None
        :param context: Custom context for the event
        :type context: context_array | None
        :param true_timestamp: Optional event timestamp in milliseconds
        :type true_timestamp: int | float | None
        """
        super(SelfDescribing, self).__init__(
            event_subject=event_subject, context=context, true_timestamp=true_timestamp
        )
        # "ue" ("unstructured event") is the tracker-protocol code for a
        # self-describing event.
        self.payload.add("e", "ue")
        self.event_json = event_json

    @property
    def event_json(self) -> SelfDescribingJson:
        """
        The properties of the event. Has two fields:
        A "data" field containing the event properties and
        A "schema" field identifying the schema against which the data is validated
        """
        return self._event_json

    @event_json.setter
    def event_json(self, value: SelfDescribingJson):
        self._event_json = value

    def build_payload(
        self,
        encode_base64: bool,
        json_encoder: Optional[JsonEncoderFunction],
        subject: Optional[Subject] = None,
    ) -> "payload.Payload":
        """
        :param encode_base64: Whether JSONs in the payload should be base-64 encoded
        :type encode_base64: bool
        :param json_encoder: Custom JSON serializer that gets called on non-serializable object
        :type json_encoder: function | None
        :param subject: Optional per event subject
        :type subject: subject | None
        :rtype: payload.Payload
        """

        # Wrap the user-supplied JSON in the standard unstruct_event envelope
        # schema before adding it as "ue_px" (encoded) or "ue_pr" (plain).
        envelope = SelfDescribingJson(
            UNSTRUCT_EVENT_SCHEMA, self.event_json.to_json()
        ).to_json()
        self.payload.add_json(envelope, encode_base64, "ue_px", "ue_pr", json_encoder)

        return super(SelfDescribing, self).build_payload(
            encode_base64=encode_base64, json_encoder=json_encoder, subject=subject
        )
# """
# struct_event.py

# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved.

# This program is licensed to you under the Apache License Version 2.0,
# and you may not use this file except in compliance with the Apache License
# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at
# http://www.apache.org/licenses/LICENSE-2.0.

# Unless required by applicable law or agreed to in writing,
# software distributed under the Apache License Version 2.0 is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the Apache License Version 2.0 for the specific
# language governing permissions and limitations there under.
# """
from snowplow_tracker.events.event import Event
from typing import Optional, List, Union
from snowplow_tracker.subject import Subject
from snowplow_tracker.self_describing_json import SelfDescribingJson
from snowplow_tracker.contracts import non_empty_string


class StructuredEvent(Event):
    """
    Constructs a Structured event object.

    This event type is provided to be roughly equivalent to Google Analytics-style events.
    Note that it is not automatically clear what data should be placed in what field.
    To aid data quality and modeling, agree on business-wide definitions when designing
    your tracking strategy.

    We recommend using SelfDescribing - fully custom - events instead.

    When tracked, generates a "struct" or "se" event.
    """

    def __init__(
        self,
        category: str,
        action: str,
        label: Optional[str] = None,
        property_: Optional[str] = None,
        value: Optional[Union[int, float]] = None,
        event_subject: Optional[Subject] = None,
        context: Optional[List[SelfDescribingJson]] = None,
        true_timestamp: Optional[float] = None,
    ) -> None:
        """
        :param category: Category of the event
        :type category: non_empty_string
        :param action: The event itself
        :type action: non_empty_string
        :param label: Refer to the object the action is
                      performed on
        :type label: string_or_none
        :param property_: Property associated with either the action
                          or the object
        :type property_: string_or_none
        :param value: A value associated with the user action
        :type value: int | float | None
        :param event_subject: Optional per event subject
        :type event_subject: subject | None
        :param context: Custom context for the event
        :type context: context_array | None
        :param true_timestamp: Optional event timestamp in milliseconds
        :type true_timestamp: int | float | None
        """
        super(StructuredEvent, self).__init__(
            event_subject=event_subject, context=context, true_timestamp=true_timestamp
        )
        # "se" is the tracker-protocol event type code for a structured event.
        self.payload.add("e", "se")
        self.category = category
        self.action = action
        self.label = label
        # Trailing underscore avoids shadowing the builtin `property`.
        self.property_ = property_
        self.value = value

    @property
    def category(self) -> Optional[str]:
        """
        Category of the event
        """
        return self.payload.nv_pairs.get("se_ca")

    @category.setter
    def category(self, value: str):
        non_empty_string(value)
        self.payload.add("se_ca", value)

    @property
    def action(self) -> Optional[str]:
        """
        The event itself
        """
        return self.payload.nv_pairs.get("se_ac")

    @action.setter
    def action(self, value: str):
        non_empty_string(value)
        self.payload.add("se_ac", value)

    @property
    def label(self) -> Optional[str]:
        """
        Refer to the object the action is performed on
        """
        return self.payload.nv_pairs.get("se_la")

    @label.setter
    def label(self, value: Optional[str]):
        self.payload.add("se_la", value)

    @property
    def property_(self) -> Optional[str]:
        """
        Property associated with either the action or the object
        """
        return self.payload.nv_pairs.get("se_pr")

    @property_.setter
    def property_(self, value: Optional[str]):
        self.payload.add("se_pr", value)

    @property
    def value(self) -> Optional[Union[int, float]]:
        """
        A value associated with the user action
        """
        return self.payload.nv_pairs.get("se_va")

    @value.setter
    def value(self, value: Optional[Union[int, float]]):
        self.payload.add("se_va", value)


# """
# payload.py

# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved.

# This program is licensed to you under the Apache License Version 2.0,
# and you may not use this file except in compliance with the Apache License
# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at
# http://www.apache.org/licenses/LICENSE-2.0.
# Unless required by applicable law or agreed to in writing,
# software distributed under the Apache License Version 2.0 is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the Apache License Version 2.0 for the specific
# language governing permissions and limitations there under.
# """

import json
import base64
from typing import Any, Optional
from snowplow_tracker.typing import PayloadDict, JsonEncoderFunction


class Payload:
    def __init__(self, dict_: Optional[PayloadDict] = None) -> None:
        """
        Create a payload, optionally seeded with an existing set of
        name-value pairs.
        """
        self.nv_pairs = {}

        if dict_ is not None:
            self.nv_pairs.update(dict_)

    # -- Methods to add to the payload ----------------------------------

    def add(self, name: str, value: Any) -> None:
        """
        Add a single name-value pair to the Payload object.

        Empty strings and None are silently dropped.
        """
        if value == "" or value is None:
            return
        self.nv_pairs[name] = value

    def add_dict(self, dict_: PayloadDict, base64: bool = False) -> None:
        """
        Add a dict of name value pairs to the Payload object

        :param dict_: Dictionary to be added to the Payload
        :type dict_: dict(string:\\*)
        """
        for key, val in dict_.items():
            self.add(key, val)

    def add_json(
        self,
        dict_: Optional[PayloadDict],
        encode_base64: bool,
        type_when_encoded: str,
        type_when_not_encoded: str,
        json_encoder: Optional[JsonEncoderFunction] = None,
    ) -> None:
        """
        Add an encoded or unencoded JSON to the payload

        :param dict_: Custom context for the event
        :type dict_: dict(string:\\*) | None
        :param encode_base64: If the payload is base64 encoded
        :type encode_base64: bool
        :param type_when_encoded: Name of the field when encode_base64 is set
        :type type_when_encoded: string
        :param type_when_not_encoded: Name of the field when encode_base64 is not set
        :type type_when_not_encoded: string
        :param json_encoder: Custom JSON serializer that gets called on non-serializable object
        :type json_encoder: function | None
        """
        # Nothing to do for a missing or empty context.
        if dict_ is None or dict_ == {}:
            return

        serialized = json.dumps(dict_, ensure_ascii=False, default=json_encoder)

        if encode_base64:
            encoded = base64.urlsafe_b64encode(serialized.encode("utf-8"))
            self.add(type_when_encoded, encoded.decode("utf-8"))
        else:
            self.add(type_when_not_encoded, serialized)

    def get(self) -> PayloadDict:
        """
        Returns the context dictionary from the Payload object
        """
        return self.nv_pairs
# """
# self_describing_json.py

# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved.

# This program is licensed to you under the Apache License Version 2.0,
# and you may not use this file except in compliance with the Apache License
# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at
# http://www.apache.org/licenses/LICENSE-2.0.

# Unless required by applicable law or agreed to in writing,
# software distributed under the Apache License Version 2.0 is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the Apache License Version 2.0 for the specific
# language governing permissions and limitations there under.
# """

import json
from typing import Union

from snowplow_tracker.typing import PayloadDict, PayloadDictList
from snowplow_tracker.contracts import non_empty_string


class SelfDescribingJson(object):
    """
    A JSON document paired with the schema URI it validates against.
    """

    def __init__(self, schema: str, data: Union[PayloadDict, PayloadDictList]) -> None:
        self.schema = schema
        self.data = data

    @property
    def schema(self) -> str:
        """Schema URI identifying the structure of `data`."""
        return self._schema

    @schema.setter
    def schema(self, value: str):
        # Reject empty schema strings up front.
        non_empty_string(value)
        self._schema = value

    def to_json(self) -> PayloadDict:
        """Return the standard {"schema": ..., "data": ...} envelope."""
        return dict(schema=self.schema, data=self.data)

    def to_string(self) -> str:
        """Serialize the envelope to a JSON string."""
        return json.dumps(self.to_json())


# """
# snowplow.py

# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved.

# This program is licensed to you under the Apache License Version 2.0,
# and you may not use this file except in compliance with the Apache License
# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at
# http://www.apache.org/licenses/LICENSE-2.0.
# Unless required by applicable law or agreed to in writing,
# software distributed under the Apache License Version 2.0 is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the Apache License Version 2.0 for the specific
# language governing permissions and limitations there under.
# """

import logging
from typing import Dict, Optional
from snowplow_tracker import (
    Tracker,
    Emitter,
    subject,
    EmitterConfiguration,
    TrackerConfiguration,
)
from snowplow_tracker.typing import Method

# Logging
logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

"""
Snowplow Class
"""


class Snowplow:
    # Registry of active trackers, keyed by namespace.
    _trackers: Dict[str, Tracker] = {}

    @staticmethod
    def create_tracker(
        namespace: str,
        endpoint: str,
        method: Method = "post",
        app_id: Optional[str] = None,
        subject: Optional[subject.Subject] = None,
        tracker_config: TrackerConfiguration = TrackerConfiguration(),
        emitter_config: EmitterConfiguration = EmitterConfiguration(),
    ) -> Tracker:
        """
        Create a Snowplow tracker with a namespace and collector URL

        :param namespace: Name of the tracker
        :type namespace: String
        :param endpoint: The collector URL
        :type endpoint: String
        :param method: The HTTP request method. Defaults to post.
        :type method: method
        :param app_id: Application ID
        :type app_id: String | None
        :param subject: Subject to be tracked
        :type subject: Subject | None
        :param tracker_config: Tracker configuration
        :type tracker_config: TrackerConfiguration
        :param emitter_config: Emitter configuration
        :type emitter_config: EmitterConfiguration
        :rtype: Tracker
        """
        if endpoint is None:
            raise TypeError("Emitter or Collector URL must be provided")

        emitter = Emitter(
            endpoint=endpoint,
            method=method,
            batch_size=emitter_config.batch_size,
            on_success=emitter_config.on_success,
            on_failure=emitter_config.on_failure,
            byte_limit=emitter_config.byte_limit,
            request_timeout=emitter_config.request_timeout,
            custom_retry_codes=emitter_config.custom_retry_codes,
            event_store=emitter_config.event_store,
            session=emitter_config.session,
        )

        tracker = Tracker(
            namespace=namespace,
            emitters=emitter,
            app_id=app_id,
            subject=subject,
            encode_base64=tracker_config.encode_base64,
            json_encoder=tracker_config.json_encoder,
        )

        return Snowplow.add_tracker(tracker)

    @classmethod
    def add_tracker(cls, tracker: Tracker) -> Optional[Tracker]:
        """
        Add a Snowplow tracker to the Snowplow object

        Returns None (after logging) when `tracker` is not a Tracker instance.

        :param tracker: Tracker object to add to Snowplow
        :type tracker: Tracker
        :rtype: Tracker | None
        """
        if not isinstance(tracker, Tracker):
            logger.info("Tracker not provided.")
            return None

        namespace = tracker.get_namespace()

        # Namespaces must be unique within the registry.
        if namespace in cls._trackers:
            raise TypeError("Tracker with this namespace already exists")

        cls._trackers[namespace] = tracker
        logger.info("Tracker with namespace: '" + namespace + "' added to Snowplow")
        return cls._trackers[namespace]

    @classmethod
    def remove_tracker(cls, tracker: Tracker):
        """
        Remove a Snowplow tracker from the Snowplow object if it exists

        :param tracker: Tracker object to remove from Snowplow
        :type tracker: Tracker | None
        """
        namespace = tracker.get_namespace()
        cls.remove_tracker_by_namespace(namespace)

    @classmethod
    def remove_tracker_by_namespace(cls, namespace: str):
        """
        Remove a Snowplow tracker from the Snowplow object using its namespace if it exists

        :param namespace: Tracker namespace to remove from Snowplow
        :type namespace: String | None
        """
        removed = cls._trackers.pop(namespace, None)
        if removed is None:
            logger.info("Tracker with namespace: '" + namespace + "' does not exist")
            return
        logger.info("Tracker with namespace: '" + namespace + "' removed from Snowplow")

    @classmethod
    def reset(cls):
        """
        Remove all active Snowplow trackers from the Snowplow object
        """
        cls._trackers = {}

    @classmethod
    def get_tracker(cls, namespace: str) -> Optional[Tracker]:
        """
        Returns a Snowplow tracker from the Snowplow object if it exists
        :param namespace: Snowplow tracker namespace
        :type namespace: string
        :rtype: Tracker | None
        """
        # dict.get already returns None for a missing namespace.
        return cls._trackers.get(namespace)


# """
# subject.py

# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved.

# This program is licensed to you under the Apache License Version 2.0,
# and you may not use this file except in compliance with the Apache License
# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at
# http://www.apache.org/licenses/LICENSE-2.0.

# Unless required by applicable law or agreed to in writing,
# software distributed under the Apache License Version 2.0 is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the Apache License Version 2.0 for the specific
# language governing permissions and limitations there under.
# """

from typing import Dict, Optional, Union
from snowplow_tracker.contracts import one_of, greater_than
from snowplow_tracker.typing import SupportedPlatform, SUPPORTED_PLATFORMS, PayloadDict

# Platform reported for every event unless set_platform() is called.
DEFAULT_PLATFORM = "pc"


class Subject(object):
    """
    Class for an event subject, where we view events as of the form

    (Subject) -> (Verb) -> (Object)
    """

    def __init__(self) -> None:
        # Name-value pairs copied verbatim into every event payload built
        # from this subject.
        self.standard_nv_pairs: Dict[str, Union[str, int]] = {"p": DEFAULT_PLATFORM}

    def set_platform(self, value: SupportedPlatform) -> "Subject":
        """
        :param value: One of ["pc", "tv", "mob", "cnsl", "iot", "web", "srv", "app"]
        :type value: supported_platform
        :rtype: subject
        """
        one_of(value, SUPPORTED_PLATFORMS)

        self.standard_nv_pairs["p"] = value
        return self

    def set_user_id(self, user_id: str) -> "Subject":
        """
        :param user_id: User ID
        :type user_id: string
        :rtype: subject
        """
        self.standard_nv_pairs["uid"] = user_id
        return self

    def set_screen_resolution(self, width: int, height: int) -> "Subject":
        """
        :param width: Width of the screen
        :param height: Height of the screen
        :type width: int,>0
        :type height: int,>0
        :rtype: subject
        """
        greater_than(width, 0)
        greater_than(height, 0)

        self.standard_nv_pairs["res"] = "".join([str(width), "x", str(height)])
        return self

    def set_viewport(self, width: int, height: int) -> "Subject":
        """
        :param width: Width of the viewport
        :param height: Height of the viewport
        :type width: int,>0
        :type height: int,>0
        :rtype: subject
        """
        greater_than(width, 0)
        greater_than(height, 0)

        self.standard_nv_pairs["vp"] = "".join([str(width), "x", str(height)])
        return self

    def set_color_depth(self, depth: int) -> "Subject":
        """
        :param depth: Depth of the color on the screen
        :type depth: int
        :rtype: subject
        """
        self.standard_nv_pairs["cd"] = depth
        return self

    def set_timezone(self, timezone: str) -> "Subject":
        """
        :param timezone: Timezone as a string
        :type timezone: string
        :rtype: subject
        """
        self.standard_nv_pairs["tz"] = timezone
        return self

    def set_lang(self, lang: str) -> "Subject":
        """
        Set language.

        :param lang: Language the application is set to
        :type lang: string
        :rtype: subject
        """
        self.standard_nv_pairs["lang"] = lang
        return self

    def set_domain_user_id(self, duid: str) -> "Subject":
        """
        Set the domain user ID

        :param duid: Domain user ID
        :type duid: string
        :rtype: subject
        """
        self.standard_nv_pairs["duid"] = duid
        return self

    def set_domain_session_id(self, sid: str) -> "Subject":
        """
        Set the domain session ID

        :param sid: Domain session ID
        :type sid: string
        :rtype: subject
        """
        self.standard_nv_pairs["sid"] = sid
        return self

    def set_domain_session_index(self, vid: int) -> "Subject":
        """
        Set the domain session Index

        :param vid: Domain session Index
        :type vid: int
        :rtype: subject
        """
        self.standard_nv_pairs["vid"] = vid
        return self

    def set_ip_address(self, ip: str) -> "Subject":
        """
        Set the IP address

        :param ip: IP address
        :type ip: string
        :rtype: subject
        """
        self.standard_nv_pairs["ip"] = ip
        return self

    def set_useragent(self, ua: str) -> "Subject":
        """
        Set the user agent

        :param ua: User agent
        :type ua: string
        :rtype: subject
        """
        self.standard_nv_pairs["ua"] = ua
        return self

    def set_network_user_id(self, nuid: str) -> "Subject":
        """
        Set the network user ID field
        This overwrites the nuid field set by the collector

        :param nuid: Network user ID
        :type nuid: string
        :rtype: subject
        """
        self.standard_nv_pairs["tnuid"] = nuid
        return self

    def combine_subject(self, subject: Optional["Subject"]) -> PayloadDict:
        """
        Merges another instance of Subject, with self taking priority

        :param subject: Subject to merge in (may be None)
        :type subject: subject | None
        :rtype: PayloadDict
        """
        if subject is not None:
            # self's pairs are spread last so they win on key collisions.
            return {**subject.standard_nv_pairs, **self.standard_nv_pairs}

        return self.standard_nv_pairs
**self.standard_nv_pairs} + + return self.standard_nv_pairs diff --git a/snowplow_tracker/test/CLAUDE.md b/snowplow_tracker/test/CLAUDE.md new file mode 100644 index 00000000..08d0b042 --- /dev/null +++ b/snowplow_tracker/test/CLAUDE.md @@ -0,0 +1,365 @@ +# Snowplow Python Tracker Tests - CLAUDE.md + +## Directory Overview + +The `test/` directory contains comprehensive test suites for the Snowplow Python Tracker. Tests are organized into unit tests (isolated component testing) and integration tests (end-to-end collector communication). The test suite uses pytest and unittest.mock for mocking, with freezegun for time-based testing. + +## Test Organization + +``` +test/ +├── unit/ # Isolated component tests +│ ├── test_tracker.py # Tracker class tests +│ ├── test_emitters.py # Emitter functionality +│ ├── test_event.py # Base event class +│ ├── test_payload.py # Payload construction +│ ├── test_contracts.py # Validation logic +│ └── test_*.py # Other component tests +└── integration/ # End-to-end tests + └── test_integration.py # Collector communication +``` + +## Core Testing Patterns + +### Mock Pattern for Emitters +```python +# ✅ Mock emitter for isolated tracker testing +@mock.patch('snowplow_tracker.emitters.Emitter') +def test_tracker_tracks_event(mock_emitter): + tracker = Tracker("test", mock_emitter) + tracker.track(PageView(page_url="test.com")) + mock_emitter.input.assert_called_once() + +# ❌ Don't test with real network calls in unit tests +def test_tracker(): + emitter = Emitter("https://real-collector.com") +``` + +### Contract Testing Pattern +```python +# ✅ Use ContractsDisabled context manager +class ContractsDisabled: + def __enter__(self): + disable_contracts() + def __exit__(self, type, value, traceback): + enable_contracts() + +with ContractsDisabled(): + # Test invalid inputs without raising + tracker.track_page_view(None) + +# ❌ Don't disable contracts globally +disable_contracts() +# ... 
rest of test file +``` + +### Time-Based Testing Pattern +```python +# ✅ Use freezegun for deterministic timestamps +from freezegun import freeze_time + +@freeze_time("2024-01-01 12:00:00") +def test_event_timestamp(): + event = PageView(page_url="test.com") + # Timestamp will be consistent + +# ❌ Don't use actual system time +import time +timestamp = time.time() # Non-deterministic +``` + +### UUID Mocking Pattern +```python +# ✅ Mock UUID generation for predictable IDs +@mock.patch('snowplow_tracker.tracker.Tracker.get_uuid') +def test_event_id(mock_uuid): + mock_uuid.return_value = "test-uuid-123" + tracker.track(event) + assert payload["eid"] == "test-uuid-123" + +# ❌ Don't rely on random UUIDs +event_id = tracker.get_uuid() # Different each run +``` + +## Unit Test Patterns + +### Payload Testing +```python +# ✅ Test payload field presence and values +def test_payload_construction(): + payload = Payload() + payload.add("e", "pv") + payload.add("url", "https://test.com") + + result = payload.get() + assert result["e"] == "pv" + assert result["url"] == "https://test.com" + +# ✅ Test JSON encoding +def test_payload_json_encoding(): + payload.add_json({"key": "value"}, True, "cx", "co") + assert "cx" in payload.get() # Base64 encoded +``` + +### Event Testing +```python +# ✅ Test event construction with all parameters +def test_page_view_complete(): + context = SelfDescribingJson(schema, data) + subject = Subject() + + event = PageView( + page_url="https://test.com", + page_title="Test", + context=[context], + event_subject=subject, + true_timestamp=1234567890 + ) + + assert event.page_url == "https://test.com" + assert len(event.context) == 1 + +# ❌ Don't test internal implementation details +def test_private_methods(): + event._internal_method() # Testing private methods +``` + +### Emitter Testing +```python +# ✅ Mock HTTP requests for emitter tests +@mock.patch('requests.post') +def test_emitter_sends_events(mock_post): + mock_post.return_value.status_code = 
200 + + emitter = Emitter("https://collector.test") + emitter.input({"e": "pv"}) + emitter.flush() + + mock_post.assert_called_once() + +# ✅ Test retry logic +def test_emitter_retry_on_failure(mock_post): + mock_post.return_value.status_code = 500 + emitter.custom_retry_codes = {500: True} + # Verify retry behavior +``` + +### Contract Validation Testing +```python +# ✅ Test validation rules +def test_non_empty_string_validation(): + with self.assertRaises(ValueError): + non_empty_string("") + + non_empty_string("valid") # Should not raise + +# ✅ Test form element validation +def test_form_element_contract(): + valid_element = { + "name": "field1", + "value": "test", + "nodeName": "INPUT", + "type": "text" + } + form_element(valid_element) # Should not raise +``` + +## Integration Test Patterns + +### Mock Collector Pattern +```python +# ✅ Use micro mock collector for integration tests +from http.server import HTTPServer, BaseHTTPRequestHandler + +class MockCollector(BaseHTTPRequestHandler): + def do_POST(self): + # Capture and validate payload + content_length = int(self.headers['Content-Length']) + post_data = self.rfile.read(content_length) + # Store for assertions + self.send_response(200) + +# Start mock collector in test +server = HTTPServer(('localhost', 9090), MockCollector) +``` + +### End-to-End Testing +```python +# ✅ Test complete tracking flow +def test_end_to_end_tracking(): + tracker = Snowplow.create_tracker( + namespace="test", + endpoint="http://localhost:9090" + ) + + # Track multiple events + tracker.track(PageView(page_url="test1.com")) + tracker.track(StructuredEvent("cat", "act")) + tracker.flush() + + # Verify collector received both events + assert len(received_events) == 2 +``` + +## Testing Best Practices + +### Test Isolation +```python +# ✅ Clean up after each test +def setUp(self): + Snowplow.reset() # Clear all trackers + +def tearDown(self): + # Clean up any test artifacts + if hasattr(self, 'server'): + self.server.shutdown() + +# ❌ 
Don't leave state between tests +class TestSuite: + shared_tracker = Tracker(...) # Shared state! +``` + +### Assertion Patterns +```python +# ✅ Use specific assertions +assert event.page_url == "https://expected.com" +assert "e" in payload.get() +mock_func.assert_called_with(expected_arg) + +# ❌ Avoid generic assertions +assert event # Too vague +assert payload.get() # What are we checking? +``` + +### Mock Management +```python +# ✅ Use patch decorators or context managers +@mock.patch('snowplow_tracker.tracker.uuid.uuid4') +def test_with_mock(mock_uuid): + mock_uuid.return_value = "test-id" + +# ✅ Clean up patches +def create_patch(self, name): + patcher = mock.patch(name) + thing = patcher.start() + self.addCleanup(patcher.stop) + return thing +``` + +## Common Test Scenarios + +### Testing Event Contexts +```python +# ✅ Test context encoding and attachment +def test_event_with_multiple_contexts(): + contexts = [ + SelfDescribingJson(schema1, data1), + SelfDescribingJson(schema2, data2) + ] + event = PageView(page_url="test", context=contexts) + + payload = event.build_payload(True, None, None) + cx_data = json.loads(base64.b64decode(payload.get()["cx"])) + assert len(cx_data["data"]) == 2 +``` + +### Testing Failure Scenarios +```python +# ✅ Test failure callbacks +def test_emitter_failure_callback(): + failed_events = [] + + def on_failure(count, events): + failed_events.extend(events) + + emitter = Emitter( + "https://invalid.collector", + on_failure=on_failure + ) + # Trigger failure and verify callback +``` + +### Testing Async Behavior +```python +# ✅ Test async emitter threading +def test_async_emitter(): + emitter = AsyncEmitter("https://collector.test") + + # Track events + for i in range(100): + emitter.input({"e": "pv", "url": f"test{i}.com"}) + + # Wait for flush + emitter.flush() + time.sleep(1) # Allow async processing + + # Verify all events sent +``` + +## Test Utilities + +### Helper Functions +```python +# ✅ Create reusable test helpers +def 
create_test_tracker(namespace="test"):
+    emitter = mock.MagicMock()
+    return Tracker(namespace, emitter)
+
+def create_test_event():
+    return PageView(page_url="https://test.com")
+
+# ❌ Don't duplicate test setup
+def test_one():
+    emitter = mock.MagicMock()
+    tracker = Tracker("test", emitter)
+    # ... repeated in every test
+```
+
+## Performance Testing
+
+### Load Testing Pattern
+```python
+# ✅ Test tracker under load
+def test_high_volume_tracking():
+    tracker = create_test_tracker()
+
+    start = time.time()
+    for i in range(10000):
+        tracker.track(PageView(page_url=f"test{i}.com"))
+
+    duration = time.time() - start
+    assert duration < 5.0  # Performance threshold
+```
+
+## Quick Reference
+
+### Test File Naming
+- Unit tests: `test_<component>.py`
+- Integration tests: `test_integration_<scenario>.py`
+- Test classes: `Test<ComponentName>`
+- Test methods: `test_<behavior_description>`
+
+### Essential Test Imports
+```python
+import unittest
+import unittest.mock as mock
+from freezegun import freeze_time
+from snowplow_tracker.contracts import ContractsDisabled
+```
+
+### Common Mock Targets
+- `snowplow_tracker.tracker.Tracker.get_uuid`
+- `requests.post` / `requests.get`
+- `time.time`
+- `snowplow_tracker.emitters.Emitter.sync_flush`
+
+## Contributing to test/CLAUDE.md
+
+When adding or modifying tests:
+
+1. **Maintain test isolation** - Each test should be independent
+2. **Mock external dependencies** - No real network calls in unit tests
+3. **Use descriptive test names** - Clear what is being tested
+4. **Test both success and failure paths** - Include edge cases
+5. **Keep tests fast** - Mock time-consuming operations
+6.
**Document complex test scenarios** - Add comments for clarity \ No newline at end of file diff --git a/snowplow_tracker/test/integration/test_integration.py b/snowplow_tracker/test/integration/test_integration.py index 1428fb4e..57b1a58c 100644 --- a/snowplow_tracker/test/integration/test_integration.py +++ b/snowplow_tracker/test/integration/test_integration.py @@ -1,143 +1,577 @@ -""" - test_integration.py +# """ +# test_integration.py - Copyright (c) 2013-2014 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. - This program is licensed to you under the Apache License Version 2.0, - and you may not use this file except in compliance with the Apache License - Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - http://www.apache.org/licenses/LICENSE-2.0. +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. - Unless required by applicable law or agreed to in writing, - software distributed under the Apache License Version 2.0 is distributed on - an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - express or implied. See the Apache License Version 2.0 for the specific - language governing permissions and limitations there under. - - Authors: Anuj More, Alex Dean, Fred Blundun - Copyright: Copyright (c) 2013-2014 Snowplow Analytics Ltd - License: Apache License Version 2.0 -""" +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ import unittest -import time import re -from snowplow_tracker import tracker, _version +import json +import base64 +from urllib.parse import unquote_plus +import pytest from httmock import all_requests, HTTMock +from freezegun import freeze_time +from typing import Any, Dict, Optional + +from snowplow_tracker import tracker, _version, emitters, subject +from snowplow_tracker.self_describing_json import SelfDescribingJson + + +querystrings = [""] + +default_emitter = emitters.Emitter("localhost", protocol="http", port=80, batch_size=1) + +get_emitter = emitters.Emitter("localhost", protocol="http", port=80, method="get") + +default_subject = subject.Subject() -def from_querystring(field, url): + +def from_querystring(field: str, url: str) -> Optional[str]: pattern = re.compile("^[^#]*[?&]" + field + "=([^&#]*)") match = pattern.match(url) if match: return match.groups()[0] + +@all_requests +def pass_response_content(url: str, request: Any) -> Dict[str, Any]: + querystrings.append(request.url) + return {"url": request.url, "status_code": 200} + + @all_requests -def pass_response_content(url, request): - return { - "url": request.url, - "status_code": 200 - } +def pass_post_response_content(url: str, request: Any) -> Dict[str, Any]: + querystrings.append(json.loads(request.body)) + return {"url": request.url, "status_code": 200} + @all_requests -def fail_response_content(url, request): - return "HTTP status code [501] is a server error" +def fail_response_content(url: str, request: Any) -> Dict[str, Any]: + return {"url": request.url, "status_code": 501} class IntegrationTest(unittest.TestCase): + def test_integration_page_view(self) -> None: + t = tracker.Tracker("namespace", [get_emitter], default_subject) + with HTTMock(pass_response_content): + t.track_page_view( + "http://savethearctic.org", "Save The Arctic", "http://referrer.com" + ) + expected_fields = { + "e": "pv", + "page": "Save+The+Arctic", + "url": "http%3A%2F%2Fsavethearctic.org", + "refr": 
"http%3A%2F%2Freferrer.com", + } + for key in expected_fields: + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) - def test_integration_page_view(self): - t = tracker.Tracker("localhost") + def test_integration_ecommerce_transaction_item(self) -> None: + t = tracker.Tracker("namespace", [get_emitter], default_subject) with HTTMock(pass_response_content): - val = t.track_page_view("http://savethearctic.org", "Save The Arctic", None) - self.assertEquals(from_querystring("page", val), "Save+The+Arctic") + t.track_ecommerce_transaction_item( + "12345", "pbz0025", 7.99, 2, "black-tarot", "tarot", currency="GBP" + ) + expected_fields = { + "ti_ca": "tarot", + "ti_id": "12345", + "ti_qu": "2", + "ti_sk": "pbz0025", + "e": "ti", + "ti_nm": "black-tarot", + "ti_pr": "7.99", + "ti_cu": "GBP", + } + for key in expected_fields: + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) - def test_integration_ecommerce_transaction(self): - t = tracker.Tracker("localhost") + def test_integration_ecommerce_transaction(self) -> None: + t = tracker.Tracker("namespace", [get_emitter], default_subject) with HTTMock(pass_response_content): - val = t.track_ecommerce_transaction("12345", 9.99, "Web", 1.98, 3.05, "London", "Denver", "Greenland") - assertion_array = {"tr_tt": "9.99", "e": "tr", "tr_id": "12345", "tr_sh": "3.05", "tr_st": "Denver", "tr_af": "Web", "tr_co": "Greenland", "tr_tx": "1.98", "tr_ci": "London"} - for key in assertion_array: - self.assertEquals(from_querystring(key, val), assertion_array[key]) + t.track_ecommerce_transaction( + order_id="6a8078be", + total_value=35, + city="London", + currency="GBP", + items=[ + {"sku": "pbz0026", "price": 20, "quantity": 1}, + {"sku": "pbz0038", "price": 15, "quantity": 1}, + ], + tstamp=1399021242240, + ) + + expected_fields = { + "e": "tr", + "tr_id": "6a8078be", + "tr_tt": "35", + "tr_ci": "London", + "tr_cu": "GBP", + } + for key in expected_fields: + 
self.assertEqual( + from_querystring(key, querystrings[-3]), expected_fields[key] + ) + + expected_fields = { + "e": "ti", + "ti_id": "6a8078be", + "ti_sk": "pbz0026", + "ti_pr": "20", + "ti_cu": "GBP", + } + for key in expected_fields: + self.assertEqual( + from_querystring(key, querystrings[-2]), expected_fields[key] + ) - def test_integration_ecommerce_transaction_item(self): - t = tracker.Tracker("localhost") + expected_fields = { + "e": "ti", + "ti_id": "6a8078be", + "ti_sk": "pbz0038", + "ti_pr": "15", + "ti_cu": "GBP", + } + for key in expected_fields: + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) + + self.assertEqual( + from_querystring("ttm", querystrings[-3]), + from_querystring("ttm", querystrings[-2]), + ) + + def test_integration_mobile_screen_view(self) -> None: + t = tracker.Tracker( + "namespace", [get_emitter], default_subject, encode_base64=False + ) with HTTMock(pass_response_content): - val = t.track_ecommerce_transaction_item("12345", "pbz0025", 7.99, 2, "black-tarot", "tarot") - assertion_array = {"ti_ca": "tarot", "ti_id": "12345", "ti_qu": "2", "ti_sk": "pbz0025", "e": "ti", "ti_nm": "black-tarot", "ti_pr": "7.99"} - for key in assertion_array: - self.assertEquals(from_querystring(key, val), assertion_array[key]) + t.track_mobile_screen_view(id_="534", name="Game HUD 2") + expected_fields = {"e": "ue"} + for key in expected_fields: + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) + envelope_string = from_querystring("ue_pr", querystrings[-1]) + envelope = json.loads(unquote_plus(envelope_string)) + self.assertEqual( + envelope, + { + "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data": { + "schema": "iglu:com.snowplowanalytics.mobile/screen_view/jsonschema/1-0-0", + "data": {"id": "534", "name": "Game HUD 2"}, + }, + }, + ) - def test_integration_screen_view(self): - t = tracker.Tracker("localhost") + def 
test_integration_struct_event(self) -> None: + t = tracker.Tracker("namespace", [get_emitter], default_subject) with HTTMock(pass_response_content): - val = t.track_screen_view("Game HUD 2", "Hello!") - assertion_array = {"e": "ue", "ue_na": "screen_view"} - for key in assertion_array: - self.assertEquals(from_querystring(key, val), assertion_array[key]) + t.track_struct_event( + "Ecomm", "add-to-basket", "dog-skateboarding-video", "hd", 13.99 + ) + expected_fields = { + "se_ca": "Ecomm", + "se_pr": "hd", + "se_la": "dog-skateboarding-video", + "se_va": "13.99", + "se_ac": "add-to-basket", + "e": "se", + } + for key in expected_fields: + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) - def test_integration_struct_event(self): - t = tracker.Tracker("localhost") + def test_integration_self_describing_event_non_base64(self) -> None: + t = tracker.Tracker( + "namespace", [get_emitter], default_subject, encode_base64=False + ) with HTTMock(pass_response_content): - val = t.track_struct_event("Ecomm", "add-to-basket", "dog-skateboarding-video", "hd", 13.99) - assertion_array = {"se_ca": "Ecomm", "se_pr": "hd", "se_la": "dog-skateboarding-video", "se_va": "13.99", "se_ac": "add-to-basket", "e": "se"} - for key in assertion_array: - self.assertEquals(from_querystring(key, val), assertion_array[key]) + t.track_self_describing_event( + SelfDescribingJson( + "iglu:com.acme/viewed_product/jsonschema/2-0-2", + {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000}, + ) + ) + expected_fields = {"e": "ue"} + for key in expected_fields: + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) + envelope_string = from_querystring("ue_pr", querystrings[-1]) + envelope = json.loads(unquote_plus(envelope_string)) + self.assertEqual( + envelope, + { + "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data": { + "schema": "iglu:com.acme/viewed_product/jsonschema/2-0-2", + 
"data": { + "product_id": "ASO01043", + "price$flt": 49.95, + "walrus$tms": 1000, + }, + }, + }, + ) + def test_integration_self_describing_event_base64(self) -> None: + t = tracker.Tracker( + "namespace", [get_emitter], default_subject, encode_base64=True + ) + with HTTMock(pass_response_content): + t.track_self_describing_event( + SelfDescribingJson( + "iglu:com.acme/viewed_product/jsonschema/2-0-2", + {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000}, + ) + ) + expected_fields = {"e": "ue"} + for key in expected_fields: + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) + envelope_string = unquote_plus(from_querystring("ue_px", querystrings[-1])) + envelope = json.loads( + (base64.urlsafe_b64decode(bytearray(envelope_string, "utf-8"))).decode( + "utf-8" + ) + ) + self.assertEqual( + envelope, + { + "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data": { + "schema": "iglu:com.acme/viewed_product/jsonschema/2-0-2", + "data": { + "product_id": "ASO01043", + "price$flt": 49.95, + "walrus$tms": 1000, + }, + }, + }, + ) - def test_integration_unstruct_event_non_base64(self): - t = tracker.Tracker("localhost") - t.config["encode_base64"] = False + def test_integration_context_non_base64(self) -> None: + t = tracker.Tracker( + "namespace", [get_emitter], default_subject, encode_base64=False + ) with HTTMock(pass_response_content): - val = t.track_unstruct_event("viewed_product", {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": int(time.time() * 1000)}) - assertion_array = {"e": "ue", "ue_na": "viewed_product"} - for key in assertion_array: - self.assertEquals(from_querystring(key, val), assertion_array[key]) + t.track_page_view( + "localhost", + "local host", + None, + [ + SelfDescribingJson( + "iglu:com.example/user/jsonschema/2-0-3", + {"user_type": "tester"}, + ) + ], + ) + envelope_string = from_querystring("co", querystrings[-1]) + envelope = 
json.loads(unquote_plus(envelope_string)) + self.assertEqual( + envelope, + { + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1", + "data": [ + { + "schema": "iglu:com.example/user/jsonschema/2-0-3", + "data": {"user_type": "tester"}, + } + ], + }, + ) - def test_integration_unstruct_event_base64(self): - t = tracker.Tracker("localhost") + def test_integration_context_base64(self) -> None: + t = tracker.Tracker( + "namespace", [get_emitter], default_subject, encode_base64=True + ) with HTTMock(pass_response_content): - val = t.track_unstruct_event("viewed_product", {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": int(time.time() * 1000)}) - assertion_array = {"e": "ue", "ue_na": "viewed_product"} - for key in assertion_array: - self.assertEquals(from_querystring(key, val), assertion_array[key]) - - def test_integration_unstruct_event_non_base64_error(self): - t = tracker.Tracker("localhost") - t.config["encode_base64"] = False - try: - val = t.track_unstruct_event("viewed_product", - { - "product_id": "ASO01043", - "price$flt": 49, # ERROR - "walrus$tms": int(time.time() * 1000), - }) - except RuntimeError as e: - self.assertEquals("price$flt in dict is not a flt", str(e)) - - - def test_integration_unstruct_event_base64_error(self): - t = tracker.Tracker("localhost") - try: - val = t.track_unstruct_event("viewed_product", - { - "product_id": "ASO01043", - "price$flt": 49.95, - "walrus$tms": "hello", # ERROR - }) - except RuntimeError as e: - self.assertEquals("walrus$tms in dict is not a tms", str(e)) - - def test_integration_standard_nv_pairs(self): - t = tracker.Tracker("localhost", "cf") - t.set_platform("mob") - t.set_user_id("user12345") - t.set_app_id("angry-birds-android") - t.set_screen_resolution(100, 200) - t.set_color_depth(24) - t.set_timezone("Europe London") - t.set_lang("en") + t.track_page_view( + "localhost", + "local host", + None, + [ + SelfDescribingJson( + "iglu:com.example/user/jsonschema/2-0-3", + 
{"user_type": "tester"}, + ) + ], + ) + envelope_string = unquote_plus(from_querystring("cx", querystrings[-1])) + envelope = json.loads( + (base64.urlsafe_b64decode(bytearray(envelope_string, "utf-8"))).decode( + "utf-8" + ) + ) + self.assertEqual( + envelope, + { + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1", + "data": [ + { + "schema": "iglu:com.example/user/jsonschema/2-0-3", + "data": {"user_type": "tester"}, + } + ], + }, + ) + + def test_integration_standard_nv_pairs(self) -> None: + s = subject.Subject() + s.set_platform("mob") + s.set_user_id("user12345") + s.set_screen_resolution(100, 200) + s.set_color_depth(24) + s.set_timezone("Europe London") + s.set_lang("en") + + t = tracker.Tracker( + "cf", + [emitters.Emitter("localhost", method="get")], + s, + app_id="angry-birds-android", + ) with HTTMock(pass_response_content): - val = t.track_page_view("localhost", "local host", None) - assertion_array = {"tna": "cf", "evn": "com.snowplowanalytics", "res": "100x200", "lang": "en", "aid": "angry-birds-android", "cd": "24", "tz": "Europe+London", "p": "mob", "tv": "py-" + _version.__version__} - for key in assertion_array: - self.assertEquals(from_querystring(key, val), assertion_array[key]) + t.track_page_view("localhost", "local host") + expected_fields = { + "tna": "cf", + "res": "100x200", + "lang": "en", + "aid": "angry-birds-android", + "cd": "24", + "tz": "Europe+London", + "p": "mob", + "tv": "py-" + _version.__version__, + } + for key in expected_fields: + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) + self.assertIsNotNone(from_querystring("eid", querystrings[-1])) + self.assertIsNotNone(from_querystring("dtm", querystrings[-1])) + + def test_integration_identification_methods(self) -> None: + s = subject.Subject() + s.set_domain_user_id("4616bfb38f872d16") + s.set_domain_session_id("59ed13b1a5724dae") + s.set_domain_session_index(1) + s.set_ip_address("255.255.255.255") + 
s.set_useragent( + "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0)" + ) + s.set_network_user_id("fbc6c76c-bce5-43ce-8d5a-31c5") + + t = tracker.Tracker( + "cf", + [emitters.Emitter("localhost", method="get")], + s, + app_id="angry-birds-android", + ) + with HTTMock(pass_response_content): + t.track_page_view("localhost", "local host") + expected_fields = { + "duid": "4616bfb38f872d16", + "sid": "59ed13b1a5724dae", + "vid": "1", + "ip": "255.255.255.255", + "ua": "Mozilla%2F5.0+%28compatible%3B+MSIE+9.0%3B+Windows+NT+6.0%3B+Trident%2F5.0%29", + "tnuid": "fbc6c76c-bce5-43ce-8d5a-31c5", + } + for key in expected_fields: + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) + + def test_integration_event_subject(self) -> None: + s = subject.Subject() + s.set_domain_user_id("4616bfb38f872d16") + s.set_lang("ES") + + t = tracker.Tracker( + "namespace", + [emitters.Emitter("localhost", method="get")], + s, + app_id="angry-birds-android", + ) + evSubject = ( + subject.Subject().set_domain_user_id("1111aaa11a111a11").set_lang("EN") + ) + with HTTMock(pass_response_content): + t.track_page_view("localhost", "local host", event_subject=evSubject) + expected_fields = {"duid": "1111aaa11a111a11", "lang": "EN"} + for key in expected_fields: + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) + + def test_integration_success_callback(self) -> None: + callback_success_queue = [] + callback_failure_queue = [] + callback_emitter = emitters.Emitter( + "localhost", + method="get", + on_success=lambda x: callback_success_queue.append(x), + on_failure=lambda x, y: callback_failure_queue.append(x), + ) + t = tracker.Tracker("namespace", [callback_emitter], default_subject) + with HTTMock(pass_response_content): + t.track_page_view("http://www.example.com") + expected = { + "e": "pv", + "url": "http://www.example.com", + } + self.assertEqual(len(callback_success_queue), 1) + for k in expected.keys(): + 
self.assertEqual(callback_success_queue[0][0][k], expected[k]) + self.assertEqual(callback_failure_queue, []) + + def test_integration_failure_callback(self) -> None: + callback_success_queue = [] + callback_failure_queue = [] + callback_emitter = emitters.Emitter( + "localhost", + method="get", + on_success=lambda x: callback_success_queue.append(x), + on_failure=lambda x, y: callback_failure_queue.append(x), + ) + t = tracker.Tracker("namespace", [callback_emitter], default_subject) + with HTTMock(fail_response_content): + t.track_page_view("http://www.example.com") + self.assertEqual(callback_success_queue, []) + self.assertEqual(callback_failure_queue[0], 0) + + def test_post_page_view(self) -> None: + t = tracker.Tracker("namespace", [default_emitter], default_subject) + with HTTMock(pass_post_response_content): + t.track_page_view("localhost", "local host", None) + expected_fields = {"e": "pv", "page": "local host", "url": "localhost"} + request = querystrings[-1] + self.assertEqual( + request["schema"], + "iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4", + ) + for key in expected_fields: + self.assertEqual(request["data"][0][key], expected_fields[key]) + + def test_post_batched(self) -> None: + default_emitter = emitters.Emitter( + "localhost", protocol="http", port=80, batch_size=2 + ) + t = tracker.Tracker("namespace", default_emitter, default_subject) + with HTTMock(pass_post_response_content): + t.track_struct_event("Test", "A") + t.track_struct_event("Test", "B") + self.assertEqual(querystrings[-1]["data"][0]["se_ac"], "A") + self.assertEqual(querystrings[-1]["data"][1]["se_ac"], "B") + + @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 + def test_timestamps(self) -> None: + emitter = emitters.Emitter("localhost", protocol="http", port=80, batch_size=3) + t = tracker.Tracker("namespace", [emitter], default_subject) + with HTTMock(pass_post_response_content): + t.track_page_view("localhost", "stamp0", None, tstamp=None) + 
t.track_page_view("localhost", "stamp1", None, tstamp=1358933694000) + t.track_page_view("localhost", "stamp2", None, tstamp=1358933694000.00) + + expected_timestamps = [ + {"dtm": "1618790401000", "ttm": None, "stm": "1618790401000"}, + {"dtm": "1618790401000", "ttm": "1358933694000", "stm": "1618790401000"}, + {"dtm": "1618790401000", "ttm": "1358933694000", "stm": "1618790401000"}, + ] + request = querystrings[-1] + + for i, event in enumerate(expected_timestamps): + self.assertEqual( + request["data"][i].get("dtm"), expected_timestamps[i]["dtm"] + ) + self.assertEqual( + request["data"][i].get("ttm"), expected_timestamps[i]["ttm"] + ) + self.assertEqual( + request["data"][i].get("stm"), expected_timestamps[i]["stm"] + ) + + def test_bytelimit(self) -> None: + default_emitter = emitters.Emitter( + "localhost", protocol="http", port=80, batch_size=5, byte_limit=459 + ) + t = tracker.Tracker("namespace", default_emitter, default_subject) + with HTTMock(pass_post_response_content): + t.track_struct_event("Test", "A") # 153 bytes + t.track_struct_event("Test", "A") # 306 bytes + t.track_struct_event("Test", "A") # 459 bytes. 
Send + t.track_struct_event("Test", "AA") # 154 + + self.assertEqual(len(querystrings[-1]["data"]), 3) + self.assertEqual(default_emitter.bytes_queued, 156 + len(_version.__version__)) + + def test_unicode_get(self) -> None: + t = tracker.Tracker( + "namespace", [get_emitter], default_subject, encode_base64=False + ) + unicode_a = "\u0107" + unicode_b = "test.\u0107om" + test_ctx = SelfDescribingJson( + "iglu:a.b/c/jsonschema/1-0-0", {"test": unicode_a} + ) + with HTTMock(pass_response_content): + t.track_page_view(unicode_b, context=[test_ctx]) + t.track_mobile_screen_view(unicode_b, context=[test_ctx]) + + url_string = unquote_plus(from_querystring("url", querystrings[-2])) + try: + self.assertEqual(url_string.decode("utf-8"), unicode_b) + except AttributeError: + # in python 3: str type contains unicode (so no 'decode') + self.assertEqual(url_string, unicode_b) + + context_string = unquote_plus(from_querystring("co", querystrings[-1])) + actual_a = json.loads(context_string)["data"][0]["data"]["test"] + self.assertEqual(actual_a, unicode_a) + + uepr_string = unquote_plus(from_querystring("ue_pr", querystrings[-1])) + actual_b = json.loads(uepr_string)["data"]["data"]["name"] + self.assertEqual(actual_b, unicode_b) + + def test_unicode_post(self) -> None: + t = tracker.Tracker( + "namespace", [default_emitter], default_subject, encode_base64=False + ) + unicode_a = "\u0107" + unicode_b = "test.\u0107om" + test_ctx = SelfDescribingJson( + "iglu:a.b/c/jsonschema/1-0-0", {"test": unicode_a} + ) + with HTTMock(pass_post_response_content): + t.track_page_view(unicode_b, context=[test_ctx]) + t.track_mobile_screen_view(unicode_b, context=[test_ctx]) + + pv_event = querystrings[-2] + self.assertEqual(pv_event["data"][0]["url"], unicode_b) + + in_test_ctx = json.loads(pv_event["data"][0]["co"])["data"][0]["data"]["test"] + self.assertEqual(in_test_ctx, unicode_a) + + sv_event = querystrings[-1] + in_uepr_name = 
json.loads(sv_event["data"][0]["ue_pr"])["data"]["data"]["name"] + self.assertEqual(in_uepr_name, unicode_b) diff --git a/snowplow_tracker/test/unit/__init__.py b/snowplow_tracker/test/unit/__init__.py index 8b137891..e69de29b 100644 --- a/snowplow_tracker/test/unit/__init__.py +++ b/snowplow_tracker/test/unit/__init__.py @@ -1 +0,0 @@ - diff --git a/snowplow_tracker/test/unit/test_contracts.py b/snowplow_tracker/test/unit/test_contracts.py new file mode 100644 index 00000000..9a913bca --- /dev/null +++ b/snowplow_tracker/test/unit/test_contracts.py @@ -0,0 +1,110 @@ +# """ +# test_tracker.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ + +import unittest + +from snowplow_tracker.contracts import ( + form_element, + greater_than, + non_empty, + non_empty_string, + one_of, + satisfies, +) + + +class TestContracts(unittest.TestCase): + def setUp(self) -> None: + pass + + def test_greater_than_succeeds(self) -> None: + greater_than(10, 0) + + def test_greater_than_fails(self) -> None: + with self.assertRaises(ValueError): + greater_than(0, 10) + + def test_non_empty_succeeds(self) -> None: + non_empty(["something"]) + + def test_non_empty_fails(self) -> None: + with self.assertRaises(ValueError): + non_empty([]) + + def test_non_empty_string_succeeds(self) -> None: + non_empty_string("something") + + def test_non_empty_string_fails(self) -> None: + with self.assertRaises(ValueError): + non_empty_string("") + + def test_one_of_succeeds(self) -> None: + one_of("something", ["something", "something else"]) + + def test_one_of_fails(self) -> None: + with self.assertRaises(ValueError): + one_of("something", ["something else"]) + + def test_satisfies_succeeds(self) -> None: + satisfies(10, lambda v: v == 10) + + def test_satisfies_fails(self) -> None: + with self.assertRaises(ValueError): + satisfies(0, lambda v: v == 10) + + def test_form_element_no_type(self) -> None: + elem = {"name": "elemName", "value": "elemValue", "nodeName": "INPUT"} + form_element(elem) + + def test_form_element_type_valid(self) -> None: + elem = { + "name": "elemName", + "value": "elemValue", + "nodeName": "TEXTAREA", + "type": "button", + } + form_element(elem) + + def test_form_element_type_invalid(self) -> None: + elem = { + "name": "elemName", + "value": "elemValue", + "nodeName": "SELECT", + "type": "invalid", + } + with self.assertRaises(ValueError): + form_element(elem) + + def test_form_element_nodename_invalid(self) -> None: + elem = {"name": "elemName", "value": "elemValue", "nodeName": "invalid"} + with self.assertRaises(ValueError): + form_element(elem) + + def test_form_element_no_nodename(self) -> None: + 
elem = {"name": "elemName", "value": "elemValue"} + with self.assertRaises(ValueError): + form_element(elem) + + def test_form_element_no_value(self) -> None: + elem = {"name": "elemName", "nodeName": "INPUT"} + with self.assertRaises(ValueError): + form_element(elem) + + def test_form_element_no_name(self) -> None: + elem = {"value": "elemValue", "nodeName": "INPUT"} + with self.assertRaises(ValueError): + form_element(elem) diff --git a/snowplow_tracker/test/unit/test_emitters.py b/snowplow_tracker/test/unit/test_emitters.py new file mode 100644 index 00000000..f02be943 --- /dev/null +++ b/snowplow_tracker/test/unit/test_emitters.py @@ -0,0 +1,585 @@ +# """ +# test_emitters.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ + +import time +import unittest +import unittest.mock as mock +from freezegun import freeze_time +from typing import Any +from requests import ConnectTimeout + +from snowplow_tracker.emitters import Emitter, AsyncEmitter, DEFAULT_MAX_LENGTH + + +# helpers +def mocked_flush(*args: Any) -> None: + pass + + +def mocked_send_events(*args: Any) -> None: + pass + + +def mocked_http_success(*args: Any) -> bool: + return True + + +def mocked_http_failure(*args: Any) -> bool: + return False + + +def mocked_http_response_success(*args: Any) -> int: + return 200 + + +def mocked_http_response_failure(*args: Any) -> int: + return 400 + + +def mocked_http_response_failure_retry(*args: Any) -> int: + return 500 + + +class TestEmitters(unittest.TestCase): + def setUp(self) -> None: + pass + + def test_init(self) -> None: + e = Emitter("0.0.0.0") + self.assertEqual( + e.endpoint, "https://0.0.0.0/com.snowplowanalytics.snowplow/tp2" + ) + self.assertEqual(e.method, "post") + self.assertEqual(e.batch_size, 10) + self.assertEqual(e.event_store.event_buffer, []) + self.assertIsNone(e.byte_limit) + self.assertIsNone(e.bytes_queued) + self.assertIsNone(e.on_success) + self.assertIsNone(e.on_failure) + self.assertFalse(e.timer.is_active()) + self.assertIsNone(e.request_timeout) + + def test_init_batch_size(self) -> None: + e = Emitter("0.0.0.0", batch_size=10) + self.assertEqual(e.batch_size, 10) + + def test_init_post(self) -> None: + e = Emitter("0.0.0.0") + self.assertEqual(e.batch_size, DEFAULT_MAX_LENGTH) + + def test_init_byte_limit(self) -> None: + e = Emitter("0.0.0.0", byte_limit=512) + self.assertEqual(e.bytes_queued, 0) + + def test_init_requests_timeout(self) -> None: + e = Emitter("0.0.0.0", request_timeout=(2.5, 5)) + self.assertEqual(e.request_timeout, (2.5, 5)) + + def test_as_collector_uri(self) -> None: + uri = Emitter.as_collector_uri("0.0.0.0") + self.assertEqual(uri, "https://0.0.0.0/com.snowplowanalytics.snowplow/tp2") + + def test_as_collector_uri_get(self) 
-> None: + uri = Emitter.as_collector_uri("0.0.0.0", method="get") + self.assertEqual(uri, "https://0.0.0.0/i") + + def test_as_collector_uri_port(self) -> None: + uri = Emitter.as_collector_uri("0.0.0.0", port=9090) + self.assertEqual(uri, "https://0.0.0.0:9090/com.snowplowanalytics.snowplow/tp2") + + def test_as_collector_uri_http(self) -> None: + uri = Emitter.as_collector_uri("0.0.0.0", protocol="http") + self.assertEqual(uri, "http://0.0.0.0/com.snowplowanalytics.snowplow/tp2") + + def test_as_collector_uri_empty_string(self) -> None: + with self.assertRaises(ValueError): + Emitter.as_collector_uri("") + + def test_as_collector_uri_endpoint_protocol(self) -> None: + uri = Emitter.as_collector_uri("https://0.0.0.0") + self.assertEqual(uri, "https://0.0.0.0/com.snowplowanalytics.snowplow/tp2") + + def test_as_collector_uri_endpoint_protocol_http(self) -> None: + uri = Emitter.as_collector_uri("http://0.0.0.0") + self.assertEqual(uri, "http://0.0.0.0/com.snowplowanalytics.snowplow/tp2") + + @mock.patch("snowplow_tracker.Emitter.flush") + def test_input_no_flush(self, mok_flush: Any) -> None: + mok_flush.side_effect = mocked_flush + + e = Emitter("0.0.0.0", method="get", batch_size=2) + nvPairs = {"n0": "v0", "n1": "v1"} + e.input(nvPairs) + + self.assertEqual(len(e.event_store.event_buffer), 1) + self.assertDictEqual(nvPairs, e.event_store.event_buffer[0]) + self.assertIsNone(e.byte_limit) + self.assertFalse(e.reached_limit()) + mok_flush.assert_not_called() + + @mock.patch("snowplow_tracker.Emitter.flush") + def test_input_flush_byte_limit(self, mok_flush: Any) -> None: + mok_flush.side_effect = mocked_flush + + e = Emitter("0.0.0.0", method="get", batch_size=2, byte_limit=16) + nvPairs = {"n0": "v0", "n1": "v1"} + e.input(nvPairs) + + self.assertEqual(len(e.event_store.event_buffer), 1) + self.assertDictEqual(nvPairs, e.event_store.event_buffer[0]) + self.assertTrue(e.reached_limit()) + self.assertEqual(mok_flush.call_count, 1) + + 
@mock.patch("snowplow_tracker.Emitter.flush") + def test_input_flush_buffer(self, mok_flush: Any) -> None: + mok_flush.side_effect = mocked_flush + + e = Emitter("0.0.0.0", method="get", batch_size=2, byte_limit=1024) + nvPairs = {"n0": "v0", "n1": "v1"} + e.input(nvPairs) + + self.assertEqual(len(e.event_store.event_buffer), 1) + self.assertFalse(e.reached_limit()) + self.assertDictEqual(nvPairs, e.event_store.event_buffer[0]) + + nextPairs = {"n0": "v0"} + e.input(nextPairs) + # since we mock flush, the buffer is not empty + self.assertEqual(e.event_store.event_buffer, [nvPairs, nextPairs]) + self.assertTrue(e.reached_limit()) + self.assertEqual(mok_flush.call_count, 1) + + @mock.patch("snowplow_tracker.Emitter.flush") + def test_input_bytes_queued(self, mok_flush: Any) -> None: + mok_flush.side_effect = mocked_flush + + e = Emitter("0.0.0.0", method="get", batch_size=2, byte_limit=1024) + nvPairs = {"n0": "v0", "n1": "v1"} + e.input(nvPairs) + + self.assertEqual(len(e.event_store.event_buffer), 1) + self.assertEqual(e.bytes_queued, 24) + + e.input(nvPairs) + self.assertEqual(e.bytes_queued, 48) + + @mock.patch("snowplow_tracker.Emitter.flush") + def test_input_bytes_post(self, mok_flush: Any) -> None: + mok_flush.side_effect = mocked_flush + + e = Emitter("0.0.0.0") + nvPairs = {"testString": "test", "testNum": 2.72} + e.input(nvPairs) + + self.assertEqual( + e.event_store.event_buffer, [{"testString": "test", "testNum": "2.72"}] + ) + + @mock.patch("snowplow_tracker.Emitter.http_post") + def test_flush(self, mok_send_events: Any) -> None: + mok_send_events.side_effect = mocked_http_response_success + + e = Emitter("0.0.0.0", batch_size=2, byte_limit=None) + nvPairs = {"n": "v"} + e.input(nvPairs) + e.input(nvPairs) + + self.assertEqual(mok_send_events.call_count, 1) + self.assertEqual(len(e.event_store.event_buffer), 0) + + @mock.patch("snowplow_tracker.Emitter.http_post") + def test_flush_bytes_queued(self, mok_send_events: Any) -> None: + 
mok_send_events.side_effect = mocked_http_response_success + + e = Emitter("0.0.0.0", batch_size=2, byte_limit=256) + nvPairs = {"n": "v"} + e.input(nvPairs) + e.input(nvPairs) + + self.assertEqual(mok_send_events.call_count, 1) + self.assertEqual(len(e.event_store.event_buffer), 0) + self.assertEqual(e.bytes_queued, 0) + + @freeze_time("2021-04-14 00:00:02") # unix: 1618358402000 + def test_attach_sent_tstamp(self) -> None: + e = Emitter("0.0.0.0") + ev_list = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + + e.attach_sent_timestamp(ev_list) + reduced = True + for ev in ev_list: + reduced = reduced and "stm" in ev.keys() and ev["stm"] == "1618358402000" + self.assertTrue(reduced) + + @mock.patch("snowplow_tracker.Emitter.flush") + def test_flush_timer(self, mok_flush: Any) -> None: + mok_flush.side_effect = mocked_flush + + e = Emitter("0.0.0.0", batch_size=10) + ev_list = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + for i in ev_list: + e.input(i) + + e.set_flush_timer(3) + self.assertEqual(len(e.event_store.event_buffer), 3) + time.sleep(5) + self.assertGreaterEqual(mok_flush.call_count, 1) + + @mock.patch("snowplow_tracker.Emitter.http_get") + def test_send_events_get_success(self, mok_http_get: Any) -> None: + mok_http_get.side_effect = mocked_http_response_success + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter( + "0.0.0.0", + method="get", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + ) + + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + mok_success.assert_called_once_with(evBuffer) + mok_failure.assert_not_called() + + @mock.patch("snowplow_tracker.Emitter.http_get") + def test_send_events_get_failure(self, mok_http_get: Any) -> None: + mok_http_get.side_effect = mocked_http_response_failure + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter( + 
"0.0.0.0", + method="get", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + ) + + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + mok_success.assert_not_called() + mok_failure.assert_called_once_with(0, evBuffer) + + @mock.patch("snowplow_tracker.Emitter.http_post") + def test_send_events_post_success(self, mok_http_post: Any) -> None: + mok_http_post.side_effect = mocked_http_response_success + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter( + "0.0.0.0", batch_size=10, on_success=mok_success, on_failure=mok_failure + ) + + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + mok_success.assert_called_once_with(evBuffer) + mok_failure.assert_not_called() + + @mock.patch("snowplow_tracker.Emitter.http_post") + def test_send_events_post_failure(self, mok_http_post: Any) -> None: + mok_http_post.side_effect = mocked_http_response_failure + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter( + "0.0.0.0", batch_size=10, on_success=mok_success, on_failure=mok_failure + ) + + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + mok_success.assert_not_called() + mok_failure.assert_called_with(0, evBuffer) + + @mock.patch("snowplow_tracker.emitters.requests.post") + def test_http_post_connect_timeout_error(self, mok_post_request: Any) -> None: + mok_post_request.side_effect = ConnectTimeout + e = Emitter("0.0.0.0") + response = e.http_post("dummy_string") + post_succeeded = Emitter.is_good_status_code(response) + + self.assertFalse(post_succeeded) + + @mock.patch("snowplow_tracker.emitters.requests.post") + def test_http_get_connect_timeout_error(self, mok_post_request: Any) -> None: + mok_post_request.side_effect = ConnectTimeout + e = Emitter("0.0.0.0", method="get") + response = e.http_get({"a": "b"}) + 
get_succeeded = Emitter.is_good_status_code(response) + self.assertFalse(get_succeeded) + + ### + # AsyncEmitter + ### + @mock.patch("snowplow_tracker.AsyncEmitter.flush") + def test_async_emitter_input(self, mok_flush: Any) -> None: + mok_flush.side_effect = mocked_flush + + ae = AsyncEmitter( + "0.0.0.0", port=9090, method="get", batch_size=3, thread_count=5 + ) + self.assertTrue(ae.queue.empty()) + + ae.input({"a": "aa"}) + ae.input({"b": "bb"}) + self.assertEqual(len(ae.event_store.event_buffer), 2) + self.assertTrue(ae.queue.empty()) + mok_flush.assert_not_called() + + ae.input({"c": "cc"}) # meet buffer size + self.assertEqual(mok_flush.call_count, 1) + + @mock.patch("snowplow_tracker.AsyncEmitter.send_events") + def test_async_emitter_sync_flash(self, mok_send_events: Any) -> None: + mok_send_events.side_effect = mocked_send_events + + ae = AsyncEmitter( + "0.0.0.0", + port=9090, + method="get", + batch_size=3, + thread_count=5, + byte_limit=1024, + ) + self.assertTrue(ae.queue.empty()) + + ae.input({"a": "aa"}) + ae.input({"b": "bb"}) + self.assertEqual(len(ae.event_store.event_buffer), 2) + self.assertTrue(ae.queue.empty()) + mok_send_events.assert_not_called() + + ae.sync_flush() + self.assertEqual(len(ae.event_store.event_buffer), 0) + self.assertEqual(ae.bytes_queued, 0) + self.assertEqual(mok_send_events.call_count, 1) + + @mock.patch("snowplow_tracker.Emitter.http_get") + def test_async_send_events_get_success(self, mok_http_get: Any) -> None: + mok_http_get.side_effect = mocked_http_response_success + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + ae = AsyncEmitter( + "0.0.0.0", + method="get", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + ) + + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + ae.send_events(evBuffer) + mok_success.assert_called_once_with(evBuffer) + mok_failure.assert_not_called() + + @mock.patch("snowplow_tracker.Emitter.http_get") + 
def test_async_send_events_get_failure(self, mok_http_get: Any) -> None: + mok_http_get.side_effect = mocked_http_response_failure + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + ae = AsyncEmitter( + "0.0.0.0", + method="get", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + ) + + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + ae.send_events(evBuffer) + mok_success.assert_not_called() + mok_failure.assert_called_once_with(0, evBuffer) + + @mock.patch("snowplow_tracker.Emitter.http_post") + def test_async_send_events_post_success(self, mok_http_post: Any) -> None: + mok_http_post.side_effect = mocked_http_response_success + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + ae = Emitter( + "0.0.0.0", batch_size=10, on_success=mok_success, on_failure=mok_failure + ) + + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + ae.send_events(evBuffer) + mok_success.assert_called_once_with(evBuffer) + mok_failure.assert_not_called() + + @mock.patch("snowplow_tracker.Emitter.http_post") + def test_async_send_events_post_failure(self, mok_http_post: Any) -> None: + mok_http_post.side_effect = mocked_http_response_failure + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + ae = Emitter( + "0.0.0.0", batch_size=10, on_success=mok_success, on_failure=mok_failure + ) + + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + ae.send_events(evBuffer) + mok_success.assert_not_called() + mok_failure.assert_called_with(0, evBuffer) + + # Unicode + @mock.patch("snowplow_tracker.AsyncEmitter.flush") + def test_input_unicode_get(self, mok_flush: Any) -> None: + mok_flush.side_effect = mocked_flush + + payload = {"unicode": "\u0107", "alsoAscii": "abc"} + ae = AsyncEmitter("0.0.0.0", method="get", batch_size=2) + ae.input(payload) + + 
self.assertEqual(len(ae.event_store.event_buffer), 1) + self.assertDictEqual(payload, ae.event_store.event_buffer[0]) + + @mock.patch("snowplow_tracker.AsyncEmitter.flush") + def test_input_unicode_post(self, mok_flush: Any) -> None: + mok_flush.side_effect = mocked_flush + + payload = {"unicode": "\u0107", "alsoAscii": "abc"} + ae = AsyncEmitter("0.0.0.0", batch_size=2) + ae.input(payload) + + self.assertEqual(len(ae.event_store.event_buffer), 1) + self.assertDictEqual(payload, ae.event_store.event_buffer[0]) + + @mock.patch("snowplow_tracker.Emitter.http_post") + def test_send_events_post_retry(self, mok_http_post: Any) -> None: + mok_http_post.side_effect = mocked_http_response_failure_retry + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter( + "0.0.0.0", batch_size=10, on_success=mok_success, on_failure=mok_failure + ) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_http_post.side_effect = mocked_http_response_success + time.sleep(5) + + mok_failure.assert_called_with(0, evBuffer) + mok_success.assert_called_with(evBuffer) + + @mock.patch("snowplow_tracker.Emitter.http_get") + def test_send_events_get_retry(self, mok_http_get: Any) -> None: + mok_http_get.side_effect = mocked_http_response_failure_retry + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter( + "0.0.0.0", + method="get", + batch_size=1, + on_success=mok_success, + on_failure=mok_failure, + ) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_http_get.side_effect = mocked_http_response_success + time.sleep(5) + + mok_failure.assert_called_with(0, evBuffer) + mok_success.assert_called_with(evBuffer) + + @mock.patch("snowplow_tracker.Emitter.http_get") + def test_send_events_get_no_retry(self, mok_http_get: Any) -> None: + mok_http_get.side_effect = 
mocked_http_response_failure + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter( + "0.0.0.0", + method="get", + batch_size=1, + on_success=mok_success, + on_failure=mok_failure, + ) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_failure.assert_called_once_with(0, evBuffer) + mok_success.assert_not_called() + + @mock.patch("snowplow_tracker.Emitter.http_post") + def test_send_events_post_no_retry(self, mok_http_post: Any) -> None: + mok_http_post.side_effect = mocked_http_response_failure + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter( + "0.0.0.0", + method="get", + batch_size=1, + on_success=mok_success, + on_failure=mok_failure, + ) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_failure.assert_called_once_with(0, evBuffer) + mok_success.assert_not_called() + + @mock.patch("snowplow_tracker.Emitter.http_post") + def test_send_events_post_custom_retry(self, mok_http_post: Any) -> None: + mok_http_post.side_effect = mocked_http_response_failure + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter( + "0.0.0.0", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + custom_retry_codes={400: True}, + ) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_http_post.side_effect = mocked_http_response_success + time.sleep(5) + + mok_failure.assert_called_with(0, evBuffer) + mok_success.assert_called_with(evBuffer) + + @mock.patch("snowplow_tracker.Emitter.http_get") + def test_send_events_get_custom_retry(self, mok_http_get: Any) -> None: + mok_http_get.side_effect = mocked_http_response_failure + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + 
e = Emitter( + "0.0.0.0", + method="get", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + custom_retry_codes={400: True}, + ) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_http_get.side_effect = mocked_http_response_success + time.sleep(5) + + mok_failure.assert_called_with(0, evBuffer) + mok_success.assert_called_with(evBuffer) diff --git a/snowplow_tracker/test/unit/test_event.py b/snowplow_tracker/test/unit/test_event.py new file mode 100644 index 00000000..e50da98d --- /dev/null +++ b/snowplow_tracker/test/unit/test_event.py @@ -0,0 +1,72 @@ +# """ +# test_event.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ + +import json +import unittest +from snowplow_tracker.events import Event +from snowplow_tracker.subject import Subject +from snowplow_tracker.self_describing_json import SelfDescribingJson + +CONTEXT_SCHEMA = "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1" + + +class TestEvent(unittest.TestCase): + def setUp(self) -> None: + pass + + def test_init(self): + event = Event() + self.assertEqual(event.payload.nv_pairs, {}) + + def test_build_payload(self): + event_subject = Subject() + event = Event(event_subject=event_subject) + payload = event.build_payload(encode_base64=None, json_encoder=None) + + self.assertEqual(payload.nv_pairs, {"p": "pc"}) + + def test_build_payload_tstamp(self): + event_subject = Subject() + tstamp = 1399021242030 + + event = Event(event_subject=event_subject, true_timestamp=tstamp) + + payload = event.build_payload( + json_encoder=None, + encode_base64=None, + ) + + self.assertEqual(payload.nv_pairs, {"p": "pc", "ttm": 1399021242030}) + + def test_build_payload_context(self): + event_subject = Subject() + context = SelfDescribingJson("test.context.schema", {"user": "tester"}) + event_context = [context] + event = Event(event_subject=event_subject, context=event_context) + + payload = event.build_payload( + json_encoder=None, + encode_base64=False, + ) + + expected_context = { + "schema": CONTEXT_SCHEMA, + "data": [{"schema": "test.context.schema", "data": {"user": "tester"}}], + } + actual_context = json.loads(payload.nv_pairs["co"]) + + self.assertDictEqual(actual_context, expected_context) diff --git a/snowplow_tracker/test/unit/test_in_memory_event_store.py b/snowplow_tracker/test/unit/test_in_memory_event_store.py new file mode 100644 index 00000000..93a0c8b2 --- /dev/null +++ b/snowplow_tracker/test/unit/test_in_memory_event_store.py @@ -0,0 +1,106 @@ +# """ +# test_in_memory_event_store.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. 
+ +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ + +import unittest +from snowplow_tracker.event_store import InMemoryEventStore +import logging + +# logging +logging.basicConfig() +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +class TestInMemoryEventStore(unittest.TestCase): + def setUp(self) -> None: + pass + + def test_init(self): + event_store = InMemoryEventStore(logger) + self.assertEqual(event_store.buffer_capacity, 10000) + self.assertEqual(event_store.event_buffer, []) + + def test_add_event(self): + event_store = InMemoryEventStore(logger) + nvPairs = {"n0": "v0", "n1": "v1"} + + event_store.add_event(nvPairs) + self.assertDictEqual(nvPairs, event_store.event_buffer[0]) + + def test_size(self): + event_store = InMemoryEventStore(logger) + nvPairs = {"n0": "v0", "n1": "v1"} + + event_store.add_event(nvPairs) + event_store.add_event(nvPairs) + event_store.add_event(nvPairs) + + self.assertEqual(event_store.size(), 3) + + def test_add_failed_events_to_buffer(self): + event_store = InMemoryEventStore(logger) + + nvPair1 = {"n0": "v0", "n1": "v1"} + nvPair2 = {"n2": "v2", "n3": "v3"} + + event_store.add_event(nvPair1) + event_store.add_event(nvPair2) + + payload_list = event_store.get_events_batch() + + event_store.cleanup(payload_list, True) + + self.assertEqual(event_store.event_buffer, payload_list) + + def test_remove_success_events_from_buffer(self): + 
event_store = InMemoryEventStore(logger) + + nvPairs = {"n0": "v0", "n1": "v1"} + + event_store.add_event(nvPairs) + event_store.add_event(nvPairs) + payload_list = event_store.get_events_batch() + event_store.cleanup(payload_list, False) + + self.assertEqual(event_store.event_buffer, []) + + def test_drop_new_events_buffer_full(self): + event_store = InMemoryEventStore(logger, buffer_capacity=2) + + nvPair1 = {"n0": "v0"} + nvPair2 = {"n1": "v1"} + nvPair3 = {"n2": "v2"} + + event_store.add_event(nvPair1) + event_store.add_event(nvPair2) + + self.assertEqual(event_store.event_buffer, [{"n0": "v0"}, {"n1": "v1"}]) + + event_store.add_event(nvPair3) + + self.assertEqual(event_store.event_buffer, [{"n0": "v0"}, {"n1": "v1"}]) + + def test_get_events(self): + event_store = InMemoryEventStore(logger, buffer_capacity=2) + + nvPairs = {"n0": "v0"} + batch = [nvPairs, nvPairs] + + event_store.add_event(nvPairs) + event_store.add_event(nvPairs) + + self.assertEqual(event_store.get_events_batch(), batch) diff --git a/snowplow_tracker/test/unit/test_page_ping.py b/snowplow_tracker/test/unit/test_page_ping.py new file mode 100644 index 00000000..7539ce43 --- /dev/null +++ b/snowplow_tracker/test/unit/test_page_ping.py @@ -0,0 +1,38 @@ +import pytest + +from snowplow_tracker.events.page_ping import PagePing + + +class TestPagePing: + def test_getters(self): + pp = PagePing("url", "title", "referrer", 1, 2, 3, 4) + assert pp.page_url == "url" + assert pp.page_title == "title" + assert pp.referrer == "referrer" + assert pp.min_x == 1 + assert pp.max_x == 2 + assert pp.min_y == 3 + assert pp.max_y == 4 + + def test_setters(self): + pp = PagePing("url") + pp.page_title = "title" + pp.referrer = "referrer" + pp.min_x = 1 + pp.max_x = 2 + pp.min_y = 3 + pp.max_y = 4 + assert pp.page_title == "title" + assert pp.referrer == "referrer" + assert pp.min_x == 1 + assert pp.max_x == 2 + assert pp.min_y == 3 + assert pp.max_y == 4 + assert pp.page_url == "url" + + def 
test_page_url_non_empty_string(self): + pp = PagePing("url") + pp.page_url = "new_url" + assert pp.page_url == "new_url" + with pytest.raises(ValueError): + pp.page_url = "" diff --git a/snowplow_tracker/test/unit/test_page_view.py b/snowplow_tracker/test/unit/test_page_view.py new file mode 100644 index 00000000..3736710c --- /dev/null +++ b/snowplow_tracker/test/unit/test_page_view.py @@ -0,0 +1,27 @@ +import pytest + +from snowplow_tracker.events.page_view import PageView + + +class TestPageView: + def test_getters(self): + pv = PageView("url", "title", "referrer") + assert pv.page_url == "url" + assert pv.page_title == "title" + assert pv.referrer == "referrer" + + def test_setters(self): + pv = PageView("url", "title", "referrer") + pv.page_url = "new_url" + pv.page_title = "new_title" + pv.referrer = "new_referrer" + assert pv.page_url == "new_url" + assert pv.page_title == "new_title" + assert pv.referrer == "new_referrer" + + def test_page_url_non_empty_string(self): + pv = PageView("url") + pv.page_url = "new_url" + assert pv.page_url == "new_url" + with pytest.raises(ValueError): + pv.page_url = "" diff --git a/snowplow_tracker/test/unit/test_payload.py b/snowplow_tracker/test/unit/test_payload.py index 7bd3062a..c174e8f4 100644 --- a/snowplow_tracker/test/unit/test_payload.py +++ b/snowplow_tracker/test/unit/test_payload.py @@ -1,32 +1,29 @@ -""" - test_payload.py +# """ +# test_payload.py - Copyright (c) 2013-2014 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. - This program is licensed to you under the Apache License Version 2.0, - and you may not use this file except in compliance with the Apache License - Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - http://www.apache.org/licenses/LICENSE-2.0. 
- - Unless required by applicable law or agreed to in writing, - software distributed under the Apache License Version 2.0 is distributed on - an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - express or implied. See the Apache License Version 2.0 for the specific - language governing permissions and limitations there under. - - Authors: Anuj More, Alex Dean, Fred Blundun - Copyright: Copyright (c) 2013-2014 Snowplow Analytics Ltd - License: Apache License Version 2.0 -""" +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ +import json +import base64 import unittest -import time -from freezegun import freeze_time +from typing import Dict, Any + from snowplow_tracker import payload -def is_subset(dict1, dict2): +def is_subset(dict1: Dict[Any, Any], dict2: Dict[Any, Any]) -> bool: """ * is_subset(smaller_dict, larger_dict) Checks if dict1 has name, value pairs that also exist in dict2. 
@@ -42,69 +39,124 @@ def is_subset(dict1, dict2): return False -class TestPayload(unittest.TestCase): +def date_encoder(o: Any) -> str: + """Sample custom JSON encoder which converts dates into their ISO format""" + from datetime import date + from json.encoder import JSONEncoder + + if isinstance(o, date): + return o.isoformat() - def setUp(self): + return JSONEncoder.default(o) + + +class TestPayload(unittest.TestCase): + def setUp(self) -> None: pass - def test_object_generation(self): + def test_object_generation(self) -> None: p = payload.Payload() - self.assertTrue(is_subset({}, p.context)) - - def test_object_generation_2(self): - p = payload.Payload(None, {"test1": "result1", "test2": "result2", }) + self.assertDictEqual({}, p.nv_pairs) + + def test_object_generation_2(self) -> None: + p = payload.Payload( + { + "test1": "result1", + "test2": "result2", + } + ) output = {"test1": "result1", "test2": "result2"} - self.assertTrue(is_subset(output, p.context)) + self.assertDictEqual(output, p.nv_pairs) - def test_add(self): + def test_add(self) -> None: p = payload.Payload() p.add("name1", "value1") p.add("name2", "value2") - output = {"name1": "value1", "name2": "value2", } - self.assertTrue(is_subset(output, p.context)) + output = { + "name1": "value1", + "name2": "value2", + } + self.assertDictEqual(output, p.nv_pairs) - def test_add_dict(self): - p = payload.Payload(None, {"n1": "v1", "n2": "v2", }) - p.add_dict({"name4": 4, "name3": 3}) # Order doesn't matter - output = {"n1": "v1", "n2": "v2", "name3": 3, "name4": 4} - self.assertTrue(is_subset(output, p.context)) - - def test_get_transaction_id(self): + def test_add_empty_val(self) -> None: p = payload.Payload() - self.assertTrue(p.context["tid"] >= 100000 and - p.context["tid"] <= 999999) + p.add("name", "") + output = {} + self.assertDictEqual(output, p.nv_pairs) - @freeze_time("1970-01-01 00:00:01") - def test_get_timestamp(self): + def test_add_none(self) -> None: + p = payload.Payload() + 
p.add("name", None) + output = {} + self.assertDictEqual(output, p.nv_pairs) + + def test_add_dict(self) -> None: + p = payload.Payload( + { + "n1": "v1", + "n2": "v2", + } + ) + p.add_dict({"name4": 4, "name3": 3}) # Order doesn't matter + output = {"n1": "v1", "n2": "v2", "name3": 3, "name4": 4} + self.assertDictEqual(output, p.nv_pairs) + + def test_add_json_empty(self) -> None: + p = payload.Payload({"name": "value"}) + input = {} + p.add_json(input, False, "ue_px", "ue_pr") + output = {"name": "value"} + self.assertDictEqual(output, p.nv_pairs) + + def test_add_json_none(self) -> None: + p = payload.Payload({"name": "value"}) + input = None + p.add_json(input, False, "ue_px", "ue_pr") + output = {"name": "value"} + self.assertDictEqual(output, p.nv_pairs) + + def test_add_json_encode_false(self) -> None: p = payload.Payload() - self.assertTrue(p.context["dtm"] == 1000) # 1970-01-01 00:00:01 in ms + input = {"a": 1} + p.add_json(input, False, "ue_px", "ue_pr") + self.assertTrue("ue_pr" in p.nv_pairs.keys()) + self.assertFalse("ue_px" in p.nv_pairs.keys()) - def test_set_timestamp(self): + def test_add_json_encode_true(self) -> None: p = payload.Payload() - p.set_timestamp(0) - self.assertEquals(p.context["dtm"], 0) + input = {"a": 1} + p.add_json(input, True, "ue_px", "ue_pr") + self.assertFalse("ue_pr" in p.nv_pairs.keys()) + self.assertTrue("ue_px" in p.nv_pairs.keys()) - def test_set_timestamp(self): + def test_add_json_unicode_encode_false(self) -> None: p = payload.Payload() - p.set_timestamp(12345654321) - self.assertEquals(p.context["dtm"], 12345654321000) + input = {"a": "\u0107", "\u0107": "b"} + p.add_json(input, False, "ue_px", "ue_pr") + ue_pr = json.loads(p.nv_pairs["ue_pr"]) + self.assertDictEqual(input, ue_pr) - def test_add_unstruct_1(self): + def test_add_json_unicode_encode_true(self) -> None: p = payload.Payload() - try: - p.add_unstruct({"product_id": "ASO01043", - "price$flt": 33, # ERROR - "walrus$tms": int(time.time() * 1000), - }, False, 
"ue_px", "ue_pe") - except RuntimeError as e: - self.assertEquals("price$flt in dict is not a flt", str(e)) - - def test_add_unstruct_2(self): + input = {"a": "\u0107", "\u0107": "b"} + p.add_json(input, True, "ue_px", "ue_pr") + ue_px = json.loads( + base64.urlsafe_b64decode(p.nv_pairs["ue_px"]).decode("utf-8") + ) + self.assertDictEqual(input, ue_px) + + def test_add_json_with_custom_enc(self) -> None: + from datetime import date + p = payload.Payload() - try: - p.add_unstruct({"product_id": "ASO01043", - "price$flt": 33.3, - "walrus$tms": "hello world!", # ERROR - }, True, "ue_px", "ue_pe") - except RuntimeError as e: - self.assertEquals("walrus$tms in dict is not a tms", str(e)) + + input = {"key1": date(2020, 2, 1)} + + p.add_json(input, False, "name1", "name1", date_encoder) + + results = json.loads(p.nv_pairs["name1"]) + self.assertTrue(is_subset({"key1": "2020-02-01"}, results)) + + def test_subject_get(self) -> None: + p = payload.Payload({"name1": "val1"}) + self.assertDictEqual(p.get(), p.nv_pairs) diff --git a/snowplow_tracker/test/unit/test_structured_event.py b/snowplow_tracker/test/unit/test_structured_event.py new file mode 100644 index 00000000..fdf00014 --- /dev/null +++ b/snowplow_tracker/test/unit/test_structured_event.py @@ -0,0 +1,24 @@ +from snowplow_tracker.events.structured_event import StructuredEvent + + +class TestStructuredEvent: + def test_getters(self): + se = StructuredEvent("category", "action", "label", "property", 1) + assert se.category == "category" + assert se.action == "action" + assert se.label == "label" + assert se.property_ == "property" + assert se.value == 1 + + def test_setters(self): + se = StructuredEvent("category", "action") + se.category = "new_category" + se.action = "new_action" + se.label = "new_label" + se.property_ = "new_property" + se.value = 2 + assert se.category == "new_category" + assert se.action == "new_action" + assert se.label == "new_label" + assert se.property_ == "new_property" + assert se.value 
== 2 diff --git a/snowplow_tracker/test/unit/test_subject.py b/snowplow_tracker/test/unit/test_subject.py new file mode 100644 index 00000000..953a0a74 --- /dev/null +++ b/snowplow_tracker/test/unit/test_subject.py @@ -0,0 +1,116 @@ +# """ +# test_subject.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ + +import unittest +import pytest + +from snowplow_tracker import subject as _subject + + +class TestSubject(unittest.TestCase): + def setUp(self) -> None: + pass + + def test_subject_0(self) -> None: + s = _subject.Subject() + self.assertDictEqual(s.standard_nv_pairs, {"p": _subject.DEFAULT_PLATFORM}) + + s.set_platform("srv") + s.set_user_id("1234") + s.set_screen_resolution(1920, 1080) + s.set_viewport(1080, 1080) + s.set_color_depth(1080) + s.set_timezone("PST") + s.set_lang("EN") + s.set_domain_user_id("domain-user-id") + s.set_domain_session_id("domain-session-id") + s.set_domain_session_index(1) + s.set_ip_address("127.0.0.1") + s.set_useragent("useragent-string") + s.set_network_user_id("network-user-id") + + exp = { + "p": "srv", + "uid": "1234", + "res": "1920x1080", + "vp": "1080x1080", + "cd": 1080, + "tz": "PST", + "lang": "EN", + "ip": "127.0.0.1", + "ua": "useragent-string", + "duid": "domain-user-id", + "sid": "domain-session-id", + "vid": 1, + "tnuid": "network-user-id", + } + 
self.assertDictEqual(s.standard_nv_pairs, exp) + + def test_subject_1(self) -> None: + s = _subject.Subject().set_platform("srv").set_user_id("1234").set_lang("EN") + + exp = {"p": "srv", "uid": "1234", "lang": "EN"} + self.assertDictEqual(s.standard_nv_pairs, exp) + + with pytest.raises(KeyError): + s.standard_nv_pairs["res"] + with pytest.raises(KeyError): + s.standard_nv_pairs["vp"] + with pytest.raises(KeyError): + s.standard_nv_pairs["cd"] + with pytest.raises(KeyError): + s.standard_nv_pairs["tz"] + with pytest.raises(KeyError): + s.standard_nv_pairs["ip"] + with pytest.raises(KeyError): + s.standard_nv_pairs["ua"] + with pytest.raises(KeyError): + s.standard_nv_pairs["duid"] + with pytest.raises(KeyError): + s.standard_nv_pairs["sid"] + with pytest.raises(KeyError): + s.standard_nv_pairs["vid"] + with pytest.raises(KeyError): + s.standard_nv_pairs["tnuid"] + + def test_combine_subject(self) -> None: + s = _subject.Subject() + s.set_color_depth(10) + s.set_domain_session_id("domain_session_id") + + s2 = _subject.Subject() + s2.set_domain_user_id("domain_user_id") + s2.set_lang("en") + + fin_payload_dict = s.combine_subject(s2) + + expected_fin_payload_dict = { + "p": "pc", + "cd": 10, + "sid": "domain_session_id", + "duid": "domain_user_id", + "lang": "en", + } + + expected_subject = { + "p": "pc", + "cd": 10, + "sid": "domain_session_id", + } + + self.assertDictEqual(fin_payload_dict, expected_fin_payload_dict) + self.assertDictEqual(s.standard_nv_pairs, expected_subject) diff --git a/snowplow_tracker/test/unit/test_tracker.py b/snowplow_tracker/test/unit/test_tracker.py index e92babbb..3009790a 100644 --- a/snowplow_tracker/test/unit/test_tracker.py +++ b/snowplow_tracker/test/unit/test_tracker.py @@ -1,40 +1,1249 @@ -""" - test_tracker.py +# """ +# test_tracker.py - Copyright (c) 2013-2014 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. 
- This program is licensed to you under the Apache License Version 2.0, - and you may not use this file except in compliance with the Apache License - Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - http://www.apache.org/licenses/LICENSE-2.0. +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. - Unless required by applicable law or agreed to in writing, - software distributed under the Apache License Version 2.0 is distributed on - an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - express or implied. See the Apache License Version 2.0 for the specific - language governing permissions and limitations there under. +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ - Authors: Anuj More, Alex Dean, Fred Blundun - Copyright: Copyright (c) 2013-2014 Snowplow Analytics Ltd - License: Apache License Version 2.0 -""" +import re +import json +import unittest +import unittest.mock as mock +from freezegun import freeze_time +from typing import Any, Optional -import unittest +from snowplow_tracker.contracts import disable_contracts, enable_contracts from snowplow_tracker.tracker import Tracker +from snowplow_tracker.tracker import VERSION as TRACKER_VERSION +from snowplow_tracker.subject import Subject +from snowplow_tracker.payload import Payload +from snowplow_tracker.self_describing_json import SelfDescribingJson +from snowplow_tracker.events import Event, SelfDescribing, ScreenView + +UNSTRUCT_SCHEMA = "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0" +CONTEXT_SCHEMA = "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1" +LINK_CLICK_SCHEMA = "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1" +ADD_TO_CART_SCHEMA = "iglu:com.snowplowanalytics.snowplow/add_to_cart/jsonschema/1-0-0" +REMOVE_FROM_CART_SCHEMA = ( + "iglu:com.snowplowanalytics.snowplow/remove_from_cart/jsonschema/1-0-0" +) +FORM_CHANGE_SCHEMA = "iglu:com.snowplowanalytics.snowplow/change_form/jsonschema/1-0-0" +FORM_SUBMIT_SCHEMA = "iglu:com.snowplowanalytics.snowplow/submit_form/jsonschema/1-0-0" +SITE_SEARCH_SCHEMA = "iglu:com.snowplowanalytics.snowplow/site_search/jsonschema/1-0-0" +MOBILE_SCREEN_VIEW_SCHEMA = ( + "iglu:com.snowplowanalytics.mobile/screen_view/jsonschema/1-0-0" +) +SCREEN_VIEW_SCHEMA = "iglu:com.snowplowanalytics.snowplow/screen_view/jsonschema/1-0-0" + +# helpers +_TEST_UUID = "5628c4c6-3f8a-43f8-a09f-6ff68f68dfb6" +geoSchema = "iglu:com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-0-0" +geoData = {"latitude": -23.2, "longitude": 43.0} +movSchema = "iglu:com.acme_company/movie_poster/jsonschema/2-1-1" +movData = {"movie": "TestMovie", "year": 2021} + + +def mocked_uuid() -> str: + 
return _TEST_UUID + + +def mocked_track( + event: Any, + context: Optional[Any] = None, + tstamp: Optional[Any] = None, + event_subject: Optional[Any] = None, +) -> None: + pass + + +def mocked_complete_payload( + event: Any, + event_subject: Optional[Any], + context: Optional[Any], + tstamp: Optional[Any], +) -> Payload: + pass + + +def mocked_track_trans_item(*args: Any, **kwargs: Any) -> None: + pass + + +def mocked_track_unstruct(*args: Any, **kwargs: Any) -> None: + pass + + +class ContractsDisabled(object): + def __enter__(self) -> None: + disable_contracts() + + def __exit__(self, type: Any, value: Any, traceback: Any) -> None: + enable_contracts() class TestTracker(unittest.TestCase): + def create_patch(self, name: str) -> Any: + patcher = mock.patch(name) + thing = patcher.start() + thing.side_effect = mock.MagicMock + self.addCleanup(patcher.stop) + return thing - def setUp(self): + def setUp(self) -> None: pass - """ - Testing URI generator - """ + def test_initialisation(self) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + t = Tracker("cloudfront", [e], encode_base64=False, app_id="AF003") + self.assertEqual(t.standard_nv_pairs["tna"], "cloudfront") + self.assertEqual(t.standard_nv_pairs["aid"], "AF003") + self.assertEqual(t.encode_base64, False) + + def test_initialisation_default_optional(self) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + t = Tracker("namespace", e) + self.assertEqual(t.emitters, [e]) + self.assertTrue(t.standard_nv_pairs["aid"] is None) + self.assertEqual(t.encode_base64, True) + + def test_initialisation_emitter_list(self) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e1 = mokEmitter() + e2 = mokEmitter() + + t = Tracker("namespace", [e1, e2]) + self.assertEqual(t.emitters, [e1, e2]) + + def test_initialisation_error(self) -> None: + with self.assertRaises(ValueError): + Tracker("namespace", []) + + def 
test_initialization_with_subject(self) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + s = Subject() + t = Tracker("namespace", e, subject=s) + self.assertIs(t.subject, s) + + def test_get_uuid(self) -> None: + eid = Tracker.get_uuid() + self.assertIsNotNone( + re.match( + r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\Z", eid + ) + ) + + @freeze_time("1970-01-01 00:00:01") + def test_get_timestamp(self) -> None: + tstamp = Tracker.get_timestamp() + self.assertEqual(tstamp, 1000) # 1970-01-01 00:00:01 in ms + + def test_get_timestamp_1(self) -> None: + tstamp = Tracker.get_timestamp(1399021242030) + self.assertEqual(tstamp, 1399021242030) + + def test_get_timestamp_2(self) -> None: + tstamp = Tracker.get_timestamp(1399021242240.0303) + self.assertEqual(tstamp, 1399021242240) + + @freeze_time("1970-01-01 00:00:01") + def test_get_timestamp_3(self) -> None: + tstamp = Tracker.get_timestamp("1399021242030") # test wrong arg type + self.assertEqual(tstamp, 1000) # 1970-01-01 00:00:01 in ms + + @mock.patch("snowplow_tracker.Tracker.track") + def test_alias_of_track_self_describing_event(self, mok_track: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track.side_effect = mocked_track + t = Tracker("namespace", e) + evJson = SelfDescribingJson("test.schema", {"n": "v"}) + # call the alias + t.track_self_describing_event(evJson) + self.assertEqual(mok_track.call_count, 1) + + def test_flush(self) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e1 = mokEmitter() + e2 = mokEmitter() + + t = Tracker("namespace", [e1, e2]) + t.flush() + e1.flush.assert_not_called() + self.assertEqual(e1.sync_flush.call_count, 1) + e2.flush.assert_not_called() + self.assertEqual(e2.sync_flush.call_count, 1) + + def test_flush_async(self) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e1 = mokEmitter() + e2 = mokEmitter() + + t = 
Tracker("namespace", [e1, e2]) + t.flush(is_async=True) + self.assertEqual(e1.flush.call_count, 1) + e1.sync_flush.assert_not_called() + self.assertEqual(e2.flush.call_count, 1) + e2.sync_flush.assert_not_called() + + def test_set_subject(self) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + t = Tracker("namespace", e) + new_subject = Subject() + self.assertIsNot(t.subject, new_subject) + t.set_subject(new_subject) + self.assertIs(t.subject, new_subject) + + def test_add_emitter(self) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e1 = mokEmitter() + e2 = mokEmitter() + + t = Tracker("namespace", e1) + t.add_emitter(e2) + self.assertEqual(t.emitters, [e1, e2]) + + ### + # test track and complete payload methods + ### + + def test_track(self) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e1 = mokEmitter() + e2 = mokEmitter() + e3 = mokEmitter() + + t = Tracker("namespace", [e1, e2, e3]) + + mok_event = self.create_patch("snowplow_tracker.events.Event") + t.track(mok_event) + mok_payload = mok_event.build_payload().nv_pairs + + e1.input.assert_called_once_with(mok_payload) + e2.input.assert_called_once_with(mok_payload) + e3.input.assert_called_once_with(mok_payload) + + @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 + @mock.patch("snowplow_tracker.Tracker.get_uuid") + def test_complete_payload(self, mok_uuid: Any) -> None: + mok_uuid.side_effect = mocked_uuid + + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + t = Tracker("namespace", e) + s = Subject() + event = Event(event_subject=s) + payload = t.complete_payload(event).nv_pairs + + expected = { + "eid": _TEST_UUID, + "dtm": 1618790401000, + "tv": TRACKER_VERSION, + "p": "pc", + "tna": "namespace", + } + self.assertDictEqual(payload, expected) + + @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 + @mock.patch("snowplow_tracker.Tracker.get_uuid") + def 
test_complete_payload_tstamp(self, mok_uuid: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_uuid.side_effect = mocked_uuid + t = Tracker("namespace", e) + s = Subject() + time_in_millis = 100010001000 + event = Event(true_timestamp=time_in_millis, event_subject=s) + + payload = t.complete_payload(event=event).nv_pairs + + expected = { + "tna": "namespace", + "eid": _TEST_UUID, + "dtm": 1618790401000, + "ttm": time_in_millis, + "tv": TRACKER_VERSION, + "p": "pc", + } + self.assertDictEqual(payload, expected) + + @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 + @mock.patch("snowplow_tracker.Tracker.get_uuid") + def test_complete_payload_co(self, mok_uuid: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_uuid.side_effect = mocked_uuid + + t = Tracker("namespace", e, encode_base64=False) + + geo_ctx = SelfDescribingJson(geoSchema, geoData) + mov_ctx = SelfDescribingJson(movSchema, movData) + ctx_array = [geo_ctx, mov_ctx] + event = Event(context=ctx_array) + payload = t.complete_payload(event=event).nv_pairs + + expected_co = { + "schema": CONTEXT_SCHEMA, + "data": [ + {"schema": geoSchema, "data": geoData}, + {"schema": movSchema, "data": movData}, + ], + } + self.assertIn("co", payload) + self.assertDictEqual(json.loads(payload["co"]), expected_co) + + @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 + @mock.patch("snowplow_tracker.Tracker.get_uuid") + def test_complete_payload_cx(self, mok_uuid: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_uuid.side_effect = mocked_uuid + + t = Tracker("namespace", e, encode_base64=True) + + geo_ctx = SelfDescribingJson(geoSchema, geoData) + mov_ctx = SelfDescribingJson(movSchema, movData) + ctx_array = [geo_ctx, mov_ctx] + event = Event(context=ctx_array) + payload = t.complete_payload(event=event).nv_pairs + + self.assertIn("cx", payload) + + 
@freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 + @mock.patch("snowplow_tracker.Tracker.get_uuid") + def test_complete_payload_event_subject(self, mok_uuid: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_uuid.side_effect = mocked_uuid + + t = Tracker("namespace", e) + event_subject = Subject().set_lang("EN").set_user_id("tester") + event = Event(event_subject=event_subject) + payload = t.complete_payload(event=event).nv_pairs + + expected = { + "tna": "namespace", + "eid": _TEST_UUID, + "dtm": 1618790401000, + "tv": TRACKER_VERSION, + "p": "pc", + "lang": "EN", + "uid": "tester", + } + self.assertDictEqual(payload, expected) + + ### + # test track_x methods + ### + + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_self_describing_event(self, mok_track: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track.side_effect = mocked_track + + t = Tracker("namespace", e, encode_base64=False) + event_json = SelfDescribingJson("test.sde.schema", {"n": "v"}) + event = SelfDescribing(event_json=event_json) + actual_pairs = event.build_payload( + encode_base64=t.encode_base64, + json_encoder=t.json_encoder, + ).nv_pairs + + t.track_self_describing_event(event_json) + self.assertEqual(mok_track.call_count, 1) + + complete_args_dict = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_dict), 1) + + # payload + actual_ue_pr = json.loads(actual_pairs["ue_pr"]) + + expectedUePr = { + "data": {"data": {"n": "v"}, "schema": "test.sde.schema"}, + "schema": UNSTRUCT_SCHEMA, + } + + self.assertDictEqual(actual_ue_pr, expectedUePr) + self.assertEqual(actual_pairs["e"], "ue") + + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_self_describing_event_all_args(self, mok_track: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track.side_effect = mocked_track + + t = 
Tracker("namespace", e, encode_base64=False) + event_json = SelfDescribingJson("test.schema", {"n": "v"}) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + event_context = [ctx] + event_tstamp = 1399021242030 + + event = SelfDescribing(event_json=event_json) + actual_pairs = event.build_payload( + encode_base64=t.encode_base64, + json_encoder=t.json_encoder, + ).nv_pairs + + t.track_self_describing_event(event_json, event_context, event_tstamp) + self.assertEqual(mok_track.call_count, 1) + complete_args_dict = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_dict), 1) + + # payload + actualUePr = json.loads(actual_pairs["ue_pr"]) + + expectedUePr = { + "data": {"data": {"n": "v"}, "schema": "test.schema"}, + "schema": UNSTRUCT_SCHEMA, + } + + self.assertDictEqual(actualUePr, expectedUePr) + self.assertEqual(actual_pairs["e"], "ue") + + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_self_describing_event_encode(self, mok_track: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track.side_effect = mocked_track + + t = Tracker("namespace", e, encode_base64=True) + event_json = SelfDescribingJson("test.sde.schema", {"n": "v"}) + + event = SelfDescribing(event_json=event_json) + actual_pairs = event.build_payload( + encode_base64=t.encode_base64, + json_encoder=t.json_encoder, + ).nv_pairs + + t.track_self_describing_event(event_json) + self.assertEqual(mok_track.call_count, 1) + complete_args_dict = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_dict), 1) + self.assertTrue("ue_px" in actual_pairs.keys()) + + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_struct_event(self, mok_track: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track.side_effect = mocked_track + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + 
ev_tstamp = 1399021242030 + t.track_struct_event( + "Mixes", + "Play", + "Test", + "TestProp", + value=3.14, + context=[ctx], + tstamp=ev_tstamp, + ) + self.assertEqual(mok_track.call_count, 1) + complete_args_dict = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_dict), 1) + + actual_payload_arg = complete_args_dict["event"].payload + actual_pairs = actual_payload_arg.nv_pairs + + expected_pairs = { + "e": "se", + "se_ca": "Mixes", + "se_ac": "Play", + "se_la": "Test", + "se_pr": "TestProp", + "se_va": 3.14, + } + self.assertDictEqual(actual_pairs, expected_pairs) + + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_page_view(self, mok_track: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track.side_effect = mocked_track + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + ev_tstamp = 1399021242030 + t.track_page_view( + "example.com", + "Example", + "docs.snowplow.io", + context=[ctx], + tstamp=ev_tstamp, + ) + self.assertEqual(mok_track.call_count, 1) + complete_args_dict = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_dict), 1) + + actual_payload_arg = complete_args_dict["event"].payload + actualPairs = actual_payload_arg.nv_pairs + + expectedPairs = { + "e": "pv", + "url": "example.com", + "page": "Example", + "refr": "docs.snowplow.io", + } + self.assertDictEqual(actualPairs, expectedPairs) + + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_page_ping(self, mok_track: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track.side_effect = mocked_track + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + ev_tstamp = 1399021242030 + t.track_page_ping( + "example.com", + "Example", + "docs.snowplow.io", + 0, + 1, + 2, + 3, + context=[ctx], + tstamp=ev_tstamp, + ) + 
self.assertEqual(mok_track.call_count, 1) + complete_args_dict = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_dict), 1) + + actual_payload_arg = complete_args_dict["event"].payload + actual_pairs = actual_payload_arg.nv_pairs + + expectedPairs = { + "e": "pp", + "url": "example.com", + "page": "Example", + "refr": "docs.snowplow.io", + "pp_mix": 0, + "pp_max": 1, + "pp_miy": 2, + "pp_may": 3, + } + self.assertDictEqual(actual_pairs, expectedPairs) + + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_ecommerce_transaction_item(self, mok_track: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track.side_effect = mocked_track + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + ev_tstamp = 1399021242030 + t.track_ecommerce_transaction_item( + order_id="1234", + sku="sku1234", + price=3.14, + quantity=1, + name="itemName", + category="itemCategory", + currency="itemCurrency", + context=[ctx], + tstamp=ev_tstamp, + ) + self.assertEqual(mok_track.call_count, 1) + complete_args_list = mok_track.call_args_list[0][1] + self.assertEqual(len(complete_args_list), 1) + + actual_payload_arg = complete_args_list["event"].payload + actual_pairs = actual_payload_arg.nv_pairs + + expectedPairs = { + "e": "ti", + "ti_id": "1234", + "ti_sk": "sku1234", + "ti_nm": "itemName", + "ti_ca": "itemCategory", + "ti_pr": 3.14, + "ti_qu": 1, + "ti_cu": "itemCurrency", + } + self.assertDictEqual(actual_pairs, expectedPairs) + + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_ecommerce_transaction_no_items(self, mok_track: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track.side_effect = mocked_track + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + evTstamp = 1399021242030 + t.track_ecommerce_transaction( + "1234", + 10, + 
"transAffiliation", + 2.5, + 1.5, + "transCity", + "transState", + "transCountry", + "transCurrency", + context=[ctx], + tstamp=evTstamp, + ) + self.assertEqual(mok_track.call_count, 1) + completeArgsList = mok_track.call_args_list[0][1] + self.assertEqual(len(completeArgsList), 1) + + actualPayloadArg = completeArgsList["event"].payload + actualPairs = actualPayloadArg.nv_pairs + + expectedPairs = { + "e": "tr", + "tr_id": "1234", + "tr_tt": 10, + "tr_af": "transAffiliation", + "tr_tx": 2.5, + "tr_sh": 1.5, + "tr_ci": "transCity", + "tr_st": "transState", + "tr_co": "transCountry", + "tr_cu": "transCurrency", + } + self.assertDictEqual(actualPairs, expectedPairs) + + @mock.patch("snowplow_tracker.Tracker.track_ecommerce_transaction_item") + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_ecommerce_transaction_with_items( + self, mok_track: Any, mok_track_trans_item: Any + ) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track.side_effect = mocked_track + mok_track_trans_item.side_effect = mocked_track_trans_item + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + evTstamp = 1399021242030 + transItems = [ + {"sku": "sku1234", "quantity": 3, "price": 3.14}, + {"sku": "sku5678", "quantity": 1, "price": 2.72}, + ] + t.track_ecommerce_transaction( + order_id="1234", + total_value=10, + affiliation="transAffiliation", + tax_value=2.5, + shipping=1.5, + city="transCity", + state="transState", + country="transCountry", + currency="transCurrency", + items=transItems, + context=[ctx], + tstamp=evTstamp, + ) + + # Transaction + callCompleteArgsList = mok_track.call_args_list + firstCallArgsList = callCompleteArgsList[0][1] + self.assertEqual(len(firstCallArgsList), 1) + + actualPayloadArg = firstCallArgsList["event"].payload + actualPairs = actualPayloadArg.nv_pairs + + expectedTransPairs = { + "e": "tr", + "tr_id": "1234", + "tr_tt": 10, + "tr_af": 
"transAffiliation", + "tr_tx": 2.5, + "tr_sh": 1.5, + "tr_ci": "transCity", + "tr_st": "transState", + "tr_co": "transCountry", + "tr_cu": "transCurrency", + } + self.assertDictEqual(actualPairs, expectedTransPairs) + + # Items + calls_to_track_trans_item = mok_track_trans_item.call_count + self.assertEqual(calls_to_track_trans_item, 2) + callTrackItemsArgsList = mok_track_trans_item.call_args_list + # 1st item + firstItemCallArgs = callTrackItemsArgsList[0][0] + self.assertEqual((), firstItemCallArgs) + firstItemCallKwargs = callTrackItemsArgsList[0][1] + + expectedFirstItemPairs = { + "sku": "sku1234", + "quantity": 3, + "price": 3.14, + "order_id": "1234", + "currency": "transCurrency", + "tstamp": evTstamp, + "event_subject": None, + "context": [ctx], + } + + self.assertDictEqual(firstItemCallKwargs, expectedFirstItemPairs) + # 2nd item + secItemCallArgs = callTrackItemsArgsList[1][0] + self.assertEqual((), secItemCallArgs) + secItemCallKwargs = callTrackItemsArgsList[1][1] + + expectedSecItemPairs = { + "sku": "sku5678", + "quantity": 1, + "price": 2.72, + "order_id": "1234", + "currency": "transCurrency", + "tstamp": evTstamp, + "event_subject": None, + "context": [ctx], + } + + self.assertDictEqual(secItemCallKwargs, expectedSecItemPairs) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_link_click(self, mok_track_unstruct: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + evTstamp = 1399021242030 + + t.track_link_click( + "example.com", + "elemId", + ["elemClass1", "elemClass2"], + "_blank", + "elemContent", + context=[ctx], + tstamp=evTstamp, + ) + + expected = { + "schema": LINK_CLICK_SCHEMA, + "data": { + "targetUrl": "example.com", + "elementId": "elemId", + "elementClasses": ["elemClass1", "elemClass2"], + 
"elementTarget": "_blank", + "elementContent": "elemContent", + }, + } + + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["tstamp"], evTstamp) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_link_click_optional_none(self, mok_track_unstruct: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + + t.track_link_click("example.com") + + expected = { + "schema": LINK_CLICK_SCHEMA, + "data": { + "targetUrl": "example.com", + }, + } + + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertTrue(callArgs["context"] is None) + self.assertTrue(callArgs["tstamp"] is None) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_add_to_cart(self, mok_track_unstruct: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + evTstamp = 1399021242030 + + t.track_add_to_cart( + "sku1234", + 3, + "testName", + "testCategory", + 3.14, + "testCurrency", + context=[ctx], + tstamp=evTstamp, + ) + + expected = { + "schema": ADD_TO_CART_SCHEMA, + "data": { + "sku": "sku1234", + "quantity": 3, + "name": "testName", + "category": "testCategory", + "unitPrice": 3.14, + "currency": "testCurrency", + }, + } + + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], 
ctx) + self.assertEqual(callArgs["tstamp"], evTstamp) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_add_to_cart_optional_none(self, mok_track_unstruct: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + + t.track_add_to_cart("sku1234", 1) + + expected = { + "schema": ADD_TO_CART_SCHEMA, + "data": {"sku": "sku1234", "quantity": 1}, + } + + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertTrue(callArgs["context"] is None) + self.assertTrue(callArgs["tstamp"] is None) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_remove_from_cart(self, mok_track_unstruct: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + evTstamp = 1399021242030 + + t.track_remove_from_cart( + "sku1234", + 3, + "testName", + "testCategory", + 3.14, + "testCurrency", + context=[ctx], + tstamp=evTstamp, + ) + + expected = { + "schema": REMOVE_FROM_CART_SCHEMA, + "data": { + "sku": "sku1234", + "quantity": 3, + "name": "testName", + "category": "testCategory", + "unitPrice": 3.14, + "currency": "testCurrency", + }, + } + + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["tstamp"], evTstamp) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_remove_from_cart_optional_none( + self, mok_track_unstruct: Any + ) -> None: + mokEmitter = 
self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + + t.track_remove_from_cart("sku1234", 1) + + expected = { + "schema": REMOVE_FROM_CART_SCHEMA, + "data": {"sku": "sku1234", "quantity": 1}, + } + + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertTrue(callArgs["context"] is None) + self.assertTrue(callArgs["tstamp"] is None) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_form_change(self, mok_track_unstruct: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + evTstamp = 1399021242030 + + t.track_form_change( + "testFormId", + "testElemId", + "INPUT", + "testValue", + "text", + ["testClass1", "testClass2"], + context=[ctx], + tstamp=evTstamp, + ) + + expected = { + "schema": FORM_CHANGE_SCHEMA, + "data": { + "formId": "testFormId", + "elementId": "testElemId", + "nodeName": "INPUT", + "value": "testValue", + "type": "text", + "elementClasses": ["testClass1", "testClass2"], + }, + } + + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["tstamp"], evTstamp) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_form_change_optional_none(self, mok_track_unstruct: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + t.track_form_change("testFormId", "testElemId", 
"INPUT", "testValue") + + expected = { + "schema": FORM_CHANGE_SCHEMA, + "data": { + "formId": "testFormId", + "elementId": "testElemId", + "nodeName": "INPUT", + "value": "testValue", + }, + } + + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertTrue(callArgs["context"] is None) + self.assertTrue(callArgs["tstamp"] is None) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_form_submit(self, mok_track_unstruct: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + evTstamp = 1399021242030 + elems = [ + { + "name": "user_email", + "value": "fake@email.fake", + "nodeName": "INPUT", + "type": "email", + } + ] + + t.track_form_submit( + "testFormId", + ["testClass1", "testClass2"], + elems, + context=[ctx], + tstamp=evTstamp, + ) + + expected = { + "schema": FORM_SUBMIT_SCHEMA, + "data": { + "formId": "testFormId", + "formClasses": ["testClass1", "testClass2"], + "elements": elems, + }, + } + + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["tstamp"], evTstamp) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_form_submit_invalid_element_type( + self, mok_track_unstruct: Any + ) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + evTstamp = 1399021242030 + elems = [ + { + "name": "user_email", + 
"value": "fake@email.fake", + "nodeName": "INPUT", + "type": "invalid", + } + ] + + with self.assertRaises(ValueError): + t.track_form_submit( + "testFormId", + ["testClass1", "testClass2"], + elems, + context=[ctx], + tstamp=evTstamp, + ) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_form_submit_invalid_element_type_disabled_contracts( + self, mok_track_unstruct: Any + ) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + evTstamp = 1399021242030 + elems = [ + { + "name": "user_email", + "value": "fake@email.fake", + "nodeName": "INPUT", + "type": "invalid", + } + ] + + with ContractsDisabled(): + t.track_form_submit( + "testFormId", + ["testClass1", "testClass2"], + elems, + context=[ctx], + tstamp=evTstamp, + ) + + expected = { + "schema": FORM_SUBMIT_SCHEMA, + "data": { + "formId": "testFormId", + "formClasses": ["testClass1", "testClass2"], + "elements": elems, + }, + } + + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["tstamp"], evTstamp) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_form_submit_optional_none(self, mok_track_unstruct: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + t.track_form_submit("testFormId") + + expected = {"schema": FORM_SUBMIT_SCHEMA, "data": {"formId": "testFormId"}} + + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + 
self.assertTrue(callArgs["context"] is None) + self.assertTrue(callArgs["tstamp"] is None) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_form_submit_empty_elems(self, mok_track_unstruct: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + t.track_form_submit("testFormId", elements=[]) + + expected = {"schema": FORM_SUBMIT_SCHEMA, "data": {"formId": "testFormId"}} + + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_site_search(self, mok_track_unstruct: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + evTstamp = 1399021242030 + + t.track_site_search( + ["track", "search"], {"new": True}, 100, 10, context=[ctx], tstamp=evTstamp + ) + + expected = { + "schema": SITE_SEARCH_SCHEMA, + "data": { + "terms": ["track", "search"], + "filters": {"new": True}, + "totalResults": 100, + "pageResults": 10, + }, + } + + callArgs = mok_track_unstruct.call_args_list[0][1] + + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["tstamp"], evTstamp) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_site_search_optional_none(self, mok_track_unstruct: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + 
t.track_site_search(["track", "search"]) + + expected = { + "schema": SITE_SEARCH_SCHEMA, + "data": {"terms": ["track", "search"]}, + } + + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertTrue(callArgs["context"] is None) + self.assertTrue(callArgs["tstamp"] is None) + + @mock.patch("snowplow_tracker.Tracker.track") + def test_track_mobile_screen_view(self, mok_track: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track.side_effect = mocked_track + + t = Tracker("namespace", e) + + screen_view = ScreenView(name="screenName", id_="screenId") + actual_pairs = screen_view.build_payload( + encode_base64=False, + json_encoder=t.json_encoder, + ).nv_pairs + + t.track(screen_view) + + self.assertEqual(mok_track.call_count, 1) + complete_args_dict = mok_track.call_args_list[0][0] + self.assertEqual(len(complete_args_dict), 1) + actual_ue_pr = json.loads(actual_pairs["ue_pr"]) + + expected = { + "schema": MOBILE_SCREEN_VIEW_SCHEMA, + "data": {"id": "screenId", "name": "screenName"}, + } + + complete_args_dict = mok_track.call_args_list[0][1] + complete_args_dict = mok_track.call_args_list[0][1] + self.assertDictEqual(actual_ue_pr["data"], expected) + + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_screen_view(self, mok_track_unstruct: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker("namespace", e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + evTstamp = 1399021242030 + + t.track_screen_view("screenName", "screenId", context=[ctx], tstamp=evTstamp) + + expected = { + "schema": SCREEN_VIEW_SCHEMA, + "data": {"name": "screenName", "id": "screenId"}, + } - def test_as_collector_uri(self): - host = 
"d3rkrsqld9gmqf.cloudfront.net" - output = Tracker(host).collector_uri - exp_output = "http://d3rkrsqld9gmqf.cloudfront.net/i" - self.assertEquals(output, exp_output) + callArgs = mok_track_unstruct.call_args_list[0][1] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs["event_json"].to_json(), expected) + self.assertIs(callArgs["context"][0], ctx) + self.assertEqual(callArgs["tstamp"], evTstamp) diff --git a/snowplow_tracker/tracker.py b/snowplow_tracker/tracker.py index bc17fe5e..4dc489dc 100644 --- a/snowplow_tracker/tracker.py +++ b/snowplow_tracker/tracker.py @@ -1,342 +1,1051 @@ -""" - tracker.py +# """ +# tracker.py - Copyright (c) 2013-2014 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. - This program is licensed to you under the Apache License Version 2.0, - and you may not use this file except in compliance with the Apache License - Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - http://www.apache.org/licenses/LICENSE-2.0. +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. - Unless required by applicable law or agreed to in writing, - software distributed under the Apache License Version 2.0 is distributed on - an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - express or implied. See the Apache License Version 2.0 for the specific - language governing permissions and limitations there under. +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. 
See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ - Authors: Anuj More, Alex Dean, Fred Blundun - Copyright: Copyright (c) 2013-2014 Snowplow Analytics Ltd - License: Apache License Version 2.0 -""" +import time +import uuid +from typing import Any, Optional, Union, List, Dict, Sequence +from warnings import warn -import requests -from snowplow_tracker import payload, _version -from contracts import contract, new_contract +from snowplow_tracker import payload, SelfDescribingJson +from snowplow_tracker.subject import Subject +from snowplow_tracker.contracts import non_empty_string, one_of, non_empty, form_element +from snowplow_tracker.constants import ( + VERSION, + DEFAULT_ENCODE_BASE64, + BASE_SCHEMA_PATH, + SCHEMA_TAG, +) +from snowplow_tracker.events import ( + Event, + PagePing, + PageView, + SelfDescribing, + StructuredEvent, + ScreenView, +) +from snowplow_tracker.typing import ( + JsonEncoderFunction, + EmitterProtocol, + FORM_NODE_NAMES, + FORM_TYPES, + FormNodeName, + ElementClasses, + FormClasses, +) """ -Constants & config +Tracker class """ -VERSION = "py-%s" % _version.__version__ -DEFAULT_ENCODE_BASE64 = True -DEFAULT_PLATFORM = "pc" -SUPPORTED_PLATFORMS = set(["pc", "tv", "mob", "cnsl", "iot"]) -DEFAULT_VENDOR = "com.snowplowanalytics" +class Tracker: + def __init__( + self, + namespace: str, + emitters: Union[List[EmitterProtocol], EmitterProtocol], + subject: Optional[Subject] = None, + app_id: Optional[str] = None, + encode_base64: bool = DEFAULT_ENCODE_BASE64, + json_encoder: Optional[JsonEncoderFunction] = None, + ) -> None: + """ + :param namespace: Identifier for the Tracker instance + :type namespace: string + :param emitters: Emitters to which events will be sent + :type emitters: list[>0](emitter) | emitter + :param subject: Subject to be tracked + :type subject: subject | None + :param app_id: Application ID + :type app_id: string_or_none + :param encode_base64: 
Whether JSONs in the payload should be base-64 encoded + :type encode_base64: bool + :param json_encoder: Custom JSON serializer that gets called on non-serializable object + :type json_encoder: function | None + """ + if subject is None: + subject = Subject() -""" -Tracker class -""" + if isinstance(emitters, list): + non_empty(emitters) + self.emitters = emitters + else: + self.emitters = [emitters] + self.subject: Optional[Subject] = subject + self.encode_base64 = encode_base64 + self.json_encoder = json_encoder -class Tracker: + self.standard_nv_pairs = {"tv": VERSION, "tna": namespace, "aid": app_id} + self.timer = None - new_contract("non_empty_string", lambda s: isinstance(s, str) - and len(s) > 0) - new_contract("string_or_none", lambda s: (isinstance(s, str) - and len(s) > 0) or s is None) - new_contract("payload", lambda s: isinstance(s, payload.Payload)) + @staticmethod + def get_uuid() -> str: + """ + Set transaction ID for the payload once during the lifetime of the + event. - def __init__(self, collector_uri, namespace=""): + :rtype: string """ - Constructor + return str(uuid.uuid4()) + + @staticmethod + def get_timestamp(tstamp: Optional[float] = None) -> int: + """ + :param tstamp: User-input timestamp or None + :type tstamp: int | float | None + :rtype: int """ - self.collector_uri = self.as_collector_uri(collector_uri) + if isinstance( + tstamp, + ( + int, + float, + ), + ): + return int(tstamp) + return int(time.time() * 1000) + + """ + Tracking methods + """ + + def track( + self, + event: Event, + ) -> Optional[str]: + """ + Send the event payload to a emitter. Returns the tracked event ID. 
+ :param event: Event + :type event: events.Event + :rtype: String + """ + + payload = self.complete_payload( + event=event, + ) + + for emitter in self.emitters: + emitter.input(payload.nv_pairs) - self.config = { - "encode_base64": DEFAULT_ENCODE_BASE64 - } + if "eid" in payload.nv_pairs.keys(): + return payload.nv_pairs["eid"] - self.standard_nv_pairs = { - "p": DEFAULT_PLATFORM, - "tv": VERSION, - "tna": namespace - } + return None - @contract - def as_collector_uri(self, host): + def complete_payload( + self, + event: Event, + ) -> payload.Payload: + payload = event.build_payload( + encode_base64=self.encode_base64, + json_encoder=self.json_encoder, + subject=self.subject, + ) + + payload.add("eid", Tracker.get_uuid()) + payload.add("dtm", Tracker.get_timestamp()) + payload.add_dict(self.standard_nv_pairs) + + return payload + + def track_page_view( + self, + page_url: str, + page_title: Optional[str] = None, + referrer: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": """ - Method to create a URL + :param page_url: URL of the viewed page + :type page_url: non_empty_string + :param page_title: Title of the viewed page + :type page_title: string_or_none + :param referrer: Referrer of the page + :type referrer: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker + """ + warn( + "track_page_view will be removed in future versions. 
Please use the new PageView class to track the event.", + DeprecationWarning, + stacklevel=2, + ) + + pv = PageView( + page_url=page_url, + page_title=page_title, + referrer=referrer, + event_subject=event_subject, + context=context, + true_timestamp=tstamp, + ) + + self.track(event=pv) + return self - :param host: URL input by user - :type host: str - :rtype: str + def track_page_ping( + self, + page_url: str, + page_title: Optional[str] = None, + referrer: Optional[str] = None, + min_x: Optional[int] = None, + max_x: Optional[int] = None, + min_y: Optional[int] = None, + max_y: Optional[int] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": """ - return "".join(["http://", host, "/i"]) + :param page_url: URL of the viewed page + :type page_url: non_empty_string + :param page_title: Title of the viewed page + :type page_title: string_or_none + :param referrer: Referrer of the page + :type referrer: string_or_none + :param min_x: Minimum page x offset seen in the last ping period + :type min_x: int | None + :param max_x: Maximum page x offset seen in the last ping period + :type max_x: int | None + :param min_y: Minimum page y offset seen in the last ping period + :type min_y: int | None + :param max_y: Maximum page y offset seen in the last ping period + :type max_y: int | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker + """ + warn( + "track_page_ping will be removed in future versions. 
Please use the new PagePing class to track the event.", + DeprecationWarning, + stacklevel=2, + ) - """ - Fire a GET request - """ + pp = PagePing( + page_url=page_url, + page_title=page_title, + referrer=referrer, + min_x=min_x, + max_x=max_x, + min_y=min_y, + max_y=max_y, + context=context, + true_timestamp=tstamp, + event_subject=event_subject, + ) - @contract - def http_get(self, payload): + self.track(event=pp) + return self + + def track_link_click( + self, + target_url: str, + element_id: Optional[str] = None, + element_classes: Optional[ElementClasses] = None, + element_target: Optional[str] = None, + element_content: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": + """ + :param target_url: Target URL of the link + :type target_url: non_empty_string + :param element_id: ID attribute of the HTML element + :type element_id: string_or_none + :param element_classes: Classes of the HTML element + :type element_classes: list(str) | tuple(str,\\*) | None + :param element_target: ID attribute of the HTML element + :type element_target: string_or_none + :param element_content: The content of the HTML element + :type element_content: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker """ - Send a GET request to the collector URI (generated from the - new_tracker methods) and return the relevant error message if any. + warn( + "track_link_click will be removed in future versions. 
Please use the new SelfDescribing class to track the event.", + DeprecationWarning, + stacklevel=2, + ) + non_empty_string(target_url) - :param payload: Generated dict track() - :type payload: payload + properties: Dict[str, Union[str, ElementClasses]] = {} + properties["targetUrl"] = target_url + if element_id is not None: + properties["elementId"] = element_id + if element_classes is not None: + properties["elementClasses"] = element_classes + if element_target is not None: + properties["elementTarget"] = element_target + if element_content is not None: + properties["elementContent"] = element_content + + event_json = SelfDescribingJson( + "%s/link_click/%s/1-0-1" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) + + self.track_self_describing_event( + event_json=event_json, + context=context, + tstamp=tstamp, + event_subject=event_subject, + ) + return self + + def track_add_to_cart( + self, + sku: str, + quantity: int, + name: Optional[str] = None, + category: Optional[str] = None, + unit_price: Optional[float] = None, + currency: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": """ + :param sku: Item SKU or ID + :type sku: non_empty_string + :param quantity: Number added to cart + :type quantity: int + :param name: Item's name + :type name: string_or_none + :param category: Item's category + :type category: string_or_none + :param unit_price: Item's price + :type unit_price: int | float | None + :param currency: Type of currency the price is in + :type currency: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker + """ + warn( + "track_add_to_cart will be deprecated in future versions.", + 
DeprecationWarning, + stacklevel=2, + ) + non_empty_string(sku) - r = requests.get(self.collector_uri, params=payload.context) - code = r.status_code - if code < 0 or code > 600: - return "".join(["Unrecognised status code [", str(code), "]"]) - elif code >= 400 and code < 500: - return "".join(["HTTP status code [", str(code), - "] is a client error"]) - elif code >= 500: - return "".join(["HTTP status code [", str(code), - "] is a server error"]) - return r.url + properties: Union[Dict[str, Union[str, float, int]]] = {} + properties["sku"] = sku + properties["quantity"] = quantity + if name is not None: + properties["name"] = name + if category is not None: + properties["category"] = category + if unit_price is not None: + properties["unitPrice"] = unit_price + if currency is not None: + properties["currency"] = currency - """ - Setter methods - """ + event_json = SelfDescribingJson( + "%s/add_to_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) - @contract - def set_base64_to(self, value): + self.track_self_describing_event( + event_json=event_json, + context=context, + tstamp=tstamp, + event_subject=event_subject, + ) + return self + + def track_remove_from_cart( + self, + sku: str, + quantity: int, + name: Optional[str] = None, + category: Optional[str] = None, + unit_price: Optional[float] = None, + currency: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": """ - :param value: Boolean value - :type value: bool + :param sku: Item SKU or ID + :type sku: non_empty_string + :param quantity: Number added to cart + :type quantity: int + :param name: Item's name + :type name: string_or_none + :param category: Item's category + :type category: string_or_none + :param unit_price: Item's price + :type unit_price: int | float | None + :param currency: Type of currency the price is in + :type currency: string_or_none + :param context: 
Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker """ - self.config["encode_base64"] = value + warn( + "track_remove_from_cart will be deprecated in future versions.", + DeprecationWarning, + stacklevel=2, + ) + non_empty_string(sku) + + properties: Dict[str, Union[str, float, int]] = {} + properties["sku"] = sku + properties["quantity"] = quantity + if name is not None: + properties["name"] = name + if category is not None: + properties["category"] = category + if unit_price is not None: + properties["unitPrice"] = unit_price + if currency is not None: + properties["currency"] = currency + + event_json = SelfDescribingJson( + "%s/remove_from_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) - @contract - def set_platform(self, value): + self.track_self_describing_event( + event_json=event_json, + context=context, + tstamp=tstamp, + event_subject=event_subject, + ) + return self + + def track_form_change( + self, + form_id: str, + element_id: Optional[str], + node_name: FormNodeName, + value: Optional[str], + type_: Optional[str] = None, + element_classes: Optional[ElementClasses] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": """ - :param value: One of ["pc", "tv", "mob", "cnsl", "iot"] - :type value: str + :param form_id: ID attribute of the HTML form + :type form_id: non_empty_string + :param element_id: ID attribute of the HTML element + :type element_id: string_or_none + :param node_name: Type of input element + :type node_name: form_node_name + :param value: Value of the input element + :type value: string_or_none + :param type_: Type of data the element represents + :type type_: non_empty_string, form_type + :param 
element_classes: Classes of the HTML element + :type element_classes: list(str) | tuple(str,\\*) | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker """ - if value in SUPPORTED_PLATFORMS: - self.standard_nv_pairs["p"] = value - else: - raise RuntimeError(value + " is not a supported platform") + warn( + "track_form_change will be removed in future versions. Please use the new SelfDescribing class to track the event.", + DeprecationWarning, + stacklevel=2, + ) - @contract - def set_user_id(self, user_id): + non_empty_string(form_id) + one_of(node_name, FORM_NODE_NAMES) + if type_ is not None: + one_of(type_.lower(), FORM_TYPES) + + properties: Dict[str, Union[Optional[str], ElementClasses]] = dict() + properties["formId"] = form_id + properties["elementId"] = element_id + properties["nodeName"] = node_name + properties["value"] = value + if type_ is not None: + properties["type"] = type_ + if element_classes is not None: + properties["elementClasses"] = element_classes + + event_json = SelfDescribingJson( + "%s/change_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) + + self.track_self_describing_event( + event_json=event_json, + context=context, + tstamp=tstamp, + event_subject=event_subject, + ) + return self + + def track_form_submit( + self, + form_id: str, + form_classes: Optional[FormClasses] = None, + elements: Optional[List[Dict[str, Any]]] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": + """ + :param form_id: ID attribute of the HTML form + :type form_id: non_empty_string + :param form_classes: Classes of the HTML form + :type form_classes: list(str) | tuple(str,\\*) | None + :param elements: 
The elements of the HTML form + :type elements: list(form_element) | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker + """ + warn( + "track_form_submit will be removed in future versions. Please use the new SelfDescribing class to track the event.", + DeprecationWarning, + stacklevel=2, + ) + non_empty_string(form_id) + + for element in elements or []: + form_element(element) + + properties: Dict[ + str, Union[str, ElementClasses, FormClasses, List[Dict[str, Any]]] + ] = dict() + properties["formId"] = form_id + if form_classes is not None: + properties["formClasses"] = form_classes + if elements is not None and len(elements) > 0: + properties["elements"] = elements + + event_json = SelfDescribingJson( + "%s/submit_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) + + self.track_self_describing_event( + event_json=event_json, + context=context, + tstamp=tstamp, + event_subject=event_subject, + ) + return self + + def track_site_search( + self, + terms: Sequence[str], + filters: Optional[Dict[str, Union[str, bool]]] = None, + total_results: Optional[int] = None, + page_results: Optional[int] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": + """ + :param terms: Search terms + :type terms: seq[>=1](str) + :param filters: Filters applied to the search + :type filters: dict(str:str|bool) | None + :param total_results: Total number of results returned + :type total_results: int | None + :param page_results: Total number of pages of results + :type page_results: int | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + 
:type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker + """ + warn( + "track_site_search will be removed in future versions. Please use the new SelfDescribing class to track the event.", + DeprecationWarning, + stacklevel=2, + ) + non_empty(terms) + + properties: Dict[ + str, Union[Sequence[str], Dict[str, Union[str, bool]], int] + ] = {} + properties["terms"] = terms + if filters is not None: + properties["filters"] = filters + if total_results is not None: + properties["totalResults"] = total_results + if page_results is not None: + properties["pageResults"] = page_results + + event_json = SelfDescribingJson( + "%s/site_search/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) + + self.track_self_describing_event( + event_json=event_json, + context=context, + tstamp=tstamp, + event_subject=event_subject, + ) + return self + + def track_ecommerce_transaction_item( + self, + order_id: str, + sku: str, + price: float, + quantity: int, + name: Optional[str] = None, + category: Optional[str] = None, + currency: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": + """ + This is an internal method called by track_ecommerce_transaction. + It is not for public use. 
+ + :param order_id: Order ID + :type order_id: non_empty_string + :param sku: Item SKU + :type sku: non_empty_string + :param price: Item price + :type price: int | float + :param quantity: Item quantity + :type quantity: int + :param name: Item name + :type name: string_or_none + :param category: Item category + :type category: string_or_none + :param currency: The currency the price is expressed in + :type currency: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker + """ + warn( + "track_ecommerce_transaction_item will be deprecated in future versions.", + DeprecationWarning, + stacklevel=2, + ) + non_empty_string(order_id) + non_empty_string(sku) + + event = Event( + event_subject=event_subject, context=context, true_timestamp=tstamp + ) + event.payload.add("e", "ti") + event.payload.add("ti_id", order_id) + event.payload.add("ti_sk", sku) + event.payload.add("ti_nm", name) + event.payload.add("ti_ca", category) + event.payload.add("ti_pr", price) + event.payload.add("ti_qu", quantity) + event.payload.add("ti_cu", currency) + + self.track(event=event) + return self + + def track_ecommerce_transaction( + self, + order_id: str, + total_value: float, + affiliation: Optional[str] = None, + tax_value: Optional[float] = None, + shipping: Optional[float] = None, + city: Optional[str] = None, + state: Optional[str] = None, + country: Optional[str] = None, + currency: Optional[str] = None, + items: Optional[List[Dict[str, Any]]] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": """ - :param user_id: User ID - :type user_id: non_empty_string + :param order_id: ID of the eCommerce transaction + :type order_id: 
non_empty_string + :param total_value: Total transaction value + :type total_value: int | float + :param affiliation: Transaction affiliation + :type affiliation: string_or_none + :param tax_value: Transaction tax value + :type tax_value: int | float | None + :param shipping: Delivery cost charged + :type shipping: int | float | None + :param city: Delivery address city + :type city: string_or_none + :param state: Delivery address state + :type state: string_or_none + :param country: Delivery address country + :type country: string_or_none + :param currency: The currency the price is expressed in + :type currency: string_or_none + :param items: The items in the transaction + :type items: list(dict(str:\\*)) | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker """ - self.standard_nv_pairs["uid"] = user_id + warn( + "track_ecommerce_transaction will be deprecated in future versions.", + DeprecationWarning, + stacklevel=2, + ) + non_empty_string(order_id) + + event = Event( + event_subject=event_subject, context=context, true_timestamp=tstamp + ) + event.payload.add("e", "tr") + event.payload.add("tr_id", order_id) + event.payload.add("tr_tt", total_value) + event.payload.add("tr_af", affiliation) + event.payload.add("tr_tx", tax_value) + event.payload.add("tr_sh", shipping) + event.payload.add("tr_ci", city) + event.payload.add("tr_st", state) + event.payload.add("tr_co", country) + event.payload.add("tr_cu", currency) + + tstamp = Tracker.get_timestamp(tstamp) + + self.track(event=event) + + if items is None: + items = [] + for item in items: + item["order_id"] = order_id + item["currency"] = currency + item["tstamp"] = tstamp + item["event_subject"] = event_subject + item["context"] = context + 
self.track_ecommerce_transaction_item(**item) + + return self + + def track_screen_view( + self, + name: Optional[str] = None, + id_: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": + """ + :param name: The name of the screen view event + :type name: string_or_none + :param id_: Screen view ID + :type id_: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker + """ + warn( + "track_screen_view will be removed in future versions. Please use the new ScreenView class to track the event.", + DeprecationWarning, + stacklevel=2, + ) + screen_view_properties = {} + if name is not None: + screen_view_properties["name"] = name + if id_ is not None: + screen_view_properties["id"] = id_ + + event_json = SelfDescribingJson( + "%s/screen_view/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), + screen_view_properties, + ) + + self.track_self_describing_event( + event_json=event_json, + context=context, + tstamp=tstamp, + event_subject=event_subject, + ) + return self - @contract - def set_app_id(self, app_id): + def track_mobile_screen_view( + self, + name: str, + id_: Optional[str] = None, + type: Optional[str] = None, + previous_name: Optional[str] = None, + previous_id: Optional[str] = None, + previous_type: Optional[str] = None, + transition_type: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": """ - :param app_id: App ID - :type app_id: str + :param name: The name of the screen view event + :type name: string_or_none + :param id_: Screen view ID. This must be of type UUID. 
+ :type id_: string | None + :param type: The type of screen that was viewed e.g. feed / carousel. + :type type: string | None + :param previous_name: The name of the previous screen. + :type previous_name: string | None + :param previous_id: The screenview ID of the previous screenview. + :type previous_id: string | None + :param previous_type: The screen type of the previous screenview + :type previous_type: string | None + :param transition_type: The type of transition that led to the screen being viewed. + :type transition_type: string | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker """ - self.standard_nv_pairs["aid"] = app_id + warn( + "track_mobile_screen_view will be removed in future versions. Please use the new ScreenView class to track the event.", + DeprecationWarning, + stacklevel=2, + ) + if id_ is None: + id_ = self.get_uuid() - @contract - def set_screen_resolution(self, width, height): + sv = ScreenView( + name=name, + id_=id_, + type=type, + previous_name=previous_name, + previous_id=previous_id, + previous_type=previous_type, + transition_type=transition_type, + event_subject=event_subject, + context=context, + true_timestamp=tstamp, + ) + + self.track(event=sv) + return self + + def track_struct_event( + self, + category: str, + action: str, + label: Optional[str] = None, + property_: Optional[str] = None, + value: Optional[Union[int, float]] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": """ - :param width: Width of the screen - :param height: Height of the screen - :type width: int,>0 - :type height: int,>0 + :param category: Category of the event + :type category: non_empty_string + :param action: The 
event itself + :type action: non_empty_string + :param label: Refer to the object the action is + performed on + :type label: string_or_none + :param property_: Property associated with either the action + or the object + :type property_: string_or_none + :param value: A value associated with the user action + :type value: int | float | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker """ - self.standard_nv_pairs["res"] = "".join([str(width), "x", str(height)]) + warn( + "track_struct_event will be removed in future versions. Please use the new Structured class to track the event.", + DeprecationWarning, + stacklevel=2, + ) + se = StructuredEvent( + category=category, + action=action, + label=label, + property_=property_, + value=value, + context=context, + true_timestamp=tstamp, + event_subject=event_subject, + ) + + self.track( + event=se, + ) + return self - @contract - def set_viewport(self, width, height): + def track_self_describing_event( + self, + event_json: SelfDescribingJson, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": """ - :param width: Width of the viewport - :param height: Height of the viewport - :type width: int,>0 - :type height: int,>0 + :param event_json: The properties of the event. 
Has two fields: + A "data" field containing the event properties and + A "schema" field identifying the schema against which the data is validated + :type event_json: self_describing_json + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker """ - self.standard_nv_pairs["vp"] = "".join([str(width), "x", str(height)]) + warn( + "track_self_describing_event will be removed in future versions. Please use the new SelfDescribing class to track the event.", + DeprecationWarning, + stacklevel=2, + ) - @contract - def set_color_depth(self, depth): + sd = SelfDescribing( + event_json=event_json, + context=context, + true_timestamp=tstamp, + event_subject=event_subject, + ) + self.track( + event=sd, + ) + return self + + # Alias + def track_unstruct_event( + self, + event_json: SelfDescribingJson, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[Subject] = None, + ) -> "Tracker": """ - :param depth: Depth of the color on the screen - :type depth: int + :param event_json: The properties of the event. Has two fields: + A "data" field containing the event properties and + A "schema" field identifying the schema against which the data is validated + :type event_json: self_describing_json + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: Tracker """ - self.standard_nv_pairs["cd"] = depth + warn( + "track_unstruct_event will be deprecated in future versions. 
Please use track_self_describing_event.", + DeprecationWarning, + stacklevel=2, + ) + + self.track_self_describing_event( + event_json=event_json, + context=context, + tstamp=tstamp, + event_subject=event_subject, + ) + return self - @contract - def set_timezone(self, timezone): + def flush(self, is_async: bool = False) -> "Tracker": """ - Set timezone for the Tracker object. + Flush the emitter - :param timezone: Timezone as a string - :type timezone: non_empty_string + :param is_async: Whether the flush is done asynchronously. Default is False + :type is_async: bool + :rtype: tracker """ - self.standard_nv_pairs["tz"] = timezone + for emitter in self.emitters: + if is_async: + if hasattr(emitter, "flush"): + emitter.flush() + else: + if hasattr(emitter, "sync_flush"): + emitter.sync_flush() + return self - @contract - def set_lang(self, lang): + def set_subject(self, subject: Optional[Subject]) -> "Tracker": """ - Set language. + Set the subject of the events fired by the tracker - :param lang: Language the application is set to - :type lang: non_empty_string + :param subject: Subject to be tracked + :type subject: subject | None + :rtype: tracker """ - self.standard_nv_pairs["lang"] = lang + self.subject = subject + return self - """ - Tracking methods - """ + def add_emitter(self, emitter: EmitterProtocol) -> "Tracker": + """ + Add a new emitter to which events should be passed + + :param emitter: New emitter + :type emitter: emitter + :rtype: tracker + """ + self.emitters.append(emitter) + return self - @contract - def track(self, pb, snowplow_schema=True): - """ - Called by all tracking events to add the standard name-value pairs - to the Payload object irrespective of the tracked event. 
- - :param pb: Payload builder - :type pb: payload - :param snowplow_schema: Whether the event schema is authored by Snowplow - :type snowplow_schema: bool - """ - pb.add_dict(self.standard_nv_pairs) - if snowplow_schema: - pb.add("evn", DEFAULT_VENDOR) - return self.http_get(pb) - - @contract - def track_page_view(self, page_url, page_title=None, referrer=None, tstamp=None): - """ - :param page_url: URL of the viewed page - :type page_url: non_empty_string - :param page_title: Title of the viewed page - :type page_title: string_or_none - :param referrer: Referrer of the page - :type referrer: string_or_none - """ - pb = payload.Payload(tstamp) - pb.add("e", "pv") # pv: page view - pb.add("url", page_url) - pb.add("page", page_title) - pb.add("refr", referrer) - return self.track(pb) - - @contract - def track_ecommerce_transaction(self, order_id, tr_total_value, - tr_affiliation=None, tr_tax_value=None, tr_shipping=None, - tr_city=None, tr_state=None, tr_country=None, - tstamp=None): - """ - :param order_id: ID of the eCommerce transaction - :type order_id: non_empty_string - :param tr_total_value: Total transaction value - :type tr_total_value: int | float - :param tr_affiliation: Transaction affiliation - :type tr_affiliation: string_or_none - :param tr_tax_value: Transaction tax value - :type tr_tax_value: int | float | None - :param tr_shipping: Delivery cost charged - :type tr_shipping: int | float | None - :param tr_city: Delivery address city - :type tr_city: string_or_none - :param tr_state: Delivery address state - :type tr_state: string_or_none - :param tr_country: Delivery address country - :type tr_country: string_or_none - """ - pb = payload.Payload(tstamp) - pb.add("e", "tr") - pb.add("tr_id", order_id) - pb.add("tr_af", tr_affiliation) - pb.add("tr_tt", tr_total_value) - pb.add("tr_tx", tr_tax_value) - pb.add("tr_sh", tr_shipping) - pb.add("tr_ci", tr_city) - pb.add("tr_st", tr_state) - pb.add("tr_co", tr_country) - return self.track(pb) - - 
@contract - def track_ecommerce_transaction_item(self, ti_id, ti_sku, ti_price, ti_quantity, - ti_name=None, ti_category=None, - tstamp=None): - """ - :param ti_id: Order ID - :type ti_id: non_empty_string - :param ti_sku: Item SKU - :type ti_sku: non_empty_string - :param ti_price: Item price - :type ti_price: int | float - :param ti_quantity: Item quantity - :type ti_quantity: int - :param ti_name: Item name - :type ti_name: string_or_none - :param ti_category: Item category - :type ti_category: string_or_none - """ - pb = payload.Payload(tstamp) - pb.add("e", "ti") - pb.add("ti_id", ti_id) - pb.add("ti_sk", ti_sku) - pb.add("ti_nm", ti_name) - pb.add("ti_ca", ti_category) - pb.add("ti_pr", ti_price) - pb.add("ti_qu", ti_quantity) - return self.track(pb) - - @contract - def track_screen_view(self, name, id_=None, tstamp=None): - """ - :param name: The name of the screen view event - :type name: non_empty_string - :param id_: Screen view ID - :type id_: string_or_none - """ - return self.track_unstruct_event("screen_view", {"name": name, "id": id_}, tstamp, True) - - @contract - def track_struct_event(self, category, action, label=None, property_=None, value=None, - tstamp=None): - """ - :param category: Category of the event - :type category: non_empty_string - :param action: The event itself - :type action: non_empty_string - :param label: Refer to the object the action is - performed on - :type label: string_or_none - :param property_: Property associated with either the action - or the object - :type property_: string_or_none - :param value: A value associated with the user action - :type value: int | float | None - """ - pb = payload.Payload(tstamp) - pb.add("e", "se") - pb.add("se_ca", category) - pb.add("se_ac", action) - pb.add("se_la", label) - pb.add("se_pr", property_) - pb.add("se_va", value) - return self.track(pb) - - @contract - def track_unstruct_event(self, event_name, dict_, tstamp=None, snowplow_schema=False): - """ - :param event_name: The name 
of the event - :type event_name: non_empty_string - :param dict_: The properties of the event - :type dict_: dict(str:*) - :param snowplow_schema: Whether the event schema is authored by Snowplow - :type snowplow_schema: bool - """ - pb = payload.Payload(tstamp) - - pb.add("e", "ue") - pb.add("ue_na", event_name) - pb.add_unstruct(dict_, self.config["encode_base64"], "ue_px", "ue_pr") - return self.track(pb, snowplow_schema) + def get_namespace(self) -> str: + # As app_id is added to the standard_nv_pairs dict above with a type of Optional[str], the type for + # the whole standard_nv_pairs dict is inferred to be dict[str, Optional[str]]. + # But, we know that "tna" should always be present in the dict, since namespace is a required argument. + # + # This ignores MyPy saying Incompatible return value type (got "str | None", expected "str") + return self.standard_nv_pairs["tna"] # type: ignore diff --git a/snowplow_tracker/tracker_configuration.py b/snowplow_tracker/tracker_configuration.py new file mode 100644 index 00000000..6a574dc2 --- /dev/null +++ b/snowplow_tracker/tracker_configuration.py @@ -0,0 +1,60 @@ +# """ +# tracker_configuration.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. 
+# """ + +from typing import Optional +from snowplow_tracker.typing import JsonEncoderFunction + + +class TrackerConfiguration(object): + def __init__( + self, + encode_base64: bool = True, + json_encoder: Optional[JsonEncoderFunction] = None, + ) -> None: + """ + Configuration for additional tracker configuration options. + :param encode_base64: Whether JSONs in the payload should be base-64 encoded. Default is True. + :type encode_base64: bool + :param json_encoder: Custom JSON serializer that gets called on non-serializable object. + :type json_encoder: function | None + """ + + self.encode_base64 = encode_base64 + self.json_encoder = json_encoder + + @property + def encode_base64(self) -> bool: + """ + Whether JSONs in the payload should be base-64 encoded. Default is True. + """ + return self._encode_base64 + + @encode_base64.setter + def encode_base64(self, value: bool): + if isinstance(value, bool) or value is None: + self._encode_base64 = value + + @property + def json_encoder(self) -> Optional[JsonEncoderFunction]: + """ + Custom JSON serializer that gets called on non-serializable object. + """ + return self._json_encoder + + @json_encoder.setter + def json_encoder(self, value: Optional[JsonEncoderFunction]): + self._json_encoder = value diff --git a/snowplow_tracker/typing.py b/snowplow_tracker/typing.py new file mode 100644 index 00000000..3e973562 --- /dev/null +++ b/snowplow_tracker/typing.py @@ -0,0 +1,74 @@ +# """ +# typing.py + +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. 
+ +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. +# """ + +from typing import Dict, List, Callable, Any, Optional, Union, Tuple +from typing_extensions import Protocol, Literal + +PayloadDict = Dict[str, Any] +PayloadDictList = List[PayloadDict] +JsonEncoderFunction = Callable[[Any], Any] + +# tracker +FORM_NODE_NAMES = {"INPUT", "TEXTAREA", "SELECT"} +FORM_TYPES = { + "button", + "checkbox", + "color", + "date", + "datetime", + "datetime-local", + "email", + "file", + "hidden", + "image", + "month", + "number", + "password", + "radio", + "range", + "reset", + "search", + "submit", + "tel", + "text", + "time", + "url", + "week", +} +FormNodeName = Literal["INPUT", "TEXTAREA", "SELECT"] +ElementClasses = Union[List[str], Tuple[str, Any]] +FormClasses = Union[List[str], Tuple[str, Any]] + +# emitters +HttpProtocol = Literal["http", "https"] +Method = Literal["get", "post"] +SuccessCallback = Callable[[PayloadDictList], None] +FailureCallback = Callable[[int, PayloadDictList], None] + +# subject +SUPPORTED_PLATFORMS = {"pc", "tv", "mob", "cnsl", "iot", "web", "srv", "app"} +SupportedPlatform = Literal["pc", "tv", "mob", "cnsl", "iot", "web", "srv", "app"] + + +class EmitterProtocol(Protocol): + def input(self, payload: PayloadDict) -> None: ... + + def flush(self) -> None: ... + + def async_flush(self) -> None: ... + + def sync_flush(self) -> None: ...