Skip to content

Commit e604fd5

Browse files
authored
Amendments to auto-scan service and CronJob manifest (canada-ca#752)
* Amendments to auto-scan service and CronJob manifest to support scheduled scanning
* 8 hour interval -> 6 hour interval
1 parent fedb354 commit e604fd5

6 files changed

Lines changed: 48 additions & 50 deletions

File tree

platform/overlays/gke/knative/config/autoscan.yaml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@ apiVersion: batch/v1beta1
22
kind: CronJob
33
metadata:
44
name: autoscan
5-
namespace: tracker
5+
namespace: scanners
66
spec:
7-
schedule: "0 2 * * *"
7+
schedule: "*/360 * * * *"
8+
concurrencyPolicy: Forbid
89
jobTemplate:
910
spec:
1011
template:

services/auto-scan/Dockerfile

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,15 @@ COPY . ./
88
RUN apt-get update
99
RUN apt-get install -y --no-install-recommends apt-utils && \
1010
apt-get install -y --no-install-recommends python3 python3-pip && \
11-
apt-get install -y --no-install-recommends python3-setuptools python3-wheel
11+
apt-get install -y --no-install-recommends python3-setuptools python3-wheel && \
12+
apt-get install -y --no-install-recommends build-essential python3-dev
1213

1314
# Install dependencies.
14-
RUN python3 -m pip install -r requirements.txt
15+
RUN pip3 install -r requirements.txt
16+
17+
RUN chmod +x docker-entrypoint.sh
1518

16-
RUN chmod +x autoscan.py
1719
RUN useradd -r -u 1001 autoscan
1820
USER autoscan
1921

20-
ENTRYPOINT ["python3", "autoscan.py"]
22+
CMD exec ~/./docker-entrypoint.sh

services/auto-scan/autoscan.py

Lines changed: 30 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import datetime
77
import databases
88
import sqlalchemy
9-
import bcrypt
9+
import traceback
1010
from sqlalchemy.sql import select
1111
from sqlalchemy.dialects.postgresql import ARRAY
1212

@@ -19,7 +19,7 @@
1919
DB_HOST = os.getenv("DB_HOST")
2020

2121
DATABASE_URI = f"postgresql://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
22-
DISPATCHER_URL = "http://dispatcher.tracker.svc.cluster.local"
22+
QUEUE_URL = "http://scan-queue.scanners.svc.cluster.local"
2323

2424
metadata = sqlalchemy.MetaData()
2525

@@ -219,47 +219,35 @@
219219
sqlalchemy.Column("ref_links", sqlalchemy.String),
220220
)
221221

222-
Classification = sqlalchemy.Table(
223-
"classification",
224-
metadata,
225-
sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True),
226-
sqlalchemy.Column("UNCLASSIFIED", sqlalchemy.String),
227-
)
228-
229222

230-
def Service(database=databases.Database(DATABASE_URI), client=requests):
223+
def Dispatch(database=databases.Database(DATABASE_URI), client=requests):
231224

232-
async def dispatch_web(domain, scan_id):
225+
async def dispatch_https(domain, scan_id):
233226

234227
payload = {
235228
"scan_id": scan_id,
236229
"domain": domain.get("domain"),
237-
"selectors": domain.get("selectors"),
238-
"user_init": False,
239230
}
240-
headers = {
241-
"Content-Type": "application/json",
242-
"Data": str(payload),
243-
"Scan-Type": "web",
231+
client.post(QUEUE_URL+"/https", json=payload)
232+
233+
async def dispatch_ssl(domain, scan_id):
234+
235+
payload = {
236+
"scan_id": scan_id,
237+
"domain": domain.get("domain"),
244238
}
245-
client.post(DISPATCHER_URL + "/receive", headers=headers)
239+
client.post(QUEUE_URL+"/ssl", json=payload)
246240

247-
async def dispatch_mail(domain, scan_id):
241+
async def dispatch_dns(domain, scan_id):
248242

249243
payload = {
250244
"scan_id": scan_id,
251245
"domain": domain.get("domain"),
252246
"selectors": domain.get("selectors"),
253-
"user_init": False,
254-
}
255-
headers = {
256-
"Content-Type": "application/json",
257-
"Data": str(payload),
258-
"Scan-Type": "mail",
259247
}
260-
client.post(DISPATCHER_URL + "/receive", headers=headers)
248+
client.post(QUEUE_URL+"/dns", json=payload)
261249

262-
async def main():
250+
async def scan():
263251
logging.info("Retrieving domains for scheduled scan...")
264252
try:
265253
await database.connect()
@@ -273,8 +261,14 @@ async def main():
273261
system = await database.fetch_one(system_user_query)
274262

275263
scan_time = datetime.datetime.utcnow()
264+
count = 0
276265

277266
for domain in domains:
267+
count = count + 1
268+
dispatched.append(domain.get('domain'))
269+
logging.info(f"Dispatching scan number {count} of {len(domains)}")
270+
logging.info(f"Requesting scan for {domain.get('domain')}")
271+
278272
web_insert = Web_scans.insert().values(
279273
domain_id=domain.get("id"),
280274
scan_date=scan_time,
@@ -297,25 +291,23 @@ async def main():
297291
web_scan_query = select([Web_scans]).order_by(Web_scans.c.id.desc())
298292
web_scan = await database.fetch_one(web_scan_query)
299293

300-
dispatched.append(dispatch_web(domain, web_scan.get("id")))
301-
dispatched.append(dispatch_mail(domain, mail_scan.get("id")))
302-
303-
for domain in dispatched:
304-
await domain
294+
await dispatch_https(domain, web_scan.get("id"))
295+
await dispatch_ssl(domain, web_scan.get("id"))
296+
await dispatch_dns(domain, mail_scan.get("id"))
305297

306298
await database.disconnect()
307-
return {"Dispatched": [domain.get('domain') for domain in domains]}
299+
return [domain for domain in dispatched]
308300

309301
except Exception as e:
310302
try:
311303
await database.disconnect()
312304
except:
313305
pass
314-
logging.error(e)
315-
return {"Dispatched": []}
316-
317-
return asyncio.run(main())
306+
logging.error(f"An unexpected error occurred while initiating scheduled scan: {str(e)}\n\nFull traceback: {traceback.format_exc()}")
307+
return [domain for domain in dispatched]
318308

309+
return asyncio.run(scan())
319310

320311
if __name__ == "__main__":
321-
logging.info(Service())
312+
dispatched_domains = Dispatch()
313+
logging.info(f"Dispatched scans for: {str(dispatched_domains)}")
services/auto-scan/docker-entrypoint.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#!/bin/sh
2+
3+
sleep 20
4+
python3 autoscan.py
services/auto-scan/requirements.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
asyncio
22
pytest-asyncio
33
requests>=2.18.4
4-
graphene
54
SQLAlchemy
5+
psycopg2-binary
66
databases[postgresql]
77
pretend
8-
bcrypt

services/auto-scan/tests/test_autoscan.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from sqlalchemy import create_engine
55
from sqlalchemy.orm import sessionmaker
66
from pretend import stub
7-
from autoscan import Service, Domains, Users
7+
from autoscan import Dispatch, Domains, Users
88

99
TEST_DATABASE_URI = "postgresql://track_dmarc:postgres@testdb/track_dmarc"
1010
engine = create_engine(TEST_DATABASE_URI)
@@ -27,8 +27,8 @@ def test_retrieval():
2727
test_session.execute(Domains.insert().values(domain=domain["domain"]))
2828
test_session.execute(Domains.insert().values(domain=domain["domain"]))
2929

30-
client_stub = stub(post=lambda url, headers: None)
30+
client_stub = stub(post=lambda url, json: None)
3131

32-
response = Service(database=databases.Database(TEST_DATABASE_URI), client=client_stub)
32+
dispatched = Dispatch(database=databases.Database(TEST_DATABASE_URI), client=client_stub)
3333

34-
assert all(domain["domain"] in response["Dispatched"] for domain in input_domains)
34+
assert all(domain["domain"] in dispatched for domain in input_domains)

0 commit comments

Comments
 (0)