Skip to content

Commit e9a8a30

Browse files
authored
Arango service update (canada-ca#1203)
* Conversion of existing services from postgres to arango
* Correct port
* Build fix
* Connection string amendment
* Don't create 'Service' if any .envs are not set
* Miscellaneous fixes
* Update connection string for autoscan and setup for results
* Miscellaneous fixes
* Update dkim selector parsing logic and str() the dates
* Update dkim selector tag generation and result handling
* Update dkim selector tag generation
* Update dkim selector tag indexing
1 parent c9cd38b commit e9a8a30

29 files changed

Lines changed: 281 additions & 1655 deletions

services/auto-scan/autoscan.py

Lines changed: 51 additions & 297 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,11 @@
11
import os
22
import sys
3-
import asyncio
43
import logging
54
import requests
65
import datetime
7-
import databases
8-
import sqlalchemy
96
import traceback
10-
from sqlalchemy.sql import select
11-
from sqlalchemy.dialects.postgresql import ARRAY
7+
from arango import ArangoClient
8+
from uuid import uuid4 as unique_id
129

1310
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
1411

@@ -18,313 +15,70 @@
1815
DB_NAME = os.getenv("DB_NAME")
1916
DB_HOST = os.getenv("DB_HOST")
2017

21-
DATABASE_URI = f"postgresql://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
2218
QUEUE_URL = "http://scan-queue.scanners.svc.cluster.local"
2319

24-
metadata = sqlalchemy.MetaData()
2520

26-
Domains = sqlalchemy.Table(
27-
"domains",
28-
metadata,
29-
sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True),
30-
sqlalchemy.Column("domain", sqlalchemy.String),
31-
sqlalchemy.Column("last_run", sqlalchemy.DateTime),
32-
sqlalchemy.Column("selectors", ARRAY(sqlalchemy.String)),
33-
sqlalchemy.Column(
34-
"organization_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("organizations.id")
35-
),
36-
)
21+
def dispatch_https(domain, client):
    """Post an HTTPS scan request for one domain to the scan queue.

    Args:
        domain: Domain document; its "_key" and "domain" entries are read.
        client: Object exposing a requests-style ``post(url, json=...)``.
    """
    # The "uuid" entry is always sent as None from this scheduled dispatcher.
    scan_request = dict(
        domain_key=domain["_key"],
        domain=domain["domain"],
        uuid=None,
    )
    client.post(f"{QUEUE_URL}/https", json=scan_request)
3728

38-
Dmarc_Reports = sqlalchemy.Table(
39-
"dmarc_reports",
40-
metadata,
41-
sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, autoincrement=True),
42-
sqlalchemy.Column(
43-
"domain_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("domains.id")
44-
),
45-
sqlalchemy.Column("start_date", sqlalchemy.DateTime),
46-
sqlalchemy.Column("end_date", sqlalchemy.DateTime),
47-
sqlalchemy.Column("report", sqlalchemy.JSON),
48-
sqlalchemy.Column(
49-
"organization_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("organizations.id")
50-
),
51-
)
5229

53-
Organizations = sqlalchemy.Table(
54-
"organizations",
55-
metadata,
56-
sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True),
57-
sqlalchemy.Column("name", sqlalchemy.String),
58-
sqlalchemy.Column("slug", sqlalchemy.String, index=True),
59-
sqlalchemy.Column("acronym", sqlalchemy.String),
60-
sqlalchemy.Column("org_tags", sqlalchemy.JSON),
61-
)
30+
def dispatch_ssl(domain, client):
    """Post an SSL scan request for one domain to the scan queue.

    Args:
        domain: Domain document; its "_key" and "domain" entries are read.
        client: Object exposing a requests-style ``post(url, json=...)``.
    """
    # The "uuid" entry is always sent as None from this scheduled dispatcher.
    scan_request = dict(
        domain_key=domain["_key"],
        domain=domain["domain"],
        uuid=None,
    )
    client.post(f"{QUEUE_URL}/ssl", json=scan_request)
6237

63-
Users = sqlalchemy.Table(
64-
"users",
65-
metadata,
66-
sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, autoincrement=True),
67-
sqlalchemy.Column("user_name", sqlalchemy.String),
68-
sqlalchemy.Column("display_name", sqlalchemy.String),
69-
sqlalchemy.Column("user_password", sqlalchemy.String),
70-
sqlalchemy.Column("preferred_lang", sqlalchemy.String),
71-
sqlalchemy.Column("failed_login_attempts", sqlalchemy.Integer, default=0),
72-
sqlalchemy.Column(
73-
"failed_login_attempt_time", sqlalchemy.Float, default=0, nullable=True
74-
),
75-
sqlalchemy.Column("tfa_validated", sqlalchemy.Boolean, default=False),
76-
)
7738

78-
User_affiliations = sqlalchemy.Table(
79-
"user_affiliations",
80-
metadata,
81-
sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, autoincrement=True),
82-
sqlalchemy.Column(
83-
"user_id",
84-
sqlalchemy.Integer,
85-
sqlalchemy.ForeignKey(
86-
"users.id",
87-
onupdate="CASCADE",
88-
ondelete="CASCADE",
89-
name="user_affiliations_users_id_fkey",
90-
),
91-
primary_key=True,
92-
),
93-
sqlalchemy.Column(
94-
"organization_id",
95-
sqlalchemy.Integer,
96-
sqlalchemy.ForeignKey(
97-
"organizations.id",
98-
onupdate="CASCADE",
99-
ondelete="CASCADE",
100-
name="user_affiliations_organization_id_fkey",
101-
),
102-
),
103-
sqlalchemy.Column("permission", sqlalchemy.String),
104-
)
39+
def dispatch_dns(domain, client):
    """Post a DNS scan request for one domain to the scan queue.

    Args:
        domain: Domain document; its "_key" and "domain" entries are read,
            and "selectors" is forwarded when present (None otherwise).
        client: Object exposing a requests-style ``post(url, json=...)``.
    """
    # The "uuid" entry is always sent as None from this scheduled dispatcher.
    scan_request = dict(
        domain_key=domain["_key"],
        domain=domain["domain"],
        selectors=domain.get("selectors"),
        uuid=None,
    )
    client.post(f"{QUEUE_URL}/dns", json=scan_request)
10547

106-
Web_scans = sqlalchemy.Table(
107-
"web_scans",
108-
metadata,
109-
sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True),
110-
sqlalchemy.Column(
111-
"domain_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("domains.id")
112-
),
113-
sqlalchemy.Column("scan_date", sqlalchemy.DateTime),
114-
sqlalchemy.Column(
115-
"initiated_by", sqlalchemy.Integer, sqlalchemy.ForeignKey("users.id")
116-
),
117-
)
11848

119-
Mail_scans = sqlalchemy.Table(
120-
"mail_scans",
121-
metadata,
122-
sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True),
123-
sqlalchemy.Column(
124-
"domain_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("domains.id")
125-
),
126-
sqlalchemy.Column("scan_date", sqlalchemy.DateTime),
127-
sqlalchemy.Column("selectors", ARRAY(sqlalchemy.String)),
128-
sqlalchemy.Column("dmarc_phase", sqlalchemy.Integer),
129-
sqlalchemy.Column(
130-
"initiated_by", sqlalchemy.Integer, sqlalchemy.ForeignKey("users.id")
131-
),
132-
)
49+
def scan(db_host, db_name, user_name, password, http_client=requests):
    """Dispatch HTTPS, SSL and DNS scans for every document in "domains".

    For each domain the "lastRan" field is set to a single timestamp taken
    once per run, and the domain is then posted to the three scan queue
    endpoints.

    Args:
        db_host: Host URL handed to ``ArangoClient(hosts=...)``.
        db_name: Name of the database to open.
        user_name: Database username.
        password: Database password.
        http_client: Object with a requests-style ``post`` method; defaults
            to the ``requests`` module.

    Returns:
        The number of domains for which all three scans were dispatched.
        Errors are logged, not raised; after a failure the value is the
        number of domains completed before it (0 if the connection or the
        query itself failed).
    """
    logging.info("Retrieving domains for scheduled scan...")
    # Initialised before the try block so the error handler can always
    # report it, even when connecting or querying fails before the loop.
    dispatched = 0
    try:
        # Establish DB connection
        arango_client = ArangoClient(hosts=db_host)
        db = arango_client.db(db_name, username=user_name, password=password)

        logging.info("Querying domains...")

        domain_collection = db.collection("domains")
        domains = domain_collection.all()

        scan_time = str(datetime.datetime.utcnow())

        for position, domain in enumerate(domains, start=1):
            logging.info(f"Dispatching scan number {position} of {len(domains)}")
            logging.info(f"Requesting scan for {domain['domain']}")

            domain_collection.update_match(
                {"_key": domain["_key"]}, {"lastRan": scan_time}
            )

            dispatch_https(domain, http_client)
            dispatch_ssl(domain, http_client)
            dispatch_dns(domain, http_client)

            # Only count a domain once all three requests have been sent.
            dispatched = position

    except Exception as e:
        logging.error(
            f"An unexpected error occurred while initiating scheduled scan: {str(e)}\n\nFull traceback: {traceback.format_exc()}"
        )
        return dispatched
    logging.info("Domains have been dispatched for scanning.")
    return dispatched
32781

32882
if __name__ == "__main__":
    # Run one scheduled dispatch using the module-level DB_* settings.
    total_dispatched = scan(
        db_host=DB_HOST,
        db_name=DB_NAME,
        user_name=DB_USER,
        password=DB_PASS,
    )
    logging.info(f"Dispatched scans for {total_dispatched} domains.")

0 commit comments

Comments
 (0)