Skip to content

Commit cd53fa1

Browse files
author
Richard Jones
committed
That's the last of the RDBMS migration steps done! Yay!
Note that the code currently has some unit testing issues: (1) Metakit needs some attention in a couple of areas; (2) the RDBMS backends are having trouble ordering their journal entries correctly. I'm going to migrate them to use TIMESTAMP for the date column, but that's not necessarily going to fix things, as MySQL and PostgreSQL both appear to have only one-second granularity. SQLite will ignore the data type as usual ;) Next up is the datatype-ification of the RDBMS backends. Part of that will involve the migration to numeric IDs, which will also be done in the *dbm backends (already done in Metakit). The ID exposed *above* the hyperdb will be a String, since so many things assume a string ID now.
1 parent 6c74f37 commit cd53fa1

File tree

8 files changed

+159
-19
lines changed

8 files changed

+159
-19
lines changed

TODO.txt

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
This file has been re-purposed to contain specifically the items that need
22
doing before the next release:
33

4-
- indexing in RDBMSes
5-
- add tests for group-by-multilink so I finally implement it for the RDBMSes
6-
- s/getnode/getitem in backends (and s/Node/Item)
74
- have rdbms backends look up the journal for actor if it's not set
8-
- ensure index creation is triggered by the version 1->2 update
9-
5+
- migrate rdbms backends to use typed columns
106
- migrate to numeric ID values (fixes bug 817217)
7+
8+
- ensure index creation is triggered by the version 1->2 update
9+
(and other upgrade tests)

roundup/admin.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
1717
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
1818
#
19-
# $Id: admin.py,v 1.62 2004-02-11 23:55:08 richard Exp $
19+
# $Id: admin.py,v 1.63 2004-03-21 23:39:08 richard Exp $
2020

2121
'''Administration commands for maintaining Roundup trackers.
2222
'''
@@ -748,7 +748,7 @@ def do_create(self, args, pwre = re.compile(r'{(\w+)}(.+)')):
748748
# convert types
749749
for propname, value in props.items():
750750
try:
751-
props[key] = hyperdb.rawToHyperdb(self.db, cl, None,
751+
props[propname] = hyperdb.rawToHyperdb(self.db, cl, None,
752752
propname, value)
753753
except hyperdb.HyperdbValueError, message:
754754
raise UsageError, message

roundup/backends/back_metakit.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# $Id: back_metakit.py,v 1.65 2004-03-19 05:27:55 richard Exp $
1+
# $Id: back_metakit.py,v 1.66 2004-03-21 23:39:08 richard Exp $
22
'''Metakit backend for Roundup, originally by Gordon McMillan.
33
44
Known Current Bugs:
@@ -1873,12 +1873,12 @@ def add_text(self, identifier, text, mime_type='text/plain'):
18731873
ids[oldpos].ignore = 1
18741874
self.changed = 1
18751875
pos = ids.append(tblid=tblid,nodeid=nodeid,propid=propid)
1876-
1876+
18771877
wordlist = re.findall(r'\b\w{2,25}\b', text.upper())
18781878
words = {}
18791879
for word in wordlist:
1880-
if not self.disallows.has_key(word):
1881-
words[word] = 1
1880+
if not self.disallows.has_key(word):
1881+
words[word] = 1
18821882
words = words.keys()
18831883

18841884
index = self.db.view('index').ordered(1)

roundup/backends/back_mysql.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,14 +124,29 @@ def open_connection(self):
124124
self.create_version_2_tables()
125125

126126
def create_version_2_tables(self):
127+
# OTK store
127128
self.cursor.execute('CREATE TABLE otks (otk_key VARCHAR(255), '
128-
'otk_value VARCHAR(255), otk_time FLOAT(20))')
129+
'otk_value VARCHAR(255), otk_time FLOAT(20)) '
130+
'TYPE=%s'%self.mysql_backend)
129131
self.cursor.execute('CREATE INDEX otks_key_idx ON otks(otk_key)')
132+
133+
# Sessions store
130134
self.cursor.execute('CREATE TABLE sessions (session_key VARCHAR(255), '
131-
'session_time FLOAT(20), session_value VARCHAR(255))')
135+
'session_time FLOAT(20), session_value VARCHAR(255)) '
136+
'TYPE=%s'%self.mysql_backend)
132137
self.cursor.execute('CREATE INDEX sessions_key_idx ON '
133138
'sessions(session_key)')
134139

140+
# full-text indexing store
141+
self.cursor.execute('CREATE TABLE _textids (_class VARCHAR(255), '
142+
'_itemid VARCHAR(255), _prop VARCHAR(255), _textid INT) '
143+
'TYPE=%s'%self.mysql_backend)
144+
self.cursor.execute('CREATE TABLE _words (_word VARCHAR(30), '
145+
'_textid INT) TYPE=%s'%self.mysql_backend)
146+
self.cursor.execute('CREATE INDEX words_word_ids ON _words(_word)')
147+
sql = 'insert into ids (name, num) values (%s,%s)'%(self.arg, self.arg)
148+
self.cursor.execute(sql, ('_textids', 1))
149+
135150
def add_actor_column(self):
136151
# update existing tables to have the new actor column
137152
tables = self.database_schema['tables']

roundup/backends/back_postgresql.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,14 +113,26 @@ def open_connection(self):
113113
self.create_version_2_tables()
114114

115115
def create_version_2_tables(self):
116+
# OTK store
116117
self.cursor.execute('CREATE TABLE otks (otk_key VARCHAR(255), '
117118
'otk_value VARCHAR(255), otk_time FLOAT(20))')
118119
self.cursor.execute('CREATE INDEX otks_key_idx ON otks(otk_key)')
120+
121+
# Sessions store
119122
self.cursor.execute('CREATE TABLE sessions (session_key VARCHAR(255), '
120123
'session_time FLOAT(20), session_value VARCHAR(255))')
121124
self.cursor.execute('CREATE INDEX sessions_key_idx ON '
122125
'sessions(session_key)')
123126

127+
# full-text indexing store
128+
self.cursor.execute('CREATE TABLE _textids (_class VARCHAR(255), '
129+
'_itemid VARCHAR(255), _prop VARCHAR(255), _textid INT4) ')
130+
self.cursor.execute('CREATE TABLE _words (_word VARCHAR(30), '
131+
'_textid INT4)')
132+
self.cursor.execute('CREATE INDEX words_word_ids ON _words(_word)')
133+
sql = 'insert into ids (name, num) values (%s,%s)'%(self.arg, self.arg)
134+
self.cursor.execute(sql, ('_textids', 1))
135+
124136
def add_actor_column(self):
125137
# update existing tables to have the new actor column
126138
tables = self.database_schema['tables']

roundup/backends/back_sqlite.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# $Id: back_sqlite.py,v 1.17 2004-03-18 01:58:45 richard Exp $
1+
# $Id: back_sqlite.py,v 1.18 2004-03-21 23:39:08 richard Exp $
22
'''Implements a backend for SQLite.
33
44
See https://pysqlite.sourceforge.net/ for pysqlite info
@@ -65,6 +65,15 @@ def create_version_2_tables(self):
6565
self.cursor.execute('create index sessions_key_idx on '
6666
'sessions(session_key)')
6767

68+
# full-text indexing store
69+
self.cursor.execute('CREATE TABLE _textids (_class varchar, '
70+
'_itemid varchar, _prop varchar, _textid integer) ')
71+
self.cursor.execute('CREATE TABLE _words (_word varchar, '
72+
'_textid integer)')
73+
self.cursor.execute('CREATE INDEX words_word_ids ON _words(_word)')
74+
sql = 'insert into ids (name, num) values (%s,%s)'%(self.arg, self.arg)
75+
self.cursor.execute(sql, ('_textids', 1))
76+
6877
def add_actor_column(self):
6978
# update existing tables to have the new actor column
7079
tables = self.database_schema['tables']

roundup/backends/indexer_rdbms.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
''' This implements the full-text indexer over two RDBMS tables. The first
2+
is a mapping of words to occurrence IDs. The second maps the IDs to (Class,
3+
itemid, propname) instances.
4+
'''
5+
import re
6+
7+
from indexer_dbm import Indexer
8+
9+
class Indexer(Indexer):
10+
disallows = {'THE':1, 'THIS':1, 'ZZZ':1, 'THAT':1, 'WITH':1}
11+
def __init__(self, db):
12+
self.db = db
13+
self.reindex = 0
14+
15+
def close(self):
16+
'''close the indexing database'''
17+
# just nuke the circular reference
18+
self.db = None
19+
20+
def force_reindex(self):
21+
'''Force a reindexing of the database. This essentially
22+
empties the tables ids and index and sets a flag so
23+
that the databases are reindexed'''
24+
self.reindex = 1
25+
26+
def should_reindex(self):
27+
'''returns True if the indexes need to be rebuilt'''
28+
return self.reindex
29+
30+
def add_text(self, identifier, text, mime_type='text/plain'):
31+
''' "identifier" is (classname, itemid, property) '''
32+
if mime_type != 'text/plain':
33+
return
34+
35+
# first, find the id of the (classname, itemid, property)
36+
a = self.db.arg
37+
sql = 'select _textid from _textids where _class=%s and '\
38+
'_itemid=%s and _prop=%s'%(a, a, a)
39+
self.db.cursor.execute(sql, identifier)
40+
r = self.db.cursor.fetchone()
41+
if not r:
42+
id = self.db.newid('_textids')
43+
sql = 'insert into _textids (_textid, _class, _itemid, _prop)'\
44+
' values (%s, %s, %s, %s)'%(a, a, a, a)
45+
self.db.cursor.execute(sql, (id, ) + identifier)
46+
else:
47+
id = int(r[0])
48+
# clear out any existing indexed values
49+
sql = 'delete from _words where _textid=%s'%a
50+
self.db.cursor.execute(sql, (id, ))
51+
52+
# ok, find all the words in the text
53+
wordlist = re.findall(r'\b\w{2,25}\b', str(text).upper())
54+
words = {}
55+
for word in wordlist:
56+
if not self.disallows.has_key(word):
57+
words[word] = 1
58+
words = words.keys()
59+
60+
# for each word, add an entry in the db
61+
for word in words:
62+
# don't dupe
63+
sql = 'select * from _words where _word=%s and _textid=%s'%(a, a)
64+
self.db.cursor.execute(sql, (word, id))
65+
if self.db.cursor.fetchall():
66+
continue
67+
sql = 'insert into _words (_word, _textid) values (%s, %s)'%(a, a)
68+
self.db.cursor.execute(sql, (word, id))
69+
70+
def find(self, wordlist):
71+
'''look up all the words in the wordlist.
72+
If none are found return an empty dictionary
73+
* more rules here
74+
'''
75+
l = [word.upper() for word in wordlist if 26 > len(word) > 2]
76+
77+
a = ','.join([self.db.arg] * len(l))
78+
sql = 'select distinct(_textid) from _words where _word in (%s)'%a
79+
self.db.cursor.execute(sql, tuple(l))
80+
r = self.db.cursor.fetchall()
81+
if not r:
82+
return {}
83+
a = ','.join([self.db.arg] * len(r))
84+
sql = 'select _class, _itemid, _prop from _textids '\
85+
'where _textid in (%s)'%a
86+
self.db.cursor.execute(sql, tuple([int(id) for (id,) in r]))
87+
# self.search_index has the results as {some id: identifier} ...
88+
# sigh
89+
r = {}
90+
k = 0
91+
for c,n,p in self.db.cursor.fetchall():
92+
key = (str(c), str(n), str(p))
93+
r[k] = key
94+
k += 1
95+
return r
96+
97+
def save_index(self):
98+
# the normal RDBMS backend transaction mechanisms will handle this
99+
pass
100+
101+
def rollback(self):
102+
# the normal RDBMS backend transaction mechanisms will handle this
103+
pass
104+

roundup/backends/rdbms_common.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# $Id: rdbms_common.py,v 1.82 2004-03-19 04:47:59 richard Exp $
1+
# $Id: rdbms_common.py,v 1.83 2004-03-21 23:39:08 richard Exp $
22
''' Relational database (SQL) backend common code.
33
44
Basics:
@@ -39,7 +39,7 @@
3939

4040
# support
4141
from blobfiles import FileStorage
42-
from indexer_dbm import Indexer
42+
from indexer_rdbms import Indexer
4343
from sessions_rdbms import Sessions, OneTimeKeys
4444
from roundup.date import Range
4545

@@ -59,7 +59,7 @@ def __init__(self, config, journaltag=None):
5959
self.config, self.journaltag = config, journaltag
6060
self.dir = config.DATABASE
6161
self.classes = {}
62-
self.indexer = Indexer(self.dir)
62+
self.indexer = Indexer(self)
6363
self.security = security.Security(self)
6464

6565
# additional transaction support for external files and the like
@@ -177,7 +177,7 @@ def post_init(self):
177177
self.reindex()
178178

179179
# commit
180-
self.conn.commit()
180+
self.sql_commit()
181181

182182
# update this number when we need to make changes to the SQL structure
183183
# of the backend database
@@ -591,7 +591,7 @@ def newid(self, classname):
591591
if __debug__:
592592
print >>hyperdb.DEBUG, 'newid', (self, sql, classname)
593593
self.cursor.execute(sql, (classname, ))
594-
newid = self.cursor.fetchone()[0]
594+
newid = int(self.cursor.fetchone()[0])
595595

596596
# update the counter
597597
sql = 'update ids set num=%s where name=%s'%(self.arg, self.arg)
@@ -1066,6 +1066,7 @@ def sql_close(self):
10661066
def close(self):
10671067
''' Close off the connection.
10681068
'''
1069+
self.indexer.close()
10691070
self.sql_close()
10701071

10711072
#

0 commit comments

Comments
 (0)