Skip to content

Commit 3fe4927

Browse files
committed
Implement exact string search
.. in the 'filter' method of hyperdb.Class (and the corresponding backend implementations).
1 parent 40768ed commit 3fe4927

File tree

8 files changed

+278
-132
lines changed

8 files changed

+278
-132
lines changed

CHANGES.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ Features:
8585
- issue2551043: Add X-Roundup-issue-id email header. Add a new header
8686
to make it easier to filter notification emails without having to
8787
parse the subject line. (John Rouillard)
88+
- The database filter function now can also do an exact string search.
8889

8990
Fixed:
9091

doc/design.txt

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -515,26 +515,45 @@ Here is the interface provided by the hyperdatabase::
515515
db.issue.find(messages={'1':1,'3':1}, files={'7':1})
516516
"""
517517

518-
def filter(self, search_matches, filterspec, sort, group):
518+
def filter(self, search_matches, filterspec, sort, group,
519+
retired, exact_match_spec):
519520
"""Return a list of the ids of the active nodes in this class that
520521
match the 'filter' spec, sorted by the group spec and then the
521-
sort spec.
522+
sort spec. The arguments sort, group, retired, and
523+
exact_match_spec are optional.
522524

523-
"search_matches" is a container type
525+
"search_matches" is a container type which by default is
526+
None and optionally contains IDs of items to match. If
527+
non-empty, only IDs of the initial set are returned.
524528

525529
"filterspec" is {propname: value(s)}
530+
"exact_match_spec" is the same format as "filterspec" but
531+
specifies exact match for the given propnames. This only
532+
makes a difference for String properties: these specify case
533+
insensitive substring search when in "filterspec" and exact
534+
match when in exact_match_spec.
526535

527536
"sort" and "group" are [(dir, prop), ...] where dir is '+', '-'
528537
or None and prop is a prop name or None. Note that for
529538
backward-compatibility reasons a single (dir, prop) tuple is
530539
also allowed.
531540

541+
The parameter retired, when set to False, returns only live
542+
(un-retired) results. When setting it to True, only retired
543+
items are returned. If None, both retired and unretired
544+
items are returned. The default is False, i.e. only live
545+
items are returned by default.
546+
532547
The filter must match all properties specified. If the property
533548
value to match is a list:
534549

535550
1. String properties must match all elements in the list, and
536551
2. Other properties must match any of the elements in the list.
537552

553+
This also means that for strings in exact_match_spec it
554+
doesn't make sense to specify multiple values because those
555+
cannot all be matched.
556+
538557
The propname in filterspec and prop in a sort/group spec may be
539558
transitive, i.e., it may contain properties of the form
540559
link.link.link.name, e.g. you can search for all issues where

roundup/backends/back_anydbm.py

Lines changed: 87 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
import os, marshal, re, weakref, string, copy, time, shutil, logging
2626

2727
from roundup.anypy.dbm_ import anydbm, whichdb
28-
from roundup.anypy.strings import b2s, bs2b, repr_export, eval_import
28+
from roundup.anypy.strings import b2s, bs2b, repr_export, eval_import, is_us
2929

3030
from roundup import hyperdb, date, password, roundupdb, security, support
3131
from roundup.backends import locking
@@ -1690,12 +1690,16 @@ def getnodeids(self, db=None, retired=None):
16901690
return res
16911691

16921692
def _filter(self, search_matches, filterspec, proptree,
1693-
num_re = re.compile(r'^\d+$'), retired=False):
1693+
num_re = re.compile(r'^\d+$'), retired=False,
1694+
exact_match_spec={}):
16941695
"""Return a list of the ids of the nodes in this class that
16951696
match the 'filter' spec, sorted by the group spec and then the
16961697
sort spec.
16971698
16981699
"filterspec" is {propname: value(s)}
1700+
same for "exact_match_spec". The latter specifies exact matching
1701+
for String type while String types in "filterspec" are searched
1702+
for as case insensitive substring match.
16991703
17001704
"sort" and "group" are (dir, prop) where dir is '+', '-' or None
17011705
and prop is a prop name or None
@@ -1726,82 +1730,86 @@ def _filter(self, search_matches, filterspec, proptree,
17261730
INTERVAL = 'spec:interval'
17271731
OTHER = 'spec:other'
17281732

1729-
for k, v in filterspec.items():
1730-
propclass = props[k]
1731-
if isinstance(propclass, hyperdb.Link):
1732-
if type(v) is not type([]):
1733-
v = [v]
1734-
u = []
1735-
for entry in v:
1733+
for exact, filtertype in enumerate((filterspec, exact_match_spec)):
1734+
for k, v in filtertype.items():
1735+
propclass = props[k]
1736+
if isinstance(propclass, hyperdb.Link):
1737+
if type(v) is not type([]):
1738+
v = [v]
1739+
u = []
1740+
for entry in v:
1741+
# the value -1 is a special "not set" sentinel
1742+
if entry == '-1':
1743+
entry = None
1744+
u.append(entry)
1745+
l.append((LINK, k, u))
1746+
elif isinstance(propclass, hyperdb.Multilink):
17361747
# the value -1 is a special "not set" sentinel
1737-
if entry == '-1':
1738-
entry = None
1739-
u.append(entry)
1740-
l.append((LINK, k, u))
1741-
elif isinstance(propclass, hyperdb.Multilink):
1742-
# the value -1 is a special "not set" sentinel
1743-
if v in ('-1', ['-1']):
1744-
v = []
1745-
elif type(v) is not type([]):
1746-
v = [v]
1747-
l.append((MULTILINK, k, v))
1748-
elif isinstance(propclass, hyperdb.String) and k != 'id':
1749-
if type(v) is not type([]):
1750-
v = [v]
1751-
for v in v:
1752-
# simple glob searching
1753-
v = re.sub(r'([\|\{\}\\\.\+\[\]\(\)])', r'\\\1', v)
1754-
v = v.replace('?', '.')
1755-
v = v.replace('*', '.*?')
1756-
l.append((STRING, k, re.compile(v, re.I)))
1757-
elif isinstance(propclass, hyperdb.Date):
1758-
try:
1759-
date_rng = propclass.range_from_raw(v, self.db)
1760-
l.append((DATE, k, date_rng))
1761-
except ValueError:
1762-
# If range creation fails - ignore that search parameter
1763-
pass
1764-
elif isinstance(propclass, hyperdb.Interval):
1765-
try:
1766-
intv_rng = date.Range(v, date.Interval)
1767-
l.append((INTERVAL, k, intv_rng))
1768-
except ValueError:
1769-
# If range creation fails - ignore that search parameter
1770-
pass
1771-
1772-
elif isinstance(propclass, hyperdb.Boolean):
1773-
if type(v) == type(""):
1774-
v = v.split(',')
1775-
if type(v) != type([]):
1776-
v = [v]
1777-
bv = []
1778-
for val in v:
1779-
if type(val) is type(''):
1780-
bv.append(propclass.from_raw (val))
1781-
else:
1782-
bv.append(val)
1783-
l.append((OTHER, k, bv))
1784-
1785-
elif k == 'id':
1786-
if type(v) != type([]):
1787-
v = v.split(',')
1788-
l.append((OTHER, k, [str(int(val)) for val in v]))
1789-
1790-
elif isinstance(propclass, hyperdb.Number):
1791-
if type(v) != type([]):
1792-
try :
1748+
if v in ('-1', ['-1']):
1749+
v = []
1750+
elif type(v) is not type([]):
1751+
v = [v]
1752+
l.append((MULTILINK, k, v))
1753+
elif isinstance(propclass, hyperdb.String) and k != 'id':
1754+
if type(v) is not type([]):
1755+
v = [v]
1756+
for x in v:
1757+
if exact:
1758+
l.append((STRING, k, x))
1759+
else:
1760+
# simple glob searching
1761+
x = re.sub(r'([\|\{\}\\\.\+\[\]\(\)])', r'\\\1', x)
1762+
x = x.replace('?', '.')
1763+
x = x.replace('*', '.*?')
1764+
l.append((STRING, k, re.compile(x, re.I)))
1765+
elif isinstance(propclass, hyperdb.Date):
1766+
try:
1767+
date_rng = propclass.range_from_raw(v, self.db)
1768+
l.append((DATE, k, date_rng))
1769+
except ValueError:
1770+
# If range creation fails - ignore that search parameter
1771+
pass
1772+
elif isinstance(propclass, hyperdb.Interval):
1773+
try:
1774+
intv_rng = date.Range(v, date.Interval)
1775+
l.append((INTERVAL, k, intv_rng))
1776+
except ValueError:
1777+
# If range creation fails - ignore that search parameter
1778+
pass
1779+
1780+
elif isinstance(propclass, hyperdb.Boolean):
1781+
if type(v) == type(""):
17931782
v = v.split(',')
1794-
except AttributeError :
1783+
if type(v) != type([]):
17951784
v = [v]
1796-
l.append((OTHER, k, [float(val) for val in v]))
1785+
bv = []
1786+
for val in v:
1787+
if type(val) is type(''):
1788+
bv.append(propclass.from_raw (val))
1789+
else:
1790+
bv.append(val)
1791+
l.append((OTHER, k, bv))
17971792

1798-
elif isinstance(propclass, hyperdb.Integer):
1799-
if type(v) != type([]):
1800-
try :
1793+
elif k == 'id':
1794+
if type(v) != type([]):
18011795
v = v.split(',')
1802-
except AttributeError :
1803-
v = [v]
1804-
l.append((OTHER, k, [int(val) for val in v]))
1796+
l.append((OTHER, k, [str(int(val)) for val in v]))
1797+
1798+
elif isinstance(propclass, hyperdb.Number):
1799+
if type(v) != type([]):
1800+
try :
1801+
v = v.split(',')
1802+
except AttributeError :
1803+
v = [v]
1804+
l.append((OTHER, k, [float(val) for val in v]))
1805+
1806+
elif isinstance(propclass, hyperdb.Integer):
1807+
if type(v) != type([]):
1808+
try :
1809+
v = v.split(',')
1810+
except AttributeError :
1811+
v = [v]
1812+
l.append((OTHER, k, [int(val) for val in v]))
18051813

18061814
filterspec = l
18071815

@@ -1848,8 +1856,12 @@ def _filter(self, search_matches, filterspec, proptree,
18481856
elif t == STRING:
18491857
if nv is None:
18501858
nv = ''
1851-
# RE search
1852-
match = v.search(nv)
1859+
if is_us(v):
1860+
# Exact match
1861+
match = (nv == v)
1862+
else:
1863+
# RE search
1864+
match = v.search(nv)
18531865
elif t == DATE or t == INTERVAL:
18541866
if nv is None:
18551867
match = v is None

roundup/backends/back_mysql.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -592,6 +592,7 @@ def sql_close(self):
592592
raise
593593

594594
class MysqlClass:
595+
case_sensitive_equal = 'COLLATE utf8_bin ='
595596

596597
def supports_subselects(self):
597598
# TODO: AFAIK it's version dependent for MySQL

roundup/backends/back_sqlite.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -455,12 +455,13 @@ def load_journal(self, classname, cols, nodeid):
455455

456456
class sqliteClass:
457457
def filter(self, search_matches, filterspec, sort=(None,None),
458-
group=(None,None), retired=False):
458+
group=(None,None), retired=False, exact_match_spec={}):
459459
""" If there's NO matches to a fetch, sqlite returns NULL
460460
instead of nothing
461461
"""
462462
return [f for f in rdbms_common.Class.filter(self, search_matches,
463-
filterspec, sort=sort, group=group, retired=retired) if f]
463+
filterspec, sort=sort, group=group, retired=retired,
464+
exact_match_spec=exact_match_spec) if f]
464465

465466
class Class(sqliteClass, rdbms_common.Class):
466467
pass

0 commit comments

Comments
 (0)