Skip to content

Commit 2bb214c

Browse files
author
Richard Jones
committed
missed one
1 parent 786aa21 commit 2bb214c

File tree

1 file changed

+290
-0
lines changed

1 file changed

+290
-0
lines changed

TAL/HTMLTALParser.py

Lines changed: 290 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,290 @@
1+
##############################################################################
2+
#
3+
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
4+
# All Rights Reserved.
5+
#
6+
# This software is subject to the provisions of the Zope Public License,
7+
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
8+
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
9+
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
10+
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
11+
# FOR A PARTICULAR PURPOSE
12+
#
13+
##############################################################################
14+
"""
15+
Parse HTML and compile to TALInterpreter intermediate code.
16+
"""
17+
18+
import sys
19+
import string
20+
21+
from TALGenerator import TALGenerator
22+
from TALDefs import ZOPE_METAL_NS, ZOPE_TAL_NS, METALError, TALError
23+
from HTMLParser import HTMLParser, HTMLParseError
24+
25+
BOOLEAN_HTML_ATTRS = [
    # List of Boolean attributes in HTML that may be given in
    # minimized form (e.g. <img ismap> rather than <img ismap="">)
    # From http://www.w3.org/TR/xhtml1/#guidelines (C.10)
    "compact", "nowrap", "ismap", "declare", "noshade", "checked",
    "disabled", "readonly", "multiple", "selected", "noresize",
    "defer"
    ]

EMPTY_HTML_TAGS = [
    # List of HTML tags with an empty content model; these are
    # rendered in minimized form, e.g. <img />.
    # From http://www.w3.org/TR/xhtml1/#dtds
    "base", "meta", "link", "hr", "br", "param", "img", "area",
    "input", "col", "basefont", "isindex", "frame",
    ]

PARA_LEVEL_HTML_TAGS = [
    # List of HTML elements that close open paragraph-level elements
    # and are themselves paragraph-level.
    "h1", "h2", "h3", "h4", "h5", "h6", "p",
    ]

# Maps a tag that is being opened to the still-open tags that it
# implicitly closes (e.g. opening <li> closes a previous open <li>).
BLOCK_CLOSING_TAG_MAP = {
    "tr": ("tr", "td", "th"),
    "td": ("td", "th"),
    "th": ("td", "th"),
    "li": ("li",),
    "dd": ("dd", "dt"),
    "dt": ("dd", "dt"),
    }

BLOCK_LEVEL_HTML_TAGS = [
    # List of HTML tags that denote larger sections than paragraphs.
    # NOTE(review): "noframe" may be a typo for the HTML element
    # "noframes" -- left unchanged here; confirm before altering.
    "blockquote", "table", "tr", "th", "td", "thead", "tfoot", "tbody",
    "noframe", "ul", "ol", "li", "dl", "dt", "dd", "div",
    ]

# Tags whose implied close tag is emitted *before* any trailing
# whitespace.  keys() is wrapped in list() so the concatenation also
# works where keys() returns a view object instead of a list.
TIGHTEN_IMPLICIT_CLOSE_TAGS = (PARA_LEVEL_HTML_TAGS
                               + list(BLOCK_CLOSING_TAG_MAP.keys()))
65+
66+
67+
class NestingError(HTMLParseError):
    """Raised when a close tag does not match the open elements.

    The offending close tag is stored on the instance as ``endtag``.
    """

    def __init__(self, tagstack, endtag, position=(None, None)):
        """Build the message from the open tags and the close tag.

        tagstack -- sequence of currently open tag names (may be empty)
        endtag   -- name of the close tag that could not be matched
        position -- position value passed through to HTMLParseError
        """
        self.endtag = endtag
        if not tagstack:
            text = 'No tags are open to match </%s>' % endtag
        elif len(tagstack) == 1:
            text = ('Open tag <%s> does not match close tag </%s>'
                    % (tagstack[0], endtag))
        else:
            open_tags = '>, <'.join(tagstack)
            text = ('Open tags <%s> do not match close tag </%s>'
                    % (open_tags, endtag))
        HTMLParseError.__init__(self, text, position)
82+
83+
class EmptyTagError(NestingError):
    """Exception raised when empty elements have an end tag.

    The tag name is available as ``tag`` and, for compatibility with
    handlers written against NestingError, also as ``endtag``.
    """

    def __init__(self, tag, position=(None, None)):
        """Record the offending close tag and build the error message.

        tag      -- name of the empty element that was given a close tag
        position -- position value passed through to HTMLParseError
        """
        self.tag = tag
        # NestingError.__init__ is deliberately not called (its
        # signature and message differ), so set the attribute it would
        # have provided; otherwise code catching NestingError and
        # reading .endtag fails with AttributeError.
        self.endtag = tag
        msg = 'Close tag </%s> should be removed' % tag
        HTMLParseError.__init__(self, msg, position)
90+
91+
class OpenTagError(NestingError):
    """Raised when a start tag may not appear inside the innermost open tag.

    The rejected start tag is stored on the instance as ``tag``.
    """

    def __init__(self, tagstack, tag, position=(None, None)):
        """Build the message from the rejected tag and the innermost open tag.

        tagstack -- sequence of open tag names; its last item is reported
        tag      -- name of the start tag that is not allowed
        position -- position value passed through to HTMLParseError
        """
        self.tag = tag
        innermost = tagstack[-1]
        HTMLParseError.__init__(
            self,
            'Tag <%s> is not allowed in <%s>' % (tag, innermost),
            position)
98+
99+
class HTMLTALParser(HTMLParser):
    """HTMLParser subclass that forwards parse events to a TAL generator.

    Start tags, end tags and text are translated into calls on
    ``self.gen`` (a TALGenerator unless another object is supplied).
    The parser tracks the open elements in ``tagstack`` so that it can
    emit implied close tags, and tracks ``xmlns:`` prefix declarations
    so that TAL and METAL attributes can be recognised.
    """

    # External API

    def __init__(self, gen=None):
        """Create a parser emitting into *gen*.

        When *gen* is None a ``TALGenerator(xml=0)`` is created.
        """
        HTMLParser.__init__(self)
        if gen is None:
            gen = TALGenerator(xml=0)
        self.gen = gen
        # Names of the currently open elements, innermost last.
        self.tagstack = []
        # Saved prefix->URI dicts; one entry is pushed by scan_xmlns()
        # and popped by pop_xmlns() for every element.
        self.nsstack = []
        # Prefix->URI mapping in effect for the current element.
        self.nsdict = {'tal': ZOPE_TAL_NS, 'metal': ZOPE_METAL_NS}

    def parseFile(self, file):
        """Read the file named *file* and parse its whole content."""
        f = open(file)
        try:
            # Close the handle even when read() fails.
            data = f.read()
        finally:
            f.close()
        self.parseString(data)

    def parseString(self, data):
        """Parse *data* as a complete document.

        Elements still open at the end of the input are closed with
        implied end tags (implied level 2).
        """
        self.feed(data)
        self.close()
        while self.tagstack:
            self.implied_endtag(self.tagstack[-1], 2)
        assert self.nsstack == [], self.nsstack

    def getCode(self):
        """Return whatever the generator's getCode() returns."""
        return self.gen.getCode()

    def getWarnings(self):
        """Return the warnings for this parse; always an empty tuple."""
        return ()

    # Overriding HTMLParser methods

    def handle_starttag(self, tag, attrs):
        """Handle a start tag: close implied elements, then emit it.

        Tags listed in EMPTY_HTML_TAGS are closed again immediately
        (implied level -1).
        """
        self.close_para_tags(tag)
        self.scan_xmlns(attrs)
        tag, attrlist, taldict, metaldict = self.process_ns(tag, attrs)
        self.tagstack.append(tag)
        self.gen.emitStartElement(tag, attrlist, taldict, metaldict,
                                  self.getpos())
        if tag in EMPTY_HTML_TAGS:
            self.implied_endtag(tag, -1)

    def handle_startendtag(self, tag, attrs):
        """Handle a minimized tag such as ``<tag />``.

        The tag is never pushed on ``tagstack``.  When it carries a
        non-empty TAL ``content`` attribute it is emitted as a separate
        start and end element; otherwise as one start element with
        ``isend=1``.
        """
        self.close_para_tags(tag)
        self.scan_xmlns(attrs)
        tag, attrlist, taldict, metaldict = self.process_ns(tag, attrs)
        if taldict.get("content"):
            self.gen.emitStartElement(tag, attrlist, taldict, metaldict,
                                      self.getpos())
            self.gen.emitEndElement(tag, implied=-1)
        else:
            self.gen.emitStartElement(tag, attrlist, taldict, metaldict,
                                      self.getpos(), isend=1)
        self.pop_xmlns()

    def handle_endtag(self, tag):
        """Handle an explicit close tag.

        Raises EmptyTagError for tags in EMPTY_HTML_TAGS and
        NestingError (via close_enclosed_tags) when *tag* is not open.
        """
        if tag in EMPTY_HTML_TAGS:
            # </img> etc. in the source is an error
            raise EmptyTagError(tag, self.getpos())
        self.close_enclosed_tags(tag)
        self.gen.emitEndElement(tag)
        self.pop_xmlns()
        self.tagstack.pop()

    def close_para_tags(self, tag):
        """Emit implied close tags for elements that opening *tag* ends.

        Raises OpenTagError when *tag* is paragraph- or block-level and
        an open paragraph-level element other than ``p`` would have to
        be closed.
        """
        if tag in EMPTY_HTML_TAGS:
            return
        # Index into tagstack of the outermost element to close, or -1.
        close_to = -1
        if tag in BLOCK_CLOSING_TAG_MAP:
            blocks_to_close = BLOCK_CLOSING_TAG_MAP[tag]
            # Scan outermost-to-innermost: remember the first closable
            # tag, but forget it again whenever another block-level
            # element is opened after it.
            for i in range(len(self.tagstack)):
                t = self.tagstack[i]
                if t in blocks_to_close:
                    if close_to == -1:
                        close_to = i
                elif t in BLOCK_LEVEL_HTML_TAGS:
                    close_to = -1
        elif tag in PARA_LEVEL_HTML_TAGS + BLOCK_LEVEL_HTML_TAGS:
            # Scan innermost-to-outermost, stopping at the first
            # block-level element.
            i = len(self.tagstack) - 1
            while i >= 0:
                closetag = self.tagstack[i]
                if closetag in BLOCK_LEVEL_HTML_TAGS:
                    break
                if closetag in PARA_LEVEL_HTML_TAGS:
                    if closetag != "p":
                        raise OpenTagError(self.tagstack, tag, self.getpos())
                    close_to = i
                i = i - 1
        if close_to >= 0:
            while len(self.tagstack) > close_to:
                self.implied_endtag(self.tagstack[-1], 1)

    def close_enclosed_tags(self, tag):
        """Emit implied close tags for everything opened inside *tag*.

        Raises NestingError when *tag* is not on ``tagstack``.  On
        return *tag* is the innermost open element.
        """
        if tag not in self.tagstack:
            raise NestingError(self.tagstack, tag, self.getpos())
        while tag != self.tagstack[-1]:
            self.implied_endtag(self.tagstack[-1], 1)
        assert self.tagstack[-1] == tag

    def implied_endtag(self, tag, implied):
        """Emit a close tag that was not present in the source.

        tag     -- must be the innermost open element
        implied -- -1 (empty element closed right after its start tag),
                   1 (closed because of another tag) or
                   2 (still open at end of input)
        """
        assert tag == self.tagstack[-1]
        assert implied in (-1, 1, 2)
        isend = (implied < 0)
        if tag in TIGHTEN_IMPLICIT_CLOSE_TAGS:
            # Pick out trailing whitespace from the program, and
            # insert the close tag before the whitespace.
            white = self.gen.unEmitWhitespace()
        else:
            white = None
        self.gen.emitEndElement(tag, isend=isend, implied=implied)
        if white:
            self.gen.emitRawText(white)
        self.tagstack.pop()
        self.pop_xmlns()

    def handle_charref(self, name):
        """Re-emit a character reference as raw text."""
        self.gen.emitRawText("&#%s;" % name)

    def handle_entityref(self, name):
        """Re-emit an entity reference as raw text."""
        self.gen.emitRawText("&%s;" % name)

    def handle_data(self, data):
        """Emit character data as raw text."""
        self.gen.emitRawText(data)

    def handle_comment(self, data):
        """Re-emit a comment, restoring its delimiters."""
        self.gen.emitRawText("<!--%s-->" % data)

    def handle_decl(self, data):
        """Re-emit a declaration, restoring its delimiters."""
        self.gen.emitRawText("<!%s>" % data)

    def handle_pi(self, data):
        """Re-emit a processing instruction with ``<?`` and ``>`` restored."""
        self.gen.emitRawText("<?%s>" % data)

    # Internal thingies

    def scan_xmlns(self, attrs):
        """Push the current prefix map and apply ``xmlns:`` declarations.

        One entry is always pushed on ``nsstack`` so that every call is
        balanced by exactly one pop_xmlns().  The map is copied before
        being changed, so the saved one stays intact.
        """
        nsnew = {}
        for key, value in attrs:
            if key[:6] == "xmlns:":
                nsnew[key[6:]] = value
        self.nsstack.append(self.nsdict)
        if nsnew:
            self.nsdict = self.nsdict.copy()
            self.nsdict.update(nsnew)

    def pop_xmlns(self):
        """Restore the prefix map saved by the matching scan_xmlns()."""
        self.nsdict = self.nsstack.pop()

    def fixname(self, name):
        """Classify a tag or attribute name by its namespace prefix.

        Returns a ``(name, basename, ns)`` tuple:
          - ``(name, suffix, 'tal')`` or ``(name, suffix, 'metal')``
            when the prefix is bound to the TAL or METAL namespace;
          - ``(name, name, 'xmlns')`` for an ``xmlns:`` declaration
            whose prefix is bound to one of those namespaces;
          - ``(name, name, 0)`` otherwise.
        """
        if ':' in name:
            prefix, suffix = name.split(':', 1)
            if prefix == 'xmlns':
                nsuri = self.nsdict.get(suffix)
                if nsuri in (ZOPE_TAL_NS, ZOPE_METAL_NS):
                    return name, name, prefix
            else:
                nsuri = self.nsdict.get(prefix)
                if nsuri == ZOPE_TAL_NS:
                    return name, suffix, 'tal'
                elif nsuri == ZOPE_METAL_NS:
                    return name, suffix, 'metal'
        return name, name, 0

    def process_ns(self, name, attrs):
        """Split an element's attributes into plain, TAL and METAL parts.

        Returns ``(name, attrlist, taldict, metaldict)``.  Items of
        *attrlist* are the original ``(key, value)`` pairs, or
        ``(key, value, ns)`` when a namespace applies.  Attributes
        without their own TAL/METAL prefix inherit the namespace of the
        tag.  ``taldict['tal tag']`` is set when the tag itself is in
        the TAL or METAL namespace.

        Raises TALError / METALError on a duplicate TAL / METAL
        attribute.
        """
        attrlist = []
        taldict = {}
        metaldict = {}
        name, namebase, namens = self.fixname(name)
        for item in attrs:
            key, value = item
            key, keybase, keyns = self.fixname(key)
            ns = keyns or namens # default to tag namespace
            if ns and ns != 'unknown':
                item = (key, value, ns)
            if ns == 'tal':
                if keybase in taldict:
                    raise TALError("duplicate TAL attribute " +
                                   repr(keybase), self.getpos())
                taldict[keybase] = value
            elif ns == 'metal':
                if keybase in metaldict:
                    raise METALError("duplicate METAL attribute " +
                                     repr(keybase), self.getpos())
                metaldict[keybase] = value
            attrlist.append(item)
        if namens in ('metal', 'tal'):
            taldict['tal tag'] = namens
        return name, attrlist, taldict, metaldict

0 commit comments

Comments
 (0)