Skip to content

Commit dd37257

Browse files
committed
Only print the first 100 lines of a long diff. New soup2text code for spacing associated with certain tags.
- Legacy-Id: 337
1 parent 9b4b6c5 commit dd37257

2 files changed

Lines changed: 11 additions & 5 deletions

File tree

ietf/tests.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -178,7 +178,8 @@ def doUrlsTest(self, lst):
178178
print "OK cmp %s" % (url)
179179
else:
180180
contextlines = 0
181-
diff = "\n".join(unified_diff(goodtext, testtext, master, url, "", "", contextlines, lineterm=""))
181+
difflist = list(unified_diff(goodtext, testtext, master, url, "", "", contextlines, lineterm=""))
182+
diff = "\n".join(difflist)
182183
for chunk in self.diffchunks:
183184
#print "*** Checking for chunk:", chunk[:24]
184185
while re.search(chunk, diff):
@@ -201,7 +202,9 @@ def doUrlsTest(self, lst):
201202
print "OK cmp %s" % (url)
202203
else:
203204
print "Diff: %s" % (url)
204-
print diff
205+
print "\n".join(difflist[:100])
206+
if len(difflist) > 100:
207+
print "... (skipping %s lines of diff)" % (len(difflist)-100)
205208
else:
206209
print "OK cmp %s" % (url)
207210

ietf/utils/soup2text.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
except:
88
from BeautifulSoup import Tag, BeautifulSoup, NavigableString
99

10-
block_tags = ["[document]", "html", "body", "div", "blockquote", "table", "tr", "p", "pre", "h1", "h2", "h3", "h4", "h5", "h6", ]
10+
block_tags = ["[document]", "html", "body", "div", "blockquote", "table", "tr", "p", "pre", "h1", "h2", "h3", "h4", "h5", "h6", "li"]
11+
space_tags = ["th", "td", "br"]
1112
ignore_tags = ["head", "script", "style"]
1213
pre_tags = ["pre"]
1314
entities = [("&lt;", "<"), ("&gt;", ">"),
@@ -86,7 +87,10 @@ def render(node, encoding='latin-1', pre=False):
8687
blocks.append(child.text+"\n\n")
8788
node.is_block = True
8889
else:
89-
words.append(child.text)
90+
if child.text:
91+
if child.name in space_tags and not words[-1][-1] in [" ", "\t", "\n"]:
92+
words.append(" ")
93+
words.append(child.text)
9094
else:
9195
raise ValueError("Unexpected node type: '%s'" % child)
9296
if words:
@@ -111,7 +115,6 @@ def soup2text(html):
111115
# some preprocessing to handle common pathological cases
112116
html = re.sub("<br */?>[ \t\n]*(<br */?>)+", "<p/>", html)
113117
html = re.sub("<br */?>([^\n])", r"<br />\n\1", html)
114-
html = re.sub("([^ \t\n])(</t[hd].*?>)", r"\1 \2", html)
115118
soup = TextSoup(html)
116119
return str(soup)
117120

0 commit comments

Comments
 (0)