1313entities = [("<" , "<" ), (">" , ">" ),
1414 (""" , '"' ), ("'" , "'" ),
1515 (" " , " " ),
16- ("&" , "&" ), ]
16+ ("&" , "&" ), ] # ampersand last
17+
def unescape(text):
    """Decode numeric character references and listed entities in *text*.

    Two passes are made over the string:

    1. Every decimal reference of the form ``&#NNN;`` whose value is below
       256 is replaced by ``chr(NNN)``.  References with a larger value and
       malformed ones (no digits, missing ``;``) are left untouched.
    2. Every ``(entity, char)`` pair of the module-level ``entities`` list
       is applied with ``str.replace``, in list order.

    Returns the decoded string.
    """

    def _decode(match):
        code = int(match.group(1))
        # Only values below 256 are decoded; anything larger stays escaped.
        return chr(code) if code < 256 else match.group()

    # A single left-to-right substitution: replacement text is never
    # rescanned, so a decoded "&" cannot start a new numeric reference.
    text = re.sub(r"&#(\d+);", _decode, text)

    # Order matters here: the list is annotated to keep its ampersand entry
    # last so that earlier replacements are not fed by a decoded ampersand.
    # NOTE(review): this pass runs over the already-decoded text, so a
    # character produced by the numeric pass can still become part of an
    # entity string and be replaced a second time -- confirm that is intended.
    for entity, char in entities:
        text = text.replace(entity, char)
    return text
1741
1842def para (words , pre ):
1943 text = " " .join (words )
@@ -23,8 +47,7 @@ def para(words, pre):
2347 now = words [i - 1 ]+ " " + words [i ]
2448 fix = words [i - 1 ]+ words [i ]
2549 text = text .replace (now , fix )
26- for entity , char in entities :
27- text = text .replace (entity , char )
50+ text = unescape (text )
2851 if not pre :
2952 text = re .sub ("[\r \n \t ]+" , " " , text )
3053 text = textwrap .fill (text )
0 commit comments