1
1
2
2
from __future__ import print_function
3
- from roundup .anypy .strings import u2s , uchr
4
3
5
4
import sys
5
+
6
+ from roundup .anypy .strings import u2s , uchr
7
+
6
8
_pyver = sys .version_info [0 ]
7
9
8
10
@@ -24,7 +26,7 @@ def html2text(html):
24
26
for script in soup (["script" , "style" ]):
25
27
script .extract ()
26
28
27
- return u2s (soup .get_text (' \n ' , strip = True ))
29
+ return u2s (soup .get_text (" \n " , strip = True ))
28
30
29
31
self .html2text = html2text
30
32
else :
@@ -33,12 +35,12 @@ def html2text(html):
33
35
# use the fallback below if beautiful soup is not installed.
34
36
try :
35
37
# Python 3+.
36
- from html .parser import HTMLParser
37
38
from html .entities import name2codepoint
39
+ from html .parser import HTMLParser
38
40
except ImportError :
39
41
# Python 2.
40
- from HTMLParser import HTMLParser
41
42
from htmlentitydefs import name2codepoint
43
+ from HTMLParser import HTMLParser
42
44
43
45
class DumbHTMLParser (HTMLParser ):
44
46
# class attribute
@@ -81,7 +83,7 @@ def handle_entityref(self, name):
81
83
self .text = self .text + c
82
84
except UnicodeEncodeError :
83
85
# print a space as a placeholder
84
- self .text = self .text + ' '
86
+ self .text = self .text + " "
85
87
86
88
def html2text (html ):
87
89
if _pyver == 3 :
@@ -95,8 +97,8 @@ def html2text(html):
95
97
self .html2text = html2text
96
98
97
99
98
- if "__main__" == __name__ :
99
- html = '''
100
+ if __name__ == "__main__" :
101
+ html = """
100
102
<body>
101
103
<script>
102
104
this must not be in output
@@ -145,7 +147,7 @@ def html2text(html):
145
147
</script>
146
148
</div>
147
149
</body>
148
- '''
150
+ """
149
151
150
152
html2text = dehtml ("dehtml" ).html2text
151
153
if html2text :
0 commit comments