Skip to content

Commit 2141899

Browse files
author
Johannes Gijsbers
committed
*** empty log message ***
1 parent d44660a commit 2141899

File tree

3 files changed

+252
-153
lines changed

3 files changed

+252
-153
lines changed

CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ Fixed:
4747
- allowed negative ids (i.e. new item markers) in HTMLClass.getItem,
4848
allowing "db/file_with_status/-1/status/menu" to generate a useful
4949
widget
50+
- The mail gateway now searches recursively for the text/plain part and the
51+
attachments of a message (sf bug 841241).
5052

5153
Cleanup:
5254
- replace curuserid attribute on Database with the extended getuid() method.

roundup/mailgw.py

Lines changed: 129 additions & 141 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ class node. Any parts of other types are each stored in separate files
7373
an exception, the original message is bounced back to the sender with the
7474
explanatory message given in the exception.
7575
76-
$Id: mailgw.py,v 1.140 2003-12-19 01:50:19 richard Exp $
76+
$Id: mailgw.py,v 1.141 2004-01-17 13:49:06 jlgijsbers Exp $
7777
"""
7878

7979
import string, re, os, mimetools, cStringIO, smtplib, socket, binascii, quopri
@@ -145,7 +145,7 @@ class Message(mimetools.Message):
145145
''' subclass mimetools.Message so we can retrieve the parts of the
146146
message...
147147
'''
148-
def getPart(self):
148+
def getpart(self):
149149
''' Get a single part of a multipart message and return it as a new
150150
Message instance.
151151
'''
@@ -164,12 +164,136 @@ def getPart(self):
164164
s.seek(0)
165165
return Message(s)
166166

167+
def getparts(self):
168+
"""Get all parts of this multipart message."""
169+
# skip over the intro to the first boundary
170+
self.getpart()
171+
172+
# accumulate the other parts
173+
parts = []
174+
while 1:
175+
part = self.getpart()
176+
if part is None:
177+
break
178+
parts.append(part)
179+
return parts
180+
167181
def getheader(self, name, default=None):
168182
hdr = mimetools.Message.getheader(self, name, default)
169183
if hdr:
170184
hdr = hdr.replace('\n','') # Inserted by rfc822.readheaders
171185
return rfc2822.decode_header(hdr)
172-
186+
187+
def getname(self):
188+
"""Find an appropriate name for this message."""
189+
if self.gettype() == 'message/rfc822':
190+
# handle message/rfc822 specially - the name should be
191+
# the subject of the actual e-mail embedded here
192+
self.fp.seek(0)
193+
name = Message(self.fp).getheader('subject')
194+
else:
195+
# try name on Content-Type
196+
name = self.getparam('name')
197+
if not name:
198+
disp = self.getheader('content-disposition', None)
199+
if disp:
200+
name = getparam(disp, 'filename')
201+
202+
if name:
203+
return name.strip()
204+
205+
def getbody(self):
206+
"""Get the decoded message body."""
207+
self.rewindbody()
208+
encoding = self.getencoding()
209+
data = None
210+
if encoding == 'base64':
211+
# BUG: is base64 really used for text encoding or
212+
# are we inserting zip files here?
213+
data = binascii.a2b_base64(self.fp.read())
214+
elif encoding == 'quoted-printable':
215+
# the quopri module wants to work with files
216+
decoded = cStringIO.StringIO()
217+
quopri.decode(self.fp, decoded)
218+
data = decoded.getvalue()
219+
elif encoding == 'uuencoded':
220+
data = binascii.a2b_uu(self.fp.read())
221+
else:
222+
# take it as text
223+
data = self.fp.read()
224+
225+
# Normalise message to UTF-8 (decode with the declared charset, re-encode)
226+
charset = rfc2822.unaliasCharset(self.getparam("charset"))
227+
if charset:
228+
# Do conversion only if charset specified
229+
edata = unicode(data, charset).encode('utf-8')
230+
# Convert from dos eol to unix
231+
edata = edata.replace('\r\n', '\n')
232+
else:
233+
# Leave message content as is
234+
edata = data
235+
236+
return edata
237+
238+
# General multipart handling:
239+
# Take the first text/plain part, anything else is considered an
240+
# attachment.
241+
# multipart/mixed: multiple "unrelated" parts.
242+
# multipart/signed (rfc 1847):
243+
# The control information is carried in the second of the two
244+
# required body parts.
245+
# ACTION: Default, so if content is text/plain we get it.
246+
# multipart/encrypted (rfc 1847):
247+
# The control information is carried in the first of the two
248+
# required body parts.
249+
# ACTION: Not handleable as the content is encrypted.
250+
# multipart/related (rfc 1872, 2112, 2387):
251+
# The Multipart/Related content-type addresses the MIME
252+
# representation of compound objects.
253+
# ACTION: Default. If we are lucky there is a text/plain.
254+
# TODO: One should use the start part and look for an Alternative
255+
# that is text/plain.
256+
# multipart/alternative (rfc 2046; see also rfc 1872, 1892):
257+
# only in "related" ?
258+
# multipart/report (rfc 1892):
259+
# e.g. mail system delivery status reports.
260+
# ACTION: Default. Could be ignored or used for Delivery Notification
261+
# flagging.
262+
# multipart/form-data:
263+
# For web forms only.
264+
265+
def extract_content(self, parent_type=None):
266+
"""Extract the body and the attachments recursively."""
267+
content_type = self.gettype()
268+
content = None
269+
attachments = []
270+
271+
if content_type == 'text/plain':
272+
content = self.getbody()
273+
elif content_type[:10] == 'multipart/':
274+
for part in self.getparts():
275+
new_content, new_attach = part.extract_content(content_type)
276+
277+
# If we haven't found a text/plain part yet, take this one,
278+
# otherwise make it an attachment.
279+
if not content:
280+
content = new_content
281+
elif new_content:
282+
attachments.append(part.as_attachment())
283+
284+
attachments.extend(new_attach)
285+
elif (parent_type == 'multipart/signed' and
286+
content_type == 'application/pgp-signature'):
287+
# ignore it so it won't be saved as an attachment
288+
pass
289+
else:
290+
attachments.append(self.as_attachment())
291+
return content, attachments
292+
293+
def as_attachment(self):
294+
"""Return this message as an attachment."""
295+
return (self.getname(), self.gettype(), self.getbody())
296+
173297
class MailGW:
174298

175299
# Matches subjects like:
@@ -371,37 +495,6 @@ def handle_Message(self, message):
371495
m.append(s.getvalue())
372496
self.mailer.bounce_message(message, sendto, m)
373497

374-
def get_part_data_decoded(self,part):
375-
encoding = part.getencoding()
376-
data = None
377-
if encoding == 'base64':
378-
# BUG: is base64 really used for text encoding or
379-
# are we inserting zip files here.
380-
data = binascii.a2b_base64(part.fp.read())
381-
elif encoding == 'quoted-printable':
382-
# the quopri module wants to work with files
383-
decoded = cStringIO.StringIO()
384-
quopri.decode(part.fp, decoded)
385-
data = decoded.getvalue()
386-
elif encoding == 'uuencoded':
387-
data = binascii.a2b_uu(part.fp.read())
388-
else:
389-
# take it as text
390-
data = part.fp.read()
391-
392-
# Encode message to unicode
393-
charset = rfc2822.unaliasCharset(part.getparam("charset"))
394-
if charset:
395-
# Do conversion only if charset specified
396-
edata = unicode(data, charset).encode('utf-8')
397-
# Convert from dos eol to unix
398-
edata = edata.replace('\r\n', '\n')
399-
else:
400-
# Leave message content as is
401-
edata = data
402-
403-
return edata
404-
405498
def handle_message(self, message):
406499
''' message - a Message instance
407500
@@ -684,118 +777,13 @@ def handle_message(self, message):
684777
messageid = "<%s.%s.%s%s@%s>"%(time.time(), random.random(),
685778
classname, nodeid, self.instance.config.MAIL_DOMAIN)
686779

687-
#
688780
# now handle the body - find the message
689-
#
690-
content_type = message.gettype()
691-
attachments = []
692-
# General multipart handling:
693-
# Take the first text/plain part, anything else is considered an
694-
# attachment.
695-
# multipart/mixed: multiple "unrelated" parts.
696-
# multipart/signed (rfc 1847):
697-
# The control information is carried in the second of the two
698-
# required body parts.
699-
# ACTION: Default, so if content is text/plain we get it.
700-
# multipart/encrypted (rfc 1847):
701-
# The control information is carried in the first of the two
702-
# required body parts.
703-
# ACTION: Not handleable as the content is encrypted.
704-
# multipart/related (rfc 1872, 2112, 2387):
705-
# The Multipart/Related content-type addresses the MIME
706-
# representation of compound objects.
707-
# ACTION: Default. If we are lucky there is a text/plain.
708-
# TODO: One should use the start part and look for an Alternative
709-
# that is text/plain.
710-
# multipart/Alternative (rfc 1872, 1892):
711-
# only in "related" ?
712-
# multipart/report (rfc 1892):
713-
# e.g. mail system delivery status reports.
714-
# ACTION: Default. Could be ignored or used for Delivery Notification
715-
# flagging.
716-
# multipart/form-data:
717-
# For web forms only.
718-
if content_type == 'multipart/mixed':
719-
# skip over the intro to the first boundary
720-
part = message.getPart()
721-
content = None
722-
while 1:
723-
# get the next part
724-
part = message.getPart()
725-
if part is None:
726-
break
727-
# parse it
728-
subtype = part.gettype()
729-
if subtype == 'text/plain' and not content:
730-
# The first text/plain part is the message content.
731-
content = self.get_part_data_decoded(part)
732-
elif subtype == 'message/rfc822':
733-
# handle message/rfc822 specially - the name should be
734-
# the subject of the actual e-mail embedded here
735-
i = part.fp.tell()
736-
mailmess = Message(part.fp)
737-
name = mailmess.getheader('subject')
738-
part.fp.seek(i)
739-
attachments.append((name, 'message/rfc822', part.fp.read()))
740-
elif subtype == 'multipart/alternative':
741-
# Search for text/plain in message with attachment and
742-
# alternative text representation
743-
# skip over intro to first boundary
744-
part.getPart()
745-
while 1:
746-
# get the next part
747-
subpart = part.getPart()
748-
if subpart is None:
749-
break
750-
# parse it
751-
if subpart.gettype() == 'text/plain' and not content:
752-
content = self.get_part_data_decoded(subpart)
753-
else:
754-
# try name on Content-Type
755-
name = part.getparam('name')
756-
if name:
757-
name = name.strip()
758-
if not name:
759-
disp = part.getheader('content-disposition', None)
760-
if disp:
761-
name = getparam(disp, 'filename')
762-
if name:
763-
name = name.strip()
764-
# this is just an attachment
765-
data = self.get_part_data_decoded(part)
766-
attachments.append((name, part.gettype(), data))
767-
if content is None:
768-
raise MailUsageError, '''
769-
Roundup requires the submission to be plain text. The message parser could
770-
not find a text/plain part to use.
771-
'''
772-
773-
elif content_type[:10] == 'multipart/':
774-
# skip over the intro to the first boundary
775-
message.getPart()
776-
content = None
777-
while 1:
778-
# get the next part
779-
part = message.getPart()
780-
if part is None:
781-
break
782-
# parse it
783-
if part.gettype() == 'text/plain' and not content:
784-
content = self.get_part_data_decoded(part)
785-
if content is None:
786-
raise MailUsageError, '''
787-
Roundup requires the submission to be plain text. The message parser could
788-
not find a text/plain part to use.
789-
'''
790-
791-
elif content_type != 'text/plain':
781+
content, attachments = message.extract_content()
782+
if content is None:
792783
raise MailUsageError, '''
793784
Roundup requires the submission to be plain text. The message parser could
794785
not find a text/plain part to use.
795786
'''
796-
797-
else:
798-
content = self.get_part_data_decoded(message)
799787

800788
# figure how much we should muck around with the email body
801789
keep_citations = getattr(self.instance.config, 'EMAIL_KEEP_QUOTED_TEXT',

0 commit comments

Comments
 (0)