1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
|
From 20a32910f4a521788ba93a8c809cf0db702bed5b Mon Sep 17 00:00:00 2001
From: Kurt McKee <contactme@kurtmckee.org>
Date: Mon, 17 Dec 2012 10:41:13 -0600
Subject: [PATCH] Fix chardet support in Python 3
Fixes issue 384.
Thanks to Google user Arfrever.TFA for reporting this!
---
NEWS | 1 +
feedparser/feedparser.py | 6 +++++-
2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/feedparser/feedparser.py b/feedparser/feedparser.py
index c78e6a3..e977ae8 100644
--- a/feedparser/feedparser.py
+++ b/feedparser/feedparser.py
@@ -3766,7 +3766,11 @@ def convert_to_utf8(http_headers, data):
chardet_encoding = None
tried_encodings = []
if chardet:
- chardet_encoding = unicode(chardet.detect(data)['encoding'] or '', 'ascii', 'ignore')
+ chardet_encoding = chardet.detect(data)['encoding']
+ if not chardet_encoding:
+ chardet_encoding = ''
+ if not isinstance(chardet_encoding, unicode):
+ chardet_encoding = unicode(chardet_encoding, 'ascii', 'ignore')
# try: HTTP encoding, declared XML encoding, encoding sniffed from BOM
for proposed_encoding in (rfc3023_encoding, xml_encoding, bom_encoding,
chardet_encoding, u'utf-8', u'windows-1252', u'iso-8859-2'):
--
1.8.5.5
|