1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
|
# Copyright 2009 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Base classes for feeds.
Supported feeds include Atom and HTML-snippets.
Future support may include feeds such as sparklines.
"""
# Python 2 idiom: a module-level ``__metaclass__ = type`` makes every class
# declared in this module without explicit bases a new-style class.
__metaclass__ = type
# Names exported by ``from <this module> import *``.
__all__ = [
'FeedBase',
'FeedEntry',
'FeedPerson',
'FeedTypedData',
'MINUTES',
]
import calendar
import operator
import os
import time
from urlparse import urljoin
from xml.sax.saxutils import escape as xml_escape

from BeautifulSoup import BeautifulSoup
from z3c.ptcompat import ViewPageTemplateFile
from zope.component import getUtility
from zope.datetime import rfc1123_date
from zope.interface import implements

from lp.services.config import config
from lp.services.feeds.interfaces.feed import (
    IFeed,
    IFeedEntry,
    IFeedPerson,
    IFeedTypedData,
    UnsupportedFeedFormat,
    )
from lp.services.propertycache import cachedproperty
from lp.services.utils import utc_now
from lp.services.webapp import (
    canonical_url,
    LaunchpadView,
    urlappend,
    urlparse,
    )
# XXX: bac 2007-09-20 bug=153795: modules in canonical.lazr should not import
# from canonical.launchpad, but we're doing it here as an expediency to get a
# working prototype.
from lp.services.webapp.interfaces import ILaunchpadRoot
from lp.services.webapp.vhosts import allvhosts
# File extensions (leading dot included) that FeedBase.feed_format accepts;
# any other extension raises UnsupportedFeedFormat.
SUPPORTED_FEEDS = ('.atom', '.html')
# Multiplier used to convert a duration in minutes into seconds.
MINUTES = 60 # Seconds in a minute.
class FeedBase(LaunchpadView):
    """See `IFeed`.

    Base class for feeds.

    Concrete feeds are expected to provide `title`, `logo` and
    `_getItemsWorker`, all of which raise NotImplementedError here.
    `link_self` also reads `self.feedname`, which is not defined in this
    class (presumably supplied by subclasses).
    """

    implements(IFeed)

    # Cache lifetime sent to clients, converted from minutes to seconds.
    max_age = config.launchpad.max_feed_cache_minutes * MINUTES
    # Not read anywhere in this class; presumably the maximum number of
    # entries a subclass should return -- confirm against subclasses.
    quantity = 25
    # Filled in lazily by getItems().
    items = None
    rootsite = 'mainsite'
    # Page templates used by renderAtom() and renderHTML().
    template_files = {'atom': 'templates/feed-atom.pt',
                      'html': 'templates/feed-html.pt'}

    def __init__(self, context, request):
        super(FeedBase, self).__init__(context, request)
        # Evaluating feed_format here means a request for an unsupported
        # extension raises UnsupportedFeedFormat at construction time.
        self.format = self.feed_format
        self.root_url = canonical_url(getUtility(ILaunchpadRoot),
                                      rootsite=self.rootsite)

    @property
    def title(self):
        """See `IFeed`."""
        raise NotImplementedError

    @property
    def link_self(self):
        """See `IFeed`."""
        # The self link is the URL for this particular feed.  For example:
        # http://feeds.launchpad.net/ubuntu/announcements.atom
        path = "%s.%s" % (self.feedname, self.format)
        return urlappend(canonical_url(self.context, rootsite="feeds"),
                         path)

    @property
    def site_url(self):
        """See `IFeed`."""
        # Drop the last character of the configured root URL (presumably
        # its trailing slash).
        return allvhosts.configs['mainsite'].rooturl[:-1]

    @property
    def link_alternate(self):
        """See `IFeed`."""
        return canonical_url(self.context, rootsite=self.rootsite)

    @property
    def feed_id(self):
        """See `IFeed`.

        Override this method if the context used does not create a
        meaningful id.
        """
        # Get the creation date, if available.  Otherwise use a fixed date,
        # as allowed by the RFC.
        if getattr(self.context, 'datecreated', None) is not None:
            datecreated = self.context.datecreated.date().isoformat()
        elif getattr(self.context, 'date_created', None) is not None:
            datecreated = self.context.date_created.date().isoformat()
        else:
            datecreated = "2008"
        url_path = urlparse(self.link_alternate)[2]
        if self.rootsite != 'mainsite':
            # Include the rootsite in the tag so that ids built for
            # different rootsites differ.
            id_ = 'tag:launchpad.net,%s:/%s%s' % (
                datecreated,
                self.rootsite,
                url_path)
        else:
            id_ = 'tag:launchpad.net,%s:%s' % (
                datecreated,
                url_path)
        return id_

    def getItems(self):
        """See `IFeed`."""
        if self.items is None:
            self.items = self._getItemsWorker()
        return self.items

    def _getItemsWorker(self):
        """Create the list of items.

        Called by getItems which may cache the results.  The caching is
        necessary since `getItems` is called multiple times in the course of
        constructing a single feed and pulling together the list of items is
        potentially expensive.
        """
        raise NotImplementedError

    @property
    def feed_format(self):
        """See `IFeed`.

        :return: The feed format named by the request URL's extension,
            without the leading dot.
        :raises UnsupportedFeedFormat: if the extension is not listed in
            SUPPORTED_FEEDS.
        """
        # If the full URL is http://feeds.launchpad.dev/announcements.atom/foo
        # getURL() will return http://feeds.launchpad.dev/announcements.atom
        # when traversing the feed, which will allow os.path.splitext()
        # to split off ".atom" correctly.
        path = self.request.getURL()
        extension = os.path.splitext(path)[1]
        if extension in SUPPORTED_FEEDS:
            return extension[1:]
        else:
            raise UnsupportedFeedFormat('%s is not supported' % path)

    @property
    def logo(self):
        """See `IFeed`."""
        raise NotImplementedError

    @property
    def icon(self):
        """See `IFeed`."""
        return "%s/@@/launchpad" % self.site_url

    @cachedproperty
    def date_updated(self):
        """See `IFeed`.

        :return: The most recent `last_modified` among the feed's items, or
            the current UTC time when the feed has no items.
        :raises AssertionError: if the most recent `last_modified` is None.
        """
        items = list(self.getItems())
        if not items:
            # datetime.isoformat() doesn't place the necessary "+00:00"
            # for the feedvalidator's check of the iso8601 date format
            # unless a timezone is specified with tzinfo.
            return utc_now()
        # Only the newest entry matters, so take the maximum rather than
        # sorting the whole list.
        newest = max(items, key=operator.attrgetter('last_modified'))
        last_modified = newest.last_modified
        if last_modified is None:
            raise AssertionError('All feed entries require a date updated.')
        return last_modified

    def render(self):
        """See `IFeed`.

        Sets the caching headers (Expires, Cache-Control, X-Cache-Control
        and Last-Modified) on the response and returns the feed rendered in
        the requested format.

        :raises UnsupportedFeedFormat: if `self.format` is neither 'atom'
            nor 'html'.
        """
        expires = rfc1123_date(time.time() + self.max_age)
        if self.date_updated is not None:
            # utctimetuple() normalises an aware datetime to UTC (a naive
            # one is taken to be UTC already) and timegm() converts that to
            # an epoch timestamp.  time.mktime() would instead interpret
            # the fields in the server's local timezone and skew the header
            # on any host not running in UTC.
            last_modified = rfc1123_date(
                calendar.timegm(self.date_updated.utctimetuple()))
        else:
            last_modified = rfc1123_date(time.time())
        response = self.request.response
        response.setHeader('Expires', expires)
        response.setHeader('Cache-Control', 'max-age=%d' % self.max_age)
        response.setHeader('X-Cache-Control', 'max-age=%d' % self.max_age)
        response.setHeader('Last-Modified', last_modified)

        if self.format == 'atom':
            return self.renderAtom()
        elif self.format == 'html':
            return self.renderHTML()
        else:
            raise UnsupportedFeedFormat("Format %s is not supported" %
                                        self.format)

    def renderAtom(self):
        """See `IFeed`."""
        self.request.response.setHeader('content-type',
                                        'application/atom+xml;charset=utf-8')
        template_file = ViewPageTemplateFile(self.template_files['atom'])
        result = template_file(self)
        # XXX EdwinGrubbs 2008-01-10 bug=181903
        # Zope3 requires the content-type to start with "text/" if
        # the result is a unicode object.
        return result.encode('utf-8')

    def renderHTML(self):
        """See `IFeed`."""
        return ViewPageTemplateFile(self.template_files['html'])(self)
class FeedEntry:
    """See `IFeedEntry`.

    A single entry within a feed.
    """

    implements(IFeedEntry)

    def __init__(self, title, link_alternate, date_created, date_updated,
                 date_published=None, authors=None, contributors=None,
                 content=None, id_=None, generator=None, logo=None,
                 icon=None):
        """Record the entry's data.

        `generator`, `logo` and `icon` are accepted but not stored.

        :raises AssertionError: if `date_updated` is None.
        """
        self.title = title
        self.link_alternate = link_alternate
        self.content = content
        self.date_created = date_created
        self.date_updated = date_updated
        self.date_published = date_published
        if date_updated is None:
            raise AssertionError('date_updated is required by RFC 4287')
        # A missing author list becomes an empty list; contributors are
        # stored exactly as given.
        self.authors = [] if authors is None else authors
        self.contributors = contributors
        # Derive an id from the link and creation date unless one is given.
        self.id = self.construct_id() if id_ is None else id_

    @property
    def last_modified(self):
        """The later of the published and updated dates.

        Falls back to `date_updated` when there is no published date.
        """
        published = self.date_published
        if published is None:
            return self.date_updated
        return max(published, self.date_updated)

    def construct_id(self):
        """Build a tag: id from the creation date and the link's path."""
        path = urlparse(self.link_alternate)[2]
        created_on = self.date_created.date().isoformat()
        return 'tag:launchpad.net,%s:%s' % (created_on, path)
class FeedTypedData:
    """Data for a feed that includes its type."""

    implements(IFeedTypedData)

    # Content types accepted by the constructor.
    content_types = ['text', 'html', 'xhtml']

    def __init__(self, content, content_type='text', root_url=None):
        """Store the raw content together with its declared type.

        :param content: The raw content.
        :param content_type: One of `content_types`.
        :param root_url: If not None, the URL against which root-relative
            links in html/xhtml content are qualified.
        :raises UnsupportedFeedFormat: if `content_type` is not one of
            `content_types`.
        """
        self._content = content
        if content_type not in self.content_types:
            raise UnsupportedFeedFormat("%s: is not valid" % content_type)
        self.content_type = content_type
        self.root_url = root_url

    @property
    def content(self):
        """The content, prepared for inclusion in a feed.

        For html and xhtml with a `root_url`, hrefs beginning with '/' are
        made absolute.  Text and html are then XML-escaped; xhtml is
        re-parsed with HTML entities converted.
        """
        if (self.content_type in ('html', 'xhtml') and
                self.root_url is not None):
            # Unqualified hrefs must be qualified using the original
            # subdomain or they will try to be served from
            # http://feeds.launchpad.net, which will not work.
            soup = BeautifulSoup(self._content)
            for a_tag in soup.findAll('a'):
                # Anchors without an href (e.g. <a name="...">) have
                # nothing to qualify; indexing them would raise KeyError.
                href = a_tag.get('href')
                if href is not None and href.startswith('/'):
                    a_tag['href'] = urljoin(self.root_url, href)
            altered_content = unicode(soup)
        else:
            altered_content = self._content

        if self.content_type in ('text', 'html'):
            altered_content = xml_escape(altered_content)
        elif self.content_type == 'xhtml':
            soup = BeautifulSoup(
                altered_content,
                convertEntities=BeautifulSoup.HTML_ENTITIES)
            altered_content = unicode(soup)
        return altered_content
class FeedPerson:
    """See `IFeedPerson`.

    Using this class consistently for people in feeds keeps email
    addresses from being leaked by accident.
    """

    implements(IFeedPerson)

    def __init__(self, person, rootsite):
        """Copy the feed-safe details of `person`.

        :param person: Object providing `displayname`; also passed to
            `canonical_url` to build the person's URI.
        :param rootsite: The rootsite passed to `canonical_url`.
        """
        display_name = person.displayname
        self.name = display_name
        # Public feeds must never disclose email addresses, so this is
        # always None.
        self.email = None
        person_url = canonical_url(person, rootsite=rootsite)
        self.uri = person_url
|