~launchpad-pqm/launchpad/devel

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
# Copyright 2009 Canonical Ltd.  This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).

"""Base classes for feeds.

Supported feed formats are Atom and HTML snippets.
Future support may include feeds such as sparklines.
"""

# Make every class defined in this module a new-style class under Python 2,
# including those declared without an explicit base class.
__metaclass__ = type

# Names exported by `from <this module> import *`.
__all__ = [
    'FeedBase',
    'FeedEntry',
    'FeedPerson',
    'FeedTypedData',
    'MINUTES',
    ]

import calendar
import operator
import os
import time
from urlparse import urljoin
from xml.sax.saxutils import escape as xml_escape

from BeautifulSoup import BeautifulSoup
from z3c.ptcompat import ViewPageTemplateFile
from zope.component import getUtility
from zope.datetime import rfc1123_date
from zope.interface import implements

from lp.services.config import config
from lp.services.feeds.interfaces.feed import (
    IFeed,
    IFeedEntry,
    IFeedPerson,
    IFeedTypedData,
    UnsupportedFeedFormat,
    )
from lp.services.propertycache import cachedproperty
from lp.services.utils import utc_now
from lp.services.webapp import (
    canonical_url,
    LaunchpadView,
    urlappend,
    urlparse,
    )
# XXX: bac 2007-09-20 bug=153795: modules in canonical.lazr should not import
# from canonical.launchpad, but we're doing it here as an expediency to get a
# working prototype.
from lp.services.webapp.interfaces import ILaunchpadRoot
from lp.services.webapp.vhosts import allvhosts


# URL extensions (leading dot included) that `FeedBase.feed_format` accepts.
SUPPORTED_FEEDS = ('.atom', '.html')
MINUTES = 60 # Seconds in a minute.


class FeedBase(LaunchpadView):
    """See `IFeed`.

    Base class for feeds.

    Subclasses must supply a `feedname` attribute (read by `link_self`) and
    override `title`, `logo` and `_getItemsWorker`, all of which raise
    `NotImplementedError` here.
    """

    implements(IFeed)

    # Cache lifetime sent to clients; the configured value is in minutes, so
    # convert to seconds.
    max_age = config.launchpad.max_feed_cache_minutes * MINUTES
    # Not read by this class; presumably the maximum number of entries a
    # subclass should put in the feed -- confirm against the subclasses.
    quantity = 25
    # Cached result of `_getItemsWorker`; None until `getItems` is called.
    items = None
    rootsite = 'mainsite'
    # Page templates used to render each supported format.
    template_files = {'atom': 'templates/feed-atom.pt',
                      'html': 'templates/feed-html.pt'}

    def __init__(self, context, request):
        """Set up the view and work out the requested format and root URL.

        :raises UnsupportedFeedFormat: if the request URL does not end in
            one of `SUPPORTED_FEEDS`.
        """
        super(FeedBase, self).__init__(context, request)
        self.format = self.feed_format
        self.root_url = canonical_url(getUtility(ILaunchpadRoot),
                                      rootsite=self.rootsite)

    @property
    def title(self):
        """See `IFeed`."""
        raise NotImplementedError

    @property
    def link_self(self):
        """See `IFeed`."""

        # The self link is the URL for this particular feed.  For example:
        # http://feeds.launchpad.net/ubuntu/announcements.atom
        path = "%s.%s" % (self.feedname, self.format)
        return urlappend(canonical_url(self.context, rootsite="feeds"),
                         path)

    @property
    def site_url(self):
        """See `IFeed`."""
        # Drop the last character of the configured root URL (presumably its
        # trailing slash) so that paths can be appended with a "/".
        return allvhosts.configs['mainsite'].rooturl[:-1]

    @property
    def link_alternate(self):
        """See `IFeed`."""
        return canonical_url(self.context, rootsite=self.rootsite)

    @property
    def feed_id(self):
        """See `IFeed`.

        The id is a "tag:" URI built from the context's creation date and
        the path of `link_alternate`; for any rootsite other than
        'mainsite' the rootsite name is inserted in front of the path.

        Override this method if the context used does not create a
        meaningful id.
        """
        # Get the creation date, if available.  Otherwise use a fixed date, as
        # allowed by the RFC.
        if getattr(self.context, 'datecreated', None) is not None:
            datecreated = self.context.datecreated.date().isoformat()
        elif getattr(self.context, 'date_created', None) is not None:
            datecreated = self.context.date_created.date().isoformat()
        else:
            datecreated = "2008"
        url_path = urlparse(self.link_alternate)[2]
        if self.rootsite != 'mainsite':
            id_ = 'tag:launchpad.net,%s:/%s%s' % (
                datecreated,
                self.rootsite,
                url_path)
        else:
            id_ = 'tag:launchpad.net,%s:%s' % (
                datecreated,
                url_path)
        return id_

    def getItems(self):
        """See `IFeed`."""
        if self.items is None:
            self.items = self._getItemsWorker()
        return self.items

    def _getItemsWorker(self):
        """Create the list of items.

        Called by getItems which may cache the results.  The caching is
        necessary since `getItems` is called multiple times in the course of
        constructing a single feed and pulling together the list of items is
        potentially expensive.
        """
        raise NotImplementedError

    @property
    def feed_format(self):
        """See `IFeed`.

        :return: the extension of the request URL without its leading dot,
            e.g. 'atom' or 'html'.
        :raises UnsupportedFeedFormat: if the extension is not listed in
            `SUPPORTED_FEEDS`.
        """
        # If the full URL is http://feeds.launchpad.dev/announcements.atom/foo
        # getURL() will return http://feeds.launchpad.dev/announcements.atom
        # when traversing the feed, which will allow os.path.splitext()
        # to split off ".atom" correctly.
        path = self.request.getURL()
        extension = os.path.splitext(path)[1]
        if extension in SUPPORTED_FEEDS:
            return extension[1:]
        else:
            raise UnsupportedFeedFormat('%s is not supported' % path)

    @property
    def logo(self):
        """See `IFeed`."""
        raise NotImplementedError

    @property
    def icon(self):
        """See `IFeed`."""
        return "%s/@@/launchpad" % self.site_url

    @cachedproperty
    def date_updated(self):
        """See `IFeed`.

        :return: the most recent `last_modified` of the feed's items, or
            the current UTC time when the feed has no items.
        :raises AssertionError: if the most recent `last_modified` is None.
        """
        entries = list(self.getItems())
        if not entries:
            # datetime.isoformat() doesn't place the necessary "+00:00"
            # for the feedvalidator's check of the iso8601 date format
            # unless a timezone is specified with tzinfo.
            return utc_now()
        # Only the newest entry matters, so there is no need to sort.
        newest = max(entries, key=operator.attrgetter('last_modified'))
        last_modified = newest.last_modified
        if last_modified is None:
            raise AssertionError('All feed entries require a date updated.')
        return last_modified

    def render(self):
        """See `IFeed`.

        Sets the caching headers (Expires, Cache-Control, X-Cache-Control
        and Last-Modified) on the response and returns the feed rendered in
        the requested format.

        :raises UnsupportedFeedFormat: if `self.format` is neither 'atom'
            nor 'html'.
        """
        expires = rfc1123_date(time.time() + self.max_age)
        if self.date_updated is not None:
            # utctimetuple() normalises an aware datetime to UTC (a naive
            # one is assumed to be UTC already) and timegm() reads those
            # fields as UTC.  time.mktime() would treat them as server-local
            # time and skew the header whenever the process is not running
            # in UTC.
            last_modified = rfc1123_date(
                calendar.timegm(self.date_updated.utctimetuple()))
        else:
            last_modified = rfc1123_date(time.time())
        response = self.request.response
        response.setHeader('Expires', expires)
        response.setHeader('Cache-Control', 'max-age=%d' % self.max_age)
        response.setHeader('X-Cache-Control', 'max-age=%d' % self.max_age)
        response.setHeader('Last-Modified', last_modified)

        if self.format == 'atom':
            return self.renderAtom()
        elif self.format == 'html':
            return self.renderHTML()
        else:
            raise UnsupportedFeedFormat("Format %s is not supported" %
                                        self.format)

    def renderAtom(self):
        """See `IFeed`.

        :return: the Atom document as a UTF-8 encoded byte string.
        """
        self.request.response.setHeader('content-type',
                                        'application/atom+xml;charset=utf-8')
        template_file = ViewPageTemplateFile(self.template_files['atom'])
        result = template_file(self)
        # XXX EdwinGrubbs 2008-01-10 bug=181903
        # Zope3 requires the content-type to start with "text/" if
        # the result is a unicode object.
        return result.encode('utf-8')

    def renderHTML(self):
        """See `IFeed`."""
        return ViewPageTemplateFile(self.template_files['html'])(self)


class FeedEntry:
    """See `IFeedEntry`.

    An individual entry for a feed.
    """

    implements(IFeedEntry)

    def __init__(self,
                 title,
                 link_alternate,
                 date_created,
                 date_updated,
                 date_published=None,
                 authors=None,
                 contributors=None,
                 content=None,
                 id_=None,
                 generator=None,
                 logo=None,
                 icon=None):
        """Create a feed entry.

        :param title: stored as `title`.
        :param link_alternate: URL stored as `link_alternate`; its path is
            also used to build the default id.
        :param date_created: datetime stored as `date_created`; its date is
            used to build the default id.
        :param date_updated: stored as `date_updated`; must not be None.
        :param date_published: optional, stored as `date_published`.
        :param authors: stored as `authors`; None becomes an empty list.
        :param contributors: stored unchanged as `contributors`.
        :param content: stored as `content`.
        :param id_: explicit entry id; when None one is generated by
            `construct_id`.
        :param generator: accepted but not stored by this class.
        :param logo: accepted but not stored by this class.
        :param icon: accepted but not stored by this class.
        :raises AssertionError: if `date_updated` is None.
        """
        # Validate before touching any state.
        if date_updated is None:
            raise AssertionError('date_updated is required by RFC 4287')
        self.title = title
        self.link_alternate = link_alternate
        self.content = content
        self.date_created = date_created
        self.date_updated = date_updated
        self.date_published = date_published
        if authors is None:
            authors = []
        self.authors = authors
        self.contributors = contributors
        if id_ is None:
            # construct_id() reads link_alternate and date_created, which
            # have been assigned above.
            self.id = self.construct_id()
        else:
            self.id = id_

    @property
    def last_modified(self):
        """The later of `date_published` and `date_updated`.

        Falls back to `date_updated` when the entry has no publication date.
        """
        if self.date_published is not None:
            return max(self.date_published, self.date_updated)
        return self.date_updated

    def construct_id(self):
        """Build a "tag:" URI from the creation date and the link's path."""
        url_path = urlparse(self.link_alternate)[2]
        return 'tag:launchpad.net,%s:%s' % (
            self.date_created.date().isoformat(),
            url_path)


class FeedTypedData:
    """Data for a feed that includes its type."""

    implements(IFeedTypedData)

    # The only values accepted for `content_type`.
    content_types = ['text', 'html', 'xhtml']

    def __init__(self, content, content_type='text', root_url=None):
        """Wrap `content` together with its declared type.

        :param content: the raw content; kept unmodified and only
            transformed when the `content` property is read.
        :param content_type: one of `content_types`.
        :param root_url: optional base URL used to qualify links that start
            with "/" in html and xhtml content.
        :raises UnsupportedFeedFormat: if `content_type` is not one of
            `content_types`.
        """
        if content_type not in self.content_types:
            raise UnsupportedFeedFormat("%s: is not valid" % content_type)
        self._content = content
        self.content_type = content_type
        self.root_url = root_url

    @property
    def content(self):
        """The content prepared for inclusion in the feed.

        For html and xhtml content with a `root_url`, hrefs starting with
        "/" are made absolute.  Text and html content is then XML-escaped;
        xhtml content is re-parsed with HTML entities converted.
        """
        if (self.content_type in ('html', 'xhtml') and
            self.root_url is not None):
            # Unqualified hrefs must be qualified using the original subdomain
            # or they will try be served from http://feeds.launchpad.net,
            # which will not work.
            soup = BeautifulSoup(self._content)
            for a_tag in soup.findAll('a'):
                # Anchors such as <a name="..."> have no href; indexing the
                # tag would raise KeyError, so look the attribute up safely
                # and leave those tags alone.
                href = a_tag.get('href')
                if href is not None and href.startswith('/'):
                    a_tag['href'] = urljoin(self.root_url, href)
            altered_content = unicode(soup)
        else:
            altered_content = self._content

        if self.content_type in ('text', 'html'):
            altered_content = xml_escape(altered_content)
        elif self.content_type == 'xhtml':
            soup = BeautifulSoup(
                altered_content,
                convertEntities=BeautifulSoup.HTML_ENTITIES)
            altered_content = unicode(soup)
        return altered_content


class FeedPerson:
    """See `IFeedPerson`.

    Represents a person inside a feed.  Building feed people through this
    class consistently means email addresses cannot leak by accident,
    because `email` is always left blank.
    """

    implements(IFeedPerson)

    def __init__(self, person, rootsite):
        """Record the display name and canonical URL of `person`.

        :param person: object providing `displayname` and for which
            `canonical_url` can produce a URL.
        :param rootsite: passed to `canonical_url` to choose the site the
            person's URL is generated for.
        """
        display_name = person.displayname
        person_url = canonical_url(person, rootsite=rootsite)
        self.name = display_name
        # Public feeds must never disclose email addresses.
        self.email = None
        self.uri = person_url