~launchpad-pqm/launchpad/devel

« back to all changes in this revision

Viewing changes to scripts/import-zope-specs.py

Fix lint.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
#!/usr/bin/python -S
 
2
#
 
3
# Copyright 2009 Canonical Ltd.  This software is licensed under the
 
4
# GNU Affero General Public License version 3 (see the file LICENSE).
 
5
 
 
6
# A script to import metadata about the Zope 3 specs into Launchpad
 
7
 
 
8
__metaclass__ = type
 
9
 
 
10
import itertools
 
11
import re
 
12
import sys
 
13
import urllib2
 
14
 
 
15
import _pythonpath
 
16
from zope.component import getUtility
 
17
from BeautifulSoup import BeautifulSoup
 
18
 
 
19
from canonical.launchpad.scripts import execute_zcml_for_scripts
 
20
from canonical.lp import initZopeless
 
21
from lp.blueprints.enums import (
 
22
    SpecificationStatus,
 
23
    SpecificationGoalStatus,
 
24
    SpecificationDelivery,
 
25
    SpecificationPriority,
 
26
    )
 
27
from lp.blueprints.interfaces.specification import ISpecificationSet
 
28
from lp.registry.interfaces.person import IPersonSet
 
29
from lp.registry.interfaces.product import IProductSet
 
30
 
 
31
 
 
32
# Root of the Zope 3 wiki; page names are appended to this URL.
WIKI_BASE = 'http://wiki.zope.org/zope3/'
# Wiki pages whose proposal listings main() walks.
PROPOSAL_LISTS = ['Zope3Proposals', 'OldProposals', 'DraftProposals']
# Default listing page for iter_spec_urls().
specroot = WIKI_BASE + 'Zope3Proposals'

# Spellings of the "at" sign that may appear in author email addresses.
at_replacements = ['_at_', '(at)', '@']
# Matches two runs of letters, digits, '.' or '-' joined by '@' or by any of
# the at_replacements spellings (escaped so they are matched literally).
author_email_pat = re.compile(
    '[-.A-Za-z0-9]+(?:@|%s)[-.A-Za-z0-9]+'
    % '|'.join(map(re.escape, at_replacements)))
 
40
 
 
41
def getTextContent(tag):
    """Return the text held by `tag` as a single string.

    None yields the empty string and a string is returned unchanged.  For
    anything else, the string nodes produced by
    tag.recursiveChildGenerator() are concatenated in order and every
    non-string node is ignored.
    """
    if tag is None:
        return ''
    if isinstance(tag, basestring):
        return tag
    pieces = []
    for node in tag.recursiveChildGenerator():
        if isinstance(node, basestring):
            pieces.append(node)
    return ''.join(pieces)
 
48
 
 
49
 
 
50
class ZopeSpec:
    """One Zope 3 wiki proposal and the logic to mirror it into Launchpad.

    Attributes:
        url: address of the proposal's wiki page.
        name: the last '/'-separated segment of `url`.
        title, summary: text given to the constructor.
        authors: set of author email addresses added by parseAuthorEmails().
        statuses: set of wiki badge names added by parseStatuses().
    """

    def __init__(self, url, title, summary):
        """Store the given data and start with no authors or statuses."""
        self.url = url
        self.name = self.url.split('/')[-1]
        self.title = title
        self.summary = summary
        self.authors = set()
        self.statuses = set()

    def parseAuthorEmails(self, text):
        """Add every email address found in `text` to self.authors.

        Addresses are located with author_email_pat; each spelling listed
        in at_replacements is then turned back into a plain '@'.
        """
        for author in author_email_pat.findall(text):
            # unmangle at symbol in email:
            for replacement in at_replacements:
                author = author.replace(replacement, '@')
            self.authors.add(author)

    def parseStatuses(self, soup):
        """Record in self.statuses each wiki badge that `soup` links to.

        A badge counts as present when the page has an <a> element whose
        href is exactly WIKI_BASE + the badge name.
        """
        wiki_badges = [
            'IsWorkInProgress',

            'IsProposal',
            'IsRejectedProposal',
            'IsSupercededProposal',
            'IsRetractedProposal',
            'IsAcceptedProposal',
            'IsImplementedProposal',
            'IsExpiredProposal',
            'IsDraftProposal',

            'IsPlanned',
            'IsResolved',
            'IsImplemented',

            'IsReplaced',
            'IsOutdated',
            'IsDraft',
            'IsEditedDraft',
            'IsRoughDraft',
            ]
        for badge in wiki_badges:
            url = WIKI_BASE + badge
            if soup.fetch('a', {'href': url}):
                self.statuses.add(badge)

    def parseSpec(self):
        """Download the spec page and collect its statuses and authors.

        Raises AssertionError if the page does not contain exactly one
        <div class="content"> element.
        """
        response = urllib2.urlopen(self.url)
        try:
            contents = response.read()
        finally:
            # Release the connection even if the read fails.
            response.close()
        soup = BeautifulSoup(contents)
        contentdivs = soup('div', {'class': 'content'})
        # Raised explicitly so the check still runs under "python -O".
        if len(contentdivs) != 1:
            raise AssertionError(
                'Expected exactly one content div in %s, found %d'
                % (self.url, len(contentdivs)))

        # Specification statuses are represented by "wiki badges",
        # which are just hyperlinks to particular pages.
        self.parseStatuses(soup)

        # There are two styles of spec.  One of them has a table with
        # RFC-822 style headers in it.  The other has minor level headings
        # with text under the heading.
        tables = soup('table')
        # Every page has one table, for the main page layout.  So, if the page
        # has two tables, it means that it will be using the RFC-822 style.
        if len(tables) >= 2:
            # This is a spec with RFC-822 style headers.
            docinfo = tables[1]
            for row in docinfo('tr'):
                headers = row('th')
                cells = row('td')
                if not headers or not cells:
                    continue
                # A substring test, so a trailing ':' on the header text
                # does not need to be stripped first.
                if 'Author' in headers[0].renderContents():
                    self.parseAuthorEmails(cells[0].renderContents())
        else:
            # This is a spec with minor level headings, or perhaps with no
            # headings at all.

            # Look for an author heading.
            author_headers = soup(text=re.compile('Author.*', re.I))
            if author_headers:
                following = author_headers[0].findNext()
                # Nothing may follow the heading; then there is no author
                # text to parse.
                if following is not None:
                    self.parseAuthorEmails(following.renderContents())

    def _hasAnyStatus(self, badges):
        """Return True if any badge in `badges` is in self.statuses."""
        return any(badge in self.statuses for badge in badges)

    @property
    def lpname(self):
        """Lower-case, dash-separated name derived from self.name.

        A dash is inserted before each capital letter that follows a
        non-capital character, the result is lower-cased and leading dashes
        are dropped.  If what remains is empty or does not start with a
        letter it is prefixed with 'x-'.
        """
        # add dashes before capitalised words
        name = re.sub(r'([^A-Z])([A-Z])', r'\1-\2', self.name)
        # lower case name and remove leading dashes
        name = name.lower().lstrip('-')
        # if name doesn't begin with an alphabetical character prefix it;
        # an empty name is prefixed too rather than indexed into.
        if not name or not name[0].isalpha():
            name = 'x-' + name
        return name

    @property
    def lpstatus(self):
        """SpecificationStatus implied by the badges in self.statuses.

        The groups below are checked in order and the first match wins.
        """
        # implemented and accepted specs => APPROVED
        if self._hasAnyStatus(['IsImplemented',
                               'IsImplementedProposal',
                               'IsAcceptedProposal']):
            return SpecificationStatus.APPROVED
        # WIP => DISCUSSION
        if 'IsWorkInProgress' in self.statuses:
            return SpecificationStatus.DISCUSSION
        if self._hasAnyStatus(['IsSupercededProposal', 'IsReplaced']):
            return SpecificationStatus.SUPERSEDED
        if self._hasAnyStatus(['IsExpiredProposal', 'IsOutdated']):
            return SpecificationStatus.OBSOLETE
        # draft statuses:
        if self._hasAnyStatus(['IsDraftProposal',
                               'IsDraft',
                               'IsEditedDraft',
                               'IsRoughDraft']):
            return SpecificationStatus.DRAFT
        # otherwise ...
        return SpecificationStatus.PENDINGREVIEW

    @property
    def lpgoalstatus(self):
        """SpecificationGoalStatus implied by the badges in self.statuses."""
        # implemented and accepted specs => ACCEPTED
        if self._hasAnyStatus(['IsImplemented',
                               'IsImplementedProposal',
                               'IsAcceptedProposal']):
            return SpecificationGoalStatus.ACCEPTED
        # rejected or retracted => DECLINED
        if self._hasAnyStatus(['IsRetractedProposal', 'IsRejectedProposal']):
            return SpecificationGoalStatus.DECLINED

        # otherwise ...
        return SpecificationGoalStatus.PROPOSED

    @property
    def lpdelivery(self):
        """SpecificationDelivery implied by the badges in self.statuses."""
        if self._hasAnyStatus(['IsImplemented', 'IsImplementedProposal']):
            return SpecificationDelivery.IMPLEMENTED
        # otherwise ...
        return SpecificationDelivery.UNKNOWN

    def syncSpec(self):
        """Create or update the Launchpad specification for this spec.

        The specification is looked up by URL and created under the 'zope'
        product, owned by 'zope-dev', when the lookup finds nothing.  Its
        title, summary, status, goal status and delivery are then set from
        this object, and the assignee becomes the first author (in sorted
        order) whose email resolves to a person.
        """
        person_set = getUtility(IPersonSet)
        spec_set = getUtility(ISpecificationSet)
        zope = getUtility(IProductSet).getByName('zope')
        zope_dev = person_set.getByName('zope-dev')
        # has the spec been created?
        lpspec = spec_set.getByURL(self.url)
        if not lpspec:
            lpspec = spec_set.new(
                name=self.lpname,
                title=self.title,
                specurl=self.url,
                summary=self.summary,
                priority=SpecificationPriority.UNDEFINED,
                status=SpecificationStatus.NEW,
                owner=zope_dev,
                product=zope)

        # synchronise
        lpspec.title = self.title
        lpspec.summary = self.summary
        lpspec.status = self.lpstatus
        newgoalstatus = self.lpgoalstatus
        # Only touch the goal when it actually changes.
        if newgoalstatus != lpspec.goalstatus:
            if newgoalstatus == SpecificationGoalStatus.PROPOSED:
                lpspec.proposeGoal(None, zope_dev)
            elif newgoalstatus == SpecificationGoalStatus.ACCEPTED:
                lpspec.acceptBy(zope_dev)
            elif newgoalstatus == SpecificationGoalStatus.DECLINED:
                lpspec.declineBy(zope_dev)
        lpspec.delivery = self.lpdelivery
        lpspec.updateLifecycleStatus(zope_dev)

        # set the assignee to the first author email with an LP account
        for author in sorted(self.authors):
            person = person_set.getByEmail(author)
            if person is not None:
                lpspec.assignee = person
                break
 
239
 
 
240
 
 
241
def iter_spec_urls(url=specroot):
    """Yield a ZopeSpec for each proposal linked from a wiki listing page.

    `url` is the address of the listing page and defaults to specroot.
    Only the direct <li> children of the first <ul> inside the page's
    content div are examined.  For each item, the first link supplies the
    spec URL and title, and the text following that link becomes the
    summary.  Items with no link, and links to not-yet-created wiki pages,
    are skipped.

    Raises AssertionError if the page does not contain exactly one
    <div class="content"> element.
    """
    response = urllib2.urlopen(url)
    try:
        contents = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()
    soup = BeautifulSoup(contents)
    contentdivs = soup('div', {'class': 'content'})
    # Raised explicitly so the check still runs under "python -O".
    if len(contentdivs) != 1:
        raise AssertionError(
            'Expected exactly one content div in %s, found %d'
            % (url, len(contentdivs)))
    listofspecs = contentdivs[0]('ul')[0]

    for listitem in listofspecs('li', recursive=False):
        anchors = listitem('a')
        if not anchors:
            continue
        specanchor = anchors[0]
        href = specanchor['href']
        # broken wiki link => ignore
        if 'createform?page=' in href:
            continue
        title = getTextContent(specanchor)
        summary = ''.join([getTextContent(tag)
                           for tag in specanchor.nextSiblingGenerator()])
        yield ZopeSpec(href, title, summary.strip())
 
262
 
 
263
 
 
264
def main(argv):
 
265
    execute_zcml_for_scripts()
 
266
    ztm = initZopeless()
 
267
 
 
268
    for spec in itertools.chain(*[iter_spec_urls(WIKI_BASE + page)
 
269
                                  for page in PROPOSAL_LISTS]):
 
270
        # parse extra information from the spec body
 
271
        spec.parseSpec()
 
272
        # add its metadata to LP
 
273
        print 'Synchronising', spec.name
 
274
        ztm.begin()
 
275
        try:
 
276
            spec.syncSpec()
 
277
            ztm.commit()
 
278
        except:
 
279
            ztm.abort()
 
280
            raise
 
281
 
 
282
# Run the import only when executed as a script; main()'s return value is
# used as the process exit status.
if __name__ == '__main__':
    exit_status = main(sys.argv)
    sys.exit(exit_status)