~launchpad-pqm/launchpad/devel

« back to all changes in this revision

Viewing changes to scripts/import-zope-specs.py

  • Committer: Jelmer Vernooij
  • Date: 2011-09-21 14:28:02 UTC
  • mfrom: (14006 devel)
  • mto: This revision was merged to the branch mainline in revision 14010.
  • Revision ID: jelmer@canonical.com-20110921142802-7ggkc204igsy532w
Merge lp:launchpad

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
#!/usr/bin/python -S
2
 
#
3
 
# Copyright 2009 Canonical Ltd.  This software is licensed under the
4
 
# GNU Affero General Public License version 3 (see the file LICENSE).
5
 
 
6
 
# A script to import metadata about the Zope 3 specs into Launchpad
7
 
 
8
 
__metaclass__ = type
9
 
 
10
 
import itertools
11
 
import re
12
 
import sys
13
 
import urllib2
14
 
 
15
 
import _pythonpath
16
 
from zope.component import getUtility
17
 
from BeautifulSoup import BeautifulSoup
18
 
 
19
 
from canonical.launchpad.scripts import execute_zcml_for_scripts
20
 
from canonical.lp import initZopeless
21
 
from lp.blueprints.enums import (
22
 
    SpecificationStatus,
23
 
    SpecificationGoalStatus,
24
 
    SpecificationDelivery,
25
 
    SpecificationPriority,
26
 
    )
27
 
from lp.blueprints.interfaces.specification import ISpecificationSet
28
 
from lp.registry.interfaces.person import IPersonSet
29
 
from lp.registry.interfaces.product import IProductSet
30
 
 
31
 
 
32
 
# Base URL of the Zope 3 wiki; page names are appended to it directly.
WIKI_BASE = 'http://wiki.zope.org/zope3/'
# Wiki pages that each carry a list of links to proposals.
PROPOSAL_LISTS = ['Zope3Proposals', 'OldProposals', 'DraftProposals']
# Listing page used when iter_spec_urls() is called without a URL.
specroot = WIKI_BASE + 'Zope3Proposals'

# Spellings of the at symbol that may appear in author addresses on the wiki.
at_replacements = ['_at_', '(at)', '@']
# Matches an e-mail address whose at symbol uses any of the spellings above.
author_email_pat = re.compile(
    '[-.A-Za-z0-9]+(?:@|%s)[-.A-Za-z0-9]+' % '|'.join(
        map(re.escape, at_replacements)))
40
 
 
41
 
def getTextContent(tag):
    """Return the text held by `tag` with all markup dropped.

    `tag` may be None (giving the empty string), a plain string (returned
    unchanged), or an object providing recursiveChildGenerator(); in the
    last case every string that generator yields is concatenated in order.
    """
    if tag is None:
        return ''
    if isinstance(tag, basestring):
        return tag
    pieces = []
    for node in tag.recursiveChildGenerator():
        # Only the string nodes carry text; everything else is skipped.
        if isinstance(node, basestring):
            pieces.append(node)
    return ''.join(pieces)
48
 
 
49
 
 
50
 
class ZopeSpec:
    """One proposal page from the Zope 3 wiki, to be mirrored in Launchpad.

    An instance starts out with the data available from a proposal listing
    (URL, title, summary).  parseSpec() then fetches the page itself to
    collect author e-mail addresses and wiki status badges, and syncSpec()
    writes the result to the matching Launchpad specification.
    """

    def __init__(self, url, title, summary):
        """Record the listing data for one proposal.

        :param url: URL of the proposal's wiki page.
        :param title: title of the proposal.
        :param summary: summary text of the proposal.
        """
        self.url = url
        # The page name is the last path segment.  Trailing slashes are
        # dropped first so that '.../SomePage/' still yields 'SomePage'
        # instead of an empty name.
        self.name = self.url.rstrip('/').split('/')[-1]
        self.title = title
        self.summary = summary
        # E-mail addresses of the authors found by parseSpec().
        self.authors = set()
        # Names of the wiki badges found by parseSpec().
        self.statuses = set()

    def parseAuthorEmails(self, text):
        """Add every e-mail address found in `text` to `self.authors`.

        Mangled at symbols (see `at_replacements`) are turned back into '@'
        before the address is stored.
        """
        for author in author_email_pat.findall(text):
            # unmangle at symbol in email:
            for replacement in at_replacements:
                author = author.replace(replacement, '@')
            self.authors.add(author)

    def parseStatuses(self, soup):
        """Record in `self.statuses` each wiki badge linked from `soup`.

        A badge counts as present when the parsed page contains a hyperlink
        whose href is exactly WIKI_BASE followed by the badge name.
        """
        wiki_badges = [
            'IsWorkInProgress',

            'IsProposal',
            'IsRejectedProposal',
            'IsSupercededProposal',
            'IsRetractedProposal',
            'IsAcceptedProposal',
            'IsImplementedProposal',
            'IsExpiredProposal',
            'IsDraftProposal',

            'IsPlanned',
            'IsResolved',
            'IsImplemented',

            'IsReplaced',
            'IsOutdated',
            'IsDraft',
            'IsEditedDraft',
            'IsRoughDraft',
            ]
        for badge in wiki_badges:
            url = WIKI_BASE + badge
            if soup.fetch('a', {'href': url}):
                self.statuses.add(badge)

    def parseSpec(self):
        """Fetch the proposal page and extract its statuses and authors.

        Raises AssertionError if the page does not contain exactly one
        div of class "content".
        """
        response = urllib2.urlopen(self.url)
        try:
            contents = response.read()
        finally:
            # Release the connection even when reading fails.
            response.close()
        soup = BeautifulSoup(contents)
        contentdivs = soup('div', {'class': 'content'})
        assert len(contentdivs) == 1, (
            'expected exactly one content div in %s' % self.url)

        # Specification statuses are represented by "wiki badges",
        # which are just hyperlinks to particular pages.
        self.parseStatuses(soup)

        # There are two styles of spec.  One of them has a table with
        # RFC-822 style headers in it.  The other has minor level headings
        # with text under the heading.
        tables = soup('table')
        # Every page has one table, for the main page layout.  So, if the page
        # has two tables, it means that it will be using the RFC-822 style.
        if len(tables) >= 2:
            # This is a spec with RFC-822 style headers.
            docinfo = tables[1]
            for row in docinfo('tr'):
                headers = row('th')
                cells = row('td')
                if not headers or not cells:
                    continue
                if 'Author' in headers[0].renderContents():
                    self.parseAuthorEmails(cells[0].renderContents())
        else:
            # This is a spec with minor level headings, or perhaps with no
            # headings at all.

            # Look for an author heading.
            author_headers = soup(text=re.compile('Author.*', re.I))
            if author_headers:
                following = author_headers[0].findNext()
                # Nothing may follow the heading; then there is no author
                # text to scan.
                if following is not None:
                    self.parseAuthorEmails(following.renderContents())

    def _hasAnyStatus(self, *badges):
        """Return True if at least one of `badges` is in `self.statuses`."""
        return any(badge in self.statuses for badge in badges)

    @property
    def lpname(self):
        """The wiki page name converted to a dash-separated lower-case name.

        A dash is inserted before each capital letter that follows a
        non-capital character, the result is lower-cased, leading dashes are
        removed, and 'x-' is prepended when the result does not start with a
        letter (which includes the case of an empty result).
        """
        # add dashes before capitalised words
        name = re.sub(r'([^A-Z])([A-Z])', r'\1-\2', self.name)
        # lower case name and remove leading dashes
        name = name.lower().lstrip('-')
        # if name doesn't begin with an alphabetical character prefix it;
        # slicing keeps this safe when nothing is left of the name.
        if not name[:1].isalpha():
            name = 'x-' + name
        return name

    @property
    def lpstatus(self):
        """The SpecificationStatus corresponding to the wiki badges found."""
        # implemented and accepted specs => APPROVED
        if self._hasAnyStatus('IsImplemented',
                              'IsImplementedProposal',
                              'IsAcceptedProposal'):
            return SpecificationStatus.APPROVED
        # WIP => DISCUSSION
        if 'IsWorkInProgress' in self.statuses:
            return SpecificationStatus.DISCUSSION
        if self._hasAnyStatus('IsSupercededProposal', 'IsReplaced'):
            return SpecificationStatus.SUPERSEDED
        if self._hasAnyStatus('IsExpiredProposal', 'IsOutdated'):
            return SpecificationStatus.OBSOLETE
        # draft statuses:
        if self._hasAnyStatus('IsDraftProposal',
                              'IsDraft',
                              'IsEditedDraft',
                              'IsRoughDraft'):
            return SpecificationStatus.DRAFT
        # otherwise ...
        return SpecificationStatus.PENDINGREVIEW

    @property
    def lpgoalstatus(self):
        """The SpecificationGoalStatus corresponding to the wiki badges."""
        # implemented and accepted specs => ACCEPTED
        if self._hasAnyStatus('IsImplemented',
                              'IsImplementedProposal',
                              'IsAcceptedProposal'):
            return SpecificationGoalStatus.ACCEPTED
        # rejected or retracted => DECLINED
        if self._hasAnyStatus('IsRetractedProposal', 'IsRejectedProposal'):
            return SpecificationGoalStatus.DECLINED

        # otherwise ...
        return SpecificationGoalStatus.PROPOSED

    @property
    def lpdelivery(self):
        """The SpecificationDelivery corresponding to the wiki badges."""
        if self._hasAnyStatus('IsImplemented', 'IsImplementedProposal'):
            return SpecificationDelivery.IMPLEMENTED
        # otherwise ...
        return SpecificationDelivery.UNKNOWN

    def syncSpec(self):
        """Create or update the Launchpad specification for this proposal.

        The specification is looked up by this proposal's URL and created
        under the 'zope' product, owned by 'zope-dev', when none is found.
        Title, summary, status, goal status and delivery are then brought in
        line with the parsed data, and the assignee is set to the first
        author (in sorted order) whose e-mail address is known to Launchpad.
        """
        person_set = getUtility(IPersonSet)
        spec_set = getUtility(ISpecificationSet)
        zope = getUtility(IProductSet).getByName('zope')
        zope_dev = person_set.getByName('zope-dev')
        # has the spec been created?
        lpspec = spec_set.getByURL(self.url)
        if not lpspec:
            lpspec = spec_set.new(
                name=self.lpname,
                title=self.title,
                specurl=self.url,
                summary=self.summary,
                priority=SpecificationPriority.UNDEFINED,
                status=SpecificationStatus.NEW,
                owner=zope_dev,
                product=zope)

        # synchronise
        lpspec.title = self.title
        lpspec.summary = self.summary
        lpspec.status = self.lpstatus
        newgoalstatus = self.lpgoalstatus
        # Only touch the goal when it actually changes.
        if newgoalstatus != lpspec.goalstatus:
            if newgoalstatus == SpecificationGoalStatus.PROPOSED:
                lpspec.proposeGoal(None, zope_dev)
            elif newgoalstatus == SpecificationGoalStatus.ACCEPTED:
                lpspec.acceptBy(zope_dev)
            elif newgoalstatus == SpecificationGoalStatus.DECLINED:
                lpspec.declineBy(zope_dev)
        lpspec.delivery = self.lpdelivery
        lpspec.updateLifecycleStatus(zope_dev)

        # set the assignee to the first author email with an LP account
        for author in sorted(self.authors):
            person = person_set.getByEmail(author)
            if person is not None:
                lpspec.assignee = person
                break
239
 
 
240
 
 
241
 
def iter_spec_urls(url=specroot):
    """Yield a ZopeSpec for each proposal linked from a wiki listing page.

    The page at `url` is fetched and the first bulleted list inside its
    single div of class "content" is scanned.  For each top-level list item
    the first hyperlink supplies the spec URL and title, and the text that
    follows the link supplies the summary.  Items without a usable link,
    and links to not-yet-created wiki pages, are skipped.

    Raises AssertionError if the page does not contain exactly one
    div of class "content".
    """
    response = urllib2.urlopen(url)
    try:
        contents = response.read()
    finally:
        # Release the connection even when reading fails.
        response.close()
    soup = BeautifulSoup(contents)
    contentdivs = soup('div', {'class': 'content'})
    assert len(contentdivs) == 1, (
        'expected exactly one content div in %s' % url)
    listofspecs = contentdivs[0]('ul')[0]

    for listitem in listofspecs('li', recursive=False):
        anchors = listitem('a')
        if not anchors:
            continue
        specanchor = anchors[0]
        href = specanchor.get('href')
        # anchor without a target => nothing to import
        if href is None:
            continue
        # broken wiki link => ignore
        if 'createform?page=' in href:
            continue
        title = getTextContent(specanchor)
        summary = ''.join([getTextContent(tag)
                           for tag in specanchor.nextSiblingGenerator()])
        yield ZopeSpec(href, title, summary.strip())
262
 
 
263
 
 
264
 
def main(argv):
265
 
    execute_zcml_for_scripts()
266
 
    ztm = initZopeless()
267
 
 
268
 
    for spec in itertools.chain(*[iter_spec_urls(WIKI_BASE + page)
269
 
                                  for page in PROPOSAL_LISTS]):
270
 
        # parse extra information from the spec body
271
 
        spec.parseSpec()
272
 
        # add its metadata to LP
273
 
        print 'Synchronising', spec.name
274
 
        ztm.begin()
275
 
        try:
276
 
            spec.syncSpec()
277
 
            ztm.commit()
278
 
        except:
279
 
            ztm.abort()
280
 
            raise
281
 
 
282
 
# Run the import when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == '__main__':
    exit_status = main(sys.argv)
    sys.exit(exit_status)