User:Xenobot/communes.py
Appearance
#!/usr/bin/env python2.5
#version 0.6
import codecs
import re
import wikitools
import settings
wiki = wikitools.Wiki()
wiki.login(settings.username, settings.password)
print 'logged in now'
output_skip = codecs.open('output-commune.txt', 'a', 'utf-8')
f = open('may3morb.txt', 'r')
page_list = f.read().split('\n')
f.close()
print 'retrieved list of pages'
for name in page_list:
title = unicode(name, 'utf-8')
page = wikitools.Page(wiki, '%s' % title, followRedir=False)
if not page.exists:
continue
article_text = page.getWikiText()
if re.search(r'www.insee.fr', article_text, re.I|re.U):
if re.search(r'\|[ ]*insee[ ]*=[ ]*(\w\w)(\w\w\w)', article_text, re.I|re.U):
insee = re.search(r'\|[ ]*insee[ ]*=[ ]*(\w\w)(\w\w\w)', article_text, re.I|re.U)
if not re.search(r'''
==[ ]*References[ ]*==
.*based on the article.*
.*asso.fr.*
.*insee.fr.*
.*www.ign.fr.*''', article_text, re.I|re.U):
print 'couldn\'t find ref header; skipping %s' % title
output_skip.write(title + ' -- no ref header found\n')
output_skip.flush()
continue
new_text = re.compile(r'''
==[ ]*References[ ]*==
.*based on the article.*
.*asso.fr.*
.*insee.fr.*
.*www.ign.fr.*''', re.I|re.U).sub('''
== References ==
* [http://www.maires56.asso.fr Mayors of Morbihan Association] {{fr icon}}
* [http://www.insee.fr/fr/methodes/nomenclatures/cog/fichecommunale.asp?codedep=%s&codecom=%s INSEE commune file]''' % (insee.group(1), insee.group(2)), article_text)
page.edit(new_text, summary=settings.editsumm, bot=1)
print 'Editing %s' % title
else:
print 'couldn\'t find insee; skipping %s' % title
output_skip.write(title + ' -- no insee param\n')
output_skip.flush()
continue
else:
print 'Skipping %s' % title
output_skip.write(title + ' -- no insee.fr url\n')
output_skip.flush()
output_skip.close()