Wikipedia Monitoring IRC Bot

Example of the notifications the bot posts to its channel:

Sep 03 06:21:19 <wikimon> [ http://zenit.senecac.on.ca/wiki/index.php?title=CDOT& ] was changed by [ ShaneStrauss ] on [ 06:18, 3 September 2011 ]
Sep 03 09:11:19 <wikimon> [ http://zenit.senecac.on.ca/wiki/index.php?title=CDOT& ] was changed by [ Fossjon ] on [ 09:08, 3 September 2011 ]

#!/usr/bin/python

import os
import re
import select
import socket
import sys
import time
import urllib2

def listfilt(listobjc, filtstri, onceonly=0):
	"""Select the items of a list that match a regular expression.

	listobjc -- sequence of strings, scanned in order
	filtstri -- regex body; it is applied as "^" + filtstri + "$"
	onceonly -- when 1, every item after the first match is kept too,
	            whether or not it matches itself

	Returns a new list; the input is left untouched.
	"""
	anchored = "^" + filtstri + "$"
	kept = []
	tailmode = False

	for entry in listobjc:
		matched = re.match(anchored, entry) is not None

		# In onceonly mode the first hit switches on "keep everything".
		if (matched and (onceonly == 1)):
			tailmode = True

		if (matched or tailmode):
			kept.append(entry)

	return kept

def getspage(urlnstri):
	"""Fetch a page-history URL and describe its latest change in one line.

	The HTML is scanned for the first <li> element whose text contains
	"checked"; the second and third links inside it are taken as the edit
	date and the editor name (presumably the layout of a MediaWiki
	"action=history" listing -- confirm against the target wiki).

	urlnstri -- URL to download (10 second timeout)

	Returns "[ <page> ] was changed by [ <name> ] on [ <date> ]", where
	<page> is urlnstri with "action=history" removed, or "" when the page
	cannot be fetched or does not have the expected structure.
	Ctrl-C during the fetch exits the process with status 0.
	"""
	headitem = ""

	try:
		websobjc = urllib2.urlopen(urlnstri, None, 10)

		try:
			# Flatten the document to one line so the splits below are the
			# only line boundaries.
			dataread = websobjc.read().replace("\t", "").replace("\r", "").replace("\n", "")

		finally:
			# urllib2 responses are not context managers; close explicitly
			# so repeated polling does not leak connections.
			websobjc.close()

		# One <li>...</li per line, then keep only those mentioning "checked".
		listlist = dataread.replace("<li", "\n<li").replace("</li", "</li\n").split("\n")
		listlist = listfilt(listlist, "<li.*checked.*")

		# Put every <a>...</a> of the first such item on its own line.
		linklist = re.sub("(<a [^>]+>[^<]+</a>)", "\n\\1\n", listlist[0]).split("\n")
		linklist = listfilt(linklist, "<a [^>]+>[^<]+</a>.*")

		pagestri = re.sub("action=history[&]*", "", urlnstri).strip()
		datestri = re.sub("<[^>]+>", "", linklist[1]).strip()
		namestri = re.sub("<[^>]+>", "", linklist[2]).strip()

		headitem = ("[ %s ] was changed by [ %s ] on [ %s ]" % (pagestri, namestri, datestri))

	except KeyboardInterrupt:
		sys.exit(0)

	except Exception:
		# Best effort: a network error or an unexpected page layout simply
		# means "nothing to report this round". SystemExit is no longer
		# swallowed here.
		pass

	return headitem

sendline_last = 0; sendline_list = []

def sendline(circobjc, sendstri):
	"""Queue a line for the IRC server and flush at most one queued line.

	circobjc -- connected socket, or None while there is no connection
	sendstri -- line to queue (surrounding whitespace is stripped); an
	            empty string queues nothing and only attempts a flush

	At most one line is written per call, and only if at least one second
	has passed since the previous write. The written line is echoed to
	stdout with a "[SEND]" prefix and terminated with "\\r\\n".
	State lives in the module globals sendline_last (time of the last
	write) and sendline_list (pending lines, oldest first).
	"""
	global sendline_last

	prestime = time.time()
	tempstri = sendstri.strip()

	if (tempstri != ""):
		sendline_list.append(tempstri)

	# Without a connection the line just stays queued; calling .send on
	# None would raise AttributeError.
	if (circobjc is None):
		return

	if (((prestime - sendline_last) >= 1) and (len(sendline_list) > 0)):
		print("[SEND] %s" % (sendline_list[0]))
		# sendall() keeps writing until the whole line is out; plain send()
		# may transmit only a prefix and silently truncate the command.
		circobjc.sendall(sendline_list[0] + "\r\n")

		# Dequeue only after a successful write.
		sendline_list.pop(0)
		sendline_last = prestime

readline_data = ""

def readline(circobjc):
	"""Return the next complete line received on the socket, without blocking.

	circobjc -- connected socket to poll

	Received data is accumulated in the module global readline_data.
	Returns:
	  None -- the peer closed the connection (the buffer is discarded)
	  ""   -- no complete line is buffered yet (or the line was blank)
	  str  -- the oldest buffered line, stripped of surrounding whitespace
	"""
	global readline_data

	# A zero timeout makes select() a pure poll.
	readable = select.select([circobjc], [], [], 0)[0]

	if (circobjc in readable):
		chunk = circobjc.recv(2**20)

		# An empty read on a readable socket means end of stream.
		if (not chunk):
			readline_data = ""
			return None

		readline_data += chunk

	(firstlin, newlchar, leftover) = readline_data.partition("\n")

	if (newlchar == ""):
		# No newline buffered yet: keep everything for the next call.
		return ""

	readline_data = leftover
	return firstlin.strip()

procirco_last = 0

def procirco(circobjc, channame, lastmesg):
	"""Do the periodic IRC housekeeping for one received line.

	circobjc -- connected socket, handed on to sendline()
	channame -- channel to (re)join
	lastmesg -- the line just read from the server (may be "")

	A "JOIN <channame>" is queued whenever 60 seconds or more have passed
	since the previous one (tracked in the module global procirco_last),
	and a server "PING <x>" is answered with "PONG <x>".
	"""
	global procirco_last

	nowstamp = time.time()

	if ((nowstamp - procirco_last) >= 60):
		sendline(circobjc, "JOIN " + channame)
		procirco_last = nowstamp

	pingmtch = re.match("^PING (.*)$", lastmesg)

	if (pingmtch is None):
		return

	# Echo the server's token back unchanged.
	sendline(circobjc, "PONG " + pingmtch.group(1))

def newcirco(hostname, nickname, portnumb=6667):
	"""Connect to an IRC server and queue the registration commands.

	hostname -- server host name or IPv4 address
	nickname -- nick to register with
	portnumb -- TCP port of the server (defaults to 6667)

	Returns the connected socket. "USER Alex * * : Bob" and
	"NICK <nickname>" are handed to sendline(), which writes them subject
	to its own rate limit. If connecting or the first write fails, the
	socket is closed and the exception is re-raised.
	"""
	circobjc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

	try:
		circobjc.connect((hostname, portnumb))
		sendline(circobjc, "USER Alex * * : Bob")
		sendline(circobjc, "NICK %s" % (nickname))

	except BaseException:
		# Do not leak the descriptor when the connection attempt fails;
		# the caller still sees the original error.
		circobjc.close()
		raise

	return circobjc

def main():
	"""Run the bot: stay connected, obey channel commands, announce changes.

	Command line: <host> <nick> <chan>. Without all three a usage line is
	printed and the process exits with status 0.

	Commands are recognised in PRIVMSG lines (case-insensitively):
	  <nick> add <url>   -- start monitoring <url>
	  <nick> del <regex> -- stop monitoring every URL the regex matches
	  <nick> out         -- send the last known summary of each URL to the
	                        requesting nick
	Every five minutes (and right after an "add") each URL is fetched with
	getspage(); a summary that differs from the stored one is announced in
	<chan>, except for the very first summary of a URL, which is only
	stored. The loop never returns; a lost connection or a socket error
	leads to a reconnect attempt about one second later.
	"""
	if (len(sys.argv) < 4):
		print("Usage: %s <host> <nick> <chan>" % (sys.argv[0]))
		sys.exit(0)

	hostname = sys.argv[1]; nickname = sys.argv[2]; channame = sys.argv[3]
	pagelist = []; lastlist = []; lasttime = 0
	circobjc = None

	# IRC nicks may contain regex metacharacters such as [ ] \ ^ { | },
	# so the nick has to be escaped before it is embedded in a pattern.
	cmndhead = "^:([^!]+)![^ ]+ privmsg [^ ]+ :%s " % (re.escape(nickname))
	addregex = re.compile(cmndhead + "add (.*)$", re.I)
	delregex = re.compile(cmndhead + "del (.*)$", re.I)
	outregex = re.compile(cmndhead + "out.*$", re.I)

	while (1):
		gotaline = 0
		prestime = time.time()

		try:
			if (circobjc is None):
				circobjc = newcirco(hostname, nickname)

			else:
				circline = readline(circobjc)

				if (circline is None):
					# The server closed the connection: release the socket
					# and drop lines queued for it, so the next connection
					# starts with a fresh USER/NICK registration.
					circobjc.close()
					circobjc = None
					del sendline_list[:]

				else:
					if (circline != ""):
						gotaline = 1
						print("[RECV] %s" % (circline))

					procirco(circobjc, channame, circline)

					regxobjc = addregex.match(circline)

					if (regxobjc and (regxobjc.group(2) not in pagelist)):
						# NOTE(review): the URL comes from any IRC user and is
						# fetched without validation -- consider restricting it.
						pagelist.append(regxobjc.group(2))
						lastlist.append("")
						# Forces a poll on this iteration.
						lasttime = 0

					regxobjc = delregex.match(circline)

					if (regxobjc):
						# NOTE(review): the pattern is untrusted input; a
						# pathological regex can stall the bot.
						try:
							userregx = re.compile(regxobjc.group(2), re.I)

						except (re.error, OverflowError):
							# An invalid pattern deletes nothing.
							userregx = None

						if (userregx is not None):
							# Walk backwards so pop() does not shift the
							# indexes still to be visited.
							for x in range(len(pagelist) - 1, -1, -1):
								if (userregx.match(pagelist[x])):
									sendline(circobjc, "PRIVMSG %s :deleting [ %s ]" % (regxobjc.group(1), pagelist[x]))
									lastlist.pop(x)
									pagelist.pop(x)

					regxobjc = outregex.match(circline)

					if (regxobjc):
						for listitem in lastlist:
							sendline(circobjc, "PRIVMSG %s :%s" % (regxobjc.group(1), listitem))

				# Poll only while connected; lasttime is left alone otherwise,
				# so the poll happens as soon as the bot is back online.
				if ((circobjc is not None) and ((prestime - lasttime) >= (5 * 60))):
					for (x, pagename) in enumerate(pagelist):
						tempoutp = getspage(pagename)

						if ((tempoutp != "") and (tempoutp != lastlist[x])):
							# The first summary of a page is a baseline, not news.
							if (lastlist[x] != ""):
								sendline(circobjc, "PRIVMSG %s :%s" % (channame, tempoutp))

							lastlist[x] = tempoutp

					lasttime = prestime

			# Give the send queue a chance to flush one pending line.
			if (circobjc is not None):
				sendline(circobjc, "")

		except (socket.error, select.error) as erroobjc:
			# A failed connect/read/write must not kill the bot: drop the
			# connection and retry on the next iteration.
			print("[FAIL] %s" % (erroobjc))

			if (circobjc is not None):
				circobjc.close()
				circobjc = None

			del sendline_list[:]
			gotaline = 0

		# Sleep whenever nothing was received; readline() only polls, so
		# looping without a pause would spin at full CPU.
		if (gotaline == 0):
			time.sleep(1)

if (__name__ == "__main__"):
	main()

Leave a comment