createrepo From Scratch Using Forks & Pipes

Note: this is experimental, proof-of-concept code written for timing purposes.

This script implements very basic createrepo functionality. It forks worker processes and uses pipes for inter-process communication to distribute the work among them.

repo.py

import hashlib
import multiprocessing
import os
import re
import select
import sys

def _rpmhuint(d, pos, width):
	"""Return the big-endian unsigned integer stored in d[pos:pos+width].

	d is a string holding one character per file byte.
	Raises IndexError when the field extends past the end of d.
	"""
	v = 0
	for k in range(pos, pos + width):
		v = ((v << 8) + ord(d[k]))
	return v

def _rpmhdata(d, y, rtype, numb):
	"""Render numb values of index type rtype, stored in d from position y, as text.

	Types 3 and 4 (16-bit and 32-bit integers) become decimal numbers, each
	followed by a newline; types 6, 8 and 9 (NUL-terminated strings) become
	the string followed by a newline; type 7 (binary) is copied character by
	character. Any other type yields an empty string.
	Raises IndexError when a value runs past the end of d.
	"""
	data = []; j = 0
	while (j < numb):
		if (rtype == 3):
			data.append(str(_rpmhuint(d, y, 2)) + "\n")
			y += 2
		elif (rtype == 4):
			data.append(str(_rpmhuint(d, y, 4)) + "\n")
			y += 4
		elif ((rtype == 6) or (rtype == 8) or (rtype == 9)):
			e = d.find("\0", y)
			if (e < 0):
				raise IndexError("unterminated string at offset %d" % (y))
			data.append(d[y:e] + "\n")
			y = (e + 1)
		elif (rtype == 7):
			data.append(d[y])
			y += 1
		else:
			# a type this parser does not decode: stop here, otherwise
			# j would never advance and the loop would spin forever
			break
		j += 1
	return "".join(data)

def rpmhinfo(p, n):
	"""Parse the lead and every header section of the RPM file n.

	p is the prefix removed from the front of n to form the stored
	location (n[len(p):]).

	Returns a list. Element 0 describes the 96-byte lead:
	[magic, version, type, arch, name, os, sigtype, reserved,
	location, file size, sha256 hex digest of the whole file].
	Every following element describes one header section:
	[magic, version, reserved, entry count, store size, store offset,
	header offset] followed by one [tag, type, offset, count, data]
	list per index entry, where data is the text built by _rpmhdata().

	Returns None when the lead's type field is not 0, or when the file
	is too short or truncated to be parsed.
	"""
	# file -- RPMs are binary, so never read them through a text decoder
	with open(n, "rb") as f:
		raw = f.read()
	# one character per byte: Python 2 bytes already are str; on Python 3
	# latin-1 maps every byte to the code point of the same value, so
	# non-ASCII bytes surface as latin-1 characters there
	d = raw if isinstance(raw, str) else raw.decode("latin-1")
	if (len(d) < 96):
		return None
	o = []
	# lead
	magic = d[0:4]
	vers = d[4:6]
	rtype = _rpmhuint(d, 6, 2)
	rarch = d[8:10]
	name = d[10:76].strip("\0\t\r\n ")
	osnum = d[76:78]
	sigt = d[78:80]
	resr = d[80:96]
	o.append([magic, vers, rtype, rarch, name, osnum, sigt, resr])
	if (rtype != 0):
		return None
	# extra
	o[0].append(n[len(p):])
	o[0].append(len(d))
	# hash the raw bytes, not the character view
	o[0].append(hashlib.sha256(raw).hexdigest())
	# loop over the header sections that follow the lead
	try:
		m = 96
		while (d[m:m+3] == "\x8e\xad\xe8"):
			# header
			numb = _rpmhuint(d, m + 8, 4)
			size = _rpmhuint(d, m + 12, 4)
			# the data store starts right after the 16-byte index entries
			base = ((m + 16) + (numb * 16))
			head = [d[m:m+3], d[m+3], d[m+4:m+8], numb, size, base, m]
			o.append(head)
			# index
			x = (m + 16)
			while (x < base):
				tag = _rpmhuint(d, x, 4)
				etype = _rpmhuint(d, x + 4, 4)
				offs = _rpmhuint(d, x + 8, 4)
				ecnt = _rpmhuint(d, x + 12, 4)
				data = _rpmhdata(d, base + offs, etype, ecnt)
				head.append([tag, etype, offs, ecnt, data])
				x += 16
			# the next section is searched for at the next multiple of 8
			m = (base + size)
			m += ((-m) % 8)
	except IndexError:
		# truncated or corrupt header: report "not usable" instead of
		# letting the error escape to the caller
		return None
	# return
	return o

def safestri(inptstri):
	"""Return inptstri with "&", "<" and ">" replaced by XML entities and surrounding whitespace removed."""
	outpstri = inptstri
	# "&" goes first so the ampersands of the entities added for "<" and ">" are not escaped again
	for (rawchar, entiname) in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
		outpstri = outpstri.replace(rawchar, entiname)
	return outpstri.strip()

def makedict(rpmhobjc):
	"""Collect the fields needed for the XML output from a parsed header list.

	rpmhobjc is a list whose element 0 carries the location, size and hash
	at positions 8, 9 and 10, and whose later elements are header records
	with index entries [tag, type, offset, count, data] from position 7 on.

	Returns a dictionary. "locn", "size" and "hash" come from element 0;
	"heds" and "hede" are position 6 and the sum of positions 4 and 5 of
	the last element; "poch", "pkgr", "urln" and "vndr" get defaults.
	Every index entry whose (tag, type) pair is listed below is stored
	under the given key.
	"""
	# how the data text of a matching entry is stored:
	#   onestri -> safestri(data)
	#   safelis -> safestri(data).split("\n")
	#   rawlist -> data.split("\n")
	(onestri, safelis, rawlist) = (0, 1, 2)
	tagtable = {
		(1000, 6): ("name", onestri),
		(1022, 6): ("arch", onestri),
		(1003, 4): ("poch", onestri),
		(1001, 6): ("vers", onestri),
		(1002, 6): ("rels", onestri),
		(1004, 9): ("summ", onestri),
		(1005, 9): ("desc", onestri),
		(1015, 6): ("pkgr", onestri),
		(1020, 6): ("urln", onestri),
		(1006, 4): ("RPMTAG_BUILDTIME", onestri),
		(1009, 4): ("RPMTAG_SIZE", onestri),
		(1007, 4): ("SIGTAG_PAYLOADSIZE", onestri),
		(1014, 6): ("licn", onestri),
		(1011, 6): ("vndr", onestri),
		(1016, 9): ("grup", onestri),
		(1007, 6): ("host", onestri),
		(1044, 6): ("srpm", onestri),
		(1047, 8): ("prvn", safelis),
		(1113, 8): ("prvl", rawlist),
		(1112, 4): ("prvf", rawlist),
		(1049, 8): ("reqn", safelis),
		(1050, 8): ("reql", rawlist),
		(1048, 4): ("reqf", rawlist),
		(1054, 8): ("conn", safelis),
		(1055, 8): ("conl", rawlist),
		(1053, 4): ("conf", rawlist),
		(1090, 8): ("obsn", safelis),
		(1115, 8): ("obsl", rawlist),
		(1114, 4): ("obsf", rawlist),
		(1118, 8): ("flep", rawlist),
		(1117, 8): ("flen", rawlist),
		(1116, 4): ("flei", rawlist),
		(1030, 3): ("flet", rawlist),
	}

	leadobjc = rpmhobjc[0]
	lastobjc = rpmhobjc[len(rpmhobjc) - 1]
	infodict = {
		"locn": leadobjc[8],
		"size": leadobjc[9],
		"hash": leadobjc[10],
		"poch": 0,
		"pkgr": "",
		"urln": "",
		"vndr": "",
		"heds": lastobjc[6],
		"hede": (lastobjc[4] + lastobjc[5]),
	}

	for headobjc in rpmhobjc[1:]:
		for indxitem in headobjc[7:]:
			tagentry = tagtable.get((indxitem[0], indxitem[1]))
			if (tagentry is None):
				continue

			(keyname, storekind) = tagentry
			tagvalue = indxitem[4]
			if (storekind != rawlist):
				tagvalue = safestri(tagvalue)
			if (storekind != onestri):
				tagvalue = tagvalue.split("\n")
			infodict[keyname] = tagvalue

	return infodict

def procenvr(envrstri):
	"""Turn an "epoch:version-release" string into XML attribute text.

	Returns "" for an empty string. Otherwise returns ' epoch="E" ver="V"',
	followed by ' rel="R"' when the string contains a "-". The release is
	whatever follows the last "-"; the epoch is whatever precedes the first
	":" of the remainder and is "0" when there is no ":".
	"""
	if (envrstri == ""):
		return ""

	(headstri, dashstri, relsstri) = envrstri.rpartition("-")
	if (dashstri):
		relsattr = (" rel=\"%s\"" % (relsstri))
	else:
		# no "-" at all: the whole string is epoch/version, there is no release
		headstri = envrstri
		relsattr = ""

	(pochstri, colnstri, versstri) = headstri.partition(":")
	if (not colnstri):
		(pochstri, versstri) = ("0", headstri)

	return ((" epoch=\"%s\" ver=\"%s\"" % (pochstri, versstri)) + relsattr)

def rpmelist(namelist, verslist, flaglist):
	"""Build the <rpm:entry .../> lines for one dependency section.

	namelist, verslist and flaglist are parallel lists of names, version
	strings and flag numbers given as text. Returns "" when namelist is
	empty or when the lists are not ordered by length as
	len(namelist) <= len(verslist) <= len(flaglist).

	One line is produced per name, skipping names that contain "rpmlib"
	(in any case) and entries whose attribute text was already emitted.
	"""
	namenumb = len(namelist); versnumb = len(verslist); flagnumb = len(flaglist)

	if (namenumb < 1):
		return ""

	if ((namenumb > versnumb) or (versnumb > flagnumb)):
		return ""

	# low four flag bits -> comparison keyword; other values add no flags attribute
	comptabl = {2: "LT", 4: "GT", 8: "EQ", 10: "LE", 12: "GE"}

	# every flag is parsed before any output is built
	attrlist = []
	for flagstri in flaglist[:namenumb]:
		flagbits = int(flagstri)

		compname = comptabl.get(flagbits & 0xf)
		if (compname is None):
			compattr = ""
		else:
			compattr = (" flags=\"%s\"" % (compname))

		if ((flagbits & 0x300) != 0):
			preqattr = (" pre=\"1\"")
		else:
			preqattr = ""

		attrlist.append((compattr, preqattr))

	seenattr = set()
	outplist = []
	for (nameitem, versitem, (compattr, preqattr)) in zip(namelist, verslist, attrlist):
		if (re.match("^.*rpmlib.*$", nameitem, re.I)):
			continue

		entrattr = ("name=\"%s\"%s%s%s" % (nameitem, compattr, procenvr(versitem), preqattr))
		if (entrattr in seenattr):
			continue

		seenattr.add(entrattr)
		outplist.append("\t\t\t<rpm:entry %s/>\n" % (entrattr))

	return "".join(outplist)

def makexmld(rpmhdict):
	"""Render one <package> element of primary.xml from a package dictionary.

	rpmhdict must hold the keys "name", "arch", "poch", "vers", "rels",
	"hash", "summ", "desc", "pkgr", "urln", "RPMTAG_BUILDTIME",
	"RPMTAG_SIZE", "SIGTAG_PAYLOADSIZE", "size", "locn", "licn", "vndr",
	"grup", "host", "srpm", "heds" and "hede"; a missing one raises
	KeyError. The dependency lists ("prvn"/"prvl"/"prvf" and the "req",
	"con" and "obs" equivalents) and the file lists ("flep", "flen",
	"flei", "flet") are optional.

	Returns a one-element list holding the XML text.
	"""
	from xml.sax.saxutils import escape

	o = []

	o.append("<package type=\"rpm\">\n")

	o.append("\t<name>%s</name>\n" % (rpmhdict["name"]))
	o.append("\t<arch>%s</arch>\n" % (rpmhdict["arch"]))
	o.append("\t<version epoch=\"%s\" ver=\"%s\" rel=\"%s\"/>\n" % (rpmhdict["poch"], rpmhdict["vers"], rpmhdict["rels"]))
	o.append("\t<checksum type=\"sha256\" pkgid=\"YES\">%s</checksum>\n" % (rpmhdict["hash"]))
	o.append("\t<summary>%s</summary>\n" % (rpmhdict["summ"]))
	o.append("\t<description>%s</description>\n" % (rpmhdict["desc"]))
	o.append("\t<packager>%s</packager>\n" % (rpmhdict["pkgr"]))
	o.append("\t<url>%s</url>\n" % (rpmhdict["urln"]))
	# NOTE: no file timestamp is available here; the build time minus 10 is written in its place
	o.append("\t<time file=\"%s\" build=\"%s\"/>\n" % (int(rpmhdict["RPMTAG_BUILDTIME"]) - 10, rpmhdict["RPMTAG_BUILDTIME"]))
	o.append("\t<size package=\"%s\" installed=\"%s\" archive=\"%s\"/>\n" % (rpmhdict["size"], rpmhdict["RPMTAG_SIZE"], rpmhdict["SIGTAG_PAYLOADSIZE"]))
	# the location comes straight from the file name, so it has to be escaped for use in an attribute
	o.append("\t<location href=\"%s\"/>\n" % (escape(rpmhdict["locn"], {"\"": "&quot;"})))

	o.append("\t<format>\n")
	o.append("\t\t<rpm:license>%s</rpm:license>\n" % (rpmhdict["licn"]))
	o.append("\t\t<rpm:vendor>%s</rpm:vendor>\n" % (rpmhdict["vndr"]))
	o.append("\t\t<rpm:group>%s</rpm:group>\n" % (rpmhdict["grup"]))
	o.append("\t\t<rpm:buildhost>%s</rpm:buildhost>\n" % (rpmhdict["host"]))
	o.append("\t\t<rpm:sourcerpm>%s</rpm:sourcerpm>\n" % (rpmhdict["srpm"]))
	o.append("\t\t<rpm:header-range start=\"%s\" end=\"%s\"/>\n" % (rpmhdict["heds"], rpmhdict["hede"]))

	# dependency sections: each one is written only when all three of its lists are present
	sectlist = (
		("provides", "prvn", "prvl", "prvf"),
		("requires", "reqn", "reql", "reqf"),
		("conflicts", "conn", "conl", "conf"),
		("obsoletes", "obsn", "obsl", "obsf"),
	)
	for (sectname, namekeys, verskeys, flagkeys) in sectlist:
		try:
			namelist = rpmhdict[namekeys]
			verslist = rpmhdict[verskeys]
			flaglist = rpmhdict[flagkeys]
			t = rpmelist(namelist, verslist, flaglist)
		except (KeyError, ValueError):
			# list missing, or a flag that is not a number: leave this section out
			continue
		o.append("\t\t<rpm:%s>\n%s\t\t</rpm:%s>\n" % (sectname, t, sectname))

	# file section: needs base names, folder indexes and modes of equal length
	try:
		flen = rpmhdict["flen"]
		flei = rpmhdict["flei"]
		flet = rpmhdict["flet"]
	except KeyError:
		flen = None

	if ((flen is not None) and (len(flen) == len(flei)) and (len(flei) == len(flet))):
		wantregx = re.compile("^(/etc.*|.*bin/.*|/usr/lib/sendmail)$", re.I)
		seenpath = set()
		for u in range(0, len(flen)):
			# the lists end with an empty item left over from splitting on newlines
			if (flen[u] == ""):
				break

			i = int(flei[u])
			pathname = (rpmhdict["flep"][i].rstrip("/") + "/" + flen[u])
			if (not wantregx.match(pathname)):
				continue

			if (pathname in seenpath):
				continue

			# compare the whole file-type field: testing bit 0x4000 alone
			# would also match block devices (0x6000) and sockets (0xC000)
			if ((int(flet[u]) & 0xF000) == 0x4000):
				o.append("\t\t<file type=\"dir\">%s</file>\n" % (escape(pathname)))
			else:
				o.append("\t\t<file>%s</file>\n" % (escape(pathname)))

			seenpath.add(pathname)

	o.append("\t</format>\n")

	o.append("</package>\n")

	return ["".join(o)]

def findfile(foldname):
	"""Return the paths of the RPM files found below foldname.

	Folders are walked breadth-first and paths are built by joining with
	"/". A path is kept when it ends in ".rpm" (any case) and is neither
	below "<foldname>/repodata/", nor contains "debuginfo", nor ends in
	".src.rpm". The result keeps the order in which the files were met.
	"""
	rpmsregx = re.compile(r"^.*\.rpm$", re.I)
	# foldname is escaped so that folder names holding regex metacharacters still match literally
	skipregx = re.compile(r"^(%s/repodata/.*|.*debuginfo.*|.*\.src\.rpm)$" % (re.escape(foldname)), re.I)

	x = 0
	waitlist = [foldname]
	filelist = []

	# waitlist grows while it is walked: every folder adds its entries to the end
	while (x < len(waitlist)):
		pathitem = waitlist[x]
		x += 1

		if (os.path.isdir(pathitem)):
			for tempitem in os.listdir(pathitem):
				waitlist.append(pathitem + "/" + tempitem)

		elif (rpmsregx.match(pathitem) and (not skipregx.match(pathitem))):
			filelist.append(pathitem)

	return filelist

def main():
	"""Build primary.xml and repomd.xml for the repository folder named in sys.argv[1].

	The RPM files are found with findfile(). Three forked workers each ask
	the parent for file names over a pipe, parse every file and write the
	resulting XML to /tmp/repodata/primary.<n>.xml. The parent then joins
	those parts into /tmp/repodata/primary.xml and writes
	/tmp/repodata/repomd.xml describing it.

	Prints a usage line and exits when no folder is given.
	"""
	if (len(sys.argv) < 2):
		print("Usage: %s <repo-fold>" % (sys.argv[0]))
		sys.exit(0)

	repofold = sys.argv[1].rstrip("/")
	repolist = findfile(repofold)
	repoleng = len(repolist)

	print("found [%d] rpm files..." % (repoleng))

	childnum = 3
	destfold = "/tmp/repodata"
	pipelist = []; proclist = []

	try:
		os.mkdir(destfold)
	except OSError:
		# an output folder that already exists is fine, any other failure is not
		if (not os.path.isdir(destfold)):
			raise

	print("beg primary.xml...")
	# flush before forking, otherwise text still buffered here is printed again by every child
	sys.stdout.flush()

	for x in range(0, childnum):
		(parnpipe, chilpipe) = multiprocessing.Pipe(True)
		procnumb = os.fork()

		if (procnumb == 0):
			# worker: ask for a file name, process it, repeat until an empty name arrives
			parnpipe.close()

			with open("%s/primary.%d.xml" % (destfold, x), "w") as fileobjc:
				while (1):
					chilpipe.send("more")
					filename = chilpipe.recv()

					if (not filename):
						break

					print("[%d] : %s" % (x, filename))

					try:
						# parsing is inside the try so one unreadable file cannot end the worker
						headobjc = rpmhinfo(repofold + "/", filename)
						headdict = makedict(headobjc)
						xmldobjc = makexmld(headdict)
						fileobjc.write(xmldobjc[0])
					except Exception:
						sys.stderr.write("erro:" + str(sys.exc_info()) + "\n")

			sys.exit(0)

		else:
			chilpipe.close()
			pipelist.append(parnpipe)
			proclist.append(procnumb)

	# parent: answer every request with the next file name, or "" once all are handed out
	nextitem = 0
	while (len(pipelist) > 0):
		readlist = select.select(pipelist, [], [])[0]

		# walk readlist rather than pipelist, because pipelist is changed below
		for pipeitem in readlist:
			try:
				pipeitem.recv()

				if (nextitem < repoleng):
					# move on only after a successful send, so a name
					# is not lost when the worker has gone away
					pipeitem.send(repolist[nextitem])
					nextitem += 1
				else:
					pipeitem.send("")

			except (EOFError, IOError, OSError):
				# the worker closed its end: stop watching this pipe
				pipelist.remove(pipeitem)
				pipeitem.close()

	for procitem in proclist:
		try:
			os.waitpid(procitem, 0)
		except OSError:
			pass

	print("end primary.xml...")

	partlist = []
	for x in range(0, childnum):
		partname = ("%s/primary.%d.xml" % (destfold, x))
		try:
			with open(partname, "r") as fileobjc:
				partlist.append(fileobjc.read())
			os.unlink(partname)
		except (IOError, OSError):
			# a missing part means that worker wrote nothing; report it and go on
			sys.stderr.write("erro:" + str(sys.exc_info()) + "\n")
	primoutp = "".join(partlist)

	primname = ("%s/primary.xml" % (destfold))
	with open(primname, "w") as fileobjc:
		fileobjc.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")
		fileobjc.write("<metadata xmlns=\"http://linux.duke.edu/metadata/common\" xmlns:rpm=\"http://linux.duke.edu/metadata/rpm\" packages=\"%d\">\n" % (repoleng))
		fileobjc.write(primoutp)
		fileobjc.write("</metadata>\n")

	# checksum and size must describe the file as written, header and
	# footer included, so they are taken from the bytes on disk
	with open(primname, "rb") as fileobjc:
		primdata = fileobjc.read()

	with open("%s/repomd.xml" % (destfold), "w") as fileobjc:
		fileobjc.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")
		fileobjc.write("<repomd xmlns=\"http://linux.duke.edu/metadata/repo\" xmlns:rpm=\"http://linux.duke.edu/metadata/rpm\">\n")
		fileobjc.write("<data type=\"primary\">\n")
		fileobjc.write("\t<checksum type=\"sha256\">%s</checksum>\n" % (hashlib.sha256(primdata).hexdigest()))
		fileobjc.write("\t<location href=\"repodata/primary.xml\"/>\n")
		fileobjc.write("\t<size>%d</size>\n" % (len(primdata)))
		fileobjc.write("</data>\n")
		fileobjc.write("</repomd>\n")

# run main() only when this file is executed as a script, not when it is imported
if ("__main__" == __name__):
	main()

Leave a comment