Note: this is experimental, proof-of-concept code written for timing purposes.
This script tries to implement very basic createrepo functionality and is based on a model of forks and pipes for distributed, inter-process communication.
repo.py
import hashlib
import multiprocessing
import os
import re
import select
import sys
def rpmhinfo(p, n):
    """Read the RPM file ``n`` and decode its lead and header sections.

    The file is read as raw bytes.  On Python 2 the byte strings are returned
    unchanged; on Python 3 they are decoded to ``str`` (latin-1 for raw binary
    fields, UTF-8 with replacement for string-typed tag data).

    Args:
        p: Path prefix; ``len(p)`` characters are dropped from the front of
            ``n`` to form the location stored in the result.
        n: Path of the RPM file to parse.

    Returns:
        ``None`` if the file is shorter than the 96-byte lead or the lead's
        type field is non-zero.  Otherwise a list ``o`` in which

        * ``o[0]`` is ``[magic, vers, rtype, rarch, name, osnum, sigt, resr,
          location, file_size, sha256_hex]``, and
        * each following element describes one header section as
          ``[magic, vers, resr, numb, size, store_offset, start, entry, ...]``
          where every ``entry`` is ``[tag, type, offset, count, data]``.

        ``data`` is the decoded values, each followed by ``"\\n"``, for types
        3 and 4 (16/32-bit big-endian integers) and 6, 8, 9 (NUL-terminated
        strings); the raw bytes for type 7; and ``""`` for every other type.

    Raises:
        ValueError: If a header section is truncated or malformed.
    """
    import struct  # local import: not part of the file's top-level imports

    def latin1(chunk):
        # Python 2 byte strings already are ``str``.  On Python 3 map every
        # byte to the code point of the same value so nothing is lost.
        return chunk if isinstance(chunk, str) else chunk.decode("latin-1")

    def utf8(chunk):
        # Same idea for tag strings, decoded as UTF-8 on Python 3.
        if isinstance(chunk, str):
            return chunk
        return chunk.decode("utf-8", "replace")

    # Binary mode: an RPM is not text, and ``with`` closes the file on error.
    with open(n, "rb") as fileobjc:
        raw = fileobjc.read()
    total = len(raw)
    if total < 96:
        return None

    def decode(etype, count, y):
        """Decode ``count`` values of tag type ``etype`` starting at ``y``."""
        if etype not in (3, 4, 6, 7, 8, 9):
            # Unsupported type: report no data instead of looping forever.
            return ""
        if count > total:
            # Every supported value occupies at least one byte.
            raise ValueError("tag count %d exceeds the file size" % count)
        if etype == 3:
            values = struct.unpack_from(">%dH" % count, raw, y)
            return "".join("%d\n" % v for v in values)
        if etype == 4:
            values = struct.unpack_from(">%dI" % count, raw, y)
            return "".join("%d\n" % v for v in values)
        if etype == 7:
            if y + count > total:
                raise ValueError("binary tag data runs past the end of file")
            return latin1(raw[y:y + count])
        parts = []
        for _ in range(count):
            end = raw.index(b"\0", y)  # ValueError when unterminated
            parts.append(utf8(raw[y:end]) + "\n")
            y = end + 1
        return "".join(parts)

    # lead
    (rtype,) = struct.unpack_from(">H", raw, 6)
    lead = [
        latin1(raw[0:4]),
        latin1(raw[4:6]),
        rtype,
        latin1(raw[8:10]),
        latin1(raw[10:76]).strip("\0\t\r\n "),
        latin1(raw[76:78]),
        latin1(raw[78:80]),
        latin1(raw[80:96]),
    ]
    o = [lead]
    if rtype != 0:
        return None

    # extra
    lead.append(n[len(p):])
    lead.append(total)
    lead.append(hashlib.sha256(raw).hexdigest())

    # header sections, each introduced by the three magic bytes
    magic = b"\x8e\xad\xe8"
    m = 96
    try:
        while raw[m:m + 3] == magic:
            numb, size = struct.unpack_from(">II", raw, m + 8)
            store = m + 16 + numb * 16
            if store + size > total:
                raise ValueError("header section runs past the end of file")
            section = [
                latin1(magic),
                latin1(raw[m + 3:m + 4]),
                latin1(raw[m + 4:m + 8]),
                numb,
                size,
                store,
                m,
            ]
            for k in range(numb):
                tag, etype, offs, count = struct.unpack_from(
                    ">4I", raw, m + 16 + k * 16)
                data = decode(etype, count, store + offs)
                section.append([tag, etype, offs, count, data])
            o.append(section)
            # The next section starts on the following 8-byte boundary.
            m = store + size
            m += -m % 8
    except (struct.error, ValueError) as erro:
        raise ValueError(
            "truncated or malformed RPM header in %s: %s" % (n, erro))

    return o
def safestri(inptstri):
    """Return ``inptstri`` XML-escaped and stripped of surrounding whitespace.

    The characters ``&``, ``<`` and ``>`` are replaced by their XML entities
    so the value can be embedded in the generated metadata.  The previous
    implementation replaced each character with itself and therefore escaped
    nothing.

    Args:
        inptstri: The raw string to make safe.

    Returns:
        The escaped string with leading and trailing whitespace removed.
    """
    # Local import: the file's import block does not provide this module.
    from xml.sax.saxutils import escape

    # escape() handles "&" first, so the entities it emits are not re-escaped.
    return escape(inptstri).strip()
def makedict(rpmhobjc):
    """Collect selected header entries of a parsed RPM into one dictionary.

    Args:
        rpmhobjc: A list whose first element carries at least eleven fields
            (indexes 8, 9 and 10 are copied to ``locn``, ``size`` and
            ``hash``) and whose remaining elements are header sections with
            ``[tag, type, offset, count, data]`` entries from index 7 onward.

    Returns:
        A dict with the keys ``locn``, ``size``, ``hash``, ``poch``, ``pkgr``,
        ``urln``, ``vndr``, ``heds`` and ``hede`` always present, plus one key
        for every recognised ``(tag, type)`` pair found in the sections.
        When a pair occurs more than once the last occurrence wins.
    """
    # (tag, type) -> key whose value is the escaped, stripped data
    scalartags = {
        (1000, 6): "name",
        (1022, 6): "arch",
        (1003, 4): "poch",
        (1001, 6): "vers",
        (1002, 6): "rels",
        (1004, 9): "summ",
        (1005, 9): "desc",
        (1015, 6): "pkgr",
        (1020, 6): "urln",
        (1006, 4): "RPMTAG_BUILDTIME",
        (1009, 4): "RPMTAG_SIZE",
        (1007, 4): "SIGTAG_PAYLOADSIZE",
        (1014, 6): "licn",
        (1011, 6): "vndr",
        (1016, 9): "grup",
        (1007, 6): "host",
        (1044, 6): "srpm",
    }
    # (tag, type) -> key whose value is the escaped data split into lines
    safelisttags = {
        (1047, 8): "prvn",
        (1049, 8): "reqn",
        (1054, 8): "conn",
        (1090, 8): "obsn",
    }
    # (tag, type) -> key whose value is the untouched data split into lines
    rawlisttags = {
        (1113, 8): "prvl",
        (1112, 4): "prvf",
        (1050, 8): "reql",
        (1048, 4): "reqf",
        (1055, 8): "conl",
        (1053, 4): "conf",
        (1115, 8): "obsl",
        (1114, 4): "obsf",
        (1118, 8): "flep",
        (1117, 8): "flen",
        (1116, 4): "flei",
        (1030, 3): "flet",
    }

    lastindx = len(rpmhobjc) - 1
    infodict = {
        "locn": rpmhobjc[0][8],
        "size": rpmhobjc[0][9],
        "hash": rpmhobjc[0][10],
        "poch": 0,
        "pkgr": "",
        "urln": "",
        "vndr": "",
        # byte range of the last section: its start and the end of its store
        "heds": rpmhobjc[lastindx][6],
        "hede": (rpmhobjc[lastindx][4] + rpmhobjc[lastindx][5]),
    }

    for section in rpmhobjc[1:]:
        for entry in section[7:]:
            tagtype = (entry[0], entry[1])
            if tagtype in scalartags:
                infodict[scalartags[tagtype]] = safestri(entry[4])
            elif tagtype in safelisttags:
                infodict[safelisttags[tagtype]] = safestri(entry[4]).split("\n")
            elif tagtype in rawlisttags:
                infodict[rawlisttags[tagtype]] = entry[4].split("\n")
    return infodict
def procenvr(envrstri):
    """Render an ``[epoch:]version[-release]`` string as XML attributes.

    Args:
        envrstri: The version string; the text after the last ``-`` is the
            release and the text before the first ``:`` is the epoch.

    Returns:
        ``""`` for an empty input, otherwise `` epoch="E" ver="V"`` followed
        by `` rel="R"`` when a release is present.  The epoch is ``0`` when
        the input has none.
    """
    if envrstri == "":
        return ""

    # Split the release off at the last dash, if there is one.
    front, dash, release = envrstri.rpartition("-")
    if not dash:
        front = envrstri

    # Split the epoch off at the first colon, defaulting it to "0".
    epoch, colon, version = front.partition(":")
    if not colon:
        epoch, version = "0", front

    attrs = " epoch=\"%s\" ver=\"%s\"" % (epoch, version)
    if dash:
        attrs += " rel=\"%s\"" % (release)
    return attrs
def rpmelist(namelist, verslist, flaglist):
    """Build the ``<rpm:entry .../>`` lines for one dependency list.

    Args:
        namelist: Dependency names.
        verslist: Version strings, parallel to ``namelist`` (may be longer).
        flaglist: Numeric flag strings, parallel to ``namelist`` (may be
            longer than ``verslist``).

    Returns:
        The concatenated entry lines.  ``""`` when ``namelist`` is empty or
        when the lists are not of non-decreasing length.  Names containing
        ``rpmlib`` (case-insensitive) and duplicate entries are omitted.

    Raises:
        ValueError: If one of the first ``len(namelist)`` flags is not an
            integer string.
    """
    sensemap = {2: "LT", 4: "GT", 8: "EQ", 10: "LE", 12: "GE"}
    namenumb = len(namelist)
    if namenumb < 1:
        return ""
    if not (namenumb <= len(verslist) <= len(flaglist)):
        return ""

    # Convert every flag up front so a bad value is reported even when the
    # entry it belongs to would be filtered out below.
    flagnums = [int(flagitem) for flagitem in flaglist[:namenumb]]

    linelist = []
    seenset = set()  # O(1) duplicate detection
    for nameitem, versitem, flagnumb in zip(namelist, verslist, flagnums):
        if re.match("^.*rpmlib.*$", nameitem, re.I):
            continue
        # The low four bits select the comparison; unknown values emit no
        # flags attribute at all.
        sensestr = sensemap.get(flagnumb & 0xf)
        flagattr = (" flags=\"%s\"" % (sensestr)) if sensestr else ""
        preattr = " pre=\"1\"" if (flagnumb & 0x300) else ""
        entry = "name=\"%s\"%s%s%s" % (
            nameitem, flagattr, procenvr(versitem), preattr)
        if entry in seenset:
            continue
        seenset.add(entry)
        linelist.append("\t\t\t<rpm:entry %s/>\n" % (entry))
    return "".join(linelist)
def _xmldeps(rpmhdict, element, namekey, verskey, flagkey):
    """Return the ``<rpm:element>`` block for one dependency kind, or ``""``."""
    try:
        namelist = rpmhdict[namekey]
        verslist = rpmhdict[verskey]
        flaglist = rpmhdict[flagkey]
    except KeyError:
        # The package does not carry this kind of dependency.
        return ""
    try:
        entries = rpmelist(namelist, verslist, flaglist)
    except (ValueError, IndexError, TypeError):
        # Unusable dependency data: leave the section out, keep the package.
        return ""
    return "\t\t<rpm:%s>\n%s\t\t</rpm:%s>\n" % (element, entries, element)


def _xmlfiles(rpmhdict):
    """Return the ``<file>`` lines for the paths selected for primary.xml."""
    try:
        namelist = rpmhdict["flen"]
        indxlist = rpmhdict["flei"]
        typelist = rpmhdict["flet"]
        samesize = (len(namelist) == len(indxlist) == len(typelist))
    except (KeyError, TypeError):
        return ""
    if not samesize:
        return ""

    linelist = []
    seenset = set()  # O(1) duplicate detection
    for nameitem, indxitem, typeitem in zip(namelist, indxlist, typelist):
        if nameitem == "":
            # An empty name ends the list.
            break
        dirindex = int(indxitem)
        pathname = rpmhdict["flep"][dirindex].rstrip("/") + "/" + nameitem
        if not re.match("^(/etc.*|.*bin/.*|/usr/lib/sendmail)$", pathname, re.I):
            continue
        if pathname in seenset:
            continue
        if int(typeitem) & 0x4000:
            linelist.append("\t\t<file type=\"dir\">%s</file>\n" % (pathname))
        else:
            linelist.append("\t\t<file>%s</file>\n" % (pathname))
        seenset.add(pathname)
    return "".join(linelist)


def makexmld(rpmhdict):
    """Render one package dictionary as a ``<package>`` XML fragment.

    Args:
        rpmhdict: Dictionary with the keys ``name``, ``arch``, ``poch``,
            ``vers``, ``rels``, ``hash``, ``summ``, ``desc``, ``pkgr``,
            ``urln``, ``RPMTAG_BUILDTIME``, ``size``, ``RPMTAG_SIZE``,
            ``SIGTAG_PAYLOADSIZE``, ``locn``, ``licn``, ``vndr``, ``grup``,
            ``host``, ``srpm``, ``heds`` and ``hede``.  The dependency keys
            (``prv*``, ``req*``, ``con*``, ``obs*``) and the file keys
            (``flep``, ``flen``, ``flei``, ``flet``) are optional.

    Returns:
        A one-element list holding the XML text.

    Raises:
        KeyError: If one of the required keys is missing.
        ValueError: If ``RPMTAG_BUILDTIME`` or a file index/mode is not an
            integer string.
    """
    partlist = [
        "<package type=\"rpm\">\n",
        "\t<name>%s</name>\n" % (rpmhdict["name"]),
        "\t<arch>%s</arch>\n" % (rpmhdict["arch"]),
        "\t<version epoch=\"%s\" ver=\"%s\" rel=\"%s\"/>\n" % (
            rpmhdict["poch"], rpmhdict["vers"], rpmhdict["rels"]),
        "\t<checksum type=\"sha256\" pkgid=\"YES\">%s</checksum>\n" % (
            rpmhdict["hash"]),
        "\t<summary>%s</summary>\n" % (rpmhdict["summ"]),
        "\t<description>%s</description>\n" % (rpmhdict["desc"]),
        "\t<packager>%s</packager>\n" % (rpmhdict["pkgr"]),
        "\t<url>%s</url>\n" % (rpmhdict["urln"]),
        # The file time is reported as ten seconds before the build time.
        "\t<time file=\"%s\" build=\"%s\"/>\n" % (
            int(rpmhdict["RPMTAG_BUILDTIME"]) - 10,
            rpmhdict["RPMTAG_BUILDTIME"]),
        "\t<size package=\"%s\" installed=\"%s\" archive=\"%s\"/>\n" % (
            rpmhdict["size"], rpmhdict["RPMTAG_SIZE"],
            rpmhdict["SIGTAG_PAYLOADSIZE"]),
        "\t<location href=\"%s\"/>\n" % (rpmhdict["locn"]),
        "\t<format>\n",
        "\t\t<rpm:license>%s</rpm:license>\n" % (rpmhdict["licn"]),
        "\t\t<rpm:vendor>%s</rpm:vendor>\n" % (rpmhdict["vndr"]),
        "\t\t<rpm:group>%s</rpm:group>\n" % (rpmhdict["grup"]),
        "\t\t<rpm:buildhost>%s</rpm:buildhost>\n" % (rpmhdict["host"]),
        "\t\t<rpm:sourcerpm>%s</rpm:sourcerpm>\n" % (rpmhdict["srpm"]),
        "\t\t<rpm:header-range start=\"%s\" end=\"%s\"/>\n" % (
            rpmhdict["heds"], rpmhdict["hede"]),
        _xmldeps(rpmhdict, "provides", "prvn", "prvl", "prvf"),
        _xmldeps(rpmhdict, "requires", "reqn", "reql", "reqf"),
        _xmldeps(rpmhdict, "conflicts", "conn", "conl", "conf"),
        _xmldeps(rpmhdict, "obsoletes", "obsn", "obsl", "obsf"),
        _xmlfiles(rpmhdict),
        "\t</format>\n",
        "</package>\n",
    ]
    return ["".join(partlist)]
def findfile(foldname):
    """Return the binary RPM files found below ``foldname``.

    Directories are walked breadth-first.  A file is kept when its path ends
    in ``.rpm`` (case-insensitive) and it is neither below
    ``<foldname>/repodata/``, nor a ``.src.rpm``, nor has ``debuginfo``
    anywhere in its path.

    Args:
        foldname: Directory to scan.  It is matched literally, so characters
            that are special in regular expressions are safe.

    Returns:
        A list of paths, each built as ``parent + "/" + entry``, in
        traversal order.
    """
    rpmpatt = re.compile(r"^.*\.rpm$", re.I)
    # re.escape keeps a folder name such as "repo(1)" from being read as
    # regular-expression syntax.
    skippatt = re.compile(
        r"^(%s/repodata/.*|.*debuginfo.*|.*\.src\.rpm)$" % (re.escape(foldname)),
        re.I)

    pending = [foldname]
    filelist = []
    x = 0
    while x < len(pending):
        pathname = pending[x]
        x += 1
        if os.path.isdir(pathname):
            for tempitem in os.listdir(pathname):
                pending.append(pathname + "/" + tempitem)
        elif rpmpatt.match(pathname) and not skippatt.match(pathname):
            filelist.append(pathname)
    return filelist
def _mainutf8(text):
    """Return ``text`` as a byte string, UTF-8 encoding it when necessary."""
    return text if isinstance(text, bytes) else text.encode("utf-8")


def _mainwork(x, chilpipe, repofold, destfold):
    """Worker loop: ask the parent for file names and write XML fragments."""
    fileobjc = open("%s/primary.%d.xml" % (destfold, x), "wb")
    try:
        while True:
            chilpipe.send("more")
            filename = chilpipe.recv()
            if not filename:
                # An empty name means no work is left.
                break
            print("[%d] : %s" % (x, filename))
            try:
                # Parsing is inside the try so that one unreadable package
                # is reported instead of ending this worker.
                headobjc = rpmhinfo(repofold + "/", filename)
                headdict = makedict(headobjc)
                xmldobjc = makexmld(headdict)
                fileobjc.write(_mainutf8(xmldobjc[0]))
            except Exception:
                sys.stderr.write("erro:" + str(sys.exc_info()) + "\n")
    finally:
        fileobjc.close()


def _mainfeed(pipelist, repolist):
    """Hand out ``repolist`` to the workers until every pipe has closed."""
    nextindx = 0
    while pipelist:
        readlist = select.select(pipelist, [], [])[0]
        # Iterate the ready pipes, not pipelist itself: removing from a list
        # while looping over it skips elements.
        for pipeitem in readlist:
            try:
                pipeitem.recv()
                if nextindx < len(repolist):
                    pipeitem.send(repolist[nextindx])
                    # Advance only after a successful send so the file can
                    # still go to another worker if this one is gone.
                    nextindx += 1
                else:
                    pipeitem.send("")
            except (EOFError, IOError, OSError):
                # The worker exited and closed its end of the pipe.
                pipelist.remove(pipeitem)
                pipeitem.close()


def main():
    """Scan the folder given on the command line and write repository metadata.

    Three child processes are forked (``os.fork``); each requests file names
    from the parent over a pipe and writes the ``<package>`` fragments to
    ``/tmp/repodata/primary.<n>.xml``.  The parent then merges the fragments
    into ``/tmp/repodata/primary.xml`` and writes ``/tmp/repodata/repomd.xml``
    with the SHA-256 checksum and byte size of the ``primary.xml`` file as
    written.

    The ``packages`` attribute is the number of files found, which can exceed
    the number of fragments when a package could not be processed.

    Exits with status 0 after printing a usage line when no folder is given.
    """
    if len(sys.argv) < 2:
        print("Usage: %s <repo-fold>" % (sys.argv[0]))
        sys.exit(0)

    repofold = sys.argv[1].rstrip("/")
    repolist = findfile(repofold)
    repoleng = len(repolist)
    print("found [%d] rpm files..." % (repoleng))

    childnum = 3
    destfold = "/tmp/repodata"
    try:
        os.mkdir(destfold)
    except OSError:
        # An existing folder is fine; anything else is a real failure.
        if not os.path.isdir(destfold):
            raise

    print("beg primary.xml...")
    pipelist = []
    proclist = []
    for x in range(0, childnum):
        (parnpipe, chilpipe) = multiprocessing.Pipe(True)
        # Flush first so buffered output is not copied into the child and
        # printed a second time when it exits.
        sys.stdout.flush()
        procnumb = os.fork()
        if procnumb == 0:
            parnpipe.close()
            _mainwork(x, chilpipe, repofold, destfold)
            sys.exit(0)
        chilpipe.close()
        pipelist.append(parnpipe)
        proclist.append(procnumb)

    _mainfeed(pipelist, repolist)

    for procitem in proclist:
        try:
            os.waitpid(procitem, 0)
        except OSError:
            pass  # nothing left to reap for this pid
    print("end primary.xml...")

    fraglist = []
    for x in range(0, childnum):
        fragname = "%s/primary.%d.xml" % (destfold, x)
        try:
            with open(fragname, "rb") as fileobjc:
                fraglist.append(fileobjc.read())
            os.unlink(fragname)
        except (IOError, OSError):
            sys.stderr.write("warn: could not collect %s\n" % (fragname))

    primhead = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
    primhead += "<metadata xmlns=\"http://linux.duke.edu/metadata/common\" xmlns:rpm=\"http://linux.duke.edu/metadata/rpm\" packages=\"%d\">\n" % (repoleng)
    primoutp = b"".join(
        [_mainutf8(primhead)] + fraglist + [_mainutf8("</metadata>\n")])
    with open("%s/primary.xml" % (destfold), "wb") as fileobjc:
        fileobjc.write(primoutp)

    # Checksum and size describe the bytes just written to primary.xml,
    # including the XML declaration and the <metadata> wrapper.
    repooutp = "".join([
        "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
        "<repomd xmlns=\"http://linux.duke.edu/metadata/repo\" xmlns:rpm=\"http://linux.duke.edu/metadata/rpm\">\n",
        "<data type=\"primary\">\n",
        "\t<checksum type=\"sha256\">%s</checksum>\n" % (
            hashlib.sha256(primoutp).hexdigest()),
        "\t<location href=\"repodata/primary.xml\"/>\n",
        "\t<size>%d</size>\n" % (len(primoutp)),
        "</data>\n",
        "</repomd>\n",
    ])
    with open("%s/repomd.xml" % (destfold), "w") as fileobjc:
        fileobjc.write(repooutp)
# Run the repository scan only when executed as a script, not on import.
if __name__ == "__main__":
    main()