Note: this is experimental, proof-of-concept code written for timing purposes.
This script tries to implement very basic createrepo functionality. It is based on a model of forked worker processes that communicate with the parent process over pipes.
repo.py
"""Minimal createrepo-style generator for ``primary.xml`` and ``repomd.xml``.

The parent process walks a repository folder for binary RPM packages, forks a
fixed number of workers, and hands file names to them over pipes on request.
Each worker parses the RPM lead and header sections itself (no rpm bindings)
and writes XML fragments to a private chunk file; the parent finally merges
the chunks into ``primary.xml`` and writes a matching ``repomd.xml``.

Requires Python 3 on a platform that provides ``os.fork``.
"""

import collections
import hashlib
import multiprocessing
import os
import re
import select
import stat
import struct
import sys

# Sizes and magic numbers of the RPM file format.
LEAD_SIZE = 96
LEAD_MAGIC = b"\xed\xab\xee\xdb"
HEADER_MAGIC = b"\x8e\xad\xe8"

# struct codes for the fixed-width header entry types
# (1 = CHAR, 2 = INT8, 3 = INT16, 4 = INT32, 5 = INT64).
_INT_FORMATS = {1: "B", 2: "B", 3: "H", 4: "I", 5: "Q"}
# NUL-terminated entry types (6 = STRING, 8 = STRING_ARRAY, 9 = I18NSTRING).
_STRING_TYPES = (6, 8, 9)
_BINARY_TYPE = 7

# How makedict() stores a header entry.
_TEXT = "text"    # escaped and stripped scalar
_NAMES = "names"  # escaped and stripped, then split into lines
_LINES = "lines"  # raw, split into lines

# (tag, entry type) -> (info key, storage mode).  The entry type is part of
# the key because signature and main headers reuse tag numbers (e.g. 1007).
_TAGMAP = {
    (1000, 6): ("name", _TEXT),
    (1022, 6): ("arch", _TEXT),
    (1003, 4): ("poch", _TEXT),
    (1001, 6): ("vers", _TEXT),
    (1002, 6): ("rels", _TEXT),
    (1004, 9): ("summ", _TEXT),
    (1005, 9): ("desc", _TEXT),
    (1015, 6): ("pkgr", _TEXT),
    (1020, 6): ("urln", _TEXT),
    (1006, 4): ("RPMTAG_BUILDTIME", _TEXT),
    (1009, 4): ("RPMTAG_SIZE", _TEXT),
    (1007, 4): ("SIGTAG_PAYLOADSIZE", _TEXT),
    (1014, 6): ("licn", _TEXT),
    (1011, 6): ("vndr", _TEXT),
    (1016, 9): ("grup", _TEXT),
    (1007, 6): ("host", _TEXT),
    (1044, 6): ("srpm", _TEXT),
    (1047, 8): ("prvn", _NAMES),
    (1113, 8): ("prvl", _LINES),
    (1112, 4): ("prvf", _LINES),
    (1049, 8): ("reqn", _NAMES),
    (1050, 8): ("reql", _LINES),
    (1048, 4): ("reqf", _LINES),
    (1054, 8): ("conn", _NAMES),
    (1055, 8): ("conl", _LINES),
    (1053, 4): ("conf", _LINES),
    (1090, 8): ("obsn", _NAMES),
    (1115, 8): ("obsl", _LINES),
    (1114, 4): ("obsf", _LINES),
    (1118, 8): ("flep", _LINES),
    (1117, 8): ("flen", _LINES),
    (1116, 4): ("flei", _LINES),
    (1030, 3): ("flet", _LINES),
}

# Low four bits of a dependency flag word -> comparison name used in the XML.
_SENSE_NAMES = {2: "LT", 4: "GT", 8: "EQ", 10: "LE", 12: "GE"}
# Flag bits that make rpmelist() emit pre="1".
_PRE_MASK = 0x200 | 0x100

_RPMLIB_NAME = re.compile(r"^.*rpmlib.*$", re.I)
_PRIMARY_FILE = re.compile(r"^(/etc.*|.*bin/.*|/usr/lib/sendmail)$", re.I)
_RPM_SUFFIX = re.compile(r"^.*\.rpm$", re.I)

# (XML element, info-key prefix) for the dependency sections, in output order.
_DEPENDENCY_SECTIONS = (
    ("provides", "prv"),
    ("requires", "req"),
    ("conflicts", "con"),
    ("obsoletes", "obs"),
)


def _read_values(d, y, rtype, numb):
    """Decode ``numb`` values of entry type ``rtype`` found at offset ``y``.

    Integers and strings are returned as text with one value per line (each
    value followed by a newline); binary data is returned with one character
    per byte.  Unknown entry types yield an empty string instead of stalling
    the parser.
    """
    if rtype in _INT_FORMATS:
        values = struct.unpack_from(">%d%s" % (numb, _INT_FORMATS[rtype]), d, y)
        return "".join("%d\n" % (value) for value in values)
    if rtype in _STRING_TYPES:
        parts = []
        for _ in range(numb):
            end = d.index(b"\0", y)
            parts.append(d[y:end].decode("utf-8", "replace") + "\n")
            y = end + 1
        return "".join(parts)
    if rtype == _BINARY_TYPE:
        return d[y:y + numb].decode("latin-1")
    return ""


def rpmhinfo(p, n):
    """Parse the lead and all header sections of the RPM file ``n``.

    ``p`` is the path prefix removed from ``n`` to obtain the location that is
    recorded for the package.

    Returns ``None`` when the file is not a binary RPM (too short, wrong lead
    magic, or a non-zero lead type such as a source package).  Otherwise
    returns a list whose first item describes the lead::

        [magic, version, type, arch, name, os, sigtype, reserved,
         location, file size, sha256 hex digest]

    followed by one item per header section::

        [magic, version, reserved, entry count, store size, store offset,
         section offset, [tag, type, offset, count, data], ...]

    Raw lead/header fields are ``bytes``; entry data is text.  Truncated or
    corrupt files raise ``struct.error`` or ``ValueError``.
    """
    with open(n, "rb") as f:
        d = f.read()
    if (len(d) < LEAD_SIZE) or (d[0:4] != LEAD_MAGIC):
        return None
    rtype = struct.unpack_from(">H", d, 6)[0]
    if rtype != 0:
        return None
    lead = [
        d[0:4], d[4:6], rtype, d[8:10], d[10:76].strip(b"\0\t\r\n "),
        d[76:78], d[78:80], d[80:96],
        n[len(p):], len(d), hashlib.sha256(d).hexdigest(),
    ]
    o = [lead]
    m = LEAD_SIZE
    while d[m:m + 3] == HEADER_MAGIC:
        numb, size = struct.unpack_from(">II", d, m + 8)
        store = m + 16 + (numb * 16)
        section = [d[m:m + 3], d[m + 3:m + 4], d[m + 4:m + 8],
                   numb, size, store, m]
        for x in range(numb):
            tag, etype, offs, count = struct.unpack_from(
                ">IIII", d, m + 16 + (x * 16))
            data = _read_values(d, store + offs, etype, count)
            section.append([tag, etype, offs, count, data])
        o.append(section)
        # The next section starts on the following 8-byte boundary.
        m = store + size
        m += -m % 8
    return o


def _xmlescape(text):
    """Replace the XML markup characters ``&``, ``<`` and ``>`` by entities."""
    # The ampersand must be handled first so entities are not escaped twice.
    text = text.replace("&", "&amp;")
    text = text.replace("<", "&lt;")
    text = text.replace(">", "&gt;")
    return text


def safestri(inptstri):
    """Return ``inptstri`` XML-escaped and stripped of surrounding whitespace."""
    return _xmlescape(inptstri).strip()


def makedict(rpmhobjc):
    """Collect the metadata needed for ``primary.xml`` from parsed headers.

    ``rpmhobjc`` is the list returned by ``rpmhinfo``.  The result always
    contains ``locn``, ``size``, ``hash``, ``poch``, ``pkgr``, ``urln``,
    ``vndr``, ``heds`` and ``hede``; every other key is present only when the
    corresponding (tag, type) entry exists in one of the header sections.
    """
    last = rpmhobjc[-1]
    infodict = {
        "locn": rpmhobjc[0][8],
        "size": rpmhobjc[0][9],
        "hash": rpmhobjc[0][10],
        "poch": 0,
        "pkgr": "",
        "urln": "",
        "vndr": "",
        "heds": last[6],
        "hede": (last[4] + last[5]),
    }
    for section in rpmhobjc[1:]:
        # Items 0-6 of a section are its fixed fields; entries follow.
        for entry in section[7:]:
            target = _TAGMAP.get((entry[0], entry[1]))
            if target is None:
                continue
            key, mode = target
            if mode == _TEXT:
                infodict[key] = safestri(entry[4])
            elif mode == _NAMES:
                infodict[key] = safestri(entry[4]).split("\n")
            else:
                infodict[key] = entry[4].split("\n")
    return infodict


def procenvr(envrstri):
    """Render ``[epoch:]version[-release]`` as XML attributes.

    Returns an empty string for an empty input; otherwise a string such as
    `` epoch="0" ver="1.0" rel="2"`` (leading space included).  A missing
    epoch defaults to ``0`` and a missing release is omitted.
    """
    if envrstri == "":
        return ""
    pieces = envrstri.rsplit("-", 1)
    epoch, colon, version = pieces[0].partition(":")
    if not colon:
        epoch, version = "0", pieces[0]
    outpstri = " epoch=\"%s\" ver=\"%s\"" % (epoch, version)
    if len(pieces) > 1:
        outpstri += " rel=\"%s\"" % (pieces[1])
    return outpstri


def rpmelist(namelist, verslist, flaglist):
    """Build the ``<rpm:entry>`` lines for one dependency section.

    The three lists are parallel (names, versions, flag words as decimal
    text); the version and flag lists may be longer than the name list.
    Entries whose name contains ``rpmlib`` and exact duplicates are skipped.
    Returns an empty string when there are no names or the lists do not line
    up.
    """
    x = len(namelist)
    if (x < 1) or not (x <= len(verslist) <= len(flaglist)):
        return ""
    seen = set()
    lines = []
    for name, version, flag in zip(namelist, verslist, flaglist):
        bits = int(flag)
        if _RPMLIB_NAME.match(name):
            continue
        sense = _SENSE_NAMES.get(bits & 0xf)
        flagattr = (" flags=\"%s\"" % (sense)) if sense else ""
        preattr = " pre=\"1\"" if (bits & _PRE_MASK) else ""
        attrs = "name=\"%s\"%s%s%s" % (name, flagattr, procenvr(version), preattr)
        if attrs in seen:
            continue
        seen.add(attrs)
        lines.append("\t\t\t<rpm:entry %s/>\n" % (attrs))
    return "".join(lines)


def _filelines(rpmhdict):
    """Return the ``<file>`` lines for the paths listed in primary metadata."""
    try:
        basenames = rpmhdict["flen"]
        dirindexes = rpmhdict["flei"]
        modes = rpmhdict["flet"]
        dirnames = rpmhdict["flep"]
    except KeyError:
        return []
    if not (len(basenames) == len(dirindexes) == len(modes)):
        return []
    seen = set()
    lines = []
    for basename, dirindex, mode in zip(basenames, dirindexes, modes):
        if basename == "":
            # Splitting on newlines leaves an empty item after the last value.
            break
        pathname = dirnames[int(dirindex)].rstrip("/") + "/" + basename
        if not _PRIMARY_FILE.match(pathname):
            continue
        if pathname in seen:
            continue
        seen.add(pathname)
        if stat.S_ISDIR(int(mode)):
            lines.append("\t\t<file type=\"dir\">%s</file>\n" % (_xmlescape(pathname)))
        else:
            lines.append("\t\t<file>%s</file>\n" % (_xmlescape(pathname)))
    return lines


def makexmld(rpmhdict):
    """Render one ``<package>`` element from a ``makedict`` dictionary.

    Returns a one-item list holding the XML text.  Raises ``KeyError`` when a
    mandatory field (name, arch, version, release, summary, description,
    build time, sizes, license, group, build host or source rpm) is missing;
    the dependency and file sections are optional and simply left out.
    """
    buildtime = rpmhdict["RPMTAG_BUILDTIME"]
    out = [
        "<package type=\"rpm\">\n",
        "\t<name>%s</name>\n" % (rpmhdict["name"]),
        "\t<arch>%s</arch>\n" % (rpmhdict["arch"]),
        "\t<version epoch=\"%s\" ver=\"%s\" rel=\"%s\"/>\n"
        % (rpmhdict["poch"], rpmhdict["vers"], rpmhdict["rels"]),
        "\t<checksum type=\"sha256\" pkgid=\"YES\">%s</checksum>\n" % (rpmhdict["hash"]),
        "\t<summary>%s</summary>\n" % (rpmhdict["summ"]),
        "\t<description>%s</description>\n" % (rpmhdict["desc"]),
        "\t<packager>%s</packager>\n" % (rpmhdict["pkgr"]),
        "\t<url>%s</url>\n" % (rpmhdict["urln"]),
        # NOTE: the file time is not read from disk; it is reported as the
        # build time minus ten seconds.
        "\t<time file=\"%s\" build=\"%s\"/>\n" % (int(buildtime) - 10, buildtime),
        "\t<size package=\"%s\" installed=\"%s\" archive=\"%s\"/>\n"
        % (rpmhdict["size"], rpmhdict["RPMTAG_SIZE"], rpmhdict["SIGTAG_PAYLOADSIZE"]),
        "\t<location href=\"%s\"/>\n" % (_xmlescape(rpmhdict["locn"])),
        "\t<format>\n",
        "\t\t<rpm:license>%s</rpm:license>\n" % (rpmhdict["licn"]),
        "\t\t<rpm:vendor>%s</rpm:vendor>\n" % (rpmhdict["vndr"]),
        "\t\t<rpm:group>%s</rpm:group>\n" % (rpmhdict["grup"]),
        "\t\t<rpm:buildhost>%s</rpm:buildhost>\n" % (rpmhdict["host"]),
        "\t\t<rpm:sourcerpm>%s</rpm:sourcerpm>\n" % (rpmhdict["srpm"]),
        "\t\t<rpm:header-range start=\"%s\" end=\"%s\"/>\n"
        % (rpmhdict["heds"], rpmhdict["hede"]),
    ]
    for element, prefix in _DEPENDENCY_SECTIONS:
        try:
            entries = rpmelist(rpmhdict[prefix + "n"], rpmhdict[prefix + "l"],
                               rpmhdict[prefix + "f"])
        except (KeyError, ValueError, IndexError):
            # Best effort: a missing or malformed section is left out.
            continue
        out.append("\t\t<rpm:%s>\n%s\t\t</rpm:%s>\n" % (element, entries, element))
    out.extend(_filelines(rpmhdict))
    out.append("\t</format>\n")
    out.append("</package>\n")
    return ["".join(out)]


def findfile(foldname):
    """Return the binary RPM files found below ``foldname``.

    The tree is walked breadth first with sorted directory listings.  Files
    under ``<foldname>/repodata/``, paths containing ``debuginfo`` and source
    packages (``.src.rpm``) are excluded; matching is case-insensitive.
    Directories that cannot be listed are reported on stderr and skipped.
    """
    excluded = re.compile(
        r"^(%s/repodata/.*|.*debuginfo.*|.*\.src\.rpm)$" % (re.escape(foldname)),
        re.I)
    found = []
    visited = set()
    queue = collections.deque([foldname])
    while queue:
        path = queue.popleft()
        if os.path.isdir(path):
            # Directories are tracked by real path so symlink cycles end.
            realpath = os.path.realpath(path)
            if realpath in visited:
                continue
            visited.add(realpath)
            try:
                entries = sorted(os.listdir(path))
            except OSError as erro:
                sys.stderr.write("erro: %s\n" % (erro,))
                continue
            queue.extend(path + "/" + entry for entry in entries)
        elif _RPM_SUFFIX.match(path) and not excluded.match(path):
            found.append(path)
    return found


def _worker(index, conn, repofold, destfold):
    """Worker loop: request file names over ``conn`` and write XML fragments.

    The worker sends ``"more"`` and waits for a file name; an empty reply ends
    the loop.  Packages that cannot be processed are reported on stderr and
    skipped.
    """
    chunkpath = "%s/primary.%d.xml" % (destfold, index)
    with open(chunkpath, "w", encoding="utf-8") as chunk:
        while True:
            conn.send("more")
            filename = conn.recv()
            if not filename:
                break
            print("[%d] : %s" % (index, filename))
            try:
                headobjc = rpmhinfo(repofold + "/", filename)
                if headobjc is None:
                    raise ValueError("not a binary rpm package: %s" % (filename))
                xmldobjc = makexmld(makedict(headobjc))
                chunk.write(xmldobjc[0])
            except Exception:
                sys.stderr.write("erro:" + str(sys.exc_info()) + "\n")


def _dispatch(pipelist, pending):
    """Parent loop: answer each worker request with the next pending file.

    ``pending`` is a deque of file names.  Once it is empty, workers receive
    an empty string; a pipe is dropped when its worker has closed its end.
    """
    active = list(pipelist)
    while active:
        readlist = select.select(active, [], [])[0]
        for pipeitem in readlist:
            try:
                pipeitem.recv()
                pipeitem.send(pending.popleft() if pending else "")
            except (EOFError, OSError):
                pipeitem.close()
                active.remove(pipeitem)


def main():
    """Generate ``primary.xml`` and ``repomd.xml`` for the folder in argv[1]."""
    if len(sys.argv) < 2:
        print("Usage: %s <repo-fold>" % (sys.argv[0]))
        sys.exit(0)
    repofold = sys.argv[1].rstrip("/")
    repolist = findfile(repofold)
    repoleng = len(repolist)
    print("found [%d] rpm files..." % (repoleng))
    childnum = 3
    destfold = "/tmp/repodata"
    if not os.path.isdir(destfold):
        os.makedirs(destfold)

    print("beg primary.xml...")
    pipelist = []
    proclist = []
    for x in range(childnum):
        (parnpipe, chilpipe) = multiprocessing.Pipe(True)
        # Flush first so the child does not inherit (and later re-emit)
        # output that is still sitting in the parent's stdio buffers.
        sys.stdout.flush()
        sys.stderr.flush()
        procnumb = os.fork()
        if procnumb == 0:
            status = 1
            try:
                parnpipe.close()
                for pipeitem in pipelist:
                    pipeitem.close()
                _worker(x, chilpipe, repofold, destfold)
                status = 0
            except Exception:
                sys.stderr.write("erro:" + str(sys.exc_info()) + "\n")
            finally:
                try:
                    sys.stdout.flush()
                    sys.stderr.flush()
                except Exception:
                    pass
                # Leave without running the parent's cleanup handlers.
                os._exit(status)
        chilpipe.close()
        pipelist.append(parnpipe)
        proclist.append(procnumb)
    _dispatch(pipelist, collections.deque(repolist))
    for procitem in proclist:
        try:
            os.waitpid(procitem, 0)
        except OSError:
            pass
    print("end primary.xml...")

    chunks = []
    for x in range(childnum):
        chunkpath = "%s/primary.%d.xml" % (destfold, x)
        try:
            with open(chunkpath, "r", encoding="utf-8") as fileobjc:
                chunks.append(fileobjc.read())
            os.unlink(chunkpath)
        except (IOError, OSError) as erro:
            sys.stderr.write("erro: %s\n" % (erro,))
    primoutp = "".join(chunks)

    # Encode the complete document once so that the checksum and size in
    # repomd.xml describe exactly the bytes written to primary.xml.
    primdata = (
        "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
        + ("<metadata xmlns=\"http://linux.duke.edu/metadata/common\" xmlns:rpm=\"http://linux.duke.edu/metadata/rpm\" packages=\"%d\">\n" % (repoleng))
        + primoutp
        + "</metadata>\n"
    ).encode("utf-8")
    with open("%s/primary.xml" % (destfold), "wb") as fileobjc:
        fileobjc.write(primdata)

    with open("%s/repomd.xml" % (destfold), "w", encoding="utf-8") as fileobjc:
        fileobjc.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")
        fileobjc.write("<repomd xmlns=\"http://linux.duke.edu/metadata/repo\" xmlns:rpm=\"http://linux.duke.edu/metadata/rpm\">\n")
        fileobjc.write("<data type=\"primary\">\n")
        fileobjc.write("\t<checksum type=\"sha256\">%s</checksum>\n" % (hashlib.sha256(primdata).hexdigest()))
        fileobjc.write("\t<location href=\"repodata/primary.xml\"/>\n")
        fileobjc.write("\t<size>%d</size>\n" % (len(primdata)))
        fileobjc.write("</data>\n")
        fileobjc.write("</repomd>\n")


if (__name__ == "__main__"):
    main()