# Location of the complete FreeDB tarball.  Do not even _put_ this file in a
# Subversion-controlled directory, or svn fails miserably.  A bz2-compressed
# archive can be used as well, but the run is then about 10% slower.
FREEDB = "/usr/home/dan/tmp/freedb/freedb-complete-20050104.tar"

# Shell command that extracts the archive members to standard output.
FREEDB_COMMAND = "tar -xOf %s" % FREEDB

# Database name handed to pg.DB() in main().
DBNAME = "freedb"

# Import Psyco if available.  It is optional: when the module is installed,
# psyco.full() is called before the remaining imports below are executed;
# when it is missing, the ImportError is ignored and the script simply runs
# without it.
try:
    import psyco
    psyco.full()
except ImportError:
    pass

from funcs import *
import tarfile
import os
import time
import pg

# Running totals reported by showstats(); insertentries() declares both of
# them as globals.
discs = 0
songs = 0
# Wall-clock reference point for the elapsed time reported by showstats().
starttime = time.time()


def showstats():
    """Print the disc and song counters and the elapsed wall-clock time.

    Reads the module-level ``discs``, ``songs`` and ``starttime`` and writes
    four lines to standard output, the first of which is empty.  Returns
    nothing.
    """
    # Every print() receives exactly one pre-formatted string, so each line
    # is valid -- and emits the same text -- whether ``print`` is the
    # Python 2 statement or the Python 3 function.
    print("")
    print("Number of discs: %s" % discs)
    print("Number of songs: %s" % songs)
    print("Computation time (seconds): %s" % (time.time() - starttime))

def parsefreedb(f, datadict=None):
    """Parse FreeDB-style ``KEY=value`` lines from *f* into a dict.

    Not actually used, but kept around for posterity.

    Lines starting with ``#`` and blank lines are skipped.  Every other line
    has its surrounding whitespace stripped and is split at its first ``=``.
    When a key occurs more than once, the last value wins.

    Args:
        f: Iterable of text lines, e.g. an open file.
        datadict: Optional dict to fill.  It is emptied first and then
            updated in place.  When omitted, a new dict is created for
            this call.

    Returns:
        The filled dict (``datadict`` itself when one was passed in).

    Raises:
        ValueError: A non-blank, non-comment line contains no ``=``.
    """
    if datadict is None:
        # A ``{}`` default would be one dict shared by every call, so results
        # of an earlier call would be wiped by the next one.
        datadict = {}
    else:
        datadict.clear()

    # Iterate lazily instead of readlines() so the whole input is never held
    # in memory at once.
    for line in f:
        if line.startswith("#"):
            continue
        stripped = line.strip()
        if not stripped:
            # Blank lines carry no data; unpacking them would raise.
            continue
        key, value = stripped.split("=", 1)
        datadict[key] = value
    return datadict


# Read FreeDB entries line by line from f and record them through db.
#
#   f  -- object providing readline(); main() passes the pipe opened on
#         FREEDB_COMMAND.
#   db -- object providing insert() and update(); main() passes
#         pg.DB(DBNAME).
#
# NOTE(review): this function is unfinished and does not parse.  The broken
# statements are marked with NOTE(review) comments below; the intended
# database calls cannot be recovered from this file alone.
def insertentries(f, db):
    global discs, songs
    # "\n" is only a non-empty seed so that the loop is entered; readline()
    # returning "" (end of input) is what ends it.
    line = "\n"
    discoid = 0
    dtitle = ""
    
    while line != "":
        line = f.readline()
        # Lines shorter than two characters cannot satisfy the line[0] /
        # line[1] tests below.  This also sends the final "" straight back
        # to the loop condition.
        if len(line) < 2:
            continue
        if line[0] == '#' and len(line) > 5:
            if line[2:6] == "xmcd":
                # A "# xmcd" comment line is counted as the start of a new
                # disc.  Statistics are printed whenever the count so far is
                # a positive multiple of 1000.
                if discs > 0 and (discs % 1000) == 0:
                    showstats()
                discs += 1
                dtitle = ""
                # NOTE(review): syntax error -- the brackets are unbalanced
                # and {discid=-1} is not a valid dict literal.  Presumably
                # meant to insert a placeholder row into "disc" and keep its
                # identifier in discoid; confirm the schema before repairing.
                discoid = db.insert("disc", {discid=-1}))[
                continue
            elif line[2] != 'D':
                # Any other comment line is ignored unless its third
                # character is 'D'.
                continue
            else:
                # Presumably the "# Disc length: N seconds" header, whose
                # fourth whitespace-separated field is N -- TODO confirm.
                lengthline = line.split()
                try:
                    # NOTE(review): seconds is never assigned before this
                    # "+=", so the statement cannot succeed as written.
                    seconds += int(lengthline[3])
                except:
                # NOTE(review): syntax error -- the except clause has no
                # indented body; the continue below sits at the wrong level.
                continue
        # Of the remaining lines only those whose second character is 'T'
        # are handled, i.e. lines starting with "DT" or "TT" (presumably
        # DTITLE= and TTITLEn=).
        if line[1] != 'T':
            continue
        if line[0] == 'D':
            # line[7:-1] drops a seven-character prefix and the last
            # character (normally the newline).  Repeated lines are joined
            # with a single space.
            if dtitle != "":
                dtitle = dtitle + " " + line[7:-1]
            else:
                dtitle = line[7:-1]
            # NOTE(review): syntax error -- unfinished call with no
            # arguments and no closing parenthesis.
            db.update(
        elif line[0] == 'T':
            # The value starts right after the '=': at index 8 when '=' is at
            # index 7, otherwise index 9 is assumed.  Presumably one- versus
            # two-digit track numbers; a three-digit number would be cut in
            # the wrong place -- TODO confirm.
            if line[7] == '=':
                strtokw(line[8:-1], kwset)
            else:
                strtokw(line[9:-1], kwset)
            # NOTE(review): kwset is not defined anywhere in this file, and
            # strtokw()/insert() presumably come from "from funcs import *".
            # If kwset is a set, update(dtitle) adds the individual
            # characters of the title string -- confirm that is intended.
            kwset.update(dtitle)
            #print " ".join(kwset)
            # NOTE(review): songs is declared global above but is never
            # incremented in this function.
            insert(kwset)
            
def main():
    """Feed the FreeDB archive into the database, then print statistics.

    Opens a pipe on FREEDB_COMMAND and a connection to DBNAME, hands both to
    insertentries(), and calls showstats() once the import has finished.
    The connection and the pipe are closed even when the import raises; in
    that case the exception propagates and no final statistics are printed.
    """
    f = os.popen(FREEDB_COMMAND)
    try:
        db = pg.DB(DBNAME)
        try:
            insertentries(f, db)
        finally:
            # Release the connection whether or not the import succeeded.
            db.close()
    finally:
        # Always close the pipe so the tar child process is not left behind.
        f.close()

    showstats()

# Run the import only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()

# Number of discs: 1000000
# Number of songs: 13333859
# Number of index entries: 881075985
# Entries per song: 66.0780937462
# Keywords per song: 6.27440675651
