Logo Search packages:      
Sourcecode: gadfly version File versions  Download package

relalg.py

"""Simple relational algebra interpreter.

usage:

   To make the grammar

       python relalg.py make

   To run some relatoinal algebra expressions

       python relalg.py < expressions_file

"""

# EDIT INSTALLDIR TO BE ABLE TO LOAD UNDER ANY CWD
INSTALLDIR = "."

## simple relational algebra using only the equality predicate
## note: string values cannot contain ;

## statement sequencing using ; handled at higher level

00024 relalg_rules = """

statement ::

@R statementassn :: statement >> assignment

@R statementexpr :: statement >> rexpr

@R assignment1 :: assignment >> name = rexpr

@R assignmentn :: assignment >> name = assignment

@R union :: rexpr >> rexpr U rterm

@R rterm :: rexpr >> rterm

@R minus :: rexpr >> rexpr - rterm

@R intersect :: rterm >> rterm intersect rfactor

@R join :: rterm >> rterm join rfactor

@R rfactor :: rterm >> rfactor

@R projection :: rfactor >> projection [ names ] rfactor

@R names0 :: names >>

@R namesn :: names >> names1

@R names11 :: names1 >> name

@R names1n :: names1 >> names1 name

@R selection :: rfactor >> selection ( condition ) rfactor

@R conditionor :: condition >> condition | boolfactor

@R condfactor :: condition >> boolfactor

@R factorand :: boolfactor >> boolfactor & boolprimary

@R factorprime :: boolfactor >> boolprimary

@R notprimary :: boolprimary >> ~ boolprimary

@R primarycondition :: boolprimary >> ( condition )

@R primaryeq :: boolprimary >> expression = expression

@R expname :: expression >> name

@R expvalue :: expression >> value

@R rename :: rfactor >> rename [ names ] to [ names ] rfactor

@R named :: rfactor >> name

@R factorexpr :: rfactor >> ( rexpr )

@R relationval :: rfactor >> [ names ] ( rows )

@R rows0 :: rows >>

@R rowsn :: rows >> somerows

@R somerows1 :: somerows >> row

@R somerowsn :: somerows >> somerows , row

@R emptyrow :: row >> NIL

@R row1 :: row >> value

@R rown :: row >> row value

@R valuenum :: value >> number

@R valuestr :: value >> string
"""

keywords = """
  selection intersect rename projection to NIL U join
"""

puncts = """=^~|,-[]()&"""

nonterms = """
statement assignment rexpr rterm value rfactor
names names1 condition boolfactor boolprimary
expression rows somerows row
"""

try:
    from kjbuckets import *
except ImportError:
    from kjbuckets0 import *


class relation:

    def __init__(self, names, rows):
        #print "relation init", names, rows
        names = self.names = tuple(names)
        nameset = self.nameset = kjSet(names)
        for r in rows:
            if nameset != kjSet(r.keys()):
                raise ValueError, \
                 "bad names: "+`(names, r.items())`
        self.rows = kjSet(rows)

    def __repr__(self):
        from string import join
        names = self.names
        rows = self.rows.items()
        if not rows:
            nns = join(names)
            replist = [nns, "="*len(nns), " --<empty>--"]
            return join(replist, "\n")
        #print names, rows
        nnames = len(names)
        if nnames==1:
            replist = [names[0]]
        else:
            replist = [names]
        for r in rows:
            elt = r.dump(names)
            replist.append(r.dump(names))
        #print replist
        if nnames==1:
            replist = maxrep(replist)
        else:
            transpose = apply(map, tuple([None] + replist))
            adjusted = map(maxrep, transpose)
            replist = apply(map, tuple([None] + adjusted))
            replist = map(join, replist)
        replist.insert(1, "=" * len(replist[0]))
        #print replist
        return join(replist, "\n")

def maxrep(list):
    list = map(str, list)
    maxlen = max( map(len, list) )
    for i in range(len(list)):
        item = list[i]
        litem = len(item)
        list[i] = item + (" " * (maxlen-litem))
    return list

# context is a simple dictionary of named relations

def elt0(l, c):
    return l[0]

statementassn = elt0

def statementexpr(l, c):
    from string import split, join
    print
    print "  --- expression result ---"
    print
    data = str(l[0])
    print "   "+ join(split(data, "\n"), "\n   ")

def assignment1(l, c):
    [name, eq, val] = l
    c[name] = val
    return val

assignmentn = assignment1

def check_compat(v1, v2):
    names1, names2 = v1.names, v2.names
    if names1 != names2:
        raise ValueError, \
        "operands not union compatible "+`(names1, names2)`
    return names1, v1.rows, v2.rows

def union(l, c):
    [v1, U, v2] = l
    names1, r1, r2 = check_compat(v1, v2)
    return relation(names1, (r1+r2).items())

rterm = elt0

def minus(l, c):
    [v1, m, v2] = l
    names1, r1, r2 = check_compat(v1, v2)
    return relation(names1, (r1-r2).items())

def intersect(l, c):
    [v1, i, v2] = l
    names1, r1, r2 = check_compat(v1, v2)
    return relation(names1, (r1&r2).items())

def join(l, c):
    [v1, j, v2] = l
    n1, n2 = v1.names, v2.names
    r1, r2 = v1.rows.items(), v2.rows.items()
    n1s, n2s = kjSet(n1), kjSet(n2)
    common = tuple((n1s&n2s).items())
    result = kjSet()
    if common:
        # simple hashjoin
        G = kjGraph()
        for a in r1:
            G[a.dump(common)] = a
        for b in r2:
            for a in G.neighbors(b.dump(common)):
                result[a+b] = 1
    else:
        for a in r1:
            for b in r2:
                result[a+b] = 1
    return relation( (n1s+n2s).items(), result.items() )

rfactor = elt0

def projection(l, c):
    [p, b1, names, b2, val] = l
    proj = kjSet(names)
    result = kjSet()
    for row in val.rows.items():
        result[ proj * row ] = 1
    return relation( names, result.items())

def emptylist(l, c):
    return []

names0 = emptylist

namesn = elt0

def names11(l, c):
    return l

def names1n(l, c):
    [ns, n] = l
    ns.append(n)
    return ns

def selection(l, c):
    [sel, p1, cond, p2, val] = l
    return cond.filter(val)

## conditions are not optimized at all!

00271 class conditionor:
    def __init__(self, l, c):
        [self.c1, op, self.c2] = l
    def filter(self, val):
        v1 = self.c1.filter(val)
        v2 = self.c2.filter(val)
        return relation(v1.names, (v1.rows+v2.rows).items())

condfactor = elt0

class factorand(conditionor):
    def filter(self, val):
        v1 = self.c1.filter(val)
        v2 = self.c2.filter(val)
        return relation(v1.names, (v1.rows&v2.rows).items())

factorprime = elt0

class notprimary:
    def __init__(self, l, c):
        [n, self.c1] = l
    def filter(self, val):
        v1 = self.c1.filter(val)
        return relation(v1.names, (val.rows-v1.rows).items())

def elt1(l, c):
    return l[1]

primarycondition = elt1

class primaryeq:
    def __init__(self, l, c):
        [self.e1, eq, self.e2] = l
    def filter(self, val):
        rows = val.rows.items()
        e1v = self.e1.value(rows)
        e2v = self.e2.value(rows)
        result = kjSet()
        for (r, v1, v2) in map(None, rows, e1v, e2v):
            if v1==v2:
                result[r] = 1
        return relation(val.names, result.items())

class expname:
    def __init__(self, l, c):
        self.name = l[0]
    def value(self, rows):
        name = self.name
        r = list(rows)
        for i in xrange(len(r)):
            r[i] = r[i][name]
        return r

class expvalue(expname):
    def value(self, rows):
        return [self.name] * len(rows)

def rename(l, c):
    [ren, b1, names, b2, to, b3, names2, b4, val] = l
    if len(names)!=len(names2):
        raise ValueError, "names lengths must match"+`(names1, names2)`
    remap = kjDict(map(None, names2, names))
    oldnames = kjSet(val.names)
    addnames = kjSet(names2)
    remnames = kjSet(names)
    keepnames = oldnames - remnames
    remap = remap + keepnames
    if not remnames.subset(oldnames):
        #print remnames, oldnames
        raise ValueError, "old names not present"+`(names, val.names)`
    newnames = keepnames+addnames
    rows = val.rows.items()
    for i in range(len(rows)):
        rows[i] = remap*rows[i]
    return relation(newnames.items(), rows)

def named(l, c):
    [name] = l
    return c[name]

def relationval(l, c):
    [b1, names, b2, p1, rows, p2] = l
    names = tuple(names)
    ln = len(names)
    for i in xrange(len(rows)):
        this = rows[i]
        lt = len(this)
        if lt!=ln:
            raise ValueError, "names, vals don't match"+`(names,this)`
        if len(this)==1:
            this = this[0]
        else:
            this = tuple(this)
        rows[i] = kjUndump(names, this)
    return relation(names, rows)

rows0 = emptylist

rowsn = elt0

def somerows1(l, c):
    #print "somerows1", l
    return l

def somerowsn(l, c):
    #print "somerowsn", l
    [sr, c, r] = l
    sr.append(r)
    return sr

emptyrow = emptylist

row1 = somerows1

def factorexpr(l, c):
    return l[1]

def rown(l, c):
    #print "rows", l
    [r, v] = l
    r.append(v)
    return r

valuenum = valuestr = elt0

## snarfed from sqlbind
# note: all reduction function defs must precede this assign
00398 VARS = vars()

class punter:
    def __init__(self, name):
        self.name = name
    def __call__(self, list, context):
        print "punt:", self.name, list
        return list

class tracer:
    def __init__(self, name, fn):
        self.name = name
        self.fn = fn

    def __call__(self, list, context):
        print "tracing", self.name, list
        test = self.fn(list, context)
        print self.name, "returns", test
        return test

def BindRules(sqlg):
    for name in sqlg.RuleNameToIndex.keys():
        if VARS.has_key(name):
            #print "binding", name
            sqlg.Bind(name, VARS[name]) # nondebug
            #sqlg.Bind(name, tracer(name, VARS[name]) ) # debug
        else:
            print "unbound", name
            sqlg.Bind(name, punter(name))
    return sqlg

## snarfed from sqlgen

00431 MARSHALFILE = "relalg.mar"

import string
alphanum = string.letters+string.digits + "_"
userdefre = "[%s][%s]*" % (string.letters +"_", alphanum)
RACOMMENTREGEX = "COMMENT .*"

def userdeffn(str):
    return str

charstre = "'[^\n']*'"

def charstfn(str):
    return str[1:-1]

numlitre = "[%s][%s\.]*" % (string.digits, alphanum) # not really...

00448 def numlitfn(str):
    """Note: this is "safe" because regex
       filters out dangerous things."""
    return eval(str)

def DeclareTerminals(Grammar):
    Grammar.Addterm("name", userdefre, userdeffn)
    Grammar.Addterm("string", charstre, charstfn)
    Grammar.Addterm("number", numlitre, numlitfn)

def Buildrelalg(filename=MARSHALFILE):
    import kjParseBuild
    SQLG = kjParseBuild.NullCGrammar()
    #SQLG.SetCaseSensitivity(0)
    DeclareTerminals(SQLG)
    SQLG.Keywords(keywords)
    SQLG.punct(puncts)
    SQLG.Nonterms(nonterms)
    # should add comments
    SQLG.comments([RACOMMENTREGEX])
    SQLG.Declarerules(relalg_rules)
    print "working..."
    SQLG.Compile()
    filename = INSTALLDIR+"/"+filename
    print "dumping to", filename
    outfile = open(filename, "wb")
    SQLG.MarshalDump(outfile)
    outfile.close()
    return SQLG

def reloadrelalg(filename=MARSHALFILE):
    import kjParser
    filename = INSTALLDIR+"/"+filename
    infile = open(filename, "rb")
    SQLG = kjParser.UnMarshalGram(infile)
    infile.close()
    DeclareTerminals(SQLG)
    BindRules(SQLG)
    return SQLG

def runfile(f):
    from string import split, join
    ragram = reloadrelalg()
    context = {}
    #f = open(filename, "r")
    data = f.read()
    #f.close()
    from string import split, strip
    commands = split(data, ";")
    for c in commands:
        if not strip(c): continue
        print " COMMAND:"
        data = str(c)
        pdata = "  "+join(split(c, "\n"), "\n  ")
        print pdata
        test = ragram.DoParse1(c, context)
        print

# c:\python\python relalg.py ratest.txt

if __name__=="__main__":
    try:
        done = 0
        import sys
        argv = sys.argv
        if len(argv)>1:
            command = argv[1]
            if command=="make":
                print "building relational algebra grammar"
                Buildrelalg()
                done = 1
        else:
            runfile(sys.stdin)
            done = 1
    finally:
        if not done:
            print __doc__

Generated by  Doxygen 1.6.0   Back to index