
# (editor header residue, commented out so the file parses)
# | Current Path : /home/ift/52_procpy/dataninja/95__misc/ |
# Linux ift1.ift-informatik.de 5.4.0-216-generic #236-Ubuntu SMP Fri Apr 11 19:53:21 UTC 2025 x86_64 |
# | Current File : //home/ift/52_procpy/dataninja/95__misc/dyndb.py |
# -*- coding: utf-8 -*-
import os
import re
import sys
import sqlite3
import time
import codecs
import hashlib
import pickle
import base64
import json
import jsonpickle
import xlrd
import xlwt
#*************************************************************************
class DBObj (object):
    """
    Object store for arbitrary python objects, kept in a table 'entries'
    of a target SQLite database.

    Column MD5KEY: a hash key over the contents of all columns, unique.
    Column OBJID : optional arbitrary identifier, unique where present.
    Columns OBJCLASS, SLEEP, WAKETIME, JUMP: used by the 'process'
    machinery (see projection_process / upsert).
    Further columns are added dynamically as entries need them; if a
    dynamically created column is named in self.index_fields, an index
    on that column is created as well.
    """
    def __init__ (self,dbname=""):
        """
        Open (or create) the backing SQLite database and ensure the
        'entries' table and its base indexes exist.

        dbname may be:
          * a dict   -- dbname['PROCDB'] (if present) names the db file
          * a string -- used directly as the db file name
          * ""       -- (default) an in-memory database is used
        """
        dbfile = ":memory:"
        if type(dbname).__name__ == 'dict':
            if 'PROCDB' in dbname:
                dbfile = dbname['PROCDB']
        elif dbname:
            # BUGFIX: an empty dbname used to fall through to
            # 'dbfile = dbname', so sqlite3.connect("") created an
            # anonymous temp-file database instead of the documented
            # in-memory ":memory:" database.
            dbfile = dbname
        self.dbh = sqlite3.connect(dbfile)
        # sqlite3.Row allows access to result columns by name
        self.dbh.row_factory = sqlite3.Row
        cursor = self.dbh.cursor()
        cursor.execute("create table if not exists entries ("
                       + "MD5KEY TEXT UNIQUE,"
                       + "OBJID TEXT UNIQUE,"
                       + "OBJCLASS TEXT,"
                       + "SLEEP TEXT,"
                       + "WAKETIME TEXT,"
                       + "JUMP TEXT)"
                      )
        cursor.execute("create index if not exists md5idx on entries (MD5KEY)")
        cursor.execute("create index if not exists objidx on entries (OBJID)")
        self.dbh.commit()
        # cache of templates compiled by ff()
        self.compiled_text_pattern = {}
        # preferred output order of fields for the -REST- placeholder
        self.sorted_fields = ('NAME VORNAME STRASSE PLZ STADT TEL FAX MOBIL MAIL SEX INDEX INFO ' +
                              'K KTO BANK BLZ STIMM END MD5KEY').split(" ")
        self.local_enc = 'utf-8'
        # dynamically added columns with these names also get an index
        self.index_fields = {
            'DATUM' : 1,
            'NAME'  : 1,
            'PLZ'   : 1,
            'STADT' : 1,
            'TEL'   : 1,
            'KTO'   : 1,
            'KTO1'  : 1,
            'KTO2'  : 1,
        }
#*************************************************************************
    def row_serialize (self,dumpobj,mode=0):
        """
        Prepare an arbitrary python value ``dumpobj`` for json'ing.

        * Scalars (str/unicode/int/float/complex/long) are returned as-is.
        * Lists become dicts keyed by their stringified positions:
          ["ab", "dc", 672] --> { "0" : "ab", "1" : "dc", "2" : 672 }
        * Anything else that is not a dict is treated as an object: its
          __dict__ is taken, extended by the pair
          'OBJCLASS' : "<module>.<class>" so it can be reconstructed.
        * With mode=1, every key of the (top-level) dict whose
          key.upper() != key is moved into a sub-dict under 'OBJDATA';
          uppercase keys stay top-level and become database columns.

        The transformation is applied recursively, but sub-structures are
        always serialized with mode=0, so only the highest level is split
        into columns/OBJDATA. The result is json-serializable and maps
        1:1 onto the columns of the table 'entries' (see upsert); it could
        equally be stored as a MongoDB document without further changes.
        """
        # scalars pass through unchanged (py2 type names incl. unicode/long)
        if re.search(r"^(str|unicode|int|float|complex|long)$",type(dumpobj).__name__):
            return(dumpobj)
        if type(dumpobj).__name__ == 'list':
            objrows = {}
            zaehler = 0
            for o in dumpobj:
                objrows[str(zaehler)] = self.row_serialize(o)
                zaehler = zaehler + 1
            return(objrows)
        if not type(dumpobj).__name__ == 'dict': # if the dataset is an object, take its __dict__
            dumpobj.OBJCLASS = dumpobj.__module__ + "." + dumpobj.__class__.__name__ # and add class information
            dumpobj = dumpobj.__dict__
        if type(dumpobj).__name__ == 'dict':
            objrows = {}
            for o in dumpobj:
                if mode == 0 or o.upper() == o:
                    objrows[o] = self.row_serialize(dumpobj[o])
                else:
                    # lowercase/mixed-case keys are collected under OBJDATA
                    if 'OBJDATA' not in objrows:
                        objrows['OBJDATA'] = {}
                    objrows['OBJDATA'][o] = self.row_serialize(dumpobj[o])
            return(objrows)
#*************************************************************************
# Re-creates from a row-serialized structure the former python data variable
def row_objectify (self,dumpobj):
erg = {}
if not type(dumpobj).__name__ == 'dict':
return(dumpobj)
for o in dumpobj:
erg[o] = self.row_objectify(dumpobj[o])
if 'OBJDATA' in erg:
erg1 = self.row_objectify(erg['OBJDATA'])
for o in erg1:
erg[o] = erg1[o]
del erg['OBJDATA']
if 'OBJCLASS' in erg:
erg1 = jsonpickle.decode(' { "py/object" : "' + erg['OBJCLASS'] + '" } ')
del erg['OBJCLASS']
erg1.__dict__ = erg
return(erg1)
erg1 = []
zaehler = 0
ee = erg.keys()
ee.sort()
for o in ee:
try:
zaehler1 = int(o)
except:
return(erg)
if not zaehler == zaehler1:
return(erg)
erg1.append(erg[o])
zaehler = zaehler + 1
return(erg1)
#***************************************************************************
    def upsert (self,dumpobj0,delete_mode=0):
        """
        Update/insert an arbitrary python object (dumpobj0) into the
        datastore table 'entries'.

        Steps:
        1. row-serialization of dumpobj0 to a dictionary (row_serialize)
        2. json- resp. unicode-serialization of all values
        3. writing the values into columns named by the keys
           (missing columns are added to the table on the fly; columns
           listed in self.index_fields also get an index)
        4. computing a hash over all entered data, stored in column MD5KEY

        If the row-serialization of dumpobj0 itself contains a key MD5KEY:
        a) if a table entry with that MD5KEY exists, it is interpreted as
           the predecessor of dumpobj0 and deleted before the insert
        b) if no such entry exists, dumpobj0 is a successor of a
           predecessor that has changed meanwhile; the insert then relies
           on the unique constraints of MD5KEY and OBJID.

        Returns: the new MD5KEY; "" for a process entry that is already
        running; None if delete_mode > 1 or a unique constraint was hit.
        """
        if 'WAKETIME' in dir(dumpobj0):         # this is a special for the case
            try:                                # of dumpobj0 is a 'process' which
                if int(dumpobj0.WAKETIME) < 0:  # has to be entered. If a dumped process
                    return("")                  # is thawed, its WAKETIME in the table
            except:                             # is set to (-1) * <process_number>, this
                pass                            # indicates that it is already running
                                                # and shall not be re-awakened multiple.
        dumpobj = self.row_serialize(dumpobj0,1)
        if 'MD5KEY' in dumpobj:
            # remember the predecessor's hash; it must not flow into the
            # newly computed hash below
            md5key = dumpobj['MD5KEY']
            del dumpobj['MD5KEY']
        md5new = []
        entry_fields = []
        entry_values = []
        for o in dumpobj: # serializing all values with json resp. unicode (the 'opposite' of read_obj, see below)
            entry_fields.append(o)
            o1 = dumpobj[o]
            if type(o1).__name__ in ["str","int","unicode","float","long"]:
                o1 = unicode(o1)
            else:
                o1 = json.dumps(dumpobj[o],sort_keys=True,indent=4,ensure_ascii=False)
            entry_values.append(o1)
            md5new.append(o+":"+o1) # create the hash-value
        # the hash is built over the sorted "KEY:value" list: base64'd,
        # md5'd, base64'd again, trailing '==' padding stripped
        md5new.sort()
        md5new = ",".join(md5new)
        md5new = bytearray(md5new,'utf-8')
        md5new = base64.b64encode(md5new)
        md5new = hashlib.md5(md5new).digest()
        md5new = base64.b64encode(md5new) # new md5key
        md5new = re.sub(r"^(.*)\=\=$","\\1",md5new)
        cursor = self.dbh.cursor()
        nr = 1
        if 'md5key' in vars():    # if there is a former MD5KEY
            if md5new == md5key:  # which is identical to the new one,
                return(md5key)    # the entry is exactly yet in the database
            # remove the predecessor (LIKE tolerates suffixed variants)
            nr = cursor.execute("delete from entries where MD5KEY like '" + md5key + "%'")
        if True or nr > 0: # first idea was to set the MD5KEYs to "" if there is no predecessor
            entry_fields.append("MD5KEY")
            entry_values.append(md5new)
        if delete_mode > 1:
            # caller only wanted the (possible) deletion, not an insert
            return(None)
        text = ""
        o1 = ""
        for o in entry_fields: # transfer the entry into the database
            text = text + "," + o.upper()
            o1 = o1 + ",?"
        text = "insert into entries (" + re.sub(r"^\,","",text) + ") values (" + re.sub(r"^\,","",o1) + ")"
        while (0 == 0): # add columns as long as there are some missing
            try:
                cursor.execute(text,entry_values)
                break
            except Exception as e:
                m = re.search(r"no +column +named +([A-Z0-9\_]+)",str(e))
                if m:
                    o = m.group(1) # column extension
                    print("alter table entries add column " + o.upper() + " TEXT")
                    cursor.execute("alter table entries add column " + o.upper() + " TEXT")
                    if o.upper() in self.index_fields:
                        cursor.execute("create index if not exists " + o.upper() + "idx on entries (" + o.upper() + ")")
                else:
                    return(None) # another error, e.g. the unique constraints on MD5KEY or OBJID
        return(md5new)
#*************************************************************************
def read_obj (self,cursor):
"""
deserializes a database table row into a python dictionary.
which can be re-transformed into the former python object by
row_objectify
"""
content = cursor.fetchone()
if not content:
return None
obj = {}
zaehler = -1
for o in content:
zaehler = zaehler + 1
if not o:
continue
o1 = None
try:
o1 = int(o)
except:
pass
if not o1:
try:
o1 = float(o)
except:
pass
if not o1:
try:
o1 = json.loads(o)
except:
pass
if not o1:
try:
o1 = unicode(o)
except:
pass
obj[cursor.description[zaehler][0]] = o1
return(obj)
#*************************************************************************
    def query_data (self,md5key="",datafield=""):
        """
        Create and return a database cursor for a query against 'entries'.

        Three modes:
        * md5key == ""    : select all entries whose WAKETIME has been
                            reached (due 'process' entries).
        * datafield != "" : select entries where column <datafield>
                            equals <md5key>.
        * otherwise       : md5key is taken as a raw SQL where-clause.
                            If that fails, it is re-interpreted as a short
                            pattern expression <a1>,<a2>~<b1>,... meaning:
                            entries whose OBJDATA contains all of the
                            a-patterns OR all of the b-patterns.

        NOTE(review): all modes build SQL by string concatenation and are
        open to SQL injection -- acceptable only for trusted local use.
        TODO: switch to parameter binding.
        """
        cursor = self.dbh.cursor()
        if md5key == "": # first mode, without any parameters. Select entries whose WAKETIME is reached.
            cursor.execute("select * from entries where " +
                           "WAKETIME < " + str(time.time()) + " and WAKETIME > 0")
        elif not datafield == "": # search against a special value in a given column
            cursor.execute("select * from entries where " + datafield + " ='" + md5key + "'")
        else:
            sqlclause = "select * from entries where " + md5key # proper SQL query clause
            try:
                cursor.execute(sqlclause)
            except Exception as e:                                             # a special form of query: searching for
                md5key = re.sub(r",","%' ) and ( OBJDATA like '%",md5key,9999) # <a1>,<a2>,<a3>~<b1>,<b2> means:
                md5key = re.sub(r"~","%' ) or ( OBJDATA like '%",md5key,9999)  # search for entries where the patterns
                md5key = "( ( OBJDATA like '%" + md5key + "%' ) )"             # <a1>, <a2> and <a3> OR the patterns
                try:
                    cursor.execute("select * from entries where " + md5key)    # <b1> and <b2> are contained in OBJDATA
                except:
                    print ( "\nNo valid sql expression:\n\n" + sqlclause + "\n" +
                            str(e) + "\n\nand also not valid short expression:"
                            "\n\n" + md5key + "\n")
        return(cursor)
# erg = self.read_obj(cursor)
#
# if erg:
# return( self.row_objectify (erg) ) # create and return the object gotten by the query
# else:
# return(md5key)
#*************************************************************************
    def projection_process (self,entry,params=[]):
        """
        Advance a 'process' object by one step.

        Takes a python object (entry, e.g. previously queried by
        query_data) and executes the 'next' function whose name is stored
        in entry.JUMP. If the executed function does not set a new
        entry.JUMP, the next one is computed from the object class:
        methods named run, r<digits> or t<digits>, in dir() order.
        If entry is not an object but a string, it is taken as a module
        path / class name and a new instance is created from it, with
        params passed to the constructor.
        After execution the entry is upserted into the table 'entries'.
        Additionally, if entry.OBJID does not yet exist, a unique value is
        computed for it (via entry.next_test if the class provides it,
        otherwise by counting 001, 002, 003, ...), so the uniqueness
        constraint on column OBJID is fulfilled.

        NOTE(review): relies on exec/import of caller-supplied names --
        only safe with trusted input.
        """
        msg = "proceeded"
        try:
            # only succeeds if entry is a string: '<dir>/<mod>.py' is
            # rewritten to the dotted class path '<dir>.<mod>.Test'
            entry = re.sub(r"^(.*)\.py$","\\1.Test",re.sub(r"[\\\/]",".",entry,9999))
            m = re.search(r"^(.*)\.(.*?)$",entry)
        except:
            m = None
        if m: # new running item
            exec('import ' + m.group(1))
            try:
                exec('entry = ' + entry + '(params)\n')
                msg = "created"
                entry.__func__ = ""
            except Exception as e:
                return(e)
        else: # proceeding an item
            if 'WAKETIME' in vars(entry):
                if entry.WAKETIME < 0:
                    return("Entry is already running.")
                time_to_wait = entry.WAKETIME - time.time()
                if time_to_wait > 0:
                    return("Awake time not yet reached. " + str(time_to_wait) + " sec to wait.")
            try:
                entry.__func__ = entry.JUMP
                del entry.JUMP
                if not entry.__func__== "":
                    cursor = self.dbh.cursor()
                    # mark the entry as running ((-1) * pid) before executing,
                    # so concurrent workers cannot pick it up again
                    cursor.execute("update entries set WAKETIME='-" + str(os.getpid()) + "' " +
                                   "where WAKETIME > 0 " +
                                   "and MD5KEY='" + entry.MD5KEY + "'"
                                  )
                    self.dbh.commit()
                    if cursor.rowcount == 1:
                        exec("entry." + entry.__func__ + "()")
            except Exception as e:
                entry.JUMP = entry.__func__
                print str(e)
                entry.ERROR = str(e)
        if not 'JUMP' in vars(entry): # computing the next jump function if entry.JUMP does not exist
            entry.JUMP = entry.__func__
            for k in dir(entry):
                if re.search(r"^(run|r\d+|t\d+)$",k):
                    if entry.JUMP == "":
                        entry.JUMP = k
                        break
                    elif entry.JUMP == k:
                        # found the previous step; pick the next match
                        entry.JUMP = ""
        if entry.JUMP == "":
            # no further step: effectively sleep forever
            entry.SLEEP = 9999999999
        if 'SLEEP' in vars(entry):
            entry.WAKETIME = time.time() + float(entry.SLEEP)
            entry.WAKETIME = str(min(entry.WAKETIME,9999999999))
        set_parent_id = True
        while (0 == 0):
            if 'OBJID' in vars(entry): # computing a unique OBJID if it does not exist
                erg = self.upsert(entry)
                if erg:
                    print entry.OBJID + " " + msg
                    self.dbh.commit()
                    return(1)
                elif set_parent_id:
                    # OBJID collided: keep the old one as PARENTID and
                    # derive a fresh OBJID below
                    entry.PARENTID = entry.OBJID
                    set_parent_id = False
            try:
                m = re.search(r"^(.*)\.(.*)$",entry.OBJID)
                objclass = m.group(1)
                objnr = m.group(2)
            except:
                objclass = type(entry).__module__ + "." + type(entry).__name__
                if '__prefix__' in vars(entry):
                    objclass = entry.__prefix__ + "." + objclass
                objnr = ""
            try:
                objnr = entry.next_test(objnr)
            except:
                try:
                    objnr = ("%03u" % (int(objnr)+1))
                except:
                    objnr = "001"
            if objnr == "":
                return(0)
            entry.OBJID = objclass + "." + objnr
#*************************************************************************
def run_process (self,object_identifier,param=[]):
cursor = self.query_data(object_identifier,"OBJID")
entry = self.read_obj(cursor)
if entry:
erg = self.row_objectify (entry)
else:
erg = object_identifier
return( self.projection_process(erg,param) )
#*************************************************************************
    def csvimport (self,csvname):
        """
        Upsert a bulk of key-value entries from a csv (or Excel, see
        read_excel_file) file into the table 'entries'.

        The column names are taken from the first valid line; comment
        lines starting with '#', empty lines and underlining lines
        consisting only of '----' are skipped. Missing column names are
        created on the fly as MISC01, MISC02, ...
        Each cell can override its static column name: a cell of the form
        '<KEY>: <value>' stores <value> under the key <KEY>.
        Cells of the columns MDEL / DEL control deletion/merging: the
        value 'M' merges an entry into the preceding one.
        Returns the list of upserted entries, each carrying its MD5KEY.
        """
        text = self.read_excel_file(csvname)
        if not text:
            text = codecs.open(csvname,'r',self.local_enc).read()
        # quoted cells may contain commas/newlines: cut all "..." contents
        # out first and re-insert them cell by cell later on
        cells = re.split(r"\"|$",text,99999999,re.DOTALL) # make a list of all text patterns separated by "
        text = re.sub(r"\"(.*?)\"","---FIELD---",text,99999999,re.DOTALL) # replace all "..." cells with ---FIELD---
        text = text.split("\n")
        cells_count = -2
        tmp_entry_list = []
        entry0 = {}
        column_descriptors = []
        for zeile in text:
            try:
                zeile_sum = zeile
                zeile = zeile.split(",")
            except:
                zeile_sum = "".join(zeile)
            if re.search(r"^(\#|\-+$| *$|alter *table)",zeile_sum): # skip comments, empty lines, underlining ---------
                continue
            if cells_count == -2:
                # the first valid line carries the column names
                column_descriptors = zeile
                cells_count = -1
                continue
            entry = {}
            zaehler = -1
            miscnr = 0
            for cell in zeile:
                zaehler = zaehler + 1
                cell = cell.strip()
                if cell == "---FIELD---":
                    # re-insert the original quoted cell content
                    cells_count = cells_count + 2
                    cell = cells[cells_count].strip()
                m = re.search(r"^\s*([A-Za-z0-9\_]+)\: *(.*)$",cell,re.DOTALL)
                if m:
                    # '<KEY>: <value>' cell overrides the column name
                    field = m.group(1)
                    cell = m.group(2)
                else:
                    while (0 == 0):
                        try:
                            if unicode(column_descriptors[zaehler]) == unicode(""):
                                miscnr = miscnr + 1
                                column_descriptors[zaehler] = unicode("MISC" + ("%02u" % miscnr))
                            break
                        except:
                            # row is wider than the header: extend it
                            miscnr = miscnr + 1
                            column_descriptors.append(unicode("MISC" + ("%02u" % miscnr)))
                    if re.search(r"^\s*$",cell,re.DOTALL):
                        continue
                    field = column_descriptors[zaehler]
                cell = re.sub(r"^\-\-\> *","",cell) # strip former merge markers (see mark_for_merge)
                cell = cell.strip()
                if field == "MDEL":
                    entry['DELETE'] = cell
                elif field == "DEL" and cell == "X":
                    entry['DELETE'] = cell
                elif field == "MD5KEY":
                    entry[field] = cell
                elif not re.search(r"^MISC\d+$",field):
                    entry[field.lower()] = cell
            if 'DELETE' in entry and entry['DELETE'] == "M": # THE MERGE !
                entry1 = dict(entry0)
                for k in entry:
                    if k == "DELETE":
                        continue
                    if not k in entry1 or entry1[k] == "":
                        entry1[k] = entry[k]
                    elif not entry1[k] == entry[k]:
                        if re.search(r"^(INFO|INDEX)$",k):
                            entry1[k] = entry1[k] + "\n" + entry[k]
                        else:
                            # conflicting value: merge attempt failed
                            del entry1
                            break
            # NOTE(review): 'entry1' in vars() also sees a leftover entry1
            # from an earlier successfully merged row -- verify whether a
            # 'del entry1' after the assignment below was intended.
            if 'entry1' in vars():
                entry0 = entry1 # entry1 is a merge result which could be merged again
            else:               # and therefore put in entry0
                entry1 = entry0
                entry0 = entry  # No merge possible. So upsert the entry0 and make the
                entry = entry1  # entry to the new entry0
            for field in entry.keys():
                # mirror indexed fields into their uppercase column twin
                if field.upper() in self.index_fields:
                    entry[field.upper()] = entry[field]
            erg = self.upsert(entry)
            if erg:
                entry['MD5KEY'] = erg
                tmp_entry_list.append(entry)
        self.dbh.commit()
        self.column_descriptors = column_descriptors
        return(tmp_entry_list)
#*************************************************************************
def read_excel_file (self,xlsname):
m = re.search(r"^(.*)\,(.*)$",xlsname)
if m:
xlsname = m.group(2)
try:
wb = xlrd.open_workbook(xlsname)
except:
return(None)
sheets = wb.sheet_names()
sheet = sheets[0]
if m:
sheet = m.group(1)
try:
sheet = wb.sheet_by_name(sheet)
except:
print "Sheets in the file " + m.group(2) + ":\n" + str(sheets)
return(0)
text = []
xrows = sheet.row(4)
rcount = 0
while (0 == 0):
if rcount == sheet.nrows:
break
ccount = 0
text.append([])
while (0 == 0):
if ccount == sheet.ncols:
break
cell_content = sheet.cell_value(rcount,ccount)
try:
if cell_content == int(cell_content):
cell_content = unicode(int(cell_content))
except:
pass
text[-1].append(unicode(cell_content))
ccount = ccount + 1
rcount = rcount + 1
return(text)
#**************************************************************************
    def ff (self,text,entry):
        """
        Render the template ``text`` against ``entry``: every occurrence
        of -KEYNAME- is replaced by the value of entry[KEYNAME] (see
        valfield for the full placeholder grammar: -xKEY-, -KEYn-,
        -COUNT-, and -REST-, which prints all remaining key-value pairs).
        The template is compiled once into a python string expression and
        cached in self.compiled_text_pattern.
        NOTE(review): the compiled template is run through exec -- only
        safe with trusted templates.
        """
        try:
            text1 = self.compiled_text_pattern[text]
        except:
            # compile the template: each placeholder becomes a call to
            # self.valfield(entry, delflag, field, nr)
            text1 = unicode ( "u\"\"\"" + re.sub(r"(^|-)(x?)(\d*[A-Z\_0-9\.]+?|x)(\d*)-",
                              "\"\"\"+unicode(self.valfield(entry,\"\\2\",\"\\3\",\"\\4\"))+u\"\"\"",
                              text,99999999,flags=re.DOTALL) + "\"\"\"" )
            self.compiled_text_pattern[text] = text1
        # per-entry cache used by valfield (-REST- bookkeeping)
        self.entry_cache = {}
        exec "erg = " + text1
        self.entry_cache = {}
        return(erg)
#*************************************************************************
    def valfield (self,entry,delflag,field,nr=""):
        """
        Resolve a single template placeholder (helper of ff).

        entry   : dict with the entry's key-value pairs
        delflag : "x" if the placeholder was -xFIELD- (a missing field
                  then renders as "" instead of the literal placeholder)
        field   : field name; special values: COUNT -> running entry
                  counter, REST... -> printout of all remaining fields
        nr      : optional index: picks element <nr> of a comma-separated
                  value, or retries field<nr> if listed in entry['index']
        Results are cached per entry in self.entry_cache.
        """
        if field == "x" and delflag == "":
            delflag = "x"
            field = ""
        erg = ""
        while (0 == 0):
            try:
                # already resolved for this entry
                erg = self.entry_cache[field]
                break
            except:
                pass
            if not field == "DELETE":
                try:
                    erg = entry[field]
                except:
                    try:
                        erg = entry[field.lower()]
                    except:
                        pass
            if erg == "" and not nr == "" and 'index' in entry:
                # field<nr> may be listed in the index field -> retry with it
                if re.search((field+nr).lower(),(" " + entry['index'] + " ").lower()):
                    field = field+nr
                    nr = ""
                    continue
            if type(erg) == type(None):
                erg = ""
            if not nr == "":
                try:
                    erg = (erg.split(","))[int(nr)]
                except:
                    pass
            if field == "COUNT":
                erg = str(self.entries_count)
            elif re.search(r"^REST",field):
                # render all fields that have not yet been consumed by the
                # template (i.e. are not in self.entry_cache)
                erg = ""
                ee = entry.keys()
                ee.sort()
                for k0 in self.sorted_fields + ee:
                    if k0.upper() == "MD5KEY":
                        k = k0.upper()
                    else:
                        k = k0.lower()
                    if not k in entry:
                        continue
                    if not k in self.entry_cache and type(entry[k]) != type(None):
                        if k == "md5key" and not delflag == "":
                            continue
                        # if field == "REST":
                        #     erg = erg + "\n" + k + ": " + self.valfield(entry,"",k) + "\",\""
                        erg = erg + "\n" + ("%-15s" % (k.upper() + ": ")) + unicode(entry[k]) + "\",\""
                        self.entry_cache[k] = erg
                erg = "\"" + erg + "\n\""
                self.entry_cache[field.lower()] = erg
            break
        if erg == "" and delflag == "":
            # keep the placeholder visible when the field is missing
            erg = "-" + field + nr + "-"
        # erg = re.sub(r"\"","\\\"",erg,99999999)
        return(erg)
#*************************************************************************
    def export_data (self,pars):
        """
        Read data out of the database in a rendered form.

        Any parameter naming a .csv/.adr/.xls(x) file is imported first
        via csvimport. The remaining parameters are read in this order:
        1. filter          : sql where-clause against 'entries' (or the
                             short form <p1>,<p2>,...~<p3>,..., see
                             query_data)
        2. template        : text template; each -KEY- is replaced by the
                             entry's value, -xKEY- renders "" if missing.
                             If the template names an existing file, the
                             rendered contents are written to files whose
                             names are themselves rendered templates.
        3. sort1           : template; entries with an identical rendered
                             value are grouped and passed through
                             mark_for_merge.
        4. filter_template : re-applies the short-form filter patterns,
                             but against the rendering of this template.
        """
        for par in pars:
            par = par.decode(self.local_enc)
            if re.search(r"^(.*)\.(csv|adr|xlsx?)$",par) and not 'filter' in vars():
                tmp_entry_list = self.csvimport(par)
            elif not 'filter' in vars():
                filter = par
            elif not 'template' in vars():
                template = par
            elif not 'sort1' in vars():
                sort1 = par
            elif not 'filter_template' in vars():
                filter_template = par
        if not 'sort1' in vars():
            sort1 = "-MD5KEY-"
        if 'filter' in vars() and not filter == 'IMPORT':
            if not 'template' in vars():
                template = "."
            if filter == "":
                filter = "~"   # matches everything in short-form syntax
            cursor = self.query_data(filter)
            pre_check = 0
            # NOTE(review): sort1 is always set above, so this condition is
            # always true and the else branch below looks unreachable.
            if 'sort1' in vars() or 'filter_template' in vars():
                filter_conditions = []
                for pattern in filter.split("~"):
                    filter_conditions.append(pattern.split(","))
                tmp_entry_dict = {}
                pre_check = 1
            else:
                if not 'template' in vars() and 'column_descriptors' in vars(self):
                    # derive a default template from the imported csv header
                    template = ("-,-x".join(self.column_descriptors)) + "-"
                    template = re.sub(r"-xMDEL-","-xDEL-",template)
                    template = re.sub(r"^-,","",template)
                else:
                    pass
                    template = "."
                pre_check = 2
            if template == ".":
                template = "-xDEL-,-xNAME-,-xVORNAME-,-xSTRASSE-,-xPLZ-,-xSTADT-,-xTEL-,-xMAIL-,-REST-"
            template_csv = ""
            convert_to_xls = {} # if the template is an xls(x)-file, firstly convert to csv
            if os.path.isfile(template):
                template_csv = template
                text = self.read_excel_file(template_csv)
                if text:
                    template_content = ""
                    for rows in text:
                        zeile = ",".join(rows)
                        template_content = template_content + "\n"
                else:
                    template_content = codecs.open(template_csv,'r',self.local_enc).read()
                m = re.search(r"^(.*)\.(.*)$",template)
                template_header = "";
                # first non-empty line of the file is the column header
                m1 = re.search(r"^\s*([^\n]+?)\s*$",template_content,flags=re.DOTALL)
                if m1:
                    template_header = re.sub(r"-x?","",m1.group(1),99999999) + "\n----------------------\n"
                template_filename = re.sub(r"^(.*)[\\\/](.*)$","\\2",template_csv)
            else:
                template_header = re.sub(r"-x?","",template,99999999) + "\n----------------------\n"
            if not re.search(r"[ \:\;]",template_header):
                print "\n" + template_header,
            self.entries_count = 0
            # outer loop runs at most twice: pass 1 collects/sorts entries
            # (pre_check == 1), pass 2 renders them (pre_check == 2)
            while (0 == 0):
                while (0 == 0):
                    if pre_check == 2:
                        try:
                            entry = tmp_entry_list.pop(0)
                        except:
                            break
                    else:
                        entry = self.read_obj(cursor)
                        entry = self.row_objectify(entry)
                        if not entry:
                            break
                    if pre_check == 1:
                        if 'filter_template' in vars():
                            # re-apply the short-form patterns against the
                            # rendering of filter_template
                            filter_str = self.ff(filter_template,entry)
                            for p1 in filter_conditions:
                                bed = True
                                for p2 in p1:
                                    if re.search(p2,filter_str):
                                        continue
                                    bed = False
                                    break
                                if bed:
                                    break
                            if not bed:
                                continue
                        sort_pattern = self.ff(sort1,entry)
                        if sort_pattern == "":
                            sort_pattern = entry['MD5KEY']
                        if not sort_pattern == "":
                            if not sort_pattern in tmp_entry_dict:
                                tmp_entry_dict[sort_pattern] = [entry]
                            else:
                                # same sort pattern: flag for merging
                                entry['del'] = "M"
                                tmp_entry_dict[sort_pattern].append(entry)
                    else:
                        self.entries_count = self.entries_count + 1
                        cr = "\n"
                        if not template_csv == "":
                            template_content1 = self.ff(template_content,entry)
                            template_filename1 = self.ff(template_filename,entry)
                            if not os.path.isfile(template_filename1):
                                codecs.open(template_filename1,'w',self.local_enc).write(template_header)
                            m = re.search(r"^(.*)\.(.*)$",template_filename1)
                            if m.group(2) == "xls" or m.group(2) == "xlsx":
                                template_content1 = re.sub(",\"\s*([A-Z\_0-9]+)\: *",",\"\\1: ",template_content1,99999999)
                                template_content1 = re.sub(",\"\n\"","",template_content1,99999999)
                                convert_to_xls[template_filename1] = m.group(2)
                            if not re.search("\n",template_content1):
                                cr = ""
                            codecs.open(template_filename1,'a',self.local_enc).write(cr+template_content1+cr)
                        else:
                            rendered_text = self.ff(template,entry).encode(self.local_enc)
                            if not re.search("\n",rendered_text):
                                cr = ""
                            print cr + rendered_text + cr
                if not pre_check == 1:
                    break
                # pass 1 done: sort the groups, merge-mark duplicates and
                # feed the result into pass 2
                tmp_entry_list = []
                sort_tmp = tmp_entry_dict.keys()
                sort_tmp.sort()
                for k in sort_tmp:
                    for e in self.mark_for_merge(tmp_entry_dict[k]):
                        tmp_entry_list.append( e )
                pre_check = 2
            # finally convert csv output files back to real xls(x) workbooks
            for file in convert_to_xls:
                template_content1 = codecs.open(file,'r',self.local_enc).read()
                template_content1 = re.sub(r"\\\"","",template_content1,99999999)
                cells = re.split(r"\"|$",template_content1,99999999,flags=re.DOTALL)
                cells_count = -1
                template_content1 = re.sub(r"\"(.*?)\"","---FIELD---",template_content1,99999999,flags=re.DOTALL)
                wb = xlwt.Workbook(encoding=self.local_enc)
                sheet = wb.add_sheet("DataExcerpt")
                style = xlwt.XFStyle()
                style.alignment.wrap = 1
                crows = 0
                for zeile in template_content1.split("\n"):
                    ccols = 0
                    for cell in zeile.split(","):
                        if cell == "---FIELD---":
                            cells_count = cells_count + 2
                            cell = cells[cells_count].strip()
                        sheet.write(crows,ccols,cell,style)
                        ccols = ccols + 1
                    crows = crows + 1
                wb.save(file)
#*************************************************************************
def mark_for_merge (self,entries):
diff_entry = {}
for entry in entries:
for k in entry:
if k == "MD5KEY":
continue
if k in diff_entry:
if not diff_entry[k] == entry[k]:
diff_entry[k] = entry[k]
entry[k] = "-->" + unicode(entry[k])
else:
diff_entry[k] = entry[k]
else:
diff_entry[k] = entry[k]
return(entries)
#*************************************************************************
def normalize (self,field,content):
return(content)
#*************************************************************************
if __name__ == '__main__':
    # Simple command-line usage: argv[1] names the target database file.
    r = DBObj(sys.argv[1])
    # BUGFIX: the original called r.ktoimport("gmbh.kto"), but no method
    # 'ktoimport' exists anywhere in this file (it raised AttributeError
    # unconditionally); csvimport is the bulk-import entry point.
    r.csvimport("gmbh.kto")
    r.dbh.commit()
#*************************************************************************
#*************************************************************************
#*************************************************************************
#*************************************************************************
#*************************************************************************
# OBSOLETE:
#*************************************************************************
# OBSOLETE: dead code kept for reference only. xxmerge returns its input
# immediately; everything below the first return is unreachable.
def xxmerge (self,entries):
    return(entries)
    merged_entry = {}
    for entry in entries:
        for k0 in entry:
            if k0 == "md5key":
                continue
            content = entry[k0]
            content = self.normalize(k0,content)
            try:
                # 'xxy' raises NameError on purpose: the 'index' special
                # case below is disabled
                xxy
                if int(content) == 1:
                    content = k0
                    k = "index"
            except:
                k = k0
            if k0 not in merged_entry:
                merged_entry[k0] = content
            else:
                # conflicting values are collected into a sub-dict
                while 0 == 0:
                    try:
                        merged_entry[k][content] = 1
                        break
                    except Exception as e:
                        # print str(e)
                        merged_entry[k] = { k0 : content }
    return( [merged_entry] )
    # unreachable: former normalization of the 'index' word list
    indexlist = ""
    try:
        if type(merged_entry["index"]).__name__ == 'dict':
            indexlist = ""
            for k in merged_entry["index"]:
                indexlist = indexlist + " " + k
            merged_entry["index"] = indexlist
    except:
        merged_entry["index"] = ""
    indexlist = merged_entry["index"].upper()
    indexlist = re.sub(r"([\-\: \,\;\.\n]+)"," ",indexlist,99999999,flags=re.DOTALL)
    indexdict = {}
    for indexelement in indexlist.split(" "):
        if re.search(r"^\s*$",indexelement):
            continue
        indexdict[indexelement] = 1
    indexlist = indexdict.keys()
    merged_entry['index'] = " ".join(indexlist)
    return( [merged_entry] )
#*************************************************************************
# OBSOLETE: render all entries matching the sql where-clause 'query'
# through the template 'text' (see ff) into one string.
def xxextract_db (self,query,text):
    cursor = self.dbh.cursor()
    if not query == "":
        query = " where " + query
    cursor.execute("select * from entries " + query)
    text1 = ""
    while (0 == 0):
        o = cursor.fetchone()
        if o == None:
            break
        text1 = text1 + self.ff(text,o) + "\n"
    return(text1)
def xxprocimport (self,dumpobj):
    """
    OBSOLETE: serialize a process object into the 'entries' table.

    Collects every UPPER-CASE attribute of `dumpobj` into column/value
    lists, pickles the remaining object into a base64 'OBJ' blob, and
    inserts it (directly, or via a separate store when self has no
    TESTRUN attribute).

    NOTE(review): this method cannot run as written -- `actual_func` is
    read below before it is ever assigned (and rebound inside the loop,
    which makes it a local), `dyndb.DBObj` refers to this very module,
    and `self.dbfile` is never set by __init__. It also uses Python 2
    print statements. Kept only as OBSOLETE reference code.
    """
    entry_fields = []
    entry_values = []
    if not 'JUMP' in vars(dumpobj):
        # NameError in practice: `actual_func` is undefined at this point
        dumpobj.JUMP = actual_func
    # scan run/rNNN/tNNN methods to pick the next JUMP target after the
    # current one (round-robin style)
    for o in dir(dumpobj):
        if re.search(r"^(run|r\d+|t\d+)$",o):
            if dumpobj.JUMP == "":
                dumpobj.JUMP = o
                break
            elif dumpobj.JUMP == o:
                dumpobj.JUMP = ""
    if dumpobj.JUMP == "":
        # no next step: park the process "forever"
        dumpobj.SLEEP = 9999999999
    waketime = time.time()
    try:
        waketime = waketime + float(dumpobj.SLEEP)
    except Exception as e:
        print str(e)
        pass
    # clamp to the sentinel maximum and store as text
    waketime = str(min(waketime,9999999999))
    dumpobj.WAKETIME = waketime
    # move every UPPER-CASE attribute off the object and into the
    # field/value lists (exec-based; the attribute is deleted afterwards
    # so it is not pickled twice)
    for var in dir(dumpobj):
        if not var.upper() == var:
            continue
        entry_fields.append(var)
        exec('entry_values.append(str(dumpobj.'+var+'))')
        exec('del dumpobj.' + var)
        if var == "TESTRUN":
            self._init = entry_values[-1]
        if var == "JUMP":
            # NOTE(review): rebinding `actual_func` here makes it a local
            # name for the whole function -- another NameError source
            if not actual_func == "":
                actual_func = actual_func + ", "
            print actual_func + "next Jump: " + entry_values[-1]
    o = 1
    # pickle the stripped object, base64-encode, drop the b'...' wrapper
    o = re.sub(r"^b\'(.*)\'$", r"\1", str(base64.b64encode(pickle.dumps(dumpobj))))
    # wrap the blob at 80 characters per line
    text = ""
    while not o == "":
        text = text + o[:80] + "\n";
        o = o[80:]
    entry_fields.append('OBJ')
    entry_values.append(text)
    # NOTE(review): `dyndb` would have to be an import of this module
    # itself, and `self.dbfile` is not set in __init__ -- TODO confirm
    procstore = dyndb.DBObj(self.dbfile)
    if not 'TESTRUN' in vars(self):
        entry_fields.append('TESTRUN')
        entry_values.append('')
        # extract the class name from str(dumpobj.__class__)
        testclass = re.sub(r"^(.*)\'(.*)\'(.*)$","\\2",str(dumpobj.__class__))
        zaehler = 0
        # keep generating testrun ids until the (unique-keyed) insert succeeds
        while (0 == 0):
            zaehler = zaehler + 1
            try:
                entry_values[-1] = testclass + "." + dumpobj.next_testrun(zaehler)
            except:
                # fallback id: <Class>.tNNN
                entry_values[-1] = testclass + "." + "t" + ("%03u" % zaehler)
            if entry_values[-1] == "":
                return(None)
            try:
                md5key = procstore.importdata(entry_fields[:],entry_values[:])
                break
            except Exception as e:
                # print str(e)
                pass
        self._init = entry_values[-1]
    else:
        md5key = self.importdata(entry_fields[:],entry_values[:])
    self.dbh.commit()
    return(md5key)
#*************************************************************************
def xxproceed (self,md5key="",datafield=""):
    """
    OBSOLETE: restart a serialized process stored in 'entries'.

    With no md5key, picks any due entry (WAKETIME elapsed and positive);
    otherwise selects by TESTRUN (if the key contains a dot) or MD5KEY.
    Unpickles the stored OBJ blob, re-attaches the row's columns as
    attributes, then calls the method named in JUMP.

    NOTE(review): cannot run as written -- `procstore` is undefined in
    this scope, `traceback` is not imported at the top of the file, and
    Python 2 print statements are used. String-built SQL and
    pickle.loads of stored data would also be unsafe on untrusted input.
    Kept only as OBSOLETE reference code.
    """
    cursor = self.dbh.cursor()
    if md5key == "":
        # pick any entry whose wake-up time has passed (WAKETIME stored as text)
        cursor.execute("select * from entries where not OBJ = 'None'" +
            " and WAKETIME < " + str(time.time()) + " and WAKETIME > 0")
    else:
        if datafield == "":
            # keys containing a dot are <Class>.<testrun> identifiers
            if re.search(r"\.",md5key):
                datafield = "TESTRUN"
            else:
                datafield = "MD5KEY"
        # print("select * from entries where " + datafield + " ='" + md5key + "'")
        cursor.execute("select * from entries where " + datafield + " ='" + md5key + "'")
    entry_fields = cursor.description[:]
    entry_values = cursor.fetchone()
    # print str(entry_fields)
    if type(entry_values) == type(None):
        return("No process found for restart")
    # first pass over the columns: fetch the pickled blob, check timing
    zaehler = 0
    for f in entry_fields:
        if f[0] == "OBJ":
            dumped_obj = entry_values[zaehler]
        if f[0] == "WAKETIME":
            # negative WAKETIME encodes "-<pid> of the worker running it"
            if float(entry_values[zaehler]) < 0:
                return("Process is running.")
            o = float(entry_values[zaehler]) - time.time()
            if o > 0:
                return("Awake time not yet reached, " + str(int(o)) + " sec. to wait ...")
        zaehler = zaehler + 1
    # strip the 80-column line wrapping, decode and unpickle the object
    o = bytes(re.sub(r"\n", r"", dumped_obj))
    o = base64.b64decode(o)
    o = pickle.loads(o)
    # second pass: re-attach every non-OBJ column as an attribute and
    # claim the entry by writing our pid into WAKETIME
    zaehler = 0
    for f in entry_fields:
        if not f[0] == "OBJ":
            exec("o."+f[0] + " = '''" + str(entry_values[zaehler]) + "'''")
            if f[0] == "MD5KEY":
                # NOTE(review): `procstore` is not defined in this method
                cursor = procstore.dbh.cursor()
                cursor.execute("update entries set WAKETIME='-" + str(os.getpid()) +
                    "' where WAKETIME > 0 and MD5KEY='" + entry_values[zaehler] + "'")
                procstore.dbh.commit()
                # someone else claimed it first
                if cursor.rowcount != 1:
                    return(None)
                md5key = entry_values[zaehler]
            if f[0] == "TESTRUN":
                testrun = entry_values[zaehler]
        zaehler = zaehler + 1
    try:
        func = o.JUMP
        del o.JUMP
        if not func == "":
            # run the next step, then re-import the object for the step after
            exec("o." + func + "()")
            self.procimport(func,o)
        return(testrun)
    except Exception as e:
        print str(e)
        # NOTE(review): `traceback` is not imported in the visible header
        errmsg = ( '\nERROR in object ' + md5key + ", " + testrun +
            ':\n' + '\n'.join(traceback.format_tb(sys.exc_info()[2])) )
        cursor = procstore.dbh.cursor()
        cursor.execute("update entries set ERROR=? where MD5KEY='" + md5key + "'",[errmsg])
        self.dbh.commit()
        return(errmsg)
#*************************************************************************
def xxktoimport (self,ktofile):
self.ktofile = ktofile
text = codecs.open(self.ktofile,'r','utf-8').read()
cursor = self.dbh.cursor()
column_names = { "ID": 1, "SORT": 1 }
kto = ""
for zeile in text.split("\n"):
entry_fields = []
entry_values = []
md5key = ""
m = re.search(r"^(\d\d\d\d\d\d\d\d) +(\-?\d+\.\d\d) +(\S+) +(\S+) +(\-?\d+\.\d\d) +(.*?) *$",zeile)
if m:
entry_fields.append("DATUM")
entry_values.append(m.group(1))
entry_fields.append("BETRAG")
entry_values.append(m.group(2))
entry_fields.append("KTO1")
entry_values.append(kto+m.group(3))
entry_fields.append("KTO2")
entry_values.append(kto+m.group(4))
entry_fields.append("BEMERKUNG")
entry_values.append(m.group(6))
md5key = m.group(1) + ";" + m.group(2) + ";" + m.group(3) + ";" + m.group(4) + ";" + m.group(6)
else:
m = re.search(r"^(\S+) +(\S+) +(\-\d+\.\d\d) *$",zeile)
if m:
entry_fields.append("KTO")
entry_values.append(m.group(1))
entry_fields.append("BEZEICHNUNG")
entry_values.append(m.group(2))
md5key = m.group(1) + ";" + m.group(2)
elif kto == "":
kto = "-" + re.sub(r"^(\S+)(.*)$","\\1",zeile)
if md5key == "":
continue
md5key = bytearray(md5key,'utf-8')
md5key = base64.b64encode(md5key)
md5key = hashlib.md5(md5key).digest()
md5key = base64.b64encode(md5key) # new md5key
entry_fields.append("MD5KEY")
entry_values.append(md5key)
for o in entry_fields:
if o not in column_names:
column_names[o] = 1
try:
cursor.execute("alter table entries add column " + o.upper())
if o.upper() in self.index_fields:
cursor.execute("create index if not exists " + o.upper() + "idx on entries (" + o.upper() + ")")
except:
pass
text = ""
o1 = ""
for o in entry_fields: # transfer the entry into the database
text = text + "," + o
o1 = o1 + ",?"
# print "insert into entries (" + re.sub(r"^\,","",text) + ") values (" + re.sub(r"^\,","",o1) + ")"
text = "insert into entries (" + re.sub(r"^\,","",text) + ") values (" + re.sub(r"^\,","",o1) + ")"
try:
cursor.execute(text,entry_values)
except:
print zeile