
# Source file : /home/cgabriel/20_dev/12_procpy/dataninja/objpickle.py
# Origin host : ift1.ift-informatik.de (Linux 5.4.0-216-generic x86_64)
# -*- coding: utf-8 -*-
import os
import re
import sys
import base64
import hashlib
import json
import procpy
# jsonpickle is required to re-instantiate objects from their OBJCLASS tag
# (see from_mongo_dict).  If it is missing, make a one-shot best-effort
# attempt to install it and import again.
try:
    import jsonpickle
except ImportError:  # BUGFIX: bare `except:` also swallowed SystemExit/KeyboardInterrupt
    import pip
    # NOTE(review): pip._internal is not a public API; prefer running
    # `python -m pip install jsonpickle` during deployment instead.
    pip._internal.main(["install", "jsonpickle"])
    import jsonpickle
#*************************************************************************
def to_mongo_dict(dumpobj, objmode=0):
    """
    Row-serialize an arbitrary python variable *dumpobj* so the result can
    be json'ied / stored directly as a MongoDB document (or as one SQL row,
    where the keys become the columns -- see insert_obj below).

    Rules, applied recursively through the whole structure:

    * scalars (str, int, float, complex, None, ...) are returned unchanged
    * a list becomes a dict mapping explicit positions to values:
      ["ab", "dc", 672] --> {0: "ab", 1: "dc", 2: 672}
    * a module becomes None (and is then dropped from its parent dict)
    * any other object becomes its attribute dict plus the key-value pair
      'OBJCLASS': "<module>.<classname>"
    * a '__FUNCTION__' key is dropped, as are keys whose serialized value
      is None

    The result is always a dict (or a scalar at the very top level).

    :param dumpobj: the value to serialize
    :param objmode: reserved / currently unused (the original docstring
                    describes an OBJDATA shifting mode that is not
                    implemented here) -- TODO confirm intent
    :return: the row-serialized structure

    BUGFIX: the original mutated the caller's data -- it set an OBJCLASS
    attribute on the passed object and deleted '__FUNCTION__' from the
    live __dict__ / input dict.  A shallow copy is serialized instead.
    """
    typename = type(dumpobj).__name__
    if typename in ("str", "unicode", "int", "float", "complex", "long", "NoneType"):
        return dumpobj
    if typename == "list":
        # positions become explicit keys; None entries are kept here
        # (only dict/object members drop None values)
        return {position: to_mongo_dict(item) for position, item in enumerate(dumpobj)}
    if typename == "module":
        return None
    if typename != "dict":
        # object: serialize a copy of its attribute dict plus class info
        data = dict(dumpobj.__dict__)
        data["OBJCLASS"] = dumpobj.__module__ + "." + dumpobj.__class__.__name__
    else:
        data = dict(dumpobj)
    data.pop("__FUNCTION__", None)
    objrows = {}
    for key, value in data.items():
        serialized = to_mongo_dict(value)
        if serialized is not None:  # drop members that serialized to None
            objrows[key] = serialized
    return objrows
#**********************************************************************
# Re-creates from a row-serialized structure the former python data variable
def from_mongo_dict(dumpobj):
    """
    Inverse of to_mongo_dict(): re-create the former python variable from a
    row-serialized structure.

    * non-dicts are returned unchanged
    * a dict carrying an 'OBJCLASS' key becomes an instance of that class:
      an empty instance is created via jsonpickle, then its __dict__ is
      filled in; field names containing "(" are collected under a
      '__FUNCTION__' sub-dict
    * a dict whose keys are exactly the consecutive positions 0..n-1 (ints
      or int-strings) becomes a list again; any other dict stays a dict

    :param dumpobj: the row-serialized structure
    :return: the reconstructed value
    """
    if type(dumpobj) is not dict:
        return dumpobj
    erg = {key: from_mongo_dict(value) for key, value in dumpobj.items()}
    if 'OBJCLASS' in erg:
        # create an empty instance of the recorded class
        erg1 = jsonpickle.decode(' { "py/object" : "' + erg['OBJCLASS'] + '" } ')
        del erg['OBJCLASS']
        # BUGFIX: the original tested `"(" in erg` -- a key lookup for the
        # literal string "(" -- so this branch could never fire; the field
        # *name* is what must be inspected.  Iterate over a snapshot of the
        # keys because entries are deleted along the way.
        for fieldname in list(erg.keys()):
            if "(" in fieldname:
                if '__FUNCTION__' not in erg:
                    erg['__FUNCTION__'] = {}
                erg['__FUNCTION__'][fieldname] = erg[fieldname]
                del erg[fieldname]
        erg1.__dict__ = erg
        return erg1
    # try to interpret the keys as consecutive list positions 0,1,2,...
    keys = list(erg.keys())
    try:
        keys.sort(key=lambda x: int(x))
    except (ValueError, TypeError):
        return erg  # non-numeric key: keep it a dict
    erg1 = []
    for expected, key in enumerate(keys):
        try:
            if int(key) != expected:
                return erg  # gap in the numbering: keep it a dict
        except (ValueError, TypeError):
            return erg
        erg1.append(erg[key])
    return erg1
#************************************************************************
def insert_obj(dbh, dbtable, entry):
    """
    Insert *entry* (a dict: column name -> value) as one row into the SQL
    table *dbtable* through the DB-API connection *dbh*.

    Values are stored as text: scalars via str(), everything else json'ied.
    Column names are escaped with mask().  If the insert fails because a
    column (or the whole table) does not exist yet, the missing column /
    table is created and the insert retried until it succeeds -- that is
    the "upsert" scheme described in to_mongo_dict's docstring.

    :param dbh: DB-API connection (sqlite3 or MySQL-style drivers)
    :param dbtable: target table name (trusted -- see NOTE below)
    :param entry: flat dict of column -> value
    :return: None on success, the string "ERROR" if even creating the
             table failed

    NOTE(review): the retry loop depends on the wording of the driver's
    error message ("Unknown column ..." for MySQL, "no column named ..."
    for sqlite); other backends fall through to the create-table path.
    NOTE(review): table and column names are concatenated into the SQL
    text, so callers must never pass untrusted names here (values are
    properly parameterized).
    """
    # sqlite3 uses '?' placeholders and cannot take an index length
    (placeholder, index_length) = ('%s', '(80)')
    if dbh.__class__.__module__ == "sqlite3":
        (placeholder, index_length) = ('?', '')
    field_list = []
    entry_values = []
    placeholders = []
    cursor = dbh.cursor()
    for field in entry:  # transfer the entry into the database
        if entry[field] is None:
            dumpdata = None
        elif type(entry[field]).__name__ in ("int", "unicode", "float", "long"):
            dumpdata = str(entry[field])
        # BUGFIX: `in ("str")` tested substring membership in the string
        # "str" (so "s", "t", "st", ... also matched); a one-element
        # tuple is what was meant
        elif type(entry[field]).__name__ in ("str",):
            dumpdata = str(entry[field])
        else:
            dumpdata = json.dumps(entry[field], sort_keys=True, indent=4, ensure_ascii=False)
        field_list.append(mask(field))
        entry_values.append(dumpdata)
        placeholders.append(placeholder)
    text = "insert into " + dbtable + " (" + ",".join(field_list) + ") values (" + ",".join(placeholders) + ")"
    while True:  # add columns as long as there are some missing
        try:
            cursor.execute(text, entry_values)
            break
        except Exception as e:
            m = re.search(r"(Unknown +column .*?|no +column +named +)([A-Z0-9\_]+)", str(e))
            if m:
                # a known "missing column" message: add that column
                new_column = m.group(2)
                cursor.execute("alter table " + dbtable + " add column " + new_column + " TEXT DEFAULT NULL")
            else:
                try:
                    # assume the whole table is missing: create it with the
                    # first column only; further columns are added by the
                    # branch above on subsequent retries
                    new_column = field_list[0]
                    cursor.execute("create table " + dbtable + "(" + new_column + ")")
                except Exception as e:
                    print("ERROR")
                    return("ERROR")
            # index only columns whose name survives mask() unchanged
            # (i.e. contains no lowercase characters or underscores)
            if mask(new_column) == new_column:
                cursor.execute("create index " + new_column + "idx on " + dbtable + " (" + new_column + index_length + ")")
    return(None)
#*************************************************************************
def read_obj(cursor):
    """
    Fetch the next row from a DB-API *cursor* and rebuild the python
    structure stored in it.

    Column names are demask()ed back to their original spelling; columns
    produced by GROUP_CONCAT are split on the '---GROUPCONCATSEPARATOR---'
    marker into a list.  Every cell is json-decoded when possible and the
    whole row is finally passed through from_mongo_dict().

    :return: the reconstructed object/dict, or None when the cursor is
             exhausted
    """
    row = cursor.fetchone()
    if row is None:
        return None
    raw = {}
    for position, cell in enumerate(row):
        name = procpy.objpickle.demask(cursor.description[position][0])
        grouped = re.search(r"^GROUP.*ONCAT\((\S+?)[\),]", name)
        if grouped:
            # GROUP_CONCAT column: recover the inner field name and split
            # the concatenated cell back into individual entries
            name = grouped.group(1)
            collected = []
            for part in cell.split("---GROUPCONCATSEPARATOR---"):
                try:
                    collected.append(procpy.objpickle.from_mongo_dict(json.loads(part)))
                except:
                    # not valid json: keep the raw string
                    collected.append(procpy.objpickle.from_mongo_dict(part))
            raw[name] = collected
        elif name != "''":
            try:
                cell = json.loads(cell)
            except:
                pass  # plain value, keep as delivered
            raw[name] = cell
    return procpy.objpickle.from_mongo_dict(raw)
#*************************************************************************
def mask(field):
    """
    Escape a field name for use as a database column name.

    Every character that is not already uppercase -- i.e. lowercase
    letters and the underscore itself -- is replaced by '_' followed by
    its uppercase form; everything else (uppercase letters, digits,
    symbols) passes through unchanged.  demask() reverses this.
    """
    pieces = []
    for ch in field:
        if ch == "_" or ch != ch.upper():
            pieces.append("_" + ch.upper())
        else:
            pieces.append(ch)
    return "".join(pieces)
#*************************************************************************
def demask(field):
    """
    Inverse of mask(): every '_X' pair becomes the lowercase 'x',
    all other characters pass through unchanged.
    """
    pieces = []
    lower_next = False
    for ch in field:
        if lower_next:
            pieces.append(ch.lower())
            lower_next = False
        elif ch == "_":
            lower_next = True  # next character was escaped by mask()
        else:
            pieces.append(ch)
    return "".join(pieces)
#*************************************************************************
def import_xls_to_db(self):
    """
    Read every worksheet of an excel (.xls) workbook into nested lists of
    strings.

    Per the original (mid-function) docstring this routine is meant to
    import all tables of the workbook into a database; the visible code
    only extracts the cell texts -- the database side is not implemented
    here.  NOTE(review): confirm against callers whether *self* is the
    importer object (path in self.file) or the path itself.

    :param self: an object carrying the workbook path in ``.file``, or a
                 plain path string (see BUGFIX note below)
    :return: dict mapping sheet name -> list of rows, each row a list of
             cell texts; whole-number cells are rendered without '.0'
    """
    import xlrd
    # BUGFIX: the original referenced an undefined global ``xlsname``;
    # accept an object with a .file attribute or a plain path string
    xlsname = getattr(self, "file", self)
    wb = xlrd.open_workbook(xlsname)
    sheets = {}
    for sheet_name in wb.sheet_names():
        # BUGFIX: the original called wb.sheet_by_name(sheet) with
        # ``sheet`` still unbound -> NameError on the first iteration
        sheet = wb.sheet_by_name(sheet_name)
        rows = []
        for row_index in range(sheet.nrows):
            current_row = []
            for col_index in range(sheet.ncols):
                cell_content = sheet.cell_value(row_index, col_index)
                try:
                    # xlrd delivers numbers as float; render whole numbers
                    # without the trailing '.0'
                    if cell_content == int(cell_content):
                        cell_content = str(int(cell_content))
                except (ValueError, TypeError):
                    pass  # non-numeric cell: keep as-is
                current_row.append(str(cell_content))
            rows.append(current_row)
        sheets[sheet_name] = rows
    # BUGFIX: the original returned only the cell text of the *last*
    # sheet although it collected all sheets into ``sheets``
    return sheets
#***************************************************************************************
def import_excel_workbook(FileName):
    """
    Import an .xlsx workbook into one pandas DataFrame.

    Data rows of all non-empty sheets are appended into a single frame
    (first sheet row = column headers).  Two extra rows indexed
    '__FONT__' and '__FILL__' carry the per-column formatting, and an
    extra column '__HEIGHT__' carries the per-row height (None -> 25).
    The frame index is the running row number followed by the two format
    row labels.

    :param FileName: path of the workbook to load
    :return: the combined DataFrame

    BUGFIX/modernization: uses wb.sheetnames / wb[name] instead of the
    deprecated get_sheet_names()/get_sheet_by_name(), and pandas.concat
    instead of DataFrame.append (removed in pandas 2.x).
    """
    from openpyxl import load_workbook
    from pandas import DataFrame, concat
    wb = load_workbook(FileName)
    databook = DataFrame()  # one dataframe for the entire workbook
    font = {}
    fill = {}
    height = []
    rownr = []
    last_row = 0
    last_col = 0
    for sheet_name in wb.sheetnames:  # append data from all sheets
        sheet = wb[sheet_name]
        if sheet.max_column == 1 and sheet.max_row == 1:
            continue  # worksheet is empty
        ##### get the data
        data = sheet.values
        cols = next(data)[0:]  # first row holds the column headers
        df = DataFrame(data)
        df.columns = cols  # fit column headers
        databook = concat([databook, df])
        max_row, max_col = df.shape
        #### get column format of the newly seen columns
        for colind in range(last_col, max_col):
            # NOTE(review): stores the whole Font object per column --
            # confirm downstream consumers expect that
            font[df.columns[colind]] = sheet.cell(row=last_row + 1, column=colind + 1).font
            fill[df.columns[colind]] = sheet.cell(row=last_row + 1, column=colind + 1).fill.start_color.index
        #### get row format for each new row
        for rowind in range(last_row, max_row):
            height.append(sheet.row_dimensions[rowind].height)
            rownr.append(rowind)
        last_row, last_col = databook.shape
    #### append the format information as two extra rows
    databook = concat([databook, DataFrame([font]), DataFrame([fill])], ignore_index=True)
    #### fill ROWNR
    rownr.append("__FONT__")
    rownr.append("__FILL__")
    ##### fill height (default 25 when openpyxl reports None)
    height = [25 if rh is None else rh for rh in height]
    height.append('None')
    height.append('None')
    ##### insert format information
    databook.insert(len(databook.columns), "__HEIGHT__", height)
    databook.index = rownr
    return databook
#***************************************************************************************
#
# how to search in databook:
# search for column-key:
# databook.loc[databook[column]>key]: gives out dataframe of the entire rows
# search for row_key:
# databook.loc[row] : gives out entire rows
#******************************************************************
def insert_to_excel(data, excelfilename):
    """
    Merge *data* into an existing excel workbook and write it back.

    :param data: dict mapping row label -> dict of column values
                 (presumably the output of read_obj -- TODO confirm)
    :param excelfilename: workbook to update; it is loaded via
                          import_excel_workbook and overwritten in place

    Missing columns are added to the sheet; existing rows are updated,
    unknown rows appended.
    """
    # data has to be a dict
    from openpyxl import Workbook
    from openpyxl.utils.dataframe import dataframe_to_rows
    from pandas import DataFrame, concat
    # BUGFIX: the original called self.import_excel_workbook although this
    # is a module-level function with no ``self``
    databook = import_excel_workbook(excelfilename)
    for column in list(data.keys()):  # if data contains more columns than the ones
        if column not in databook.columns:  # in the excel sheet: expand excel sheet
            databook.insert(len(databook.columns), column, 0)
    # BUGFIX: the original iterated over the undefined names ``cursor`` and
    # ``obj``; the rows to merge come from *data* itself
    for obj in data:
        if obj in databook.index:  # if object entry exists in excel file, update
            databook.loc[obj].update(data[obj])
        else:  # else: make new entry (BUGFIX: the original discarded the
            # return value of the removed DataFrame.append)
            databook = concat([databook, DataFrame([data[obj]], index=[obj])])
    wb = Workbook()
    ws = wb.active
    for r in dataframe_to_rows(databook, index=True, header=True):
        ws.append(r)
    # BUGFIX: worksheets have no save(); the workbook object does
    wb.save(excelfilename)  # brutal save: old version is gone without notice
# wenn man den database output auch in ein dataframe packt, kann man hier einfach updaten!
#*********************************************************************
def databook_to_excel(databank, excelfilename):
    """
    Write *databank* (a DataFrame shaped like import_excel_workbook's
    output) to an .xlsx file, re-applying the per-column font/fill that is
    stored in the rows labelled '__FONT__' and '__FILL__'.

    :param databank: DataFrame including the two format rows
    :param excelfilename: target path (overwritten)
    """
    from openpyxl import Workbook
    from openpyxl.utils.dataframe import dataframe_to_rows
    wb = Workbook()
    ws = wb.active
    font = None
    fill = None
    for r in dataframe_to_rows(databank, header=True):  # only header should be written down
        # dataframe_to_rows yields plain lists; with index=True (the
        # default) the first element is the row label.
        # BUGFIX: the original compared ``r.index`` -- the list *method* --
        # against the label strings, so the format rows were never captured.
        if r and r[0] == "__FONT__":
            font = r
        elif r and r[0] == "__FILL__":
            fill = r
        else:
            ws.append(r)
    if font is not None and fill is not None:
        # BUGFIX: openpyxl names the attribute max_column (not max_columns)
        # and both bounds are inclusive, so iterate up to max+1
        for i in range(1, ws.max_column + 1):
            for j in range(1, ws.max_row + 1):
                ws.cell(column=i, row=j).font = font[i]
                ws.cell(column=i, row=j).fill = fill[i]
    wb.save(excelfilename)
#**********************************************************************
def create_test_dataframe():
    """
    Return a small 3x3 DataFrame shaped like import_excel_workbook's
    output (one data row plus the '__FONT__'/'__FILL__' format rows and a
    '__HEIGHT__' column), for use in tests.
    """
    # BUGFIX: the original imported the misspelled name ``Dataframe`` and
    # omitted the commas between the row lists, so [[1,2,3][4,5,6][7,8,9]]
    # was an (invalid) indexing expression
    from pandas import DataFrame
    return DataFrame(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        index=[1, '__FONT__', '__FILL__'],
        columns=['param1', 'param2', '__HEIGHT__'],
    )
#*********************************************************************
#*********************************************************************
# Unit tests:
class UnitTest (object):
    """Ad-hoc unit tests, dispatched by method name from the command line."""
    def test01 (self,abc):
        # round-trip a list containing one dict through the row serializer
        result = to_mongo_dict([{"ABC":"123"}])
        print(result)
#*********************************************************************
if __name__ == "__main__":
    # dispatch: the first CLI argument names the UnitTest method to run,
    # the remaining arguments are handed to it as a list
    getattr(UnitTest(), sys.argv[1])(sys.argv[2:])