
| Current Path : /home/cgabriel/20_dev/12_procpy/dataninja/ |
Linux ift1.ift-informatik.de 5.4.0-216-generic #236-Ubuntu SMP Fri Apr 11 19:53:21 UTC 2025 x86_64 |
| Current File : //home/cgabriel/20_dev/12_procpy/dataninja/magicmerge.py |
# coding: utf8
import os,re,sys
import openpyxl
from openpyxl.styles import Font, PatternFill
import pdb
#*************************************************************************
# Groups the helper methods of this file: sorting/aligning row lists
# (sync_lists, sync_lists2), merging and comparing objects (merge_objects,
# compare), row clean-up (remove_empty_rows, is_element_None) and a CSV
# reader (csvimport).
class Magicmerge (object):
# The constructor takes no arguments and initialises no attributes.
# (csvimport may later set self.import_query when the file has a "Q:" line.)
def __init__ (self):
pass
#*************************************************************************
# NOTE(review): the leading indentation of this method is missing in the
# file as stored, so the comments below describe individual statements and do
# not assert how the branches are nested.
def sync_lists2 (self,list0, k, list1,i,list2,j):
'''
Sort list0 and list1 by the order in which their key values appear in
list2, then walk the three lists index by index and insert/append
placeholder rows (none_row) into them, all in place.

list0, list1, list2 -- lists of rows; a row is indexed as row[col][0],
i.e. every cell is itself a sequence whose element 0 is the value.
i -- key column indexes used when sorting (paired with j).
j -- key column indexes of list2 whose values define the sort order.
k -- column index read from list0/list1 rows in the comparison step.
Nothing is returned explicitly.
'''
# Three-way comparator: orders two rows by the position of their key value
# inside clist; on any exception it prints the error and returns -1.
def cmp_with_list(a,b,sortcols): # the sorting function of the nr'th column by clist
for (nr,clist) in sortcols:
# print a[nr][0], b[nr][0]
try:
erg = clist.index(a[nr][0]) - clist.index(b[nr][0])
if not erg == 0:
return(erg)
except Exception as e:
print(e)
return(-1)
return(0)
# sortcols: one (column index taken from i, list of list2 values of column j1)
# pair per entry of j.
sortcols = []
zaehler = 0
for j1 in j:
sortcols.append( ( i[zaehler], [x[j1][0] for x in list2] ) )
zaehler = zaehler + 1
# pdb.set_trace()
# for ii in range(len(list0)):
# try:
# open("testtext.txt","a").write(str(list0[ii][0])+" ; "+str(list1[ii][0])+"\n")
# except:
# pass
# NOTE(review): the cmp= keyword of list.sort() exists only in Python 2;
# Python 3 raises TypeError here (functools.cmp_to_key is the replacement).
list0.sort(cmp = lambda x,y: cmp_with_list(x,y,sortcols) ) # sort the list0
list1.sort(cmp = lambda x,y: cmp_with_list(x,y,sortcols) ) # sort the list1
zaehler = -1
# print lis;t0
# print list1
# print list2
# Placeholder row: as many cells as the longest of the three first rows, each
# cell as wide as the first cell of list1's first row.
# NOTE(review): list multiplication makes every cell the same inner list, and
# this single none_row object is what gets inserted everywhere below.
none_row = [[None]*len(list1[0][0])]*max(len(list0[0]),len(list1[0]),len(list2[0]))
while (0 == 0): # synchronize list1 and list2 by introducing None rows
zaehler = zaehler + 1
# Length checks: append none_row to lists that are shorter than the current
# index; the loop is left once zaehler is past the end of all three lists.
if zaehler >= len(list0):
if zaehler >= len(list1):
if zaehler >= len(list2):
break
else:
list1.append(none_row)
list0.append(none_row)
else:
if zaehler >= len(list2):
list2.append(none_row)
list0.append(none_row)
else:
if zaehler >= len(list1):
list1.append(none_row)
if zaehler >= len(list2):
list2.append(none_row)
else:
if zaehler >= len(list2):
list2.append(none_row)
# NOTE(review): zaehler1 is never incremented in this method, so j1 and clist
# always refer to the first key column.
zaehler1 = 0
for i1 in i:
j1 = j[zaehler1]
clist = sortcols[zaehler1][1]
# all three lists are the same
if list0[zaehler][i1][0] == list2[zaehler][j1][0] and list0[zaehler][k][0] == list1[zaehler][j1][0]:
break
# two lists are the same, one is different
elif list0[zaehler][i1][0] == list2[zaehler][j1][0] or list1[zaehler][k][0] == list2[zaehler][j1][0]:
if list0[zaehler][i1][0] == list2[zaehler][j1][0]:
a = list0
b = list1
else:
a = list1
b = list0
# clist[zaehler:] holds the list2 key values from the current index onwards.
if b[zaehler][k][0] in clist[zaehler:]:
b.insert(zaehler,none_row)
else:
a.insert(zaehler,none_row)
list2.insert(zaehler,none_row)
# Each insert into list2 is mirrored in every key value list of sortcols.
for clist1 in sortcols:
clist1[1].insert(zaehler,none_row)
elif list0[zaehler][i1][0] == list1[zaehler][j1][0]:
if list0[zaehler][k][0] in clist[zaehler:]:
list0.insert(zaehler,none_row)
list1.insert(zaehler,none_row)
else:
list2.insert(zaehler,none_row)
for clist1 in sortcols:
clist1[1].insert(zaehler,none_row)
# all three lists are different from each other
else:
if list0[zaehler][k][0] in clist[zaehler:] and list1[zaehler][k][0] in clist[zaehler:]:
# Both keys occur later in list2: a is the list whose key comes first there.
if clist.index(list0[zaehler][k][0])<clist.index(list1[zaehler][k][0]):
a = list0
b = list1
else:
b = list0
a = list1
a.insert(zaehler, none_row)
b.insert(zaehler, none_row)
b.insert(zaehler, none_row)
else:
list2.insert(zaehler, none_row)
if not list0[zaehler][k][0] in clist[zaehler:] and not list1[zaehler][k][0] in clist[zaehler:]:
list2.insert(zaehler, none_row)
list1.insert(zaehler, none_row)
list0.insert(zaehler+1, none_row)
zaehler += 1
else:
if list0[zaehler][k][0] in clist[zaehler:]:
a = list0
else:
a = list1
a.insert(zaehler, none_row)
a.insert(zaehler, none_row)
list2.insert(zaehler, none_row)
for clist1 in sortcols:
clist1[1].insert(zaehler,none_row)
#*************************************************************************
def sync_lists (self,list1,i,list2,j):
    '''
    Sort list1 by the order of its key values in list2 and align both lists.

    Rows are indexed as row[col][0], i.e. every cell is a sequence whose
    element 0 is the value.  list1 is first sorted in place so that its
    key columns ``i`` follow the order in which the same values occur in
    the key columns ``j`` of list2.  Afterwards both lists are walked
    index by index; wherever the key values differ a placeholder row of
    None cells is inserted into one of the lists, and the shorter list is
    padded at the end, so that both lists end up with the same length.

    list1, list2 -- lists of rows, modified in place.
    i, j         -- parallel lists of key column indexes (i[n] in list1
                    corresponds to j[n] in list2).
    Returns None.
    '''
    # cmp_to_key works on Python 2.7 and 3.x; list.sort(cmp=...) is Python 2 only.
    from functools import cmp_to_key

    def cmp_with_list(a, b, sortcols):
        # Order two rows by the position of their key value inside clist.
        # Keys that cannot be located compare as equal (best effort).
        for (nr, clist) in sortcols:
            try:
                erg = clist.index(a[nr][0]) - clist.index(b[nr][0])
            except Exception:
                return 0
            if erg != 0:
                return erg
        return 0

    # One (list1 key column, list2 key values) pair per key column.
    sortcols = [(col1, [row[col2][0] for row in list2])
                for (col1, col2) in zip(i, j)]
    list1.sort(key=cmp_to_key(lambda x, y: cmp_with_list(x, y, sortcols)))

    if not list1 and not list2:
        return
    # The placeholder takes its shape from the first row of list1; fall back
    # to list2 when list1 is empty (this used to raise IndexError).
    template = list1[0] if list1 else list2[0]
    # NOTE: the very same none_row object is inserted at every gap and all of
    # its cells are the same inner list (unchanged from the original code).
    none_row = [[None] * len(template[0])] * len(template)

    zaehler = -1
    while True:
        zaehler = zaehler + 1
        if zaehler >= len(list1):
            if zaehler >= len(list2):
                break
            list1.append(none_row)      # list2 is longer: pad list1
            continue
        if zaehler >= len(list2):
            list2.append(none_row)      # list1 is longer: pad list2
            continue
        # Compare the key columns pairwise.  The original never advanced its
        # column counter, so every list1 key column was compared against the
        # first list2 key column.
        for (i1, j1, (_, clist)) in zip(i, j, sortcols):
            if list1[zaehler][i1][0] == list2[zaehler][j1][0]:
                continue
            if list1[zaehler][i1][0] in clist[zaehler:]:
                # The list1 key shows up later in list2, so the current
                # list2 row has no partner: open a gap in list1.
                list1.insert(zaehler, none_row)
            else:
                # The list1 key is unknown to list2: open a gap in list2 and
                # keep the key value lists aligned with list2.
                list2.insert(zaehler, none_row)
                for (_, values) in sortcols:
                    values.insert(zaehler, none_row)
            break
#*************************************************************************
def find_col(self):
    """Return the pair of key column lists; both are always ``[0]``."""
    first_cols = [0]
    second_cols = [0]
    return (first_cols, second_cols)
#*************************************************************************
def merge_objects (self, obj1, obj2, orig):
    """
    Three-way merge of obj1 and obj2 against their common ancestor orig.

    * If obj1 and obj2 compare equal (self.compare), obj1 is returned.
    * Leaf values (falsy orig, or str/int/float/bool/Font/PatternFill):
      the side that differs from orig wins.  If both sides differ and
      orig is a str/int/float, an empty side yields to the other one;
      otherwise a copy of orig is kept.
    * Everything else is treated as an indexable container and merged
      element by element, recursively.  If both sides differ from orig
      and one of them consists only of empty values (is_element_None),
      that side is returned as a whole; this covers the deletion of an
      entire row.

    Returns the merged object; for containers this is a deep copy of
    orig with the merged elements filled in.
    """
    from openpyxl.styles import Font
    from openpyxl.styles import PatternFill as Fill
    import copy

    if self.compare(obj1, obj2):
        return obj1
    # Copy only after the early exit above; result starts as the ancestor.
    result = copy.deepcopy(orig)

    # lowest level: scalar values and style objects
    if (not orig) or isinstance(orig, (str, int, float, bool, Font, Fill)):
        same1 = self.compare(orig, obj1)
        same2 = self.compare(orig, obj2)
        if same1 and not same2:
            result = obj2
        elif same2 and not same1:
            result = obj1
        elif not same1 and not same2 and isinstance(orig, (str, int, float)):
            # this is tailored to the special situation of the language files
            if not obj1:
                result = obj2
            elif not obj2:
                result = obj1
        return result

    # this part deals with merge problems concerning the deletion of an
    # entire row
    if not self.compare(obj1, orig) and not self.compare(obj2, orig):
        if self.is_element_None(obj1):
            return obj1
        if self.is_element_None(obj2):
            return obj2

    # do the same thing recursively for every element
    for idx in range(len(obj1)):
        try:
            result[idx] = self.merge_objects(obj1[idx], obj2[idx], orig[idx])
        except Exception as err:
            # The original dropped into pdb here, which blocks any
            # non-interactive run.  Report the problem and go on with the
            # next element, as continuing from the debugger did.
            sys.stderr.write(
                "merge_objects: element %d skipped (%s: %s)\n"
                % (idx, type(err).__name__, err))
    return result
#***********************************************************************************
def compare(self, obj1, obj2):
    '''
    Return True if obj1 and obj2 are considered the same.

    Objects of different type are never the same.  Falsy values and
    str/int/float/list/tuple values are compared with ``==``.  Any other
    object is compared attribute by attribute through its instance
    ``__dict__`` (recursively, every attribute of obj1 must exist in obj2
    and compare as the same).  Objects without a ``__dict__`` (dict, set,
    ...) are compared with ``==``; the original raised TypeError for
    them because ``vars()`` was called outside the ``try``.
    '''
    if not type(obj1) == type(obj2):
        return False
    if not obj1 or isinstance(obj1, (str, int, float, list, tuple)):
        return obj1 == obj2
    try:
        attrs1 = vars(obj1)
        attrs2 = vars(obj2)
    except TypeError:
        # no instance __dict__ to walk through
        return obj1 == obj2
    for att in attrs1:
        try:
            if not self.compare(attrs1[att], attrs2[att]):
                return False
        except Exception:
            # missing attribute in obj2 or values that cannot be compared
            return False
    return True
#*************************************************************************
def remove_empty_rows(self,result):
    """
    Delete, in place, every row of result whose cells are all empty.

    A row is a sequence of cells and a cell is indexed as cell[0]; a row
    is empty when no cell has a truthy cell[0] (a row without cells is
    empty as well).  Processing stops silently at the first row that
    cannot be inspected (for example a cell that is not indexable); rows
    from there on are left untouched, as before.

    Returns None.
    """
    pos = 0
    while pos < len(result):
        try:
            has_content = any(entry[0] for entry in result[pos])
        except Exception:
            break
        if has_content:
            pos += 1
        else:
            # delete by position; list.remove() would search for an equal row
            del result[pos]
#*************************************************************************
def is_element_None(self, element):
    """
    Return True if element holds no truthy value at any nesting level.

    Non-iterable values and strings are judged by their truth value.
    Iterables are empty when all of their entries are (for a dict the
    values are inspected, not the keys).

    Strings are handled as scalars: iterating a one-character string
    yields the same string again, so the original implementation
    recursed up to the interpreter's recursion limit for every non-empty
    string before its bare ``except`` produced the answer.
    """
    if isinstance(element, (str, type(u""))):
        return not element
    try:
        entries = iter(element)
    except TypeError:
        # not iterable: plain value
        return not element
    if isinstance(element, dict):
        entries = iter(element.values())
    for entry in entries:
        if not self.is_element_None(entry):
            return False
    return True
#*************************************************************************
def csvimport (self,csvname,dynfield=True):
    """
    Read rows of key-value pairs from the csv file named csvname.

    The column names are taken from the first valid line.  Comment
    lines starting with #, empty lines, underlining lines consisting
    only of ---- and lines starting with "alter table" are skipped.
    A line of the form "Q: <text>" is not a data row; its text is
    stored in self.import_query.

    Columns without a name get one on the fly: MISC01, MISC02, ...
    Values of such MISC columns are not stored in the row.

    Each cell can override the static column name under which it
    comes: a cell written as "<key>: <value>" is stored under <key>
    (only if dynfield is true and the column name does not end with
    an underscore).  A leading "--> " is stripped from cell values.

    Special columns: MDEL stores its value under 'DELETE'; DEL does
    so only when the value is "X"; columns whose name starts with XXX
    are ignored.  Empty cells are not stored.

    Returns (entries, column_descriptors): the list of row dicts and
    the list of column names (extended by dynamically used keys).
    If a quoted cell is not followed by a comma or a line end, an
    error is printed and 0 is returned.
    """
    # codecs is not among the module's imports, so import it locally.
    import codecs

    with codecs.open(csvname,'r',"utf8") as csvfile:
        text = csvfile.read()
    # list of all text patterns separated by " (odd indexes are the quoted cells)
    cells = re.split(r"\"|$",text,maxsplit=99999999,flags=re.DOTALL)
    # replace all "..." cells with ---FIELD--- so that commas and line
    # breaks inside quotes do not disturb the splitting below
    text = re.sub(r"\"(.*?)\"","---FIELD---",text,count=99999999,flags=re.DOTALL)
    m = re.search(r"^(.*?---FIELD---)[^,\n]",text)
    if m:
        print("Error in csv-File" + m.group(1))
        return(0)

    cells_count = -2              # -2: header not seen yet; later: index of the last quoted cell used
    entries = []                  # was never initialised in the original (NameError)
    column_descriptors = []
    for zeile_sum in text.splitlines():
        # skip comments, empty lines, underlining ---------
        if re.search(r"^(#|\\-+ *$|-+ *$| *$|alter *table)",zeile_sum):
            continue
        m = re.search(r"^ *[qQ]\: *([^\n]*)$",zeile_sum)
        if m:
            self.import_query = m.group(1)
            continue
        zeile = zeile_sum.split(",")
        if cells_count == -2:
            # first valid line: column names (taken as they are, not stripped)
            column_descriptors = zeile
            cells_count = -1
            continue

        entry = {}
        miscnr = 0
        for (zaehler, cell) in enumerate(zeile):
            cell = cell.strip()
            if cell == "---FIELD---":
                # fetch the next quoted text from the split result
                cells_count = cells_count + 2
                cell = cells[cells_count].strip()

            # make sure that this column has a (non-empty) name
            while zaehler >= len(column_descriptors):
                miscnr = miscnr + 1
                column_descriptors.append(str("MISC" + ("%02u" % miscnr)))
            if str(column_descriptors[zaehler]) == str(""):
                miscnr = miscnr + 1
                column_descriptors[zaehler] = str("MISC" + ("%02u" % miscnr))

            if re.search(r"^\s*$",cell,re.DOTALL):
                continue
            field = column_descriptors[zaehler]
            if dynfield and not field[-1] == "_":
                m = re.search(r"^\s*([A-Za-z0-9\_]+)\: *(.*)$",cell,re.DOTALL)
                if m:
                    field = m.group(1)
                    cell = m.group(2)

            # normalise the value
            cell = re.sub(r"^\-\-\> *","",cell)
            cell = re.sub(r"\r","",cell)
            cell = re.sub(r"\t"," ",cell)
            cell = re.sub(r"\"","\'",cell)
            cell = cell.strip()

            if field[0:3] == "XXX":
                continue
            if field == "":
                continue
            if re.search(r"^\s*$",cell):
                continue

            if field == "MDEL":
                entry['DELETE'] = cell
            elif field == "DEL":
                if cell == "X":
                    entry['DELETE'] = cell
            elif field == "MD5KEY":
                entry[field] = cell
            elif not re.search(r"^MISC\d+$",field):
                entry[field] = cell

            # remember keys that were introduced dynamically
            if field in entry and not field in column_descriptors:
                column_descriptors.append(field)
        entries.append(entry)
    return(entries,column_descriptors)
#**************************************************************************