
| Current Path : /home/ift/52_procpy/dataninja/ |
Linux ift1.ift-informatik.de 5.4.0-216-generic #236-Ubuntu SMP Fri Apr 11 19:53:21 UTC 2025 x86_64 |
| Current File : //home/ift/52_procpy/dataninja/multilingual.py |
import re
class Translation (object):
    """Translate text patterns into the preferred target language.

    The pattern table is plain text.  Its first significant line is the
    header, naming one language per column; every following line is a row of
    patterns that are equivalents of each other.  Cells are separated by
    blanks, a cell that contains blanks is wrapped in double quotes, and a
    lone ``-`` stands for an empty cell.  Empty lines, lines starting with
    ``#`` and lines consisting only of ``-`` are ignored.

    Attributes:
        sort_languages: target languages in order of preference.
        translations:   ``{language: {pattern: [values in that language]}}``;
                        every cell of a row is a key for every target column.
        ambi:           ``{language: {pattern: [distinct values]}}`` for the
                        patterns found to have more than one distinct value.
        allkeys:        only present while ambiguity tracking is enabled;
                        ``{language: {pattern: 1}}`` for pairs already checked.
    """

    # Table used when the caller passes no pattern table.
    _DEFAULT_PATTERNTABLE = '\nkey en de fr\n001 "y es" ja oui\n'

    # Stand-in for a quoted cell while a line is split on blanks.
    _FIELD_MARK = "---FIELD---"

    def __init__ (self,target_language="en,de",patterntable="",ambicheck=0):
        """Parse *patterntable* for the given target languages.

        Args:
            target_language: comma separated language names, most preferred
                first.  The names are used exactly as written (no stripping).
            patterntable: the table text; an empty string selects the
                built-in default table.
            ambicheck: a value greater than 0 enables ambiguity tracking
                from the start (see ``compute_ambiguity``).
        """
        if patterntable == "":
            patterntable = self._DEFAULT_PATTERNTABLE

        self.sort_languages = target_language.split(",")
        self.ambi = {}
        if ambicheck > 0:
            self.allkeys = {}
        self.compute_translations(patterntable)

    def _split_cells (self,line):
        """Return the cells of one stripped table line as a list of strings.

        Quoted cells are masked first so that the blanks inside them do not
        act as separators, then put back in their original positions.
        """
        quoted = iter(re.findall(r"\"(.*?)\"",line,flags=re.DOTALL))
        masked = re.sub(r"\"(.*?)\"",self._FIELD_MARK,line,flags=re.DOTALL)
        cells = []
        for cell in re.sub(r" +"," ",masked).split(" "):
            if cell == self._FIELD_MARK:
                # Fall back to the literal text if the table itself contained
                # an unquoted cell that looks like the marker.
                cells.append(next(quoted,cell))
            else:
                cells.append(cell)
        return cells

    def compute_translations (self,patterntable):
        """(Re)build ``self.translations`` from the text in *patterntable*.

        Every header cell other than ``-`` gets a dictionary, but only the
        columns listed in ``self.sort_languages`` are filled.

        Raises:
            IndexError: if a row has a non-``-`` cell in a position beyond
                the last header column.
        """
        self.translations = {}
        if 'allkeys' in vars(self):
            # Remembered ambiguity checks refer to the previous table.
            self.allkeys = {}

        header = None
        for line in patterntable.split("\n"):
            line = line.strip()
            # Comments, empty lines and lone "-" lines carry no data.
            if line == "" or line[0] == "#" or line == "-":
                continue

            cells = self._split_cells(line)

            if header is None:
                # First significant line: one dictionary per named column.
                for name in cells:
                    if name != "-":
                        self.translations[name] = {}
                header = cells
                continue

            for language_nr, col_as_value in enumerate(cells):
                if col_as_value == "-":
                    continue
                language = header[language_nr]
                if language not in self.sort_languages:
                    continue
                # Every cell of the row (the value itself included) becomes
                # a key leading to this column's value.
                by_key = self.translations[language]
                for col_as_key in cells:
                    if col_as_key != "-":
                        by_key.setdefault(col_as_key,[]).append(col_as_value)

    def translate (self,text):
        """Return the translation of *text*, or *text* itself if none exists.

        The target languages are tried in order of preference; the first
        value recorded for *text* in the first language that knows it wins.
        An empty translation counts as "not found".
        """
        val = None
        language = None
        for language in self.sort_languages:
            try:
                val = self.translations[language][text][0]
            except (KeyError,IndexError,TypeError):
                # Unknown language or pattern, or an unhashable *text*.
                continue
            break

        if val:
            # NOTE(review): the ambiguity check is keyed on the translated
            # value, not on the incoming text - confirm that this is intended.
            self.compute_ambiguity(language,val)
            return(val)
        return(text)

    def compute_ambiguity (self,language="",col_as_key=""):
        """Record patterns that have more than one distinct value.

        Called without arguments, it enables ambiguity tracking and checks
        every pattern of every target language that occurs in the table.
        Called with a language and a pattern, it checks just that pair, but
        only if tracking is enabled; each pair is checked once.  A call with
        a language but an empty pattern does nothing.

        Returns:
            ``self.ambi``.

        Raises:
            KeyError: if tracking is enabled and the given language/pattern
                pair is not part of ``self.translations``.
        """
        if col_as_key == "":
            if str(language) == "":
                if 'allkeys' not in vars(self):
                    self.allkeys = {}
                for lang in self.sort_languages:
                    # A requested language need not be a column of the table.
                    for key in self.translations.get(lang,{}):
                        self.compute_ambiguity(lang,key)
            return(self.ambi)

        if 'allkeys' not in vars(self):
            # Tracking is switched off.
            return(self.ambi)

        if col_as_key in self.allkeys.get(language,{}):
            return(self.ambi)                       # value already processed

        values = self.translations[language][col_as_key]
        self.allkeys.setdefault(language,{})[col_as_key] = 1

        # Drop duplicates while keeping the order of first appearance.
        target_values = list(dict.fromkeys(values))
        if len(target_values) > 1:
            self.ambi.setdefault(language,{})[col_as_key] = target_values
        return(self.ambi)