Welcome To Our Shell

Mister Spy & Souheyl Bypass Shell

Current Path : /home/cgabriel/20_dev/11_iftlib/sysadmin/
Linux ift1.ift-informatik.de 5.4.0-216-generic #236-Ubuntu SMP Fri Apr 11 19:53:21 UTC 2025 x86_64
Current File : //home/cgabriel/20_dev/11_iftlib/sysadmin/util.py
#  coding:  utf8

import os,sys,glob,sys,re,random,time


class Util (object):

    def __init__ (self):
        self.IFT        = "ift"
        self.colormode  = "LineArt"
        self.TESSERACT  = "tesseract -l deu --psm 11 pdf" 
        self.TESSERACT1 = "tesseract -l eng --psm 11 pdf" 


                
#***********************************************************

    def subtree(self, pars):
        """Split one subdirectory of the current repository into its own repository.

        The last path component of ``pars[0]`` names both a temporary branch
        and the target repository ``~/<name>``.  The history of the
        subdirectory is split off with ``git subtree split``, pushed to
        ``master`` of a freshly initialised bare repository, and the temporary
        branch is deleted again.  All steps are run through the shell; their
        exit codes are not evaluated.

        Args:
            pars: Argument list; ``pars[0]`` is the subdirectory (prefix) to
                split off, with or without a trailing separator.

        Raises:
            IndexError: If ``pars`` is empty.
        """
        import shlex

        # Exactly one trailing separator; any run of separators collapses to
        # its first character (so "a///b" becomes "a/b").
        prefix = re.sub(r"([\\/])[\\/]+", r"\1", pars[0] + "/")

        # The branch / repository name is the last path component.
        m = re.search(r"^(.*)[\\/](.*)[\\/]$", prefix)
        newbranch = m.group(2) if m else prefix[:-1]
        print(prefix, newbranch)

        branch = shlex.quote(newbranch)
        # "~/" stays outside the quoting so that the shell still expands it.
        target = "~/" + branch

        commands = (
            # Drop a branch left over from an earlier run.
            "git branch -D " + branch,
            "git subtree --prefix " + shlex.quote(prefix) + " split -b " + branch,
            "mkdir " + target,
            # The option is spelled with two dashes.
            "git init --bare " + target,
            "git push " + target + " " + branch + ":master",
            "git branch -D " + branch,
            # NOTE(review): a bare repository has no work tree, so this
            # checkout presumably only reports an error -- confirm whether a
            # non-bare target was intended.
            "cd " + target + " && git checkout master",
        )
        for command in commands:
            os.system(command)
        
#***********************************************************

    def del_all (self,pars):
    
        os.system('rm *.aux');
#        os.system('rm *.log');
        os.system('rm *.toc');
        os.system('rm *.ind');
        os.system('rm *.ilg');
        os.system('rm *.idx');
        os.system('rm *.bbc');
        os.system('rm *.lvz');
        os.system('rm *~');
        os.system('rm */*~');
        os.system('rm */*/*~');
        os.system('rm */*/*/*~');
        os.system('rm */*/*/*/*~');
        os.system('rm */*/*/*/*/*~');
        os.system('rm */*/*/*/*/*/*~');
        os.system('rm */*/*/*/*/*/*/*~');
        os.system('rm */*/*/*/*/*/*/*/*~');
        os.system('rm */*/*/*/*/*/*/*/*/*~');
        os.system('rm *.pyc');
        os.system('rm */*.pyc');
        os.system('rm */*/*.pyc');
        os.system('rm */*/*/*.pyc');
        os.system('rm */*/*/*/*.pyc');
        os.system('rm */*/*/*/*/*.pyc');
        os.system('rm */*/*/*/*/*/*.pyc');
        os.system('rm */*/*/*/*/*/*/*.pyc');
        os.system('rm */*/*/*/*/*/*/*/*.pyc');
        os.system('rm */*/*/*/*/*/*/*/*/*.pyc');
        os.system('rm .*~');
        os.system('rm */.*~');
        os.system('rm */*/.*~');
        os.system('rm */*/*/.*~');
        os.system('rm */*/*/*/.*~');
        os.system('rm */*/*/*/*/.*~');
        os.system('rm */*/*/*/*/*/.*~');
        os.system('rm */*/*/*/*/*/*/.*~');
        os.system('rm */*/*/*/*/*/*/*/.*~');
        os.system('rm */*/*/*/*/*/*/*/*/.*~');
        os.system('rm texput.*');
        os.system('rm xxqq.*');
        os.system('rm *P_L_A_C_E_H_O_L_D_E_R*.*');
        
        if len(pars) > 0:
            os.system('rmdir */*/*/*/*/*/*/*/*/*/*/*/*/*/*/*/*/*/');
            os.system('rmdir */*/*/*/*/*/*/*/*/*/*/*/*/*/*/*/*/');
            os.system('rmdir */*/*/*/*/*/*/*/*/*/*/*/*/*/*/*/');
            os.system('rmdir */*/*/*/*/*/*/*/*/*/*/*/*/*/*/');
            os.system('rmdir */*/*/*/*/*/*/*/*/*/*/*/*/*/');
            os.system('rmdir */*/*/*/*/*/*/*/*/*/*/*/*/');
            os.system('rmdir */*/*/*/*/*/*/*/*/*/*/*/');
            os.system('rmdir */*/*/*/*/*/*/*/*/*/*/');
            os.system('rmdir */*/*/*/*/*/*/*/*/*/');
            os.system('rmdir */*/*/*/*/*/*/*/*/');
            os.system('rmdir */*/*/*/*/*/*/*/');
            os.system('rmdir */*/*/*/*/*/*/');
            os.system('rmdir */*/*/*/*/*/');
            os.system('rmdir */*/*/*/*/');
            os.system('rmdir */*/*/*/');
            os.system('rmdir */*/*/');
            os.system('rmdir */*/');
            os.system('rmdir */');
        

        self.pdffiles = {}
    
#***********************************************************

    def addclip (self,pars):

        text = ""
        while (0 == 0):
            text0 = text

            text  = ""
            try:
                text = open(pars[0]).read()
            except:
                pass
            if not text == text0:
                text1 = text
                for o in (1,2):
                    text1 = re.sub(r"(^|\n)(\d\d\d\d\d\d\d\d) +(-?\d+\.\d\d) +(\S+) +(\S+) +(-?\d+\.\d\d) +(.*?)\n",
                                         "\\1 \\3 \n",text1,99999999,flags=re.DOTALL)
                text1 = re.sub(r"\s+"," ",text1,99999999,flags=re.DOTALL)
                text1 = text1.strip()
                text1 = text1.split(" ")
                result = 0.00
                exist_values = False
                for entry in text1:
                    try:
                        result = result + float(entry)
                        exist_values = True
                    except:
                        pass
                if not exist_values:
                    continue
                result = "%3.2f" % result
                print (result)
                open(pars[0],"w").write("")
                try:
                    open(pars[1],"w").write(result + "\n")
                except:
                    pass
            time.sleep(1)

#******************************************************

    def normalize (self,pars):
    
        for file in os.listdir('.'):
            print(file)
            file1 = file
            file1 = re.sub(r"[ \\\+\(\)\'\°[\]\!\{\}\,~\&]+","_",file1,99999999)
            file1 = re.sub(r"[éèê]","e",file1,99999999)
            file1 = re.sub(r"[óòô]","o",file1,99999999)
            file1 = re.sub(r"[áàâ]","a",file1,99999999)
            file1 = re.sub(r"[úùû]","u",file1,99999999)
            file1 = re.sub(r"[Ç]","C",file1,99999999)
            file1 = re.sub(r"[íìîı]","i",file1,99999999)
            file1 = re.sub(r"[İ]","I",file1,99999999)
            file1 = re.sub(r"ä","ae",file1,99999999)
            file1 = re.sub(r"ö","oe",file1,99999999)
            file1 = re.sub(r"ü","ue",file1,99999999)
            file1 = re.sub(r"Ä","Ae",file1,99999999)
            file1 = re.sub(r"Ö","Oe",file1,99999999)
            file1 = re.sub(r"Ü","Ue",file1,99999999)
            file1 = re.sub(r"ş","s",file1,99999999)
            file1 = re.sub(r"ß","ss",file1,99999999)

            file1 = re.sub(r"_-_","__",file1,99999999)
            file1 = re.sub(r"_-","__",file1,99999999)
            file1 = re.sub(r"\:","__",file1,99999999)

            if file == file1:
                continue
            print(file)
            os.rename(file,file1)        


#******************************************************

    def dbl_files (self,pars):
    

        '''
        Identifies the double file candidates of a list of files
        '''

        if len(pars) == 0 or pars[0] == "-":
            text = sys.stdin.read()
        else:
            text = open(pars[0]).read().split("\n")

        files = {}
        try:
            text = text.split("\n")
        except:
            pass

        list1 = {}

        for file in text:

            file = re.sub(r"^\.[\\\/]","",file)
            if not os.path.isfile(file):
                continue
            filesize = os.path.getsize(file)
            o        = str(filesize) + open(file).read(100000)
            m        = re.search(r"(^|\/|\\)(\d\d\d\d\d\d)\.(qq_|qw_|)(\d+_\d\d)_",file)
            if m:
                o = m.group(2) + "__" + m.group(4)  #  inhaltliche Zuordnung bei Quittungen
            try:
                list1[o].append(file)
            except:
                list1[o] = [("%015u" % int(filesize)),file]

        list2 = []
        for id in list1:
            if len(list1[id]) > 2:
                list2.append( list1[id] )
        list2.sort(key=lambda x:x[0])
        for id in list2:
            for id1 in id:
                try:
                    print ( ("%16.6f" % (int(id1)/100000.0) ) )
                except:
                    print (id1)
            print ("")
            

#****************************************************

    def utf (self,pars):

        for file in glob.glob("*") + glob.glob("*/*"):
            if not os.path.isfile(file):
                continue
            if re.search(r"^(.*)\~$",file):
                continue
            filetyp = os.popen("file -i '" + file + "'").read()

            m = re.search(r"charset\=(\S+)",filetyp,re.DOTALL)
            if m:
                filecode = m.group(1)
                if '8bit' in filecode:
                    filecode = 'iso-8859-1'
                if not filecode == "utf-8" and ("iso" in filecode or "ascii" in filecode):
                    os.system("cp " + file + " " + file + "~")
                    print("iconv -f " + filecode + " -t utf-8 " + file + "\n")
                    os.system("iconv -f " + filecode + " -t utf-8 " + file +"~ > " + file)

#****************************************************

    def scanc (self,pars):
    
        self.colormode = "Color"
        self.scan(pars)

#****************************************************

    def scan (self,pars):
    
        try:
            scandir = pars[1]
        except:
            scandir = "/home/xxx01_scan"
            
        try:
            scandev = pars[2]
        except:
            scandev = "278"

        if not os.path.isdir(scandir):
            scandir = "."

        devnull = ""
        if scandir == "/home/01_scan":
            devnull = " > /dev/null"

        c1 = int(pars[0]) - 48   #  den Eingabeparameter normieren auf die Zifferntaste
        if c1 < 0:
            c1 = c1 + 48
            
        for sdev in [ 
            'escl:https://192.168.153.73:443',
            'escl:http://2a02:810d:9880:7fc2:82ce:62ff:fe5a:760c:8080',
            'escl:http://192.168.153.73:8080',
            'hpaio:/net/hp_colorlaserjet_mfp_m278-m281?ip=192.168.153.73'    #  find the scan device
                      ]:
            if not re.search(scandev,sdev):
                continue
            if not re.search(r"not +supported",os.popen("scanimage -d " + sdev + " / 2>&1").read()):
                scan_device = sdev
                break

        print ("SCAN DEVICE: " + scan_device)

        if c1 == 0:     #   leave the loop
            open("exit.txt","w").write("1\n")
            os.system("chmod 775 exit.txt")
            
        if c1 in [5,7]:  #  get the scan file name
            listdir = os.listdir(scandir)
            zaehler = 0
            for file in listdir:
                m = re.search(r"hpscan(\d+)\.pdf$",file)
                if m:
                    zaehler = max(zaehler,int(m.group(1)))
            zaehler = zaehler + 1
            datei   = "hpscan" + ("%03u" % (int(zaehler)-0))
            datei_1 = "hpscan" + ("%03u" % (int(zaehler)-1))
                
        if os.path.isfile(scandir+"/double_"+datei+".pdf"):
            os.unlink(scandir+"/double_"+datei+".pdf")
        if os.path.isfile(scandir+"/ocr_"+datei+".pdf"):
            os.unlink(scandir+"/ocr_"+datei+".pdf")
        if os.path.isfile(scandir+"/ocr_double_"+datei+".pdf"):
            os.unlink(scandir+"/ocr_double_"+datei+".pdf")
#        print("cd "+scandir +"; hp-scan --size=a4 -d " + scan_device +    #  the SCAN
#                  [""," --adf"][int((c1-5)/2)] + " --mode LineArt -o" + datei +".pdf " + devnull + "; chmod 775 " + datei +".pdf")
        os.system("cd "+scandir +"; hp-scan --size=a4 -d " + scan_device +    #  the SCAN
                  [""," --adf"][int((c1-5)/2)] + " --mode " + self.colormode + " -o" + datei +".pdf " + devnull + "; chmod 775 " + datei +".pdf")
        self.beep([0,100,500,100])
        
        self.scanconcat([datei_1+".pdf",datei+".pdf",scandir,c1])
            
#*****************************************************************************
            
    def scanconcat (self,pars):
        """Combine the newest scan with the previous one using ``pdftk``.

        Args:
            pars: ``pars[0]`` previous scan file, ``pars[1]`` newest scan file,
                optional ``pars[2]`` scan directory (default ``.``), optional
                ``pars[3]`` key code (default 7).  With 5 the newest scan is
                appended (output ``append_<newest>``); with 7 the pages of both
                scans are interleaved (output ``double_<newest>``).
        """
    
        datei_1 = pars[0]
        datei   = pars[1]
        try:
            scandir = pars[2]
        except:
            scandir = "."
        try:
            c1 = pars[3]
        except:
            c1 = 7

        # Page 99999999 is requested on purpose: the page count is parsed out
        # of the message that pdftk prints in response.
        text = os.popen("cd " + scandir + "; pdftk " + datei_1 + " cat 99999999 xyz.pdf 2>&1").read()  #  Check whether double-paged document
#        print (text)
        m1   = re.search(r"input +PDF +has. +(\d+) +pages",text)                       #  is found
        # If the previous scan has a page count but the newest file is missing,
        # a placeholder file containing "1\n" is created under its name.
        if m1 and not os.path.isfile(scandir+"/"+datei):
            open(scandir+"/"+datei,"w").write("1\n")
        text = os.popen("cd " + scandir + "; pdftk " + datei + " cat 99999999 xyz.pdf 2>&1").read()
#        print (text)
        m    = re.search(r"input +PDF +has. +(\d+) +pages",text)
        
        # Key 5 and a one-page previous scan: append the newest scan, continuing
        # an existing append_<previous> file if there is one.
        if c1 == 5 and m1 and int(m1.group(1)) == 1:
            if os.path.isfile(scandir+"/append_"+datei_1):
                os.system("cd " + scandir + "; pdftk append_" + datei_1 + " " + datei +
                          " cat output append_" + datei)
            else:
                os.system("cd " + scandir + "; pdftk " + datei_1 + "  " + datei +
                          " cat output append_" + datei)

#        print (c1,m.group(1),m1.group(1))
        if c1 == 7 and m and m1 and abs(int(m.group(1))-int(m1.group(1))) < 2:  #  Page counts differs at most about 1
            # Build the page order for the concatenated file: alternately the
            # next page from the front and the next page from the back.
            catparameter        = [] 
            (seitenzahl,offset) = ( int(m.group(1))+int(m1.group(1)) , 1-max(0,int(m.group(1))-int(m1.group(1))) )  
            while (0 == 0):
#                print (catparameter)
                newpages = [ int(len(catparameter)/2 + offset), int(seitenzahl - len(catparameter)/2) ]
                if int(newpages[0]) > int(newpages[1]):
                    break
                # NOTE(review): after newpages is shortened to one element here,
                # newpages[1] in the next statement looks like it raises
                # IndexError -- this seems reachable whenever the two page
                # counts differ by one; confirm the intended page order.
                if newpages[0] == newpages[1] or int(newpages[0]) == 0:
                    newpages = [ newpages[0] ]
                catparameter = catparameter + [str(newpages[0]),str(newpages[1])]
            os.system("cd " + scandir + "; pdftk " + datei_1 + "  " + datei + "  cat output xyztmp.pdf")
            os.system("cd " + scandir + "; pdftk xyztmp.pdf cat " + " ".join(catparameter) +
                      " output double_" + datei)
            os.unlink(scandir+"/xyztmp.pdf")

        print (scandir,datei)

        
#        self.scan_ocr(scandir,datei)
#        self.scan_ocr(scandir,"append_" + datei)
#        self.scan_ocr(scandir,"double_" + datei)

#****************************************************

    def xxscan_ocr (self,scandir,datei):

        
        if not os.path.isfile(scandir+"/"+datei + ".pdf"):
            return()
            
        os.system("cd "+scandir +"; cp " + datei + ".pdf " + datei + "_1.pdf; " +
                   "abbyyocr -adt  -rl German -ido -if " + datei + "_1.pdf -tet UTF8 -of " + datei + "_2.txt; ")

        if not os.path.isfile(scandir+"/"+datei+"_2.txt"):
            return()
            
        text = open(scandir+"/"+datei+"_2.txt").read()
        text = re.sub(r"ä","ae",text,99999999)
        text = re.sub(r"ö","oe",text,99999999)
        text = re.sub(r"ü","ue",text,99999999)
        text = re.sub(r"Ä","Ae",text,99999999)
        text = re.sub(r"Ö","Oe",text,99999999)
        text = re.sub(r"Ü","Ue",text,99999999)
        text = re.sub(r"ß","ss",text,99999999)
        text = re.sub(r"\n","\n"*1000,text,99999999)
        
        open(scandir+"/"+datei+"_2.txt","w").write(text)
        os.system("cd "+scandir +"; a2ps -B -1 -l 99999999 -o " + datei + "_2.ps " + datei + "_2.txt; " +
                  "ps2pdf " + datei + "_2.ps " + datei + "_2.pdf; " +
                  "pdftk " + datei + "_1.pdf " + datei + "_2.pdf cat output ocr_" + datei + ".pdf")
        print ("pdf file with ocr text appended ...")
        os.unlink(scandir+"/"+datei + "_1.pdf")
        os.unlink(scandir+"/"+datei + "_2.pdf")
        os.unlink(scandir+"/"+datei + "_2.txt")
        os.unlink(scandir+"/"+datei + "_2.ps")


#****************************************************

    def xxocr (self,pars):

        datei = pars[0] 
        if not os.path.isfile(datei):
            return()
        
        try:
            abbyy_par = pars[1]
        except:
            abbyy_par = " -adt -rl German "
        
            
        text0   = open(datei).read()
        streams = ""
        while (0 == 0):
            m = re.search(r"^(.*?\s)stream\s*(.*?)\s+endstream(.*)$",text0,re.DOTALL)
            if not m:
                break
            if len(m.group(3)) > 20:
                streams = streams + m.group(2)[0:30] + "\n"
            text0 = m.group(3)

        zaehler      = "000001"
        ocr_complete = True
        del_datei    = []

        while (0 == 0):
            datei1 = "__xx_yy_zz__" + zaehler
            os.system("pdftk " + datei + " cat " + zaehler + " output " + datei1 + ".pdf 2>&1")
            if not os.path.isfile(datei1+".pdf"):
                break
            text = os.popen("pdftotext " + datei1 + ".pdf 2>&1").read()
            print("DDDD",datei)
            if "ocr" in datei:
                text = ""
#            print(glob.glob("./no_pdftotext*"))
            if len(glob.glob("./no_pdftotext*")) > 0:
                text = ""
            if re.search(r"\* *\* *\* *O *C *R *S *C *A *N *\* *\* *\*",text):
                print ("take page " + str(int(zaehler)))
                del_datei.append(datei1+".pdf")
            else:
                print ("abbyyocr on page " + str(int(zaehler)))
                if not os.path.isfile(datei1 + "_watermark.txt"):
                    os.system("abbyyocr " + abbyypar + " -ido -if " + datei1 + ".pdf -tet UTF8 -of " + datei1 + "_watermark.txt")
                text = "\n* * * OCR SCAN * * *"+"\n" + open(datei1+"_watermark.txt").read() + "\n"
                text = re.sub(r"ä","ae",text,99999999)
                text = re.sub(r"ö","oe",text,99999999)
                text = re.sub(r"ü","ue",text,99999999)
                text = re.sub(r"Ä","Ae",text,99999999)
                text = re.sub(r"Ö","Oe",text,99999999)
                text = re.sub(r"Ü","Ue",text,99999999)
                text = re.sub(r"ß","ss",text,99999999)
                text = re.sub(r"\n","\n"*10000,text,99999999)
                open(datei1+"_watermark.txt","w").write(text)
                os.system("a2ps -B -1 -l 9999999 --borders=no -o " + datei1 + "_watermark.ps " + datei1 + "_watermark.txt")
                os.system("ps2pdf " + datei1 + "_watermark.ps " + datei1 + "_watermark.pdf")
                os.system("pdftk " + datei1 + ".pdf background " + datei1 + "_watermark.pdf output " + datei1 + "_ocr.pdf")
                os.unlink(datei1 + "_watermark.txt")
                os.unlink(datei1 + "_watermark.ps")
                os.unlink(datei1 + "_watermark.pdf")
                os.unlink(datei1 + ".pdf")
                ocr_complete = False
                del_datei.append(datei1+"_ocr.pdf")
            zaehler = "%06u" % (int(zaehler) + 1)

        if not ocr_complete:
            os.system("pdftk " + " ".join(del_datei) + " cat output " + datei)


        text = open(datei).read()   #  mark the additional streams, that they can be deleted again
        if not streams == "":
            streams = "STREAMS:" + "\n" + streams
        open(datei,"w").write(text+streams)


        for datei1 in del_datei:
            os.unlink(datei1)


#****************************************************

    def ocr_clear (self,pars):

        datei = pars[0] 
        if not os.path.isfile(datei):
            return()
            
        text0 = open(datei).read()
        m     = re.search(r"^(.*%%EOF.*?)STREAMS:(.*)$",text0,re.DOTALL)
        if not m:
            return()
            
        text0   = m.group(1)
        streams = m.group(2).split("\n")

        text1   = ""
        while (0 == 0):
            m = re.search(r"^(.*?\s)(stream\s*)(.*?)(\s+endstream)(.*)$",text0,re.DOTALL)
            if not m:
                text1 = text1 + text0
                break
            text1 = text1 + m.group(1) + m.group(2)
            if m.group(3)[0:30] in streams or len(m.group(3)) < 20:
                text1 = text1 + m.group(3)
            text1 = text1 + m.group(4)
            text0 = m.group(5)

        open("__xx__yy__zz__ww__.pdf","w").write(text1)
        os.system("pdftk __xx__yy__zz__ww__.pdf cat output " + datei)
        os.unlink("__xx__yy__zz__ww__.pdf")

#****************************************************

    def ocr_sync8 (self,pars):
    
        self.ocr8 = True
        self.abbyy = False
        self.ocr_sync(pars)

#****************************************************
 
    def ocr_sync (self,pars):
    
        import hashlib
        import base64
        
        try:
            self.ocr8
        except:
            self.ocr8 = False

        pdffiles = {}

        if not 'abbyy' in vars(self):
#            print os.popen("abbyyocr --help 2>&1").read()
            self.abbyy = re.search(r"FineReader Engine",os.popen("abbyyocr11 --help 2>&1").read())
            if not self.abbyy:
                self.abbyy = re.search(r"ABBYY CLI OCR 11 for Linux",os.popen("abbyyocr11 --help 2>&1").read())

        dir = pars[0]

        if os.path.isfile(dir + "/.gitignore"):
            print("wwww")
            text4 = open(dir+"/.gitignore").read()
            if "__no_ocr__" in text4:
                return()
        
        if os.path.isfile(dir+"/ocr.par"):
            ocr_par = re.sub(r"\n","",open(dir+"/ocr.par").readline(),99)
        else:
            ocr_par = "-adt -rl German -aeate "  #  -ascm  (abbyyocr 8)
            ocr_par = self.TESSERACT + " "  #   tesseract
        files = {}

        for item in os.listdir(dir):

            if os.path.isdir(dir+"/"+item):
                self.ocr_sync([dir+"/"+item])
            else:
                file = dir + "/" + item
#                if "_orig" in file or "orig_" in file or "_ORIG" in file or "ORIG_" in file:
#                    try:
#                        os.unlink(file)
#                    except:
#                        pass
                m    = re.search(r"^(.*)\.ocr$",file)
                if m:
                    m1 = None
                    try:
                        m1 = re.search(r"ID\: +([A-Za-z0-9\/\.\+-]+), PARAMETERS.?\: +(.*)",open(file).read())
                    except Exception as e:
                        print(item,e)
                    if m1:
                        files[m1.group(1)] = [file,m1.group(2).strip()]

        for item in os.listdir(dir):
        
            file = dir + "/" + item
#            if not re.search(r"^[a-zA-Z0-9\.\–\_]+$",file):
#                continue
                
            print (file)
            
            m    = re.search(r"^(.*)\.pdf$",file)
            if not re.search(r"^[\:\\\/a-zA-Z0-9\.\_-]+$",file):
                continue
            if "NOT_VALID" in file:
                continue


            if m:
                froot  = m.group(1)
                try:
                    text7  = open(froot+".pdf",errors="replace").read()
                except Exception as e:
                    print(file,e)
                ispdf7 = re.search(r"1\.[012345678]",text7[0:20])
                md5new = hashlib.md5(text7.encode(encoding="utf-8")).digest()
                md5new = str(base64.b64encode(md5new))
                md5new = re.sub(r"^(b\'|)(.*)\=\=\'?$","\\2",md5new)
                if md5new in pdffiles:
                    pdffiles[md5new].append(froot+".pdf")
                else:
                    pdffiles[md5new] = [froot+".pdf"]
                if md5new in files:
                    print (files[md5new])
                    print (file)
                    print (md5new)
                    print (ocr_par)
                    print (files[md5new][1])
                    if files[md5new][1] in (ocr_par.strip(),"pdftotext","ps2ascii"):
                        if not (froot + ".ocr") == files[md5new][0]:
                            if os.path.isfile(files[md5new][0]):
                                os.rename(files[md5new][0],froot+".ocr")
                        continue
                print (file)
                
                if "_ocr" in froot: 
                    os.system("cp " + froot + ".pdf " + froot + ".ocr")
                    ocr_par1 = "ocr"
                elif len(glob.glob(dir+"/no_pdftotext*")) > 0:
                    os.system("cp " + froot + ".pdf " + froot + ".ocr")
                    ocr_par1 = "ocr"
                elif len(glob.glob(dir+"/no_ocr*")) > 0:
                    print("continue")
                    continue
                elif ispdf7:
                    os.system("pdftotext -layout "+froot+".pdf; mv " + froot + ".txt " + froot + ".ocr")
                    ocr_par1 = "pdftotext"
                else:
                    os.system("cp " + froot + ".pdf " + froot + ".ocr")
                    ocr_par1 = "direct"
#                    os.system("ps2ascii "+froot+".pdf > "+froot+".ocr")
#                    ocr_par1 = "ps2ascii"
                print (ocr_par1)

#                ocr_par1 = "ocr"
#                os.system("cp " + froot + ".pdf " + froot + ".ocr")
                
                try:
                    ocr_text = open(froot+".ocr",errors="replace").read()
                except:
                    ocr_text = ""
                   
#                print(ocr_text)
#                print(ocr_par1) 
#                print(123)

                if 0 == 0 or not ocr_text == "" and ((not re.search(r"[A-Za-z].*[A-Za-z].*[A-Za-z]",ocr_text,re.DOTALL)
                      and not ocr_par1 == "direct") or "PDF Splitter" in ocr_text or ocr_par1 == "ocr"):
#                    print(1111)
#                if 0 == 0:
#                    print(887766)
                    if self.abbyy:
                        print ("   --->  OCR")
                        if os.path.isfile(froot+".ocm"):
                            os.system("cp " + froot + ".ocm " + froot + ".ocr")
                        else:
#                            os.system("abbyyocr " + ocr_par + " -if "+froot+".pdf -rkl -of "+froot+".ocr")

                            if self.ocr8:
                                ocr_par2 = re.sub(r"-aeate","-ascm",ocr_par)
                                ocr_par2 = re.sub(r"-adt ","",ocr_par2)
                                os.system("abbyyocr " + ocr_par2 + " -if "+froot+
                                          ".pdf -rkl -tpb -tet UTF8 -of "+froot+".ocr")
                            else:
                                os.system("abbyyocr11 " + ocr_par + " -if "+froot+
                                          ".pdf -f TextUnicodeDefaults -tel -tpb -tet UTF8 -rkl -trl -of "+froot+".ocr")


                        ocr_par1 = ocr_par
                        print("->",ocr_par1)
                    else:
#                        print(456)
                        ocr_par1 = ocr_par
                        os.system("pdftoppm -jpeg -r 300 " + froot + ".pdf t_e_s_s_e_r_a_c_t")
                        single_files = glob.glob("t_e_s_s_e_r_a_c_t*jpg")
                        single_files.sort()
                        text7 = ""
                        for single_file in single_files:
                            if os.path.isfile("eng.eng"):
                                print("English")
                                os.system("tesseract " + single_file + " " + single_file + " --oem 0 -l eng --psm 11 pdf")
                                ocr_par1 = re.sub(r"deu","eng",self.TESSERACT)
                            else:
                                os.system("tesseract " + single_file + " " + single_file + " --oem 0 -l deu --psm 11 pdf")
                                ocr_par1 = self.TESSERACT
                            os.system("pdftotext -layout " + single_file + ".pdf")
#                            if not text7 == "":
#                                text7 = text7 + ""
                            os.unlink(single_file)
                            os.unlink(single_file+".pdf")
                            try:
                                text7 = text7 + open(single_file+".txt").read()
                                os.unlink(single_file+".txt")
                            except:
                                pass
                        open(froot+".ocr","w").write(text7)
#                        os.remove(froot+".ocr")
#                        return()
                if self.ocr8:
                    o = "8"
                else:
                    o = ""
                o = ""
                open(froot+".ocr","a").write("\n\n\nID: "+md5new+", PARAMETERS" + o + ": " + ocr_par1 + "\n")
                        
        for item in os.listdir(dir):
        
            file = dir + "/" + item
            m    = re.search(r"^(.*)\.ocr$",file)
            if m:
                froot  = m.group(1)
                if not os.path.isfile(froot+".pdf"):
                    os.remove(file)
                    
        for md5key in pdffiles:
            if len(pdffiles[md5key]) > 1:
                print("doublette: --------------------------")
                for file in pdffiles[md5key]:
                    print("doublette: " + file)
                print("")
                

#****************************************************

    def beep (self,intervals):

        print ("beep")
        for interval in intervals:
            time.sleep(interval*0.001)
            os.system("echo '\a'")


#****************************************************

    def sms(self, pars):
        """Initialise the modem on ``/dev/ttyUSB0`` and send the SIM PIN command.

        Two AT commands are written to the serial port (460800 baud, 5 s read
        timeout) and two response lines are printed after each of them.  No
        message is sent by this method.  The port is closed again when the
        method ends, also after an error.

        Args:
            pars: Unused.
        """
        import serial

        phone = serial.Serial("/dev/ttyUSB0",460800,timeout=5)
        try:
            # The commands are sent as bytes, which is what pyserial's
            # write() expects under Python 3.
            phone.write(b"ATE0X0V1S7=45S0=0Q0\r")
            print (phone.readline())
            print (phone.readline())
            time.sleep(0.5)
            # NOTE(review): unlike the first command this one carries no
            # trailing "\r" -- confirm whether the modem acts on it.  The PIN
            # is hard-coded here and should presumably come from configuration.
            phone.write(b"at+cpin=\"2133\"")
            print (phone.readline())
            print (phone.readline())
            time.sleep(0.5)
        finally:
            phone.close()
        

#      $o = _mc($modem,$cr,"at+CPMS=\"SM\"");
#      print $o;
#      last if ($o !~ /ERROR/);
#      print _mc($modem,$cr,"at+cpin=\"2121\"");
#   }
##   print _mc($modem,$cr,"at+cmee=1");
#   print _mc($modem,$cr,"at+csca=\"01770610000\"");
##   print _mc($modem,$cr,"at+cmgf=?");
#   print _mc($modem,$cr,"at+cmgf=1");
#
#   foreach $o (split(/,/,$nr)) {
#      print _mc($modem,$cr,"at+cmgs=\"$o\"");
#      print _mc($modem,"",$text);
#      $modem->atsend(chr(26));
#      print $text . "\n";
#      $modem->atsend(chr(27));
#      sleep 5;
#      print _mc($modem,$cr,"at+chup");
#   }
#   print _mc($modem,$cr,"atz");
#
#   return(1);

#****************************************************

    def md_pptx(self,pars):

        from pptx import Presentation
        from pptx.util import Inches, Pt
#        from pptx.dml.color import RGBColor
#        from pptx.enum.dml import MSO_THEME_COLOR
        from pptx.enum.text import PP_ALIGN
    
        try:       
            def textbox(t1,t2,t3):
                t_slide = prs.slides.add_slide(title_slide_layout)
                t = t_slide.placeholders[0]
                st = t_slide.placeholders[1]
                j = True
                while j:
                    if md_text[i+1].isspace() and i < len(md_text) - 1:
                        del md_text[i+1]
                    else:
                        j = False                        
                if h_level == 1:
                    t1 = l.strip('\n')
                    if l.startswith('#'): 
                        t1 = t1[h_level:]
                    t.text = t1
                    t_slide.placeholders[0].text_frame.paragraphs[0].font.size = Pt(44)
#                    t_slide.placeholders[0].text_frame.paragraphs[0].font.color.rgb = RGBColor(5,65,150)
                    latest_t = t1
                    pht = t1 
                elif h_level == 2:
                    t2 = l.strip('\n')
                    if l.startswith('#'):
                        t2 = t2[h_level:]
                    t.text = t1
                    t_slide.placeholders[0].text_frame.paragraphs[0].font.size = Pt(44)
#                    t_slide.placeholders[0].text_frame.paragraphs[0].font.color.rgb = RGBColor(5,65,150)
                    st.text = t2
                    latest_t = t2
                    pht = t2
                elif h_level == 3:
                    t3 = l[h_level:].strip('\n')
                    t.text = t2
                    st.text = t3
                    latest_t = t3
                    pht = t3 
                return (t1,t2,t3,pht,latest_t)    

            def new_slide():
                l_slide = prs.slides.add_slide(line_slide_layout) 
                l_slide.placeholders[0].text = pht
                tf = l_slide.shapes[1].text_frame
                tf.word_wrap = True
                p = tf.paragraphs[0]
                return(tf,p,l_slide)

            def emphase(lc,p,r):        
                str_cont = True
                while str_cont:
                    l_content = re.findall(r'\_\_\_.*?\_\_\_|\_\_.*?\_\_|\_.*?\_|\*\*\*.*?\*\*\*|\*\*.*?\*\*|\*.*?\*|\`.*?\`|\[.*?\]\(.*?\)', lc)
                    if l_content:                                                       #  inline bold / italic / code / link conditions
                        l_part = lc.partition(l_content[0])
                        r.text = l_part[0]
                        lp2 = l_part[2]
                        r = p.add_run()
                        r.text = l_part[1]
                        if r.text.startswith('***') or r.text.startswith('___'):
                            r.font.bold = True
                            r.font.italic = True
                            r.text = r.text[3:-3]
                        elif r.text.startswith('**') or r.text.startswith('__'):
                            r.font.bold = True
                            r.text = r.text[2:-2]
                        elif r.text.startswith('*') or r.text.startswith('_'):
                            r.font.italic = True
                            r.text = r.text[1:-1]
                        elif r.text.startswith('`'):
                            r.font.name = 'Consolas'
                            r.font.size = Pt(15)
                            r.text = r.text[1:-1]
                        elif r.text.startswith('['):
                            r.text = r.text.split('(')[0][1:-1]
                            hlink = r.hyperlink
                            hlink.address = l_part[1].split('(')[1][0:-1]
                            lp2 =re.sub('\(.*?\)','', lp2)
                        r = p.add_run()
                        r.text = lp2
                        lc = r.text
                    else:
                        str_cont = False

            if len(pars) < 2:
                print ('\nmd_pptx: : missing parameter')
                return
                        
            md_file = pars[0]
            with open(md_file) as f:
                md_text = f.readlines()
            md_text.append('\n')
            md_text = [l.expandtabs(4) for l in md_text] 
            
            if len(pars) == 3:
                sf = pars[2]
                prs = Presentation('template.pptx')
            else:
                prs = Presentation()
                
            title_slide_layout = prs.slide_layouts[0]
            line_slide_layout = prs.slide_layouts[1]
            
            t1 = ''
            t2 = ''
            t3 = ''
            md_list = False
            md_code = False
            new_head = False
            md_tab = 0
            t_coln = 0
            t_cola = ''
            t_rown = 1
            t_cell = []
            latest_t = ''
            h_level = 0
            p_level = 0
            header = False
            md_break = False
            
            for i, l in enumerate(md_text):
                if i < len(md_text) - 1 and (header or l.startswith('#') or re.match('^\=+\n',md_text[i+1]) or re.match('^-+\n',md_text[i+1])):
                    header = True 
                    
                    if l[0] == '#' and l[1] != '#' or re.match('^\=+\n',md_text[i+1]):        # headers                   
                        if md_text[i+1].startswith('='):
                            md_text[i+1] = '\n'
                        h_level = 1
                        t1,t2,t3,pht,latest_t = textbox(t1,t2,t3)
                        new_head = True
                        if md_text[i+1].startswith('#') or re.match('^\=+\n|^-+\n',md_text[i+2]):
                            pass
                        else:
                            tf,p,l_slide = new_slide() 
                    elif l[0:2] == '##' and l[2] != '#' or re.match('^-+\n',md_text[i+1]):
                        if md_text[i+1].startswith('-'):
                            md_text[i+1] = '\n'
                        h_level = 2
                        t1,t2,t3,pht,latest_t = textbox(t1,t2,t3)
                        new_head = True
                        if md_text[i+1].startswith('#') or re.match('^\=+\n|^-+\n',md_text[i+2]): 
                            pass
                        else:
                            tf,p,l_slide = new_slide()                        
                    elif l[0:3] == '###' and l[3] != '#':
                        h_level = 3
                        t1,t2,t3,pht,latest_t = textbox(t1,t2,t3)
                        new_head = True
                        if md_text[i+1].startswith('#') or re.match('^\=+\n|^-+\n',md_text[i+2]):
                            pass
                        else:
                            tf,p,l_slide = new_slide()
                        
                    elif not l.isspace() and l.startswith('![',0) and not md_list:               # image
                        s = l.split(']')
                        m = re.search(r'.*\(.*?\)', s[1])
                        if m:
                            img_path = m.group(0)[1:-1]
                        l_slide.shapes[1].left = -10000000
                        top = Inches(1.7)
                        left = Inches(0.6)
                        height = Inches(4.9)
                        pic = l_slide.shapes.add_picture(img_path, left, top, height = height)
                        top = Inches(6.7)
                        width = Inches(8)
                        height = Inches(0.5)
                        txBox = l_slide.shapes.add_textbox(left, top, width, height)
                        tf = txBox.text_frame
                        m = re.search(r'.*\[.*?\]', l)
                        if m:
                            tf.text = m.group(0)[2:-1]
                            p = tf.paragraphs[0]
                            p.font.italic = True
    
                    elif not l.isspace() and md_tab == 1 and not md_list:                       # table
                        t_coln = l.count('|') + 1
                        cell = map(str.strip, l.split('|'))
                        t_cell = t_cell + cell
                        md_tab = 2
                    elif md_tab == 2:
                        t_cola = map(str.strip, l.split('|'))
                        i = 0
                        while i < t_coln:
                            if t_cola[i].startswith(':') and not t_cola[i].endswith(':'):
                                t_cola[i] = PP_ALIGN.LEFT
                            elif t_cola[i].endswith(':') and not t_cola[i].startswith(':'):
                                t_cola[i] = PP_ALIGN.RIGHT
                            elif t_cola[i].endswith(':') and t_cola[i].startswith(':'):
                               t_cola[i] = PP_ALIGN.CENTER 
                            else:
                                t_cola[i] = None
                            i = i + 1
                        md_tab = 3
                    elif md_tab == 3 and l.count('|') == t_coln - 1:
                        t_rown = t_rown + 1
                        cell = map(str.strip, l.split('|'))
                        t_cell = t_cell + cell
                        if md_text[i+1].count('|') != t_coln - 1:
                            l_slide.shapes[1].left = -10000000
                            top = Inches(1.7)
                            left = Inches(0.6)
                            height = Inches(0.31 * t_rown)
                            width = Inches(8.8)
                            tab = l_slide.shapes.add_table(t_rown, t_coln, left, top, width, height).table 
                            i = 0
                            row = 0
                            while i < len(t_cell):
                                tab.cell(row,i%t_coln).text = t_cell[i]
                                tab.cell(row,i%t_coln).text_frame.paragraphs[0].alignment = t_cola[i%t_coln]
                                if t_cell[i] != '':
                                    p = tab.cell(row,i%t_coln).text_frame.paragraphs[0]
                                    r = tab.cell(row,i%t_coln).text_frame.paragraphs[0].runs[0]
                                    emphase(t_cell[i],p,r) 
                                i= i + 1
                                if i%t_coln == 0:
                                    row = row + 1
                            md_tab = 0
                            t_coln = 0
                            t_cola = ''
                            t_rown = 1
                            t_cell = []
                                                     
                    elif not l.isspace() and re.match('^#{7,}\n', l):                           # add slide
                        l_slide = prs.slides.add_slide(line_slide_layout) 
                        l_slide.shapes[0].text = latest_t                        
                        tf = l_slide.shapes[1].text_frame
                        tf.word_wrap = True
                        p = tf.paragraphs[0]
                    
                    elif (md_text[i-1].isspace() or new_head) and l[:4].isspace() and not md_list or md_code: # code block
                        new_head = False
                        r = p.add_run()
                        r.font.name = 'Consolas'
                        r.font.size = Pt(15)
                        l = l.rstrip()
                        if not md_text[i+1].isspace():
                            l = l + '\n'
                        r.text = l[2:]
                        md_code = True
                        if md_text[i+1].isspace() or not md_text[i+1][:4].isspace():
                            md_code = False
                        
                    elif not l.isspace():                                                       # list
                        if p_level == 4:
                            p.level = 4
                            if re.match(' *?[-|\+|\*] ', l):
                                s = re.split('-|\+|\*', l, 1)
                                l = s[1]                        
                        elif p_level == 3:
                            p.level = 3
                            if re.match(' *?[-|\+|\*] ', l):
                                s = re.split('-|\+|\*', l, 1)
                                l = s[1]
                        elif p_level == 2:
                            p.level = 2
                            if re.match(' *?[-|\+|\*] ', l):
                                s = re.split('-|\+|\*', l, 1)
                                l = s[1]
                            if l.endswith('  \n'):
                                p_level = 5
#                        elif p_level == 1:
#                            p.level = 1
#                            s = re.split('^[0-9]+\.', l, 1)
#                            l = s[1]
                        elif p_level == 0:
                            p.level = 0
                            
                        r = p.add_run()
    
                        if not l.endswith('  \n'):                                   # line break condition
                            l = l.rstrip() + ' ' 
                            md_break = False
                        elif p.level > 0:
                            md_break = True
                            l = l.rstrip('\n') 
                            
                        l = re.sub(' +', ' ', l)
                        r.text = l
                        lc = l
                        emphase(lc,p,r)
                        
                    if md_text[i+1].isspace() and not md_text[i].isspace():
                        if not md_text[i].startswith('!') and not re.match('^#{7,}\n', l):
                            p = tf.add_paragraph()
                            if not md_list:
                                p = tf.add_paragraph()
                                
                    if  md_text[i+1].count('|') != 0 and md_text[i+1].count('|') == md_text[i+2].count('|') and md_tab == 0 and re.match('^(?!\s)', md_text[i+1]):
                        md_tab = 1
                        
#                    elif re.match(' {0,3}?^[0-9]+\. ', md_text[i+1]):
#                        if not new_head and not md_text[i].isspace():
#                            p = tf.add_paragraph()
#                        p_level = 1
#                        md_list = True
                    if re.match(' {0,3}?[-|\+|\*] ', md_text[i+1]):
                        if not new_head and not md_text[i].isspace():
                            p = tf.add_paragraph()
                        p_level = 2
                        md_list = True
                    elif re.match(' {4,7}?[-|\+|\*] ', md_text[i+1]) and md_list:
                        if not new_head and not md_text[i].isspace():
                            p = tf.add_paragraph()      
                        p_level = 3
                    elif re.match(' {8,11}?[-|\+|\*] ', md_text[i+1]) and md_list:
                        if not new_head and not md_text[i].isspace():
                            p = tf.add_paragraph()      
                        p_level = 4 
                        
                    if p_level == 5 and not md_text[i+1].isspace() and md_break:
                        p = tf.add_paragraph()
                        p.level = 5
                        
                    if not md_text[i+1].isspace() and not md_text[i+1].startswith('![',0) and not md_text[i+1][:4].isspace():
                        new_head = False

                    if md_list and md_text[i].isspace():
                        s = md_text[i+1].lstrip()
                        if re.match('^(?!-|\+|\*).*', s) or md_text[i+1].isspace():
#                        if re.match('^(?!-|\+|\*|^[0-9]+\.).*', s) or md_text[i+1].isspace():
                            md_list = False
                            p_level = 0
                            p = tf.add_paragraph()
                        
                elif i < len(md_text) - 1 and (md_text[i+1].startswith('#') or re.match('^=+\n',md_text[i+2]) or re.match('^-+\n',md_text[i+2])):
                    header = True
                
            prs.save(pars[1])
               
        except Exception as inst:
            print ('\nmd_pptx: ' + str(inst))
            
#****************************************************  

    def docx_ins(self,pars):

        """
        This module enables manipulations in an existing docx file via a command line.
        
        prerequisite:
        -------------
        pip install python-docx
        
        Placeholder:
        ------------
        A text string can be inserted at a placeholder within an arbitrary text passage of a docx file.
        The placeholder can be entered in a markdown file, which is converted then to a docx file,
        or can be entered in a docx file. 
        It can also be an already existing text in the file. There can be any number of placeholders.
        
        The syntax of a placeholder is {<placeholder text>}. Formatting the placeholder, e.g. bold, 
        affects the insertion.
        
        Command:
        --------
            
            yc docx_ins <docx file> <placeholder text> "<text string>"
                
        to insert <text string> at the placeholder.
        
        Before the first manipulation of <docx file> it is saved as <docx file>_save

        Parameters:
        -----------
        pars[0]   docx file
        pars[1]   placeholder text, without the braces
        pars[2]   text string to insert

        Only the first occurrence of the placeholder in the paragraphs of
        the document is replaced. The file is written only if a placeholder
        was found. Errors are printed as 'insert_docx: <message>'.
        """  

        # Validate the parameters before importing python-docx, so that a
        # usage error is reported even if the package is not installed.
        if len(pars) < 3:
            print ('\ninsert_docx: missing parameter')
            return

        from docx import Document   

        def as_text(value):
            # Python 2 delivers command line arguments as byte strings, which
            # have to be decoded; in Python 3 they are text already and have
            # no decode method.
            if isinstance(value, bytes):
                return value.decode(sys.getfilesystemencoding())
            return value

        try:
            w_file = pars[0]
            document = Document(w_file)
            # Keep a copy of the unchanged file, created only once.
            if not os.path.isfile(w_file + '_save'):
                document.save(w_file + '_save')
            ins_mark = as_text(pars[1])
            new_value = as_text(pars[2])
            placeholder = '{' + ins_mark + '}'
    
            # If the placeholder is edited in the docx file, more than one run
            # objects may be created. In this case, the split string has to be
            # gathered in only one run object.
            for p in document.paragraphs:
                more_runs = False
                for i, r in enumerate(p.runs):
                    if more_runs:
                        # The previous run ended inside an open '{'. The
                        # gathering stops when this run closes it.
                        closing = r.text.find('}')
                        if closing != -1 and '{' not in r.text[0:closing]:
                            more_runs = False
                        r.text = p.runs[i-1].text + r.text
                        p.runs[i-1].clear()
                    if placeholder in r.text: 
                        r.text = r.text.replace(placeholder, new_value,1)
                        document.save(w_file)
                        return
                    # A '{' without a following '}' means that the
                    # placeholder continues in the next run.
                    opening = r.text.rfind('{')
                    if opening != -1 and '}' not in r.text[opening:]:
                        more_runs = True
                                
        except Exception as inst:
            # Exceptions raised without arguments must not fail here.
            detail = inst.args[0] if inst.args else ''
            if 'Package not found at' in '%s' % (detail,):
                print ('\ninsert_docx: nonexistent docx file')
            elif detail == 13:
                # errno 13, permission denied
                print ('\ninsert_docx: open docx file')
            else:
                print ('\ninsert_docx: ' + str(inst))
                          
#****************************************************    

    def yt  (self,pars):
    
        ytxx = pars[-1]

#        print('youtube-dl https://youtube.com/results?search_query=\"' + "+".join(pars[:-1]) + '\" | tee ytxx')
#        os.system('youtube-dl -e --get-id https://youtube.com/results?search_query=\"' + "+".join(pars[:-1]) + '\" | tee ytxx')
        print('youtube-dl -e --get-id ytsearchall:' + "+".join(pars[:-1]) + ' | tee ytxx')
        os.system('youtube-dl -e --get-id ytsearchall:' + "+".join(pars[:-1]) + ' | tee ytxx')
        time.sleep(1)
        line1 = ""
        text  = ""
        for line in open("ytxx").read().split("\n"):
            if line1 == "":
                line1 = line
            else:
                text  = text + "# youtube-dl " + line + "  # " + line1 + "\n"
                line1 = ""
        open(ytxx,"a").write("\n#=========================\n#  " + 
                             " ".join(pars[:-1]) + 
                             "\n#=========================\n\n" + text)
        os.system("rm ytxx")
#        os.system("joe " + ytxx)
                        


#****************************************************    


if __name__ == "__main__":
    print (sys.argv)
    # The first argument names the method of Util to run, the remaining
    # arguments are passed to it as parameter list. A missing or unknown
    # command is answered with a usage message instead of a traceback.
    command = sys.argv[1] if len(sys.argv) > 1 else None
    method = Util.__dict__.get(command) if command is not None else None
    if command is None or command.startswith('_') or not callable(method):
        commands = sorted(name for name, member in Util.__dict__.items()
                          if callable(member) and not name.startswith('_'))
        print ('usage: ' + sys.argv[0] + ' <command> [<parameter> ...]')
        print ('commands: ' + ' '.join(commands))
        sys.exit(1)
    method(Util(),sys.argv[2:])
bypass 1.0, Devloped By El Moujahidin (the source has been moved and devloped)
Email: contact@elmoujehidin.net bypass 1.0, Devloped By El Moujahidin (the source has been moved and devloped) Email: contact@elmoujehidin.net