# Normalise the text held in ``inhalt`` step by step (step numbers follow the
# original comments).  The steps are order-dependent: later rules see the
# upper-cased, already simplified text produced by earlier ones, so every
# pattern and every expansion below is written in that simplified spelling
# (e.g. "IE" is already "I", "CK" is already "K").

# 1.-2. Replace "ß" by "SS" and strip French/Slavic diacritics.
# No replacement is itself a key, so one translate() pass is equivalent to
# replacing the characters one after another.
_plain_for_accented = {
    "SS": "ß",
    "E": "éèêÉÈÊĕĔěĚ",
    "A": "áàâÁÀÂ",
    "O": "óòôÓÒÔ",
    "U": "úùûÚÙÛ",
    "C": "ćĆ",  # was mapped to "L" (copy of the "ł" rule)
    "L": "łŁ",
    "N": "ńŃ",
    "R": "řŘŕŔ",
    "S": "šŠśŚ",
    "Z": "žŽźŹ",
}
inhalt = inhalt.translate({
    ord(_accented): _plain
    for _plain, _group in _plain_for_accented.items()
    for _accented in _group
})

# 3. Convert everything to upper case (this also turns ä/ö/ü/ë into Ä/Ö/Ü/Ë).
inhalt = inhalt.upper()

# 4. Replace umlauts by their base vowel.
inhalt = inhalt.translate({
    ord("Ä"): "A",
    ord("Ö"): "O",
    ord("Ü"): "U",
    ord("Ë"): "E",
})

# 4.-8. Transcribed umlauts, unified diphthongs, long "i", TH and CK.
# Applied strictly in this order.
_letter_groups = (
    ("AE", "A"),
    ("OE", "O"),
    ("UE", "U"),
    ("AI", "EI"),
    ("EY", "EI"),
    ("AY", "EI"),
    ("IE", "I"),
    ("TH", "T"),
    ("CK", "K"),
)
for _group, _plain in _letter_groups:
    inhalt = inhalt.replace(_group, _plain)

# 9. Replace punctuation by a blank.
inhalt = inhalt.translate(dict.fromkeys(map(ord, "-.,;'´`()/\\"), " "))

# 10. Shorten name prefixes and articles.  Each rule matches the whole word
# followed by a blank; "AUF M"/"AUF N" must be tried before plain "AUF".
_short_for_word = (
    ("ALT", "A"),
    ("ALTE", "A"),
    ("ALTEN", "A"),
    ("ALTEM", "A"),
    ("ALTER", "A"),
    ("AN", "A"),
    ("AM", "A"),
    ("AUF M", "A D"),
    ("AUF N", "A D"),
    ("AUF", "A"),
    ("BEI", "B"),
    ("BEIM", "B"),
    ("DER", "D"),
    # "DIE" has already become "DI" in step 8, so a rule for "DIE" could
    # never match; match the simplified spelling instead.
    ("DI", "D"),
    ("DAS", "D"),
    ("DEM", "D"),
    ("DEN", "D"),
    ("GEMEINDE", "GEM"),
    ("IN", "I"),
    ("IM", "I"),
    ("SANKT", "ST"),
    ("UNTER", "U"),
    ("VOM", "V"),
    ("VON", "V"),
    ("ZU", "Z"),
    ("ZUM", "Z"),
    ("ZUR", "Z"),
)
for _word, _short in _short_for_word:
    inhalt = re.sub(r"\b" + _word + " ", _short + " ", inhalt)

# 11. Expand abbreviated names and unify spelling variants.
inhalt = inhalt.replace("CONRAD", "KONRAD")  # anywhere, also inside words
_full_for_abbreviation = (
    ("ALEX", "ALEXANDER"),
    ("EV", "EVANGELISCHE"),
    ("EVGL", "EVANGELISCHE"),
    ("FRH", "FREIHERR"),
    ("FRHR", "FREIHERR"),
    ("FREIH", "FREIHERR"),
    ("GEBR", "GEBRUDER"),
    ("GERH", "GERHARD"),
    ("GESCHW", "GESCHWISTER"),
    # A spelled-out "GOTTFRIED" is "GOTTFRID" after step 8; expand to the
    # same spelling so both variants end up identical.
    ("GOTTFR", "GOTTFRID"),
    ("HEINR", "HEINRICH"),
    ("KARD", "KARDINAL"),
    ("LUDW", "LUDWIG"),
    ("MAT", "MATIAS"),
    ("MAX", "MAXIMILIAN"),
    ("PF", "PFARRER"),
    ("PROF", "PROFESSOR"),
    ("RICH", "RICHARD"),
    ("WILH", "WILHELM"),  # was replaced by itself
    ("DR", "DOKTOR"),
    ("BGM", "BURGERMEISTER"),
    ("BURGERM", "BURGERMEISTER"),
    ("OBERBURGERM", "BURGERMEISTER"),
    ("ALTBURGERME", "BURGERMEISTER"),
    ("OBERBURGERMEISTER", "BURGERMEISTER"),
    ("ALTBURGERMEISTER", "BURGERMEISTER"),
)
for _abbreviation, _full in _full_for_abbreviation:
    inhalt = re.sub(r"\b" + _abbreviation + r"\b", _full, inhalt)

# 12. Replace unwanted name additions by a blank.
for _addition in ("LANDESHAUPTSTADT", "HAUPTSTADT", "STADT"):
    inhalt = re.sub(r"\b" + _addition + r"\b", " ", inhalt)

# 13. Expand abbreviated word endings (matched at the end of a word only) ...
_full_for_ending = (
    ("STR", "STRASE"),
    ("PL", "PLATZ"),
    ("SIDL", "SIDLUNG"),
)
for _ending, _full in _full_for_ending:
    inhalt = re.sub(_ending + r"\b", _full, inhalt)

# ... and abbreviated region names (matched as whole words).
_full_for_region = (
    ("RHEINL", "RHEINLAND"),
    ("WESTF", "WESTFALEN"),
    ("NRW", "NORDRHEIN WESTFALEN"),
    ("SH", "SCHLESWIG HOLSTEIN"),  # was misspelled "SCHLESWEG"
    ("MV", "MEKLENBURG VORPOMMERN"),
    ("HH", "HAMBURG"),
    ("HB", "BREMEN"),
    ("NI", "NIDERSACHSEN"),
    ("BB", "BRANDENBURG"),
    ("BE", "BERLIN"),
    ("NW", "NORDRHEIN WESTFALEN"),
    ("HE", "HESSEN"),
    ("SN", "SACHSEN"),
    ("RP", "RHEINLAND PFALZ"),
    ("SL", "SAARLAND"),
    ("BW", "BADEN WURTEMBERG"),
    ("BY", "BEIRN"),
)
for _region, _full in _full_for_region:
    inhalt = re.sub(r"\b" + _region + r"\b", _full, inhalt)

# 14. Remove all blanks.
inhalt = inhalt.replace(" ", "")

# 15. Collapse runs of the same character into a single one.
inhalt = re.sub(r"(.)\1+", r"\1", inhalt)