import csv
import os
from sys import getsizeof

from numpy import array
from pandas import DataFrame, to_datetime

from versuche.helper import normalice_header

# Column captions (first row of the specimen table) that identify the
# specimen diameter, height and name. 'Probekörberdurchmesser' is a
# misspelling that is kept on purpose because it was the caption the
# original code matched; the correctly spelled variant is accepted as well.
_DIAMETER_CAPTIONS = (
    'Probekörberdurchmesser',
    'Probekörperdurchmesser',
    'Diameter of specimen',
    'PK-Durchmesser',
    'Probekörper-Durchmesser',
)
_NAME_CAPTIONS = ('Probekörperbezeichnung',)
_HEIGHT_CAPTIONS = ('Probekörperhöhe', 'Gap length', 'PK-Höhe')

# detect_tabnum gives up once the row with this 0-based index was inspected.
_LAST_SCANNED_ROW = 100


def detect_tabnum(filename, tabstr, encoding='utf-8'):
    """Search the start of a tab-separated file for a table number.

    Every row is lower-cased cell by cell. A row matches when at least one
    cell contains ``tabstr`` (compared case-insensitively) and one cell is
    exactly ``'plain'``. Only the first 101 rows are inspected.

    :param filename: path of the file to scan
    :param tabstr: text that has to occur in one cell of the row
    :param encoding: text encoding of the file, default: utf-8
    :return: the (lower-cased) second cell of the first matching row, or
        ``False`` when no row matches
    """
    filename = os.path.normpath(filename)
    needle = tabstr.lower()

    with open(filename, 'r', encoding=encoding) as in_file:
        reader = csv.reader(in_file, delimiter='\t')
        for row_index, raw_row in enumerate(reader):
            row = [cell.lower() for cell in raw_row]
            # len(row) > 1 protects the row[1] access below.
            if (len(row) > 1 and 'plain' in row
                    and any(needle in cell for cell in row)):
                return row[1]
            if row_index >= _LAST_SCANNED_ROW:
                return False

    # File ended before the scan limit: report "not found" the same way as
    # for long files instead of falling through with an implicit None.
    return False


def str2float(str):
    """Convert a number written with a decimal comma or point to float.

    :param str: text such as ``'1,5'`` or ``'1.5'`` (the parameter name
        shadows the builtin but is kept for keyword-argument callers)
    :return: the parsed float, or ``None`` when the value is not a string
        or cannot be parsed
    """
    try:
        return float(str.replace(',', '.'))
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: not a str; ValueError: not a number.
        return None


def _cell(row, index):
    """Return ``row[index]``, or None if the column is unknown or absent."""
    if index is None or index >= len(row):
        return None
    return row[index]


def _read_rows(filename, encoding):
    """Read all tab-separated rows that have more than two cells."""
    rows = []
    with open(filename, 'r', encoding=encoding) as in_file:
        reader = csv.reader(in_file, delimiter='\t')
        try:
            for row in reader:
                if len(row) > 2:
                    rows.append(row)
        except (csv.Error, UnicodeDecodeError):
            # Best effort, as before: keep the rows read up to the first
            # malformed or undecodable line.
            pass
    return rows


def _split_tables(rows, table, pkdata):
    """Sort rows into specimen-definition rows and measurement rows.

    The first three characters of the first cell are looked up in
    ``pkdata`` and ``table`` with ``in``, exactly as the selectors were
    used before (substring test for strings, membership for sequences).
    """
    head_rows = []
    data_rows = []
    for row in rows:
        prefix = row[0][0:3]
        if not prefix:
            # '' is contained in every string, so a row with an empty first
            # cell would otherwise be assigned to both tables.
            continue
        if prefix in pkdata:
            head_rows.append(row)
        if prefix in table:
            data_rows.append(row)
    return head_rows, data_rows


def _parse_header(head_rows, debug=False):
    """Build the specimen header from caption, unit and value rows."""
    id_durchmesser = id_hoehe = id_name = None
    unit_durch = unit_hoehe = None
    durchmesser = hoehe = name = None

    for idx, row in enumerate(head_rows):
        if idx == 0:
            # Caption row: remember in which column each quantity lives.
            for idx_name, caption in enumerate(row):
                if caption in _DIAMETER_CAPTIONS:
                    id_durchmesser = idx_name
                elif caption in _NAME_CAPTIONS:
                    id_name = idx_name
                elif caption in _HEIGHT_CAPTIONS:
                    id_hoehe = idx_name
            if debug:
                print(id_durchmesser, id_hoehe, id_name)
        elif idx == 1:
            # Unit row. Each lookup is independent so that one missing
            # column does not discard the others.
            unit_durch = _cell(row, id_durchmesser)
            unit_hoehe = _cell(row, id_hoehe)
        elif idx == 2:
            # Value row; str2float maps a missing cell (None) to None.
            durchmesser = str2float(_cell(row, id_durchmesser))
            hoehe = str2float(_cell(row, id_hoehe))
            name = _cell(row, id_name)

    return {
        'd': durchmesser,
        'h': hoehe,
        'name': name,
        'unit_h': unit_hoehe,
        'unit_d': unit_durch
    }


def _build_frame(data_rows, table, debug=False):
    """Turn caption, unit and value rows of a table into a DataFrame."""
    data_head = None
    data_units = []
    values = []

    for idx, row in enumerate(data_rows):
        # The first two cells of every row are skipped.
        cells = row[2:]
        if idx == 0:
            if debug:
                print('head')
            data_head = cells
        elif idx == 1:
            data_units = cells
        else:
            values.append([str2float(value) for value in cells])

    if data_head is None:
        raise ValueError('no rows found for table {!r}'.format(table))

    data = array(values)
    if debug:
        print(data_head, data_units)

    data = DataFrame(data=data, columns=data_head)
    if debug:
        print(data.head())

    # The cycle counter is stored as an integer.
    if 'N' in data.columns:
        data['N'] = data['N'].astype(int)

    return data


def read_geosys(filename,
                table,
                pkdata='001',
                encoding='utf-8',
                to_si=False,
                debug=False):
    """Read one table and the specimen definition from a Geosys export.

    The file is parsed as tab-separated text; rows with fewer than three
    cells are ignored. Rows are assigned to a table by the first three
    characters of their first cell. Within each table the first row holds
    the column captions, the second the units and the remaining rows the
    values. Numbers may use a decimal comma.

    :param filename: path of the export file
    :param table: table number of the measurement data
    :param pkdata: table number of the specimen definition, default: '001'
    :param encoding: text encoding of the file, default: utf-8
    :param to_si: accepted for compatibility; not used by this function
    :param debug: print intermediate results to stdout
    :return: tuple ``(header, data)``; ``header`` is a dict with the keys
        ``'d'``, ``'h'``, ``'name'``, ``'unit_h'`` and ``'unit_d'`` (values
        are ``None`` when not found), ``data`` is a pandas DataFrame with
        one column per caption of the selected table
    :raises ValueError: if the file contains no rows for ``table``
    """
    filename = os.path.normpath(filename)

    try:
        rows = _read_rows(filename, encoding)
        if debug:
            print('Anz. Datensätze: ', str(len(rows)), getsizeof(rows))

        head_rows, data_rows = _split_tables(rows, table, pkdata)
        del rows
        if debug:
            print('data_clean fin')

        header = _parse_header(head_rows, debug=debug)
        if debug:
            print('header\n', header)

        if debug:
            print('daten umwandel')
        data = _build_frame(data_rows, table, debug=debug)

        return header, data

    except Exception:
        print('Fehler beim lesen')
        raise