import csv
import os
from sys import getsizeof

from numpy import array
from pandas import to_datetime, DataFrame

from versuche.helper import normalice_header

# Header labels recognised in the specimen-definition table.
# 'Probekörberdurchmesser' (sic) is kept for backward compatibility; the
# correctly spelled variant is accepted as well.
_DIAMETER_LABELS = (
    'Probekörberdurchmesser',
    'Probekörperdurchmesser',
    'Diameter of specimen',
    'PK-Durchmesser',
    'Probekörper-Durchmesser',
)
_NAME_LABELS = ('Probekörperbezeichnung',)
_HEIGHT_LABELS = ('Probekörperhöhe', 'Gap length', 'PK-Höhe')


def detect_tabnum(filename, tabstr, encoding='utf-8'):
    """Search the beginning of a tab-delimited file for a table identifier.

    Every row is lower-cased. The first row in which any cell contains
    ``tabstr`` (case-insensitive substring match) and which also has a cell
    equal to ``'plain'`` determines the result.

    :param filename: path of the file to scan
    :param tabstr: text to look for inside the cells of a row
    :param encoding: file encoding, default: utf-8
    :return: the (lower-cased) second cell of the matching row, or ``False``
        if no such row is found within the first 101 rows or before the end
        of the file
    """
    filename = os.path.normpath(filename)
    needle = tabstr.lower()

    with open(filename, 'r', encoding=encoding) as in_file:
        reader = csv.reader(in_file, delimiter='\t')
        for counter, row in enumerate(reader, start=1):
            cells = [cell.lower() for cell in row]
            if any(needle in cell for cell in cells) and 'plain' in cells:
                return cells[1]
            # Only the beginning of the file is inspected.
            if counter > 100:
                return False

    # File ended before the row limit was reached: report "not found" the
    # same way as the limit case instead of falling through to None.
    return False


def str2float(str):
    """Convert a string with ``,`` or ``.`` as decimal separator to float.

    The parameter name shadows the builtin but is kept so that existing
    keyword calls continue to work.

    :param str: text to convert
    :return: the parsed float, or ``None`` if the value is not a string or
        cannot be parsed
    """
    try:
        return float(str.replace(',', '.'))
    except (AttributeError, TypeError, ValueError):
        return None


def _cell(row, idx):
    """Return ``row[idx]``, or ``None`` if ``idx`` is None or out of range."""
    if idx is None or idx >= len(row):
        return None
    return row[idx]


def read_data(filename, table, pkdata='001', encoding='utf-8', to_si=False, debug=False):
    '''
    Read one measurement table and the specimen header from a tab-delimited file.

    Only rows with more than two cells are considered. The first three
    characters of a row's first cell select the table the row belongs to.
    For both the specimen table and the data table the first row holds the
    column names, the second row the units and the remaining rows the values.

    :param filename: File-Name
    :param table: Table-Number of the measurement data
    :param pkdata: Table-Number of specimen definitions, default: '001'
    :param encoding: Encoding, default: utf-8
    :param to_si: currently unused
    :param debug: debug-mode, prints intermediate results
    :return: tuple ``(header, data)``; ``header`` is a dict with the keys
        ``d``, ``h``, ``name``, ``unit_h`` and ``unit_d`` (each ``None`` if it
        could not be determined), ``data`` is a DataFrame indexed by the
        column ``t``, sorted and shifted so that the index starts at 0
    :raises ValueError: if no row of the file belongs to ``table``
    '''
    filename = os.path.normpath(filename)

    try:
        # ---------------------------------------------------------------------
        # Read and convert the data
        # ---------------------------------------------------------------------
        with open(filename, 'r', encoding=encoding) as in_file:
            rows = [row for row in csv.reader(in_file, delimiter='\t') if len(row) > 2]

        if debug:
            print('Anz. Datensätze: ', str(len(rows)), getsizeof(rows))

        # Split the rows into specimen-definition rows and data rows.
        # NOTE(review): for string arguments ``in`` is a substring test, so a
        # first cell shorter than three characters may match unintentionally.
        head_rows = []
        table_rows = []
        for row in rows:
            prefix = row[0][0:3]
            try:
                if prefix in pkdata:
                    head_rows.append(row)
                if prefix in table:
                    table_rows.append(row)
            except TypeError:
                # pkdata/table does not support ``in`` with a string
                continue

        # Release the raw rows before the next large allocation.
        del rows

        if debug:
            print('data_clean fin')

        # --- Prepare the specimen header ---
        id_durchmesser = id_hoehe = id_name = None
        unit_durch = unit_hoehe = None
        durchmesser = hoehe = name = None

        if head_rows:
            # Row 0: column names -> positions of the interesting columns
            for idx_name, label in enumerate(head_rows[0]):
                if label in _DIAMETER_LABELS:
                    id_durchmesser = idx_name
                elif label in _NAME_LABELS:
                    id_name = idx_name
                elif label in _HEIGHT_LABELS:
                    id_hoehe = idx_name

            if debug:
                print(id_durchmesser, id_hoehe, id_name)

        # Each value is looked up independently so that one missing column
        # does not discard the others.
        if len(head_rows) > 1:
            # Row 1: units
            unit_durch = _cell(head_rows[1], id_durchmesser)
            unit_hoehe = _cell(head_rows[1], id_hoehe)

        if len(head_rows) > 2:
            # Row 2: values
            durchmesser = str2float(_cell(head_rows[2], id_durchmesser))
            hoehe = str2float(_cell(head_rows[2], id_hoehe))
            name = _cell(head_rows[2], id_name)

        header = {'d': durchmesser,
                  'h': hoehe,
                  'name': name,
                  'unit_h': unit_hoehe,
                  'unit_d': unit_durch}

        if debug:
            print('header\n', header)

        # --- Convert the data rows into a pandas DataFrame ---
        if debug:
            print('daten umwandel')

        if not table_rows:
            raise ValueError(
                'no rows found for table {!r} in {!r}'.format(table, filename))

        if debug:
            print('head')

        # The first two cells of every row are skipped; only the cells from
        # the third one onwards are used.
        data_head = table_rows[0][2:]
        data_units = table_rows[1][2:] if len(table_rows) > 1 else []
        temp = [[str2float(value) for value in row[2:]] for row in table_rows[2:]]

        data = array(temp)

        if debug:
            print(data_head, data_units)

        # Normalise the column names; the result must contain a column 't',
        # which becomes the index below.
        data_head = normalice_header(data_head)

        # Build the DataFrame
        data = DataFrame(data=data, columns=data_head)
        if debug:
            print(data.head())

        data = data.set_index('t')

        # Store the counter column 'N' as integers
        if 'N' in data.columns:
            data['N'] = data['N'].astype(int)

        # sort_index() returns a new object; the result has to be assigned,
        # otherwise the data stays unsorted.
        data = data.sort_index()

        # Shift the index so that it starts at 0
        data.index = data.index - data.index[0]

        return header, data

    except Exception:
        print('Fehler beim lesen')
        raise