216 lines
5.6 KiB
Python
216 lines
5.6 KiB
Python
|
from numpy import array
|
||
|
from pandas import to_datetime, DataFrame
|
||
|
import csv
|
||
|
import os
|
||
|
from sys import getsizeof
|
||
|
from versuche.helper import normalice_header
|
||
|
|
||
|
|
||
|
def detect_tabnum(filename, tabstr, encoding='utf-8', max_rows=100):
    '''
    Look up the table number of the table whose title row mentions *tabstr*.

    The file is read as tab-separated text. Every row is lower-cased and the
    first row that has more than one cell, contains a cell equal to
    ``'plain'`` and contains *tabstr* as a (case-insensitive) substring of any
    cell determines the result.

    :param filename: Path of the file to scan
    :param tabstr: Text to search for inside the cells of a row
    :param encoding: Encoding, default: utf-8
    :param max_rows: Give up once more than this many rows have been examined
        without a hit, default: 100
    :return: Second cell of the matching row (lower-cased, because the whole
        row is lower-cased before matching) or ``False`` if no row matches
    '''
    filename = os.path.normpath(filename)
    needle = tabstr.lower()

    with open(filename, 'r', encoding=encoding) as in_file:
        reader = csv.reader(in_file, delimiter='\t')
        for row_count, row in enumerate(reader, start=1):
            cells = [cell.lower() for cell in row]

            # A usable hit needs a second cell to return; a lone 'plain'
            # cell is skipped instead of raising IndexError.
            if (len(cells) > 1
                    and 'plain' in cells
                    and any(needle in cell for cell in cells)):
                return cells[1]

            if row_count > max_rows:
                return False

    # The file ended before the row limit was reached: report "not found"
    # the same way as the limit branch instead of implicitly returning None.
    return False
|
||
|
|
||
|
|
||
|
|
||
|
def str2float(str):
    '''
    Convert a decimal string to ``float``; ',' is accepted as decimal mark.

    :param str: Text to convert, e.g. ``'1,5'`` or ``'1.5'``. The parameter
        name shadows the builtin; it is kept so keyword callers keep working.
    :return: The parsed float, or ``None`` if the value is not a string or
        does not represent a number
    '''
    text = str
    try:
        return float(text.replace(',', '.'))
    except (AttributeError, TypeError, ValueError):
        # AttributeError: value has no .replace (None, numbers, ...)
        # TypeError:      .replace/float rejected the argument types
        # ValueError:     text is not a number (including '')
        # Anything else (e.g. KeyboardInterrupt) must propagate.
        return None
|
||
|
|
||
|
|
||
|
def read_data(filename,
              table,
              pkdata='001',
              encoding='utf-8',
              to_si=False,
              debug=False):
    '''
    Read one data table plus the specimen definition from a tab-separated
    file.

    Only rows with more than two cells are considered. The first three
    characters of a row's first cell are tested with ``in`` against *pkdata*
    (specimen definition rows) and *table* (data rows). In both groups the
    first row holds the column names and the second row the units; the
    remaining data rows hold the values, the third specimen row holds the
    specimen values. The first two cells of every data row are dropped.

    :param filename: File-Name
    :param table: Table-Number of the data table
    :param pkdata: Table-Number of specimen definitions, default: '001'
    :param encoding: Encoding, default: utf-8
    :param to_si: currently unused
    :param debug: debug-mode, prints intermediate results
    :return: tuple ``(header, data)``. ``header`` is a dict with the keys
        ``d``, ``h``, ``name``, ``unit_h`` and ``unit_d`` (``None`` where a
        value is not available). ``data`` is a pandas DataFrame whose column
        names went through ``normalice_header``, indexed by column ``t``,
        sorted by that index and shifted so that the first index value is
        zero.
    :raises ValueError: if no row of the file belongs to *table*
    '''
    filename = os.path.normpath(filename)

    try:
        # Rows with at most two cells cannot carry table content.
        with open(filename, 'r', encoding=encoding) as in_file:
            reader = csv.reader(in_file, delimiter='\t')
            rows = [row for row in reader if len(row) > 2]

        if debug:
            print('Anz. Datensätze: ', str(len(rows)), getsizeof(rows))

        # Split the rows into specimen definition and requested data table.
        head_rows = []
        data_rows = []
        for row in rows:
            table_id = row[0][0:3]
            try:
                if table_id in pkdata:
                    head_rows.append(row)
                if table_id in table:
                    data_rows.append(row)
            except TypeError:
                # Selector does not support ``in`` with a string (e.g. an
                # int was passed); keep the original best-effort behaviour
                # and skip the row.
                continue

        if debug:
            print('data_clean fin')

        header = _parse_specimen_header(head_rows, debug=debug)

        if debug:
            print('header\n', header)
            print('daten umwandel')

        if not data_rows:
            raise ValueError(
                'no rows found for table %r in %s' % (table, filename))

        if debug:
            print('head')
        data_head = list(data_rows[0][2:])
        data_units = list(data_rows[1][2:]) if len(data_rows) > 1 else []

        values = [[str2float(value) for value in row[2:]]
                  for row in data_rows[2:]]
        data = array(values)

        if debug:
            print(data_head, data_units)

        # Normalise the column names
        data_head = normalice_header(data_head)

        data = DataFrame(data=data, columns=data_head)
        if debug:
            print(data.head())

        data = data.set_index('t')

        # Cycle counter as integer
        if 'N' in data.columns:
            data['N'] = data['N'].astype(int)

        # sort_index returns a new frame; the result has to be kept.
        # mergesort keeps the file order of rows with equal index values.
        data = data.sort_index(kind='mergesort')

        # Let the index start at zero
        data.index = data.index - data.index[0]

        return header, data

    except Exception:
        print('Fehler beim lesen')
        raise


def _cell(row, index):
    '''Return ``row[index]`` or ``None`` if the column is unknown/missing.'''
    if index is None:
        return None
    try:
        return row[index]
    except IndexError:
        return None


def _parse_specimen_header(head_rows, debug=False):
    '''Build the specimen header dict from the specimen definition rows.'''
    diameter_labels = (r'Probekörberdurchmesser', r'Diameter of specimen',
                       'PK-Durchmesser', 'Probekörper-Durchmesser')
    name_labels = (r'Probekörperbezeichnung',)
    height_labels = ('Probekörperhöhe', 'Gap length', 'PK-Höhe')

    id_durchmesser = None
    id_hoehe = None
    id_name = None

    if head_rows:
        # First row: column names -> remember the positions of interest.
        for idx_name, label in enumerate(head_rows[0]):
            if label in diameter_labels:
                id_durchmesser = idx_name
            elif label in name_labels:
                id_name = idx_name
            elif label in height_labels:
                id_hoehe = idx_name

        if debug:
            print(id_durchmesser, id_hoehe, id_name)

    # Second row: units, third row: values. Missing rows yield None entries.
    unit_row = head_rows[1] if len(head_rows) > 1 else []
    value_row = head_rows[2] if len(head_rows) > 2 else []

    # Every cell is looked up on its own so that one unknown column does not
    # discard the others.
    return {'d': str2float(_cell(value_row, id_durchmesser)),
            'h': str2float(_cell(value_row, id_hoehe)),
            'name': _cell(value_row, id_name),
            'unit_h': _cell(unit_row, id_hoehe),
            'unit_d': _cell(unit_row, id_durchmesser)}
|