Extracting Data From A String Where The Data Structure Is Embedded In The String Itself
In a project we are working on, we encounter log files in which each line has the following structure: 2012-01-02,12:50:32,658,2,1,2,0,0,0,0,1556,1555,62,60,2,3,0,0,0,0,1559,1557,1557,63
Solution 1:
This is not a particularly nice data structure. Assuming there are always 4 control values per group, the following should work with an arbitrary number of groups and an arbitrary number of sensors within each group.
sample = "2012-01-02,12:50:32,658,2,1,2,0,0,0,0,1556,1555,62,60,2,3,0,0,0,0,1559,1557,1557,63,64,65,0.305,0.265,0.304,0.308,0.309"
def parse_line(line):
    """Print and return one comma-separated row per sensor in a log line.

    Layout of ``line`` as consumed here (all fields comma-separated)::

        date, time, <unused field>, group_count,
        then for each group:
            group_num, sensor_count, 4 control values,
            sensor_count values, sensor_count more values
        then one trailing value per sensor, in sensor order

    The third field is read into a variable called ``row`` in earlier
    versions of this function but never used, so it is skipped here.
    The 4 control values of every group are skipped as well.

    Args:
        line: one raw log line; a trailing line ending is ignored.

    Returns:
        A list of strings, one per sensor, of the form
        ``date,time,group_num,sensor_num,value,value[,trailing value]``.
        Each string is also printed to standard output.

    Raises:
        IndexError: if the line has fewer fields than its header requires.
        ValueError: if a group or sensor count is not an integer.
    """
    control_count = 4  # fixed number of control values per group

    # Drop the line ending so the last value is clean when reading from a file.
    fields = line.rstrip("\r\n").split(",")
    date, time = fields[0], fields[1]
    group_count = int(fields[3])

    sensors = []
    pos = 4  # index of the first group's group_num
    for _ in range(group_count):
        group_num = fields[pos]
        sensor_count = int(fields[pos + 1])
        data_start = pos + 2 + control_count
        data_end = data_start + sensor_count * 2
        sensor_data = fields[data_start:data_end]
        pos = data_end
        for j in range(sensor_count):
            # Values are stored column-wise: every sensor_count-th entry
            # starting at j belongs to sensor j.
            sensors.append([group_num, str(j + 1)] + sensor_data[j::sensor_count])

    # Whatever follows the last group is matched to the sensors in order.
    for sensor, value in zip(sensors, fields[pos:]):
        sensor.append(value)

    rows = [",".join([date, time] + sensor) for sensor in sensors]
    for row in rows:
        # Single-argument call form works on both Python 2 and Python 3.
        print(row)
    return rows
parse_line(sample)
Yielding:
2012-01-02,12:50:32,1,1,1556,62,0.305
2012-01-02,12:50:32,1,2,1555,60,0.265
2012-01-02,12:50:32,2,1,1559,63,0.304
2012-01-02,12:50:32,2,2,1557,64,0.308
2012-01-02,12:50:32,2,3,1557,65,0.309
Solution 2:
It's a non-trivial task. Probably the most "pythonic" way would be to create a class.
I took the liberty and time to make an example:
from collections import namedtuple
class DataPack(object):
    """One parsed log line whose group/sensor layout is embedded in the line.

    The line starts with ``len(header_fields)`` header values, followed by
    the number of groups named by the header's ``groups`` field. Any fields
    left over after the last group are kept in ``data_part`` but are not
    assigned to a group.

    Attributes set by ``__init__``:
        seperator:     field separator used to split the line
        header_fields: names of the header columns
        group_fields:  names of the per-group columns
        data_start:    index of the first non-header field
        header_part:   raw header values (list of str)
        data_part:     raw values after the header (list of str)
        header:        ``Header`` namedtuple built from ``header_part``
        groups:        list of ``DataGroup`` namedtuples
    """

    def __init__(self, line, seperator=',', headerfields=None, groupfields=None):
        """Split ``line`` and parse its header and groups.

        Args:
            line: raw log line; a trailing line ending is ignored.
            seperator: field separator (spelling kept for compatibility).
            headerfields: header column names; defaults to
                ``('date', 'time', 'nr', 'groups')``.
            groupfields: group column names; defaults to
                ``('nr', 'sensors', 'controlfields', 't1values', 't2values')``.
        """
        self.seperator = seperator
        self.header_fields = headerfields or ('date', 'time', 'nr', 'groups')
        self.group_fields = groupfields or ('nr', 'sensors', 'controlfields',
                                            't1values', 't2values')
        Header = namedtuple('Header', self.header_fields)
        # data_start has to exist before the line is cut into header and
        # data; slicing first raised AttributeError.
        self.data_start = len(self.header_fields)
        fields = line.rstrip('\r\n').split(self.seperator)
        self.header_part = fields[:self.data_start]
        self.data_part = fields[self.data_start:]
        self.header = Header(*self.header_part)
        self.groups = self._create_groups(self.data_part, self.header.groups)

    def _create_groups(self, datalst, groups):
        """Return ``groups`` DataGroup tuples read from the front of ``datalst``.

        Per-group layout: nr, sensors, controlfield * 4, t1value * sensors,
        t2value * sensors. Fields remaining after the last group are ignored.
        """
        Group = namedtuple('DataGroup', self.group_fields)
        _groups = []
        for _ in range(int(groups)):
            nr = datalst[0]
            sensors = datalst[1]
            count = int(sensors)
            controlfields = datalst[2:6]
            t1values = datalst[6:6 + count]
            t2values = datalst[6 + count:6 + count * 2]
            _groups.append(Group(nr, sensors, controlfields, t1values, t2values))
            # Advance past this group so the next one starts at index 0.
            datalst = datalst[6 + count * 2:]
        return _groups

    def __str__(self):
        """Return one ``date, time, group, sensor, (t1, t2)`` line per sensor.

        The date is rendered with ``/`` instead of ``-`` and sensors are
        numbered from 1 within their group.
        """
        date = self.header.date.replace('-', '/')
        time = self.header.time
        _return = []
        for group in self.groups:
            for sensor in range(int(group.sensors)):
                _return.append('%s, %s, %s, %s, (%s, %s)\n' % (
                    date, time, group.nr, sensor + 1,
                    group.t1values[sensor], group.t2values[sensor]))
        return ''.join(_return)
if __name__ == '__main__':
    # Demo: parse the sample line, then show the header, each group, the
    # formatted records, and two individual values.
    line = """2012-01-02,12:50:32,658,2,1,2,0,0,0,0,1556,1555,62,60,2,3,0,0,0,0,1559,1557,1557,63,64,65,0.305,0.265,0.304,0.308,0.309"""
    data = DataPack(line)
    # Every print below takes exactly one argument, so it runs as a print
    # statement on Python 2 and as a function call on Python 3.
    print(' '.join(data.header))
    for group in data.groups:
        # str() rather than '%s' % group: a namedtuple on the right of %
        # would be unpacked as format arguments.
        print('\n' + str(group))
    print('\n' + str(data))
    print(' '.join(['cfield 0:2 ', data.groups[0].controlfields[2]]))
    print(' '.join(['t2value 1:2 ', data.groups[1].t2values[2]]))
For bigger changes to the input data you would have to subclass DataPack and override the _create_groups and __str__ methods.
Post a Comment for "Extracting Data From A String Where The Data Structure Is Embedded In The String Itself"