Regular Expressions Python - Plotting Data Out Of Text File
I am trying to get some data out of text file with the following format: jvm: 2011-08-29 17:09:54.438864: MemoryStatistics: [290328680, 381288448] moniData: 2011-08-29 17:09:5
Solution 1:
You can use Python's built-in `re` module for regular expressions. To get all of the timestamps, you can do something like this:
import re
def main(path='data_for_plot.txt'):
    """Print and return every timestamp found in the file at *path*.

    A timestamp is any text matching ``YYYY-MM-DD HH:MM:SS.ffffff:`` as
    described by the regular expression below; the trailing colon is part
    of each match.

    Args:
        path: Name of the text file to scan.

    Returns:
        The list of matched timestamp strings (empty when nothing matches).
    """
    pattern = r'\S+-\S+-\d\d\s\d\d:\S+:\S+:'
    # The context manager closes the file even if reading fails.
    with open(path) as log_file:
        timestamps = re.findall(pattern, log_file.read())
    # Call form of print works on both Python 2 and Python 3.
    print(timestamps)
    return timestamps


if __name__ == '__main__':
    main()
Solution 2:
Why not keep it simple?
import re
# Raw strings keep the regex escapes (\d, \., \[) intact without relying on
# Python passing unknown string escapes through unchanged.
# NOTE: with re.DOTALL the greedy `.+` spans lines, so the captured Depth and
# RecordsSent values may come from a later record than the snData one.
regx = re.compile(
    r'snData: +(\d{4}-\d\d-\d\d +\d\d:\d\d:\d\d\.\d+).+'
    r'Depth: +\[(\d+)\].+'
    r'RecordsSent: +\[(\d+)\]',
    re.DOTALL)

timestamp, snDataDepth, snDataRecordsSend = [], [], []


def collect_sn_data(text):
    """Append the first snData match in *text* to the three result lists.

    Args:
        text: Full contents of the data file.

    Returns:
        True when a match was found and appended, False otherwise (the
        lists are left untouched in that case).
    """
    match = regx.search(text)
    if match is None:
        # No snData record: avoid calling .groups() on None.
        return False
    a, b, c = match.groups()
    timestamp.append(a)
    snDataDepth.append(b)
    snDataRecordsSend.append(c)
    return True


if __name__ == '__main__':
    with open('data_for_plot.txt') as f:
        collect_sn_data(f.read())
    print(timestamp)
    print(snDataDepth)
    print(snDataRecordsSend)
Result:
['2011-08-29 17:09:54.438864']
['0']
['0']
Solution 3:
You could parse your input data using lepl:
#!/usr/bin/env python
# Parse the records of the input file(s) with a lepl grammar and pretty-print
# them as (name, timestamp, properties) tuples.
import ast
import fileinput
import logging
from datetime import datetime
from pprint import pprint
from string import ascii_letters, digits

from lepl import Any, Iterate, Newline, Regexp, SkipTo, Space

# ABNF: property = name colon python_literal
name = Any(ascii_letters+digits)[1:,...]  # \w+
colon = Space()[:,...] & ':' & Space()[:,...]  # \s*:\s*
# The rest of the line is evaluated as a Python literal (list, int, ...).
python_literal = Regexp(r'.+') >> ast.literal_eval
property_ = name & ~colon & python_literal > tuple

# record consists of name, timestamp and one or more properties
# ABNF: record = name colon timestamp colon 1*( NEWLINE indent property )
timestamp = Regexp(r'.*[^\s:]') >> (lambda s: (
    datetime.strptime(s, "%Y-%m-%d %H:%M:%S.%f")))
record = (name & ~colon & timestamp & ~colon &
          ((~Newline() & ~Space()[1:,...] & property_)[1:] > dict)) > tuple

# file consists of one or more records interlaced with newlines
# ABNF: file = 1*( NEWLINE | record )
# skip unrecognized text upto new line
unknown = SkipTo(Newline()) > (lambda s: logging.error('unknown: %r' % (s,)))
it = Iterate(record | ~Newline() | ~unknown)  # consume input one record at a time
it.config.no_full_first_match().no_memoize()  # improve performance
iterparse = it.get_parse_file_all()  # output one record at a time

# Keep only non-empty parse results and take the first item of each.
pprint([lst[0] for lst in iterparse(fileinput.input()) if lst])
Output
[('jvm',
datetime.datetime(2011, 8, 29, 17, 9, 54, 438864),
{'MemoryStatistics': [290328680, 381288448]}),
('moniData',
datetime.datetime(2011, 8, 29, 17, 9, 54, 438864),
{'Depth': [0], 'RecordsSent': [1]}),
('rdoutData',
datetime.datetime(2011, 8, 29, 17, 9, 54, 438864),
{'Depth': [0], 'RecordsSent': [0]}),
('rdoutReq',
datetime.datetime(2011, 8, 29, 17, 9, 54, 438864),
{'BytesReceived': [8184],
'RecordsReceived': [132],
'TotalRecordsReceived': 132}),
('sender',
datetime.datetime(2011, 8, 29, 17, 9, 54, 438864),
{'NumHitsCached': 0,
'NumHitsQueued': 310,
'NumHitsReceived': 2663,
'NumReadoutRequestsQueued': 0,
'NumReadoutRequestsReceived': 178,
'NumReadoutsSent': 1}),
('snData',
datetime.datetime(2011, 8, 29, 17, 9, 54, 438864),
{'Depth': [0], 'RecordsSent': [61]}),
# ... snip ...
('PyrateBufferManager',
datetime.datetime(2011, 8, 29, 17, 9, 57, 31479),
{'CurrentAquiredBuffers': 0,
'CurrentAquiredBytes': 0,
'ReturnBufferCount': 4285})]
Solution 4:
You could use yaml to parse your file if you transform it slightly:
#!/usr/bin/env python
# Rewrite the log text into YAML, load it, and report one value from it.
import datetime
import re

import yaml

# Read the whole input; the context manager closes the file afterwards.
with open('input') as f:
    text = f.read()

# transform text to make it a valid yaml:
# put a line break after every "name:" that starts a line
re_name = re.compile(r'^(\w+\:)\s', re.MULTILINE)
yaml_text = re_name.sub(r'\1\n ', text)

# safe_load constructs only plain Python types (including datetime values for
# timestamps) and never instantiates arbitrary objects from the input.  A bare
# yaml.load(text) without a Loader argument is rejected by PyYAML 6.
obj = yaml.safe_load(yaml_text)
d = obj['sender'][datetime.datetime(2011, 8, 29, 17, 9, 54, 438864)]
print('number of hits: {NumHitsQueued}'.format(**d))
Output
number of hits: 310
Post a Comment for "Regular Expressions Python - Plotting Data Out Of Text File"