Skip to content Skip to sidebar Skip to footer

Regular Expressions Python - Plotting Data Out Of Text File

I am trying to get some data out of a text file with the following format: jvm: 2011-08-29 17:09:54.438864: MemoryStatistics: [290328680, 381288448] moniData: 2011-08-29 17:09:5…

Solution 1:

You can use Python's built-in re library for regular expressions. To get all of the timestamps, you can do something like this:

import re

def main(path='data_for_plot.txt'):
    """Print a list of every timestamp found in the log file at *path*.

    The whole file is read into memory and scanned with one regular
    expression.  Each match looks like ``2011-08-29 17:09:54.438864:``;
    note that the trailing colon is part of the match.

    Args:
        path: Name of the text file to scan.
    """
    # The context manager closes the file even if reading fails.
    with open(path) as log_file:
        text = log_file.read()
    timestamp = re.findall(r'\S+-\S+-\d\d\s\d\d:\S+:\S+:', text)
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(timestamp)


if __name__ == '__main__':
    main()

Solution 2:

Why not keep it simple?

import re

# One snData record: its timestamp, then its Depth and RecordsSent values.
# The gaps use non-greedy ``.+?`` because, with re.DOTALL, a greedy ``.+``
# runs to the end of the file and pairs the first snData timestamp with the
# *last* Depth / RecordsSent entries instead of the record's own.
regx = re.compile(
    r'snData: +(\d{4}-\d\d-\d\d +\d\d:\d\d:\d\d\.\d+).+?'
    r'Depth: +\[(\d+)\].+?'
    r'RecordsSent: +\[(\d+)\]',
    re.DOTALL)


def parse_sn_data(text):
    """Collect the snData series contained in *text*.

    Args:
        text: Full contents of the log file.

    Returns:
        A tuple ``(timestamp, snDataDepth, snDataRecordsSend)`` of three
        equally long lists of strings, one entry per snData record in
        file order.  All three lists are empty when nothing matches.
    """
    timestamp, snDataDepth, snDataRecordsSend = [], [], []
    # finditer walks every record rather than stopping at the first one,
    # and simply yields nothing (no AttributeError) when there is no match.
    for match in regx.finditer(text):
        a, b, c = match.groups()
        timestamp.append(a)
        snDataDepth.append(b)
        snDataRecordsSend.append(c)
    return timestamp, snDataDepth, snDataRecordsSend


if __name__ == '__main__':
    with open('data_for_plot.txt') as f:
        ch = f.read()

    timestamp, snDataDepth, snDataRecordsSend = parse_sn_data(ch)

    print(timestamp)
    print(snDataDepth)
    print(snDataRecordsSend)

Result:

['2011-08-29 17:09:54.438864']
['0']
['0']

Solution 3:

You could parse your input data using lepl:

#!/usr/bin/env python
"""Parse a log of ``name: timestamp:`` records with indented properties.

Input is taken from ``fileinput.input()`` (files named on the command line,
or standard input) and the parsed records are pretty-printed as
``(name, timestamp, properties)`` tuples.
"""
import ast
import fileinput
import logging

from datetime import datetime
from pprint   import pprint
from string   import ascii_letters, digits

from lepl import Any, Iterate, Newline, Regexp, SkipTo, Space

# ABNF: property = name colon python_literal
name = Any(ascii_letters+digits)[1:,...] # \w+
colon = Space()[:,...] & ':' & Space()[:,...] # \s*:\s*
# the rest of the line is evaluated as a Python literal (list, int, ...)
python_literal = Regexp(r'.+') >> ast.literal_eval
property_ = name & ~colon & python_literal > tuple

# record consists of name, timestamp and one or more properties
#   ABNF: record = name colon timestamp colon 1*( NEWLINE indent property )
timestamp = Regexp(r'.*[^\s:]') >> (lambda s: (
    datetime.strptime(s, "%Y-%m-%d %H:%M:%S.%f")))
record = (name & ~colon & timestamp & ~colon &
          ((~Newline() & ~Space()[1:,...] & property_)[1:] > dict)) > tuple

# file consists of one or more records interlaced with newlines
#   ABNF: file = 1*( NEWLINE | record )
#     skip unrecognized text upto new line
unknown = SkipTo(Newline()) > (lambda s: logging.error('unknown: %r' % (s,)))
it = Iterate(record | ~Newline() | ~unknown) # consume input one record at a time
it.config.no_full_first_match().no_memoize() # improve performance

iterparse = it.get_parse_file_all() # output one record at a time
# keep only non-empty results and take the first item of each
pprint([lst[0] for lst in iterparse(fileinput.input()) if lst])

Output

[('jvm',
  datetime.datetime(2011, 8, 29, 17, 9, 54, 438864),
  {'MemoryStatistics': [290328680, 381288448]}),
 ('moniData',
  datetime.datetime(2011, 8, 29, 17, 9, 54, 438864),
  {'Depth': [0], 'RecordsSent': [1]}),
 ('rdoutData',
  datetime.datetime(2011, 8, 29, 17, 9, 54, 438864),
  {'Depth': [0], 'RecordsSent': [0]}),
 ('rdoutReq',
  datetime.datetime(2011, 8, 29, 17, 9, 54, 438864),
  {'BytesReceived': [8184],
   'RecordsReceived': [132],
   'TotalRecordsReceived': 132}),
 ('sender',
  datetime.datetime(2011, 8, 29, 17, 9, 54, 438864),
  {'NumHitsCached': 0,
   'NumHitsQueued': 310,
   'NumHitsReceived': 2663,
   'NumReadoutRequestsQueued': 0,
   'NumReadoutRequestsReceived': 178,
   'NumReadoutsSent': 1}),
 ('snData',
  datetime.datetime(2011, 8, 29, 17, 9, 54, 438864),
  {'Depth': [0], 'RecordsSent': [61]}),
 # ... snip ...
 ('PyrateBufferManager',
  datetime.datetime(2011, 8, 29, 17, 9, 57, 31479),
  {'CurrentAquiredBuffers': 0,
   'CurrentAquiredBytes': 0,
   'ReturnBufferCount': 4285})]

Solution 4:

You could use yaml to parse your file if you transform it slightly:

#!/usr/bin/env python
"""Read the log file 'input' as YAML and report one value from it."""
import datetime
import re

import yaml

# The context manager closes the file once its contents have been read.
with open('input') as f:
    text = f.read()

# transform text to make it a valid yaml:
# break the line after each leading "name:" so that what follows it
# starts on a new line, indented by one space
re_name = re.compile(r'^(\w+\:)\s', re.MULTILINE)
yaml_text = re_name.sub(r'\1\n ', text)

# NOTE: this was yaml.load(yaml_text) with no Loader.  That form can build
# arbitrary Python objects from the input and is rejected by current PyYAML
# releases; safe_load is restricted to plain YAML types.
obj = yaml.safe_load(yaml_text)
# index by record name, then by the record's timestamp as a datetime
d = obj['sender'][datetime.datetime(2011, 8, 29, 17, 9, 54, 438864)]
print('number of hits: {NumHitsQueued}'.format(**d))

Output

number of hits: 310

Post a Comment for "Regular Expressions Python - Plotting Data Out Of Text File"