HWRF  trunk@4391
atparse.py
1 """!ATParser is a text parser that replaces strings with variables and
2 function output."""
3 
4 import sys, os, re, StringIO, logging
5 
##@var functions
# Table of the text functions that may follow a variable name
# (as in @[var.uc]).  Keys are the function names; each value takes the
# text and returns the transformed text as a string.
# @protected
functions = {
    'lc': lambda text: str(text).lower(),
    'uc': lambda text: str(text).upper(),
    'len': lambda text: str(len(text)),
    'trim': lambda text: str(text).strip(),
}
13 
class ParserSyntaxError(Exception):
    """!Raised when the parser encounters a syntax error."""


class ScriptAssertion(Exception):
    """!Raised when a script @[VARNAME:?message] is encountered, and
    the variable does not exist."""


class ScriptAbort(Exception):
    """!Raised when an "@** abort" directive is reached in a script."""


class ParserLineLimit(Exception):
    """!Raised when the parser is handed a line whose number exceeds
    the max_lines limit.  ATParser.parse_line raises this exception, so
    it has to exist at module level."""
class NoSuchVariable(Exception):
    """!Raised when a script requests an unknown variable."""

    def __init__(self, infile, varname, line=None):
        """!NoSuchVariable constructor
        @param infile the input file that caused problems
        @param varname the variable that does not exist
        @param line the line number of the problematic line, or None
          if it is not known (the message then shows "??")"""
        self.infile = infile
        self.varname = varname
        # The attribute holds an int (or None); the message shows the
        # caller's value as text.
        self.line = None if line is None else int(line)
        shown = '??' if line is None else str(line)
        message = '%s:%s: undefined variable %s' % (infile, shown, varname)
        super(NoSuchVariable, self).__init__(message)

    ##@var infile
    # The file that caused the problem

    ##@var line
    # The line number that caused the problem

    ##@var varname
    # The problematic variable name
46 
def replace_backslashed(text):
    """!Turns \\t to tab, \\n to end of line, \\r to carriage return, \\b to
    backspace and \\(octal) to other characters.

    Anything else, including a backslash followed by digits that are
    not a valid octal number, is returned unchanged.
    @param text the text to scan: a backslash followed by the escaped
      character or octal digits
    @returns the replacement text"""
    simple = {'\\n': "\n", '\\t': "\t", '\\r': "\r", '\\b': "\b"}
    if text in simple:
        return simple[text]
    digits = text[1:]
    # Only genuine octal digit strings are converted; "\9" or "\189"
    # are passed through rather than crashing in int().
    if digits and all(ch in '01234567' for ch in digits):
        try:
            return chr(int(digits, 8))
        except ValueError:
            # chr() rejects values outside the range it supports.
            return text
    return text
63 
# Parser states.  Each state is a dict with the same five boolean keys:
# active, in_if_block, in_ifelse_block, used_if and ignore.

##@var outer
# Parser state for the portion of the file outside @[] and @** blocks
outer = {
    'active': True, 'in_if_block': False, 'in_ifelse_block': False,
    'used_if': False, 'ignore': False,
}

##@var if_unused_if
# Parser state for within @** if blocks that are inactive
if_unused_if = {
    'active': False, 'in_if_block': True, 'in_ifelse_block': False,
    'used_if': False, 'ignore': False,
}

##@var if_active_if
# Parser state for within @** if blocks that are active
if_active_if = {
    'active': True, 'in_if_block': True, 'in_ifelse_block': False,
    'used_if': True, 'ignore': False,
}

##@var if_used_if
# Parser state for after the end of an @** if block
if_used_if = {
    'active': False, 'in_if_block': True, 'in_ifelse_block': True,
    'used_if': True, 'ignore': False,
}

##@var if_active_else
# Parser state for inside an "else" block
if_active_else = {
    'active': True, 'in_if_block': False, 'in_ifelse_block': True,
    'used_if': True, 'ignore': False,
}

##@var if_inactive_else
# Parser state for inside an "else" block that was not used
if_inactive_else = {
    'active': False, 'in_if_block': False, 'in_ifelse_block': True,
    'used_if': True, 'ignore': False,
}

##@var ignore_if_block
# Parser state for an "if" block that was skipped
ignore_if_block = {
    'active': False, 'in_if_block': True, 'in_ifelse_block': False,
    'used_if': False, 'ignore': True,
}

##@var ignore_else_block
# Parser state for an "else" block that was skipped
ignore_else_block = {
    'active': False, 'in_if_block': False, 'in_ifelse_block': True,
    'used_if': False, 'ignore': True,
}
97 
class ATParser:
    """!Takes input files or other data, and replaces certain strings
    with variables or functions.

    The calling convention is quite simple:
    @code{.py}
    ap=ATParser(varhash={"NAME":"Katrina", "STID":"12L"})
    ap.parse_file("input-file.txt")
    lines="line 1\\nline 2\\nline 3 of @[NAME]"
    ap.parse_lines(lines,"(string-data)")
    ap.parse_stream(sys.stdin,"(stdin)")
    @endcode

    Inputs are general strings with @@[...] and @@** escape sequences which
    follow familiar shell syntax (but with @@[...] instead of ${...}):
    @code{.unformatted}
    My storm is @[NAME] and the RSMC is @[RSMC:-${center:-unknown}].
    @endcode
    In this case, it would print:
    @code{.unformatted}
    My storm is Katrina and the RSMC is unknown.
    @endcode
    since NAME is set, but RSMC and center are unset.

    There are also block if statements:
    @code{.unformatted}
    @** if NAME==BILLY
    storm is billy
    @** elseif NAME==KATRINA
    storm is katrina
    @** else
    another storm
    @** endif
    @endcode

    and a variety of other things:
    @code{.unformatted}
    @[<anotherfile.txt] # read another file
    @[var=value] # assign a variable
    @[var:=value] # assign a variable and insert the value in the output stream
    @[var2:?] # abort if var2 is not assigned, otherwise insert var2's contents
    @[var3==BLAH?thencondition:elsecondition] # if-then-else substitution
    @[var3!=BLAH?thencondition:elsecondition] # same, but with a "not equal"
    @[var4:-substitution] # insert var4, or this substitution if var4 is unset
    @[var5:+text] # insert text if var5 is set
    @endcode

    There are also a small number of functions that modify text before
    it is sent to stdout. (The original variable is unmodified, only
    the output text is changed.)
    @code{.unformatted}
    @[var1.uc] # uppercase value of var1
    @[var1.lc] # lowercase value of var1
    @[var1.len] # length of var1
    @[var1.trim] # var1 with leading and trailing whitespace removed
    @endcode
    """

    def __init__(self, stream=sys.stdout, varhash=None, logger=None,
                 max_lines=1000000):
        """!ATParser constructor
        @param stream the output stream
        @param varhash a dict of variables. All values must be strings.
          If this is unspecified, os.environ will be used.
        @param logger the logging.Logger that receives warnings, or
          None to discard them.
        @param max_lines the maximum number of lines to read"""
        if varhash is None:
            self.varhash = dict(os.environ)
        else:
            self.varhash = dict(varhash)
        # Stack of input names; parse_line pushes the file it is
        # working on so that infile reports the right name.
        self.__infiles = ['(string)']
        self._states = list()
        self.__stream = stream
        self.__max_lines = int(max_lines)
        self.__logger = logger

    ##@var varhash
    # The dict of variables. This is NOT the dict sent to the constructor --- a
    # copy was made. That means it is safe to modify the variables all you want,
    # even if os.environ was used.

    def warn(self, text):
        """!Print a warning to the logger, if we have a logger.
        @protected
        @param text the warning text."""
        if self.__logger is not None:
            # Logger.warn is a deprecated alias; warning() is the
            # supported spelling.
            self.__logger.warning(text)

    @property
    def max_lines(self):
        """!The maximum number of lines to read."""
        return self.__max_lines

    @property
    def infile(self):
        """!The current input file name."""
        return self.__infiles[-1]

    def _write(self, data):
        """!Write data to the output stream
        @param data the data to write."""
        self.__stream.write(data)

    def applyfun(self, val, fun1, morefun):
        """!Applies a function to text.
        @param val the text
        @param fun1 the function to apply
        @param morefun more functions to apply, as a string of the form
          ".name.name...", or an empty string
        @returns the text after all the functions were applied
        @protected"""
        while True:
            runme = functions.get(fun1, None)
            if runme is not None:
                val = runme(val)
                if val is None:
                    val = ''
            else:
                self.warn(
                    'Ignoring unknown function "%s" -- I only know these: %s'
                    % (fun1, ' '.join(functions.keys())))
            m = re.match(r'\.([A-Za-z0-9_]+)(.*)', morefun)
            if not m:
                return val
            # Peel off the next ".name" and go around again.
            (fun1, morefun) = m.groups()

    def from_var(self, varname, optional):
        """!Return the value of a variable with functions applied.
        @param varname the variable name, including functions
        @param optional if False, raise an exception if the variable is
          unset. If True, return '' for unset variables.
        @protected"""
        m = re.match(r'([A-Za-z0-9_]+)\.([A-Za-z0-9_]+)(.*)', varname)
        if m:
            (varname, fun1, morefun) = m.groups()
            val = self.from_var(varname, optional=optional)
            return self.applyfun(val, fun1, morefun)
        elif varname in self.varhash:
            return self.varhash[varname]
        elif optional:
            return ''
        else:
            raise NoSuchVariable(self.infile, varname)

    def optional_var(self, varname):
        """!Return the value of a variable with functions applied, or
        '' if the variable is unset.
        @param varname the name of the variable.
        @protected"""
        return self.from_var(varname, optional=True)

    def require_var(self, varname):
        """!Return the value of a variable with functions applied,
        raising an exception if the variable is unset.
        @param varname the name of the variable.
        @protected"""
        return self.from_var(varname, optional=False)

    def replace_vars(self, text):
        """!Expand $VAR and ${...} blocks and backslash escapes in a string.
        @param text the string
        @returns a new string with expansions performed
        @protected"""
        # $VAR (not preceded by a backslash): the variable must exist.
        text = re.sub(r'(?<!\\)\$[a-zA-Z_0-9.]+',
                      lambda x: self.require_var(x.group(0)[1:]),
                      text)
        # ${...} (not preceded by a backslash): full command syntax.
        text = re.sub(r'(?<!\\)\$\{[^{}]*\}',
                      lambda x: self.var_or_command(x.group(0)[2:-1]),
                      text)
        # Backslash escapes: three digits, or any single character.
        text = re.sub(r'\\([0-9]{3}|.)',
                      lambda x: replace_backslashed(x.group(0)), text)
        return text

    def parse_stream(self, stream, streamname):
        """!Read a stream and parse its contents
        @param stream the stream (an opened file)
        @param streamname a name for this stream for error messages"""
        for lineno, line in enumerate(stream, 1):
            self.parse_line(line, streamname, lineno)

    def parse_file(self, filename):
        """!Read a file and parse its contents.
        @param filename the name of the file to read; also used in
          error messages"""
        with open(filename, 'rt') as f:
            for lineno, line in enumerate(f, 1):
                self.parse_line(line, filename, lineno)

    def require_file(self, filename_pattern):
        """!Read the contents of a file and return it.
        @param filename_pattern a filename with $VAR or ${} blocks in
          it; they are expanded with replace_vars().
        @protected"""
        filename = self.replace_vars(filename_pattern)
        with open(filename, 'rt') as f:
            return f.read()

    def getvar(self, varname):
        """!Return the value of a variable, or None if it is unset.
        @param varname the name of the variable"""
        if varname in self.varhash:
            return self.varhash[varname]
        return None

    def var_or_command(self, data):
        """!Expand one ${...} or @@[...] block
        @param data the contents of the block
        @returns the replacement text; '' when the block produces no
          output (unparseable text, assignment)
        @protected"""
        m = re.match(r'(?ms)\A([a-z_A-Z][a-zA-Z_0-9]*)'
                     r'((?:\.[A-Za-z0-9.]+)?)'
                     r'(?:(==|!=|:\+|:-|=|:=|:\?|<|:<|:)(.*))?\Z',
                     data)
        if not m:
            return ''
        (varname, funs, operator, operand) = m.groups()
        if not operator:
            # Plain @[VAR] or @[VAR.fun]: the variable must exist.
            return self.require_var(varname + funs)

        if operand is None:
            operand = ''
        vartext = self.getvar(varname)
        # "Set" means defined and non-empty, judged before functions.
        varset = vartext is not None and vartext != ''
        if funs:
            if vartext is None:
                # Functions operate on text, so an unset variable is
                # treated as an empty string.
                vartext = ''
            mf = re.match(r'\A\.([A-Z0-9a-z_]+)(.*)\Z', funs)
            if mf is None:
                raise ParserSyntaxError(
                    'Invalid function list "%s" in "%s"' % (funs, data))
            (fun, morefun) = mf.groups()
            vartext = self.applyfun(vartext, fun, morefun)

        if operator == ':+':
            return self.replace_vars(operand) if varset else ''
        elif operator == ':-':
            if not varset:
                vartext = self.replace_vars(operand)
            return vartext
        elif operator == ':':
            # Substring: @[VAR:start] or @[VAR:start:count]
            val = '' if vartext is None else vartext
            mo = re.match(r'\A([0-9]+)(?::([0-9]+))?', operand)
            if mo is None:
                return val
            (start, count) = mo.groups()
            length = len(val)
            start = int(start)
            if count is None or count == '':
                count = length - start
            else:
                count = int(count)
                if start + count > length:
                    count = length - start
            return val[start:(start + count)]
        elif operator == '=':
            # Assignment produces no output.
            self.varhash[varname] = self.replace_vars(operand)
            return ''
        elif operator == '==' or operator == '!=':
            # This is the ternary ?: operator.
            mo = re.match(r'(?ms)\A((?:[^\\\?]|(?:\\\\)*|(?:\\\\)*\\.)*)\?(.*?):((?:[^\\:]|(?:\\\\)*|(?:\\\\)*\\.)*)\Z', operand)
            if mo is None:
                (test, thendo, elsedo) = ('', '', '')
            else:
                (test, thendo, elsedo) = mo.groups()
            test = self.replace_vars(test)
            if operator == '==':
                chosen = thendo if (vartext == test) else elsedo
            else:
                chosen = thendo if (vartext != test) else elsedo
            return self.replace_vars(chosen)
        elif operator == ':=':
            if not varset:
                self.varhash[varname] = self.replace_vars(operand)
            return self.varhash[varname]
        elif operator == ':?':
            if varset:
                return vartext
            elif operand == '':
                raise ScriptAssertion('%s: you did not define this '
                                      'variable. Aborting.' % (varname,))
            else:
                raise ScriptAssertion('%s: %s' % (varname, operand))
        # Operators the pattern accepts but that have no
        # implementation here ("<" and ":<") produce no output.
        return ''

    def require_data(self, data):
        """!Expand text within an @@[...] block.
        @param data the contents of the block
        @protected"""
        if not data:
            return ''  # @[] expands to nothing
        if data[0] == '<':
            # This is an instruction to read in a file.
            return self.require_file(data[1:])
        elif data == '@':
            return '@'  # @[@] is replaced with @
        elif data[0] == '#':
            if data.find('@[') >= 0:
                raise ParserSyntaxError('Found a @[ construct nested within a comment (@[#...])')
            return ''  # @[#stuff] is a comment
        else:
            # This is a variable name, command or error:
            return self.var_or_command(data)

    def str_state(self):
        """!Return a string description of the parser stack for debugging."""
        parts = ['STATE STACK: \n']
        for state in self._states:
            parts.append('state: ')
            if state['ignore']:
                parts.append('ignoring block: ')
            parts.append('active ' if state['active'] else 'inactive ')
            if state['in_if_block']:
                parts.append('in if block, before else ')
            if state['in_ifelse_block']:
                parts.append('in if block, after else ')
            if not state['in_if_block'] and not state['in_ifelse_block']:
                parts.append('not if or else')
            if state['used_if']:
                parts.append('(have activated a past if/elseif/else) ')
            parts.append('\n')
        parts.append('END\n')
        return ''.join(parts)

    @property
    def active(self):
        """!Is the current block active?  True when every state on the
        stack is active, or when the stack is empty.
        @protected"""
        return all(state['active'] for state in self._states)

    def top_state(self, what=None):
        """!Return the top parser state without removing it
        @param what why the state is being examined. This is for
          error messages.  When given, it is the key to look up in the
          top state and the return value is that entry as a bool;
          otherwise the whole top state dict is returned.
        @protected"""
        if what:
            if not self._states:
                raise AssertionError('Internal error: no state to search when looking for %s in top state.' % (what,))
            elif what not in self._states[-1]:
                raise AssertionError('Internal error: cannot find %s in top state.' % (what,))
            return bool(self._states[-1][what])
        else:
            return self._states[-1]

    def push_state(self, state):
        """!Push a new state to the top of the parser state stack
        @param state the new parser state
        @protected"""
        self._states.append(state)

    def pop_state(self):
        """!Remove and return the top parser state
        @protected"""
        return self._states.pop()

    def replace_state(self, state):
        """!Replace the top parser state.
        @protected
        @param state the new parser state"""
        self._states[-1] = state

    def parse_lines(self, lines, filename):
        """!Given a multi-line string, parse the contents line-by-line
        @param lines the multi-line string
        @param filename the name of the file it was from, for error messages"""
        # Keep the line endings so the output has the same line
        # structure as the input, as parse_file and parse_stream do.
        for lineno, line in enumerate(lines.splitlines(True), 1):
            self.parse_line(line, filename, lineno)

    def parse_line(self, line, filename, lineno):
        """!Parses one line of text.
        @param line the line of text.
        @param filename the name of the source file, for error messages
        @param lineno the line number within the source file, for
          error messages"""
        # Track the file being parsed so that self.infile (used when
        # reporting undefined variables) names the right input.
        self.__infiles.append(filename)
        try:
            self._parse_line(line, lineno)
        finally:
            self.__infiles.pop()

    def _parse_line(self, line, lineno):
        """!Implementation of parse_line: handles @** directives, then
        expands @@[...] blocks in ordinary lines and writes the result.
        @param line the line of text
        @param lineno the line number, for error messages
        @protected"""
        top_state = self.top_state
        replace_state = self.replace_state

        m = re.match(r'^\s*\@\*\*\s*if\s+([A-Za-z_][A-Za-z_0-9.]*)\s*([!=])=\s*(.*?)\s*$', line)
        if m:
            # This is the beginning of an IF block
            if not self.active:
                # This IF lies within an inactive block, so we skip
                # this whole if, elseif, else, endif block.
                self.push_state(ignore_if_block)
                return
            (left, comp, right) = m.groups()
            left = self.optional_var(left)
            right = self.replace_vars(right)
            # "==" activates on equality, "!=" on inequality.
            if (left == right) == (comp == '='):
                self.push_state(if_active_if)
            else:
                self.push_state(if_unused_if)
            return

        m = re.match(r'^\s*\@\*\*\s*abort\s+(.*)$', line)
        if m:
            if self.active:
                raise ScriptAbort('Found an abort directive on line %d: %s' % (
                    lineno, m.group(1)))
            return

        m = re.match(r'^\s*\@\*\*\s*warn\s+(.*)$', line)
        if m:
            if self.active:
                self.warn(self.replace_vars(m.group(1)))
            return

        m = re.match(r'^\s*\@\*\*\s*else\s*if\s+([A-Za-z_][A-Za-z_0-9.]*)\s*([!=])=\s*(.*?)\s*\Z', line)
        if m:
            # Check for a missing "if" first, so the user sees a syntax
            # error rather than an internal assertion.
            if not self._states:
                raise ParserSyntaxError(
                    'Found an elseif without a matching if at line %d' % lineno)
            if top_state('ignore'):
                return
            (left, comp, right) = m.groups()
            left = self.optional_var(left)
            right = self.replace_vars(right)
            if not top_state('in_if_block'):
                if top_state('in_ifelse_block'):
                    raise ParserSyntaxError(
                        'Unexpected elseif after an else at line %d' % lineno)
                else:
                    raise ParserSyntaxError(
                        'Unexpected elseif at line %d' % lineno)
            elif top_state('used_if'):
                # the "if" or a prior elseif matched, so we ignore
                # this elseif and deactivate the block so all future
                # if/else/elseif will be unused.
                replace_state(if_used_if)
            elif not top_state('active'):
                if (left == right) == (comp == '='):
                    replace_state(if_active_if)
            return

        m = re.match(r'^\s*\@\*\*\s*else\s*(?:\#.*)?$', line)
        if m:
            in_if = bool(self._states) and top_state('in_if_block')
            in_else = bool(self._states) and top_state('in_ifelse_block')
            if not in_if and not in_else:
                raise ParserSyntaxError('Found an else outside an if at line %d' % lineno)
            elif in_else and not in_if:
                # Already past an "else" for this block.  (The state
                # used after a matched if/elseif sets both flags and
                # may still take its one "else".)
                raise ParserSyntaxError('Found an extra else at line %d' % lineno)
            elif top_state('ignore'):
                # We're ignoring a whole if/elseif/else/endif block
                # because it lies within an inactive block.
                replace_state(ignore_else_block)
            elif top_state('used_if'):
                replace_state(if_inactive_else)
            else:
                replace_state(if_active_else)
            return

        m = re.match(r'^\s*\@\*\*\s*endif\s*(?:\#.*)?$', line)
        if m:
            if self._states and (top_state('in_if_block')
                                 or top_state('in_ifelse_block')):
                self.pop_state()
            else:
                raise ParserSyntaxError('Found an endif without matching if at line %d' % lineno)
            return

        m = re.match(r'^\s*\@\*\*\s*insert\s*(\S.*?)\s*$', line)
        if m:
            if self.active:
                # "insert" copies the file verbatim, without parsing.
                contents = self.require_file(m.group(1))
                self._write(contents)
            return

        m = re.match(r'^\s*\@\*\*\s*include\s*(\S.*?)\s*$', line)
        if m:
            if self.active:
                # "include" parses the file's contents.
                ffilename = m.group(1)
                contents = self.require_file(ffilename)
                self.parse_lines(contents, ffilename)
            return

        m = re.match(r'^\s*\@\*\*.*', line)
        if m:
            raise ParserSyntaxError('Invalid \\@** directive in line "%s". Ignoring line.\n' % (line,))

        if not self.active:
            return  # inside a disabled block

        # Replace text of the form @[VARNAME] with the contents of the
        # respective variable.  re.sub always returns a string, so no
        # type check of the result is needed.
        outline = re.sub(r'\@\[((?:\n|[^\]])*)\]',
                         lambda x: self.require_data(x.group(1)),
                         line)
        self._write(outline)
        if lineno > self.max_lines:
            raise ParserLineLimit('Read past max_lines=%d lines from input file. Something is probably wrong.' % self.max_lines)
def from_var(self, varname, optional)
Return the value of a variable with functions applied.
Definition: atparse.py:215
def __init__
NoSuchVariable constructor.
Definition: atparse.py:23
Raised when a script @[VARNAME:?message] is encountered, and the variable does not exist...
Definition: atparse.py:16
def replace_state(self, state)
Replace the top parser state.
Definition: atparse.py:447
def applyfun(self, val, fun1, morefun)
Applies a function to text.
Definition: atparse.py:195
def warn(self, text)
Print a warning to the logger, if we have a logger.
Definition: atparse.py:177
Raised when an "@** abort" directive is reached in a script.
Definition: atparse.py:19
def require_data(self, data)
Expand text within an @[...] block.
Definition: atparse.py:373
def max_lines(self)
The maximum number of lines to read.
Definition: atparse.py:184
def require_var(self, varname)
Return the value of a variable with functions applied, raising an exception if the variable is unset...
Definition: atparse.py:240
def replace_vars(self, text)
Expand @[...] blocks in a string.
Definition: atparse.py:247
def var_or_command(self, data)
Expand one ${...} or @[...] block.
Definition: atparse.py:291
line
The line number that caused the problem.
Definition: atparse.py:31
def parse_file(self, filename)
Read a file and parse its contents.
Definition: atparse.py:270
def parse_lines(self, lines, filename)
Given a multi-line string, parse the contents line-by-line.
Definition: atparse.py:453
def __init__
ATParser constructor.
Definition: atparse.py:156
infile
The file that caused the problem.
Definition: atparse.py:28
Raised when a script requests an unknown variable.
Definition: atparse.py:21
def infile(self)
The current input file name.
Definition: atparse.py:188
def replace_backslashed(text)
Turns \t to tab, \n to end of line, \r to carriage return, \b to backspace and \(octal) to other char...
Definition: atparse.py:47
def parse_line(self, line, filename, lineno)
Parses one line of text.
Definition: atparse.py:462
def _write(self, data)
Write data to the output stream.
Definition: atparse.py:191
Raised when the parser encounters a syntax error.
Definition: atparse.py:14
def str_state(self)
Return a string description of the parser stack for debugging.
Definition: atparse.py:390
def top_state
Return the top parser state without removing it.
Definition: atparse.py:423
def require_file(self, filename_pattern)
Read the contents of a file and return it.
Definition: atparse.py:279
def pop_state(self)
Remove and return the top parser state.
Definition: atparse.py:442
def parse_stream(self, stream, streamname)
Read a stream and parse its contents.
Definition: atparse.py:261
varhash
The dict of variables.
Definition: atparse.py:164
Takes input files or other data, and replaces certain strings with variables or functions.
Definition: atparse.py:98
def optional_var(self, varname)
Return the value of a variable with functions applied, or '' if the variable is unset.
Definition: atparse.py:233
def active(self)
Is the current block active?
Definition: atparse.py:414
varname
The problematic variable name.
Definition: atparse.py:29
def push_state(self, state)
Push a new state to the top of the parser state stack.
Definition: atparse.py:437
def getvar(self, varname)
Return the value of a variable, or None if it is unset.
Definition: atparse.py:287