676 lines
18 KiB
Python
676 lines
18 KiB
Python
# [[[cog import cog; cog.outl('"""\n%s\n"""' % file('README.rst').read()) ]]]
|
|
"""
|
|
S-expression parser for Python
|
|
==============================
|
|
|
|
`sexpdata` is a simple S-expression parser/serializer. It has
|
|
simple `load` and `dump` functions like `pickle`, `json` or `PyYAML`
|
|
module.
|
|
|
|
>>> from sexpdata import loads, dumps
|
|
>>> loads('("a" "b")')
|
|
['a', 'b']
|
|
>>> print(dumps(['a', 'b']))
|
|
("a" "b")
|
|
|
|
|
|
You can install `sexpdata` from PyPI_::
|
|
|
|
pip install sexpdata
|
|
|
|
|
|
Links:
|
|
|
|
* `Documentation (at Read the Docs) <http://sexpdata.readthedocs.org/>`_
|
|
* `Repository (at GitHub) <https://github.com/tkf/sexpdata>`_
|
|
* `Issue tracker (at GitHub) <https://github.com/tkf/sexpdata/issues>`_
|
|
* `PyPI <http://pypi.python.org/pypi/sexpdata>`_
|
|
* `Travis CI <https://travis-ci.org/#!/tkf/sexpdata>`_
|
|
|
|
|
|
License
|
|
-------
|
|
|
|
`sexpdata` is licensed under the terms of the BSD 2-Clause License.
|
|
See the source code for more information.
|
|
|
|
"""
|
|
# [[[end]]]
|
|
|
|
# Copyright (c) 2012 Takafumi Arakaki
|
|
# All rights reserved.
|
|
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions are
|
|
# met:
|
|
|
|
# Redistributions of source code must retain the above copyright notice,
|
|
# this list of conditions and the following disclaimer.
|
|
|
|
# Redistributions in binary form must reproduce the above copyright
|
|
# notice, this list of conditions and the following disclaimer in the
|
|
# documentation and/or other materials provided with the distribution.
|
|
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
__version__ = '0.0.3'
|
|
__author__ = 'Takafumi Arakaki'
|
|
__license__ = 'BSD License'
|
|
__all__ = [
|
|
# API functions:
|
|
'load', 'loads', 'dump', 'dumps',
|
|
# Utility functions:
|
|
'car', 'cdr',
|
|
# S-expression classes:
|
|
'Symbol', 'String', 'Quoted',
|
|
]
|
|
|
|
import re
|
|
from string import whitespace
|
|
import functools
|
|
|
|
BRACKETS = {'(': ')', '[': ']'}
|
|
|
|
|
|
### Python 3 compatibility
|
|
|
|
try:
|
|
unicode
|
|
PY3 = False
|
|
except NameError:
|
|
basestring = unicode = str # Python 3
|
|
PY3 = True
|
|
|
|
|
|
def uformat(s, *args, **kwds):
|
|
"""Alias of ``unicode(s).format(...)``."""
|
|
return tounicode(s).format(*args, **kwds)
|
|
|
|
|
|
### Utility
|
|
|
|
def tounicode(string):
|
|
"""
|
|
Decode `string` if it is not unicode. Do nothing in Python 3.
|
|
"""
|
|
if not isinstance(string, unicode):
|
|
string = unicode(string, 'utf-8')
|
|
return string
|
|
|
|
|
|
def return_as(converter):
|
|
"""
|
|
Decorator to convert result of a function.
|
|
|
|
It is just a function composition. The following two codes are
|
|
equivalent.
|
|
|
|
Using `@return_as`::
|
|
|
|
@return_as(converter)
|
|
def generator(args):
|
|
...
|
|
|
|
result = generator(args)
|
|
|
|
Manually do the same::
|
|
|
|
def generator(args):
|
|
...
|
|
|
|
result = converter(generator(args))
|
|
|
|
Example:
|
|
|
|
>>> @return_as(list)
|
|
... def f():
|
|
... for i in range(3):
|
|
... yield i
|
|
...
|
|
>>> f() # this gives a list, not an iterator
|
|
[0, 1, 2]
|
|
|
|
"""
|
|
def wrapper(generator):
|
|
@functools.wraps(generator)
|
|
def func(*args, **kwds):
|
|
return converter(generator(*args, **kwds))
|
|
return func
|
|
return wrapper
|
|
|
|
|
|
### Interface
|
|
|
|
def load(filelike, **kwds):
|
|
"""
|
|
Load object from S-expression stored in `filelike`.
|
|
|
|
:arg filelike: A text stream object.
|
|
|
|
See :func:`loads` for valid keyword arguments.
|
|
|
|
>>> import io
|
|
>>> fp = io.StringIO()
|
|
>>> sexp = [Symbol('a'), Symbol('b')] # let's dump and load this object
|
|
>>> dump(sexp, fp)
|
|
>>> _ = fp.seek(0)
|
|
>>> load(fp) == sexp
|
|
True
|
|
|
|
"""
|
|
return loads(filelike.read(), **kwds)
|
|
|
|
|
|
def loads(string, **kwds):
|
|
"""
|
|
Load object from S-expression `string`.
|
|
|
|
:arg string: String containing an S-expression.
|
|
:type nil: str or None
|
|
:keyword nil: A symbol interpreted as an empty list.
|
|
Default is ``'nil'``.
|
|
:type true: str or None
|
|
:keyword true: A symbol interpreted as True.
|
|
Default is ``'t'``.
|
|
:type false: str or None
|
|
:keyword false: A symbol interpreted as False.
|
|
Default is ``None``.
|
|
:type line_comment: str
|
|
:keyword line_comment: Beginning of line comment.
|
|
Default is ``';'``.
|
|
|
|
>>> loads("(a b)")
|
|
[Symbol('a'), Symbol('b')]
|
|
>>> loads("a")
|
|
Symbol('a')
|
|
>>> loads("(a 'b)")
|
|
[Symbol('a'), Quoted(Symbol('b'))]
|
|
>>> loads("(a '(b))")
|
|
[Symbol('a'), Quoted([Symbol('b')])]
|
|
>>> loads('''
|
|
... ;; This is a line comment.
|
|
... ("a" "b") ; this is also a comment.
|
|
... ''')
|
|
['a', 'b']
|
|
>>> loads('''
|
|
... # This is a line comment.
|
|
... ("a" "b") # this is also a comment.
|
|
... ''', line_comment='#')
|
|
['a', 'b']
|
|
|
|
``nil`` is converted to an empty list by default. You can use
|
|
keyword argument `nil` to change what symbol must be interpreted
|
|
as nil:
|
|
|
|
>>> loads("nil")
|
|
[]
|
|
>>> loads("null", nil='null')
|
|
[]
|
|
>>> loads("nil", nil=None)
|
|
Symbol('nil')
|
|
|
|
``t`` is converted to True by default. You can use keyword
|
|
argument `true` to change what symbol must be converted to True.:
|
|
|
|
>>> loads("t")
|
|
True
|
|
>>> loads("#t", true='#t')
|
|
True
|
|
>>> loads("t", true=None)
|
|
Symbol('t')
|
|
|
|
No symbol is converted to False by default. You can use keyword
|
|
argument `false` to convert a symbol to False.
|
|
|
|
>>> loads("#f")
|
|
Symbol('#f')
|
|
>>> loads("#f", false='#f')
|
|
False
|
|
>>> loads("nil", false='nil', nil=None)
|
|
False
|
|
|
|
"""
|
|
obj = parse(string, **kwds)
|
|
assert len(obj) == 1 # FIXME: raise an appropriate error
|
|
return obj[0]
|
|
|
|
|
|
def dump(obj, filelike, **kwds):
|
|
"""
|
|
Write `obj` as an S-expression into given stream `filelike`.
|
|
|
|
:arg obj: A Python object.
|
|
:arg filelike: A text stream object.
|
|
|
|
See :func:`dumps` for valid keyword arguments.
|
|
|
|
>>> import io
|
|
>>> fp = io.StringIO()
|
|
>>> dump([Symbol('a'), Symbol('b')], fp)
|
|
>>> print(fp.getvalue())
|
|
(a b)
|
|
|
|
"""
|
|
filelike.write(unicode(dumps(obj)))
|
|
|
|
|
|
def dumps(obj, **kwds):
|
|
"""
|
|
Convert python object into an S-expression.
|
|
|
|
:arg obj: A Python object.
|
|
:type str_as: ``'symbol'`` or ``'string'``
|
|
:keyword str_as: How string should be interpreted.
|
|
Default is ``'string'``.
|
|
:type tuple_as: ``'list'`` or ``'array'``
|
|
:keyword tuple_as: How tuple should be interpreted.
|
|
Default is ``'list'``.
|
|
:type true_as: str
|
|
:keyword true_as: How True should be interpreted.
|
|
Default is ``'t'``
|
|
:type false_as: str
|
|
:keyword false_as: How False should be interpreted.
|
|
Default is ``'()'``
|
|
:type none_as: str
|
|
:keyword none_as: How None should be interpreted.
|
|
Default is ``'()'``
|
|
|
|
Basic usage:
|
|
|
|
>>> print(dumps(['a', 'b']))
|
|
("a" "b")
|
|
>>> print(dumps(['a', 'b'], str_as='symbol'))
|
|
(a b)
|
|
>>> print(dumps(dict(a=1, b=2)))
|
|
(:a 1 :b 2)
|
|
>>> print(dumps([None, True, False, ()]))
|
|
(() t () ())
|
|
>>> print(dumps([None, True, False, ()],
|
|
... none_as='null', true_as='#t', false_as='#f'))
|
|
(null #t #f ())
|
|
>>> print(dumps(('a', 'b')))
|
|
("a" "b")
|
|
>>> print(dumps(('a', 'b'), tuple_as='array'))
|
|
["a" "b"]
|
|
|
|
More verbose usage:
|
|
|
|
>>> print(dumps([Symbol('a'), Symbol('b')]))
|
|
(a b)
|
|
>>> print(dumps(Symbol('a')))
|
|
a
|
|
>>> print(dumps([Symbol('a'), Quoted(Symbol('b'))]))
|
|
(a 'b)
|
|
>>> print(dumps([Symbol('a'), Quoted([Symbol('b')])]))
|
|
(a '(b))
|
|
|
|
"""
|
|
return tosexp(obj, **kwds)
|
|
|
|
|
|
def car(obj):
|
|
"""
|
|
Alias of ``obj[0]``.
|
|
|
|
>>> car(loads('(a . b)'))
|
|
Symbol('a')
|
|
>>> car(loads('(a b)'))
|
|
Symbol('a')
|
|
|
|
"""
|
|
return obj[0]
|
|
|
|
|
|
def cdr(obj):
|
|
"""
|
|
`cdr`-like function.
|
|
|
|
>>> cdr(loads('(a . b)'))
|
|
Symbol('b')
|
|
>>> cdr(loads('(a b)'))
|
|
[Symbol('b')]
|
|
>>> cdr(loads('(a . (b))'))
|
|
[Symbol('b')]
|
|
>>> cdr(loads('(a)'))
|
|
[]
|
|
>>> cdr(loads('(a . nil)'))
|
|
[]
|
|
|
|
"""
|
|
# This is very lazy implementation. Probably the best way to do
|
|
# it is to define `Cons` S-expression class.
|
|
if len(obj) > 2:
|
|
dot = obj[1]
|
|
if isinstance(dot, Symbol) and dot.value() == '.':
|
|
return obj[2]
|
|
return obj[1:]
|
|
|
|
|
|
### Core
|
|
|
|
def tosexp(obj, str_as='string', tuple_as='list',
|
|
true_as='t', false_as='()', none_as='()'):
|
|
"""
|
|
Convert an object to an S-expression (`dumps` is just calling this).
|
|
|
|
See this table for comparison of lispy languages, to support them
|
|
as much as possible:
|
|
`Lisp: Common Lisp, Scheme/Racket, Clojure, Emacs Lisp - Hyperpolyglot
|
|
<http://hyperpolyglot.org/lisp>`_
|
|
|
|
"""
|
|
_tosexp = lambda x: tosexp(
|
|
x, str_as=str_as, tuple_as=tuple_as,
|
|
true_as=true_as, false_as=false_as, none_as=none_as)
|
|
if isinstance(obj, list):
|
|
return Bracket(obj, '(').tosexp(_tosexp)
|
|
elif isinstance(obj, tuple):
|
|
if tuple_as == 'list':
|
|
return Bracket(obj, '(').tosexp(_tosexp)
|
|
elif tuple_as == 'array':
|
|
return Bracket(obj, '[').tosexp(_tosexp)
|
|
else:
|
|
raise ValueError(uformat("tuple_as={0!r} is not valid", tuple_as))
|
|
elif obj is True: # must do this before ``isinstance(obj, int)``
|
|
return true_as
|
|
elif obj is False:
|
|
return false_as
|
|
elif obj is None:
|
|
return none_as
|
|
elif isinstance(obj, (int, float)):
|
|
return str(obj)
|
|
elif isinstance(obj, basestring):
|
|
if str_as == 'symbol':
|
|
return obj
|
|
elif str_as == 'string':
|
|
return String(obj).tosexp()
|
|
else:
|
|
raise ValueError(uformat("str_as={0!r} is not valid", str_as))
|
|
elif isinstance(obj, dict):
|
|
return _tosexp(dict_to_plist(obj))
|
|
elif isinstance(obj, SExpBase):
|
|
return obj.tosexp(_tosexp)
|
|
else:
|
|
raise TypeError(uformat(
|
|
"Object of type '{0}' cannot be converted by `tosexp`. "
|
|
"It's value is '{1!r}'", type(obj), obj))
|
|
|
|
|
|
@return_as(list)
|
|
def dict_to_plist(obj):
|
|
for key in obj:
|
|
yield Symbol(uformat(":{0}", key))
|
|
yield obj[key]
|
|
|
|
|
|
class SExpBase(object):
|
|
|
|
def __init__(self, val):
|
|
self._val = val
|
|
|
|
def __repr__(self):
|
|
return uformat("{0}({1!r})", self.__class__.__name__, self._val)
|
|
|
|
def __eq__(self, other):
|
|
if isinstance(other, self.__class__):
|
|
return self._val == other._val
|
|
else:
|
|
return False
|
|
|
|
def value(self):
|
|
return self._val
|
|
|
|
def tosexp(self, tosexp=tosexp):
|
|
"""
|
|
Decode this object into an S-expression string.
|
|
|
|
:arg tosexp: A function to be used when converting sub S-expression.
|
|
|
|
"""
|
|
raise NotImplementedError
|
|
|
|
@classmethod
|
|
def quote(cls, string):
|
|
for (s, q) in cls._lisp_quoted_specials:
|
|
string = string.replace(s, q)
|
|
return tounicode(string)
|
|
|
|
@classmethod
|
|
def unquote(cls, string):
|
|
return cls._lisp_quoted_to_raw.get(string, string)
|
|
|
|
|
|
class Symbol(SExpBase):
|
|
|
|
_lisp_quoted_specials = [
|
|
('\\', '\\\\'), # must come first to avoid doubly quoting "\"
|
|
("'", r"\'"), ("`", r"\`"), ('"', r'\"'),
|
|
('(', r'\('), (')', r'\)'), ('[', r'\['), (']', r'\]'),
|
|
(' ', r'\ '), ('.', r'\.'), (',', r'\,'), ('?', r'\?'),
|
|
(';', r'\;'), ('#', r'\#'),
|
|
]
|
|
|
|
_lisp_quoted_to_raw = dict((q, r) for (r, q) in _lisp_quoted_specials)
|
|
|
|
def tosexp(self, tosexp=None):
|
|
return self.quote(self._val)
|
|
|
|
|
|
class String(SExpBase):
|
|
|
|
_lisp_quoted_specials = [ # from Pymacs
|
|
('\\', '\\\\'), # must come first to avoid doubly quoting "\"
|
|
('"', '\\"'), ('\b', '\\b'), ('\f', '\\f'),
|
|
('\n', '\\n'), ('\r', '\\r'), ('\t', '\\t')]
|
|
|
|
_lisp_quoted_to_raw = dict((q, r) for (r, q) in _lisp_quoted_specials)
|
|
|
|
def tosexp(self, tosexp=None):
|
|
return uformat('"{0}"', self.quote(self._val))
|
|
|
|
|
|
class Quoted(SExpBase):
|
|
|
|
def tosexp(self, tosexp=tosexp):
|
|
return uformat("'{0}", tosexp(self._val))
|
|
|
|
|
|
class Bracket(SExpBase):
|
|
|
|
def __init__(self, val, bra):
|
|
assert bra in BRACKETS # FIXME: raise an appropriate error
|
|
super(Bracket, self).__init__(val)
|
|
self._bra = bra
|
|
|
|
def __repr__(self):
|
|
return uformat("{0}({1!r}, {2!r})",
|
|
self.__class__.__name__, self._val, self._bra)
|
|
|
|
def tosexp(self, tosexp=tosexp):
|
|
bra = self._bra
|
|
ket = BRACKETS[self._bra]
|
|
c = ' '.join(tosexp(v) for v in self._val)
|
|
return uformat("{0}{1}{2}", bra, c, ket)
|
|
|
|
|
|
def bracket(val, bra):
|
|
if bra == '(':
|
|
return val
|
|
else:
|
|
return Bracket(val, bra)
|
|
|
|
|
|
class ExpectClosingBracket(Exception):
|
|
|
|
def __init__(self, got, expect):
|
|
super(ExpectClosingBracket, self).__init__(uformat(
|
|
"Not enough closing brackets. "
|
|
"Expected {0!r} to be the last letter in the sexp. "
|
|
"Got: {1!r}", expect, got))
|
|
|
|
|
|
class ExpectNothing(Exception):
|
|
|
|
def __init__(self, got):
|
|
super(ExpectNothing, self).__init__(uformat(
|
|
"Too many closing brackets. "
|
|
"Expected no character left in the sexp. "
|
|
"Got: {0!r}", got))
|
|
|
|
|
|
class Parser(object):
|
|
|
|
closing_brackets = set(BRACKETS.values())
|
|
atom_end = \
|
|
set(BRACKETS) | set(closing_brackets) | set('"\'') | set(whitespace)
|
|
atom_end_or_escape_re = re.compile("|".join(map(re.escape,
|
|
atom_end | set('\\'))))
|
|
quote_or_escape_re = re.compile(r'"|\\')
|
|
|
|
def __init__(self, string, string_to=None, nil='nil', true='t', false=None,
|
|
line_comment=';'):
|
|
self.string = string
|
|
self.nil = nil
|
|
self.true = true
|
|
self.false = false
|
|
self.string_to = (lambda x: x) if string_to is None else string_to
|
|
self.line_comment = line_comment
|
|
|
|
def parse_str(self, i):
|
|
string = self.string
|
|
chars = []
|
|
append = chars.append
|
|
search = self.quote_or_escape_re.search
|
|
|
|
assert string[i] == '"' # never fail
|
|
while True:
|
|
i += 1
|
|
match = search(string, i)
|
|
end = match.start()
|
|
append(string[i:end])
|
|
c = match.group()
|
|
if c == '"':
|
|
i = end + 1
|
|
break
|
|
elif c == '\\':
|
|
i = end + 1
|
|
append(String.unquote(c + string[i]))
|
|
else:
|
|
raise ExpectClosingBracket('"', None)
|
|
return (i, ''.join(chars))
|
|
|
|
def parse_atom(self, i):
|
|
string = self.string
|
|
chars = []
|
|
append = chars.append
|
|
search = self.atom_end_or_escape_re.search
|
|
atom_end = self.atom_end
|
|
|
|
while True:
|
|
match = search(string, i)
|
|
if not match:
|
|
append(string[i:])
|
|
i = len(string)
|
|
break
|
|
end = match.start()
|
|
append(string[i:end])
|
|
c = match.group()
|
|
if c in atom_end:
|
|
i = end # this is different from str
|
|
break
|
|
elif c == '\\':
|
|
i = end + 1
|
|
append(Symbol.unquote(c + string[i]))
|
|
i += 1
|
|
else:
|
|
raise ExpectClosingBracket('"', None)
|
|
return (i, self.atom(''.join(chars)))
|
|
|
|
def atom(self, token):
|
|
if token == self.nil:
|
|
return []
|
|
if token == self.true:
|
|
return True
|
|
if token == self.false:
|
|
return False
|
|
try:
|
|
return int(token)
|
|
except ValueError:
|
|
try:
|
|
return float(token)
|
|
except ValueError:
|
|
return Symbol(token)
|
|
|
|
def parse_sexp(self, i):
|
|
string = self.string
|
|
len_string = len(self.string)
|
|
sexp = []
|
|
append = sexp.append
|
|
while i < len_string:
|
|
c = string[i]
|
|
if c == '"':
|
|
(i, subsexp) = self.parse_str(i)
|
|
append(self.string_to(subsexp))
|
|
elif c in whitespace:
|
|
i += 1
|
|
continue
|
|
elif c in BRACKETS:
|
|
close = BRACKETS[c]
|
|
(i, subsexp) = self.parse_sexp(i + 1)
|
|
append(bracket(subsexp, c))
|
|
try:
|
|
nc = string[i]
|
|
except IndexError:
|
|
nc = None
|
|
if nc != close:
|
|
raise ExpectClosingBracket(nc, close)
|
|
i += 1
|
|
elif c in self.closing_brackets:
|
|
break
|
|
elif c == "'":
|
|
(i, subsexp) = self.parse_sexp(i + 1)
|
|
append(Quoted(subsexp[0]))
|
|
sexp.extend(subsexp[1:])
|
|
elif c == self.line_comment:
|
|
i = string.find('\n', i) + 1
|
|
if i <= 0:
|
|
i = len_string
|
|
break
|
|
else:
|
|
(i, subsexp) = self.parse_atom(i)
|
|
append(subsexp)
|
|
return (i, sexp)
|
|
|
|
def parse(self):
|
|
(i, sexp) = self.parse_sexp(0)
|
|
if i < len(self.string):
|
|
raise ExpectNothing(self.string[i:])
|
|
return sexp
|
|
|
|
|
|
def parse(string, **kwds):
|
|
"""
|
|
Parse s-expression.
|
|
|
|
>>> parse("(a b)")
|
|
[[Symbol('a'), Symbol('b')]]
|
|
>>> parse("a")
|
|
[Symbol('a')]
|
|
>>> parse("(a 'b)")
|
|
[[Symbol('a'), Quoted(Symbol('b'))]]
|
|
>>> parse("(a '(b))")
|
|
[[Symbol('a'), Quoted([Symbol('b')])]]
|
|
|
|
"""
|
|
return Parser(string, **kwds).parse()
|