# [[[cog import cog; cog.outl('"""\n%s\n"""' % file('README.rst').read()) ]]]
"""
S-expression parser for Python
==============================

`sexpdata` is a simple S-expression parser/serializer.  It has
simple `load` and `dump` functions like `pickle`, `json` or `PyYAML`
module.

>>> from sexpdata import loads, dumps
>>> loads('("a" "b")')
['a', 'b']
>>> print(dumps(['a', 'b']))
("a" "b")


You can install `sexpdata` from PyPI_::

  pip install sexpdata


Links:

* `Documentation (at Read the Docs) `_
* `Repository (at GitHub) `_
* `Issue tracker (at GitHub) `_
* `PyPI `_
* `Travis CI `_


License
-------

`sexpdata` is licensed under the terms of the BSD 2-Clause License.
See the source code for more information.

"""
# [[[end]]]

# Copyright (c) 2012 Takafumi Arakaki
# All rights reserved.

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:

# Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.

# Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

__version__ = '0.0.3'
__author__ = 'Takafumi Arakaki'
__license__ = 'BSD License'
__all__ = [
    # API functions:
    'load', 'loads', 'dump', 'dumps',
    # Utility functions:
    'car', 'cdr',
    # S-expression classes:
    'Symbol', 'String', 'Quoted',
]

import re
from string import whitespace
import functools

# Maps every supported opening bracket to its closing counterpart.
BRACKETS = {'(': ')', '[': ']'}


### Python 3 compatibility

try:
    unicode
    PY3 = False
except NameError:
    basestring = unicode = str  # Python 3
    PY3 = True


def uformat(s, *args, **kwds):
    """Alias of ``unicode(s).format(...)``."""
    return tounicode(s).format(*args, **kwds)


### Utility

def tounicode(string):
    """
    Decode `string` if it is not unicode.  Do nothing in Python 3.

    :arg string: A text or UTF-8 encoded byte string.
    :return: `string` itself when it is already unicode, otherwise
             the result of decoding it as UTF-8.

    """
    if not isinstance(string, unicode):
        string = unicode(string, 'utf-8')
    return string


def return_as(converter):
    """
    Decorator to convert result of a function.

    It is just a function composition. The following two codes are
    equivalent.

    Using `@return_as`::

        @return_as(converter)
        def generator(args):
            ...

        result = generator(args)

    Manually do the same::

        def generator(args):
            ...

        result = converter(generator(args))

    Example:

    >>> @return_as(list)
    ... def f():
    ...     for i in range(3):
    ...         yield i
    ...
    >>> f()  # this gives a list, not an iterator
    [0, 1, 2]

    """
    def wrapper(generator):
        @functools.wraps(generator)
        def func(*args, **kwds):
            return converter(generator(*args, **kwds))
        return func
    return wrapper


### Interface

def load(filelike, **kwds):
    """
    Load object from S-expression stored in `filelike`.

    :arg filelike: A text stream object.

    See :func:`loads` for valid keyword arguments.

    >>> import io
    >>> fp = io.StringIO()
    >>> sexp = [Symbol('a'), Symbol('b')]   # let's dump and load this object
    >>> dump(sexp, fp)
    >>> _ = fp.seek(0)
    >>> load(fp) == sexp
    True

    """
    return loads(filelike.read(), **kwds)


def loads(string, **kwds):
    """
    Load object from S-expression `string`.

    :arg        string: String containing an S-expression.
    :type          nil: str or None
    :keyword       nil: A symbol interpreted as an empty list.
                        Default is ``'nil'``.
    :type         true: str or None
    :keyword      true: A symbol interpreted as True.
                        Default is ``'t'``.
    :type        false: str or None
    :keyword     false: A symbol interpreted as False.
                        Default is ``None``.
    :type line_comment: str
    :keyword line_comment: Beginning of line comment.
                        Default is ``';'``.
    :raises ExpectSingleSExp: when `string` does not hold exactly one
                        top-level S-expression.  This is a subclass of
                        ``AssertionError``.

    >>> loads("(a b)")
    [Symbol('a'), Symbol('b')]
    >>> loads("a")
    Symbol('a')
    >>> loads("(a 'b)")
    [Symbol('a'), Quoted(Symbol('b'))]
    >>> loads("(a '(b))")
    [Symbol('a'), Quoted([Symbol('b')])]
    >>> loads('''
    ... ;; This is a line comment.
    ... ("a" "b") ; this is also a comment.
    ... ''')
    ['a', 'b']
    >>> loads('''
    ... # This is a line comment.
    ... ("a" "b") # this is also a comment.
    ... ''', line_comment='#')
    ['a', 'b']

    ``nil`` is converted to an empty list by default.  You can use
    keyword argument `nil` to change what symbol must be interpreted
    as nil:

    >>> loads("nil")
    []
    >>> loads("null", nil='null')
    []
    >>> loads("nil", nil=None)
    Symbol('nil')

    ``t`` is converted to True by default.  You can use keyword
    argument `true` to change what symbol must be converted to True.:

    >>> loads("t")
    True
    >>> loads("#t", true='#t')
    True
    >>> loads("t", true=None)
    Symbol('t')

    No symbol is converted to False by default.  You can use keyword
    argument `false` to convert a symbol to False.

    >>> loads("#f")
    Symbol('#f')
    >>> loads("#f", false='#f')
    False
    >>> loads("nil", false='nil', nil=None)
    False

    """
    obj = parse(string, **kwds)
    # An explicit check instead of ``assert``: assertions are removed
    # under ``python -O``, which would let empty input fail with an
    # IndexError and multi-expression input silently lose data.
    if len(obj) != 1:
        raise ExpectSingleSExp(len(obj))
    return obj[0]


def dump(obj, filelike, **kwds):
    """
    Write `obj` as an S-expression into given stream `filelike`.

    :arg      obj: A Python object.
    :arg filelike: A text stream object.

    See :func:`dumps` for valid keyword arguments.

    >>> import io
    >>> fp = io.StringIO()
    >>> dump([Symbol('a'), Symbol('b')], fp)
    >>> print(fp.getvalue())
    (a b)

    """
    # Keyword arguments are forwarded so that options such as
    # `tuple_as` or `str_as` take effect here as they do in `dumps`.
    filelike.write(unicode(dumps(obj, **kwds)))


def dumps(obj, **kwds):
    """
    Convert python object into an S-expression.

    :arg           obj: A Python object.
    :type       str_as: ``'symbol'`` or ``'string'``
    :keyword    str_as: How string should be interpreted.
                        Default is ``'string'``.
    :type     tuple_as: ``'list'`` or ``'array'``
    :keyword  tuple_as: How tuple should be interpreted.
                        Default is ``'list'``.
    :type      true_as: str
    :keyword   true_as: How True should be interpreted.
                        Default is ``'t'``
    :type     false_as: str
    :keyword  false_as: How False should be interpreted.
                        Default is ``'()'``
    :type      none_as: str
    :keyword   none_as: How None should be interpreted.
                        Default is ``'()'``

    Basic usage:

    >>> print(dumps(['a', 'b']))
    ("a" "b")
    >>> print(dumps(['a', 'b'], str_as='symbol'))
    (a b)
    >>> print(dumps(dict(a=1, b=2)))
    (:a 1 :b 2)
    >>> print(dumps([None, True, False, ()]))
    (() t () ())
    >>> print(dumps([None, True, False, ()],
    ...             none_as='null', true_as='#t', false_as='#f'))
    (null #t #f ())
    >>> print(dumps(('a', 'b')))
    ("a" "b")
    >>> print(dumps(('a', 'b'), tuple_as='array'))
    ["a" "b"]

    More verbose usage:

    >>> print(dumps([Symbol('a'), Symbol('b')]))
    (a b)
    >>> print(dumps(Symbol('a')))
    a
    >>> print(dumps([Symbol('a'), Quoted(Symbol('b'))]))
    (a 'b)
    >>> print(dumps([Symbol('a'), Quoted([Symbol('b')])]))
    (a '(b))

    """
    return tosexp(obj, **kwds)


def car(obj):
    """
    Alias of ``obj[0]``.

    >>> car(loads('(a . b)'))
    Symbol('a')
    >>> car(loads('(a b)'))
    Symbol('a')

    """
    return obj[0]


def cdr(obj):
    """
    `cdr`-like function.

    >>> cdr(loads('(a . b)'))
    Symbol('b')
    >>> cdr(loads('(a b)'))
    [Symbol('b')]
    >>> cdr(loads('(a . (b))'))
    [Symbol('b')]
    >>> cdr(loads('(a)'))
    []
    >>> cdr(loads('(a . nil)'))
    []

    """
    # This is very lazy implementation.  Probably the best way to do
    # it is to define `Cons` S-expression class.
    if len(obj) > 2:
        dot = obj[1]
        if isinstance(dot, Symbol) and dot.value() == '.':
            return obj[2]
    return obj[1:]


### Core

def tosexp(obj, str_as='string', tuple_as='list',
           true_as='t', false_as='()', none_as='()'):
    """
    Convert an object to an S-expression (`dumps` is just calling this).

    See this table for comparison of lispy languages, to support them
    as much as possible:
    `Lisp: Common Lisp, Scheme/Racket, Clojure, Emacs Lisp - Hyperpolyglot
    `_

    :raises ValueError: when `str_as` or `tuple_as` is not one of the
                        supported values.
    :raises TypeError:  when `obj` is of a type that cannot be converted.

    """
    def _tosexp(x):
        # Recurse into sub-expressions with the same options.
        return tosexp(
            x, str_as=str_as, tuple_as=tuple_as,
            true_as=true_as, false_as=false_as, none_as=none_as)

    if isinstance(obj, list):
        return Bracket(obj, '(').tosexp(_tosexp)
    elif isinstance(obj, tuple):
        if tuple_as == 'list':
            return Bracket(obj, '(').tosexp(_tosexp)
        elif tuple_as == 'array':
            return Bracket(obj, '[').tosexp(_tosexp)
        else:
            raise ValueError(uformat("tuple_as={0!r} is not valid", tuple_as))
    elif obj is True:  # must do this before ``isinstance(obj, int)``
        return true_as
    elif obj is False:
        return false_as
    elif obj is None:
        return none_as
    elif isinstance(obj, (int, float)):
        return str(obj)
    elif isinstance(obj, basestring):
        if str_as == 'symbol':
            return obj
        elif str_as == 'string':
            return String(obj).tosexp()
        else:
            raise ValueError(uformat("str_as={0!r} is not valid", str_as))
    elif isinstance(obj, dict):
        return _tosexp(dict_to_plist(obj))
    elif isinstance(obj, SExpBase):
        return obj.tosexp(_tosexp)
    else:
        raise TypeError(uformat(
            "Object of type '{0}' cannot be converted by `tosexp`. "
            "It's value is '{1!r}'", type(obj), obj))


@return_as(list)
def dict_to_plist(obj):
    """
    Flatten dict `obj` into a property list ``[:key, value, ...]``.

    Each key is turned into a `Symbol` whose name is the key prefixed
    by a colon; values are passed through untouched.

    """
    for key in obj:
        yield Symbol(uformat(":{0}", key))
        yield obj[key]


class SExpBase(object):

    """
    Base class of the S-expression wrapper types.

    It stores one wrapped value and provides value-based comparison,
    hashing and the quote/unquote helpers driven by the
    `_lisp_quoted_specials` / `_lisp_quoted_to_raw` tables that the
    subclasses define.

    """

    def __init__(self, val):
        self._val = val

    def __repr__(self):
        return uformat("{0}({1!r})", self.__class__.__name__, self._val)

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self._val == other._val
        else:
            return False

    def __ne__(self, other):
        # Python 2 does not derive ``!=`` from ``__eq__``.
        return not self == other

    def __hash__(self):
        # Defining ``__eq__`` alone makes instances unhashable on
        # Python 3.  Hashing only the wrapped value stays consistent
        # with ``__eq__`` (equal objects have equal values).  Instances
        # wrapping an unhashable value (e.g. a list) raise TypeError.
        return hash(self._val)

    def value(self):
        """Return the wrapped value."""
        return self._val

    def tosexp(self, tosexp=tosexp):
        """
        Decode this object into an S-expression string.

        :arg tosexp: A function to be used when converting sub S-expression.

        """
        raise NotImplementedError

    @classmethod
    def quote(cls, string):
        """Escape the special characters of `string`, one table row at a time."""
        for (s, q) in cls._lisp_quoted_specials:
            string = string.replace(s, q)
        return tounicode(string)

    @classmethod
    def unquote(cls, string):
        """Map an escape sequence back to its raw form; unknown ones pass through."""
        return cls._lisp_quoted_to_raw.get(string, string)


class Symbol(SExpBase):

    """A Lisp symbol; `tosexp` writes its name with special characters escaped."""

    _lisp_quoted_specials = [
        ('\\', '\\\\'),    # must come first to avoid doubly quoting "\"
        ("'", r"\'"), ("`", r"\`"), ('"', r'\"'),
        ('(', r'\('), (')', r'\)'), ('[', r'\['), (']', r'\]'),
        (' ', r'\ '), ('.', r'\.'), (',', r'\,'), ('?', r'\?'),
        (';', r'\;'), ('#', r'\#'),
    ]

    _lisp_quoted_to_raw = dict((q, r) for (r, q) in _lisp_quoted_specials)

    def tosexp(self, tosexp=None):
        return self.quote(self._val)


class String(SExpBase):

    """A Lisp string; `tosexp` writes it escaped and in double quotes."""

    _lisp_quoted_specials = [  # from Pymacs
        ('\\', '\\\\'),    # must come first to avoid doubly quoting "\"
        ('"', '\\"'), ('\b', '\\b'), ('\f', '\\f'),
        ('\n', '\\n'), ('\r', '\\r'), ('\t', '\\t')]

    _lisp_quoted_to_raw = dict((q, r) for (r, q) in _lisp_quoted_specials)

    def tosexp(self, tosexp=None):
        return uformat('"{0}"', self.quote(self._val))


class Quoted(SExpBase):

    """A quoted S-expression; `tosexp` writes it with a leading apostrophe."""

    def tosexp(self, tosexp=tosexp):
        return uformat("'{0}", tosexp(self._val))


class Bracket(SExpBase):

    """A sequence remembered together with the opening bracket that encloses it."""

    def __init__(self, val, bra):
        assert bra in BRACKETS  # FIXME: raise an appropriate error
        super(Bracket, self).__init__(val)
        self._bra = bra

    def __repr__(self):
        return uformat("{0}({1!r}, {2!r})",
                       self.__class__.__name__, self._val, self._bra)

    def tosexp(self, tosexp=tosexp):
        bra = self._bra
        ket = BRACKETS[self._bra]
        c = ' '.join(tosexp(v) for v in self._val)
        return uformat("{0}{1}{2}", bra, c, ket)


def bracket(val, bra):
    """Return `val` as is for ``'('``; wrap it in `Bracket` for other brackets."""
    if bra == '(':
        return val
    else:
        return Bracket(val, bra)


class ExpectClosingBracket(Exception):

    """Raised when the input ends, or continues, without an expected closer."""

    def __init__(self, got, expect):
        super(ExpectClosingBracket, self).__init__(uformat(
            "Not enough closing brackets. "
            "Expected {0!r} to be the last letter in the sexp. "
            "Got: {1!r}",
            expect, got))


class ExpectNothing(Exception):

    """Raised when unparsed characters remain after the top-level parse."""

    def __init__(self, got):
        super(ExpectNothing, self).__init__(uformat(
            "Too many closing brackets. "
            "Expected no character left in the sexp. "
            "Got: {0!r}",
            got))


class ExpectSExp(Exception):

    """Raised when an apostrophe is not followed by any S-expression."""

    def __init__(self, pos):
        super(ExpectSExp, self).__init__(uformat(
            "No S-expression found after the apostrophe at position {0}",
            pos))


class ExpectSingleSExp(AssertionError):

    """
    Raised by `loads` when the input is not exactly one S-expression.

    It derives from ``AssertionError`` because `loads` used to enforce
    this with an ``assert`` statement; callers catching that exception
    keep working.

    """

    def __init__(self, count):
        super(ExpectSingleSExp, self).__init__(uformat(
            "Expected exactly one S-expression in the input. "
            "Got: {0}",
            count))


class Parser(object):

    """
    Recursive-descent parser turning S-expression text into Python objects.

    Every ``parse_*`` method takes an index `i` into `self.string` and
    returns a ``(next_index, parsed_object)`` tuple.

    """

    closing_brackets = set(BRACKETS.values())
    # Characters that terminate an atom.
    atom_end = \
        set(BRACKETS) | set(closing_brackets) | set('"\'') | set(whitespace)
    atom_end_or_escape_re = re.compile("|".join(map(re.escape,
                                                    atom_end | set('\\'))))
    quote_or_escape_re = re.compile(r'"|\\')

    def __init__(self, string, string_to=None, nil='nil', true='t', false=None,
                 line_comment=';'):
        self.string = string
        self.nil = nil
        self.true = true
        self.false = false
        self.string_to = (lambda x: x) if string_to is None else string_to
        self.line_comment = line_comment

    def parse_str(self, i):
        """
        Parse the double-quoted string literal starting at index `i`.

        :raises ExpectClosingBracket: when the input ends before the
            closing double quote (including right after a backslash).

        """
        string = self.string
        chars = []
        append = chars.append
        search = self.quote_or_escape_re.search

        assert string[i] == '"'  # never fail
        while True:
            i += 1
            match = search(string, i)
            if match is None:
                # Ran off the end of the input inside the literal.
                raise ExpectClosingBracket(None, '"')
            end = match.start()
            append(string[i:end])
            c = match.group()
            if c == '"':
                i = end + 1
                break
            # Otherwise `c` is a backslash: the pattern matches nothing else.
            i = end + 1
            if i >= len(string):
                # A backslash was the very last character of the input.
                raise ExpectClosingBracket(None, '"')
            append(String.unquote(c + string[i]))
        return (i, ''.join(chars))

    def parse_atom(self, i):
        """
        Parse the atom (symbol, number, nil/true/false) starting at `i`.

        A backslash escapes the character that follows it.  A backslash
        that is the last character of the input is kept literally.

        """
        string = self.string
        chars = []
        append = chars.append
        search = self.atom_end_or_escape_re.search
        len_string = len(string)

        while True:
            match = search(string, i)
            if not match:
                append(string[i:])
                i = len_string
                break
            end = match.start()
            append(string[i:end])
            c = match.group()
            if c != '\\':
                # An atom terminator; leave it for the caller to consume.
                i = end         # this is different from str
                break
            i = end + 1
            # Slicing (not indexing) tolerates a backslash at end of input.
            append(Symbol.unquote(c + string[i:i + 1]))
            i = min(i + 1, len_string)
        return (i, self.atom(''.join(chars)))

    def atom(self, token):
        """Convert `token` to ``[]``, a bool, an int, a float or a `Symbol`."""
        if token == self.nil:
            return []
        if token == self.true:
            return True
        if token == self.false:
            return False
        try:
            return int(token)
        except ValueError:
            try:
                return float(token)
            except ValueError:
                return Symbol(token)

    def parse_sexp(self, i):
        """
        Parse a sequence of S-expressions starting at index `i`.

        Parsing stops at the end of the input or at a closing bracket,
        which is left unconsumed for the caller to check.

        :raises ExpectClosingBracket: when a bracket is not closed by
            its matching counterpart.
        :raises ExpectSExp: when an apostrophe is followed by nothing.

        """
        string = self.string
        len_string = len(self.string)
        sexp = []
        append = sexp.append
        while i < len_string:
            c = string[i]
            if c == '"':
                (i, subsexp) = self.parse_str(i)
                append(self.string_to(subsexp))
            elif c in whitespace:
                i += 1
                continue
            elif c in BRACKETS:
                close = BRACKETS[c]
                (i, subsexp) = self.parse_sexp(i + 1)
                append(bracket(subsexp, c))
                try:
                    nc = string[i]
                except IndexError:
                    nc = None
                if nc != close:
                    raise ExpectClosingBracket(nc, close)
                i += 1
            elif c in self.closing_brackets:
                break
            elif c == "'":
                quote_pos = i
                # The rest of the enclosing list is parsed in one go:
                # its first element is the quoted one, the others are
                # its siblings.
                (i, subsexp) = self.parse_sexp(i + 1)
                if not subsexp:
                    raise ExpectSExp(quote_pos)
                append(Quoted(subsexp[0]))
                sexp.extend(subsexp[1:])
            elif c == self.line_comment:
                # Skip to just after the next newline, or to the end.
                i = string.find('\n', i) + 1
                if i <= 0:
                    i = len_string
                    break
            else:
                (i, subsexp) = self.parse_atom(i)
                append(subsexp)
        return (i, sexp)

    def parse(self):
        """
        Parse the whole input and return the list of top-level objects.

        :raises ExpectNothing: when characters are left over, i.e. there
            is a closing bracket without a matching opening one.

        """
        (i, sexp) = self.parse_sexp(0)
        if i < len(self.string):
            raise ExpectNothing(self.string[i:])
        return sexp


def parse(string, **kwds):
    """
    Parse s-expression.

    >>> parse("(a b)")
    [[Symbol('a'), Symbol('b')]]
    >>> parse("a")
    [Symbol('a')]
    >>> parse("(a 'b)")
    [[Symbol('a'), Quoted(Symbol('b'))]]
    >>> parse("(a '(b))")
    [[Symbol('a'), Quoted([Symbol('b')])]]

    """
    return Parser(string, **kwds).parse()