Source code for lucidity.template

# :coding: utf-8
# :copyright: Copyright (c) 2013 Martin Pengelly-Phillips
# :license: See LICENSE.txt.

import abc
import sys
import re
import functools
from collections import defaultdict

import lucidity.error

# Type of a RegexObject for isinstance check.
_RegexType = type(re.compile(''))


[docs]class Template(object): '''A template.''' _STRIP_EXPRESSION_REGEX = re.compile(r'{(.+?)(:(\\}|.)+?)}') _PLAIN_PLACEHOLDER_REGEX = re.compile(r'{(.+?)}') _TEMPLATE_REFERENCE_REGEX = re.compile(r'{@(?P<reference>.+?)}') ANCHOR_START, ANCHOR_END, ANCHOR_BOTH = (1, 2, 3) RELAXED, STRICT = (1, 2)
[docs] def __init__(self, name, pattern, anchor=ANCHOR_START, default_placeholder_expression='[\w_.\-]+', duplicate_placeholder_mode=RELAXED, template_resolver=None): '''Initialise with *name* and *pattern*. *anchor* determines how the pattern is anchored during a parse. A value of :attr:`~Template.ANCHOR_START` (the default) will match the pattern against the start of a path. :attr:`~Template.ANCHOR_END` will match against the end of a path. To anchor at both the start and end (a full path match) use :attr:`~Template.ANCHOR_BOTH`. Finally, ``None`` will try to match the pattern once anywhere in the path. *duplicate_placeholder_mode* determines how duplicate placeholders will be handled during parsing. :attr:`~Template.RELAXED` mode extracts the last matching value without checking the other values. :attr:`~Template.STRICT` mode ensures that all duplicate placeholders extract the same value and raises :exc:`~lucidity.error.ParseError` if they do not. If *template_resolver* is supplied, use it to resolve any template references in the *pattern* during operations. It should conform to the :class:`Resolver` interface. It can be changed at any time on the instance to affect future operations. ''' super(Template, self).__init__() self.duplicate_placeholder_mode = duplicate_placeholder_mode self.template_resolver = template_resolver self._default_placeholder_expression = default_placeholder_expression self._period_code = '_LPD_' self._at_code = '_WXV_' self._name = name self._pattern = pattern self._anchor = anchor # Check that supplied pattern is valid and able to be compiled. self._construct_regular_expression(self.pattern)
def __repr__(self): '''Return unambiguous representation of template.''' return '{0}(name={1!r}, pattern={2!r})'.format( self.__class__.__name__, self.name, self.pattern ) @property def name(self): '''Return name of template.''' return self._name @property def pattern(self): '''Return template pattern.''' return self._pattern
[docs] def expanded_pattern(self): '''Return pattern with all referenced templates expanded recursively. Raise :exc:`lucidity.error.ResolveError` if pattern contains a reference that cannot be resolved by currently set template_resolver. ''' return self._TEMPLATE_REFERENCE_REGEX.sub( self._expand_reference, self.pattern )
def _expand_reference(self, match): '''Expand reference represented by *match*.''' reference = match.group('reference') if self.template_resolver is None: raise lucidity.error.ResolveError( 'Failed to resolve reference {0!r} as no template resolver set.' .format(reference) ) template = self.template_resolver.get(reference) if template is None: raise lucidity.error.ResolveError( 'Failed to resolve reference {0!r} using template resolver.' .format(reference) ) return template.expanded_pattern()
[docs] def parse(self, path): '''Return dictionary of data extracted from *path* using this template. Raise :py:class:`~lucidity.error.ParseError` if *path* is not parsable by this template. ''' # Construct regular expression for expanded pattern. regex = self._construct_regular_expression(self.expanded_pattern()) # Parse. parsed = {} match = regex.search(path) if match: data = {} for key, value in sorted(match.groupdict().items()): # Strip number that was added to make group name unique. key = key[:-3] # If strict mode enabled for duplicate placeholders, ensure that # all duplicate placeholders extract the same value. if self.duplicate_placeholder_mode == self.STRICT: if key in parsed: if parsed[key] != value: raise lucidity.error.ParseError( 'Different extracted values for placeholder ' '{0!r} detected. Values were {1!r} and {2!r}.' .format(key, parsed[key], value) ) else: parsed[key] = value # Expand dot notation keys into nested dictionaries. target = data parts = key.split(self._period_code) for part in parts[:-1]: target = target.setdefault(part, {}) target[parts[-1]] = value return data else: raise lucidity.error.ParseError( 'Path {0!r} did not match template pattern.'.format(path) )
[docs] def format(self, data): '''Return a path formatted by applying *data* to this template. Raise :py:class:`~lucidity.error.FormatError` if *data* does not supply enough information to fill the template fields. ''' format_specification = self._construct_format_specification( self.expanded_pattern() ) return self._PLAIN_PLACEHOLDER_REGEX.sub( functools.partial(self._format, data=data), format_specification )
def _format(self, match, data): '''Return value from data for *match*.''' placeholder = match.group(1) parts = placeholder.split('.') try: value = data for part in parts: value = value[part] except (TypeError, KeyError): raise lucidity.error.FormatError( 'Could not format data {0!r} due to missing key {1!r}.' .format(data, placeholder) ) else: return value
[docs] def keys(self): '''Return unique set of placeholders in pattern.''' format_specification = self._construct_format_specification( self.expanded_pattern() ) return set(self._PLAIN_PLACEHOLDER_REGEX.findall(format_specification))
[docs] def references(self): '''Return unique set of referenced templates in pattern.''' format_specification = self._construct_format_specification( self.pattern ) return set(self._TEMPLATE_REFERENCE_REGEX.findall(format_specification))
def _construct_format_specification(self, pattern): '''Return format specification from *pattern*.''' return self._STRIP_EXPRESSION_REGEX.sub('{\g<1>}', pattern) def _construct_regular_expression(self, pattern): '''Return a regular expression to represent *pattern*.''' # Escape non-placeholder components. expression = re.sub( r'(?P<placeholder>{(.+?)(:(\\}|.)+?)?})|(?P<other>.+?)', self._escape, pattern ) # Replace placeholders with regex pattern. expression = re.sub( r'{(?P<placeholder>.+?)(:(?P<expression>(\\}|.)+?))?}', functools.partial( self._convert, placeholder_count=defaultdict(int) ), expression ) if self._anchor is not None: if bool(self._anchor & self.ANCHOR_START): expression = '^{0}'.format(expression) if bool(self._anchor & self.ANCHOR_END): expression = '{0}$'.format(expression) # Compile expression. try: compiled = re.compile(expression) except re.error as error: if any([ 'bad group name' in str(error), 'bad character in group name' in str(error) ]): raise ValueError('Placeholder name contains invalid ' 'characters.') else: _, value, traceback = sys.exc_info() message = 'Invalid pattern: {0}'.format(value) raise ValueError, message, traceback #@IgnorePep8 return compiled def _convert(self, match, placeholder_count): '''Return a regular expression to represent *match*. *placeholder_count* should be a `defaultdict(int)` that will be used to store counts of unique placeholder names. ''' placeholder_name = match.group('placeholder') # Support at symbol (@) as referenced template indicator. Currently, # this symbol not a valid character for a group name in the standard # Python regex library. Rather than rewrite or monkey patch the library # work around the restriction with a unique identifier. placeholder_name = placeholder_name.replace('@', self._at_code) # Support period (.) as nested key indicator. Currently, a period is # not a valid character for a group name in the standard Python regex # library. Rather than rewrite or monkey patch the library work around # the restriction with a unique identifier. placeholder_name = placeholder_name.replace('.', self._period_code) # The re module does not support duplicate group names. To support # duplicate placeholder names in templates add a unique count to the # regular expression group name and strip it later during parse. placeholder_count[placeholder_name] += 1 placeholder_name += '{0:03d}'.format( placeholder_count[placeholder_name] ) expression = match.group('expression') if expression is None: expression = self._default_placeholder_expression # Un-escape potentially escaped characters in expression. expression = expression.replace('\{', '{').replace('\}', '}') return r'(?P<{0}>{1})'.format(placeholder_name, expression) def _escape(self, match): '''Escape matched 'other' group value.''' groups = match.groupdict() if groups['other'] is not None: return re.escape(groups['other']) return groups['placeholder']
[docs]class Resolver(object): '''Template resolver interface.''' __metaclass__ = abc.ABCMeta
[docs] @abc.abstractmethod def get(self, template_name, default=None): '''Return template that matches *template_name*. If no template matches then return *default*. ''' return default
@classmethod def __subclasshook__(cls, subclass): '''Return whether *subclass* fulfils this interface.''' if cls is Resolver: return callable(getattr(subclass, 'get', None)) return NotImplemented