pysyncml 0.1 documentation

pysyncml.change.merger

Contents

Source code for pysyncml.change.merger

# -*- coding: utf-8 -*-
#------------------------------------------------------------------------------
# file: $Id$
# lib:  pysyncml.change.merger
# auth: griffin <griffin@uberdev.org>
# date: 2012/08/30
# copy: (C) CopyLoose 2012 UberDev <hardcore@uberdev.org>, No Rights Reserved.
#------------------------------------------------------------------------------

'''
The ``pysyncml.change.merger`` is a helper package that provides
routines to help with managing change-specs on a slightly higher level
than `pysyncml.change.tracker` by actually detecting changes,
reporting them to a tracker and merging them back
together. Furthermore, it provides routines to help with the detection
and merging of text-based changes, either multi-line or single-line
based.
'''

import urllib, hashlib, difflib
from .. import constants
from ..common import adict, state2string
from .tracker import *

#------------------------------------------------------------------------------
def uu(s):
  '''
  Returns the URL-unquoted form of the string `s`, i.e. with ``%xx``
  escapes replaced by the characters they encode.
  '''
  # python 2 exposes ``unquote`` directly on the ``urllib`` module;
  # python 3 moved it into ``urllib.parse``.
  unquote = getattr(urllib, 'unquote', None)
  if unquote is None:
    from urllib.parse import unquote
  return unquote(s)

#------------------------------------------------------------------------------
def u(s):
  '''
  Returns the URL-quoted form of the string `s`, i.e. with reserved
  characters (such as ``=``, ``&`` and ``;``) replaced by ``%xx``
  escapes.
  '''
  # python 2 exposes ``quote`` directly on the ``urllib`` module;
  # python 3 moved it into ``urllib.parse``.
  quote = getattr(urllib, 'quote', None)
  if quote is None:
    from urllib.parse import quote
  return quote(s)

#------------------------------------------------------------------------------
class Merger(object):
  '''
  Abstract interface implemented by the objects that the
  :class:`MergerFactory` subclasses hand out. Every method here raises
  ``NotImplementedError``; concrete mergers must override them.
  '''

  #----------------------------------------------------------------------------
  def pushChangeSpec(self, changeSpec):
    '''
    Feed an existing `changeSpec` into this merger. Abstract: always
    raises ``NotImplementedError`` in this base class.
    '''
    raise NotImplementedError()

  #----------------------------------------------------------------------------
  def pushChange(self, attribute, currentValue, newValue):
    '''
    Register that `attribute` changed from `currentValue` (the original
    value) to `newValue`. Implementations return the merger itself
    (i.e. ``self``) so that several calls can be chained, and update
    the change-spec reported by :meth:`getChangeSpec` according to
    their own change detection strategy.

    A `currentValue` of ``None`` means the field was *added*; a
    `newValue` of ``None`` means the field was *deleted*. When both
    are ``None`` (or, more generally, when they are equal) the call is
    ignored.
    '''
    raise NotImplementedError()

  #----------------------------------------------------------------------------
  def getChangeSpec(self):
    '''
    Returns the change-spec that represents every :meth:`pushChange`
    call made since this merger was constructed.
    '''
    raise NotImplementedError()

  #----------------------------------------------------------------------------
  def mergeChanges(self, attribute, localValue, remoteValue):
    '''
    Returns the value that `attribute` should take, determined from
    the change-spec held by this merger, the current `localValue` of
    this SyncML peer (i.e. the server-side) and the `remoteValue`
    supplied by the remote peer (i.e. the client-side). Raises a
    `pysyncml.ConflictError` if the local changes conflict with the
    value provided by the remote peer.

    A `localValue` of ``None`` means the field does not exist locally;
    a `remoteValue` of ``None`` means it does not exist on the remote
    peer. When both are ``None`` (or, more generally, when they are
    equal) the value is returned as-is without further investigation.
    '''
    raise NotImplementedError()

#------------------------------------------------------------------------------
class MergerFactory(object):
  '''
  Abstract interface for the factories that help with reporting and
  merging pysyncml change-specs.
  '''

  #----------------------------------------------------------------------------
  def newMerger(self, changeSpec=None):
    '''
    Returns a :class:`Merger` for `changeSpec`. The `changeSpec` may be
    ``None`` when the merger is going to be used to generate a
    change-spec rather than to consume one. Abstract: always raises
    ``NotImplementedError`` in this base class.
    '''
    raise NotImplementedError()

#------------------------------------------------------------------------------
class AttributeMergerFactory(MergerFactory):
  '''
  Factory whose :meth:`newMerger` produces :class:`AttributeMerger`
  merger objects.
  '''

  #----------------------------------------------------------------------------
  def __init__(self, *args, **kwargs):
    # this factory keeps no state of its own: every argument is handed
    # straight to the base class constructor.
    super(AttributeMergerFactory, self).__init__(*args, **kwargs)

  #----------------------------------------------------------------------------
  def newMerger(self, changeSpec=None):
    '''
    Returns a new :class:`AttributeMerger` constructed with
    `changeSpec` (``None`` by default).
    '''
    merger = AttributeMerger(changeSpec)
    return merger

#------------------------------------------------------------------------------
class AttributeMerger(Merger):
  '''
  A merger built on top of the
  :class:`pysyncml.AttributeChangeTracker
  <pysyncml.change.tracker.AttributeChangeTracker>`, which makes the
  tracker easier to use and lets it take part in a CompositeMerger.
  '''

  #----------------------------------------------------------------------------
  def __init__(self, changeSpec, *args, **kwargs):
    '''
    Creates the underlying ``AttributeChangeTracker`` from `changeSpec`;
    any remaining arguments are forwarded to the base class.
    '''
    super(AttributeMerger, self).__init__(*args, **kwargs)
    self.tracker = AttributeChangeTracker(changeSpec)

  #----------------------------------------------------------------------------
  def pushChangeSpec(self, changeSpec):
    '''
    Forwards `changeSpec` to the underlying tracker.
    '''
    self.tracker.pushChangeSpec(changeSpec)

  #----------------------------------------------------------------------------
  def pushChange(self, attribute, currentValue, newValue):
    '''
    Records the change of `attribute` from `currentValue` to `newValue`
    in the tracker and returns ``self``. Equal values are ignored; a
    `currentValue` of ``None`` is recorded as an addition and a
    `newValue` of ``None`` as a deletion, anything else as a
    modification.
    '''
    if currentValue == newValue:
      # nothing changed: nothing to record
      return self
    # additions carry no previous value; deletions and modifications
    # pass the previous value along to the tracker.
    if currentValue is None:
      record = (attribute, constants.ITEM_ADDED)
    elif newValue is None:
      record = (attribute, constants.ITEM_DELETED, currentValue)
    else:
      record = (attribute, constants.ITEM_MODIFIED, currentValue)
    self.tracker.append(*record)
    return self

  #----------------------------------------------------------------------------
  def getChangeSpec(self):
    '''
    Returns the change-spec as reported by the underlying tracker.
    '''
    return self.tracker.getChangeSpec()

  #----------------------------------------------------------------------------
  def mergeChanges(self, attribute, localValue, remoteValue):
    '''
    Returns the merged value of `attribute`. Equal local and remote
    values are returned directly; otherwise the decision is delegated
    to the tracker's ``update`` method.
    '''
    if localValue == remoteValue:
      return localValue
    # the tracker raises ConflictError here if there is a conflict
    merged = self.tracker.update(attribute, localValue, remoteValue)
    return merged

#------------------------------------------------------------------------------
class TextMergerFactory(MergerFactory):
  '''
  Factory whose :meth:`newMerger` produces :class:`TextMerger` merger
  objects.
  '''

  #----------------------------------------------------------------------------
  def __init__(self, multiLine=True, *args, **kwargs):
    '''
    Remembers the `multiLine` flag, which is passed on to every
    :class:`TextMerger` created by this factory; any remaining
    arguments are forwarded to the base class.
    '''
    super(TextMergerFactory, self).__init__(*args, **kwargs)
    self.multiLine = multiLine

  #----------------------------------------------------------------------------
  def newMerger(self, changeSpec=None):
    '''
    Returns a new :class:`TextMerger` using this factory's `multiLine`
    setting and the given `changeSpec` (``None`` by default).
    '''
    merger = TextMerger(self.multiLine, changeSpec)
    return merger

#------------------------------------------------------------------------------
class TextMerger(Merger):
  '''
  A merger that simplifies usage of the
  :class:`pysyncml.ListChangeTracker
  <pysyncml.change.tracker.ListChangeTracker>` and allows it to be used
  by a CompositeMerger.

  Text is split into a list of elements (lines when `multiLine` is
  true, space-separated words otherwise) and the element lists are
  diffed with ``difflib.SequenceMatcher``.

  Note that, unlike the :class:`Merger` base class, :meth:`pushChange`
  and :meth:`mergeChanges` do not take an `attribute` parameter.

  TODO: currently, the merging algorithm is fairly aggressive (i.e. it
  may merge changes that should probably be conflicts). This should be
  made an option to provide either aggressive or conservative merging.
  '''

  #----------------------------------------------------------------------------
  def __init__(self, multiLine, changeSpec, *args, **kw):
    '''
    :param multiLine: when true, text is split on newlines; otherwise
      it is split on single spaces.
    :param changeSpec: passed to the underlying ``ListChangeTracker``.
    '''
    super(TextMerger, self).__init__(*args, **kw)
    self.sep = '\n' if multiLine else ' '
    self.tracker = ListChangeTracker(changeSpec)

  #----------------------------------------------------------------------------
  def pushChangeSpec(self, changeSpec):
    '''
    Forwards `changeSpec` to the underlying tracker.
    '''
    self.tracker.pushChangeSpec(changeSpec)

  #----------------------------------------------------------------------------
  def pushChange(self, currentText, newText):
    '''
    Diffs `currentText` against `newText` and appends every detected
    element-level change to the tracker. Returns ``self``.
    '''
    cur = currentText.split(self.sep)
    new = newText.split(self.sep)
    for index, offset, changeType, curValue, newValue in self._getChangeSets(cur, new):
      self.tracker.append(index + offset, changeType, curValue)
    return self

  #----------------------------------------------------------------------------
  def getChangeSpec(self):
    '''
    Returns the change-spec as reported by the underlying tracker.
    '''
    return self.tracker.getChangeSpec()

  #----------------------------------------------------------------------------
  def _getChangeSets(self, currentList, newList):
    '''
    Generator yielding one ``(index, offset, changeType, currentValue,
    newValue)`` tuple per element-level difference between
    `currentList` and `newList`:

    * `index` is a position in `currentList`;
    * `offset` is the number of additions yielded before this tuple
      (it is incremented after each addition and never decremented);
    * `changeType` is one of ``constants.ITEM_ADDED``,
      ``constants.ITEM_MODIFIED`` or ``constants.ITEM_DELETED``;
    * `currentValue` is ``None`` for additions and `newValue` is
      ``None`` for deletions.

    Raises ``Exception`` if difflib reports an unknown opcode.
    '''
    sm = difflib.SequenceMatcher(a=currentList, b=newList)
    idxoffset = 0
    for opcode in sm.get_opcodes():
      # opcode: currentList[i1:i2] is to be turned into newList[j1:j2]
      tag, i1, i2, j1, j2 = opcode
      if tag == 'equal':
        continue
      if tag == 'insert':
        for idx in range(j1, j2):
          yield i1, idxoffset, constants.ITEM_ADDED, None, newList[idx]
          idxoffset += 1
        continue
      if tag == 'replace':
        # a replace may cover ranges of different lengths: the
        # overlapping part is reported as modifications and the
        # remainder as either additions or deletions.
        c0 = i1
        cD = i2 - i1
        c1 = c0 + min(cD, j2 - j1)
        for idx in range(c0, c1):
          yield (idx, idxoffset, constants.ITEM_MODIFIED,
                 currentList[idx], newList[j1 + idx - c0])
        # pseudo-inserts (empty range unless the new side is longer)
        for idx in range(j2 - j1 - cD):
          yield c1, idxoffset, constants.ITEM_ADDED, None, newList[j1 + cD + idx]
          idxoffset += 1
        # pseudo-deletes (empty range unless the current side is longer)
        for idx in range(c1, i2):
          yield idx, idxoffset, constants.ITEM_DELETED, currentList[idx], None
        continue
      if tag == 'delete':
        for idx in range(i1, i2):
          yield idx, idxoffset, constants.ITEM_DELETED, currentList[idx], None
        continue
      raise Exception('unexpected difflib opcode "%s"' % (tag,))

  #----------------------------------------------------------------------------
  def mergeChanges(self, localText, remoteText):
    '''
    Returns the text that results from merging `remoteText` into
    `localText`. Identical texts are returned directly. Otherwise each
    difference between the two is submitted to the tracker's
    ``isChange`` method, and only the differences for which it reports
    a position are applied to a copy of the local elements.

    Raises ``Exception`` if an unknown change type is encountered, in
    addition to anything raised by the tracker.
    '''
    if localText == remoteText:
      return localText
    cur = localText.split(self.sep)
    new = remoteText.split(self.sep)
    ret = cur[:]
    tok = None
    # number of elements inserted into `ret` so far, used to shift the
    # positions reported by the tracker
    roff = 0
    for index, offset, changeType, curValue, newValue in self._getChangeSets(cur, new):
      change = self.tracker.isChange(index, changeType, newValue, token=tok)
      if change is None:
        continue
      # a non-None result is a (position, token) pair; the token is
      # handed back to the tracker on the next call
      change, tok = change
      if change is None:
        continue
      if changeType == constants.ITEM_DELETED:
        # deletions are marked with None (and filtered out at the end)
        # so that the positions of later elements do not shift
        ret[change + roff] = None
      elif changeType == constants.ITEM_MODIFIED:
        ret[change + roff] = newValue
      elif changeType == constants.ITEM_ADDED:
        ret.insert(change + roff, newValue)
        roff += 1
      else:
        raise Exception('received unexpected change type %r' % (changeType,))
    return self.sep.join([e for e in ret if e is not None])

#------------------------------------------------------------------------------
class CompositeMergerFactory(MergerFactory):
  '''
  A merger factory for generating :class:`CompositeMerger` merger
  objects, which allows control of what kind of merger to use on a
  per-attribute basis.
  '''

  #----------------------------------------------------------------------------
  def __init__(self, sharedDefault=True, default=None, mergers=None, **kw):
    '''
    The CompositeMergerFactory constructor accepts the following
    parameters:

    :param default:

      The default merger factory (if unspecified, defaults to an
      AttributeMergerFactory). See `sharedDefault` if the default is
      not an attribute-based merger factory.

    :param mergers:

      A dictionary of (attribute => MergerFactory) that override the
      default merger factory for the specified attribute. If
      unspecified, all attributes will use the default merger factory.
      The dictionary is copied; it is not modified by this factory.

    :param sharedDefault:

      The `sharedDefault` parameter controls how default attributes
      get handled. When ``True`` (the default), then all default
      attributes will share a Merger and the Merger will be passed the
      attribute name during operations. When ``False``, then each
      attribute will get its own Merger and operations will not get
      the attribute name. It is important that the `default` and
      `sharedDefault` parameters match - for example, if `default` is
      set to a ``TextMergerFactory``, then `sharedDefault` must be set
      to ``False``.

    :param kw:

      Any additional keyword arguments are treated as further
      (attribute => MergerFactory) entries and take precedence over
      entries of the same name in `mergers`.
    '''
    self.shared = sharedDefault
    self.default = default or AttributeMergerFactory()
    # work on a copy so that folding in the keyword overrides never
    # mutates the dictionary owned by the caller
    self.mergers = dict(mergers or {})
    self.mergers.update(kw)

  #----------------------------------------------------------------------------
  def newMerger(self, changeSpec=None):
    '''
    Returns a new :class:`CompositeMerger` bound to this factory and
    constructed with `changeSpec` (``None`` by default).
    '''
    return CompositeMerger(self, changeSpec)

#------------------------------------------------------------------------------
class CompositeMerger(Merger):
  '''
  A composite merger is an attribute-based merger that allows a default
  merger for attributes to be specified, which can then be overridden
  for specific attributes.

  The composite change-spec is a ``;``-separated list of segments, each
  of which is an ``&``-separated list of entries. An entry of the form
  ``ATTR=SPEC`` belongs to the merger of attribute ``ATTR``; an entry
  without ``=`` belongs to the default merger. ``ATTR`` and ``SPEC``
  are URL-quoted.

  TODO: is there perhaps a better way to define when a subsidiary
  merger is a dispatch merger (i.e. takes the attribute name as
  operational parameter) than to use the factory "sharedDefault"
  parameter?
  '''

  #----------------------------------------------------------------------------
  def __init__(self, factory, changeSpec):
    '''
    :param factory: the :class:`CompositeMergerFactory` providing the
      default merger factory, the per-attribute merger factories and
      the `shared` flag.
    :param changeSpec: an existing composite change-spec to load, or
      ``None``.
    '''
    self.factory = factory
    self.cspec = changeSpec
    self.default = self.factory.default.newMerger()
    # attribute name => dedicated (non-default) merger, created lazily
    self.attrs = dict()
    if changeSpec is not None:
      for cspec in changeSpec.split(';'):
        self.pushChangeSpec(cspec)

  #----------------------------------------------------------------------------
  def pushChangeSpec(self, changeSpec):
    '''
    Splits `changeSpec` on ``&`` and dispatches each entry, URL-unquoted,
    to the merger it belongs to. Returns ``self``.
    '''
    for cspec in changeSpec.split('&'):
      if '=' not in cspec:
        # no attribute name: the entry belongs to the default merger
        self.default.pushChangeSpec(uu(cspec))
        continue
      attr, cspec = cspec.split('=', 1)
      self._getMerger(uu(attr)).pushChangeSpec(uu(cspec))
    return self

  #----------------------------------------------------------------------------
  def _getMerger(self, attribute):
    '''
    Returns the merger responsible for `attribute`, creating and
    caching a dedicated one when the factory defines an override for
    it or when the default merger is not shared.
    '''
    if attribute in self.attrs:
      return self.attrs[attribute]
    if attribute in self.factory.mergers:
      self.attrs[attribute] = self.factory.mergers[attribute].newMerger()
      return self.attrs[attribute]
    if not self.factory.shared:
      self.attrs[attribute] = self.factory.default.newMerger()
      return self.attrs[attribute]
    return self.default

  #----------------------------------------------------------------------------
  def pushChange(self, attribute, currentValue, newValue):
    '''
    Records the change of `attribute` from `currentValue` to `newValue`
    with the responsible merger and returns ``self``.
    '''
    merger = self._getMerger(attribute)
    if merger is self.default:
      # the shared default merger dispatches on the attribute name
      merger.pushChange(attribute, currentValue, newValue)
    else:
      # dedicated mergers are not given the attribute name
      merger.pushChange(currentValue, newValue)
    return self

  #----------------------------------------------------------------------------
  def getChangeSpec(self):
    '''
    Returns the composite change-spec: the change-spec given at
    construction (if any) followed by ``;``, the quoted default
    change-spec and one ``ATTR=SPEC`` entry per dedicated merger with a
    non-empty change-spec, in sorted attribute order. Returns ``None``
    as soon as any of the subsidiary mergers reports ``None``.
    '''
    ret = self.cspec or ''
    if len(ret) > 0:
      ret += ';'
    defspec = self.default.getChangeSpec()
    if defspec is None:
      return None
    ret += u(defspec)
    # iterate the dictionary directly: ``dict.iterkeys`` only exists on
    # python 2
    for attr in sorted(self.attrs):
      cspec = self.attrs[attr].getChangeSpec()
      if cspec is None:
        return None
      if len(cspec) <= 0:
        continue
      # no ``&`` at the very start or directly after a ``;``
      if len(ret) > 0 and ret[-1] != ';':
        ret += '&'
      ret += u(attr) + '=' + u(cspec)
    return ret

  #----------------------------------------------------------------------------
  def mergeChanges(self, attribute, localValue, remoteValue):
    '''
    Returns the merged value of `attribute` as determined by the
    responsible merger's ``mergeChanges`` method.
    '''
    merger = self._getMerger(attribute)
    if merger is self.default:
      return merger.mergeChanges(attribute, localValue, remoteValue)
    return merger.mergeChanges(localValue, remoteValue)

#------------------------------------------------------------------------------
# end of $Id$
#------------------------------------------------------------------------------

Contents