# ----------------------------------------------------------------------------
# moments
# Copyright (c) 2009-2011, Charles Brandt
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# ----------------------------------------------------------------------------
"""
Moments Journal object and functions related to using journals
A journal holds a collection of Moments
(and Entries, for moments with no timestamps)
"""
import re, codecs, os
import urllib, urllib2
from datetime import datetime
try:
    import simplejson as json
except ImportError:
    try:
        import json
    except ImportError:
        print "No json module found"
        exit()
from log import Log
from moment import Moment
from timestamp import Timestamp, Timerange
from association import Association
def log_action(destination, message, tags=[]):
    """
    convenience wrapper for a common need:
    open a journal, create a new entry (timestamped now),
    and save the journal/log.
    returns the entry created.
    """
j = Journal()
j.load(destination)
entry = j.make(message, tags)
    j.save()
#print entry.render()
return entry
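#Example usage (a sketch; "today.txt" is a hypothetical log file path):
#
#   from moments.journal import log_action
#   entry = log_action("today.txt", "called about the apartment", tags=["phone"])
#   print entry.render()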
class Journal(object):
    """
    Main Moments module for collecting Moment entries in one place

    *2011.06.26 13:50:20
    rather than being based on a standard list
    (we really don't use any of a list's methods
    to interact with a Journal object),
    everything is stored internally in:
        self._entries
        self._tags
        self._dates
    and custom methods are used for interacting with the Journal.
    these methods should be the same
    whether the Journal is a local, native instance,
    or if it is remote.
    i.e.
    using journal object attributes directly in code is discouraged
    to ensure that local and remote journal objects work identically
    """
def __init__(self, path=None, items=[], title=None, debug=False):
self._entries = []
#keys are compact date string
self._dates = Association()
#keys are tag name
self._tags = Association()
# renamed self.name to self.path
# then use name as a general name for the journal, if displayed
# *2009.11.07 10:28:44
# using title instead of name, then if anything else is still
# using name the old way, it will flag a more recognizable error
self.title = title
self.debug = debug
#*2011.06.26 13:45:20
#really no such thing as a default path (self.path)
# used for default file path:
        # convert to string just in case a Path object is sent
#self.path = str(path)
#if we want to store it to a path, then should specify that
#in a to_file / save call
#otherwise should keep track of all paths loaded
#so that we can reload them later.
self.loaded = []
#or ...
#could index based on original source too
#this would allow them to be re-saved to their original source
self._sources = Association()
if path:
self.load(path)
if items:
            self.update_many(items)
#*2011.06.26 13:53:03
#should there be a to_file
#and also a to_original_files (or something like that) ?
#could loop through all of self.loaded and store changes to those
#entries affected in those sources
#aka to_file
    def save(self, filename=None, order='original', include_path=False):
        """
        >>> from entry import Entry
        >>> j = Journal()
        >>> j.load("sample_log.txt")
        1
        >>> e = Entry("test entry")
        >>> j.update(e)
        >>> j.save("sample_log2.txt")
        >>> k = Journal()
        >>> k.load("sample_log2.txt")
        2
        >>> len(k.entries())
        2
        """
if filename:
self.path = str(filename)
if hasattr(self, "path") and self.path:
l = Log(self.path)
else:
print "No name to save Journal to"
exit()
#l.from_journal(self, holder, entry)
l.from_entries(self.sort(order=order), include_path=include_path)
l.to_file()
l.close()
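    #Example (a sketch; "combined.txt" is a hypothetical destination):
    #
    #   j.save("combined.txt", order="chronological")
    #
    #order accepts the same options as self.sort():
    #'original', 'reverse', 'chronological' / 'oldest to newest',
    #'reverse-chronological' / 'newest to oldest'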
    def save_originals(self):
"""
loop through all self.sources or self.loaded
and save the corresponding entries back
(only if there is an actual change???)
might want to return any entries that don't have a destination
or would it be better to return an error?
or not save if entries don't have a destination
"""
pass
    def save_instance(self, instance_file):
"""
save the currently loaded sources to an instance file
"""
pass
    def load_instance(self, instance_name, instance_file):
"""
load the first entry tagged instance_name from the instance file
"""
pass
#aka open, etc
#formerly: from_file, add_log_to_journal, add_file
    def load(self, log_name, add_tags=[]):
        """
        adds a log file to the journal object currently in memory
        this can be called multiple times with different filenames
        to merge those files/entries into the journal

        >>> from journal import *
        >>> j = Journal()
        >>> j.load("sample_log.txt")
        1
        >>> len(j.entries())
        1

        returns the number of entries found in the file
        (0 if nothing was loaded)
        """
found_entries = 0
        if str(log_name) not in self.loaded:
self.loaded.append(str(log_name))
#TODO:
#should also handle adding entry to self._sources??
#or will that happen in update_entries
#would it be better for sources and loaded to be associated
#on an update?
#that way if entries are added outside of a load
#the sources would still get updated.
l = Log()
l.from_file(str(log_name))
entries = l.to_entries(add_tags)
#print "%s entries loaded from file" % len(entries)
#print "%s entries in self before merging in entries" % len(self)
self.update_many(entries)
#print "%s entries in self after merging in entries" % len(self)
#if l.has_entries:
found_entries = len(entries)
l.close()
return found_entries
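    #Example of merging several logs into one journal
    #(a sketch; the file names are hypothetical):
    #
    #   j = Journal()
    #   j.load("2011-06.txt")
    #   j.load("2011-07.txt", add_tags=["july"])
    #   print len(j.entries())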
    def reload(self):
        """
        create a new instance of a journal
        based on the paths we have previously loaded (self.loaded)
        load everything that was previously loaded
        then swap out the contents of the old journal for the new one
        """
        #use this to load new _entries, etc
        new_j = Journal()
        for item in self.loaded:
            new_j.load(item)
#temp_j = load_journal(item)
#new_j.from_entries(temp_j.entries())
#del temp_j
old_entries = self._entries
old_tags = self._tags
old_dates = self._dates
old_sources = self._sources
self._entries = new_j._entries
self._tags = new_j._tags
self._dates = new_j._dates
self._sources = new_j._sources
del old_entries
del old_tags
del old_dates
del old_sources
def _add(self, entry, position=None):
"""
this is the base case for adding an entry
blindly adds the entry object to the journal's list of entries
no checks are performed
will add multiple copies of the same entry to the journal
use update to avoid duplicates
"""
if position is None:
#cannot assume insert here...
#insert(0, entry) reverses the list order on log read
self._entries.append(entry)
else:
self._entries.insert(position, entry)
if hasattr(entry, "created") and entry.created:
entry_time = entry.created.compact()
self._dates.associate(entry, entry_time)
else:
self._dates.associate(entry, None)
for t in entry.tags:
self._tags.associate(entry, t)
#TODO:
#integrate source
    def update(self, entry, position=None, source=None):
        """
        checks if an entry already exists in the journal
        if other entries with that timestamp are similar,
        see if they can be merged easily (i.e. only tags differ)
        otherwise just add it as a separate entry

        no longer attempting to choose which one to keep here
        since a journal can hold multiple entries with the same timestamp
        can merge later as needed using a dedicated script for that purpose
        """
if not hasattr(entry, "created") or not entry.created:
if entry not in self._entries:
self._add(entry, position)
if self.debug: print "Entry has no time associated, and no other entry found. added"
else:
#this makes entry_time available in the event the entry already
#is in the journal:
#print entry.created
entry_time = entry.created.compact()
if entry not in self._entries:
if not self._dates.has_key(entry_time):
self._add(entry, position)
if self.debug: print "No other entry found with time: %s. added" % entry_time
                else:
#it must have *something* in that time slot
#check for duplicates
if self.debug: print "Other entries found with time: %s. checking all.." % entry_time
options = self._dates[entry_time]
found_match = False
for existing in options:
if existing.is_equal(entry, debug=self.debug):
#print "DUPE, but tags and data are same... skipping"
found_match = True
if self.debug: print "Equal entry found. Skipping"
#only want to merge if we have data
#otherwise blank entries can end up grouped together
elif entry.data and (existing.data == entry.data):
#tags must differ... those are easy to merge:
print "from: %s, %s" % (existing.path, existing.created)
print "and: %s, %s" % (entry.path, entry.created)
print "only TAGS differ"
print "original: %s" % existing.tags
print "new: %s" % entry.tags
                            #(assuming Tags.union merges the new tags in place)
                            existing.tags.union(entry.tags)
print "merged: %s" % existing.tags
found_match = True
else:
#this one didn't match
#but we won't add the entry until we've checked them all
pass
if not found_match:
#2009.12.04 16:03:15
#this information doesn't help much anymore:
#print "MULTIPLE ENTRIES EXISTS AT: %s" % (entry_time)
#print "but none matched this one. Adding now"
self._add(entry, position)
if self.debug: print "No equivalent entries found. adding"
else:
if self.debug: print "Entry (%s) already exists in journal" % entry_time
#aka create, new
    def make(self, data, tags=[], created=None, source='', position=0):
"""
helper for making a new entry right in a journal object
this way should not need to import moments.entry.Entry elsewhere
"""
if not created:
created = datetime.now()
entry = Moment(data, tags, created, path=source)
#print "Journal.make.position: %s" % position
self.update(entry, position=position)
return entry
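    #Example (a sketch):
    #
    #   j = Journal()
    #   entry = j.make("finished the draft", tags=["writing"])
    #   print entry.created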
#AKA DELETE
    def remove(self, entry):
"""
remove associations from self._dates and self._tags
then remove the entry from the journal.
"""
#text_time = str(entry.created)
#text_time = e.created.strftime(time_format)
self._tags.remove(entry)
self._dates.remove(entry)
#remove from the list of entries
self._entries.remove(entry)
#*2011.07.09 10:32:42
#is this ever used?
#seems dangerous to remove everything at a given timestamp
#more likely to add as a separate one
#or remove explicitly and then add/update/make
## def replace(self, entry):
## """
## remove all entries from the journal with the same timestamp as entry
## then add the new entry to the journal
## i.e.
## accepts a new entry
## and uses it to find and then remove the original one(s)
## add the new one to the journal
## thereby replacing the original(s)
## """
## entry_time = entry.created.compact()
## if self._dates.has_key(entry_time):
## options = self._dates[entry_time]
## else:
## options = []
## for existing in options:
## self.remove(existing)
## self._add(entry)
#aka from_entries
#aka add_entries
#aka update_entries
    def update_many(self, entries, source=None):
"""
loop over a list of entries to add/update each one to the journal
"""
for e in entries:
self.update(e, source=source)
#aka remove_entries
    def remove_many(self, entries):
"""
take a list of entry objects,
remove each one
"""
for e in entries:
self.remove(e)
#Following are all different ways to READ
#they are also closely related to the hidden properties:
#_tags, _dates, _entries
#*2011.07.05 21:14:47
#thinking that it makes sense to have two separate calls
#could combine tag and tags (etc)
#by returning the plural version (dict) when no tag specified
#but the function name is unclear in that case
    def tag(self, tag_key=None):
        """
        lookup tag_key in self._tags
        returns only a list of entries associated with that tag,
        not a dict keyed by the tag name
        (a server can wrap the result that way if needed,
        but the server needs to be a little different)
        """
#print self._tags.keys()
if tag_key and self._tags.has_key(tag_key):
#print self._tags[tag_key]
## moments = []
## for m in self._tags[tag_key]:
## #instead of rendering a string:
## #moments.append(m.render())
## #supply a dictionary of the moment item
## moments.append(m.as_dict())
#return { tag_key:self._tags[tag_key] }
return self._tags[tag_key]
## elif tag_key:
## #must not have any content associated with this tag
## return { tag_key:[] }
else:
#could also return self.tags()
#return self.tags()
#return { 'tags': self._tags.keys() }
#return { tag_key:[] }
return []
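    #Example (a sketch; assumes the journal holds entries tagged 'family'):
    #
    #   for m in j.tag("family"):
    #       print m.render()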
    def date(self, date_key=None):
"""
lookup date_key in self._dates
date_key should be compact stamp
"""
if date_key:
if isinstance(date_key, Timestamp):
ts = date_key
else:
ts = Timestamp(compact=date_key)
#print ts, type(ts)
#print ts.accuracy
if ts.accuracy and ts.accuracy != "second":
rr = Timerange(ts)
#get the timerange
tr = rr.default()
#print tr
#print tr.start.datetime
#print tr.end.datetime
entries = self.range(tr.start, tr.end)
#return {ts.compact():entries}
return entries
elif self._dates.has_key(ts.compact()):
entries = self._dates[ts.compact()]
#print "LEN ENTRIES: %s" % len(entries)
#print entries
#return { ts.compact():entries }
return entries
else:
#return { ts.compact():[] }
return []
else:
#could also return self.dates()
#return self.dates()
#return { date_key:[] }
return []
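    #Example (a sketch; assumes compact stamps of the form YYYYMMDD[HHMMSS]):
    #
    #   j.date("20110626")        #day accuracy: everything from that day
    #   j.date("20110626135020")  #second accuracy: direct lookup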
    def dates(self):
"""
return a dictionary with:
all dates as keys, and
number of entries for each date as values
"""
ddict = {}
for key in self._dates.keys():
#print "KEY:", key
#key might be blank here (i.e. no timestamp)
if key:
ts = Timestamp(compact=key)
#print ts
ddict[ts.compact()] = len(self._dates[key])
else:
ddict[key] = len(self._dates[key])
return ddict
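    #Example of the returned shape (a sketch; keys are compact stamps,
    #values are entry counts; entries with no timestamp fall under a
    #blank key):
    #
    #   j.dates()
    #   #-> {'20110626135020': 1, '20110707': 3}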
#aka item???
    def entry(self, index=None):
        """
        return the entry at position index in the journal's list
        (None if index is None or out of range)
        """
        if index is not None and len(self._entries) > index:
            return self._entries[index]
        else:
            return None
    def entries(self):
"""
return a list of entries
making a function call rather than an attribute
to make consistent between local and remote calls
"""
return self._entries
    def search(self, look_for, data=False, limit=0):
"""
scan tags for tags matching (searching) look_for
if data is True, look in entry.data too
"""
tags = self._tags.keys()
found = []
# in this case, we'll return the whole entry
if data:
for e in self._entries:
if re.search(look_for, e.data):
found.append(e)
else:
                for t in e.tags:
if re.search(look_for, t):
found.append(e)
# in this case we'll only return matching tags
else:
results = []
for t in tags:
if re.search(look_for, t):
results.append(t)
## #now look for the results that start with "look_for"
## matches = []
## for r in results:
## if re.match(look_for, r):
## matches.append(r)
## results.remove(r)
# sort tags by the number of entries they have
priority = []
for tag in results:
priority.append( (len(self._tags[tag]), tag) )
priority.sort()
priority.reverse()
#print "Priority: %s" % priority
for p in priority:
found.append(p[1])
if limit:
found = found[:int(limit)]
return found
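    #Example (a sketch): a tag search returns matching tag names,
    #ordered by how many entries each tag has;
    #with data=True, matching entry objects are returned instead:
    #
    #   j.search("proj")             #e.g. ['project_a', 'project_b']
    #   j.search("proj", data=True)  #list of matching entries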
    def sort(self, order='original'):
"""
Sorts the items in our Journal's ._entries list
returns a list of the rearranged order of the entries in the journal
can specify order:
'original'
to keep the original order that the entries were added to the journal
'reverse'
'chronological' or 'oldest to newest'
oldest entries first in the list
'reverse-chronological' or 'newest to oldest'
if not all entries are wanted, see self.range()
"""
#print order
if order == "original":
return self._entries
elif order == "reverse":
self._entries.reverse()
return self._entries
else:
entry_times = self._dates.keys()
if order == "reverse-chronological" or order == 'newest to oldest':
entry_times.sort()
entry_times.reverse()
elif order == "chronological" or order == 'oldest to newest':
if self.debug: print "oldest to newest"
entry_times.sort()
if self.debug: print entry_times
else:
                raise ValueError("Unknown sort option supplied: %s" % order)
entries = []
for et in entry_times:
elist = self._dates[et]
for entry in elist:
entries.append(entry)
assert len(entries) == len(self._entries)
del self._entries
self._entries = entries
return entries
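    #Example (a sketch):
    #
    #   for entry in j.sort(order="reverse-chronological"):
    #       print entry.created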
#aka limit, timerange, mindstream.time_range
    def range(self, start=None, end=None):
"""
if no start *and* end specified
return the time range for the entries in the currently loaded journal
if only start
return the entries in range for the accuracy of the start (e.g. 1 day)
if start and end
return all entries in the journal that fall in that range
should accept a string, a datetime object, or a Timestamp object
"""
if start is None and end is None:
dates = self._dates.keys()
dates.sort()
start = dates[0]
end = dates[-1]
#might have entries with no timestamp first:
if start is None:
start = dates[1]
            if self.debug: print start, end
return Timerange(start=start, end=end)
else:
start = Timestamp(start)
if end:
end = Timestamp(end)
else:
relative = Timerange(start)
end = relative.end
times = self._dates.keys()
times.sort()
matches = []
for t in times:
#not sure why we're using just time here
#seems like we would want to use the date too?
#pytime = Timestamp(t).time
#sometimes t is None... those don't fit in a range.
if t:
pytime = Timestamp(t).datetime
if (pytime >= start.datetime) and (pytime <= end.datetime):
matches.extend(self._dates[t])
return matches
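    #Example (a sketch; stamps may be anything Timestamp accepts):
    #
    #   j.range()                        #Timerange covering the whole journal
    #   j.range("20110626")              #entries from that day
    #   j.range("20110601", "20110630")  #entries from that span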
    def clear(self):
"""
clear mind
start fresh
in practice it's probably easier to just create a new journal
but reload might need this
"""
del self._entries
self._entries = []
del self._tags
self._tags = Association()
del self._dates
self._dates = Association()
self.loaded = []
        self._sources = Association()
#todo
#way to see how much memory is consumed by current process?
#should show before and after if so
return True
    def associate_data(self):
        """
        add a new property to the Journal: _datas
        at one point this was generated automatically,
        but that slowed things down
        this allows it to be generated when it is needed,
        which so far is only when the node.directory object
        is looking for a default image
        """
self._datas = Association()
for entry in self._entries:
self._datas.associate(entry, entry.data)
    def associate_files(self):
        """
        add a new property to the Journal: _files
        similar to associate_data,
        but checks each entry's data for path information
        if a path is found,
        just take the filename portion
        and associate the entry with that portion
        otherwise associate each line of the entry's data (as is)
        """
self._files = Association()
for entry in self._entries:
lines = entry.data.splitlines()
for line in lines:
if re.search('/', line):
name = os.path.basename(line)
self._files.associate(entry, name)
elif line.strip():
self._files.associate(entry, line.strip())
else:
#must be a blank line
pass
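    #Example (a sketch): after calling associate_files(), an entry whose
    #data includes the line "/path/to/photo.jpg" can be found under the
    #key "photo.jpg"; lines without a '/' are used verbatim as keys:
    #
    #   j.associate_files()
    #   matches = j._files["photo.jpg"]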
class RemoteJournal(object):
    """
    a drop-in replacement for Journal,
    but rather than loading and storing the data in an internal
    data structure, all information is retrieved from a
    journal_server using json.
    ultimately this avoids needing to reload a large journal set
    when working on applications that leverage information in a journal.
    wraps calls to a journal_server to make the object interaction
    behave the same as with a local Journal object.

    uses the following calls for GET and POST requests:
    POST:
        req = urllib2.urlopen(url, params)
    GET:
        req = urllib2.urlopen(url)
    """
def __init__(self, source):
#source should be the base url to connect to
self.source = source
#*2011.10.11 08:25:35
# I think something here is causing connections to be lost
# over time less data is returned from moments server
# restarting a server with a RemoteJournal instance fixes it
#make sure we can handle cookies, if used in the future
#self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor())
#self.opener = urllib2.build_opener(urllib2.HTTPHandler())
#urllib2.install_opener(self.opener)
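    #Example (a sketch; the server URL and paths are hypothetical):
    #
    #   rj = RemoteJournal("http://localhost:8000")
    #   rj.load("/path/to/log.txt")  #ask the server to load a journal
    #   for m in rj.entries():
    #       print m.render()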
    def save(self, path=''):
        """
        call the save option on the server
        """
        url = '%s/save/%s' % (self.source, path)
        req = urllib2.urlopen(url)
        req.close()
        return True
    def save_post(self, path=''):
        """
        call the save option on the server
        using POST
        """
        values = { 'destination': path }
        params = urllib.urlencode(values)
        #url = '%s/save/%s' % (self.source, path)
        url = '%s/save' % (self.source)
        #POST:
        req = urllib2.urlopen(url, params)
        #json_raw = req.read()
        req.close()
        return True
    def load(self, path=''):
        """
        call the load option on the server
        """
        url = '%s/load/%s' % (self.source, path)
        req = urllib2.urlopen(url)
        req.close()
        return True
    def load_post(self, path=''):
        """
        call the load option on the server
        using POST
        """
        values = { 'source': path }
        params = urllib.urlencode(values)
        #url = '%s/load/%s' % (self.source, path)
        url = '%s/load' % (self.source)
        #POST:
        req = urllib2.urlopen(url, params)
        #json_raw = req.read()
        req.close()
        return True
    def update(self, entry, source=None, position=0):
"""
with a remote journal, we can't really have an update..
it's just a make behind the scenes
so we'll call that
"""
if source is None:
source = entry.source
if entry.created:
created = entry.created.compact()
else:
created = entry.created
self.make(data=entry.data, tags=entry.tags, created=created, source=source, position=position)
    def make(self, data, tags=[], created='', source='', position=0):
        values = { 'data' : data,
                   'tags' : tags,
                   'created' : created,
                   'source' : source,
                   'position' : position,
                   }
        #print values
        params = urllib.urlencode(values)
        #print params
        #url = '%s/load/%s' % (self.source, path)
        url = '%s/make' % (self.source)
        #print url
        #POST:
        req = urllib2.urlopen(url, params)
        req.close()
def remove(self, entry):
values = { 'data' : entry.data,
'tags' : entry.tags,
'created' : entry.created,
'source' : entry.source,
}
params = urllib.urlencode(values)
#print params
url = '%s/remove' % (self.source)
#POST:
req = urllib2.urlopen(url, params)
#for debugging:
#print req.read()
req.close()
def tag(self, tag_key=''):
url = '%s/tag/%s' % (self.source, tag_key)
#print url
#GET:
req = urllib2.urlopen(url)
json_raw = req.read()
#print "json from server: %s" % json_raw
req.close()
result = json.loads(json_raw)
elist = []
for e in result[tag_key]:
m = Moment(data=e['data'], tags=e['tags'], created=e['created'], path=e['path'])
#m = Moment(data=e['data'], tags=e['tags'], created=e['created'])
elist.append(m)
#return { tag_key:elist }
return elist
def dates(self):
url = '%s/dates' % (self.source)
req = urllib2.urlopen(url)
json_raw = req.read()
req.close()
return json.loads(json_raw)
def date(self, date_key=''):
values = {}
params = urllib.urlencode(values)
if isinstance(date_key, Timestamp):
ts = date_key
else:
ts = Timestamp(compact=date_key)
url = '%s/date/%s' % (self.source, ts.compact())
#print url
#GET:
req = urllib2.urlopen(url)
json_raw = req.read()
#print "json from server: %s" % json_raw
req.close()
result = json.loads(json_raw)
elist = []
for e in result[ts.compact()]:
m = Moment(data=e['data'], tags=e['tags'], created=e['created'], path=e['path'])
elist.append(m)
return { ts.compact():elist }
#return json.loads(json_raw)
def entry(self, item=''):
url = '%s/entry/%s' % (self.source, item)
#GET:
req = urllib2.urlopen(url)
json_raw = req.read()
req.close()
#print json_raw
e = json.loads(json_raw)
#print e
m = Moment(data=e['data'], tags=e['tags'], created=e['created'], path=e['path'])
#print m
return m
def entries(self):
url = '%s/entries' % (self.source)
#GET:
req = urllib2.urlopen(url)
json_raw = req.read()
req.close()
result = json.loads(json_raw)
elist = []
for e in result['entries']:
m = Moment(data=e['data'], tags=e['tags'], created=e['created'], path=e['path'])
#m = Moment(data=e['data'], tags=e['tags'], created=e['created'])
elist.append(m)
return elist
def search(self, look_for, data=False, limit=20):
if data:
url = '%s/search/data/%s' % (self.source, look_for)
else:
url = '%s/search/%s/%s' % (self.source, look_for, limit)
#url = '%s/search/%s' % (self.source, look_for)
#GET:
req = urllib2.urlopen(url)
json_raw = req.read()
req.close()
result = json.loads(json_raw)
found = []
if data:
#for e in result['matches']:
for e in result:
m = Moment(data=e['data'], tags=e['tags'], created=e['created'], path=e['path'])
#m = Moment(data=e['data'], tags=e['tags'], created=e['created'])
found.append(m)
else:
#this should be a list of tags
#found = result['matches']
found = result
return found
def related(self, key):
url = '%s/related/%s' % (self.source, key)
#GET:
req = urllib2.urlopen(url)
json_raw = req.read()
req.close()
result = json.loads(json_raw)
tags = result[key]
return tags
## elist = []
## for e in result['entries']:
## m = Moment(data=e['data'], tags=e['tags'], created=e['created'])
## elist.append(m)
## return elist
    def sort(self, order='original'):
        url = '%s/sort/%s' % (self.source, order)
        req = urllib2.urlopen(url)
        response = req.read()
        req.close()
        return response
    def range(self, start=None, end=None):
        """
        remote version of Journal.range()
        with a start (and optionally an end) stamp, returns the list
        of matching entries; with no arguments, returns the server's
        raw response describing the journal's full time range
        """
expect_entries = True
if end:
start = Timestamp(start)
end = Timestamp(end)
url = '%s/range/%s/%s' % (self.source, start.compact(), end.compact())
elif start:
start = Timestamp(start)
url = '%s/range/%s' % (self.source, start.compact())
else:
url = '%s/range' % (self.source)
expect_entries = False
#print url
req = urllib2.urlopen(url)
response = req.read()
req.close()
if expect_entries:
result = json.loads(response)
elist = []
for e in result['entries']:
m = Moment(data=e['data'], tags=e['tags'], created=e['created'], path=e['path'])
#m = Moment(data=e['data'], tags=e['tags'], created=e['created'])
elist.append(m)
return elist
else:
return response
    def clear(self):
        """
        call the clear option on the server
        """
url = '%s/clear' % (self.source)
req = urllib2.urlopen(url)
return req.read()
if __name__ == "__main__":
import doctest
doctest.testmod()