Source code for auromat.solving.eol

# Copyright European Space Agency, 2013

"""
This module makes it easy to download images
from `NASA's Earth Observation website <http://eol.jsc.nasa.gov>`_ in JPEG and RAW format.

As there is no API on NASA's end, we have to rely on a certain URL structure,
and, in case of RAW files, HTML structure. As this is not robust, it may fail
if the website is restructured. In that case, the code has to be adapted. 

Notes on JPEGs
--------------
JPEGs from the EOL archive are produced from the RAW camera files.
Different post-processing settings have been applied during the production
of these JPEGs, e.g. colour or exposure correction, or 180 degree rotation.
This is also true for images that belong to a single sequence of images, that is,
colour/exposure might change suddenly. The JPEGs are therefore not
suitable for scientific purposes. On the other hand, they often have hot pixels
removed. A lens distortion correction is typically not applied.

In theory the JPEGs can be used for astrometry while later using the RAW files
for scientific purposes. This is, however, a risky process, as it has to be guaranteed
that the image orientation matches and that the lens distortion was not corrected
in the JPEGs already. To prevent checking these things each time, it is better to
use the RAW files in the first place and prepare them for astrometry ourselves,
that is, removing hot pixels and possibly noise.
"""

from __future__ import division, print_function, absolute_import

from six.moves import range
import six.moves.urllib as urllib
import os
import re
import shutil
import json
import warnings
from time import sleep
from datetime import datetime, timedelta
from collections import namedtuple

from auromat.util import exiftool
from auromat.util.os import makedirs
from auromat.util.url import urlResponseCode, downloadFiles 

try:
    import lensfunpy
    from auromat.util.lensdistortion import CameraNotFoundInDBError, LensNotFoundInDBError,\
        CameraNotFoundInEXIFError, LensNotFoundInEXIFError, getLensfunModifier
    import auromat.util.lensdistortion
except Exception as e:
    print(str(e))
    warnings.warn('lensfunpy not found, reduced functionality in auromat.solving.eol')

# Name of the JSON file that holds the metadata of a downloaded sequence.
# It is joined onto the sequence folder by the download functions and by
# storeMetaData()/loadMetaData() when they are given a folder path.
metadataFilename = 'meta.json'

class SequenceMetadata(object):
    """
    Plain record describing an image sequence stored in a folder.

    Instances are serialised by dumping their attribute dictionary to JSON
    (see :func:`storeMetaData`), so every attribute must be JSON-compatible,
    except ``lensDistortionCorrectionParams``, which is converted separately.

    :param mission: mission identifier used in the filename pattern
    :param roll: roll identifier used in the filename pattern
    :param fromFrame: first frame number of the sequence
    :param toFrame: last frame number of the sequence (inclusive)
    :param pattern: filename pattern with ``{mission}``, ``{roll}`` and
                    ``{frame}`` placeholders
    :param frameGaps: frame numbers between fromFrame and toFrame that
                      are skipped when iterating over the sequence
    :param lensDistortionCorrected: whether the images are distortion-corrected
    :param lensDistortionCorrectionParams: a :class:`LensDistortionCorrectionParams`
                                           object or None
    """

    def __init__(self, mission, roll, fromFrame, toFrame, pattern, frameGaps,
                 lensDistortionCorrected, lensDistortionCorrectionParams=None):
        # Assignment order is kept stable on purpose: it determines the key
        # order of the JSON file written from the attribute dictionary.
        self.mission, self.roll = mission, roll
        self.fromFrame, self.toFrame = fromFrame, toFrame
        self.pattern, self.frameGaps = pattern, frameGaps
        self.lensDistortionCorrected = lensDistortionCorrected
        self.lensDistortionCorrectionParams = lensDistortionCorrectionParams
class LensDistortionCorrectionParams(object):
    """
    Plain record of the camera/lens settings that were used for a
    lens distortion correction.

    The attribute dictionary of an instance is embedded into the sequence
    metadata JSON, so all values must be JSON-compatible.

    :param cameraMaker: camera maker
    :param cameraModel: camera model
    :param cameraVariant: camera variant
    :param lensMaker: lens maker
    :param lensModel: lens model
    :param focalLength: focal length used for the correction
    :param aperture: aperture used for the correction
    """

    def __init__(self, cameraMaker, cameraModel, cameraVariant,
                 lensMaker, lensModel, focalLength, aperture):
        # camera
        self.cameraMaker, self.cameraModel = cameraMaker, cameraModel
        self.cameraVariant = cameraVariant
        # lens
        self.lensMaker, self.lensModel = lensMaker, lensModel
        # shot settings
        self.focalLength, self.aperture = focalLength, aperture
# --- JPEG download -----------------------------------------------------------
# URL of the large JPEG version of a frame.
jpgUrlPattern = 'http://eol.jsc.nasa.gov/DatabaseImages/ESC/large/{mission}/{mission}-{roll}-{frame}.JPG'
jpgFilePattern = '{mission}-{roll}-{frame}.jpg' # the filename on disk

# --- RAW download ------------------------------------------------------------
# 'file' is extracted from the photoPage
photoPageUrlPattern = 'http://eol.jsc.nasa.gov/SearchPhotos/photo.pl?mission={mission}&roll={roll}&frame={frame}'
# Regular expression; group 1 is the RAW filename referenced by the photo page.
rawFilePhotoPagePattern = r'"RequestOriginalImage.pl\?mission=[A-Z\d]+&roll=[A-Z\d]+&frame=[\d]+&file=([\w\.]+)"'
rawRequestUrlPattern = 'http://eol.jsc.nasa.gov/SearchPhotos/RequestOriginalImage.pl?mission={mission}&roll={roll}&frame={frame}&file={file}'
rawUrlPattern = 'http://eol.jsc.nasa.gov/OriginalImagery/{file}'
rawFilePatternNoExt = '{mission}-{roll}-{frame}' # extension not in here, could be .nef or something else

# --- Aurora video overview page ----------------------------------------------
auroraVideosUrl = 'http://eol.jsc.nasa.gov/ForFun/CrewEarthObservationsVideos/Aurora.htm'
# Regular expression with the groups
# (anchor, title, mission, roll, fromFrame, mission, roll, toFrame).
# Every piece is a raw string: the pieces contain regex escapes (\? and \d)
# which are invalid escape sequences in ordinary string literals.
auroraVideosPattern = (
    r'<a name="([a-zA-Z\d_]+)">(.+?)</a>.+?'
    r'<a href="/scripts/sseop/photo.pl\?mission=([A-Z\d]+)&roll=([A-Z\d]+)&frame=([\d]+)" target="_blank">'
    r'<nobr>[A-Z\d-]+</a> to '
    r'<a href="/scripts/sseop/photo.pl\?mission=([A-Z\d]+)&roll=([A-Z\d]+)&frame=([\d]+)" target="_blank">'
)
def downloadImages(folderPath, ids, format_):
    """
    Download images given by (mission,roll,frame) tuples in the specified format.

    Note: Use :func:`downloadImageSequence` to download a consecutive
    sequence of images. That function handles frame gaps (gaps in numbering)
    properly.

    :param folderPath: download location
    :param ids: list of tuples (mission,roll,frame)
    :param format_: jpg or raw
    :return: the result of :func:`downloadImagesJpg` for the jpg format
    :raise NotImplementedError: for the raw format (not supported yet)
    :raise ValueError: for any other format
    """
    if format_ == 'jpg':
        return downloadImagesJpg(folderPath, ids)
    if format_ == 'raw':
        # This branch used to compare the builtin ``format`` instead of the
        # ``format_`` argument and therefore could never be reached.
        # TODO implement RAW download of single frames
        raise NotImplementedError('RAW download of single frames is not implemented')
    raise ValueError('Unknown format: ' + str(format_))
def downloadImagesJpg(folderPath, ids):
    """
    Download the JPEG images identified by (mission,roll,frame) tuples.

    The download folder is created if necessary. The actual transfer is
    delegated to ``downloadFiles``; its result decides the return value.

    :param folderPath: download location
    :param ids: list of tuples (mission,roll,frame)
    :return: the local file paths (in the order of `ids`) if the download
             succeeded, otherwise False
    :rtype: list of str | False
    """
    urls = [jpgUrlPattern.format(mission=m, roll=r, frame=f) for m, r, f in ids]
    filenames = (jpgFilePattern.format(mission=m, roll=r, frame=f) for m, r, f in ids)
    paths = [os.path.join(folderPath, name) for name in filenames]

    makedirs(folderPath)
    succeeded = downloadFiles(urls, paths)
    return paths if succeeded else False
def downloadImageSequence(folderPath, mission, fromFrame, toFrame, format_, roll='E',
                          lensDistortionCorrected=False):
    """
    Download a consecutive image sequence in the given format.

    The work is delegated to the format-specific download function.
    `lensDistortionCorrected` is only forwarded for the jpg format.

    :param folderPath: download location
    :param mission: mission identifier
    :param fromFrame: first frame of the sequence
    :param toFrame: last frame of the sequence (inclusive)
    :param format_: jpg or raw
    :param roll: roll identifier
    :param lensDistortionCorrected: stored in the metadata of jpg sequences
    :return: (metadata, []) on success or (False, errors) in case of errors
    :rtype: tuple (SequenceMetadata, failure list)
    :raise ValueError: if the format is neither jpg nor raw
    """
    if format_ == 'raw':
        return _downloadImageSequenceRaw(folderPath, mission, fromFrame, toFrame, roll)
    if format_ != 'jpg':
        raise ValueError('Unsupported format: ' + format_)
    return _downloadImageSequenceJpg(folderPath, mission, fromFrame, toFrame, roll,
                                     lensDistortionCorrected=lensDistortionCorrected)
def _downloadImageSequenceJpg(folderPath, mission, fromFrame, toFrame, roll='E', lensDistortionCorrected=False): # first, download in temp folder, then copy over and remove temp folder if successful tempFolderPath = os.path.join(folderPath, 'in_progress') metadataPath = os.path.join(folderPath, metadataFilename) fromFrame, toFrame = int(fromFrame), int(toFrame) # check if already fully downloaded firstImage = os.path.join(folderPath, jpgFilePattern.format(mission=mission, roll=roll, frame=fromFrame)) if os.path.exists(firstImage): # as the files are only moved over at the very end, it is enough to # check for existance of the first image # write metadata if not existing yet (for whatever reason..) if not os.path.exists(metadataPath): frameGaps = [] for frame in range(fromFrame, toFrame+1): imagePath = os.path.join(folderPath, jpgFilePattern.format(mission=mission, roll=roll, frame=frame)) if not os.path.exists(imagePath): frameGaps.append(frame) meta = SequenceMetadata(mission=mission, roll=roll, fromFrame=fromFrame, toFrame=toFrame, pattern=jpgFilePattern, frameGaps=frameGaps, lensDistortionCorrected=lensDistortionCorrected) storeMetaData(metadataPath, meta) else: meta = loadMetaData(metadataPath) return meta, [] makedirs(folderPath, tempFolderPath) frames = range(fromFrame, toFrame+1) urls = [jpgUrlPattern.format(mission=mission, roll=roll, frame=frame) for frame in frames] paths = [os.path.join(tempFolderPath, jpgFilePattern.format(mission=mission, roll=roll, frame=frame)) for frame in frames] print('downloading sequence frames', fromFrame, 'to', toFrame, 'of', mission + '-' + roll) _, errors = downloadFiles(urls, paths, retFailures=True) # We ignore 404s for frames which are not the start or end. # This is because there are sometimes gaps in frame numbers. # E.g. 
for ISS030 the frames 115426 to 115442 don't exist within the # sequence 114986 to 115574 failures = [] frameGaps = [] for url, error in errors: if isinstance(error, urllib.error.HTTPError): i = urls.index(url) frame = frames[i] if error.code == 404: if fromFrame < frame < toFrame: frameGaps.append(frame) continue else: raise ValueError('Start/end frame ' + str(frame) + ' not downloadable (404)') else: failures.append((url, error.code)) else: failures.append((url, error)) if len(failures) > 0: return False, failures for filename in os.listdir(tempFolderPath): shutil.move(os.path.join(tempFolderPath, filename), folderPath) os.rmdir(tempFolderPath) meta = SequenceMetadata(mission=mission, roll=roll, fromFrame=fromFrame, toFrame=toFrame, pattern=jpgFilePattern, frameGaps=frameGaps, lensDistortionCorrected=lensDistortionCorrected) storeMetaData(metadataPath, meta) return meta, [] def _downloadImageSequenceRaw(folderPath, mission, fromFrame, toFrame, roll='E'): assert roll == 'E' # only those have RAW files # first, download in temp folder, then copy over and remove temp folder if successful tempFolderPath = os.path.join(folderPath, 'in_progress') metadataPath = os.path.join(folderPath, metadataFilename) fromFrame, toFrame = int(fromFrame), int(toFrame) # check if already fully downloaded if os.path.exists(metadataPath): return True makedirs(folderPath, tempFolderPath) # first, we determine the RAW filename pattern by looking at a photo page firstPhotoPageUrl = photoPageUrlPattern.format(mission=mission, roll=roll, frame=fromFrame) photoPageContent = urllib.request.urlopen(firstPhotoPageUrl).read() match = re.search(rawFilePhotoPagePattern, photoPageContent) if match is None: raise RuntimeError('Could not find RAW filename on page ' + firstPhotoPageUrl) rawFilename = match.group(1) rawFileBase, rawFileExt = os.path.splitext(rawFilename) assert mission in rawFileBase or mission.lower() in rawFileBase assert roll in rawFileBase or roll.lower() in rawFileBase assert 
str(fromFrame) in rawFileBase rawFileBasePattern = rawFileBase if mission in rawFileBase: rawFileBasePattern = rawFileBase.replace(mission, '{mission}') missionCased = mission elif mission.lower() in rawFileBasePattern: rawFileBasePattern = rawFileBasePattern.replace(mission.lower(), '{mission}') missionCased = mission.lower() else: raise RuntimeError('Could not find mission name in ' + rawFileBase) if roll in rawFileBasePattern: rawFileBasePattern = rawFileBasePattern.replace(roll, '{roll}') rollCased = roll elif roll.lower() in rawFileBasePattern: rawFileBasePattern = rawFileBasePattern.replace(roll.lower(), '{roll}') rollCased = roll.lower() else: raise RuntimeError('Could not find roll name in ' + rawFileBase) frameZfilled = lambda frame: str(frame).zfill(6) if frameZfilled(fromFrame) in rawFileBasePattern: rawFileBasePattern = rawFileBasePattern.replace(frameZfilled(fromFrame), '{frame}') frameFn = frameZfilled elif str(fromFrame) in rawFileBasePattern: rawFileBasePattern = rawFileBasePattern.replace(str(fromFrame), '{frame}') frameFn = str else: raise RuntimeError('Could not find frame number in ' + rawFileBase) rawFilenamePattern = rawFileBasePattern + rawFileExt print('Raw filename pattern: ' + rawFilenamePattern) frames = range(fromFrame, toFrame+1) rawFilenames = [rawFilenamePattern.format(mission=missionCased, roll=rollCased, frame=frameFn(frame)) for frame in frames] rawRequestUrls = [rawRequestUrlPattern.format(mission=mission, roll=roll, frame=frame, file=rawFilename) for frame, rawFilename in zip(frames, rawFilenames)] rawUrls = [rawUrlPattern.format(file=rawFilename) for rawFilename in rawFilenames] rawFilePatternDisk = rawFilePatternNoExt + rawFileExt.lower() rawFilenamesDisk = [rawFilePatternDisk.format(mission=mission, roll=roll, frame=frame) for frame in frames] paths = [os.path.join(tempFolderPath, rawFilenameDisk) for rawFilenameDisk in rawFilenamesDisk] # jpg URLs are used to check if the frame exists (or whether there's a frame gap) jpgUrls 
= [jpgUrlPattern.format(mission=mission, roll=roll, frame=frame) for frame in frames] frameGaps = [] failures = [] queue = [] for frame, jpgUrl, rawUrl, rawRequestUrl, path in zip(frames, jpgUrls, rawUrls, rawRequestUrls, paths): if os.path.exists(path): continue try: code = urlResponseCode(jpgUrl) if code == 200: queue.append((rawUrl, rawRequestUrl, path)) print('Got 200, added frame ' + str(frame) + ' to queue') elif code == 404: if fromFrame < frame < toFrame: frameGaps.append(frame) print('Got 404, ignoring frame ' + str(frame)) else: raise ValueError('Start/end frame ' + str(frame) + ' not downloadable (404)') else: failures.append((rawUrl, code)) print('Failure: Unexpected response code for jpgUrl: ' + str(code)) except Exception as e: failures.append((rawUrl, e)) print('Failure: ' + repr(e)) # download RAW files in batches to avoid overloading the server batchSize = 30 batches = [queue[i:i+batchSize] for i in range(0, len(queue), batchSize)] for batch in batches: batchUrls = [] batchPaths = [] for rawUrl, rawRequestUrl, path in batch: try: code = urlResponseCode(rawRequestUrl) if code == 200: print('queried ' + rawRequestUrl) batchUrls.append(rawUrl) batchPaths.append(path) else: failures.append((rawUrl, code)) print('Failure: Unexpected response code for rawRequestUrl: ' + str(code)) except Exception as e: failures.append((rawUrl, e)) print('Failure: ' + repr(e)) # now check rawUrls until the files are available for download # The request "may take 5 minutes or more to complete" (quote from request page) success, failures_ = downloadFiles(batchUrls, batchPaths, retFailures=True) failureCount = len(failures_) lastFailureCountDecrease = datetime.now() while not success and datetime.now() - lastFailureCountDecrease < timedelta(minutes=8): sleep(30) success, failures_ = downloadFiles(batchUrls, batchPaths, retFailures=True) if len(failures_) < failureCount: lastFailureCountDecrease = datetime.now() failureCount = len(failures_) failures.extend(failures_) if 
len(failures) > 0: return False, failures for filename in os.listdir(tempFolderPath): shutil.move(os.path.join(tempFolderPath, filename), folderPath) os.rmdir(tempFolderPath) meta = SequenceMetadata(mission=mission, roll=roll, fromFrame=fromFrame, toFrame=toFrame, pattern=rawFilePatternDisk, frameGaps=frameGaps, lensDistortionCorrected=False) storeMetaData(metadataPath, meta) return meta, [] Sequence = namedtuple('Sequence', ['mission', 'roll', 'fromFrame', 'toFrame', 'title', 'urlAnchor'])
def extractAuroraSequences():
    """
    Extracts metadata of all sequences found on
    http://eol.jsc.nasa.gov/ForFun/CrewEarthObservationsVideos/Aurora.htm.

    :return: one entry per sequence found on the page, with the frame
             numbers converted to int
    :rtype: list of Sequence
    """
    response = urllib.request.urlopen(auroraVideosUrl)
    try:
        content = response.read()
    finally:
        response.close()
    if not isinstance(content, str):
        # Python 3 returns bytes, which cannot be searched with a str pattern.
        # Undecodable bytes are replaced instead of aborting the extraction.
        content = content.decode('utf-8', 'replace')

    sequences = []
    for match in re.finditer(auroraVideosPattern, content, re.DOTALL):
        urlAnchor, title = match.group(1, 2)
        mission, roll, fromFrame = match.group(3, 4, 5)
        mission_, roll_, toFrame = match.group(6, 7, 8)
        # start and end link of a sequence must refer to the same mission/roll
        assert mission == mission_ and roll == roll_
        sequences.append(Sequence(mission, roll, int(fromFrame), int(toFrame), title, urlAnchor))
    return sequences
def storeMetaData(jsonPath, meta):
    """
    Write the attributes of a sequence metadata object to a JSON file.

    `meta` itself is left unchanged.

    :param jsonPath: path of the JSON file to write; if it is an existing
                     folder, the default metadata filename inside that
                     folder is used
    :param meta: object whose attribute dictionary is stored; its
                 ``lensDistortionCorrectionParams`` attribute is either None,
                 a dict, or an object whose attribute dictionary is stored
    """
    if os.path.isdir(jsonPath):
        jsonPath = os.path.join(jsonPath, metadataFilename)

    # Work on a copy: assigning into meta.__dict__ directly would replace the
    # params object on the caller's metadata object with a plain dict.
    metaDict = dict(meta.__dict__)
    dcParams = meta.lensDistortionCorrectionParams
    if dcParams is not None:
        if not isinstance(dcParams, dict):
            dcParams = dcParams.__dict__
        metaDict['lensDistortionCorrectionParams'] = dict(dcParams)

    with open(jsonPath, 'w') as fp:
        json.dump(metaDict, fp, indent=4)
def loadMetaData(jsonPath):
    """
    Read sequence metadata from a JSON file.

    :param jsonPath: path of the JSON file to read; if it is an existing
                     folder, the default metadata filename inside that
                     folder is used
    :return: the metadata; stored lens distortion correction parameters are
             converted to a :class:`LensDistortionCorrectionParams` object
    :rtype: SequenceMetadata
    """
    isFolder = os.path.isdir(jsonPath)
    metaFile = os.path.join(jsonPath, metadataFilename) if isFolder else jsonPath

    print('loading ' + metaFile)
    with open(metaFile) as fp:
        fields = json.load(fp)

    meta = SequenceMetadata(**fields)
    dcFields = meta.lensDistortionCorrectionParams
    if dcFields is None:
        return meta
    # the params were stored as a nested JSON object -> rebuild the object
    meta.lensDistortionCorrectionParams = LensDistortionCorrectionParams(**dcFields)
    return meta
def filenameOf(frame, meta):
    """
    Return the filename of a frame of the sequence described by `meta`.

    :param frame: frame number
    :param meta: sequence metadata providing mission, roll and the
                 filename pattern
    :rtype: str
    """
    placeholders = dict(mission=meta.mission, roll=meta.roll, frame=frame)
    return meta.pattern.format(**placeholders)
def _filenameOf(mission, roll, frame, pattern): return pattern.format(mission=mission, roll=roll, frame=frame)
def frameIter(meta):
    """
    Iterate over the frame numbers of a sequence in ascending order,
    from ``meta.fromFrame`` to ``meta.toFrame`` (inclusive), leaving out
    the frames listed in ``meta.frameGaps``.
    """
    candidates = range(meta.fromFrame, meta.toFrame + 1)
    for number in candidates:
        if number in meta.frameGaps:
            continue
        yield number
def filenameIter(meta):
    """
    Iterate over the frames of a sequence (see :func:`frameIter`) and
    yield a tuple (filename, frame) for each of them.
    """
    for number in frameIter(meta):
        name = filenameOf(number, meta)
        yield name, number
def correctLensDistortion(folderPath, undistFolderPath, lensfunDbObj=None):
    """
    Corrects the lens distortion of all images in `folderPath` using
    lensfun's distortion profile database.

    It is assumed that all images have the same camera and lens: the
    correction settings are determined from the first image of the sequence
    only. Images whose corrected version already exists in
    `undistFolderPath` are skipped. Corrected images get a ``_dc`` suffix
    in front of the file extension. Finally, updated sequence metadata is
    stored in `undistFolderPath`.

    :param folderPath: folder containing the images and their metadata file
    :param undistFolderPath: folder for the corrected images (created if necessary)
    :param lensfunDbObj: passed through to ``getLensfunModifier``
    """
    meta = loadMetaData(folderPath)

    firstFilename = filenameOf(meta.fromFrame, meta)
    mod, cam, lens = getLensfunModifier(os.path.join(folderPath, firstFilename),
                                        lensfunDbObj=lensfunDbObj)

    makedirs(undistFolderPath)
    print('starting lens distortion correction for ' + folderPath)

    base, ext = os.path.splitext(meta.pattern)
    undistPattern = base + '_dc' + ext

    with exiftool.ExifTool() as et:
        for filename, frame in filenameIter(meta):
            undistName = _filenameOf(meta.mission, meta.roll, frame, undistPattern)
            target = os.path.join(undistFolderPath, undistName)
            if not os.path.exists(target):
                source = os.path.join(folderPath, filename)
                auromat.util.lensdistortion.correctLensDistortion(source, target,
                                                                  exiftoolObj=et, mod=mod)

    # describe the corrected sequence and store its metadata next to the images
    usedParams = LensDistortionCorrectionParams(cam.maker, cam.model, cam.variant,
                                                lens.maker, lens.model,
                                                mod.focal_length, mod.aperture)
    meta.pattern = undistPattern
    meta.lensDistortionCorrected = True
    meta.lensDistortionCorrectionParams = usedParams
    storeMetaData(undistFolderPath, meta)