"""
Manage local downloaded data.
"""
from __future__ import (
print_function,
division,
absolute_import,
)
import logging
import yaml
from os.path import join, exists
from os import environ
from pipes import quote
from collections import OrderedDict
from appdirs import user_data_dir
from pkg_resources import resource_string
import pandas
# Names of the environment variables associated with this module. The last
# two entries are the overrides read below for the presentation and processing
# model directories; they were previously read but not listed here.
ENVIRONMENT_VARIABLES = [
    "MHCFLURRY_DATA_DIR",
    "MHCFLURRY_DOWNLOADS_CURRENT_RELEASE",
    "MHCFLURRY_DOWNLOADS_DIR",
    "MHCFLURRY_DEFAULT_CLASS1_MODELS",
    "MHCFLURRY_DEFAULT_CLASS1_PRESENTATION_MODELS_DIR",
    "MHCFLURRY_DEFAULT_CLASS1_PROCESSING_MODELS_DIR",
]

# Module state, initialised to None at import. _DOWNLOADS_DIR and
# _CURRENT_RELEASE are what get_downloads_dir() and get_current_release()
# return. NOTE(review): presumably all three are assigned by code outside this
# section of the file -- confirm.
_DOWNLOADS_DIR = None
_CURRENT_RELEASE = None
_METADATA = None

# Optional overrides for the default model directories. Each is read from the
# environment once, at import time, and is None when the variable is unset.
_MHCFLURRY_DEFAULT_CLASS1_MODELS_DIR = environ.get(
    "MHCFLURRY_DEFAULT_CLASS1_MODELS")
_MHCFLURRY_DEFAULT_CLASS1_PRESENTATION_MODELS_DIR = environ.get(
    "MHCFLURRY_DEFAULT_CLASS1_PRESENTATION_MODELS_DIR")
_MHCFLURRY_DEFAULT_CLASS1_PROCESSING_MODELS_DIR = environ.get(
    "MHCFLURRY_DEFAULT_CLASS1_PROCESSING_MODELS_DIR")
def get_downloads_dir():
    """
    Return the path to local downloaded data.

    Returns
    -------
    The current value of the module-level ``_DOWNLOADS_DIR`` setting. That
    variable is initialised to None when the module is first loaded.
    """
    return _DOWNLOADS_DIR
def get_current_release():
    """
    Return the current downloaded data release.

    Returns
    -------
    The current value of the module-level ``_CURRENT_RELEASE`` setting. That
    variable is initialised to None when the module is first loaded.
    """
    return _CURRENT_RELEASE
def get_default_class1_models_dir(test_exists=True):
    """
    Return the absolute path to the default class1 models dir.

    If environment variable MHCFLURRY_DEFAULT_CLASS1_MODELS is set to an
    absolute path, return that path. If it's set to a relative path (i.e. does
    not start with /) then return that path taken to be relative to the
    mhcflurry downloads dir. The variable is read once, when this module is
    imported.

    If environment variable MHCFLURRY_DEFAULT_CLASS1_MODELS is NOT set (or is
    empty), then return the path to "models.combined" within the
    "models_class1_pan" download.

    Parameters
    ----------
    test_exists : boolean, optional
        Whether to raise an exception if the path does not exist

    Returns
    -------
    string : absolute path

    Raises
    ------
    IOError
        If the environment variable is set, test_exists is True, and the
        resulting path does not exist.
    RuntimeError
        Raised by `get_path` if the environment variable is not set,
        test_exists is True, and the default path does not exist.
    """
    override = _MHCFLURRY_DEFAULT_CLASS1_MODELS_DIR
    if not override:
        return get_path(
            "models_class1_pan", "models.combined", test_exists=test_exists)

    # os.path.join drops the downloads dir when the override is absolute, so
    # a single join handles both the absolute and the relative case.
    result = join(get_downloads_dir(), override)
    if test_exists and not exists(result):
        raise IOError("No such directory: %s" % result)
    return result
def get_default_class1_presentation_models_dir(test_exists=True):
    """
    Return the absolute path to the default class1 presentation models dir.

    See `get_default_class1_models_dir`.

    If environment variable MHCFLURRY_DEFAULT_CLASS1_PRESENTATION_MODELS_DIR
    is set to an absolute path, return that path. If it's set to a relative
    path (does not start with /) then return that path taken to be relative to
    the mhcflurry downloads dir. The variable is read once, when this module
    is imported.

    If the environment variable is NOT set (or is empty), return the path to
    "models" within the "models_class1_presentation" download.

    Parameters
    ----------
    test_exists : boolean, optional
        Whether to raise an exception if the path does not exist

    Returns
    -------
    string : absolute path

    Raises
    ------
    IOError
        If the environment variable is set, test_exists is True, and the
        resulting path does not exist.
    RuntimeError
        Raised by `get_path` if the environment variable is not set,
        test_exists is True, and the default path does not exist.
    """
    override = _MHCFLURRY_DEFAULT_CLASS1_PRESENTATION_MODELS_DIR
    if not override:
        return get_path(
            "models_class1_presentation", "models", test_exists=test_exists)

    # os.path.join drops the downloads dir when the override is absolute, so
    # a single join handles both the absolute and the relative case.
    result = join(get_downloads_dir(), override)
    if test_exists and not exists(result):
        raise IOError("No such directory: %s" % result)
    return result
def get_default_class1_processing_models_dir(test_exists=True):
    """
    Return the absolute path to the default class1 processing models dir.

    See `get_default_class1_models_dir`.

    If environment variable MHCFLURRY_DEFAULT_CLASS1_PROCESSING_MODELS_DIR is
    set to an absolute path, return that path. If it's set to a relative path
    (does not start with /) then return that path taken to be relative to the
    mhcflurry downloads dir. The variable is read once, when this module is
    imported.

    If the environment variable is NOT set (or is empty), return the path to
    "models" within the "models_class1_processing" download.

    Parameters
    ----------
    test_exists : boolean, optional
        Whether to raise an exception if the path does not exist

    Returns
    -------
    string : absolute path

    Raises
    ------
    IOError
        If the environment variable is set, test_exists is True, and the
        resulting path does not exist.
    RuntimeError
        Raised by `get_path` if the environment variable is not set,
        test_exists is True, and the default path does not exist.
    """
    override = _MHCFLURRY_DEFAULT_CLASS1_PROCESSING_MODELS_DIR
    if not override:
        return get_path(
            "models_class1_processing", "models", test_exists=test_exists)

    # os.path.join drops the downloads dir when the override is absolute, so
    # a single join handles both the absolute and the relative case.
    result = join(get_downloads_dir(), override)
    if test_exists and not exists(result):
        raise IOError("No such directory: %s" % result)
    return result
def get_current_release_downloads():
    """
    Return a dict of all available downloads in the current release.

    The dict keys are the names of the downloads, in the order they appear in
    the release metadata. The values are a dict with three entries:

    downloaded : bool
        Whether the download is currently available locally
    metadata : dict
        Info about the download from downloads.yml such as URL
    up_to_date : bool or None
        Whether the download URL(s) match what was used to download the
        current data. This is None if it cannot be determined.

    Returns
    -------
    collections.OrderedDict
    """
    downloads = (
        get_downloads_metadata()
        ['releases']
        [get_current_release()]
        ['downloads'])
    downloads_dir = get_downloads_dir()

    def up_to_date(download_dir, urls):
        # Compare the URLs recorded at download time against the expected
        # ones. An unreadable/missing DOWNLOAD_INFO.csv means "unknown".
        try:
            df = pandas.read_csv(join(download_dir, "DOWNLOAD_INFO.csv"))
            return list(df.url) == list(urls)
        except IOError:
            return None

    result = OrderedDict()
    for download in downloads:
        name = download["name"]
        local_path = join(downloads_dir, name)
        result[name] = {
            'downloaded': exists(local_path),
            # A download is described either by a single 'url' or by a list
            # of 'part_urls'.
            'up_to_date': up_to_date(
                local_path,
                [download['url']] if 'url' in download
                else download['part_urls']),
            'metadata': download,
        }
    return result
def get_path(download_name, filename='', test_exists=True):
    """
    Get the local path to a file in a MHCflurry download

    Parameters
    ----------
    download_name : string
        Name of the download. Must not contain '/'.
    filename : string
        Relative path within the download to the file of interest
    test_exists : boolean
        If True (default) throw an error telling the user how to download the
        data if the file does not exist

    Returns
    -------
    string giving local absolute path

    Raises
    ------
    AssertionError
        If download_name contains '/'. This check is an ``assert`` statement
        and is therefore skipped when Python runs with -O.
    RuntimeError
        If test_exists is True and the path does not exist.
    """
    assert '/' not in download_name, "Invalid download: %s" % download_name

    path = join(get_downloads_dir(), download_name, filename)
    if not test_exists or exists(path):
        return path

    # The path is shell-quoted so it can be copied verbatim from the message.
    raise RuntimeError(
        "Missing MHCflurry downloadable file: %s. "
        "To download this data, run:\n\tmhcflurry-downloads fetch %s\n"
        "in a shell."
        % (quote(path), download_name))
# Runs once, when this module is imported.
# NOTE(review): `configure` is not defined in the visible part of this file;
# presumably it populates the module-level settings (downloads dir, current
# release) -- confirm against its definition.
configure()