Ingesting Private Datasets (v2.0)

Download this notebook.

# imports
import warnings
warnings.filterwarnings('ignore')

import h5py
import specdb
import glob

from astropy.table import Table
from linetools import utils as ltu

from specdb.build import privatedb as pbuild
from specdb.build import utils as spbu
from specdb.specdb import IgmSpec

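Test on Single Folder

Note: grab_files returns a 3-tuple whose first element is the list of spectrum files (the other two entries are None for this folder), so len(flux_files) below is 3 rather than the number of files found.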

tree = specdb.__path__[0]+'/build/tests/files'
#os.getenv('DROPBOX_DIR')+'/QSOPairs/data/MMT_redux/'
reload(pbuild)
flux_files = pbuild.grab_files(tree)
len(flux_files)
3
flux_files[:5]
([u'/Users/xavier/local/Python/specdb/specdb/build/tests/files//./SDSSJ001605.89+005654.3_b800_F.fits.gz',
  u'/Users/xavier/local/Python/specdb/specdb/build/tests/files//./SDSSJ001607.27+005653.1_b800_F.fits.gz'],
 None,
 None)

Directory Tree – Step by Step

tree2 = specdb.__path__[0]+'/data/test_privateDB'
branches = glob.glob(tree2+'/*')
branches[0]
'/Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS'

Get started

id_key = 'TEST_ID'
maindb, tkeys = spbu.start_maindb(id_key)

Files

reload(pbuild)
mflux_files, meta_file, _ = pbuild.grab_files(branches[0])
len(mflux_files)
2
mflux_files[:5]
[u'/Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS//./J095240.17+515250.03.fits.gz',
 u'/Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS//./J095243.05+515121.15.fits.gz']
meta_file
u'/Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS/COS_meta.json'
meta_dict = ltu.loadjson(meta_file)
meta_dict
{u'maxpix': 60000,
 u'meta_dict': {u'TELESCOPE': u'HST'},
 u'parse_head': {u'DATE-OBS': u'DATE',
  u'GRATING': u'OPT_ELEM',
  u'INSTR': u'INSTRUME',
  u'R': True}}

ztbl (read from file)

ztbl = Table.read(specdb.__path__[0]+'/data/test_privateDB/testDB_ztbl.fits')
ztbl
<Table length=6>
RA | DEC | ZEM | ZEM_SOURCE | SPEC_FILE
float64 | float64 | float64 | str5 | str35
331.992916667 | 12.9956388889 | 1.0 | UNKNW | SDSSJ220758.30+125944.3_F.fits
261.35275 | 30.6344166667 | 1.1 | UNKNW | SDSSJ172524.66+303803.9_F.fits
345.184833333 | 1.92825 | 1.2 | UNKNW | SDSSJ230044.36+015541.7_r600_F.fits
345.184833333 | 1.92825 | 1.2 | UNKNW | SDSSJ230044.36+015541.7_b400_F.fits
148.167375 | 51.8805638889 | 1.3 | UNKNW | J095240.17+515250.03.fits.gz
148.179375 | 51.855875 | 1.4 | UNKNW | J095243.05+515121.15.fits.gz

Meta

reload(pbuild)
meta = pbuild.mk_meta(mflux_files, ztbl, fname=True, mdict=meta_dict['meta_dict'], parse_head=meta_dict['parse_head'])
meta[0:3]
<Table length=2>
RA_GROUP | DEC_GROUP | STYPE | zem_GROUP | sig_zem | flag_zem | SPEC_FILE | DATE-OBS | GRATING | R | INSTR | TELESCOPE | EPOCH | GROUP_ID
float64 | float64 | str3 | float64 | float64 | str8 | unicode96 | str10 | str5 | float64 | str3 | unicode3 | float64 | int64
148.167375 | 51.8805638889 | QSO | 1.3 | 0.0 | UNKNW | /Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS//./J095240.17+515250.03.fits.gz | 2015-05-31 | G130M | 17000.0 | COS | HST | 2000.0 | 0
148.179375 | 51.855875 | QSO | 1.4 | 0.0 | UNKNW | /Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS//./J095243.05+515121.15.fits.gz | 2015-12-08 | G130M | 17000.0 | COS | HST | 2000.0 | 1

Without fname=True

This requires a SPEC_FILE column in ztbl.
meta2 = pbuild.mk_meta(mflux_files, ztbl, fname=False, mdict=meta_dict['meta_dict'], parse_head=meta_dict['parse_head'])
meta2
<Table length=2>
RA_GROUP | DEC_GROUP | STYPE | zem_GROUP | sig_zem | flag_zem | SPEC_FILE | DATE-OBS | GRATING | R | INSTR | TELESCOPE | EPOCH | GROUP_ID
float64 | float64 | str3 | float64 | float64 | str8 | unicode96 | str10 | str5 | float64 | str3 | unicode3 | float64 | int64
148.167375 | 51.8805638889 | QSO | 1.3 | 0.0 | UNKNW | /Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS//./J095240.17+515250.03.fits.gz | 2015-05-31 | G130M | 17000.0 | COS | HST | 2000.0 | 0
148.179375 | 51.855875 | QSO | 1.4 | 0.0 | UNKNW | /Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS//./J095243.05+515121.15.fits.gz | 2015-12-08 | G130M | 17000.0 | COS | HST | 2000.0 | 1

Add Group and IDs

gdict = {}
flag_g = spbu.add_to_group_dict('COS', gdict)
maindb = pbuild.add_ids(maindb, meta, flag_g, tkeys, id_key, first=(flag_g==1))
The following sources were previously in the DB
RA_GROUP DEC_GROUP STYPE zem_GROUP sig_zem ... INSTR TELESCOPE EPOCH GROUP_ID
-------- --------- ----- --------- ------- ... ----- --------- ----- --------
maindb
<Table length=2>
flag_group | sig_zem | flag_zem | RA | DEC | STYPE | zem | TEST_ID
int64 | float64 | str8 | float64 | float64 | str3 | float64 | int64
1 | 0.0 | UNKNW | 148.167375 | 51.8805638889 | QSO | 1.3 | 0
1 | 0.0 | UNKNW | 148.179375 | 51.855875 | QSO | 1.4 | 1
gdict
{'COS': 1}

Spectra

hdf = h5py.File('tmp.hdf5','w')
reload(pbuild)
pbuild.ingest_spectra(hdf, 'test', meta, max_npix=meta_dict['maxpix'])
Adding test group to DB

Finish

pbuild.write_hdf(hdf, 'TEST_DB', maindb, [str('SDSS')], gdict, 'v01')

Directory Tree – All in One

ztbl = Table.read(specdb.__path__[0]+'/data/test_privateDB/testDB_ztbl.fits')
reload(pbuild)
pbuild.mk_db('TEST_DB', tree2, 'tmp.hdf5', ztbl, fname=True)
Working on branch: /Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS
The following sources were previously in the DB
RA_GROUP DEC_GROUP STYPE zem_GROUP sig_zem ... INSTR TELESCOPE EPOCH GROUP_ID
-------- --------- ----- --------- ------- ... ----- --------- ----- --------
Adding COS group to DB
Working on branch: /Users/xavier/local/Python/specdb/specdb/data/test_privateDB/ESI
The following sources were previously in the DB
RA_GROUP DEC_GROUP STYPE zem_GROUP sig_zem ... GRATING EPOCH GROUP_ID tGRB
-------- --------- ----- --------- ------- ... ------- ----- -------- ----
Adding ESI group to DB
Working on branch: /Users/xavier/local/Python/specdb/specdb/data/test_privateDB/LRIS
The following sources were previously in the DB
RA_GROUP DEC_GROUP STYPE zem_GROUP sig_zem ... GRATING TELESCOPE EPOCH GROUP_ID
-------- --------- ----- --------- ------- ... ------- --------- ----- --------
Adding LRIS group to DB
Wrote tmp.hdf5 DB file
# Without fname
pbuild.mk_db('TEST_DB', tree2, 'tmp2.hdf5', ztbl, fname=False)
Working on branch: /Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS
The following sources were previously in the DB
RA_GROUP DEC_GROUP STYPE zem_GROUP sig_zem ... INSTR TELESCOPE EPOCH GROUP_ID
-------- --------- ----- --------- ------- ... ----- --------- ----- --------
Adding COS group to DB
Working on branch: /Users/xavier/local/Python/specdb/specdb/data/test_privateDB/ESI
The following sources were previously in the DB
RA_GROUP DEC_GROUP STYPE zem_GROUP sig_zem ... GRATING EPOCH GROUP_ID tGRB
-------- --------- ----- --------- ------- ... ------- ----- -------- ----
Adding ESI group to DB
Working on branch: /Users/xavier/local/Python/specdb/specdb/data/test_privateDB/LRIS
The following sources were previously in the DB
RA_GROUP DEC_GROUP STYPE zem_GROUP sig_zem ... GRATING TELESCOPE EPOCH GROUP_ID
-------- --------- ----- --------- ------- ... ------- --------- ----- --------
Adding LRIS group to DB
Wrote tmp2.hdf5 DB file

By script

specdb_privatedb testDB ../../specdb/data/test_privateDB tst3_DB.hdf5

Check ESI meta

igmsp = IgmSpec(db_file='tmp2.hdf5', verbose=True)
Using tmp2.hdf5 for the DB file
Available groups: [u'COS', u'ESI', u'LRIS']
igmsp['ESI'].meta
<Table length=2>
RA_GROUP | DEC_GROUP | STYPE | zem_GROUP | sig_zem | flag_zem | DATE-OBS | R | EPOCH | GROUP_ID | tGRB | PRIV_ID | NPIX | WV_MIN | WV_MAX | SPEC_FILE | INSTR | TELESCOPE | GRATING
float64 | float64 | str3 | float64 | float64 | str8 | str10 | float64 | float64 | int64 | str21 | int64 | int64 | float64 | float64 | str98 | str3 | str7 | str3
261.3528 | 30.6344 | QSO | 1.100 | 0.0 | UNKNW | 2015-05-19 | 4545.0 | 2000.0 | 0 | 2009-11-23:10:12:13.2 | 2 | 27931 | 3993.5 | 10131.6 | /Users/xavier/local/Python/specdb/specdb/data/test_privateDB/ESI//./SDSSJ172524.66+303803.9_F.fits | ESI | Keck-II | ECH
331.9929 | 12.9956 | QSO | 1.000 | 0.0 | UNKNW | 2008-06-04 | 4545.0 | 2000.0 | 1 | 2007-08-13:10:22:23.3 | 3 | 27926 | 3993.5 | 10129.9 | /Users/xavier/local/Python/specdb/specdb/data/test_privateDB/ESI//./SDSSJ220758.30+125944.3_F.fits | ESI | Keck-II | ECH

JSON files for meta table

parse_head = {'DATE-OBS':'DATE', 'TELESCOPE':'TELESCOP','INSTR':'INSTRUME', 'R': True}
mdict = dict(GRATING='ALL', R=8000.)
db_dict = dict(parse_head=parse_head, meta_dict=mdict, maxpix=60000)
jdict = ltu.jsonify(db_dict)
ltu.savejson('tst.json', jdict, easy_to_read=True, overwrite=True)