Python API: Searching and Finding Archives¶
This is the tested source code for the snippets used in
Searching and Finding Archives. The config file we’re using in this example
can be downloaded here.
Setup¶
>>> import datafs
>>> from fs.tempfs import TempFS
>>> import os
>>> import itertools
We test with the following setup:
>>> api = datafs.get_api(
... config_file='examples/snippets/resources/datafs_mongo.yml')
...
This assumes that you have a config file at the above location. The config file
we’re using in this example can be downloaded here.
Clean up any archives and manager tables left over from previous test failures:
>>> try:
... api.delete_archive('my_archive')
... api.delete_archive('streaming_archive')
... api.delete_archive('sample_archive')
... except (KeyError, OSError):
... pass
...
>>> try:
... api.manager.delete_table('DataFiles')
... except KeyError:
... pass
...
Add a fresh manager table:
>>> api.manager.create_archive_table('DataFiles')
Set up some archives to search:
>>> with open('test.txt', 'w') as f:
... f.write('test test')
...
>>> tas_archive = api.create('impactlab/climate/tas/tas_day_us.csv')
>>> tas_archive.update('test.txt')
>>> precip_archive = api.create('impactlab/climate/pr/pr_day_us.csv')
>>> precip_archive.update('test.txt')
>>> socio = api.create('impactlab/mortality/global/mortality_glob_day.csv')
>>> socio.update('test.txt')
>>> socio1 = api.create('impactlab/conflict/global/conflict_glob_day.csv')
>>> socio1.update('test.txt')
>>> socio2 = api.create('impactlab/labor/global/labor_glob_day.csv')
>>> socio2.update('test.txt')
Example 1¶
Displayed example 1 code
>>> api.listdir('impactlab/conflict/global')
[u'conflict_glob_day.csv']
Example 3¶
Displayed example 3 code
>>> api.listdir('impactlab')
['labor', 'climate', 'conflict', 'mortality']
And the actual test:
>>> (set(api.listdir('impactlab')) == set([
... 'labor', 'climate', 'conflict', 'mortality']))
...
True
Example 5¶
Displayed example 5 code
>>> api.listdir('impactlab/conflict/global')
[u'conflict_glob_day.csv']
>>> api.listdir('impactlab/conflict/global/conflict_glob_day.csv')
[u'0.0.1']
Teardown¶
>>> try:
... tas_archive.delete()
... precip_archive.delete()
... socio.delete()
... socio1.delete()
... socio2.delete()
... os.remove('test.txt')
... except KeyError:
... pass
>>> try:
... api.manager.delete_table('DataFiles')
... except KeyError:
... pass
Set up a fresh manager table for the filter and search examples:
>>> api.manager.create_archive_table('DataFiles')
Create the tagged archives used by the filter and search examples:
>>> archive_names = []
>>> for indices in itertools.product(*(range(1, 6) for _ in range(3))):
... archive_name = (
... 'project{}_variable{}_scenario{}.nc'.format(*indices))
... archive_names.append(archive_name)
>>>
>>> for i, name in enumerate(archive_names):
... if i % 3 == 0:
... api.create(name, tags=['team1'])
... elif i % 2 == 0:
... api.create(name, tags=['team2'])
... else:
... api.create(name, tags=['team3'])
<DataArchive local://project1_variable1_scenario1.nc>
<DataArchive local://project1_variable1_scenario2.nc>
<DataArchive local://project1_variable1_scenario3.nc>
...
<DataArchive local://project5_variable5_scenario3.nc>
<DataArchive local://project5_variable5_scenario4.nc>
<DataArchive local://project5_variable5_scenario5.nc>
Example 6¶
Displayed example 6 code
>>> len(list(api.filter()))
125
>>> filtered_list1 = api.filter(prefix='project1_variable1_')
>>> list(filtered_list1)
[u'project1_variable1_scenario1.nc', u'project1_variable1_scenario2.nc',
u'project1_variable1_scenario3.nc', u'project1_variable1_scenario4.nc',
u'project1_variable1_scenario5.nc']
Example 7¶
Displayed example 7 code
>>> filtered_list2 = api.filter(pattern='*_variable4_scenario4.nc',
... engine='path')
>>> list(filtered_list2)
[u'project1_variable4_scenario4.nc', u'project2_variable4_scenario4.nc',
u'project3_variable4_scenario4.nc', u'project4_variable4_scenario4.nc',
u'project5_variable4_scenario4.nc']
Example 8¶
Displayed example 8 code
>>> filtered_list3 = list(api.filter(pattern='variable2', engine='str'))
>>> len(filtered_list3)
25
>>> filtered_list3[:4]
[u'project1_variable2_scenario1.nc', u'project1_variable2_scenario2.nc',
u'project1_variable2_scenario3.nc', u'project1_variable2_scenario4.nc']
Example 9¶
Displayed example 9 code
>>> archives_search = list(api.search())
>>> archives_filter = list(api.filter())
>>> len(archives_search)
125
>>> len(archives_filter)
125
Example 10¶
Displayed example 10 code
>>> tagged_search = list(api.search('team3'))
>>> len(tagged_search)
41
>>> tagged_search[:4]
[u'project1_variable1_scenario2.nc', u'project1_variable2_scenario1.nc',
u'project1_variable2_scenario3.nc', u'project1_variable3_scenario2.nc']
Example 11¶
Displayed example 11 code
>>> tags = []
>>> for arch in tagged_search[:4]:
... tags.append(api.manager.get_tags(arch)[0])
>>> tags
[u'team3', u'team3', u'team3', u'team3']
Example 12¶
Displayed example 12 code
>>> tagged_search_team1 = list(api.search('team1'))
>>> len(tagged_search_team1)
42
>>> tagged_search_team1[:4]
[u'project1_variable1_scenario1.nc', u'project1_variable1_scenario4.nc',
u'project1_variable2_scenario2.nc', u'project1_variable2_scenario5.nc']
Example 13¶
Displayed example 13 code
>>> tags = []
>>> for arch in tagged_search_team1[:4]:
... tags.append(api.manager.get_tags(arch)[0])
>>> tags
[u'team1', u'team1', u'team1', u'team1']
Teardown¶
>>> api.manager.delete_table('DataFiles')