
datasets module

Module for accessing the Earth Engine Data Catalog with dot notation.

get_community_data_list()

Returns the list of community dataset IDs.

From https://github.com/samapriya/awesome-gee-community-datasets/blob/master/community_datasets.json

Source code in geemap/datasets.py
def get_community_data_list() -> list[str]:
    """Returns the list of community dataset IDs.

    From https://github.com/samapriya/awesome-gee-community-datasets/blob/master/community_datasets.json
    """
    collections = common.search_ee_data(".*", regex=True, source="community")
    return [collection.get("id", None) for collection in collections]
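
A minimal usage sketch, assuming geemap is installed and the community catalog JSON is reachable over the network:

from geemap import datasets

community_ids = datasets.get_community_data_list()
print(len(community_ids))    # number of community dataset IDs
print(community_ids[:3])     # first few IDs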

get_data_csv()

Returns the path to the CSV file summarizing the Earth Engine Data Catalog.

Source code in geemap/datasets.py
def get_data_csv() -> str:
    """Returns the path to the CSV file summarizing the Earth Engine Data Catalog."""
    pkg_dir = str(importlib.resources.files("geemap").joinpath("geemap.py").parent)
    template_dir = os.path.join(pkg_dir, "data/template")
    data_csv = os.path.join(template_dir, "ee_data_catalog.csv")
    return data_csv
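
The returned path points at a CSV file bundled with the package. A quick way to peek at it without assuming anything about its columns is to print the header row (a sketch using only the standard library):

import csv

from geemap import datasets

with open(datasets.get_data_csv(), newline="", encoding="utf-8") as f:
    header = next(csv.reader(f))

print(header)    # column names of the bundled catalog summary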

get_data_dict()

Returns the Earth Engine Data Catalog as a nested dictionary.

Source code in geemap/datasets.py
def get_data_dict() -> dict[str, Any]:
    """Returns the Earth Engine Data Catalog as a nested dictionary."""
    data_dict = {}
    datasets = get_data_list()

    for dataset in datasets:
        tree_dict = {}
        items = dataset.split("/")
        for index, key in enumerate(reversed(items)):
            if index == 0:
                tree_dict = {key: dataset}
            else:
                tree_dict = {key: tree_dict}

        data_dict = merge_dict(data_dict, tree_dict)
        data_dict[dataset.replace("/", "_")] = dataset

    return data_dict
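
The loop turns each slash-delimited ID into a chain of nested keys (built innermost-out via reversed()), merges that chain into the accumulated dictionary, and also stores a flattened underscore alias. A self-contained sketch of the same logic on two illustrative IDs (build_tree is not part of geemap):

def build_tree(dataset_ids):
    data_dict = {}
    for dataset in dataset_ids:
        tree_dict = {}
        for index, key in enumerate(reversed(dataset.split("/"))):
            # the innermost level maps the last path segment to the full ID
            tree_dict = {key: dataset} if index == 0 else {key: tree_dict}
        data_dict = {**data_dict, **tree_dict}
        data_dict[dataset.replace("/", "_")] = dataset
    return data_dict


print(build_tree(["USGS/SRTMGL1_003", "NASA/NASADEM_HGT/001"]))
# {'USGS': {'SRTMGL1_003': 'USGS/SRTMGL1_003'},
#  'USGS_SRTMGL1_003': 'USGS/SRTMGL1_003',
#  'NASA': {'NASADEM_HGT': {'001': 'NASA/NASADEM_HGT/001'}},
#  'NASA_NASADEM_HGT_001': 'NASA/NASADEM_HGT/001'}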

get_data_list()

Returns a list of Earth Engine dataset IDs.

Source code in geemap/datasets.py
def get_data_list() -> list:
    """Returns a list of Earth Engine dataset IDs."""
    datasets = get_ee_stac_list()
    extra_datasets = get_geemap_data_list()
    community_datasets = get_community_data_list()

    return datasets + extra_datasets + community_datasets
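
A common pattern is filtering the combined list by substring; a small sketch (the "LC09" substring is only an example):

from geemap import datasets

all_ids = datasets.get_data_list()
landsat9 = [ds for ds in all_ids if "LC09" in ds]
print(len(all_ids), len(landsat9))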

get_ee_stac_list()

Returns the STAC list of the Earth Engine Data Catalog.

Raises:

    Exception: If the JSON file fails to download.

Source code in geemap/datasets.py
def get_ee_stac_list() -> list[str]:
    """Returns the STAC list of the Earth Engine Data Catalog.

    Raises:
        Exception: If the JSON file fails to download.
    """
    stac_url = "https://raw.githubusercontent.com/samapriya/Earth-Engine-Datasets-List/master/gee_catalog.json"

    datasets = []
    with urllib.request.urlopen(stac_url) as url:
        data = json.loads(url.read().decode())
        datasets = [item["id"] for item in data]

    return datasets
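
The download itself is not wrapped in a try/except, so network problems surface as urllib errors at the call site; a defensive calling pattern might look like this (a sketch, not part of the library):

import urllib.error

from geemap import datasets

try:
    stac_ids = datasets.get_ee_stac_list()
except urllib.error.URLError as exc:
    print(f"Could not fetch the Earth Engine STAC catalog: {exc}")
    stac_ids = []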

get_geemap_data_list()

Returns the list of the public datasets from GEE users.

Source code in geemap/datasets.py
def get_geemap_data_list() -> list[str]:
    """Returns the list of the public datasets from GEE users."""
    extra_ids = [
        "countries",
        "us_states",
        "us_cities",
        "chn_admin_line",
        "chn_admin_level0",
        "chn_admin_level1",
        "chn_admin_level2",
    ]

    extra_datasets = [f"users/giswqs/public/{uid}" for uid in extra_ids]
    return extra_datasets

get_metadata(asset_id, source='ee')

Returns metadata about an Earth Engine asset.

Parameters:

    asset_id (str): The Earth Engine asset id. Required.
    source (str): 'ee', 'community' or 'all'. Defaults to 'ee'.

Raises:

    Exception: If search fails.

Source code in geemap/datasets.py
def get_metadata(asset_id: str, source: str = "ee") -> dict[str, Any]:
    """Returns metadata about an Earth Engine asset.

    Args:
        asset_id: The Earth Engine asset id.
        source: 'ee', 'community' or 'all'.

    Raises:
        Exception: If search fails.
    """
    ee_assets = common.search_ee_data(asset_id, source=source)
    html = common.ee_data_html(ee_assets[0])
    html_widget = widgets.HTML()
    html_widget.value = html
    display(html_widget)
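
Because the result is displayed as an ipywidgets HTML widget, this function is meant to be called from a Jupyter environment; a minimal call (the asset ID is just an example):

from geemap import datasets

# Renders an HTML summary of the asset in the notebook output area.
datasets.get_metadata("USGS/SRTMGL1_003", source="ee")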

merge_dict(dict1, dict2)

Merges two nested dictionaries.

Parameters:

    dict1 (dict[Any, Any]): The first dictionary to merge. Required.
    dict2 (dict[Any, Any]): The second dictionary to merge. Required.

Returns:

    dict: The merged dictionary.

Source code in geemap/datasets.py
def merge_dict(dict1: dict[Any, Any], dict2: dict[Any, Any]) -> dict:
    """Merges two nested dictionaries.

    Args:
        dict1: The first dictionary to merge.
        dict2: The second dictionary to merge.

    Returns:
        The merged dictionary.
    """
    return {**dict1, **dict2}
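
Note that {**dict1, **dict2} is a shallow merge: when both inputs share a top-level key, the value from dict2 replaces the one from dict1 rather than being combined recursively. A quick illustration with made-up values:

from geemap.datasets import merge_dict

a = {"USGS": {"SRTMGL1_003": "USGS/SRTMGL1_003"}}
b = {"USGS": {"GAP": "USGS/GAP"}, "NASA": {"NASADEM": "NASA/NASADEM_HGT/001"}}

print(merge_dict(a, b))
# {'USGS': {'GAP': 'USGS/GAP'}, 'NASA': {'NASADEM': 'NASA/NASADEM_HGT/001'}}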

update_data_list(out_dir='.')

Updates the Earth Engine Data Catalog dataset list.

Parameters:

    out_dir (str): The output directory to save the GitHub repository. Defaults to ".".

Raises:

    Exception: If the CSV file fails to save.

Source code in geemap/datasets.py
def update_data_list(out_dir: str = ".") -> None:
    """Updates the Earth Engine Data Catalog dataset list.

    Args:
        out_dir: The output directory to save the GitHub repository. Defaults to ".".

    Raises:
        Exception: If the CSV file fails to save.
    """
    url = "https://github.com/samapriya/Earth-Engine-Datasets-List/archive/master.zip"
    filename = "Earth-Engine-Datasets-List-master.zip"
    dir_name = filename.replace(".zip", "")

    out_dir = os.path.abspath(out_dir)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    common.download_from_url(
        url, out_file_name=filename, out_dir=out_dir, unzip=True, verbose=False
    )

    work_dir = os.path.join(out_dir, dir_name)
    in_csv = list(pathlib.Path(work_dir).rglob("*.csv"))[0]

    out_csv = get_data_csv()

    shutil.copyfile(in_csv, out_csv)
    os.remove(os.path.join(out_dir, filename))
    shutil.rmtree(os.path.join(out_dir, dir_name))
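
A minimal invocation; note that the refreshed CSV is copied over the path returned by get_data_csv(), so this needs write access to the installed geemap package as well as network access to GitHub (the temporary directory below only holds the downloaded archive):

import tempfile

from geemap import datasets

with tempfile.TemporaryDirectory() as tmp_dir:
    datasets.update_data_list(out_dir=tmp_dir)

print(datasets.get_data_csv())    # path of the refreshed catalog CSV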