Uncomment the following line to install geemap if needed.
# !pip install geemap scikit-learn
How to use locally trained machine learning models with GEE¶
This notebook illustrates how to train a random forest (or any other ensemble tree estimator) locally using scikit-learn, convert the estimator into a string representation that Earth Engine can interpret, and how to apply the machine learning model with EE. The notebook and the geemap machine learning module (ml.py) were contributed by Kel Markert. A huge thank you to him.
import ee
import geemap
import pandas as pd
from geemap import ml
from sklearn import ensemble
--------------------------------------------------------------------------- ModuleNotFoundError Traceback (most recent call last) Input In [2], in <cell line: 6>() 3 import pandas as pd 5 from geemap import ml ----> 6 from sklearn import ensemble ModuleNotFoundError: No module named 'sklearn'
# Initialize the Earth Engine session; this must run before the ee.* calls below.
geemap.ee_initialize()
# Read the feature table used to train our RandomForest model.
# The data were taken from ee.FeatureCollection('GOOGLE/EE/DEMOS/demo_landcover_labels').
url = "https://raw.githubusercontent.com/giswqs/geemap/master/examples/data/rf_example.csv"
df = pd.read_csv(url)
# evaluate df as the last expression so the notebook displays the table
df
B2 | B3 | B4 | B5 | B6 | B7 | landcover | |
---|---|---|---|---|---|---|---|
0 | 0.139846 | 0.114738 | 0.109982 | 0.119542 | 0.125795 | 0.105720 | 0 |
1 | 0.130316 | 0.109207 | 0.107499 | 0.140210 | 0.132006 | 0.108497 | 0 |
2 | 0.146690 | 0.135766 | 0.146550 | 0.225686 | 0.218105 | 0.167111 | 0 |
3 | 0.119413 | 0.108924 | 0.105196 | 0.144868 | 0.159775 | 0.122056 | 0 |
4 | 0.155492 | 0.139932 | 0.137486 | 0.151377 | 0.153771 | 0.133134 | 0 |
... | ... | ... | ... | ... | ... | ... | ... |
93 | 0.117331 | 0.092176 | 0.062548 | 0.020362 | 0.005813 | 0.004047 | 2 |
94 | 0.118353 | 0.093785 | 0.060253 | 0.020083 | 0.007317 | 0.004719 | 2 |
95 | 0.123362 | 0.095831 | 0.069663 | 0.027320 | 0.011386 | 0.008357 | 2 |
96 | 0.122907 | 0.100083 | 0.079527 | 0.024564 | 0.008570 | 0.006321 | 2 |
97 | 0.119945 | 0.097548 | 0.066974 | 0.021062 | 0.006598 | 0.004311 | 2 |
98 rows × 7 columns
# Name of the column that holds the class label of each sample.
label = "landcover"
# Band names used as model features: they pick the training columns here and
# are reused later to select the matching image bands.
feature_names = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7']
# Split the table into the feature columns (X) and the label column (y).
X, y = df[feature_names], df[label]
# Build a random forest with n_trees decision trees, then train it on the
# feature columns (X) and labels (y).
n_trees = 10
rf = ensemble.RandomForestClassifier(n_estimators=n_trees)
# fit() returns the estimator itself, so rf stays bound to the trained model
rf = rf.fit(X, y)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Input In [8], in <cell line: 3>() 1 # create a classifier and fit 2 n_trees = 10 ----> 3 rf = ensemble.RandomForestClassifier(n_trees).fit(X, y) NameError: name 'ensemble' is not defined
Convert a sklearn classifier object to a list of strings¶
# Convert the fitted estimator into a list of strings; the length check
# further down expects one string per tree (len(trees) == n_trees).
# Per the original note, this function also works with the ensemble.ExtraTrees estimator.
trees = ml.rf_to_strings(rf, feature_names)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Input In [9], in <cell line: 3>() 1 # convert the estimator into a list of strings 2 # this function also works with the ensemble.ExtraTrees estimator ----> 3 trees = ml.rf_to_strings(rf, feature_names) NameError: name 'rf' is not defined
# print the first tree string to inspect the converted format
print(trees[0])
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Input In [10], in <cell line: 2>() 1 # print the first tree to see the result ----> 2 print(trees[0]) NameError: name 'trees' is not defined
# print the second tree string for comparison with the first
print(trees[1])
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Input In [11], in <cell line: 1>() ----> 1 print(trees[1]) NameError: name 'trees' is not defined
# sanity check: the number of converted tree strings should equal the
# number of trees we defined for the model (n_trees)
len(trees) == n_trees
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Input In [12], in <cell line: 2>() 1 # number of trees we converted should equal the number of trees we defined for the model ----> 2 len(trees) == n_trees NameError: name 'trees' is not defined
Convert sklearn classifier to GEE classifier¶
At this point you can take the list of strings and save them locally to avoid training again. However, we want to use the model with EE so we need to create an ee.Classifier and persist the data on ee for best results.
# create an ee classifier from the tree strings so it can be used with ee objects
ee_classifier = ml.strings_to_classifier(trees)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Input In [13], in <cell line: 2>() 1 # create a ee classifier to use with ee objects from the trees ----> 2 ee_classifier = ml.strings_to_classifier(trees) NameError: name 'trees' is not defined
# ee_classifier.getInfo()
Classify image using GEE classifier¶
# Make a cloud-free Landsat 8 TOA composite (from raw imagery).
# Use the Collection 2 Tier 1 raw scenes: the Collection 1 id
# ('LANDSAT/LC08/C01/T1') has been deprecated by Earth Engine.
l8 = ee.ImageCollection('LANDSAT/LC08/C02/T1')
# Composite all 2018 scenes. asFloat=True requests floating-point output;
# the training table holds fractional band values, so the image should
# presumably be on the same scale -- confirm against the simpleComposite docs.
image = ee.Algorithms.Landsat.simpleComposite(
    collection=l8.filterDate('2018-01-01', '2018-12-31'), asFloat=True
)
# Classify the composite with the classifier built from the locally trained trees.
# Note: we select feature_names from the image so the classifier is only
# given the bands listed there.
classified = image.select(feature_names).classify(ee_classifier)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Input In [16], in <cell line: 3>() 1 # classify the image using the classifier we created from the local training 2 # note: here we select the feature_names from the image that way the classifier knows which bands to use ----> 3 classified = image.select(feature_names).classify(ee_classifier) NameError: name 'ee_classifier' is not defined
# Show the composite and the classification result on an interactive map.
# Visualization settings for the composite and for the class layer.
image_vis = {"bands": ['B7', 'B5', 'B3'], "min": 0.05, "max": 0.55, "gamma": 1.5}
class_vis = {"min": 0, "max": 2, "palette": ['red', 'green', 'blue']}

Map = geemap.Map(center=(37.75, -122.25), zoom=11)
Map.addLayer(image, image_vis, 'image')
Map.addLayer(classified, class_vis, 'classification')
Map
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Input In [17], in <cell line: 9>() 2 Map = geemap.Map(center=(37.75, -122.25), zoom=11) 4 Map.addLayer( 5 image, 6 {"bands": ['B7', 'B5', 'B3'], "min": 0.05, "max": 0.55, "gamma": 1.5}, 7 'image', 8 ) 9 Map.addLayer( ---> 10 classified, 11 {"min": 0, "max": 2, "palette": ['red', 'green', 'blue']}, 12 'classification', 13 ) 15 Map NameError: name 'classified' is not defined
Yay!! 🎉 Looks like our example works. Don't party too much because there is a catch...
This workflow has several limitations, mainly due to how much data you can pass from the client to the server and how large a model ee can actually handle. EE can only handle 40MB of data passed to the server, so if you have a lot of large decision tree strings then this will not work. Also, creating a classifier from strings has limitations (see this ee-forum discussion: https://groups.google.com/g/google-earth-engine-developers/c/lFFU1GBPzi8/m/6MewQk1FBwAJ); this is again limited by string lengths when ee creates a computation graph.
So, you can use this but know you will probably run into errors when training large models.
Save trees to the cloud¶
Now we have the strings in a format that ee can use, we want to save it for later use. There is a function to export a list of tree strings to a feature collection. The feature collection will have a pro
# look up the Earth Engine user folder of the current account
# (the run recorded in this notebook returned 'users/giswqs')
user_id = geemap.ee_user_id()
user_id
'users/giswqs'
# Specify the asset id where the trees will be saved.
# The id is built from user_id, so there is no placeholder to edit by hand;
# change only the asset name if you want a different one.
asset_id = user_id + "/random_forest_strings_test"
asset_id
'users/giswqs/random_forest_strings_test'
# Kick off an export so the tree strings are saved to the ee asset at asset_id.
ml.export_trees_to_fc(trees, asset_id)
# This only starts an export task, so wait a few minutes for it to finish
# before moving on.
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Input In [20], in <cell line: 2>() 1 # kick off an export process so it will be saved to the ee asset ----> 2 ml.export_trees_to_fc(trees, asset_id) NameError: name 'trees' is not defined
# Read the exported tree feature collection.
# NOTE: the export task started earlier must have finished first.
rf_fc = ee.FeatureCollection(asset_id)
# convert it to a classifier, very similar to the `ml.strings_to_classifier` function
another_classifier = ml.fc_to_classifier(rf_fc)
# classify the image again but with the classifier from the persisted trees
classified = image.select(feature_names).classify(another_classifier)
# Show the results again on an interactive map.
# We should get the exact same results as before.
image_vis = {"bands": ['B7', 'B5', 'B3'], "min": 0.05, "max": 0.55, "gamma": 1.5}
class_vis = {"min": 0, "max": 2, "palette": ['red', 'green', 'blue']}

Map = geemap.Map(center=(37.75, -122.25), zoom=11)
Map.addLayer(image, image_vis, 'image')
Map.addLayer(classified, class_vis, 'classification')
Map
Save trees locally¶
import os

# Write the tree strings to a local CSV so the model can be reloaded later
# without training again.
out_csv = os.path.expanduser("~/Downloads/trees.csv")
# ~/Downloads does not exist on every machine (e.g. headless servers or CI
# runners), so make sure the folder is there before writing.
os.makedirs(os.path.dirname(out_csv), exist_ok=True)
ml.trees_to_csv(trees, out_csv)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Input In [24], in <cell line: 1>() ----> 1 ml.trees_to_csv(trees, out_csv) NameError: name 'trees' is not defined
# Rebuild the classifier from the tree strings saved in the local CSV.
another_classifier = ml.csv_to_classifier(out_csv)
# In the run recorded in this notebook a missing file only printed
# "... could not be found." and the failure surfaced later as
# "Parameter 'classifier' is required." -- presumably because no classifier
# was returned. Fail here instead, with a message that names the cause.
if another_classifier is None:
    raise FileNotFoundError(
        f"{out_csv} could not be loaded; run ml.trees_to_csv(trees, out_csv) first."
    )
/home/runner/Downloads/trees.csv could not be found.
# classify the image again, this time with the classifier rebuilt from the local CSV
classified = image.select(feature_names).classify(another_classifier)
# Show the results once more on an interactive map.
# We should get the exact same results as before.
image_vis = {"bands": ['B7', 'B5', 'B3'], "min": 0.05, "max": 0.55, "gamma": 1.5}
class_vis = {"min": 0, "max": 2, "palette": ['red', 'green', 'blue']}

Map = geemap.Map(center=(37.75, -122.25), zoom=11)
Map.addLayer(image, image_vis, 'image')
Map.addLayer(classified, class_vis, 'classification')
Map
--------------------------------------------------------------------------- HttpError Traceback (most recent call last) File ~/.local/lib/python3.9/site-packages/ee/data.py:330, in _execute_cloud_call(call, num_retries) 329 try: --> 330 return call.execute(num_retries=num_retries) 331 except googleapiclient.errors.HttpError as e: File ~/.local/lib/python3.9/site-packages/googleapiclient/_helpers.py:134, in positional.<locals>.positional_decorator.<locals>.positional_wrapper(*args, **kwargs) 133 logger.warning(message) --> 134 return wrapped(*args, **kwargs) File ~/.local/lib/python3.9/site-packages/googleapiclient/http.py:915, in HttpRequest.execute(self, http, num_retries) 914 if resp.status >= 300: --> 915 raise HttpError(resp, content, uri=self.uri) 916 return self.postproc(resp, content) HttpError: <HttpError 400 when requesting https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/maps?fields=name&alt=json returned "Parameter 'classifier' is required.". Details: "Parameter 'classifier' is required."> During handling of the above exception, another exception occurred: EEException Traceback (most recent call last) Input In [27], in <cell line: 10>() 3 Map = geemap.Map(center=(37.75, -122.25), zoom=11) 5 Map.addLayer( 6 image, 7 {"bands": ['B7', 'B5', 'B3'], "min": 0.05, "max": 0.55, "gamma": 1.5}, 8 'image', 9 ) ---> 10 Map.addLayer( 11 classified, 12 {"min": 0, "max": 2, "palette": ['red', 'green', 'blue']}, 13 'classification', 14 ) 16 Map File ~/.local/lib/python3.9/site-packages/geemap/foliumap.py:234, in Map.add_layer(self, ee_object, vis_params, name, shown, opacity, **kwargs) 231 print("The provided palette is invalid.") 232 raise Exception(e) --> 234 map_id_dict = ee.Image(image).getMapId(vis_params) 236 # if a layer starts with a number, add "Layer" to name. 
237 if name[0].isdigit(): File ~/.local/lib/python3.9/site-packages/ee/image.py:130, in Image.getMapId(self, vis_params) 128 vis_image, request = self._apply_visualization(vis_params) 129 request['image'] = vis_image --> 130 response = data.getMapId(request) 131 response['image'] = self 132 return response File ~/.local/lib/python3.9/site-packages/ee/data.py:569, in getMapId(params) 566 request['visualizationOptions'] = visualizationOptions 567 # Make it return only the name field, as otherwise it echoes the entire 568 # request, which might be large. --> 569 result = _execute_cloud_call( 570 _get_cloud_api_resource().projects().maps().create( 571 parent=_get_projects_path(), fields='name', body=request)) 572 map_name = result['name'] 573 url_format = '%s/%s/%s/tiles/{z}/{x}/{y}' % ( 574 _tile_base_url, _cloud_api_utils.VERSION, map_name) File ~/.local/lib/python3.9/site-packages/ee/data.py:332, in _execute_cloud_call(call, num_retries) 330 return call.execute(num_retries=num_retries) 331 except googleapiclient.errors.HttpError as e: --> 332 raise _translate_cloud_exception(e) EEException: Parameter 'classifier' is required.