In [58]:
%%writefile requirements_eda.txt
# jupyterlab
# basic
joblib
numpy
pandas
tifffile
imagecodecs
rasterio
matplotlib
seaborn
tqdm
# data
# awscli
Overwriting requirements_eda.txt
In [59]:
!pip install -q -r requirements_eda.txt
In [7]:
!conda install -y gdal
Collecting package metadata (current_repodata.json): done Solving environment: done ==> WARNING: A newer version of conda exists. <== current version: 4.9.2 latest version: 4.13.0 Please update conda by running $ conda update -n base -c defaults conda ## Package Plan ## environment location: /home/syu/anaconda3/envs/sn8 added / updated specs: - gdal The following packages will be downloaded: package | build ---------------------------|----------------- cairo-1.16.0 | h19f5f5c_2 1.1 MB cfitsio-3.470 | h5893167_7 833 KB curl-7.82.0 | h7f8727e_0 95 KB hdf5-1.10.6 | h3ffc7dd_1 3.6 MB jpeg-9e | h7f8727e_0 240 KB libboost-1.73.0 | h28710b8_12 13.8 MB libcurl-7.82.0 | h0b77cf5_0 342 KB libgdal-3.4.1 | h05199a0_1 19.5 MB libgfortran-ng-11.2.0 | h00389a5_1 20 KB libgfortran5-11.2.0 | h1234567_1 2.0 MB libpq-12.9 | h16c4e8d_3 2.1 MB libssh2-1.10.0 | h8f2d780_0 274 KB libtiff-4.2.0 | h2818925_1 452 KB libwebp-base-1.2.2 | h7f8727e_0 440 KB libxcb-1.15 | h7f8727e_0 505 KB libxml2-2.9.14 | h74e7548_0 718 KB libzip-1.8.0 | h5cef20c_0 102 KB numpy-1.22.3 | py38he7a7128_0 10 KB numpy-base-1.22.3 | py38hf524024_0 5.4 MB tiledb-2.3.3 | h1132f93_2 2.1 MB zstd-1.5.2 | ha4553b6_0 488 KB ------------------------------------------------------------ Total: 54.1 MB The following NEW packages will be INSTALLED: blas pkgs/main/linux-64::blas-1.0-mkl bzip2 pkgs/main/linux-64::bzip2-1.0.8-h7b6447c_0 c-ares pkgs/main/linux-64::c-ares-1.18.1-h7f8727e_0 cairo pkgs/main/linux-64::cairo-1.16.0-h19f5f5c_2 cfitsio pkgs/main/linux-64::cfitsio-3.470-h5893167_7 curl pkgs/main/linux-64::curl-7.82.0-h7f8727e_0 expat pkgs/main/linux-64::expat-2.4.4-h295c915_0 fontconfig pkgs/main/linux-64::fontconfig-2.13.1-h6c09931_0 freetype pkgs/main/linux-64::freetype-2.11.0-h70c0345_0 freexl pkgs/main/linux-64::freexl-1.0.6-h27cfd23_0 gdal pkgs/main/linux-64::gdal-3.4.1-py38h2c27f0e_0 geos pkgs/main/linux-64::geos-3.8.0-he6710b0_0 geotiff pkgs/main/linux-64::geotiff-1.7.0-hd69d5b1_0 giflib pkgs/main/linux-64::giflib-5.2.1-h7b6447c_0 glib pkgs/main/linux-64::glib-2.69.1-h4ff587b_1 hdf4 pkgs/main/linux-64::hdf4-4.2.13-h3ca952b_2 hdf5 pkgs/main/linux-64::hdf5-1.10.6-h3ffc7dd_1 icu pkgs/main/linux-64::icu-58.2-he6710b0_3 intel-openmp pkgs/main/linux-64::intel-openmp-2021.4.0-h06a4308_3561 jpeg pkgs/main/linux-64::jpeg-9e-h7f8727e_0 json-c pkgs/main/linux-64::json-c-0.13.1-h1bed415_0 kealib pkgs/main/linux-64::kealib-1.4.14-hb50703a_1 krb5 pkgs/main/linux-64::krb5-1.19.2-hac12032_0 libboost pkgs/main/linux-64::libboost-1.73.0-h28710b8_12 libcurl pkgs/main/linux-64::libcurl-7.82.0-h0b77cf5_0 libdap4 pkgs/main/linux-64::libdap4-3.19.1-h6ec2957_0 libedit pkgs/main/linux-64::libedit-3.1.20210910-h7f8727e_0 libev pkgs/main/linux-64::libev-4.33-h7f8727e_1 libgdal pkgs/main/linux-64::libgdal-3.4.1-h05199a0_1 libgfortran-ng pkgs/main/linux-64::libgfortran-ng-11.2.0-h00389a5_1 libgfortran5 pkgs/main/linux-64::libgfortran5-11.2.0-h1234567_1 libkml pkgs/main/linux-64::libkml-1.3.0-h7ecb851_5 libnetcdf pkgs/main/linux-64::libnetcdf-4.8.1-h42ceab0_1 libnghttp2 pkgs/main/linux-64::libnghttp2-1.46.0-hce63b2e_0 libpng pkgs/main/linux-64::libpng-1.6.37-hbc83047_0 libpq pkgs/main/linux-64::libpq-12.9-h16c4e8d_3 libspatialite pkgs/main/linux-64::libspatialite-4.3.0a-hbedb2dc_20 libssh2 pkgs/main/linux-64::libssh2-1.10.0-h8f2d780_0 libtiff pkgs/main/linux-64::libtiff-4.2.0-h2818925_1 libuuid pkgs/main/linux-64::libuuid-1.0.3-h7f8727e_2 libwebp-base pkgs/main/linux-64::libwebp-base-1.2.2-h7f8727e_0 libxcb pkgs/main/linux-64::libxcb-1.15-h7f8727e_0 libxml2 pkgs/main/linux-64::libxml2-2.9.14-h74e7548_0 libzip pkgs/main/linux-64::libzip-1.8.0-h5cef20c_0 lz4-c pkgs/main/linux-64::lz4-c-1.9.3-h295c915_1 mkl pkgs/main/linux-64::mkl-2021.4.0-h06a4308_640 mkl-service pkgs/main/linux-64::mkl-service-2.4.0-py38h7f8727e_0 mkl_fft pkgs/main/linux-64::mkl_fft-1.3.1-py38hd3c417c_0 mkl_random pkgs/main/linux-64::mkl_random-1.2.2-py38h51133e4_0 numpy pkgs/main/linux-64::numpy-1.22.3-py38he7a7128_0 numpy-base pkgs/main/linux-64::numpy-base-1.22.3-py38hf524024_0 openjpeg pkgs/main/linux-64::openjpeg-2.4.0-h3ad879b_0 pcre pkgs/main/linux-64::pcre-8.45-h295c915_0 pixman pkgs/main/linux-64::pixman-0.40.0-h7f8727e_1 poppler pkgs/main/linux-64::poppler-0.81.0-h01f5e8b_2 poppler-data pkgs/main/linux-64::poppler-data-0.4.11-h06a4308_0 proj pkgs/main/linux-64::proj-6.2.1-h05a3930_0 six pkgs/main/noarch::six-1.16.0-pyhd3eb1b0_1 tiledb pkgs/main/linux-64::tiledb-2.3.3-h1132f93_2 xerces-c pkgs/main/linux-64::xerces-c-3.2.3-h780794e_0 zstd pkgs/main/linux-64::zstd-1.5.2-ha4553b6_0 Downloading and Extracting Packages libzip-1.8.0 | 102 KB | ##################################### | 100% hdf5-1.10.6 | 3.6 MB | ##################################### | 100% tiledb-2.3.3 | 2.1 MB | ##################################### | 100% libssh2-1.10.0 | 274 KB | ##################################### | 100% zstd-1.5.2 | 488 KB | ##################################### | 100% cfitsio-3.470 | 833 KB | ##################################### | 100% libwebp-base-1.2.2 | 440 KB | ##################################### | 100% libboost-1.73.0 | 13.8 MB | ##################################### | 100% jpeg-9e | 240 KB | ##################################### | 100% libtiff-4.2.0 | 452 KB | ##################################### | 100% libgfortran-ng-11.2. | 20 KB | ##################################### | 100% libgfortran5-11.2.0 | 2.0 MB | ##################################### | 100% numpy-1.22.3 | 10 KB | ##################################### | 100% libcurl-7.82.0 | 342 KB | ##################################### | 100% curl-7.82.0 | 95 KB | ##################################### | 100% libxml2-2.9.14 | 718 KB | ##################################### | 100% numpy-base-1.22.3 | 5.4 MB | ##################################### | 100% cairo-1.16.0 | 1.1 MB | ##################################### | 100% libgdal-3.4.1 | 19.5 MB | ##################################### | 100% libxcb-1.15 | 505 KB | ##################################### | 100% libpq-12.9 | 2.1 MB | ##################################### | 100% Preparing transaction: done Verifying transaction: done Executing transaction: done
In [ ]:
# another env
# !aws s3 cp s3://spacenet-dataset/spacenet/SN8_floods/ ../data/ --recursive
In [5]:
!ls ../data/
Germany_Training_Public Louisiana-West_Test_Public tarballs Louisiana-East_Training_Public resolutions.txt
In [9]:
# check cli and version
!gdalinfo --version
!python --version
GDAL 3.4.1, released 2021/12/27 Python 3.8.13
In [64]:
import os
import warnings
import random
from pprint import pprint
from glob import glob
import json
import dataclasses
import rasterio as rio
from joblib import Parallel, delayed
from tqdm import tqdm
import tifffile
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
warnings.filterwarnings("ignore")
pd.options.display.max_colwidth = 250
pd.options.display.max_rows = 30
%matplotlib inline
In [37]:
@dataclasses.dataclass(frozen=False)
class CFG(object):
PATH_ROOT: str = '../data/'
seed: int = 471
csvs :list = [
'Germany_Training_Public/Germany_Training_Public_label_image_mapping.csv',
]
np.random.seed(CFG.seed)
random.seed(CFG.seed)
pprint({k:v for k, v in dict(vars(CFG)).items() if '__' not in k})
{'PATH_ROOT': '../data/', 'seed': 471}
In [44]:
df = pd.DataFrame()
# data/Germany_Training_Public/PRE-event/10500500C4DD7000_0_15_63.tif
df['pre'] = sorted(glob(f'{CFG.PATH_ROOT}*_Training_Public/PRE-event/*.tif'))
df.head(4)
Out[44]:
pre | |
---|---|
0 | ../data/Germany_Training_Public/PRE-event/10500500C4DD7000_0_15_63.tif |
1 | ../data/Germany_Training_Public/PRE-event/10500500C4DD7000_0_15_65.tif |
2 | ../data/Germany_Training_Public/PRE-event/10500500C4DD7000_0_15_66.tif |
3 | ../data/Germany_Training_Public/PRE-event/10500500C4DD7000_0_15_67.tif |
In [48]:
def get_post_annotation(row):
PATH_PRE = row['pre']
sp = PATH_PRE.split('/')
PATH_POST = PATH_PRE.replace('PRE-event', 'POST-event')
FNAME_ANNO = '/'.join(sp[-1].split('_')[1:])
PATH_ANNO = '/'.join(PATH_PRE.replace('PRE-event', 'annotations').split('/')[:-2] + [FNAME_ANNO])
# check exist
exist_psot = os.path.exists(PATH_POST)
exist_anno = os.path.exists(PATH_ANNO)
return PATH_POST, exist_psot, PATH_ANNO, exist_anno
df[['pst', 'pst_exist', 'ano', 'ano_exist']] = df.apply(get_post_annotation, axis=1, result_type='expand')
df.head(3)
Out[48]:
pre | pst | pst_exist | ano | ano_exist | |
---|---|---|---|---|---|
0 | ../data/Germany_Training_Public/PRE-event/10500500C4DD7000_0_15_63.tif | ../data/Germany_Training_Public/POST-event/10500500C4DD7000_0_15_63.tif | False | ../data/Germany_Training_Public/0/15/63.tif | False |
1 | ../data/Germany_Training_Public/PRE-event/10500500C4DD7000_0_15_65.tif | ../data/Germany_Training_Public/POST-event/10500500C4DD7000_0_15_65.tif | False | ../data/Germany_Training_Public/0/15/65.tif | False |
2 | ../data/Germany_Training_Public/PRE-event/10500500C4DD7000_0_15_66.tif | ../data/Germany_Training_Public/POST-event/10500500C4DD7000_0_15_66.tif | False | ../data/Germany_Training_Public/0/15/66.tif | False |
In [ ]:
In [49]:
df = pd.read_csv('../data/Germany_Training_Public/Germany_Training_Public_label_image_mapping.csv')
df.head(2)
Out[49]:
label | pre-event image | post-event image 1 | post-event image 2 | |
---|---|---|---|---|
0 | 0_41_59.geojson | 10500500C4DD7000_0_41_59.tif | 10500500E6DD3C00_0_41_59.tif | NaN |
1 | 0_45_64.geojson | 10500500C4DD7000_0_45_64.tif | 10500500E6DD3C00_0_45_64.tif | NaN |
In [81]:
idx = 0
PATH_BASE = '../data/Germany_Training_Public/'
row = df.iloc[idx]
PATH_PRE = PATH_BASE + 'PRE-event/' + row['pre-event image']
PATH_PST = PATH_BASE + 'POST-event/' + row['post-event image 1']
PATH_ANO = PATH_BASE + 'annotations/' + row['label']
In [79]:
with rio.open(PATH_PRE) as img :
img = img.read()
plt.figure(figsize=(12, 8), dpi=200)
plt.title(f'PRE event idx:{idx}')
plt.imshow(img.transpose(1, 2, 0))
# plt.colorbar()
plt.show();
Copyright © Code Fetcher 2022