Snippet 1
from scipy.sparse import csr_matrix
from pandas.api.types import CategoricalDtype
person_c = CategoricalDtype(sorted(frame.person.unique()), ordered=True)
thing_c = CategoricalDtype(sorted(frame.thing.unique()), ordered=True)
row = frame.person.astype(person_c).cat.codes
col = frame.thing.astype(thing_c).cat.codes
sparse_matrix = csr_matrix((frame["count"], (row, col)),
shape=(person_c.categories.size, thing_c.categories.size))
>>> sparse_matrix
<3x4 sparse matrix of type ''
with 6 stored elements in Compressed Sparse Row format>
>>> sparse_matrix.todense()
matrix([[0, 1, 0, 1],
[1, 0, 0, 1],
[1, 0, 1, 0]], dtype=int64)
dfs = pd.SparseDataFrame(sparse_matrix,
index=person_c.categories,
columns=thing_c.categories,
default_fill_value=0)
>>> dfs
a b c d
him 0 1 0 1
me 1 0 0 1
you 1 0 1 0
Copyright © Code Fetcher 2020