Examples¶
Basic transformation¶
In [17]:
Copied!
from data_disaggregation.actions import transform
from data_disaggregation.vtypes import VT_Nominal, VT_NumericExt, VT_Numeric, VT_Ordinal
from data_disaggregation.actions import transform
from data_disaggregation.vtypes import VT_Nominal, VT_NumericExt, VT_Numeric, VT_Ordinal
In [18]:
Copied!
# create data (key-values)
data = {
"a": 5,
"b": 10,
"c": 30
}
# create weight mapping
weights = {
("b", "D"): 0.7,
("c", "E"): 0.7,
("a", "F"): 0.4,
("b", "F"): 0.3,
("c", "F"): 0.3,
}
# create data (key-values)
data = {
"a": 5,
"b": 10,
"c": 30
}
# create weight mapping
weights = {
("b", "D"): 0.7,
("c", "E"): 0.7,
("a", "F"): 0.4,
("b", "F"): 0.3,
("c", "F"): 0.3,
}
In [19]:
Copied!
# if data as categorical/nominal:
# "F" is 5 because "a" has largest share 0.4 (mode)
transform(VT_Nominal, data, weights)
# if data as categorical/nominal:
# "F" is 5 because "a" has largest share 0.4 (mode)
transform(VT_Nominal, data, weights)
Out[19]:
{'F': 5, 'E': 30, 'D': 10}
In [20]:
Copied!
# if data as categorical/ordinal:
# "F" is 10 because "b" has cumulative share (sum of shares <= "b") 0.4 + 0.3 = 0.7
# that is covering midpoint 0.5
transform(VT_Ordinal, data, weights)
# if data as categorical/ordinal:
# "F" is 10 because "b" has cumulative share (sum of shares <= "b") 0.4 + 0.3 = 0.7
# that is covering midpoint 0.5
transform(VT_Ordinal, data, weights)
Out[20]:
{'F': 10, 'E': 30, 'D': 10}
In [21]:
Copied!
# if data is numerical/intensive
# average (weighted) density
# D = 10 * 0.7 / 0.7
# E = 30 * 0.7 / 0.7
# F = (10 * 0.3 + 30 * 0.3 + 5 * 0.4) / 1.0
transform(VT_Numeric, data, weights)
# if data is numerical/intensive
# average (weighted) density
# D = 10 * 0.7 / 0.7
# E = 30 * 0.7 / 0.7
# F = (10 * 0.3 + 30 * 0.3 + 5 * 0.4) / 1.0
transform(VT_Numeric, data, weights)
Out[21]:
{'F': 14.0, 'E': 30.0, 'D': 10.0}
In [22]:
Copied!
# if data is numerical/extensive
# redistribute total sum of 45 according to (relative) shares:
# D = 10 * 0.7 / 1.0
# E = 30 * 0.7 / 1.0
# F = 10 * (0.3 / 1.0) + 30 * (0.3 / 1.0) + 5 * (0.4 / 0.4)
transform(VT_NumericExt, data, weights)
# if data is numerical/extensive
# redistribute total sum of 45 according to (relative) shares:
# D = 10 * 0.7 / 1.0
# E = 30 * 0.7 / 1.0
# F = 10 * (0.3 / 1.0) + 30 * (0.3 / 1.0) + 5 * (0.4 / 0.4)
transform(VT_NumericExt, data, weights)
Out[22]:
{'F': 17.0, 'E': 21.0, 'D': 7.0}
Basic transformation with pandas¶
In [23]:
Copied!
import pandas as pd
from data_disaggregation.actions import transform_pandas
from data_disaggregation.vtypes import VT_Nominal, VT_NumericExt, VT_Numeric, VT_Ordinal
import pandas as pd
from data_disaggregation.actions import transform_pandas
from data_disaggregation.vtypes import VT_Nominal, VT_NumericExt, VT_Numeric, VT_Ordinal
In [24]:
Copied!
# data as pandas series (with named index)
ds_data = pd.Series(data).rename_axis(index="dim_from")
ds_data
# data as pandas series (with named index)
ds_data = pd.Series(data).rename_axis(index="dim_from")
ds_data
Out[24]:
dim_from a 5 b 10 c 30 dtype: int64
In [25]:
Copied!
# weights as pandas series (with named indices)
ds_weights = pd.Series(weights).rename_axis(index=["dim_from", "dim_to"])
ds_weights
# weights as pandas series (with named indices)
ds_weights = pd.Series(weights).rename_axis(index=["dim_from", "dim_to"])
ds_weights
Out[25]:
dim_from dim_to b D 0.7 c E 0.7 a F 0.4 b F 0.3 c F 0.3 dtype: float64
In [26]:
Copied!
# if data as categorical/nominal:
# "F" is 5 because "a" has largest share 0.4 (mode)
transform_pandas(VT_Nominal, ds_data, ds_weights)
# if data as categorical/nominal:
# "F" is 5 because "a" has largest share 0.4 (mode)
transform_pandas(VT_Nominal, ds_data, ds_weights)
Out[26]:
dim_to D 10 E 30 F 5 Name: 0, dtype: int64
In [27]:
Copied!
# if data as categorical/ordinal:
# "F" is 10 because "b" has cumulative share (sum of shares <= "b") 0.4 + 0.3 = 0.7
# that is covering midpoint 0.5
transform_pandas(VT_Ordinal, ds_data, ds_weights)
# if data as categorical/ordinal:
# "F" is 10 because "b" has cumulative share (sum of shares <= "b") 0.4 + 0.3 = 0.7
# that is covering midpoint 0.5
transform_pandas(VT_Ordinal, ds_data, ds_weights)
Out[27]:
dim_to D 10 E 30 F 10 Name: 0, dtype: int64
In [28]:
Copied!
# if data is numerical/intensive
# average (weighted) density
# D = 10 * 0.7 / 0.7
# E = 30 * 0.7 / 0.7
# F = (10 * 0.3 + 30 * 0.3 + 5 * 0.4) / 1.0
transform_pandas(VT_Numeric, ds_data, ds_weights)
# if data is numerical/intensive
# average (weighted) density
# D = 10 * 0.7 / 0.7
# E = 30 * 0.7 / 0.7
# F = (10 * 0.3 + 30 * 0.3 + 5 * 0.4) / 1.0
transform_pandas(VT_Numeric, ds_data, ds_weights)
Out[28]:
dim_to D 10.0 E 30.0 F 14.0 Name: 0, dtype: float64
In [29]:
Copied!
# if data is numerical/extensive
# redistribute total sum of 45 according to (relative) shares:
# D = 10 * 0.7 / 1.0
# E = 30 * 0.7 / 1.0
# F = 10 * (0.3 / 1.0) + 30 * (0.3 / 1.0) + 5 * (0.4 / 0.4)
transform_pandas(VT_NumericExt, ds_data, ds_weights)
# if data is numerical/extensive
# redistribute total sum of 45 according to (relative) shares:
# D = 10 * 0.7 / 1.0
# E = 30 * 0.7 / 1.0
# F = 10 * (0.3 / 1.0) + 30 * (0.3 / 1.0) + 5 * (0.4 / 0.4)
transform_pandas(VT_NumericExt, ds_data, ds_weights)
Out[29]:
dim_to D 7.0 E 21.0 F 17.0 Name: 0, dtype: float64
Change output dimensions¶
Sometimes we need to keep some of the original index dimensions. In this case we have to specify the desired output dimension index explicitly.
In [30]:
Copied!
idx_out = ds_weights.index
transform_pandas(VT_NumericExt, ds_data, ds_weights, dim_out=idx_out)
idx_out = ds_weights.index
transform_pandas(VT_NumericExt, ds_data, ds_weights, dim_out=idx_out)
Out[30]:
dim_from dim_to a F 5.0 b D 7.0 F 3.0 c E 21.0 F 9.0 Name: 0, dtype: float64
In [ ]:
Copied!