Examples¶
Basic transformation¶
In [17]:
Copied!
from data_disaggregation.actions import transform
from data_disaggregation.vtypes import VT_Nominal, VT_NumericExt, VT_Numeric, VT_Ordinal
from data_disaggregation.actions import transform
from data_disaggregation.vtypes import VT_Nominal, VT_NumericExt, VT_Numeric, VT_Ordinal
In [18]:
Copied!
# create data (key-values)
data = {
"a": 5,
"b": 10,
"c": 30
}
# create weight mapping
weights = {
("b", "D"): 0.7,
("c", "E"): 0.7,
("a", "F"): 0.4,
("b", "F"): 0.3,
("c", "F"): 0.3,
}
# create data (key-values)
data = {
"a": 5,
"b": 10,
"c": 30
}
# create weight mapping
weights = {
("b", "D"): 0.7,
("c", "E"): 0.7,
("a", "F"): 0.4,
("b", "F"): 0.3,
("c", "F"): 0.3,
}
In [19]:
Copied!
# if data as categorical/nominal:
# "F" is 5 because "a" has largest share 0.4 (mode)
transform(VT_Nominal, data, weights)
# if data as categorical/nominal:
# "F" is 5 because "a" has largest share 0.4 (mode)
transform(VT_Nominal, data, weights)
Out[19]:
{'F': 5, 'E': 30, 'D': 10}
In [20]:
Copied!
# if data as categorical/ordinal:
# "F" is 10 because "b" has cumulative share (sum of shares <= "b") 0.4 + 0.3 = 0.7
# that is covering midpoint 0.5
transform(VT_Ordinal, data, weights)
# if data as categorical/ordinal:
# "F" is 10 because "b" has cumulative share (sum of shares <= "b") 0.4 + 0.3 = 0.7
# that is covering midpoint 0.5
transform(VT_Ordinal, data, weights)
Out[20]:
{'F': 10, 'E': 30, 'D': 10}
In [21]:
Copied!
# if data is numerical/intensive
# average (weighted) density
# D = 10 * 0.7 / 0.7
# E = 30 * 0.7 / 0.7
# F = (10 * 0.3 + 30 * 0.3 + 5 * 0.4) / 1.0
transform(VT_Numeric, data, weights)
# if data is numerical/intensive
# average (weighted) density
# D = 10 * 0.7 / 0.7
# E = 30 * 0.7 / 0.7
# F = (10 * 0.3 + 30 * 0.3 + 5 * 0.4) / 1.0
transform(VT_Numeric, data, weights)
Out[21]:
{'F': 14.0, 'E': 30.0, 'D': 10.0}
In [22]:
Copied!
# if data is numerical/extensive
# redistribute total sum of 45 according to (relative) shares:
# D = 10 * 0.7 / 1.0
# E = 30 * 0.7 / 1.0
# F = 10 * (0.3 / 1.0) + 30 * (0.3 / 1.0) + 5 * (0.4 / 0.4)
transform(VT_NumericExt, data, weights)
# if data is numerical/extensive
# redistribute total sum of 45 according to (relative) shares:
# D = 10 * 0.7 / 1.0
# E = 30 * 0.7 / 1.0
# F = 10 * (0.3 / 1.0) + 30 * (0.3 / 1.0) + 5 * (0.4 / 0.4)
transform(VT_NumericExt, data, weights)
Out[22]:
{'F': 17.0, 'E': 21.0, 'D': 7.0}
Basic transformation with pandas¶
In [23]:
Copied!
import pandas as pd
from data_disaggregation.actions import transform_pandas
from data_disaggregation.vtypes import VT_Nominal, VT_NumericExt, VT_Numeric, VT_Ordinal
import pandas as pd
from data_disaggregation.actions import transform_pandas
from data_disaggregation.vtypes import VT_Nominal, VT_NumericExt, VT_Numeric, VT_Ordinal
In [24]:
Copied!
# data as pandas series (with named index)
ds_data = pd.Series(data).rename_axis(index="dim_from")
ds_data
# data as pandas series (with named index)
ds_data = pd.Series(data).rename_axis(index="dim_from")
ds_data
Out[24]:
dim_from a 5 b 10 c 30 dtype: int64
In [25]:
Copied!
# weights as pandas series (with named indices)
ds_weights = pd.Series(weights).rename_axis(index=["dim_from", "dim_to"])
ds_weights
# weights as pandas series (with named indices)
ds_weights = pd.Series(weights).rename_axis(index=["dim_from", "dim_to"])
ds_weights
Out[25]:
dim_from dim_to b D 0.7 c E 0.7 a F 0.4 b F 0.3 c F 0.3 dtype: float64
In [26]:
Copied!
# if data as categorical/nominal:
# "F" is 5 because "a" has largest share 0.4 (mode)
transform_pandas(VT_Nominal, ds_data, ds_weights)
# if data as categorical/nominal:
# "F" is 5 because "a" has largest share 0.4 (mode)
transform_pandas(VT_Nominal, ds_data, ds_weights)
Out[26]:
dim_to D 10 E 30 F 5 Name: 0, dtype: int64
In [27]:
Copied!
# if data as categorical/ordinal:
# "F" is 10 because "b" has cumulative share (sum of shares <= "b") 0.4 + 0.3 = 0.7
# that is covering midpoint 0.5
transform_pandas(VT_Ordinal, ds_data, ds_weights)
# if data as categorical/ordinal:
# "F" is 10 because "b" has cumulative share (sum of shares <= "b") 0.4 + 0.3 = 0.7
# that is covering midpoint 0.5
transform_pandas(VT_Ordinal, ds_data, ds_weights)
Out[27]:
dim_to D 10 E 30 F 10 Name: 0, dtype: int64
In [28]:
Copied!
# if data is numerical/intensive
# average (weighted) density
# D = 10 * 0.7 / 0.7
# E = 30 * 0.7 / 0.7
# F = (10 * 0.3 + 30 * 0.3 + 5 * 0.4) / 1.0
transform_pandas(VT_Numeric, ds_data, ds_weights)
# if data is numerical/intensive
# average (weighted) density
# D = 10 * 0.7 / 0.7
# E = 30 * 0.7 / 0.7
# F = (10 * 0.3 + 30 * 0.3 + 5 * 0.4) / 1.0
transform_pandas(VT_Numeric, ds_data, ds_weights)
Out[28]:
dim_to D 10.0 E 30.0 F 14.0 Name: 0, dtype: float64
In [29]:
Copied!
# if data is numerical/extensive
# redistribute total sum of 45 according to (relative) shares:
# D = 10 * 0.7 / 1.0
# E = 30 * 0.7 / 1.0
# F = 10 * (0.3 / 1.0) + 30 * (0.3 / 1.0) + 5 * (0.4 / 0.4)
transform_pandas(VT_NumericExt, ds_data, ds_weights)
# if data is numerical/extensive
# redistribute total sum of 45 according to (relative) shares:
# D = 10 * 0.7 / 1.0
# E = 30 * 0.7 / 1.0
# F = 10 * (0.3 / 1.0) + 30 * (0.3 / 1.0) + 5 * (0.4 / 0.4)
transform_pandas(VT_NumericExt, ds_data, ds_weights)
Out[29]:
dim_to D 7.0 E 21.0 F 17.0 Name: 0, dtype: float64
Change output dimensions¶
Sometimes we need to keep some of the original index dimensions. In this case we have to specify the desired output dimension index explicitly.
In [30]:
Copied!
idx_out = ds_weights.index
transform_pandas(VT_NumericExt, ds_data, ds_weights, dim_out=idx_out)
idx_out = ds_weights.index
transform_pandas(VT_NumericExt, ds_data, ds_weights, dim_out=idx_out)
Out[30]:
dim_from dim_to a F 5.0 b D 7.0 F 3.0 c E 21.0 F 9.0 Name: 0, dtype: float64
In [ ]:
Copied!