Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
simplify joined kwargs
  • Loading branch information
somefreestring committed Feb 13, 2020
commit a9d8127fb32a8508566d2f7899211b5188d84a7b
3 changes: 2 additions & 1 deletion pandas_ml_utils/datafetching/fetch_yahoo.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pandas as pd

from ..pandas_utils_extension import inner_join
from ..utils.functions import join_kwargs


@cachetools.func.ttl_cache(maxsize=1, ttl=10 * 60)
Expand All @@ -16,7 +17,7 @@ def fetch_yahoo(*args: str, period: str = 'max', multi_index: bool = False, **kw
else:
# convert args to kwargs
if len(args) > 0:
kwargs = {**{arg: arg for arg in args}, **kwargs}
kwargs = join_kwargs({arg: arg for arg in args}, kwargs)

for k, v in kwargs.items():
px = f'{k}_'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import pandas as pd

from pandas_ml_utils.model.features_and_labels.target_encoder import TargetLabelEncoder
from pandas_ml_utils.utils.functions import join_kwargs

_log = logging.getLogger(__name__)
_LABELS = Union[List[str], TargetLabelEncoder, Dict[str, Union[List[str], TargetLabelEncoder]]]
Expand Down Expand Up @@ -137,7 +138,7 @@ def with_labels(self, labels: _LABELS):

def with_kwargs(self, **kwargs):
    """Return a deep copy of this instance with *kwargs* merged into its kwargs.

    Newly supplied *kwargs* win on key collisions; the original instance is
    left untouched.
    """
    updated = deepcopy(self)
    updated.kwargs = join_kwargs(self.kwargs, kwargs)
    return updated

def __repr__(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
from pandas_ml_utils.model.features_and_labels.target_encoder import TargetLabelEncoder, \
MultipleTargetEncodingWrapper, IdentityEncoder
from pandas_ml_utils.utils.classes import ReScaler
from pandas_ml_utils.utils.functions import log_with_time, call_callable_dynamic_args, unique_top_level_columns
from pandas_ml_utils.utils.functions import log_with_time, call_callable_dynamic_args, unique_top_level_columns, \
join_kwargs

_log = logging.getLogger(__name__)

Expand All @@ -24,10 +25,10 @@ def __init__(self, df: pd.DataFrame, features_and_labels: FeaturesAndLabels, **k
labels = features_and_labels.labels
encoder = lambda frame, **kwargs: frame
label_columns = None
joined_kwargs = join_kwargs(features_and_labels.kwargs, kwargs)

# eventually transform callable labels to its expected structure
if callable(labels):
joined_kwargs = {**features_and_labels.kwargs, **kwargs}
labels = call_callable_dynamic_args(labels, df, **joined_kwargs)

# unfold labels, currently supported types are:
Expand Down Expand Up @@ -56,14 +57,15 @@ def __init__(self, df: pd.DataFrame, features_and_labels: FeaturesAndLabels, **k
self._targets = features_and_labels.targets
self._gross_loss = features_and_labels.gross_loss
self._encoder = encoder
self._joined_kwargs = joined_kwargs

# pre assign this variable
# but notice that it get overwritten by an engineered data frame later on
self._df = df

# this function uses clojures
def call_dynamic(func, *args):
joined_kwargs = {**self.__dict__, **features_and_labels.kwargs, **kwargs}
joined_kwargs = join_kwargs(self.__dict__, self._joined_kwargs)
return call_callable_dynamic_args(func, *args, **joined_kwargs)

self._df = call_dynamic(features_and_labels.pre_processor, df)
Expand Down Expand Up @@ -216,7 +218,8 @@ def label_names(self, level_above=None) -> List[Union[Tuple[str, ...],str]]:
@property
def labels_df(self) -> pd.DataFrame:
    """Return the (encoded) label columns as a DataFrame.

    The configured encoder is applied to the raw label columns using the
    joined kwargs, rows containing NaN are dropped, and the result is cast
    to the configured label type (if any).
    """
    # here we can do all sorts of tricks and encodings ...
    # NOTE(review): removed a stale commented-out call that was left behind
    # by the join_kwargs refactoring.
    df = self._encoder(self._df[self._labels_columns], **self._joined_kwargs).dropna().copy()
    return df if self._label_type is None else df.astype(self._label_type)

@property
Expand Down
15 changes: 12 additions & 3 deletions pandas_ml_utils/model/features_and_labels/target_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy as np
from typing import Iterable, List, Dict, Union, Callable

from pandas_ml_utils.utils.functions import one_hot, call_callable_dynamic_args
from pandas_ml_utils.utils.functions import one_hot, call_callable_dynamic_args, join_kwargs


class TargetLabelEncoder(object):
Expand All @@ -28,7 +28,7 @@ def decode(self, df: pd.DataFrame) -> pd.DataFrame:

def with_kwargs(self, **kwargs):
    """Return a deep copy of this encoder whose kwargs are extended by *kwargs*.

    Keys supplied here override keys already present on the copy; the
    original encoder remains unchanged.
    """
    clone = deepcopy(self)
    clone.kwargs = join_kwargs(clone.kwargs, kwargs)
    return clone

def __len__(self):
Expand Down Expand Up @@ -163,8 +163,17 @@ def encoded_labels_columns(self) -> List[str]:
return [f'{self.label}_{i}' for i in range(self.nr_of_categories)]

def encode(self, df: pd.DataFrame, **kwargs) -> pd.DataFrame:
    """One-hot encode the label column of *df* into ``nr_of_categories`` columns."""
    merged_kwargs = join_kwargs(self.kwargs, kwargs)

    # run the optional pre-processor first; it may yield a frame or a series
    if self.pre_processor:
        processed = call_callable_dynamic_args(self.pre_processor, df, **merged_kwargs)
    else:
        processed = df

    # reduce to the single label series used for one hot encoding
    if isinstance(processed, pd.Series):
        label_series = processed.rename(self.label)
    else:
        label_series = processed[self.label]

    # one hot encode each row and expand the result into columns
    return label_series.to_frame().apply(
        lambda r: one_hot(r.values.sum(), self.nr_of_categories),
        axis=1, result_type='expand')

def decode(self, df: pd.DataFrame) -> pd.DataFrame:
Expand Down
4 changes: 2 additions & 2 deletions pandas_ml_utils/model/fitting/fitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from pandas_ml_utils.model.fitting.fit import Fit
from pandas_ml_utils.model.models import Model
from pandas_ml_utils.summary.summary import Summary
from pandas_ml_utils.utils.functions import log_with_time
from pandas_ml_utils.utils.functions import log_with_time, join_kwargs

_log = logging.getLogger(__name__)

Expand Down Expand Up @@ -132,7 +132,7 @@ def __hyper_opt(hyper_parameter_space,

def f(args):
sampled_parameters = {k: args[i] for i, k in enumerate(keys)}
model = model_provider(**sampled_parameters, **constants)
model = model_provider(**join_kwargs(sampled_parameters, constants))
loss = __train_loop(model, cross_validation, train, test)
if loss is None:
raise ValueError("Can not hyper tune if model loss is None")
Expand Down
4 changes: 2 additions & 2 deletions pandas_ml_utils/model/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from pandas_ml_utils.model.features_and_labels.features_and_labels import FeaturesAndLabels
from pandas_ml_utils.model.features_and_labels.target_encoder import TargetLabelEncoder
from pandas_ml_utils.summary.summary import Summary
from pandas_ml_utils.utils.functions import suitable_kwargs
from pandas_ml_utils.utils.functions import suitable_kwargs, join_kwargs

_log = logging.getLogger(__name__)

Expand Down Expand Up @@ -392,7 +392,7 @@ def __call__(self, *args, **kwargs):
self.summary_provider,
self.epochs,
deepcopy(self.callbacks),
**deepcopy(self.kwargs), **kwargs)
**join_kwargs(deepcopy(self.kwargs), kwargs))

# copy weights before return
new_model.set_weights(self.get_weights())
Expand Down
9 changes: 9 additions & 0 deletions pandas_ml_utils/utils/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,15 @@
import numpy as np
import pandas as pd


def join_kwargs(*dicts) -> Dict:
    """Merge any number of dicts into one new dict.

    Later dicts take precedence on key collisions, mirroring the
    ``{**a, **b}`` idiom this helper replaces. The inputs are never mutated.

    :param dicts: dictionaries to merge, in increasing priority
    :return: a new dict with the union of all key/value pairs
    """
    # avoid shadowing the builtin `dict` and avoid rebuilding the
    # accumulator on every iteration (the old `{**acc, **d}` was O(n^2))
    joined: Dict = {}
    for d in dicts:
        joined.update(d)

    return joined


def log_with_time(log_statement: Callable[[], None]):
log_statement()
return pc()
Expand Down