Source code for causallift.pipeline

# Copyright 2018-2019 QuantumBlack Visual Analytics Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
# (either separately or in combination, "QuantumBlack Trademarks") are
# trademarks of QuantumBlack. The License does not grant you any right or
# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
# Trademarks or any confusingly similar mark as a trademark for your product,
#     or use the QuantumBlack Trademarks in any other manner that might cause
# confusion in the marketplace, including but not limited to in advertising,
# on websites, or on software.
#
# See the License for the specific language governing permissions and
# limitations under the License.
"""Pipeline construction."""

from kedro.pipeline import Pipeline, node

from .nodes.estimate_propensity import *  # NOQA
from .nodes.model_for_each import *  # NOQA

# Here you can define your data-driven pipeline by importing your functions
# and adding them to the pipeline as follows:
#
# from nodes.data_wrangling import clean_data, compute_features
#
# pipeline = Pipeline([
#     node(clean_data, 'customers', 'prepared_customers'),
#     node(compute_features, 'prepared_customers', ['X_train', 'Y_train'])
# ])
#
# Once you have your pipeline defined, you can run it from the root of your
# project by calling:
#
# $ kedro run
#


[docs]def create_pipeline(**kwargs): """Create the project's pipeline. Args: kwargs: Ignore any additional arguments added in the future. Returns: Pipeline: The resulting pipeline. """ pipeline = Pipeline( [ Pipeline( [ node( bundle_train_and_test_data, ["args_raw", "train_df", "test_df"], "df_00", ) ], tags="011_bundle_train_and_test_data", ), Pipeline( [ node( impute_cols_features, ["args_raw", "df_00"], "args_intermediate" ), node( schedule_propensity_scoring, ["args_intermediate", "df_00"], "args", ), ], tags="121_prepare_args", ), Pipeline( [ node( treatment_fractions_, ["args_raw", "df_00"], "treatment_fractions", ) ], tags="131_treatment_fractions_", ), Pipeline( [node(fit_propensity, ["args", "df_00"], "propensity_model")], tags="211_fit_propensity", ), Pipeline( [ node( estimate_propensity, ["args", "df_00", "propensity_model"], "df_01", ) ], tags="221_estimate_propensity", ), Pipeline( [ node( model_for_treated_fit, ["args", "df_01"], "treated__model_dict" ), node( model_for_untreated_fit, ["args", "df_01"], "untreated__model_dict", ), ], tags="311_fit", ), Pipeline( [ node( bundle_treated_and_untreated_models, ["treated__model_dict", "untreated__model_dict"], "uplift_models_dict", ) ], tags="312_bundle_2_models", ), Pipeline( [ node( model_for_treated_predict_proba, ["args", "df_01", "uplift_models_dict"], "treated__proba", ), node( model_for_untreated_predict_proba, ["args", "df_01", "uplift_models_dict"], "untreated__proba", ), ], tags="321_predict_proba", ), Pipeline( [ node( compute_cate, ["treated__proba", "untreated__proba"], "cate_estimated", ) ], tags="411_compute_cate", ), Pipeline( [ node( add_cate_to_df, [ "args", "df_01", "cate_estimated", "treated__proba", "untreated__proba", ], "df_02", ) ], tags="421_add_cate_to_df", ), Pipeline( [ node( recommend_by_cate, ["args", "df_02", "treatment_fractions"], "df_03", ) ], tags="511_recommend_by_cate", ), Pipeline( [ node( model_for_treated_simulate_recommendation, ["args", "df_03", "uplift_models_dict"], "treated__sim_eval_df", ), node( model_for_untreated_simulate_recommendation, ["args", "df_03", "uplift_models_dict"], "untreated__sim_eval_df", ), ], tags="521_simulate_recommendation", ), Pipeline( [ node( estimate_effect, ["args", "treated__sim_eval_df", "untreated__sim_eval_df"], "estimated_effect_df", ) ], tags="531_estimate_effect", ), # Pipeline([ # node(FUNC, # ['IN'], # ['OUT'], # ), # ], tags='PIPELINE'), ] ) return pipeline